The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_generic.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)sys_generic.c       8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/5.4/sys/kern/sys_generic.c 144848 2005-04-10 00:53:36Z kensmith $");
   39 
   40 #include "opt_ktrace.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/sysproto.h>
   45 #include <sys/filedesc.h>
   46 #include <sys/filio.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/file.h>
   49 #include <sys/proc.h>
   50 #include <sys/signalvar.h>
   51 #include <sys/socketvar.h>
   52 #include <sys/uio.h>
   53 #include <sys/kernel.h>
   54 #include <sys/limits.h>
   55 #include <sys/malloc.h>
   56 #include <sys/poll.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/selinfo.h>
   59 #include <sys/sleepqueue.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/sysctl.h>
   62 #include <sys/sysent.h>
   63 #include <sys/vnode.h>
   64 #include <sys/bio.h>
   65 #include <sys/buf.h>
   66 #include <sys/condvar.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 #include <vm/vm.h>
   71 #include <vm/vm_page.h>
   72 
      /*
       * Malloc types used in this file:
       *   M_IOCTLOPS - kernel copy of the ioctl() argument buffer.
       *   M_SELECT   - select() bit buffer when the on-stack array in
       *                kern_select() is too small.
       *   M_IOV      - uio/iovec storage released by readv()/writev() after
       *                copyinuio(); not declared static here, so presumably
       *                shared with other files (confirm against callers).
       */
   73 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
   74 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
   75 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
   76 
      /* Forward declarations of file-local helpers. */
   77 static int      pollscan(struct thread *, struct pollfd *, u_int);
   78 static int      selscan(struct thread *, fd_mask **, fd_mask **, int);
   79 static int      dofileread(struct thread *, struct file *, int, void *,
   80                     size_t, off_t, int);
   81 static int      dofilewrite(struct thread *, struct file *, int,
   82                     const void *, size_t, off_t, int);
   83 static void     doselwakeup(struct selinfo *, int);
   84 
   85 /*
   86  * Read system call.
   87  */
   88 #ifndef _SYS_SYSPROTO_H_
   89 struct read_args {
   90         int     fd;
   91         void    *buf;
   92         size_t  nbyte;
   93 };
   94 #endif
   95 /*
   96  * MPSAFE
   97  */
   98 int
   99 read(td, uap)
  100         struct thread *td;
  101         struct read_args *uap;
  102 {
  103         struct file *fp;
  104         int error;
  105 
  106         if ((error = fget_read(td, uap->fd, &fp)) == 0) {
  107                 error = dofileread(td, fp, uap->fd, uap->buf,
  108                             uap->nbyte, (off_t)-1, 0);
  109                 fdrop(fp, td);
  110         }
  111         return(error);
  112 }
  113 
  114 /*
  115  * Pread system call
  116  */
  117 #ifndef _SYS_SYSPROTO_H_
  118 struct pread_args {
  119         int     fd;
  120         void    *buf;
  121         size_t  nbyte;
  122         int     pad;
  123         off_t   offset;
  124 };
  125 #endif
  126 /*
  127  * MPSAFE
  128  */
  129 int
  130 pread(td, uap)
  131         struct thread *td;
  132         struct pread_args *uap;
  133 {
  134         struct file *fp;
  135         int error;
  136 
  137         if ((error = fget_read(td, uap->fd, &fp)) != 0)
  138                 return (error);
  139         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
  140                 error = ESPIPE;
  141         else if (uap->offset < 0 && fp->f_vnode->v_type != VCHR)
  142                 error = EINVAL;
  143         else {
  144                 error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte, 
  145                             uap->offset, FOF_OFFSET);
  146         }
  147         fdrop(fp, td);
  148         return(error);
  149 }
  150 
  151 /*
  152  * Code common for read and pread
  153  */
  154 static int
  155 dofileread(td, fp, fd, buf, nbyte, offset, flags)
  156         struct thread *td;
  157         struct file *fp;
  158         int fd, flags;
  159         void *buf;
  160         size_t nbyte;
  161         off_t offset;
  162 {
  163         struct uio auio;
  164         struct iovec aiov;
  165         long cnt, error = 0;
  166 #ifdef KTRACE
  167         struct uio *ktruio = NULL;
  168 #endif
  169 
  170         aiov.iov_base = buf;
  171         aiov.iov_len = nbyte;
  172         auio.uio_iov = &aiov;
  173         auio.uio_iovcnt = 1;
  174         auio.uio_offset = offset;
  175         if (nbyte > INT_MAX)
  176                 return (EINVAL);
  177         auio.uio_resid = nbyte;
  178         auio.uio_rw = UIO_READ;
  179         auio.uio_segflg = UIO_USERSPACE;
  180         auio.uio_td = td;
  181 #ifdef KTRACE
  182         if (KTRPOINT(td, KTR_GENIO))
  183                 ktruio = cloneuio(&auio);
  184 #endif
  185         cnt = nbyte;
  186 
  187         if ((error = fo_read(fp, &auio, td->td_ucred, flags, td))) {
  188                 if (auio.uio_resid != cnt && (error == ERESTART ||
  189                     error == EINTR || error == EWOULDBLOCK))
  190                         error = 0;
  191         }
  192         cnt -= auio.uio_resid;
  193 #ifdef KTRACE
  194         if (ktruio != NULL) {
  195                 ktruio->uio_resid = cnt;
  196                 ktrgenio(fd, UIO_READ, ktruio, error);
  197         }
  198 #endif
  199         td->td_retval[0] = cnt;
  200         return (error);
  201 }
  202 
  203 /*
  204  * Scatter read system call.
  205  */
  206 #ifndef _SYS_SYSPROTO_H_
  207 struct readv_args {
  208         int     fd;
  209         struct  iovec *iovp;
  210         u_int   iovcnt;
  211 };
  212 #endif
  213 /*
  214  * MPSAFE
  215  */
  216 int
  217 readv(struct thread *td, struct readv_args *uap)
  218 {
  219         struct file *fp;
  220         struct uio *auio = NULL;
  221         long cnt;
  222         int error;
  223 #ifdef KTRACE
  224         struct uio *ktruio = NULL;
  225 #endif
  226 
  227         error = fget_read(td, uap->fd, &fp);
  228         if (error)
  229                 return (error);
  230         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  231         if (error) {
  232                 fdrop(fp, td);
  233                 return (error);
  234         }
  235         auio->uio_rw = UIO_READ;
  236         auio->uio_td = td;
  237 #ifdef KTRACE
  238         if (KTRPOINT(td, KTR_GENIO)) 
  239                 ktruio = cloneuio(auio);
  240 #endif
  241         cnt = auio->uio_resid;
  242         if ((error = fo_read(fp, auio, td->td_ucred, 0, td))) {
  243                 if (auio->uio_resid != cnt && (error == ERESTART ||
  244                     error == EINTR || error == EWOULDBLOCK))
  245                         error = 0;
  246         }
  247         cnt -= auio->uio_resid;
  248 #ifdef KTRACE
  249         if (ktruio != NULL) {
  250                 ktruio->uio_resid = cnt;
  251                 ktrgenio(uap->fd, UIO_READ, ktruio, error);
  252         }
  253 #endif
  254         td->td_retval[0] = cnt;
  255         free(auio, M_IOV);
  256         fdrop(fp, td);
  257         return (error);
  258 }
  259 
  260 /*
  261  * Write system call
  262  */
  263 #ifndef _SYS_SYSPROTO_H_
  264 struct write_args {
  265         int     fd;
  266         const void *buf;
  267         size_t  nbyte;
  268 };
  269 #endif
  270 /*
  271  * MPSAFE
  272  */
  273 int
  274 write(td, uap)
  275         struct thread *td;
  276         struct write_args *uap;
  277 {
  278         struct file *fp;
  279         int error;
  280 
  281         if ((error = fget_write(td, uap->fd, &fp)) == 0) {
  282                 error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte,
  283                             (off_t)-1, 0);
  284                 fdrop(fp, td);
  285         } else {
  286                 error = EBADF;  /* XXX this can't be right */
  287         }
  288         return(error);
  289 }
  290 
  291 /*
  292  * Pwrite system call
  293  */
  294 #ifndef _SYS_SYSPROTO_H_
  295 struct pwrite_args {
  296         int     fd;
  297         const void *buf;
  298         size_t  nbyte;
  299         int     pad;
  300         off_t   offset;
  301 };
  302 #endif
  303 /*
  304  * MPSAFE
  305  */
  306 int
  307 pwrite(td, uap)
  308         struct thread *td;
  309         struct pwrite_args *uap;
  310 {
  311         struct file *fp;
  312         int error;
  313 
  314         if ((error = fget_write(td, uap->fd, &fp)) == 0) {
  315                 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
  316                         error = ESPIPE;
  317                 else if (uap->offset < 0 && fp->f_vnode->v_type != VCHR)
  318                         error = EINVAL;
  319                 else {
  320                         error = dofilewrite(td, fp, uap->fd, uap->buf,
  321                                     uap->nbyte, uap->offset, FOF_OFFSET);
  322                 }
  323                 fdrop(fp, td);
  324         } else {
  325                 error = EBADF;  /* this can't be right */
  326         }
  327         return(error);
  328 }
  329 
  330 static int
  331 dofilewrite(td, fp, fd, buf, nbyte, offset, flags)
  332         struct thread *td;
  333         struct file *fp;
  334         int fd, flags;
  335         const void *buf;
  336         size_t nbyte;
  337         off_t offset;
  338 {
  339         struct uio auio;
  340         struct iovec aiov;
  341         long cnt, error = 0;
  342 #ifdef KTRACE
  343         struct uio *ktruio = NULL;
  344 #endif
  345 
  346         aiov.iov_base = (void *)(uintptr_t)buf;
  347         aiov.iov_len = nbyte;
  348         auio.uio_iov = &aiov;
  349         auio.uio_iovcnt = 1;
  350         auio.uio_offset = offset;
  351         if (nbyte > INT_MAX)
  352                 return (EINVAL);
  353         auio.uio_resid = nbyte;
  354         auio.uio_rw = UIO_WRITE;
  355         auio.uio_segflg = UIO_USERSPACE;
  356         auio.uio_td = td;
  357 #ifdef KTRACE
  358         if (KTRPOINT(td, KTR_GENIO))
  359                 ktruio = cloneuio(&auio);
  360 #endif
  361         cnt = nbyte;
  362         if (fp->f_type == DTYPE_VNODE)
  363                 bwillwrite();
  364         if ((error = fo_write(fp, &auio, td->td_ucred, flags, td))) {
  365                 if (auio.uio_resid != cnt && (error == ERESTART ||
  366                     error == EINTR || error == EWOULDBLOCK))
  367                         error = 0;
  368                 /* Socket layer is responsible for issuing SIGPIPE. */
  369                 if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
  370                         PROC_LOCK(td->td_proc);
  371                         psignal(td->td_proc, SIGPIPE);
  372                         PROC_UNLOCK(td->td_proc);
  373                 }
  374         }
  375         cnt -= auio.uio_resid;
  376 #ifdef KTRACE
  377         if (ktruio != NULL) {
  378                 ktruio->uio_resid = cnt;
  379                 ktrgenio(fd, UIO_WRITE, ktruio, error);
  380         }
  381 #endif
  382         td->td_retval[0] = cnt;
  383         return (error);
  384 }
  385 
  386 /*
  387  * Gather write system call
  388  */
  389 #ifndef _SYS_SYSPROTO_H_
  390 struct writev_args {
  391         int     fd;
  392         struct  iovec *iovp;
  393         u_int   iovcnt;
  394 };
  395 #endif
  396 /*
  397  * MPSAFE
  398  */
  399 int
  400 writev(struct thread *td, struct writev_args *uap)
  401 {
  402         struct file *fp;
  403         struct uio *auio = NULL;
  404         long cnt;
  405         int error;
  406 #ifdef KTRACE
  407         struct uio *ktruio = NULL;
  408 #endif
  409 
  410         error = fget_write(td, uap->fd, &fp);
  411         if (error)
  412                 return (EBADF);
  413         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  414         if (error) {
  415                 fdrop(fp, td);
  416                 return (error);
  417         }
  418         auio->uio_rw = UIO_WRITE;
  419         auio->uio_td = td;
  420 #ifdef KTRACE
  421         if (KTRPOINT(td, KTR_GENIO))
  422                 ktruio = cloneuio(auio);
  423 #endif
  424         cnt = auio->uio_resid;
  425         if (fp->f_type == DTYPE_VNODE)
  426                 bwillwrite();
  427         if ((error = fo_write(fp, auio, td->td_ucred, 0, td))) {
  428                 if (auio->uio_resid != cnt && (error == ERESTART ||
  429                     error == EINTR || error == EWOULDBLOCK))
  430                         error = 0;
  431                 if (error == EPIPE) {
  432                         PROC_LOCK(td->td_proc);
  433                         psignal(td->td_proc, SIGPIPE);
  434                         PROC_UNLOCK(td->td_proc);
  435                 }
  436         }
  437         cnt -= auio->uio_resid;
  438 #ifdef KTRACE
  439         if (ktruio != NULL) {
  440                 ktruio->uio_resid = cnt;
  441                 ktrgenio(uap->fd, UIO_WRITE, ktruio, error);
  442         }
  443 #endif
  444         td->td_retval[0] = cnt;
  445         fdrop(fp, td);
  446         free(auio, M_IOV);
  447         return (error);
  448 }
  449 
  450 /*
  451  * Ioctl system call
  452  */
  453 #ifndef _SYS_SYSPROTO_H_
  454 struct ioctl_args {
  455         int     fd;
  456         u_long  com;
  457         caddr_t data;
  458 };
  459 #endif
  460 /*
  461  * MPSAFE
  462  */
  463 /* ARGSUSED */
  464 int
  465 ioctl(struct thread *td, struct ioctl_args *uap)
  466 {
  467         struct file *fp;
  468         struct filedesc *fdp;
  469         u_long com;
  470         int error = 0;
  471         u_int size;
  472         caddr_t data, memp;
  473         int tmp;
  474 
  475         if ((error = fget(td, uap->fd, &fp)) != 0)
  476                 return (error);
  477         if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
  478                 fdrop(fp, td);
  479                 return (EBADF);
  480         }
  481         fdp = td->td_proc->p_fd;
  482         switch (com = uap->com) {
  483         case FIONCLEX:
  484                 FILEDESC_LOCK_FAST(fdp);
  485                 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
  486                 FILEDESC_UNLOCK_FAST(fdp);
  487                 fdrop(fp, td);
  488                 return (0);
  489         case FIOCLEX:
  490                 FILEDESC_LOCK_FAST(fdp);
  491                 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
  492                 FILEDESC_UNLOCK_FAST(fdp);
  493                 fdrop(fp, td);
  494                 return (0);
  495         }
  496 
  497         /*
  498          * Interpret high order word to find amount of data to be
  499          * copied to/from the user's address space.
  500          */
  501         size = IOCPARM_LEN(com);
  502         if ((size > IOCPARM_MAX) ||
  503             ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0)) {
  504                 fdrop(fp, td);
  505                 return (ENOTTY);
  506         }
  507 
  508         if (size > 0) {
  509                 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
  510                 data = memp;
  511         } else {
  512                 memp = NULL;
  513                 data = (void *)&uap->data;
  514         }
  515         if (com & IOC_IN) {
  516                 error = copyin(uap->data, data, (u_int)size);
  517                 if (error) {
  518                         free(memp, M_IOCTLOPS);
  519                         fdrop(fp, td);
  520                         return (error);
  521                 }
  522         } else if (com & IOC_OUT) {
  523                 /*
  524                  * Zero the buffer so the user always
  525                  * gets back something deterministic.
  526                  */
  527                 bzero(data, size);
  528         }
  529 
  530         if (com == FIONBIO) {
  531                 FILE_LOCK(fp);
  532                 if ((tmp = *(int *)data))
  533                         fp->f_flag |= FNONBLOCK;
  534                 else
  535                         fp->f_flag &= ~FNONBLOCK;
  536                 FILE_UNLOCK(fp);
  537                 data = (void *)&tmp;
  538         } else if (com == FIOASYNC) {
  539                 FILE_LOCK(fp);
  540                 if ((tmp = *(int *)data))
  541                         fp->f_flag |= FASYNC;
  542                 else
  543                         fp->f_flag &= ~FASYNC;
  544                 FILE_UNLOCK(fp);
  545                 data = (void *)&tmp;
  546         }
  547 
  548         error = fo_ioctl(fp, com, data, td->td_ucred, td);
  549 
  550         if (error == 0 && (com & IOC_OUT))
  551                 error = copyout(data, uap->data, (u_int)size);
  552 
  553         if (memp != NULL)
  554                 free(memp, M_IOCTLOPS);
  555         fdrop(fp, td);
  556         return (error);
  557 }
  558 
  559 /*
  560  * sellock and selwait are initialized in selectinit() via SYSINIT.
      *
      * sellock is the mutex held by kern_select() and poll() around their
      * wait loop, and selwait is the condition variable they sleep on
      * (paired with sellock).  nselcoll is sampled before each descriptor
      * scan and compared afterwards; a changed value forces a rescan.  It
      * is exported read-only as the kern.nselcoll sysctl.
  561  */
  562 struct mtx      sellock;
  563 struct cv       selwait;
  564 u_int           nselcoll;       /* Select collisions since boot */
  565 SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
  566 
  567 /*
  568  * Select system call.
  569  */
  570 #ifndef _SYS_SYSPROTO_H_
  571 struct select_args {
  572         int     nd;
  573         fd_set  *in, *ou, *ex;
  574         struct  timeval *tv;
  575 };
  576 #endif
  577 /*
  578  * MPSAFE
  579  */
  580 int
  581 select(td, uap)
  582         register struct thread *td;
  583         register struct select_args *uap;
  584 {
  585         struct timeval tv, *tvp;
  586         int error;
  587 
  588         if (uap->tv != NULL) {
  589                 error = copyin(uap->tv, &tv, sizeof(tv));
  590                 if (error)
  591                         return (error);
  592                 tvp = &tv;
  593         } else
  594                 tvp = NULL;
  595 
  596         return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp));
  597 }
  598 
  599 int
  600 kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
  601     fd_set *fd_ex, struct timeval *tvp)
  602 {
  603         struct filedesc *fdp;
  604         /*
  605          * The magic 2048 here is chosen to be just enough for FD_SETSIZE
  606          * infds with the new FD_SETSIZE of 1024, and more than enough for
  607          * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
  608          * of 256.
  609          */
  610         fd_mask s_selbits[howmany(2048, NFDBITS)];
  611         fd_mask *ibits[3], *obits[3], *selbits, *sbp;
  612         struct timeval atv, rtv, ttv;
  613         int error, timo;
  614         u_int ncoll, nbufbytes, ncpbytes, nfdbits;
  615 
  616         if (nd < 0)
  617                 return (EINVAL);
  618         fdp = td->td_proc->p_fd;
  619         
  620         FILEDESC_LOCK_FAST(fdp);
  621 
  622         if (nd > td->td_proc->p_fd->fd_nfiles)
  623                 nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
  624         FILEDESC_UNLOCK_FAST(fdp);
  625 
  626         /*
  627          * Allocate just enough bits for the non-null fd_sets.  Use the
  628          * preallocated auto buffer if possible.
  629          */
  630         nfdbits = roundup(nd, NFDBITS);
  631         ncpbytes = nfdbits / NBBY;
  632         nbufbytes = 0;
  633         if (fd_in != NULL)
  634                 nbufbytes += 2 * ncpbytes;
  635         if (fd_ou != NULL)
  636                 nbufbytes += 2 * ncpbytes;
  637         if (fd_ex != NULL)
  638                 nbufbytes += 2 * ncpbytes;
  639         if (nbufbytes <= sizeof s_selbits)
  640                 selbits = &s_selbits[0];
  641         else
  642                 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
  643 
  644         /*
  645          * Assign pointers into the bit buffers and fetch the input bits.
  646          * Put the output buffers together so that they can be bzeroed
  647          * together.
  648          */
  649         sbp = selbits;
  650 #define getbits(name, x) \
  651         do {                                                            \
  652                 if (name == NULL)                                       \
  653                         ibits[x] = NULL;                                \
  654                 else {                                                  \
  655                         ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;   \
  656                         obits[x] = sbp;                                 \
  657                         sbp += ncpbytes / sizeof *sbp;                  \
  658                         error = copyin(name, ibits[x], ncpbytes);       \
  659                         if (error != 0)                                 \
  660                                 goto done_nosellock;                    \
  661                 }                                                       \
  662         } while (0)
  663         getbits(fd_in, 0);
  664         getbits(fd_ou, 1);
  665         getbits(fd_ex, 2);
  666 #undef  getbits
  667         if (nbufbytes != 0)
  668                 bzero(selbits, nbufbytes / 2);
  669 
  670         if (tvp != NULL) {
  671                 atv = *tvp;
  672                 if (itimerfix(&atv)) {
  673                         error = EINVAL;
  674                         goto done_nosellock;
  675                 }
  676                 getmicrouptime(&rtv);
  677                 timevaladd(&atv, &rtv);
  678         } else {
  679                 atv.tv_sec = 0;
  680                 atv.tv_usec = 0;
  681         }
  682         timo = 0;
  683         TAILQ_INIT(&td->td_selq);
  684         mtx_lock(&sellock);
  685 retry:
  686         ncoll = nselcoll;
  687         mtx_lock_spin(&sched_lock);
  688         td->td_flags |= TDF_SELECT;
  689         mtx_unlock_spin(&sched_lock);
  690         mtx_unlock(&sellock);
  691 
  692         error = selscan(td, ibits, obits, nd);
  693         mtx_lock(&sellock);
  694         if (error || td->td_retval[0])
  695                 goto done;
  696         if (atv.tv_sec || atv.tv_usec) {
  697                 getmicrouptime(&rtv);
  698                 if (timevalcmp(&rtv, &atv, >=))
  699                         goto done;
  700                 ttv = atv;
  701                 timevalsub(&ttv, &rtv);
  702                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  703                     24 * 60 * 60 * hz : tvtohz(&ttv);
  704         }
  705 
  706         /*
  707          * An event of interest may occur while we do not hold
  708          * sellock, so check TDF_SELECT and the number of
  709          * collisions and rescan the file descriptors if
  710          * necessary.
  711          */
  712         mtx_lock_spin(&sched_lock);
  713         if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
  714                 mtx_unlock_spin(&sched_lock);
  715                 goto retry;
  716         }
  717         mtx_unlock_spin(&sched_lock);
  718 
  719         if (timo > 0)
  720                 error = cv_timedwait_sig(&selwait, &sellock, timo);
  721         else
  722                 error = cv_wait_sig(&selwait, &sellock);
  723         
  724         if (error == 0)
  725                 goto retry;
  726 
  727 done:
  728         clear_selinfo_list(td);
  729         mtx_lock_spin(&sched_lock);
  730         td->td_flags &= ~TDF_SELECT;
  731         mtx_unlock_spin(&sched_lock);
  732         mtx_unlock(&sellock);
  733 
  734 done_nosellock:
  735         /* select is not restarted after signals... */
  736         if (error == ERESTART)
  737                 error = EINTR;
  738         if (error == EWOULDBLOCK)
  739                 error = 0;
  740 #define putbits(name, x) \
  741         if (name && (error2 = copyout(obits[x], name, ncpbytes))) \
  742                 error = error2;
  743         if (error == 0) {
  744                 int error2;
  745 
  746                 putbits(fd_in, 0);
  747                 putbits(fd_ou, 1);
  748                 putbits(fd_ex, 2);
  749 #undef putbits
  750         }
  751         if (selbits != &s_selbits[0])
  752                 free(selbits, M_SELECT);
  753 
  754         return (error);
  755 }
  756 
  757 static int
  758 selscan(td, ibits, obits, nfd)
  759         struct thread *td;
  760         fd_mask **ibits, **obits;
  761         int nfd;
  762 {
  763         int msk, i, fd;
  764         fd_mask bits;
  765         struct file *fp;
  766         int n = 0;
  767         /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
  768         static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
  769         struct filedesc *fdp = td->td_proc->p_fd;
  770 
  771         FILEDESC_LOCK(fdp);
  772         for (msk = 0; msk < 3; msk++) {
  773                 if (ibits[msk] == NULL)
  774                         continue;
  775                 for (i = 0; i < nfd; i += NFDBITS) {
  776                         bits = ibits[msk][i/NFDBITS];
  777                         /* ffs(int mask) not portable, fd_mask is long */
  778                         for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
  779                                 if (!(bits & 1))
  780                                         continue;
  781                                 if ((fp = fget_locked(fdp, fd)) == NULL) {
  782                                         FILEDESC_UNLOCK(fdp);
  783                                         return (EBADF);
  784                                 }
  785                                 if (fo_poll(fp, flag[msk], td->td_ucred,
  786                                     td)) {
  787                                         obits[msk][(fd)/NFDBITS] |=
  788                                             ((fd_mask)1 << ((fd) % NFDBITS));
  789                                         n++;
  790                                 }
  791                         }
  792                 }
  793         }
  794         FILEDESC_UNLOCK(fdp);
  795         td->td_retval[0] = n;
  796         return (0);
  797 }
  798 
  799 /*
  800  * Poll system call.
  801  */
  802 #ifndef _SYS_SYSPROTO_H_
  803 struct poll_args {
  804         struct pollfd *fds;
  805         u_int   nfds;
  806         int     timeout;
  807 };
  808 #endif
  809 /*
  810  * MPSAFE
  811  */
  812 int
  813 poll(td, uap)
  814         struct thread *td;
  815         struct poll_args *uap;
  816 {
  817         struct pollfd *bits;
  818         struct pollfd smallbits[32];
  819         struct timeval atv, rtv, ttv;
  820         int error = 0, timo;
  821         u_int ncoll, nfds;
  822         size_t ni;
  823 
  824         nfds = uap->nfds;
  825 
  826         /*
  827          * This is kinda bogus.  We have fd limits, but that is not
  828          * really related to the size of the pollfd array.  Make sure
  829          * we let the process use at least FD_SETSIZE entries and at
  830          * least enough for the current limits.  We want to be reasonably
  831          * safe, but not overly restrictive.
  832          */
  833         PROC_LOCK(td->td_proc);
  834         if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) &&
  835             (nfds > FD_SETSIZE)) {
  836                 PROC_UNLOCK(td->td_proc);
  837                 error = EINVAL;
  838                 goto done2;
  839         }
  840         PROC_UNLOCK(td->td_proc);
  841         ni = nfds * sizeof(struct pollfd);
  842         if (ni > sizeof(smallbits))
  843                 bits = malloc(ni, M_TEMP, M_WAITOK);
  844         else
  845                 bits = smallbits;
  846         error = copyin(uap->fds, bits, ni);
  847         if (error)
  848                 goto done_nosellock;
  849         if (uap->timeout != INFTIM) {
  850                 atv.tv_sec = uap->timeout / 1000;
  851                 atv.tv_usec = (uap->timeout % 1000) * 1000;
  852                 if (itimerfix(&atv)) {
  853                         error = EINVAL;
  854                         goto done_nosellock;
  855                 }
  856                 getmicrouptime(&rtv);
  857                 timevaladd(&atv, &rtv);
  858         } else {
  859                 atv.tv_sec = 0;
  860                 atv.tv_usec = 0;
  861         }
  862         timo = 0;
  863         TAILQ_INIT(&td->td_selq);
  864         mtx_lock(&sellock);
  865 retry:
  866         ncoll = nselcoll;
  867         mtx_lock_spin(&sched_lock);
  868         td->td_flags |= TDF_SELECT;
  869         mtx_unlock_spin(&sched_lock);
  870         mtx_unlock(&sellock);
  871 
  872         error = pollscan(td, bits, nfds);
  873         mtx_lock(&sellock);
  874         if (error || td->td_retval[0])
  875                 goto done;
  876         if (atv.tv_sec || atv.tv_usec) {
  877                 getmicrouptime(&rtv);
  878                 if (timevalcmp(&rtv, &atv, >=))
  879                         goto done;
  880                 ttv = atv;
  881                 timevalsub(&ttv, &rtv);
  882                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  883                     24 * 60 * 60 * hz : tvtohz(&ttv);
  884         }
  885         /*
  886          * An event of interest may occur while we do not hold
  887          * sellock, so check TDF_SELECT and the number of collisions
  888          * and rescan the file descriptors if necessary.
  889          */
  890         mtx_lock_spin(&sched_lock);
  891         if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
  892                 mtx_unlock_spin(&sched_lock);
  893                 goto retry;
  894         }
  895         mtx_unlock_spin(&sched_lock);
  896 
  897         if (timo > 0)
  898                 error = cv_timedwait_sig(&selwait, &sellock, timo);
  899         else
  900                 error = cv_wait_sig(&selwait, &sellock);
  901 
  902         if (error == 0)
  903                 goto retry;
  904 
  905 done:
  906         clear_selinfo_list(td);
  907         mtx_lock_spin(&sched_lock);
  908         td->td_flags &= ~TDF_SELECT;
  909         mtx_unlock_spin(&sched_lock);
  910         mtx_unlock(&sellock);
  911 
  912 done_nosellock:
  913         /* poll is not restarted after signals... */
  914         if (error == ERESTART)
  915                 error = EINTR;
  916         if (error == EWOULDBLOCK)
  917                 error = 0;
  918         if (error == 0) {
  919                 error = copyout(bits, uap->fds, ni);
  920                 if (error)
  921                         goto out;
  922         }
  923 out:
  924         if (ni > sizeof(smallbits))
  925                 free(bits, M_TEMP);
  926 done2:
  927         return (error);
  928 }
  929 
  930 static int
  931 pollscan(td, fds, nfd)
  932         struct thread *td;
  933         struct pollfd *fds;
  934         u_int nfd;
  935 {
  936         register struct filedesc *fdp = td->td_proc->p_fd;
  937         int i;
  938         struct file *fp;
  939         int n = 0;
  940 
  941         FILEDESC_LOCK(fdp);
  942         for (i = 0; i < nfd; i++, fds++) {
  943                 if (fds->fd >= fdp->fd_nfiles) {
  944                         fds->revents = POLLNVAL;
  945                         n++;
  946                 } else if (fds->fd < 0) {
  947                         fds->revents = 0;
  948                 } else {
  949                         fp = fdp->fd_ofiles[fds->fd];
  950                         if (fp == NULL) {
  951                                 fds->revents = POLLNVAL;
  952                                 n++;
  953                         } else {
  954                                 /*
  955                                  * Note: backend also returns POLLHUP and
  956                                  * POLLERR if appropriate.
  957                                  */
  958                                 fds->revents = fo_poll(fp, fds->events,
  959                                     td->td_ucred, td);
  960                                 if (fds->revents != 0)
  961                                         n++;
  962                         }
  963                 }
  964         }
  965         FILEDESC_UNLOCK(fdp);
  966         td->td_retval[0] = n;
  967         return (0);
  968 }
  969 
  970 /*
  971  * OpenBSD poll system call.
  972  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
  973  */
  974 #ifndef _SYS_SYSPROTO_H_
  975 struct openbsd_poll_args {
  976         struct pollfd *fds;
  977         u_int   nfds;
  978         int     timeout;
  979 };
  980 #endif
  981 /*
  982  * MPSAFE
  983  */
  984 int
  985 openbsd_poll(td, uap)
  986         register struct thread *td;
  987         register struct openbsd_poll_args *uap;
  988 {
  989         return (poll(td, (struct poll_args *)uap));
  990 }
  991 
  992 /*
  993  * Remove the references to the thread from all of the objects
  994  * we were polling.
  995  *
  996  * This code assumes that the underlying owner of the selinfo
  997  * structure will hold sellock before it changes it, and that
  998  * it will unlink itself from our list if it goes away.
  999  */
 1000 void
 1001 clear_selinfo_list(td)
 1002         struct thread *td;
 1003 {
 1004         struct selinfo *si;
 1005 
 1006         mtx_assert(&sellock, MA_OWNED);
 1007         TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
 1008                 si->si_thread = NULL;
 1009         TAILQ_INIT(&td->td_selq);
 1010 }
 1011 
 1012 /*
 1013  * Record a select request.
 1014  */
 1015 void
 1016 selrecord(selector, sip)
 1017         struct thread *selector;
 1018         struct selinfo *sip;
 1019 {
 1020 
 1021         mtx_lock(&sellock);
 1022         /*
 1023          * If the selinfo's thread pointer is NULL then take ownership of it.
 1024          *
 1025          * If the thread pointer is not NULL and it points to another
 1026          * thread, then we have a collision.
 1027          *
 1028          * If the thread pointer is not NULL and points back to us then leave
 1029          * it alone as we've already added pointed it at us and added it to
 1030          * our list.
 1031          */
 1032         if (sip->si_thread == NULL) {
 1033                 sip->si_thread = selector;
 1034                 TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
 1035         } else if (sip->si_thread != selector) {
 1036                 sip->si_flags |= SI_COLL;
 1037         }
 1038 
 1039         mtx_unlock(&sellock);
 1040 }
 1041 
 1042 /* Wake up a selecting thread. */
 1043 void
 1044 selwakeup(sip)
 1045         struct selinfo *sip;
 1046 {
 1047         doselwakeup(sip, -1);
 1048 }
 1049 
 1050 /* Wake up a selecting thread, and set its priority. */
 1051 void
 1052 selwakeuppri(sip, pri)
 1053         struct selinfo *sip;
 1054         int pri;
 1055 {
 1056         doselwakeup(sip, pri);
 1057 }
 1058 
 1059 /*
 1060  * Do a wakeup when a selectable event occurs.
 1061  */
 1062 static void
 1063 doselwakeup(sip, pri)
 1064         struct selinfo *sip;
 1065         int pri;
 1066 {
 1067         struct thread *td;
 1068 
 1069         mtx_lock(&sellock);
 1070         td = sip->si_thread;
 1071         if ((sip->si_flags & SI_COLL) != 0) {
 1072                 nselcoll++;
 1073                 sip->si_flags &= ~SI_COLL;
 1074                 cv_broadcastpri(&selwait, pri);
 1075         }
 1076         if (td == NULL) {
 1077                 mtx_unlock(&sellock);
 1078                 return;
 1079         }
 1080         TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
 1081         sip->si_thread = NULL;
 1082         mtx_lock_spin(&sched_lock);
 1083         td->td_flags &= ~TDF_SELECT;
 1084         mtx_unlock_spin(&sched_lock);
 1085         sleepq_remove(td, &selwait);
 1086         mtx_unlock(&sellock);
 1087 }
 1088 
 1089 static void selectinit(void *);
 1090 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
 1091 
 1092 /* ARGSUSED*/
 1093 static void
 1094 selectinit(dummy)
 1095         void *dummy;
 1096 {
 1097         cv_init(&selwait, "select");
 1098         mtx_init(&sellock, "sellck", NULL, MTX_DEF);
 1099 }

Cache object: 26e5dd8747862191ba638fd9e6c25d79


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.