The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_generic.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)sys_generic.c       8.5 (Berkeley) 1/21/94
   39  * $FreeBSD$
   40  */
   41 
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/sysproto.h>
   47 #include <sys/filedesc.h>
   48 #include <sys/filio.h>
   49 #include <sys/fcntl.h>
   50 #include <sys/file.h>
   51 #include <sys/proc.h>
   52 #include <sys/signalvar.h>
   53 #include <sys/socketvar.h>
   54 #include <sys/uio.h>
   55 #include <sys/kernel.h>
   56 #include <sys/malloc.h>
   57 #include <sys/poll.h>
   58 #include <sys/resourcevar.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/sysent.h>
   61 #include <sys/buf.h>
   62 #ifdef KTRACE
   63 #include <sys/ktrace.h>
   64 #endif
   65 #include <vm/vm.h>
   66 #include <vm/vm_page.h>
   67 
   68 #include <machine/limits.h>
   69 
   70 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
   71 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
   72 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
   73 
   74 static int      pollscan __P((struct proc *, struct pollfd *, u_int));
   75 static int      selscan __P((struct proc *, fd_mask **, fd_mask **, int));
   76 static int      dofileread __P((struct proc *, struct file *, int, void *,
   77                     size_t, off_t, int));
   78 static int      dofilewrite __P((struct proc *, struct file *, int,
   79                     const void *, size_t, off_t, int));
   80 
   81 struct file*
   82 holdfp(fdp, fd, flag)
   83         struct filedesc* fdp;
   84         int fd, flag;
   85 {
   86         struct file* fp;
   87 
   88         if (((u_int)fd) >= fdp->fd_nfiles ||
   89             (fp = fdp->fd_ofiles[fd]) == NULL ||
   90             (fp->f_flag & flag) == 0) {
   91                 return (NULL);
   92         }
   93         fhold(fp);
   94         return (fp);
   95 }
   96 
   97 /*
   98  * Read system call.
   99  */
  100 #ifndef _SYS_SYSPROTO_H_
  101 struct read_args {
  102         int     fd;
  103         void    *buf;
  104         size_t  nbyte;
  105 };
  106 #endif
  107 int
  108 read(p, uap)
  109         struct proc *p;
  110         register struct read_args *uap;
  111 {
  112         register struct file *fp;
  113         int error;
  114 
  115         if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
  116                 return (EBADF);
  117         error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
  118         fdrop(fp, p);
  119         return(error);
  120 }
  121 
  122 /*
  123  * Pread system call
  124  */
  125 #ifndef _SYS_SYSPROTO_H_
  126 struct pread_args {
  127         int     fd;
  128         void    *buf;
  129         size_t  nbyte;
  130         int     pad;
  131         off_t   offset;
  132 };
  133 #endif
  134 int
  135 pread(p, uap)
  136         struct proc *p;
  137         register struct pread_args *uap;
  138 {
  139         register struct file *fp;
  140         int error;
  141 
  142         if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
  143                 return (EBADF);
  144         if (fp->f_type != DTYPE_VNODE) {
  145                 error = ESPIPE;
  146         } else {
  147             error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 
  148                 uap->offset, FOF_OFFSET);
  149         }
  150         fdrop(fp, p);
  151         return(error);
  152 }
  153 
  154 /*
  155  * Code common for read and pread
  156  */
  157 int
  158 dofileread(p, fp, fd, buf, nbyte, offset, flags)
  159         struct proc *p;
  160         struct file *fp;
  161         int fd, flags;
  162         void *buf;
  163         size_t nbyte;
  164         off_t offset;
  165 {
  166         struct uio auio;
  167         struct iovec aiov;
  168         long cnt, error = 0;
  169 #ifdef KTRACE
  170         struct iovec ktriov;
  171         struct uio ktruio;
  172         int didktr = 0;
  173 #endif
  174 
  175         aiov.iov_base = (caddr_t)buf;
  176         aiov.iov_len = nbyte;
  177         auio.uio_iov = &aiov;
  178         auio.uio_iovcnt = 1;
  179         auio.uio_offset = offset;
  180         if (nbyte > INT_MAX)
  181                 return (EINVAL);
  182         auio.uio_resid = nbyte;
  183         auio.uio_rw = UIO_READ;
  184         auio.uio_segflg = UIO_USERSPACE;
  185         auio.uio_procp = p;
  186 #ifdef KTRACE
  187         /*
  188          * if tracing, save a copy of iovec
  189          */
  190         if (KTRPOINT(p, KTR_GENIO)) {
  191                 ktriov = aiov;
  192                 ktruio = auio;
  193                 didktr = 1;
  194         }
  195 #endif
  196         cnt = nbyte;
  197 
  198         if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
  199                 if (auio.uio_resid != cnt && (error == ERESTART ||
  200                     error == EINTR || error == EWOULDBLOCK))
  201                         error = 0;
  202         }
  203         cnt -= auio.uio_resid;
  204 #ifdef KTRACE
  205         if (didktr && error == 0) {
  206                 ktruio.uio_iov = &ktriov;
  207                 ktruio.uio_resid = cnt;
  208                 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error);
  209         }
  210 #endif
  211         p->p_retval[0] = cnt;
  212         return (error);
  213 }
  214 
  215 /*
  216  * Scatter read system call.
  217  */
  218 #ifndef _SYS_SYSPROTO_H_
  219 struct readv_args {
  220         int     fd;
  221         struct  iovec *iovp;
  222         u_int   iovcnt;
  223 };
  224 #endif
  225 int
  226 readv(p, uap)
  227         struct proc *p;
  228         register struct readv_args *uap;
  229 {
  230         register struct file *fp;
  231         register struct filedesc *fdp = p->p_fd;
  232         struct uio auio;
  233         register struct iovec *iov;
  234         struct iovec *needfree = NULL;
  235         struct iovec aiov[UIO_SMALLIOV];
  236         long i, cnt, error = 0;
  237         u_int iovlen;
  238 #ifdef KTRACE
  239         struct iovec *ktriov = NULL;
  240         struct uio ktruio;
  241 #endif
  242 
  243         if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL)
  244                 return (EBADF);
  245         /* note: can't use iovlen until iovcnt is validated */
  246         iovlen = uap->iovcnt * sizeof (struct iovec);
  247         if (uap->iovcnt > UIO_SMALLIOV) {
  248                 if (uap->iovcnt > UIO_MAXIOV) {
  249                         error = EINVAL;
  250                         goto done;
  251                 }
  252                 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
  253                 needfree = iov;
  254         } else
  255                 iov = aiov;
  256         auio.uio_iov = iov;
  257         auio.uio_iovcnt = uap->iovcnt;
  258         auio.uio_rw = UIO_READ;
  259         auio.uio_segflg = UIO_USERSPACE;
  260         auio.uio_procp = p;
  261         auio.uio_offset = -1;
  262         if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
  263                 goto done;
  264         auio.uio_resid = 0;
  265         for (i = 0; i < uap->iovcnt; i++) {
  266                 if (iov->iov_len > INT_MAX - auio.uio_resid) {
  267                         error = EINVAL;
  268                         goto done;
  269                 }
  270                 auio.uio_resid += iov->iov_len;
  271                 iov++;
  272         }
  273 #ifdef KTRACE
  274         /*
  275          * if tracing, save a copy of iovec
  276          */
  277         if (KTRPOINT(p, KTR_GENIO))  {
  278                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  279                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  280                 ktruio = auio;
  281         }
  282 #endif
  283         cnt = auio.uio_resid;
  284         if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) {
  285                 if (auio.uio_resid != cnt && (error == ERESTART ||
  286                     error == EINTR || error == EWOULDBLOCK))
  287                         error = 0;
  288         }
  289         cnt -= auio.uio_resid;
  290 #ifdef KTRACE
  291         if (ktriov != NULL) {
  292                 if (error == 0) {
  293                         ktruio.uio_iov = ktriov;
  294                         ktruio.uio_resid = cnt;
  295                         ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio,
  296                             error);
  297                 }
  298                 FREE(ktriov, M_TEMP);
  299         }
  300 #endif
  301         p->p_retval[0] = cnt;
  302 done:
  303         fdrop(fp, p);
  304         if (needfree)
  305                 FREE(needfree, M_IOV);
  306         return (error);
  307 }
  308 
  309 /*
  310  * Write system call
  311  */
  312 #ifndef _SYS_SYSPROTO_H_
  313 struct write_args {
  314         int     fd;
  315         const void *buf;
  316         size_t  nbyte;
  317 };
  318 #endif
  319 int
  320 write(p, uap)
  321         struct proc *p;
  322         register struct write_args *uap;
  323 {
  324         register struct file *fp;
  325         int error;
  326 
  327         if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
  328                 return (EBADF);
  329         error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
  330         fdrop(fp, p);
  331         return(error);
  332 }
  333 
  334 /*
  335  * Pwrite system call
  336  */
  337 #ifndef _SYS_SYSPROTO_H_
  338 struct pwrite_args {
  339         int     fd;
  340         const void *buf;
  341         size_t  nbyte;
  342         int     pad;
  343         off_t   offset;
  344 };
  345 #endif
  346 int
  347 pwrite(p, uap)
  348         struct proc *p;
  349         register struct pwrite_args *uap;
  350 {
  351         register struct file *fp;
  352         int error;
  353 
  354         if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
  355                 return (EBADF);
  356         if (fp->f_type != DTYPE_VNODE) {
  357                 error = ESPIPE;
  358         } else {
  359             error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
  360                 uap->offset, FOF_OFFSET);
  361         }
  362         fdrop(fp, p);
  363         return(error);
  364 }
  365 
  366 static int
  367 dofilewrite(p, fp, fd, buf, nbyte, offset, flags)
  368         struct proc *p;
  369         struct file *fp;
  370         int fd, flags;
  371         const void *buf;
  372         size_t nbyte;
  373         off_t offset;
  374 {
  375         struct uio auio;
  376         struct iovec aiov;
  377         long cnt, error = 0;
  378 #ifdef KTRACE
  379         struct iovec ktriov;
  380         struct uio ktruio;
  381         int didktr = 0;
  382 #endif
  383 
  384         aiov.iov_base = (void *)(uintptr_t)buf;
  385         aiov.iov_len = nbyte;
  386         auio.uio_iov = &aiov;
  387         auio.uio_iovcnt = 1;
  388         auio.uio_offset = offset;
  389         if (nbyte > INT_MAX)
  390                 return (EINVAL);
  391         auio.uio_resid = nbyte;
  392         auio.uio_rw = UIO_WRITE;
  393         auio.uio_segflg = UIO_USERSPACE;
  394         auio.uio_procp = p;
  395 #ifdef KTRACE
  396         /*
  397          * if tracing, save a copy of iovec and uio
  398          */
  399         if (KTRPOINT(p, KTR_GENIO)) {
  400                 ktriov = aiov;
  401                 ktruio = auio;
  402                 didktr = 1;
  403         }
  404 #endif
  405         cnt = nbyte;
  406         if (fp->f_type == DTYPE_VNODE)
  407                 bwillwrite();
  408         if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
  409                 if (auio.uio_resid != cnt && (error == ERESTART ||
  410                     error == EINTR || error == EWOULDBLOCK))
  411                         error = 0;
  412                 if (error == EPIPE)
  413                         psignal(p, SIGPIPE);
  414         }
  415         cnt -= auio.uio_resid;
  416 #ifdef KTRACE
  417         if (didktr && error == 0) {
  418                 ktruio.uio_iov = &ktriov;
  419                 ktruio.uio_resid = cnt;
  420                 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
  421         }
  422 #endif
  423         p->p_retval[0] = cnt;
  424         return (error);
  425 }
  426 
  427 /*
  428  * Gather write system call
  429  */
  430 #ifndef _SYS_SYSPROTO_H_
  431 struct writev_args {
  432         int     fd;
  433         struct  iovec *iovp;
  434         u_int   iovcnt;
  435 };
  436 #endif
  437 int
  438 writev(p, uap)
  439         struct proc *p;
  440         register struct writev_args *uap;
  441 {
  442         register struct file *fp;
  443         register struct filedesc *fdp = p->p_fd;
  444         struct uio auio;
  445         register struct iovec *iov;
  446         struct iovec *needfree;
  447         struct iovec aiov[UIO_SMALLIOV];
  448         long i, cnt, error = 0;
  449         u_int iovlen;
  450 #ifdef KTRACE
  451         struct iovec *ktriov = NULL;
  452         struct uio ktruio;
  453 #endif
  454 
  455         if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL)
  456                 return (EBADF);
  457         /* note: can't use iovlen until iovcnt is validated */
  458         iovlen = uap->iovcnt * sizeof (struct iovec);
  459         if (uap->iovcnt > UIO_SMALLIOV) {
  460                 if (uap->iovcnt > UIO_MAXIOV) {
  461                         needfree = NULL;
  462                         error = EINVAL;
  463                         goto done;
  464                 }
  465                 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
  466                 needfree = iov;
  467         } else {
  468                 iov = aiov;
  469                 needfree = NULL;
  470         }
  471         auio.uio_iov = iov;
  472         auio.uio_iovcnt = uap->iovcnt;
  473         auio.uio_rw = UIO_WRITE;
  474         auio.uio_segflg = UIO_USERSPACE;
  475         auio.uio_procp = p;
  476         auio.uio_offset = -1;
  477         if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
  478                 goto done;
  479         auio.uio_resid = 0;
  480         for (i = 0; i < uap->iovcnt; i++) {
  481                 if (iov->iov_len > INT_MAX - auio.uio_resid) {
  482                         error = EINVAL;
  483                         goto done;
  484                 }
  485                 auio.uio_resid += iov->iov_len;
  486                 iov++;
  487         }
  488 #ifdef KTRACE
  489         /*
  490          * if tracing, save a copy of iovec and uio
  491          */
  492         if (KTRPOINT(p, KTR_GENIO))  {
  493                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  494                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  495                 ktruio = auio;
  496         }
  497 #endif
  498         cnt = auio.uio_resid;
  499         if (fp->f_type == DTYPE_VNODE)
  500                 bwillwrite();
  501         if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) {
  502                 if (auio.uio_resid != cnt && (error == ERESTART ||
  503                     error == EINTR || error == EWOULDBLOCK))
  504                         error = 0;
  505                 if (error == EPIPE)
  506                         psignal(p, SIGPIPE);
  507         }
  508         cnt -= auio.uio_resid;
  509 #ifdef KTRACE
  510         if (ktriov != NULL) {
  511                 if (error == 0) {
  512                         ktruio.uio_iov = ktriov;
  513                         ktruio.uio_resid = cnt;
  514                         ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio,
  515                             error);
  516                 }
  517                 FREE(ktriov, M_TEMP);
  518         }
  519 #endif
  520         p->p_retval[0] = cnt;
  521 done:
  522         fdrop(fp, p);
  523         if (needfree)
  524                 FREE(needfree, M_IOV);
  525         return (error);
  526 }
  527 
  528 /*
  529  * Ioctl system call
  530  */
  531 #ifndef _SYS_SYSPROTO_H_
  532 struct ioctl_args {
  533         int     fd;
  534         u_long  com;
  535         caddr_t data;
  536 };
  537 #endif
  538 /* ARGSUSED */
  539 int
  540 ioctl(p, uap)
  541         struct proc *p;
  542         register struct ioctl_args *uap;
  543 {
  544         register struct file *fp;
  545         register struct filedesc *fdp;
  546         register u_long com;
  547         int error;
  548         register u_int size;
  549         caddr_t data, memp;
  550         int tmp;
  551 #define STK_PARAMS      128
  552         union {
  553             char stkbuf[STK_PARAMS];
  554             long align;
  555         } ubuf;
  556 
  557         fdp = p->p_fd;
  558         if ((u_int)uap->fd >= fdp->fd_nfiles ||
  559             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  560                 return (EBADF);
  561 
  562         if ((fp->f_flag & (FREAD | FWRITE)) == 0)
  563                 return (EBADF);
  564 
  565         switch (com = uap->com) {
  566         case FIONCLEX:
  567                 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
  568                 return (0);
  569         case FIOCLEX:
  570                 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
  571                 return (0);
  572         }
  573 
  574         /*
  575          * Interpret high order word to find amount of data to be
  576          * copied to/from the user's address space.
  577          */
  578         size = IOCPARM_LEN(com);
  579         if (size > IOCPARM_MAX)
  580                 return (ENOTTY);
  581 
  582         fhold(fp);
  583 
  584         memp = NULL;
  585         if (size > sizeof (ubuf.stkbuf)) {
  586                 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
  587                 data = memp;
  588         } else {
  589                 data = ubuf.stkbuf;
  590         }
  591         if (com&IOC_IN) {
  592                 if (size) {
  593                         error = copyin(uap->data, data, (u_int)size);
  594                         if (error) {
  595                                 if (memp)
  596                                         free(memp, M_IOCTLOPS);
  597                                 fdrop(fp, p);
  598                                 return (error);
  599                         }
  600                 } else {
  601                         *(caddr_t *)data = uap->data;
  602                 }
  603         } else if ((com&IOC_OUT) && size) {
  604                 /*
  605                  * Zero the buffer so the user always
  606                  * gets back something deterministic.
  607                  */
  608                 bzero(data, size);
  609         } else if (com&IOC_VOID) {
  610                 *(caddr_t *)data = uap->data;
  611         }
  612 
  613         switch (com) {
  614 
  615         case FIONBIO:
  616                 if ((tmp = *(int *)data))
  617                         fp->f_flag |= FNONBLOCK;
  618                 else
  619                         fp->f_flag &= ~FNONBLOCK;
  620                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
  621                 break;
  622 
  623         case FIOASYNC:
  624                 if ((tmp = *(int *)data))
  625                         fp->f_flag |= FASYNC;
  626                 else
  627                         fp->f_flag &= ~FASYNC;
  628                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
  629                 break;
  630 
  631         default:
  632                 error = fo_ioctl(fp, com, data, p);
  633                 /*
  634                  * Copy any data to user, size was
  635                  * already set and checked above.
  636                  */
  637                 if (error == 0 && (com&IOC_OUT) && size)
  638                         error = copyout(data, uap->data, (u_int)size);
  639                 break;
  640         }
  641         if (memp)
  642                 free(memp, M_IOCTLOPS);
  643         fdrop(fp, p);
  644         return (error);
  645 }
  646 
  647 static int      nselcoll;       /* Select collisions since boot */
  648 int     selwait;
  649 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
  650 
  651 /*
  652  * Select system call.
  653  */
  654 #ifndef _SYS_SYSPROTO_H_
  655 struct select_args {
  656         int     nd;
  657         fd_set  *in, *ou, *ex;
  658         struct  timeval *tv;
  659 };
  660 #endif
  661 int
  662 select(p, uap)
  663         register struct proc *p;
  664         register struct select_args *uap;
  665 {
  666         /*
  667          * The magic 2048 here is chosen to be just enough for FD_SETSIZE
  668          * infds with the new FD_SETSIZE of 1024, and more than enough for
  669          * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
  670          * of 256.
  671          */
  672         fd_mask s_selbits[howmany(2048, NFDBITS)];
  673         fd_mask *ibits[3], *obits[3], *selbits, *sbp;
  674         struct timeval atv, rtv, ttv;
  675         int s, ncoll, error, timo;
  676         u_int nbufbytes, ncpbytes, nfdbits;
  677 
  678         if (uap->nd < 0)
  679                 return (EINVAL);
  680         if (uap->nd > p->p_fd->fd_nfiles)
  681                 uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
  682 
  683         /*
  684          * Allocate just enough bits for the non-null fd_sets.  Use the
  685          * preallocated auto buffer if possible.
  686          */
  687         nfdbits = roundup(uap->nd, NFDBITS);
  688         ncpbytes = nfdbits / NBBY;
  689         nbufbytes = 0;
  690         if (uap->in != NULL)
  691                 nbufbytes += 2 * ncpbytes;
  692         if (uap->ou != NULL)
  693                 nbufbytes += 2 * ncpbytes;
  694         if (uap->ex != NULL)
  695                 nbufbytes += 2 * ncpbytes;
  696         if (nbufbytes <= sizeof s_selbits)
  697                 selbits = &s_selbits[0];
  698         else
  699                 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
  700 
  701         /*
  702          * Assign pointers into the bit buffers and fetch the input bits.
  703          * Put the output buffers together so that they can be bzeroed
  704          * together.
  705          */
  706         sbp = selbits;
  707 #define getbits(name, x) \
  708         do {                                                            \
  709                 if (uap->name == NULL)                                  \
  710                         ibits[x] = NULL;                                \
  711                 else {                                                  \
  712                         ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;   \
  713                         obits[x] = sbp;                                 \
  714                         sbp += ncpbytes / sizeof *sbp;                  \
  715                         error = copyin(uap->name, ibits[x], ncpbytes);  \
  716                         if (error != 0)                                 \
  717                                 goto done;                              \
  718                 }                                                       \
  719         } while (0)
  720         getbits(in, 0);
  721         getbits(ou, 1);
  722         getbits(ex, 2);
  723 #undef  getbits
  724         if (nbufbytes != 0)
  725                 bzero(selbits, nbufbytes / 2);
  726 
  727         if (uap->tv) {
  728                 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
  729                         sizeof (atv));
  730                 if (error)
  731                         goto done;
  732                 if (itimerfix(&atv)) {
  733                         error = EINVAL;
  734                         goto done;
  735                 }
  736                 getmicrouptime(&rtv);
  737                 timevaladd(&atv, &rtv);
  738         } else {
  739                 atv.tv_sec = 0;
  740                 atv.tv_usec = 0;
  741         }
  742         timo = 0;
  743 retry:
  744         ncoll = nselcoll;
  745         p->p_flag |= P_SELECT;
  746         error = selscan(p, ibits, obits, uap->nd);
  747         if (error || p->p_retval[0])
  748                 goto done;
  749         if (atv.tv_sec || atv.tv_usec) {
  750                 getmicrouptime(&rtv);
  751                 if (timevalcmp(&rtv, &atv, >=)) 
  752                         goto done;
  753                 ttv = atv;
  754                 timevalsub(&ttv, &rtv);
  755                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  756                     24 * 60 * 60 * hz : tvtohz(&ttv);
  757         }
  758         s = splhigh();
  759         if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
  760                 splx(s);
  761                 goto retry;
  762         }
  763         p->p_flag &= ~P_SELECT;
  764 
  765         error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
  766         
  767         splx(s);
  768         if (error == 0)
  769                 goto retry;
  770 done:
  771         p->p_flag &= ~P_SELECT;
  772         /* select is not restarted after signals... */
  773         if (error == ERESTART)
  774                 error = EINTR;
  775         if (error == EWOULDBLOCK)
  776                 error = 0;
  777 #define putbits(name, x) \
  778         if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
  779                 error = error2;
  780         if (error == 0) {
  781                 int error2;
  782 
  783                 putbits(in, 0);
  784                 putbits(ou, 1);
  785                 putbits(ex, 2);
  786 #undef putbits
  787         }
  788         if (selbits != &s_selbits[0])
  789                 free(selbits, M_SELECT);
  790         return (error);
  791 }
  792 
  793 static int
  794 selscan(p, ibits, obits, nfd)
  795         struct proc *p;
  796         fd_mask **ibits, **obits;
  797         int nfd;
  798 {
  799         struct filedesc *fdp = p->p_fd;
  800         int msk, i, fd;
  801         fd_mask bits;
  802         struct file *fp;
  803         int n = 0;
  804         /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
  805         static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
  806 
  807         for (msk = 0; msk < 3; msk++) {
  808                 if (ibits[msk] == NULL)
  809                         continue;
  810                 for (i = 0; i < nfd; i += NFDBITS) {
  811                         bits = ibits[msk][i/NFDBITS];
  812                         /* ffs(int mask) not portable, fd_mask is long */
  813                         for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
  814                                 if (!(bits & 1))
  815                                         continue;
  816                                 fp = fdp->fd_ofiles[fd];
  817                                 if (fp == NULL)
  818                                         return (EBADF);
  819                                 if (fo_poll(fp, flag[msk], fp->f_cred, p)) {
  820                                         obits[msk][(fd)/NFDBITS] |=
  821                                             ((fd_mask)1 << ((fd) % NFDBITS));
  822                                         n++;
  823                                 }
  824                         }
  825                 }
  826         }
  827         p->p_retval[0] = n;
  828         return (0);
  829 }
  830 
  831 /*
  832  * Poll system call.
       *
       * Copies the caller's pollfd array in from user space, scans it with
       * pollscan() and, when nothing is ready yet, sleeps on selwait and
       * rescans after each wakeup until an entry is ready, the deadline
       * passes, or tsleep() reports an error.  On success the array is
       * copied back out to the caller.
       *
       * Returns 0 or an errno value.  EINVAL is returned when nfds exceeds
       * both the RLIMIT_NOFILE soft limit and FD_SETSIZE, or when
       * itimerfix() rejects the timeout.  The number of ready entries is
       * left in p->p_retval[0] by pollscan().
  833  */
  834 #ifndef _SYS_SYSPROTO_H_
      /* Fallback declaration, used only when _SYS_SYSPROTO_H_ is not defined. */
  835 struct poll_args {
  836         struct pollfd *fds;     /* user-space address of the pollfd array */
  837         u_int   nfds;           /* number of entries in that array */
  838         int     timeout;        /* milliseconds; INFTIM arms no deadline */
  839 };
  840 #endif
  841 int
  842 poll(p, uap)
  843         struct proc *p;
  844         struct poll_args *uap;
  845 {
  846         struct pollfd *bits;
  847         struct pollfd smallbits[32];
  848         struct timeval atv, rtv, ttv;
  849         int s, ncoll, error = 0, timo;
  850         u_int nfds;
  851         size_t ni;
  852 
  853         nfds = SCARG(uap, nfds);
  854         /*
  855          * This is kinda bogus.  We have fd limits, but that is not
  856          * really related to the size of the pollfd array.  Make sure
  857          * we let the process use at least FD_SETSIZE entries and at
  858          * least enough for the current limits.  We want to be reasonably
  859          * safe, but not overly restrictive.
  860          */
  861         if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
  862                 return (EINVAL);
              /*
               * Requests that fit in smallbits use the on-stack array; larger
               * ones get a temporary heap buffer that is released at "out".
               */
  863         ni = nfds * sizeof(struct pollfd);
  864         if (ni > sizeof(smallbits))
  865                 bits = malloc(ni, M_TEMP, M_WAITOK);
  866         else
  867                 bits = smallbits;
  868         error = copyin(SCARG(uap, fds), bits, ni);
  869         if (error)
  870                 goto done;
              /*
               * Turn the millisecond timeout into an absolute uptime deadline
               * in atv.  atv == 0 is the "no deadline" marker tested below.
               */
  871         if (SCARG(uap, timeout) != INFTIM) {
  872                 atv.tv_sec = SCARG(uap, timeout) / 1000;
  873                 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
  874                 if (itimerfix(&atv)) {
  875                         error = EINVAL;
  876                         goto done;
  877                 }
  878                 getmicrouptime(&rtv);
  879                 timevaladd(&atv, &rtv);
  880         } else {
  881                 atv.tv_sec = 0;
  882                 atv.tv_usec = 0;
  883         }
  884         timo = 0;
  885 retry:
              /*
               * Snapshot the collision counter and set P_SELECT before the
               * scan; both are re-checked before sleeping so that a
               * selwakeup() arriving during the scan forces a rescan.
               */
  886         ncoll = nselcoll;
  887         p->p_flag |= P_SELECT;
  888         error = pollscan(p, bits, nfds);
  889         if (error || p->p_retval[0])
  890                 goto done;
  891         if (atv.tv_sec || atv.tv_usec) {
                      /*
                       * Stop if the deadline has passed; otherwise sleep for
                       * the remaining time, capped at 24 hours' worth
                       * (presumably hz is clock ticks per second).
                       */
  892                 getmicrouptime(&rtv);
  893                 if (timevalcmp(&rtv, &atv, >=))
  894                         goto done;
  895                 ttv = atv;
  896                 timevalsub(&ttv, &rtv);
  897                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  898                     24 * 60 * 60 * hz : tvtohz(&ttv);
  899         } 
  900         s = splhigh(); 
              /*
               * P_SELECT cleared or nselcoll changed since the snapshot:
               * something happened during the scan, so rescan, don't sleep.
               */
  901         if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
  902                 splx(s);
  903                 goto retry;
  904         }
  905         p->p_flag &= ~P_SELECT;
  906         error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
  907         splx(s);
              /* A return of 0 from tsleep() leads to another scan. */
  908         if (error == 0)
  909                 goto retry;
  910 done:
  911         p->p_flag &= ~P_SELECT;
  912         /* poll is not restarted after signals... */
  913         if (error == ERESTART)
  914                 error = EINTR;
              /* EWOULDBLOCK (presumably tsleep() timing out) is not an error here. */
  915         if (error == EWOULDBLOCK)
  916                 error = 0;
  917         if (error == 0) {
                      /* The whole array, not only revents, is written back. */
  918                 error = copyout(bits, SCARG(uap, fds), ni);
  919                 if (error)
  920                         goto out;
  921         }
  922 out:
              /* Same size test as at allocation: free only a heap buffer. */
  923         if (ni > sizeof(smallbits))
  924                 free(bits, M_TEMP);
  925         return (error);
  926 }
  927 
  928 static int
  929 pollscan(p, fds, nfd)
  930         struct proc *p;
  931         struct pollfd *fds;
  932         u_int nfd;
  933 {
              /*
               * Walk the nfd entries starting at fds and fill in each revents:
               *   - fd at or beyond the descriptor table size: POLLNVAL
               *   - negative fd: revents is cleared and the entry is skipped
               *   - no open file in that slot: POLLNVAL
               *   - otherwise: whatever fo_poll() reports for the file
               * The number of entries with a non-zero revents is stored in
               * p->p_retval[0].  Always returns 0.
               */
  934         struct filedesc *fdp = p->p_fd;
  935         struct pollfd *pfd, *end;
  936         struct file *fp;
  937         int ready = 0;
  938 
  939         end = fds + nfd;
  940         for (pfd = fds; pfd != end; pfd++) {
  941                 if (pfd->fd >= fdp->fd_nfiles) {
  942                         pfd->revents = POLLNVAL;
  943                         ready++;
  944                         continue;
  945                 }
  946                 if (pfd->fd < 0) {
  947                         pfd->revents = 0;
  948                         continue;
  949                 }
  950                 fp = fdp->fd_ofiles[pfd->fd];
  951                 if (fp == NULL) {
  952                         pfd->revents = POLLNVAL;
  953                         ready++;
  954                         continue;
  955                 }
  956                 /* The backend may also report POLLHUP and POLLERR. */
  957                 pfd->revents = fo_poll(fp, pfd->events,
  958                     fp->f_cred, p);
  959                 if (pfd->revents != 0)
  960                         ready++;
  961         }
  962         p->p_retval[0] = ready;
  963         return (0);
  964 }
  965 
  966 /*
  967  * OpenBSD poll system call.
  968  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
       *
       * Implemented by handing the argument block straight to poll() and
       * returning its result.
  969  */
  970 #ifndef _SYS_SYSPROTO_H_
      /* Fallback declaration, used only when _SYS_SYSPROTO_H_ is not defined. */
  971 struct openbsd_poll_args {
  972         struct pollfd *fds;     /* pollfd array, as for poll() */
  973         u_int   nfds;           /* number of entries in the array */
  974         int     timeout;        /* timeout, as for poll() */
  975 };
  976 #endif
  977 int
  978 openbsd_poll(p, uap)
  979         register struct proc *p;
  980         register struct openbsd_poll_args *uap;
  981 {
              /*
               * The cast assumes struct openbsd_poll_args and struct poll_args
               * have the same layout.  The fallback declarations in this file
               * list the same three members in the same order; NOTE(review):
               * confirm against the sysproto.h definitions.
               */
  982         return (poll(p, (struct poll_args *)uap));
  983 }
  984 
  985 /*ARGSUSED*/
      /*
       * Poll routine that reports the ordinary read/write events as always
       * ready: returns the subset of `events' made up of POLLIN, POLLOUT,
       * POLLRDNORM and POLLWRNORM.  Neither dev nor p is examined.
       */
  986 int
  987 seltrue(dev, events, p)
  988         dev_t dev;
  989         int events;
  990         struct proc *p;
  991 {
  992 
  993         return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
  994 }
  995 
  996 /*
  997  * Record that process `selector' is interested in events on `sip'.
       *
       * Nothing changes if sip already names this process.  If sip names
       * another process that pfind() can locate and whose wait channel is
       * selwait, the record is left alone and SI_COLL is set to mark the
       * collision.  In every other case sip is (re)assigned to this process.
  998  */
  999 void
 1000 selrecord(selector, sip)
 1001         struct proc *selector;
 1002         struct selinfo *sip;
 1003 {
 1004         struct proc *owner;
 1005         pid_t self = selector->p_pid;
 1006 
 1007         if (sip->si_pid == self)
 1008                 return;
 1009         owner = sip->si_pid != 0 ? pfind(sip->si_pid) : NULL;
 1010         if (owner != NULL && owner->p_wchan == (caddr_t)&selwait) {
 1011                 sip->si_flags |= SI_COLL;
 1012                 return;
 1013         }
 1014         sip->si_pid = self;
 1015 }
 1016 
 1017 /*
 1018  * Do a wakeup when a selectable event occurs.
       *
       * sip holds at most one recorded pid plus the SI_COLL collision flag.
       * The record is consumed: si_pid is reset to 0 and SI_COLL is cleared.
 1019  */
 1020 void
 1021 selwakeup(sip)
 1022         register struct selinfo *sip;
 1023 {
 1024         register struct proc *p;
 1025         int s;
 1026 
              /* No process is recorded on this selinfo: nothing to wake. */
 1027         if (sip->si_pid == 0)
 1028                 return;
              /*
               * A collision was flagged, so more than the recorded process may
               * be interested: bump nselcoll (poll() compares it against its
               * snapshot) and wake everything sleeping on selwait.
               */
 1029         if (sip->si_flags & SI_COLL) {
 1030                 nselcoll++;
 1031                 sip->si_flags &= ~SI_COLL;
 1032                 wakeup((caddr_t)&selwait);
 1033         }
 1034         p = pfind(sip->si_pid);
 1035         sip->si_pid = 0;
 1036         if (p != NULL) {
 1037                 s = splhigh();
                      /*
                       * If the recorded process is waiting on selwait, get it
                       * going again: setrunnable() when its state is SSLEEP,
                       * unsleep() otherwise.  If it is not on selwait but has
                       * P_SELECT set, clear the flag; poll() tests P_SELECT
                       * before sleeping and rescans when it finds it cleared.
                       */
 1038                 if (p->p_wchan == (caddr_t)&selwait) {
 1039                         if (p->p_stat == SSLEEP)
 1040                                 setrunnable(p);
 1041                         else
 1042                                 unsleep(p);
 1043                 } else if (p->p_flag & P_SELECT)
 1044                         p->p_flag &= ~P_SELECT;
 1045                 splx(s);
 1046         }
 1047 }

Cache object: 408d31a9ee9b351ae715e70fd914d6f8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.