FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_select.c

/*      $NetBSD: sys_select.c,v 1.10 2008/10/15 08:13:17 ad Exp $       */

/*-
 * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)sys_generic.c       8.9 (Berkeley) 2/14/95
 */

/*
 * System calls relating to files.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.10 2008/10/15 08:13:17 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/sleepq.h>

/* Flags for lwp::l_selflag. */
#define SEL_RESET       0       /* awoken, interrupted, or not yet polling */
#define SEL_SCANNING    1       /* polling descriptors */
#define SEL_BLOCKING    2       /* about to block on select_cv */

/* Per-CPU state for select()/poll(). */
#if MAXCPUS > 32
#error adjust this code
#endif
typedef struct selcpu {
        kmutex_t        sc_lock;
        sleepq_t        sc_sleepq;
        int             sc_ncoll;
        uint32_t        sc_mask;
} selcpu_t;

static int      selscan(lwp_t *, fd_mask *, fd_mask *, int, register_t *);
static int      pollscan(lwp_t *, struct pollfd *, int, register_t *);
static void     selclear(void);

static syncobj_t select_sobj = {
        SOBJ_SLEEPQ_FIFO,
        sleepq_unsleep,
        sleepq_changepri,
        sleepq_lendpri,
        syncobj_noowner,
};

/*
 * Select system call.
 */
int
sys_pselect(struct lwp *l, const struct sys_pselect_args *uap, register_t *retval)
{
        /* {
                syscallarg(int)                         nd;
                syscallarg(fd_set *)                    in;
                syscallarg(fd_set *)                    ou;
                syscallarg(fd_set *)                    ex;
                syscallarg(const struct timespec *)     ts;
                syscallarg(sigset_t *)                  mask;
        } */
        struct timespec ats;
        struct timeval  atv, *tv = NULL;
        sigset_t        amask, *mask = NULL;
        int             error;

        if (SCARG(uap, ts)) {
                error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
                if (error)
                        return error;
                atv.tv_sec = ats.tv_sec;
                atv.tv_usec = ats.tv_nsec / 1000;
                tv = &atv;
        }
        if (SCARG(uap, mask) != NULL) {
                error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
                if (error)
                        return error;
                mask = &amask;
        }

        return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
            SCARG(uap, ou), SCARG(uap, ex), tv, mask);
}

int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
        if (itimerfix(tv))
                return -1;
        getmicrouptime(sleeptv);
        return 0;
}

int
gettimeleft(struct timeval *tv, struct timeval *sleeptv)
{
        /*
         * We have to recalculate the timeout on every retry.
         */
        struct timeval slepttv;
        /*
         * reduce tv by elapsed time
         * based on monotonic time scale
         */
        getmicrouptime(&slepttv);
        timeradd(tv, sleeptv, tv);
        timersub(tv, &slepttv, tv);
        *sleeptv = slepttv;
        return tvtohz(tv);
}
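
The two helpers above cooperate across the retry loops later in this file: inittimeleft() validates the timeout and records the current monotonic uptime in *sleeptv, and each call to gettimeleft() credits back the previous stamp, subtracts the current uptime, and refreshes the stamp, so *tv always holds the time still remaining. Below is a minimal userland sketch of the same bookkeeping; the timeleft_* names are this example's own, and CLOCK_MONOTONIC stands in for getmicrouptime().

#include <stdio.h>
#include <time.h>

static struct timespec remaining;       /* time still left on the timeout */
static struct timespec stamp;           /* monotonic clock at last check */

static void
timeleft_init(time_t sec, long nsec)
{
        remaining.tv_sec = sec;
        remaining.tv_nsec = nsec;
        clock_gettime(CLOCK_MONOTONIC, &stamp);
}

static long
timeleft_update(void)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        /* remaining += stamp; remaining -= now; i.e. deduct elapsed time. */
        remaining.tv_sec += stamp.tv_sec - now.tv_sec;
        remaining.tv_nsec += stamp.tv_nsec - now.tv_nsec;
        while (remaining.tv_nsec < 0) {
                remaining.tv_nsec += 1000000000L;
                remaining.tv_sec--;
        }
        while (remaining.tv_nsec >= 1000000000L) {
                remaining.tv_nsec -= 1000000000L;
                remaining.tv_sec++;
        }
        stamp = now;                    /* the next call deducts from here */
        return (long)remaining.tv_sec;
}

int
main(void)
{
        timeleft_init(5, 0);
        nanosleep(&(struct timespec){ .tv_sec = 1 }, NULL);
        printf("about %ld seconds left\n", timeleft_update());
        return 0;
}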

int
sys_select(struct lwp *l, const struct sys_select_args *uap, register_t *retval)
{
        /* {
                syscallarg(int)                 nd;
                syscallarg(fd_set *)            in;
                syscallarg(fd_set *)            ou;
                syscallarg(fd_set *)            ex;
                syscallarg(struct timeval *)    tv;
        } */
        struct timeval atv, *tv = NULL;
        int error;

        if (SCARG(uap, tv)) {
                error = copyin(SCARG(uap, tv), (void *)&atv,
                        sizeof(atv));
                if (error)
                        return error;
                tv = &atv;
        }

        return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
            SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
}
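
For reference, the caller's side of the path above: a small self-contained userland program exercising select(2) on standard input. The five-second timeout and the choice of stdin are arbitrary for the example.

#include <sys/select.h>

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        fd_set rfds;
        struct timeval tv;
        int n;

        FD_ZERO(&rfds);
        FD_SET(STDIN_FILENO, &rfds);

        /* select() may modify tv, so reinitialize it before each call. */
        tv.tv_sec = 5;
        tv.tv_usec = 0;

        /* The first argument is the highest fd plus one ("nd" above). */
        n = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv);
        if (n == -1)
                perror("select");
        else if (n == 0)
                printf("timed out\n");
        else if (FD_ISSET(STDIN_FILENO, &rfds))
                printf("stdin is readable\n");
        return 0;
}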

int
selcommon(lwp_t *l, register_t *retval, int nd, fd_set *u_in,
          fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
        char            smallbits[howmany(FD_SETSIZE, NFDBITS) *
                            sizeof(fd_mask) * 6];
        proc_t          * const p = l->l_proc;
        char            *bits;
        int             ncoll, error, timo;
        size_t          ni;
        sigset_t        oldmask;
        struct timeval  sleeptv;
        selcpu_t        *sc;

        error = 0;
        if (nd < 0)
                return (EINVAL);
        if (nd > p->p_fd->fd_nfiles) {
                /* forgiving; slightly wrong */
                nd = p->p_fd->fd_nfiles;
        }
        ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
        if (ni * 6 > sizeof(smallbits)) {
                bits = kmem_alloc(ni * 6, KM_SLEEP);
                if (bits == NULL)
                        return ENOMEM;
        } else
                bits = smallbits;

#define getbits(name, x)                                                \
        if (u_ ## name) {                                               \
                error = copyin(u_ ## name, bits + ni * x, ni);          \
                if (error)                                              \
                        goto done;                                      \
        } else                                                          \
                memset(bits + ni * x, 0, ni);
        getbits(in, 0);
        getbits(ou, 1);
        getbits(ex, 2);
#undef  getbits

        timo = 0;
        if (tv && inittimeleft(tv, &sleeptv) == -1) {
                error = EINVAL;
                goto done;
        }

        if (mask) {
                sigminusset(&sigcantmask, mask);
                mutex_enter(p->p_lock);
                oldmask = l->l_sigmask;
                l->l_sigmask = *mask;
                mutex_exit(p->p_lock);
        } else
                oldmask = l->l_sigmask; /* XXXgcc */

        sc = curcpu()->ci_data.cpu_selcpu;
        l->l_selcpu = sc;
        SLIST_INIT(&l->l_selwait);
        for (;;) {
                /*
                 * No need to lock.  If this is overwritten by another
                 * value while scanning, we will retry below.  We only
                 * need to see exact state from the descriptors that
                 * we are about to poll, and lock activity resulting
                 * from fo_poll is enough to provide an up to date value
                 * for new polling activity.
                 */
                l->l_selflag = SEL_SCANNING;
                ncoll = sc->sc_ncoll;

                error = selscan(l, (fd_mask *)(bits + ni * 0),
                    (fd_mask *)(bits + ni * 3), nd, retval);

                if (error || *retval)
                        break;
                if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
                        break;
                mutex_spin_enter(&sc->sc_lock);
                if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
                        mutex_spin_exit(&sc->sc_lock);
                        continue;
                }
                l->l_selflag = SEL_BLOCKING;
                l->l_kpriority = true;
                sleepq_enter(&sc->sc_sleepq, l, &sc->sc_lock);
                sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
                error = sleepq_block(timo, true);
                if (error != 0)
                        break;
        }
        selclear();

        if (mask) {
                mutex_enter(p->p_lock);
                l->l_sigmask = oldmask;
                mutex_exit(p->p_lock);
        }

 done:
        /* select is not restarted after signals... */
        if (error == ERESTART)
                error = EINTR;
        if (error == EWOULDBLOCK)
                error = 0;
        if (error == 0 && u_in != NULL)
                error = copyout(bits + ni * 3, u_in, ni);
        if (error == 0 && u_ou != NULL)
                error = copyout(bits + ni * 4, u_ou, ni);
        if (error == 0 && u_ex != NULL)
                error = copyout(bits + ni * 5, u_ex, ni);
        if (bits != smallbits)
                kmem_free(bits, ni * 6);
        return (error);
}

int
selscan(lwp_t *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
        register_t *retval)
{
        static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
                               POLLWRNORM | POLLHUP | POLLERR,
                               POLLRDBAND };
        int msk, i, j, fd, n;
        fd_mask ibits, obits;
        file_t *fp;

        n = 0;
        for (msk = 0; msk < 3; msk++) {
                for (i = 0; i < nfd; i += NFDBITS) {
                        ibits = *ibitp++;
                        obits = 0;
                        while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
                                ibits &= ~(1 << j);
                                if ((fp = fd_getfile(fd)) == NULL)
                                        return (EBADF);
                                if ((*fp->f_ops->fo_poll)(fp, flag[msk])) {
                                        obits |= (1 << j);
                                        n++;
                                }
                                fd_putfile(fd);
                        }
                        *obitp++ = obits;
                }
        }
        *retval = n;
        return (0);
}
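
selscan() jumps straight to set bits with ffs(3) rather than testing all NFDBITS positions in each word: ffs() returns the 1-based index of the lowest set bit (0 when no bits remain), the bit is cleared, and the loop repeats. A standalone demonstration of the idiom:

#include <stdio.h>
#include <strings.h>

int
main(void)
{
        int word = 0x1005;              /* bits 0, 2 and 12 set */
        int j;

        while ((j = ffs(word)) != 0) {
                j--;                    /* convert to a 0-based index */
                word &= ~(1 << j);      /* clear it so ffs() finds the next */
                printf("bit %d is set\n", j);
        }
        return 0;
}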

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, const struct sys_poll_args *uap, register_t *retval)
{
        /* {
                syscallarg(struct pollfd *)     fds;
                syscallarg(u_int)               nfds;
                syscallarg(int)                 timeout;
        } */
        struct timeval  atv, *tv = NULL;

        if (SCARG(uap, timeout) != INFTIM) {
                atv.tv_sec = SCARG(uap, timeout) / 1000;
                atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
                tv = &atv;
        }

        return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
                tv, NULL);
}
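
The equivalent caller's view for poll(2), again as a small self-contained program; the descriptor and the 5000 ms timeout are arbitrary choices for the example.

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct pollfd pfd;
        int n;

        pfd.fd = STDIN_FILENO;
        pfd.events = POLLIN;

        /* A timeout of INFTIM (-1) would reach pollcommon() with tv == NULL. */
        n = poll(&pfd, 1, 5000);
        if (n == -1)
                perror("poll");
        else if (n == 0)
                printf("timed out\n");
        else if (pfd.revents & POLLIN)
                printf("stdin is readable\n");
        return 0;
}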

/*
 * Poll system call.
 */
int
sys_pollts(struct lwp *l, const struct sys_pollts_args *uap, register_t *retval)
{
        /* {
                syscallarg(struct pollfd *)             fds;
                syscallarg(u_int)                       nfds;
                syscallarg(const struct timespec *)     ts;
                syscallarg(const sigset_t *)            mask;
        } */
        struct timespec ats;
        struct timeval  atv, *tv = NULL;
        sigset_t        amask, *mask = NULL;
        int             error;

        if (SCARG(uap, ts)) {
                error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
                if (error)
                        return error;
                atv.tv_sec = ats.tv_sec;
                atv.tv_usec = ats.tv_nsec / 1000;
                tv = &atv;
        }
        if (SCARG(uap, mask)) {
                error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
                if (error)
                        return error;
                mask = &amask;
        }

        return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
                tv, mask);
}

int
pollcommon(lwp_t *l, register_t *retval,
        struct pollfd *u_fds, u_int nfds,
        struct timeval *tv, sigset_t *mask)
{
        char            smallbits[32 * sizeof(struct pollfd)];
        proc_t          * const p = l->l_proc;
        void *          bits;
        sigset_t        oldmask;
        int             ncoll, error, timo;
        size_t          ni;
        struct timeval  sleeptv;
        selcpu_t        *sc;

        if (nfds > p->p_fd->fd_nfiles) {
                /* forgiving; slightly wrong */
                nfds = p->p_fd->fd_nfiles;
        }
        ni = nfds * sizeof(struct pollfd);
        if (ni > sizeof(smallbits)) {
                bits = kmem_alloc(ni, KM_SLEEP);
                if (bits == NULL)
                        return ENOMEM;
        } else
                bits = smallbits;

        error = copyin(u_fds, bits, ni);
        if (error)
                goto done;

        timo = 0;
        if (tv && inittimeleft(tv, &sleeptv) == -1) {
                error = EINVAL;
                goto done;
        }

        if (mask) {
                sigminusset(&sigcantmask, mask);
                mutex_enter(p->p_lock);
                oldmask = l->l_sigmask;
                l->l_sigmask = *mask;
                mutex_exit(p->p_lock);
        } else
                oldmask = l->l_sigmask; /* XXXgcc */

        sc = curcpu()->ci_data.cpu_selcpu;
        l->l_selcpu = sc;
        SLIST_INIT(&l->l_selwait);
        for (;;) {
                /*
                 * No need to lock.  If this is overwritten by another
                 * value while scanning, we will retry below.  We only
                 * need to see exact state from the descriptors that
                 * we are about to poll, and lock activity resulting
                 * from fo_poll is enough to provide an up to date value
                 * for new polling activity.
                 */
                ncoll = sc->sc_ncoll;
                l->l_selflag = SEL_SCANNING;

                error = pollscan(l, (struct pollfd *)bits, nfds, retval);

                if (error || *retval)
                        break;
                if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
                        break;
                mutex_spin_enter(&sc->sc_lock);
                if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
                        mutex_spin_exit(&sc->sc_lock);
                        continue;
                }
                l->l_selflag = SEL_BLOCKING;
                l->l_kpriority = true;
                sleepq_enter(&sc->sc_sleepq, l, &sc->sc_lock);
                sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
                error = sleepq_block(timo, true);
                if (error != 0)
                        break;
        }
        selclear();

        if (mask) {
                mutex_enter(p->p_lock);
                l->l_sigmask = oldmask;
                mutex_exit(p->p_lock);
        }
 done:
        /* poll is not restarted after signals... */
        if (error == ERESTART)
                error = EINTR;
        if (error == EWOULDBLOCK)
                error = 0;
        if (error == 0)
                error = copyout(bits, u_fds, ni);
        if (bits != smallbits)
                kmem_free(bits, ni);
        return (error);
}

int
pollscan(lwp_t *l, struct pollfd *fds, int nfd, register_t *retval)
{
        int i, n;
        file_t *fp;

        n = 0;
        for (i = 0; i < nfd; i++, fds++) {
                if (fds->fd < 0) {
                        fds->revents = 0;
                } else if ((fp = fd_getfile(fds->fd)) == NULL) {
                        fds->revents = POLLNVAL;
                        n++;
                } else {
                        fds->revents = (*fp->f_ops->fo_poll)(fp,
                            fds->events | POLLERR | POLLHUP);
                        if (fds->revents != 0)
                                n++;
                        fd_putfile(fds->fd);
                }
        }
        *retval = n;
        return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, lwp_t *l)
{

        return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.  Concurrency issues:
 *
 * The caller holds the same lock across calls to selrecord() and
 * selnotify(), so we don't need to consider a concurrent wakeup
 * while in this routine.
 *
 * The only activity we need to guard against is selclear(), called by
 * another thread that is exiting selcommon() or pollcommon().
 * `sel_lwp' can only become non-NULL while the caller's lock is held,
 * so it cannot become non-NULL due to a change made by another thread
 * while we are in this routine.  It can only become _NULL_ due to a
 * call to selclear().
 *
 * If it is non-NULL and != selector there is the potential for
 * selclear() to be called by another thread.  If either of those
 * conditions is true, we're not interested in touching the `named
 * waiter' part of the selinfo record because we need to record a
 * collision.  Hence there is no need for additional locking in this
 * routine.
 */
void
selrecord(lwp_t *selector, struct selinfo *sip)
{
        selcpu_t *sc;
        lwp_t *other;

        KASSERT(selector == curlwp);

        sc = selector->l_selcpu;
        other = sip->sel_lwp;

        if (other == selector) {
                /* `selector' has already claimed it. */
                KASSERT(sip->sel_cpu == sc);
        } else if (other == NULL) {
                /*
                 * First named waiter, although there may be unnamed
                 * waiters (collisions).  Issue a memory barrier to
                 * ensure that we access sel_lwp (above) before other
                 * fields - this guards against a call to selclear().
                 */
                membar_enter();
                sip->sel_lwp = selector;
                SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
                /* Replace selinfo's lock with our chosen CPU's lock. */
                sip->sel_cpu = sc;
        } else {
                /* Multiple waiters: record a collision. */
                sip->sel_collision |= sc->sc_mask;
                KASSERT(sip->sel_cpu != NULL);
        }
}

/*
 * Do a wakeup when a selectable event occurs.  Concurrency issues:
 *
 * As per selrecord(), the caller's object lock is held.  If there
 * is a named waiter, we must acquire the associated selcpu's lock
 * in order to synchronize with selclear() and pollers going to sleep
 * in selcommon() and/or pollcommon().
 *
 * sip->sel_cpu cannot change at this point, as it is only changed
 * in selrecord(), and concurrent calls to selrecord() are locked
 * out by the caller.
 */
void
selnotify(struct selinfo *sip, int events, long knhint)
{
        selcpu_t *sc;
        uint32_t mask;
        int index, oflag, swapin;
        lwp_t *l;

        KNOTE(&sip->sel_klist, knhint);

        if (sip->sel_lwp != NULL) {
                /* One named LWP is waiting. */
                swapin = 0;
                sc = sip->sel_cpu;
                mutex_spin_enter(&sc->sc_lock);
                /* Still there? */
                if (sip->sel_lwp != NULL) {
                        l = sip->sel_lwp;
                        /*
                         * If thread is sleeping, wake it up.  If it's not
                         * yet asleep, it will notice the change in state
                         * and will re-poll the descriptors.
                         */
                        oflag = l->l_selflag;
                        l->l_selflag = SEL_RESET;
                        if (oflag == SEL_BLOCKING &&
                            l->l_mutex == &sc->sc_lock) {
                                KASSERT(l->l_wchan == sc);
                                swapin = sleepq_unsleep(l, false);
                        }
                }
                mutex_spin_exit(&sc->sc_lock);
                if (swapin)
                        uvm_kick_scheduler();
        }

        if ((mask = sip->sel_collision) != 0) {
                /*
                 * There was a collision (multiple waiters): we must
                 * inform all potentially interested waiters.
                 */
                sip->sel_collision = 0;
                do {
                        index = ffs(mask) - 1;
                        mask &= ~(1 << index);
                        sc = cpu_lookup(index)->ci_data.cpu_selcpu;
                        mutex_spin_enter(&sc->sc_lock);
                        sc->sc_ncoll++;
                        sleepq_wake(&sc->sc_sleepq, sc, (u_int)-1,
                            &sc->sc_lock);
                } while (__predict_false(mask != 0));
        }
}
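
Taken together, selrecord() and selnotify() form the driver-visible half of select/poll: a driver's poll entry point calls selrecord() when the requested condition is not yet true, and its event path (typically an interrupt handler) calls selnotify() once it becomes true, holding the same lock across both as the comments above require. A sketch of that pattern follows; the mydev_* names, the softc layout, and the lookup helper are hypothetical, not kernel API.

struct mydev_softc {
        kmutex_t        sc_lock;        /* held across selrecord()/selnotify() */
        struct selinfo  sc_rsel;        /* set up with selinit() at attach time */
        int             sc_havedata;    /* the condition being polled for */
};

static struct mydev_softc *mydev_lookup(dev_t);         /* hypothetical */

static int
mydev_poll(dev_t dev, int events, lwp_t *l)
{
        struct mydev_softc *sc = mydev_lookup(dev);
        int revents = 0;

        mutex_enter(&sc->sc_lock);
        if (events & (POLLIN | POLLRDNORM)) {
                if (sc->sc_havedata)
                        revents |= events & (POLLIN | POLLRDNORM);
                else
                        selrecord(l, &sc->sc_rsel);     /* not ready: register */
        }
        mutex_exit(&sc->sc_lock);
        return revents;
}

/* Event path: called when data arrives, e.g. from the interrupt handler. */
static void
mydev_rxintr(struct mydev_softc *sc)
{
        mutex_enter(&sc->sc_lock);
        sc->sc_havedata = 1;
        selnotify(&sc->sc_rsel, POLLIN | POLLRDNORM, 0);
        mutex_exit(&sc->sc_lock);
}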

/*
 * Remove an LWP from all objects that it is waiting for.  Concurrency
 * issues:
 *
 * The object owner's (e.g. device driver) lock is not held here.  Calls
 * can be made to selrecord() and we do not synchronize against those
 * directly using locks.  However, we use `sel_lwp' to lock out changes.
 * Before clearing it we must use memory barriers to ensure that we can
 * safely traverse the list of selinfo records.
 */
static void
selclear(void)
{
        struct selinfo *sip, *next;
        selcpu_t *sc;
        lwp_t *l;

        l = curlwp;
        sc = l->l_selcpu;

        mutex_spin_enter(&sc->sc_lock);
        for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) {
                KASSERT(sip->sel_lwp == l);
                KASSERT(sip->sel_cpu == l->l_selcpu);
                /*
                 * Read link to next selinfo record, if any.
                 * It's no longer safe to touch `sip' after clearing
                 * `sel_lwp', so ensure that the read of `sel_chain'
                 * completes before the clearing of sel_lwp becomes
                 * globally visible.
                 */
                next = SLIST_NEXT(sip, sel_chain);
                membar_exit();
                /* Release the record for another named waiter to use. */
                sip->sel_lwp = NULL;
        }
        mutex_spin_exit(&sc->sc_lock);
}

/*
 * Initialize the select/poll system calls.  Called once for each
 * CPU in the system, as they are attached.
 */
void
selsysinit(struct cpu_info *ci)
{
        selcpu_t *sc;

        sc = kmem_alloc(roundup2(sizeof(selcpu_t), coherency_unit) +
            coherency_unit, KM_SLEEP);
        sc = (void *)roundup2((uintptr_t)sc, coherency_unit);
        mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SCHED);
        sleepq_init(&sc->sc_sleepq);
        sc->sc_ncoll = 0;
        sc->sc_mask = (1 << cpu_index(ci));
        ci->ci_data.cpu_selcpu = sc;
}
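
selsysinit() over-allocates by one coherency_unit and rounds the pointer up so that each CPU's selcpu_t starts on its own cache line, avoiding false sharing between CPUs; the unaligned original pointer is deliberately never freed, since the structure lives for the lifetime of the system. The same trick in portable userland C, where CACHE_LINE and struct percpu_state are placeholders for coherency_unit and selcpu_t:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHE_LINE      64      /* stand-in for the kernel's coherency_unit */
#define ROUNDUP2(x, m)  ((((uintptr_t)(x)) + ((m) - 1)) & ~((uintptr_t)(m) - 1))

struct percpu_state {
        int     counter;
};

int
main(void)
{
        /*
         * Over-allocate by one cache line, then round the pointer up so
         * the object never shares a line with an unrelated neighbour.
         */
        void *raw = malloc(ROUNDUP2(sizeof(struct percpu_state), CACHE_LINE) +
            CACHE_LINE);
        struct percpu_state *sc =
            (struct percpu_state *)ROUNDUP2(raw, CACHE_LINE);

        printf("raw %p -> aligned %p\n", raw, (void *)sc);
        free(raw);              /* free() needs the original pointer */
        return 0;
}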

/*
 * Initialize a selinfo record.
 */
void
selinit(struct selinfo *sip)
{

        memset(sip, 0, sizeof(*sip));
}

/*
 * Destroy a selinfo record.  The owning object must not gain new
 * references while this is in progress: all activity on the record
 * must be stopped.
 *
 * Concurrency issues: we only need to guard against a call to selclear()
 * by a thread exiting selcommon() and/or pollcommon().  The caller has
 * prevented further references being made to the selinfo record via
 * selrecord(), and it will not call selnotify() again.
 */
void
seldestroy(struct selinfo *sip)
{
        selcpu_t *sc;
        lwp_t *l;

        if (sip->sel_lwp == NULL)
                return;

        /*
         * Lock out selclear().  The selcpu pointer can't change while
         * we are here since it is only ever changed in selrecord(),
         * and that will not be entered again for this record because
         * it is dying.
         */
        KASSERT(sip->sel_cpu != NULL);
        sc = sip->sel_cpu;
        mutex_spin_enter(&sc->sc_lock);
        if ((l = sip->sel_lwp) != NULL) {
                /*
                 * This should rarely happen, so although SLIST_REMOVE()
                 * is slow, using it here is not a problem.
                 */
                KASSERT(l->l_selcpu == sc);
                SLIST_REMOVE(&l->l_selwait, sip, selinfo, sel_chain);
                sip->sel_lwp = NULL;
        }
        mutex_spin_exit(&sc->sc_lock);
}

int
pollsock(struct socket *so, const struct timeval *tvp, int events)
{
        int             ncoll, error, timo;
        struct timeval  sleeptv, tv;
        selcpu_t        *sc;
        lwp_t           *l;

        timo = 0;
        if (tvp != NULL) {
                tv = *tvp;
                if (inittimeleft(&tv, &sleeptv) == -1)
                        return EINVAL;
        }

        l = curlwp;
        sc = l->l_cpu->ci_data.cpu_selcpu;
        l->l_selcpu = sc;
        SLIST_INIT(&l->l_selwait);
        error = 0;
        for (;;) {
                /*
                 * No need to lock.  If this is overwritten by another
                 * value while scanning, we will retry below.  We only
                 * need to see exact state from the descriptors that
                 * we are about to poll, and lock activity resulting
                 * from fo_poll is enough to provide an up to date value
                 * for new polling activity.
                 */
                ncoll = sc->sc_ncoll;
                l->l_selflag = SEL_SCANNING;
                if (sopoll(so, events) != 0)
                        break;
                if (tvp && (timo = gettimeleft(&tv, &sleeptv)) <= 0)
                        break;
                mutex_spin_enter(&sc->sc_lock);
                if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
                        mutex_spin_exit(&sc->sc_lock);
                        continue;
                }
                l->l_selflag = SEL_BLOCKING;
                sleepq_enter(&sc->sc_sleepq, l, &sc->sc_lock);
                sleepq_enqueue(&sc->sc_sleepq, sc, "pollsock", &select_sobj);
                error = sleepq_block(timo, true);
                if (error != 0)
                        break;
        }
        selclear();
        /* poll is not restarted after signals... */
        if (error == ERESTART)
                error = EINTR;
        if (error == EWOULDBLOCK)
                error = 0;
        return (error);
}
