The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/common/io/devpoll.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   23  * Use is subject to license terms.
   24  */
   25 
   26 
   27 #include <sys/types.h>
   28 #include <sys/devops.h>
   29 #include <sys/conf.h>
   30 #include <sys/modctl.h>
   31 #include <sys/sunddi.h>
   32 #include <sys/stat.h>
   33 #include <sys/poll_impl.h>
   34 #include <sys/errno.h>
   35 #include <sys/kmem.h>
   36 #include <sys/mkdev.h>
   37 #include <sys/debug.h>
   38 #include <sys/file.h>
   39 #include <sys/sysmacros.h>
   40 #include <sys/systm.h>
   41 #include <sys/bitmap.h>
   42 #include <sys/devpoll.h>
   43 #include <sys/rctl.h>
   44 #include <sys/resource.h>
   45 
   46 #define RESERVED        1       /* slot claimed by dpopen(), entry pending */
   47 
   48 /* local data struct */
   49 static  dp_entry_t      **devpolltbl;   /* dev poll entries, indexed by minor */
   50 static  size_t          dptblsize;      /* number of slots in devpolltbl */
   51 
   52 static  kmutex_t        devpoll_lock;   /* lock protecting dev tbl */
   53 int                     devpoll_init;   /* is /dev/poll initialized already */
   54 
   55 /* device local functions */
   56 
   57 static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp);
   58 static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp);
   59 static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
   60     int *rvalp);
   61 static int dppoll(dev_t dev, short events, int anyyet, short *reventsp,
   62     struct pollhead **phpp);
   63 static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp);
   64 static dev_info_t *dpdevi;      /* devinfo node saved by dpattach() */
   65 
   66 
      /*
       * Character device entry points.  Only open, close, write, ioctl and
       * poll are implemented by this driver; every other entry point is
       * nodev, and property operations are handled by ddi_prop_op.
       */
   67 static struct cb_ops    dp_cb_ops = {
   68         dpopen,                 /* open */
   69         dpclose,                /* close */
   70         nodev,                  /* strategy */
   71         nodev,                  /* print */
   72         nodev,                  /* dump */
   73         nodev,                  /* read */
   74         dpwrite,                /* write */
   75         dpioctl,                /* ioctl */
   76         nodev,                  /* devmap */
   77         nodev,                  /* mmap */
   78         nodev,                  /* segmap */
   79         dppoll,                 /* poll */
   80         ddi_prop_op,            /* prop_op */
   81         (struct streamtab *)0,  /* streamtab */
   82         D_MP,                   /* flags */
   83         CB_REV,                 /* cb_ops revision */
   84         nodev,                  /* aread */
   85         nodev                   /* awrite */
   86 };
   87 
   88 static int dpattach(dev_info_t *, ddi_attach_cmd_t);
   89 static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
   90 static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
   91 
      /*
       * Device operations.  This driver supplies info, attach and detach;
       * identify, probe and power are nulldev, reset is nodev, there are no
       * bus operations, and the character entry points come from dp_cb_ops.
       */
   92 static struct dev_ops dp_ops = {
   93         DEVO_REV,               /* devo_rev */
   94         0,                      /* refcnt */
   95         dpinfo,                 /* info */
   96         nulldev,                /* identify */
   97         nulldev,                /* probe */
   98         dpattach,               /* attach */
   99         dpdetach,               /* detach */
  100         nodev,                  /* reset */
  101         &dp_cb_ops,             /* driver operations */
  102         (struct bus_ops *)NULL, /* bus operations */
  103         nulldev,                /* power */
  104         ddi_quiesce_not_needed,         /* quiesce */
  105 };
  106 
  107 
      /* Driver module description: module type, name string and dev_ops. */
  108 static struct modldrv modldrv = {
  109         &mod_driverops,         /* type of module - a driver */
  110         "/dev/poll driver",
  111         &dp_ops,
  112 };
  113 
      /* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
  114 static struct modlinkage modlinkage = {
  115         MODREV_1,
  116         (void *)&modldrv,
  117         NULL
  118 };
  119 
  120 /*
  121  * Locking Design
  122  *
  123  * The /dev/poll driver shares most of its code with poll sys call whose
  124  * code is in common/syscall/poll.c. In poll(2) design, the pollcache
  125  * structure is per lwp. An implicit assumption is made there that some
  126  * portion of pollcache will never be touched by other lwps. E.g., in
  127  * poll(2) design, no lwp will ever need to grow bitmap of other lwp.
  128  * This assumption is not true for /dev/poll; hence the need for extra
  129  * locking.
  130  *
  131  * To allow more parallelism, each /dev/poll file descriptor (indexed by
  132  * minor number) has its own lock. Since read (dpioctl) is a much more
  133  * frequent operation than write, we want to allow multiple reads on same
  134  * /dev/poll fd. However, we prevent writes from being starved by giving
  135  * priority to write operations. Theoretically writes can starve reads as
  136  * well. But in a practical sense this is not important because (1) writes
  137  * happen less often than reads, and (2) a write operation defines the
  138  * content of the poll fd's cached set. If writes happen so often that they
  139  * can starve reads, that means the cached set is very unstable. It may
  140  * not make sense to read an unstable cache set anyway. Therefore, the
  141  * writers starving readers case is not handled in this design.
  142  */
  143 
      /*
       * Module load entry point.  Allocates the initial zeroed devpolltbl of
       * DEVPOLLSIZE slots, initializes devpoll_lock, sets devpoll_init and
       * then calls mod_install().  If mod_install() fails, the lock, the
       * table and the devpoll_init flag are all undone.  Returns the value
       * returned by mod_install().
       */
  144 int
  145 _init()
  146 {
  147         int     error;
  148 
  149         dptblsize = DEVPOLLSIZE;
  150         devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
  151         mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL);
  152         devpoll_init = 1;
  153         if ((error = mod_install(&modlinkage)) != 0) {
                      /* installation failed: release everything set up above */
  154                 mutex_destroy(&devpoll_lock);
  155                 kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
  156                 devpoll_init = 0;
  157         }
  158         return (error);
  159 }
  160 
      /*
       * Module unload entry point.  If mod_remove() fails, its error is
       * returned and nothing is torn down.  Otherwise devpoll_lock is
       * destroyed, devpolltbl is freed using the current dptblsize (dpopen()
       * may have grown the table since _init()), and 0 is returned.
       * devpoll_init is not cleared here.
       */
  161 int
  162 _fini()
  163 {
  164         int error;
  165 
  166         if ((error = mod_remove(&modlinkage)) != 0) {
  167                 return (error);
  168         }
  169         mutex_destroy(&devpoll_lock);
  170         kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
  171         return (0);
  172 }
  173 
      /* Module information entry point: returns mod_info() for modlinkage. */
  174 int
  175 _info(struct modinfo *modinfop)
  176 {
  177         return (mod_info(&modlinkage, modinfop));
  178 }
  179 
      /*
       * Attach entry point.  Creates the character (S_IFCHR) minor node
       * "poll" of node type DDI_PSEUDO and saves devi in dpdevi for use by
       * dpinfo().  The cmd argument is not examined.  Returns DDI_SUCCESS,
       * or DDI_FAILURE if the minor node could not be created.
       */
  180 /*ARGSUSED*/
  181 static int
  182 dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
  183 {
  184         if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL)
  185             == DDI_FAILURE) {
  186                 ddi_remove_minor_node(devi, NULL);
  187                 return (DDI_FAILURE);
  188         }
  189         dpdevi = devi;
  190         return (DDI_SUCCESS);
  191 }
  192 
      /*
       * Detach entry point.  Only DDI_DETACH is supported; any other cmd
       * returns DDI_FAILURE.  For DDI_DETACH, ddi_remove_minor_node() is
       * called on devi and DDI_SUCCESS is returned.
       */
  193 static int
  194 dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
  195 {
  196         if (cmd != DDI_DETACH)
  197                 return (DDI_FAILURE);
  198 
  199         ddi_remove_minor_node(devi, NULL);
  200         return (DDI_SUCCESS);
  201 }
  202 
      /*
       * getinfo entry point.
       *   DDI_INFO_DEVT2DEVINFO:  *result is the dev_info saved by dpattach().
       *   DDI_INFO_DEVT2INSTANCE: *result is instance 0.
       * Both return DDI_SUCCESS.  Any other infocmd returns DDI_FAILURE and
       * leaves *result untouched.  dip and arg are not used.
       */
  203 /* ARGSUSED */
  204 static int
  205 dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
  206 {
  207         int error;
  208 
  209         switch (infocmd) {
  210         case DDI_INFO_DEVT2DEVINFO:
  211                 *result = (void *)dpdevi;
  212                 error = DDI_SUCCESS;
  213                 break;
  214         case DDI_INFO_DEVT2INSTANCE:
  215                 *result = (void *)0;
  216                 error = DDI_SUCCESS;
  217                 break;
  218         default:
  219                 error = DDI_FAILURE;
  220         }
  221         return (error);
  222 }
  223 
  224 /*
  225  * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major
  226  * differences are: (1) /dev/poll requires scanning the bitmap starting at
  227  * where it was stopped last time, instead of always starting from 0,
  228  * (2) since user may not have cleaned up the cached fds when they are
  229  * closed, some polldats in cache may refer to closed or reused fds. We
  230  * need to check for those cases.
  231  *
  232  * NOTE: Upon closing an fd, automatic poll cache cleanup is done for
  233  *       poll(2) caches but NOT for /dev/poll caches. So expect some
  234  *       stale entries!
       *
       * Arguments:
       *   pfdp   - output array; at most nfds entries are filled in.
       *   pcp    - pollcache to scan; the caller must hold pcp->pc_lock.
       *   nfds   - capacity of pfdp.
       *   fdcntp - out: number of entries stored in pfdp.  Must be 0 on
       *            entry (asserted).
       *
       * Returns 0, or the error returned by VOP_POLL().  If no bitmap has
       * been allocated yet, returns 0 at once without writing *fdcntp.
  235  */
  236 static int
  237 dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
  238 {
  239         int             start, ostart, end;
  240         int             fdcnt, fd;
  241         boolean_t       done;
  242         file_t          *fp;
  243         short           revent;
  244         boolean_t       no_wrap;
  245         pollhead_t      *php;
  246         polldat_t       *pdp;
  247         int             error = 0;
  248 
  249         ASSERT(MUTEX_HELD(&pcp->pc_lock));
  250         if (pcp->pc_bitmap == NULL) {
  251                 /*
  252                  * No Need to search because no poll fd
  253                  * has been cached.
  254                  */
  255                 return (error);
  256         }
  257 retry:
              /*
               * A restart (goto retry) rescans from pc_mapstart and resets
               * fdcnt below, so results gathered so far are discarded.
               */
  258         start = ostart = pcp->pc_mapstart;
  259         end = pcp->pc_mapend;
  260         php = NULL;
  261 
  262         if (start == 0) {
  263                 /*
  264                  * started from the very beginning, no need to wrap around.
  265                  */
  266                 no_wrap = B_TRUE;
  267         } else {
  268                 no_wrap = B_FALSE;
  269         }
  270         done = B_FALSE;
  271         fdcnt = 0;
  272         while ((fdcnt < nfds) && !done) {
  273                 php = NULL;
  274                 revent = 0;
  275                 /*
  276                  * Examine the bit map in a circular fashion
  277                  * to avoid starvation. Always resume from
  278                  * last stop. Scan till end of the map. Then
  279                  * wrap around.
  280                  */
  281                 fd = bt_getlowbit(pcp->pc_bitmap, start, end);
  282                 ASSERT(fd <= end);
  283                 if (fd >= 0) {
                              /*
                               * Advance the scan window past this fd before
                               * examining it, so that every "continue" below
                               * moves on to the next set bit.
                               */
  284                         if (fd == end) {
  285                                 if (no_wrap) {
  286                                         done = B_TRUE;
  287                                 } else {
  288                                         start = 0;
  289                                         end = ostart - 1;
  290                                         no_wrap = B_TRUE;
  291                                 }
  292                         } else {
  293                                 start = fd + 1;
  294                         }
  295                         pdp = pcache_lookup_fd(pcp, fd);
  296 repoll:
  297                         ASSERT(pdp != NULL);
  298                         ASSERT(pdp->pd_fd == fd);
  299                         if (pdp->pd_fp == NULL) {
  300                                 /*
  301                                  * The fd is POLLREMOVed. This fd is
  302                                  * logically no longer cached. So move
  303                                  * on to the next one.
  304                                  */
  305                                 continue;
  306                         }
  307                         if ((fp = getf(fd)) == NULL) {
  308                                 /*
  309                                  * The fd has been closed, but user has not
  310                                  * done a POLLREMOVE on this fd yet. Instead
  311                                  * of cleaning it here implicitly, we return
  312                                  * POLLNVAL. This is consistent with poll(2)
  313                                  * polling a closed fd. Hope this will remind
  314                                  * user to do a POLLREMOVE.
  315                                  */
                                      /*
                                       * NOTE(review): unlike the revent != 0
                                       * path below, pfdp[fdcnt].events is not
                                       * written here.
                                       */
  316                                 pfdp[fdcnt].fd = fd;
  317                                 pfdp[fdcnt].revents = POLLNVAL;
  318                                 fdcnt++;
  319                                 continue;
  320                         }
  321                         if (fp != pdp->pd_fp) {
  322                                 /*
  323                                  * user is polling on a cached fd which was
  324                                  * closed and then reused. Unfortunately
  325                                  * there is no good way to inform user.
  326                                  * If the file struct is also reused, we
  327                                  * may not be able to detect the fd reuse
  328                                  * at all.  As long as this does not
  329                                  * cause system failure and/or memory leak,
  330                                  * we will play along. Man page states if
  331                                  * user does not clean up closed fds, polling
  332                                  * results will be nondeterministic.
  333                                  *
  334                                  * XXX - perhaps log the detection of fd
  335                                  *       reuse?
  336                                  */
  337                                 pdp->pd_fp = fp;
  338                         }
  339                         /*
  340                          * XXX - pollrelock() logic needs to know
  341                          * which pollcache lock to grab. It'd be a
  342                          * cleaner solution if we could pass pcp as
  343                          * an argument in VOP_POLL interface instead
  344                          * of implicitly passing it using thread_t
  345                          * struct. On the other hand, changing VOP_POLL
  346                          * interface will require all driver/file system
  347                          * poll routine to change. May want to revisit
  348                          * the tradeoff later.
  349                          */
  350                         curthread->t_pollcache = pcp;
  351                         error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
  352                             &revent, &php, NULL);
  353                         curthread->t_pollcache = NULL;
  354                         releasef(fd);
  355                         if (error != 0) {
  356                                 break;
  357                         }
  358                         /*
  359                          * layered devices (e.g. console driver)
  360                          * may change the vnode and thus the pollhead
  361                          * pointer out from underneath us.
  362                          */
  363                         if (php != NULL && pdp->pd_php != NULL &&
  364                             php != pdp->pd_php) {
  365                                 pollhead_delete(pdp->pd_php, pdp);
  366                                 pdp->pd_php = php;
  367                                 pollhead_insert(php, pdp);
  368                                 /*
  369                                  * The bit should still be set.
  370                                  */
  371                                 ASSERT(BT_TEST(pcp->pc_bitmap, fd));
  372                                 goto retry;
  373                         }
  374 
  375                         if (revent != 0) {
  376                                 pfdp[fdcnt].fd = fd;
  377                                 pfdp[fdcnt].events = pdp->pd_events;
  378                                 pfdp[fdcnt].revents = revent;
  379                                 fdcnt++;
  380                         } else if (php != NULL) {
  381                                 /*
  382                                  * We clear a bit or cache a poll fd if
  383                                  * the driver returns a poll head ptr,
  384                                  * which is expected in the case of 0
  385                                  * revents. Some buggy driver may return
  386                                  * NULL php pointer with 0 revents. In
  387                                  * this case, we just treat the driver as
  388                                  * "noncachable" and not clearing the bit
  389                                  * in bitmap.
  390                                  */
  391                                 if ((pdp->pd_php != NULL) &&
  392                                     ((pcp->pc_flag & T_POLLWAKE) == 0)) {
  393                                         BT_CLEAR(pcp->pc_bitmap, fd);
  394                                 }
  395                                 if (pdp->pd_php == NULL) {
  396                                         pollhead_insert(php, pdp);
  397                                         pdp->pd_php = php;
  398                                         /*
  399                                          * An event of interest may have
  400                                          * arrived between the VOP_POLL() and
  401                                          * the pollhead_insert(); check again.
  402                                          */
  403                                         goto repoll;
  404                                 }
  405                         }
  406                 } else {
  407                         /*
  408                          * No bit set in the range. Check for wrap around.
  409                          */
  410                         if (!no_wrap) {
  411                                 start = 0;
  412                                 end = ostart - 1;
  413                                 no_wrap = B_TRUE;
  414                         } else {
  415                                 done = B_TRUE;
  416                         }
  417                 }
  418         }
  419 
              /*
               * The scan stopped before covering the whole map (output array
               * full, or VOP_POLL failed): remember where to resume next time.
               * After a complete scan pc_mapstart is left unchanged.
               */
  420         if (!done) {
  421                 pcp->pc_mapstart = start;
  422         }
  423         ASSERT(*fdcntp == 0);
  424         *fdcntp = fdcnt;
  425         return (error);
  426 }
  427 
      /*
       * Open entry point (clone open).  Each open is given its own minor
       * number, which is also its index into devpolltbl:
       *
       *  1. Under devpoll_lock, the first NULL slot is marked RESERVED.  If
       *     the table is full it is grown by DEVPOLLSIZE slots, capped at
       *     MAXMIN; if it is already at MAXMIN the open fails with ENXIO.
       *  2. With the lock dropped, the dp_entry_t and a pollcache skeleton
       *     are allocated (KM_SLEEP) and the opener's pid is recorded in
       *     pc_pid.
       *  3. *devp is rewritten to carry the new minor, and the entry is
       *     stored in the reserved slot under devpoll_lock.
       *
       * flag, otyp and credp are not used.  Returns 0 or ENXIO.
       */
  428 /*ARGSUSED*/
  429 static int
  430 dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
  431 {
  432         minor_t         minordev;
  433         dp_entry_t      *dpep;
  434         pollcache_t     *pcp;
  435 
  436         ASSERT(devpoll_init);
  437         ASSERT(dptblsize <= MAXMIN);
  438         mutex_enter(&devpoll_lock);
  439         for (minordev = 0; minordev < dptblsize; minordev++) {
  440                 if (devpolltbl[minordev] == NULL) {
  441                         devpolltbl[minordev] = (dp_entry_t *)RESERVED;
  442                         break;
  443                 }
  444         }
  445         if (minordev == dptblsize) {
  446                 dp_entry_t      **newtbl;
  447                 size_t          oldsize;
  448 
  449                 /*
  450                  * Used up every entry in the existing devpoll table.
  451                  * Grow the table by DEVPOLLSIZE.
  452                  */
  453                 if ((oldsize = dptblsize) >= MAXMIN) {
  454                         mutex_exit(&devpoll_lock);
  455                         return (ENXIO);
  456                 }
  457                 dptblsize += DEVPOLLSIZE;
  458                 if (dptblsize > MAXMIN) {
  459                         dptblsize = MAXMIN;
  460                 }
  461                 newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
  462                 bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize);
  463                 kmem_free(devpolltbl, sizeof (caddr_t) * oldsize);
  464                 devpolltbl = newtbl;
                      /* minordev == oldsize: the first slot of the new space */
  465                 devpolltbl[minordev] = (dp_entry_t *)RESERVED;
  466         }
  467         mutex_exit(&devpoll_lock);
  468 
  469         dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
  470         /*
  471          * allocate a pollcache skeleton here. Delay allocating bitmap
  472          * structures until dpwrite() time, since we don't know the
  473          * optimal size yet.
  474          */
  475         pcp = pcache_alloc();
  476         dpep->dpe_pcache = pcp;
  477         pcp->pc_pid = curproc->p_pid;
  478         *devp = makedevice(getmajor(*devp), minordev);  /* clone the driver */
              /* publish the finished entry in the slot reserved above */
  479         mutex_enter(&devpoll_lock);
  480         ASSERT(minordev < dptblsize);
  481         ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED);
  482         devpolltbl[minordev] = dpep;
  483         mutex_exit(&devpoll_lock);
  484         return (0);
  485 }
  486 
  487 /*
  488  * Write to dev/poll add/remove fd's to/from a cached poll fd set,
  489  * or change poll events for a watched fd.
       *
       * The data written is an array of pollfd_t; uio_resid / sizeof (pollfd_t)
       * entries are processed (any trailing partial entry is copied in but
       * ignored).  For each entry:
       *   - an fd that is >= the process's fi_nfiles is silently skipped;
       *   - events == POLLREMOVE drops the fd from the cached set;
       *   - otherwise the events are OR'ed into the fd's cached events, the
       *     fd's bit is set in the bitmap and, unless the same events are
       *     already cached, the file is polled once with VOP_POLL().
       *
       * Only the process that opened this minor (pid saved in pc_pid) may
       * write.  The writer waits until no reader or other writer is active
       * (dpe_refcnt == 0) and then holds exclusive access for the whole walk.
       *
       * Returns 0 on success, EACCES for a foreign process, EINVAL if the
       * number of entries exceeds the RLIMIT_NOFILE resource control, the
       * uiomove() error, EINTR if the wait for exclusive access was
       * interrupted, or the first VOP_POLL() error (the walk stops there).
  490  */
  491 /*ARGSUSED*/
  492 static int
  493 dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
  494 {
  495         minor_t         minor;
  496         dp_entry_t      *dpep;
  497         pollcache_t     *pcp;
  498         pollfd_t        *pollfdp, *pfdp;
  499         int             error;
  500         ssize_t         uiosize;
  501         nfds_t          pollfdnum;
  502         struct pollhead *php = NULL;
  503         polldat_t       *pdp;
  504         int             fd;
  505         file_t          *fp;
  506 
  507         minor = getminor(dev);
  508 
  509         mutex_enter(&devpoll_lock);
  510         ASSERT(minor < dptblsize);
  511         dpep = devpolltbl[minor];
  512         ASSERT(dpep != NULL);
  513         mutex_exit(&devpoll_lock);
  514         pcp = dpep->dpe_pcache;
  515         if (curproc->p_pid != pcp->pc_pid) {
  516                 return (EACCES);
  517         }
  518         uiosize = uiop->uio_resid;
  519         pollfdnum = uiosize / sizeof (pollfd_t);
  520         mutex_enter(&curproc->p_lock);
  521         if (pollfdnum > (uint_t)rctl_enforced_value(
  522             rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
  523                 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
  524                     curproc->p_rctls, curproc, RCA_SAFE);
  525                 mutex_exit(&curproc->p_lock);
  526                 return (set_errno(EINVAL));
  527         }
  528         mutex_exit(&curproc->p_lock);
  529         /*
  530          * Copy in the pollfd array.  Walk through the array and add
  531          * each polled fd to the cached set.
  532          */
  533         pollfdp = kmem_alloc(uiosize, KM_SLEEP);
  534 
  535         /*
  536          * Although /dev/poll uses the write(2) interface to cache fds, it's
  537          * not supposed to function as a seekable device. To prevent offset
  538          * from growing and eventually exceed the maximum, reset the offset
  539          * here for every call.
  540          */
  541         uiop->uio_loffset = 0;
  542         if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
  543             != 0) {
  544                 kmem_free(pollfdp, uiosize);
  545                 return (error);
  546         }
  547         /*
  548          * We are about to enter the core portion of dpwrite(). Make sure this
  549          * write has exclusive access in this portion of the code, i.e., no
  550          * other writers in this code and no other readers in dpioctl.
  551          */
  552         mutex_enter(&dpep->dpe_lock);
  553         dpep->dpe_writerwait++;
  554         while (dpep->dpe_refcnt != 0) {
  555                 if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
  556                         dpep->dpe_writerwait--;
  557                         mutex_exit(&dpep->dpe_lock);
  558                         kmem_free(pollfdp, uiosize);
  559                         return (set_errno(EINTR));
  560                 }
  561         }
  562         dpep->dpe_writerwait--;
  563         dpep->dpe_flag |= DP_WRITER_PRESENT;
  564         dpep->dpe_refcnt++;
  565         mutex_exit(&dpep->dpe_lock);
  566 
  567         mutex_enter(&pcp->pc_lock);
  568         if (pcp->pc_bitmap == NULL) {
  569                 pcache_create(pcp, pollfdnum);
  570         }
  571         for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
  572                 fd = pfdp->fd;
  573                 if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
  574                         continue;
  575                 pdp = pcache_lookup_fd(pcp, fd);
  576                 if (pfdp->events != POLLREMOVE) {
  577                         if (pdp == NULL) {
  578                                 pdp = pcache_alloc_fd(0);
  579                                 pdp->pd_fd = fd;
  580                                 pdp->pd_pcache = pcp;
  581                                 pcache_insert_fd(pcp, pdp, pollfdnum);
  582                         }
  583                         ASSERT(pdp->pd_fd == fd);
  584                         ASSERT(pdp->pd_pcache == pcp);
  585                         if (fd >= pcp->pc_mapsize) {
  586                                 mutex_exit(&pcp->pc_lock);
  587                                 pcache_grow_map(pcp, fd);
  588                                 mutex_enter(&pcp->pc_lock);
  589                         }
  590                         if (fd > pcp->pc_mapend) {
  591                                 pcp->pc_mapend = fd;
  592                         }
  593                         if ((fp = getf(fd)) == NULL) {
  594                                 /*
  595                                  * The fd is not valid. Since we can't pass
  596                                  * this error back in the write() call, set
  597                                  * the bit in bitmap to force DP_POLL ioctl
  598                                  * to examine it.
  599                                  */
  600                                 BT_SET(pcp->pc_bitmap, fd);
  601                                 pdp->pd_events |= pfdp->events;
  602                                 continue;
  603                         }
  604                         /*
  605                          * Don't do VOP_POLL for an already cached fd with
  606                          * same poll events.
  607                          */
  608                         if ((pdp->pd_events == pfdp->events) &&
  609                             (pdp->pd_fp != NULL)) {
  610                                 /*
  611                                  * the events are already cached
  612                                  */
  613                                 releasef(fd);
  614                                 continue;
  615                         }
  616 
  617                         /*
  618                          * do VOP_POLL and cache this poll fd.
  619                          */
  620                         /*
  621                          * XXX - pollrelock() logic needs to know
  622                          * which pollcache lock to grab. It'd be a
  623                          * cleaner solution if we could pass pcp as
  624                          * an argument in VOP_POLL interface instead
  625                          * of implicitly passing it using thread_t
  626                          * struct. On the other hand, changing VOP_POLL
  627                          * interface will require all driver/file system
  628                          * poll routine to change. May want to revisit
  629                          * the tradeoff later.
  630                          */
                              /*
                               * VOP_POLL is only expected to hand back a
                               * pollhead when it reports no events (see
                               * dp_pcache_poll()), so clear php first.
                               * Otherwise the pollhead left over from an
                               * earlier array entry would be attached to
                               * this fd's polldat below.
                               */
                              php = NULL;
  631                         curthread->t_pollcache = pcp;
  632                         error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
  633                             &pfdp->revents, &php, NULL);
  634                         curthread->t_pollcache = NULL;
  635                         /*
  636                          * We always set the bit when this fd is cached;
  637                          * this forces the first DP_POLL to poll this fd.
  638                          * Real performance gain comes from subsequent
  639                          * DP_POLL.  We also attempt a pollhead_insert();
  640                          * if it's not possible, we'll do it in dpioctl().
  641                          */
  642                         BT_SET(pcp->pc_bitmap, fd);
  643                         if (error != 0) {
  644                                 releasef(fd);
  645                                 break;
  646                         }
  647                         pdp->pd_fp = fp;
  648                         pdp->pd_events |= pfdp->events;
  649                         if (php != NULL) {
  650                                 if (pdp->pd_php == NULL) {
  651                                         pollhead_insert(php, pdp);
  652                                         pdp->pd_php = php;
  653                                 } else {
  654                                         if (pdp->pd_php != php) {
  655                                                 pollhead_delete(pdp->pd_php,
  656                                                     pdp);
  657                                                 pollhead_insert(php, pdp);
  658                                                 pdp->pd_php = php;
  659                                         }
  660                                 }
  661 
  662                         }
  663                         releasef(fd);
  664                 } else {
                              /*
                               * POLLREMOVE: forget the file and events, detach
                               * from the pollhead and clear the bit.  The
                               * polldat itself stays in the cache with
                               * pd_fp == NULL.
                               */
  665                         if (pdp == NULL) {
  666                                 continue;
  667                         }
  668                         ASSERT(pdp->pd_fd == fd);
  669                         pdp->pd_fp = NULL;
  670                         pdp->pd_events = 0;
  671                         ASSERT(pdp->pd_thread == NULL);
  672                         if (pdp->pd_php != NULL) {
  673                                 pollhead_delete(pdp->pd_php, pdp);
  674                                 pdp->pd_php = NULL;
  675                         }
  676                         BT_CLEAR(pcp->pc_bitmap, fd);
  677                 }
  678         }
  679         mutex_exit(&pcp->pc_lock);
              /* give up exclusive access and wake waiting readers/writers */
  680         mutex_enter(&dpep->dpe_lock);
  681         dpep->dpe_flag &= ~DP_WRITER_PRESENT;
  682         ASSERT(dpep->dpe_refcnt == 1);
  683         dpep->dpe_refcnt--;
  684         cv_broadcast(&dpep->dpe_cv);
  685         mutex_exit(&dpep->dpe_lock);
  686         kmem_free(pollfdp, uiosize);
  687         return (error);
  688 }
  689 
  690 /*ARGSUSED*/
      /*
       * ioctl entry point for a /dev/poll minor.  Two commands are handled:
       *
       *   DP_POLL     - copy in a dvpoll request, poll the cached fds (sleeping
       *                 until an event is found, a signal arrives, or the
       *                 timeout expires), copy the resulting pollfd entries
       *                 out to dp_fds and store their count in *rvalp.
       *   DP_ISPOLLED - copy in a pollfd and, if that fd is cached with a
       *                 non-NULL pd_fp, copy it back with revents set to the
       *                 cached pd_events and store 1 in *rvalp.
       *
       * Returns 0 on success, or: EACCES when the calling process's pid
       * differs from the pollcache's pc_pid; EINTR when a wait is interrupted
       * by a signal; EFAULT on copyin/copyout failure; EINVAL for an unknown
       * cmd or when the result buffer must grow and dp_nfds exceeds
       * p_fno_ctl; or the error reported by dp_pcache_poll().
       *
       * A reference on the dp_entry_t is taken before the command is
       * dispatched and is dropped with DP_REFRELE() on every exit path after
       * that point.
       */
  691 static int
  692 dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
  693 {
  694         timestruc_t     now;
  695         timestruc_t     rqtime;
  696         timestruc_t     *rqtp = NULL;
  697         int             timecheck = 0;
  698         minor_t         minor;
  699         dp_entry_t      *dpep;
  700         pollcache_t     *pcp;
  701         int             error = 0;
  702         STRUCT_DECL(dvpoll, dvpoll);
  703 
  704         if (cmd == DP_POLL) {
  705                 /* do this now, before we sleep on DP_WRITER_PRESENT below */
  706                 timecheck = timechanged;
  707                 gethrestime(&now);
  708         }
              /* Find the dp_entry_t for this minor; the table is read under devpoll_lock. */
  709         minor = getminor(dev);
  710         mutex_enter(&devpoll_lock);
  711         ASSERT(minor < dptblsize);
  712         dpep = devpolltbl[minor];
  713         mutex_exit(&devpoll_lock);
  714         ASSERT(dpep != NULL);
  715         pcp = dpep->dpe_pcache;
              /* Only the process whose pid is recorded in the pollcache may proceed. */
  716         if (curproc->p_pid != pcp->pc_pid)
  717                 return (EACCES);
  718 
              /*
               * Wait until no writer is present and none is waiting, then take
               * a reference.  A zero return from cv_wait_sig_swap() is reported
               * to the caller as EINTR.
               */
  719         mutex_enter(&dpep->dpe_lock);
  720         while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
  721             (dpep->dpe_writerwait != 0)) {
  722                 if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
  723                         mutex_exit(&dpep->dpe_lock);
  724                         return (EINTR);
  725                 }
  726         }
  727         dpep->dpe_refcnt++;
  728         mutex_exit(&dpep->dpe_lock);
  729 
  730         switch (cmd) {
  731         case    DP_POLL:
  732         {
  733                 pollstate_t *ps;
  734                 nfds_t  nfds;
  735                 int     fdcnt = 0;
  736                 int     time_out;
  737                 int     rval;
  738 
  739                 STRUCT_INIT(dvpoll, mode);
  740                 error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
  741                     STRUCT_SIZE(dvpoll));
  742                 if (error) {
  743                         DP_REFRELE(dpep);
  744                         return (EFAULT);
  745                 }
  746 
                      /*
                       * time_out > 0 yields an absolute deadline in rqtime;
                       * time_out == 0 means do not sleep at all; for a negative
                       * time_out rqtp stays NULL when passed to cv_waituntil_sig().
                       */
  747                 time_out = STRUCT_FGET(dvpoll, dp_timeout);
  748                 if (time_out > 0) {
  749                         /*
  750                          * Determine the future time of the requested timeout.
  751                          */
  752                         rqtp = &rqtime;
  753                         rqtp->tv_sec = time_out / MILLISEC;
                              /* sub-second remainder; presumably ms -> ns (verify MICROSEC) */
  754                         rqtp->tv_nsec = (time_out % MILLISEC) * MICROSEC;
  755                         timespecadd(rqtp, &now);
  756                 }
  757 
  758                 if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
  759                         /*
  760                          * We are just using DP_POLL to sleep, so
  761                          * we don't need any of the devpoll apparatus.
  762                          * Do not check for signals if we have a zero timeout.
  763                          */
  764                         DP_REFRELE(dpep);
  765                         if (time_out == 0)
  766                                 return (0);
                              /* Keep waiting while cv_waituntil_sig() returns > 0. */
  767                         mutex_enter(&curthread->t_delay_lock);
  768                         while ((rval = cv_waituntil_sig(&curthread->t_delay_cv,
  769                             &curthread->t_delay_lock, rqtp, timecheck)) > 0)
  770                                 continue;
  771                         mutex_exit(&curthread->t_delay_lock);
                              /* rval == 0 is reported as EINTR; a negative rval as success. */
  772                         return ((rval == 0)? EINTR : 0);
  773                 }
  774 
  775                 /*
  776                  * XXX It'd be nice not to have to alloc each time.
  777                  * But it requires another per thread structure hook.
  778                  * Do it later if there is data to suggest that.
  779                  */
  780                 if ((ps = curthread->t_pollstate) == NULL) {
  781                         curthread->t_pollstate = pollstate_create();
  782                         ps = curthread->t_pollstate;
  783                 }
                      /* Grow the per-thread result buffer when it cannot hold nfds entries. */
  784                 if (ps->ps_dpbufsize < nfds) {
  785                         struct proc *p = ttoproc(curthread);
  786                         /*
  787                          * The maximum size should be no larger than
  788                          * current maximum open file count.
  789                          */
  790                         mutex_enter(&p->p_lock);
  791                         if (nfds > p->p_fno_ctl) {
  792                                 mutex_exit(&p->p_lock);
  793                                 DP_REFRELE(dpep);
  794                                 return (EINVAL);
  795                         }
  796                         mutex_exit(&p->p_lock);
                              /* Release the old buffer and allocate a zeroed one of nfds entries. */
  797                         kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
  798                             ps->ps_dpbufsize);
  799                         ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
  800                             nfds, KM_SLEEP);
  801                         ps->ps_dpbufsize = nfds;
  802                 }
  803 
                      /*
                       * Poll loop, run with pc_lock held: clear pc_flag, scan the
                       * cache, and stop as soon as an fd is returned or an error
                       * occurs.  Otherwise rescan if T_POLLWAKE was set meanwhile,
                       * or sleep on pc_cv.
                       */
  804                 mutex_enter(&pcp->pc_lock);
  805                 for (;;) {
  806                         pcp->pc_flag = 0;
  807                         error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt);
  808                         if (fdcnt > 0 || error != 0)
  809                                 break;
  810 
  811                         /*
  812                          * A pollwake has happened since we polled cache.
  813                          */
  814                         if (pcp->pc_flag & T_POLLWAKE)
  815                                 continue;
  816 
  817                         /*
  818                          * Sleep until we are notified, signaled, or timed out.
  819                          * Do not check for signals if we have a zero timeout.
  820                          */
  821                         if (time_out == 0)      /* immediate timeout */
  822                                 break;
  823                         rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
  824                             rqtp, timecheck);
  825                         /*
  826                          * If we were awakened by a signal or timeout
  827                          * then break the loop, else poll again.
  828                          */
  829                         if (rval <= 0) {
  830                                 if (rval == 0)  /* signal */
  831                                         error = EINTR;
  832                                 break;
  833                         }
  834                 }
  835                 mutex_exit(&pcp->pc_lock);
  836 
                      /* Hand the fdcnt collected entries to the caller and report the count. */
  837                 if (error == 0 && fdcnt > 0) {
  838                         if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
  839                             dp_fds), sizeof (pollfd_t) * fdcnt)) {
  840                                 DP_REFRELE(dpep);
  841                                 return (EFAULT);
  842                         }
  843                         *rvalp = fdcnt;
  844                 }
  845                 break;
  846         }
  847 
  848         case    DP_ISPOLLED:
  849         {
  850                 pollfd_t        pollfd;
  851                 polldat_t       *pdp;
  852 
                      /*
                       * NOTE(review): dvpoll is initialized here but is not
                       * referenced again in this case; arg is copied in as a
                       * plain pollfd_t.
                       */
  853                 STRUCT_INIT(dvpoll, mode);
  854                 error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
  855                 if (error) {
  856                         DP_REFRELE(dpep);
  857                         return (EFAULT);
  858                 }
  859                 mutex_enter(&pcp->pc_lock);
  860                 if (pcp->pc_hash == NULL) {
  861                         /*
  862                          * No Need to search because no poll fd
  863                          * has been cached.
  864                          */
  865                         mutex_exit(&pcp->pc_lock);
  866                         DP_REFRELE(dpep);
  867                         return (0);
  868                 }
                      /* A negative fd is never looked up; fall out with error still 0. */
  869                 if (pollfd.fd < 0) {
  870                         mutex_exit(&pcp->pc_lock);
  871                         break;
  872                 }
  873                 pdp = pcache_lookup_fd(pcp, pollfd.fd);
  874                 if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
  875                     (pdp->pd_fp != NULL)) {
                              /* Report the cached event mask; the copyout runs with pc_lock held. */
  876                         pollfd.revents = pdp->pd_events;
  877                         if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) {
  878                                 mutex_exit(&pcp->pc_lock);
  879                                 DP_REFRELE(dpep);
  880                                 return (EFAULT);
  881                         }
  882                         *rvalp = 1;
  883                 }
  884                 mutex_exit(&pcp->pc_lock);
  885                 break;
  886         }
  887 
  888         default:
  889                 DP_REFRELE(dpep);
  890                 return (EINVAL);
  891         }
              /* Common exit for the cases that break out of the switch. */
  892         DP_REFRELE(dpep);
  893         return (error);
  894 }
  895 
  896 /*ARGSUSED*/
      /*
       * poll entry point for the /dev/poll device itself.  None of dev, events,
       * anyyet or phpp is consulted: the routine unconditionally stores POLLERR
       * in *reventsp and returns 0.
       */
  897 static int
  898 dppoll(dev_t dev, short events, int anyyet, short *reventsp,
  899     struct pollhead **phpp)
  900 {
  901         /*
  902          * Polling on a /dev/poll fd is not fully supported yet.
  903          */
  904         *reventsp = POLLERR;
  905         return (0);
  906 }
  907 
  908 /*
  909  * devpoll close should do enough clean up before the pollcache is deleted,
  910  * i.e., it should ensure no one still references the pollcache later.
  911  * There is no "permission" check in here. Any process having the last
  912  * reference of this /dev/poll fd can close.
       *
       * Teardown sequence:
       *   1. Remove the dp_entry_t from devpolltbl[] under devpoll_lock.
       *   2. Walk every hash chain of the pollcache and detach each polldat
       *      that is still linked to a pollhead.
       *   3. Wait, under pc_no_exit, until pc_busy drops to zero.
       *   4. Destroy the pollcache and free the dp_entry_t.
       *
       * Always returns 0.
  913  */
  914 /*ARGSUSED*/
  915 static int
  916 dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
  917 {
  918         minor_t         minor;
  919         dp_entry_t      *dpep;
  920         pollcache_t     *pcp;
  921         int             i;
  922         polldat_t       **hashtbl;
  923         polldat_t       *pdp;
  924 
  925         minor = getminor(dev);
  926 
              /* Unhook the entry from the table so later lookups by minor find NULL. */
  927         mutex_enter(&devpoll_lock);
  928         dpep = devpolltbl[minor];
  929         ASSERT(dpep != NULL);
  930         devpolltbl[minor] = NULL;
  931         mutex_exit(&devpoll_lock);
  932         pcp = dpep->dpe_pcache;
  933         ASSERT(pcp != NULL);
  934         /*
  935          * At this point, no other lwp can access this pollcache via the
  936          * /dev/poll fd. This pollcache is going away, so do the clean
  937          * up without the pc_lock.
  938          */
  939         hashtbl = pcp->pc_hash;
  940         for (i = 0; i < pcp->pc_hashsize; i++) {
  941                 for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
                              /*
                               * Only entries still attached to a pollhead are
                               * touched; pd_fp is cleared in that case only.
                               */
  942                         if (pdp->pd_php != NULL) {
  943                                 pollhead_delete(pdp->pd_php, pdp);
  944                                 pdp->pd_php = NULL;
  945                                 pdp->pd_fp = NULL;
  946                         }
  947                 }
  948         }
  949         /*
  950          * pollwakeup() may still interact with this pollcache. Wait until
  951          * it is done.
  952          */
  953         mutex_enter(&pcp->pc_no_exit);
  954         ASSERT(pcp->pc_busy >= 0);
  955         while (pcp->pc_busy > 0)
  956                 cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
  957         mutex_exit(&pcp->pc_no_exit);
              /* Nothing refers to the pollcache any more; release it and the entry. */
  958         pcache_destroy(pcp);
  959         ASSERT(dpep->dpe_refcnt == 0);
  960         kmem_free(dpep, sizeof (dp_entry_t));
  961         return (0);
  962 }

Cache object: e0311836f221f8b13c8a1fddf295cefd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.