FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_event.c


/*      $NetBSD: kern_event.c,v 1.33 2006/11/01 10:17:58 yamt Exp $     */

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.33 2006/11/01 10:17:58 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>

static void     kqueue_wakeup(struct kqueue *kq);

static int      kqueue_scan(struct file *, size_t, struct kevent *,
    const struct timespec *, struct lwp *, register_t *,
    const struct kevent_ops *);
static int      kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
                    kauth_cred_t cred, int flags);
static int      kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
                    kauth_cred_t cred, int flags);
static int      kqueue_ioctl(struct file *fp, u_long com, void *data,
                    struct lwp *l);
static int      kqueue_fcntl(struct file *fp, u_int com, void *data,
                    struct lwp *l);
static int      kqueue_poll(struct file *fp, int events, struct lwp *l);
static int      kqueue_kqfilter(struct file *fp, struct knote *kn);
static int      kqueue_stat(struct file *fp, struct stat *sp, struct lwp *l);
static int      kqueue_close(struct file *fp, struct lwp *l);

static const struct fileops kqueueops = {
        kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
        kqueue_stat, kqueue_close, kqueue_kqfilter
};

static void     knote_attach(struct knote *kn, struct filedesc *fdp);
static void     knote_drop(struct knote *kn, struct lwp *l,
                    struct filedesc *fdp);
static void     knote_enqueue(struct knote *kn);
static void     knote_dequeue(struct knote *kn);

static void     filt_kqdetach(struct knote *kn);
static int      filt_kqueue(struct knote *kn, long hint);
static int      filt_procattach(struct knote *kn);
static void     filt_procdetach(struct knote *kn);
static int      filt_proc(struct knote *kn, long hint);
static int      filt_fileattach(struct knote *kn);
static void     filt_timerexpire(void *knx);
static int      filt_timerattach(struct knote *kn);
static void     filt_timerdetach(struct knote *kn);
static int      filt_timer(struct knote *kn, long hint);

static const struct filterops kqread_filtops =
        { 1, NULL, filt_kqdetach, filt_kqueue };
static const struct filterops proc_filtops =
        { 0, filt_procattach, filt_procdetach, filt_proc };
static const struct filterops file_filtops =
        { 1, filt_fileattach, NULL, NULL };
static const struct filterops timer_filtops =
        { 0, filt_timerattach, filt_timerdetach, filt_timer };

static POOL_INIT(kqueue_pool, sizeof(struct kqueue), 0, 0, 0, "kqueuepl", NULL);
static POOL_INIT(knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl", NULL);
static int      kq_ncallouts = 0;
static int      kq_calloutmax = (4 * 1024);

MALLOC_DEFINE(M_KEVENT, "kevent", "kevents/knotes");

#define KNOTE_ACTIVATE(kn)                                              \
do {                                                                    \
        (kn)->kn_status |= KN_ACTIVE;                                   \
        if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)         \
                knote_enqueue(kn);                                      \
} while (0)

#define KN_HASHSIZE             64              /* XXX should be tunable */
#define KN_HASH(val, mask)      (((val) ^ ((val) >> 8)) & (mask))
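/*
 * A quick worked example of the hash:
 * KN_HASH(0x1234, 63) == ((0x1234 ^ 0x12) & 63) == 0x26.
 */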

extern const struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 * These should be listed in the numeric order of the EVFILT_* defines.
 * If filtops is NULL, the filter isn't implemented in NetBSD.
 * End of list is when name is NULL.
 */
struct kfilter {
        const char       *name;         /* name of filter */
        uint32_t          filter;       /* id of filter */
        const struct filterops *filtops;/* operations for filter */
};

                /* System defined filters */
static const struct kfilter sys_kfilters[] = {
        { "EVFILT_READ",        EVFILT_READ,    &file_filtops },
        { "EVFILT_WRITE",       EVFILT_WRITE,   &file_filtops },
        { "EVFILT_AIO",         EVFILT_AIO,     NULL },
        { "EVFILT_VNODE",       EVFILT_VNODE,   &file_filtops },
        { "EVFILT_PROC",        EVFILT_PROC,    &proc_filtops },
        { "EVFILT_SIGNAL",      EVFILT_SIGNAL,  &sig_filtops },
        { "EVFILT_TIMER",       EVFILT_TIMER,   &timer_filtops },
        { NULL,                 0,              NULL }, /* end of list */
};

                /* User defined kfilters */
static struct kfilter   *user_kfilters;         /* array */
static int              user_kfilterc;          /* current offset */
static int              user_kfiltermaxc;       /* max size so far */

/*
 * Find kfilter entry by name, or NULL if not found.
 */
static const struct kfilter *
kfilter_byname_sys(const char *name)
{
        int i;

        for (i = 0; sys_kfilters[i].name != NULL; i++) {
                if (strcmp(name, sys_kfilters[i].name) == 0)
                        return (&sys_kfilters[i]);
        }
        return (NULL);
}

static struct kfilter *
kfilter_byname_user(const char *name)
{
        int i;

        /* user filter slots have a NULL name if previously deregistered */
        for (i = 0; i < user_kfilterc; i++) {
                if (user_kfilters[i].name != NULL &&
                    strcmp(name, user_kfilters[i].name) == 0)
                        return (&user_kfilters[i]);
        }
        return (NULL);
}

static const struct kfilter *
kfilter_byname(const char *name)
{
        const struct kfilter *kfilter;

        if ((kfilter = kfilter_byname_sys(name)) != NULL)
                return (kfilter);

        return (kfilter_byname_user(name));
}

/*
 * Find kfilter entry by filter id, or NULL if not found.
 * Assumes entries are indexed in filter id order, for speed.
 */
static const struct kfilter *
kfilter_byfilter(uint32_t filter)
{
        const struct kfilter *kfilter;

        if (filter < EVFILT_SYSCOUNT)   /* it's a system filter */
                kfilter = &sys_kfilters[filter];
        else if (user_kfilters != NULL &&
            filter < EVFILT_SYSCOUNT + user_kfilterc)
                                        /* it's a user filter */
                kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
        else
                return (NULL);          /* out of range */
        KASSERT(kfilter->filter == filter);     /* sanity check! */
        return (kfilter);
}

/*
 * Register a new kfilter. Stores the entry in user_kfilters.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 * If retfilter != NULL, the new filterid is returned in it.
 */
int
kfilter_register(const char *name, const struct filterops *filtops,
    int *retfilter)
{
        struct kfilter *kfilter;
        void *space;
        int len;
        int i;

        if (name == NULL || name[0] == '\0' || filtops == NULL)
                return (EINVAL);        /* invalid args */
        if (kfilter_byname(name) != NULL)
                return (EEXIST);        /* already exists */
        if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT)
                return (EINVAL);        /* too many */

        for (i = 0; i < user_kfilterc; i++) {
                kfilter = &user_kfilters[i];
                if (kfilter->name == NULL) {
                        /* Previously deregistered slot.  Reuse. */
                        goto reuse;
                }
        }

        /* check if need to grow user_kfilters */
        if (user_kfilterc + 1 > user_kfiltermaxc) {
                /*
                 * Grow in KFILTER_EXTENT chunks. Use malloc(9), because we
                 * want to traverse user_kfilters as an array.
                 */
                user_kfiltermaxc += KFILTER_EXTENT;
                kfilter = malloc(user_kfiltermaxc * sizeof(struct kfilter),
                    M_KEVENT, M_WAITOK);

                /* copy existing user_kfilters */
                if (user_kfilters != NULL)
                        memcpy((caddr_t)kfilter, (caddr_t)user_kfilters,
                            user_kfilterc * sizeof(struct kfilter));
                                        /* zero new sections */
                memset((caddr_t)kfilter +
                    user_kfilterc * sizeof(struct kfilter), 0,
                    (user_kfiltermaxc - user_kfilterc) *
                    sizeof(struct kfilter));
                                        /* switch to new kfilter */
                if (user_kfilters != NULL)
                        free(user_kfilters, M_KEVENT);
                user_kfilters = kfilter;
        }
        /* Adding new slot */
        kfilter = &user_kfilters[user_kfilterc++];
reuse:
        len = strlen(name) + 1;         /* copy name */
        space = malloc(len, M_KEVENT, M_WAITOK);
        memcpy(space, name, len);
        kfilter->name = space;

        kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;

        len = sizeof(struct filterops); /* copy filtops */
        space = malloc(len, M_KEVENT, M_WAITOK);
        memcpy(space, filtops, len);
        kfilter->filtops = space;

        if (retfilter != NULL)
                *retfilter = kfilter->filter;
        return (0);
}
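
/*
 * A minimal sketch of how a third-party driver might use the interface
 * above; the "EVFILT_MYDEV" name, mydev_filtops and its methods are
 * hypothetical, not part of this file:
 *
 *      static const struct filterops mydev_filtops =
 *              { 1, NULL, filt_mydevdetach, filt_mydev };
 *      static int mydev_filter;
 *
 *      error = kfilter_register("EVFILT_MYDEV", &mydev_filtops,
 *          &mydev_filter);
 *      ...
 *      error = kfilter_unregister("EVFILT_MYDEV");
 */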

/*
 * Unregister a kfilter previously registered with kfilter_register.
 * This retains the filter id, but clears the name and frees filtops (filter
 * operations), so that the id isn't reused for the rest of this boot.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 */
int
kfilter_unregister(const char *name)
{
        struct kfilter *kfilter;

        if (name == NULL || name[0] == '\0')
                return (EINVAL);        /* invalid name */

        if (kfilter_byname_sys(name) != NULL)
                return (EINVAL);        /* can't detach system filters */

        kfilter = kfilter_byname_user(name);
        if (kfilter == NULL)            /* not found */
                return (ENOENT);

        /* XXXUNCONST Cast away const (but we know it's safe). */
        free(__UNCONST(kfilter->name), M_KEVENT);
        kfilter->name = NULL;   /* mark as `not implemented' */

        if (kfilter->filtops != NULL) {
                /* XXXUNCONST Cast away const (but we know it's safe). */
                free(__UNCONST(kfilter->filtops), M_KEVENT);
                kfilter->filtops = NULL; /* mark as `not implemented' */
        }
        return (0);
}


/*
 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
 * descriptors. Calls struct fileops kqfilter method for given file descriptor.
 */
static int
filt_fileattach(struct knote *kn)
{
        struct file *fp;

        fp = kn->kn_fp;
        return ((*fp->f_ops->fo_kqfilter)(fp, kn));
}

/*
 * Filter detach method for EVFILT_READ on kqueue descriptor.
 */
static void
filt_kqdetach(struct knote *kn)
{
        struct kqueue *kq;

        kq = (struct kqueue *)kn->kn_fp->f_data;
        SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
}

/*
 * Filter event method for EVFILT_READ on kqueue descriptor.
 */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
        struct kqueue *kq;

        kq = (struct kqueue *)kn->kn_fp->f_data;
        kn->kn_data = kq->kq_count;
        return (kn->kn_data > 0);
}

/*
 * Filter attach method for EVFILT_PROC.
 */
static int
filt_procattach(struct knote *kn)
{
        struct proc *p, *curp;
        struct lwp *curl;

        curl = curlwp;
        curp = curl->l_proc;

        p = pfind(kn->kn_id);
        if (p == NULL)
                return (ESRCH);

        /*
         * Fail if it's not owned by you, or the last exec gave us
         * setuid/setgid privs (unless you're root).
         */
        if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(curl->l_cred) ||
            (p->p_flag & P_SUGID)) && kauth_authorize_generic(curl->l_cred,
            KAUTH_GENERIC_ISSUSER, &curl->l_acflag) != 0)
                return (EACCES);

        kn->kn_ptr.p_proc = p;
        kn->kn_flags |= EV_CLEAR;       /* automatically set */

        /*
         * internal flag indicating registration done by kernel
         */
        if (kn->kn_flags & EV_FLAG1) {
                kn->kn_data = kn->kn_sdata;     /* ppid */
                kn->kn_fflags = NOTE_CHILD;
                kn->kn_flags &= ~EV_FLAG1;
        }

        /* XXXSMP lock the process? */
        SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

        return (0);
}

/*
 * Filter detach method for EVFILT_PROC.
 *
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process might not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
        struct proc *p;

        if (kn->kn_status & KN_DETACHED)
                return;

        p = kn->kn_ptr.p_proc;
        KASSERT(p->p_stat == SZOMB || pfind(kn->kn_id) == p);

        /* XXXSMP lock the process? */
        SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

/*
 * Filter event method for EVFILT_PROC.
 */
static int
filt_proc(struct knote *kn, long hint)
{
        u_int event;

        /*
         * mask off extra data
         */
        event = (u_int)hint & NOTE_PCTRLMASK;

        /*
         * if the user is interested in this event, record it.
         */
        if (kn->kn_sfflags & event)
                kn->kn_fflags |= event;

        /*
         * process is gone, so flag the event as finished.
         */
        if (event == NOTE_EXIT) {
                /*
                 * Detach the knote from the watched process and mark
                 * it as such. We can't leave this to kqueue_scan(),
                 * since the process might not exist by then. And we
                 * have to do this now, since psignal KNOTE() is called
                 * also for zombies and we might end up reading freed
                 * memory if the kevent would already be picked up
                 * and the knote g/c'ed.
                 */
                kn->kn_fop->f_detach(kn);
                kn->kn_status |= KN_DETACHED;

                /* Mark as ONESHOT, so that the knote is g/c'ed when read */
                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                return (1);
        }

        /*
         * process forked, and user wants to track the new process,
         * so attach a new knote to it, and immediately report an
         * event with the parent's pid.
         */
        if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
                struct kevent kev;
                int error;

                /*
                 * register knote with new process.
                 */
                kev.ident = hint & NOTE_PDATAMASK;      /* pid */
                kev.filter = kn->kn_filter;
                kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
                kev.fflags = kn->kn_sfflags;
                kev.data = kn->kn_id;                   /* parent */
                kev.udata = kn->kn_kevent.udata;        /* preserve udata */
                error = kqueue_register(kn->kn_kq, &kev, NULL);
                if (error)
                        kn->kn_fflags |= NOTE_TRACKERR;
        }

        return (kn->kn_fflags != 0);
}
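
/*
 * A minimal userland sketch of EVFILT_PROC with NOTE_TRACK, following
 * the logic above; `kq' is a kqueue descriptor and `pid' a process the
 * caller owns (error handling omitted):
 *
 *      struct kevent ev;
 *
 *      EV_SET(&ev, pid, EVFILT_PROC, EV_ADD,
 *          NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *      kevent(kq, &ev, 1, NULL, 0, NULL);
 *
 * After a fork, the child's knote (registered via EV_FLAG1 above)
 * reports NOTE_CHILD in fflags and the parent's pid in data.
 */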

static void
filt_timerexpire(void *knx)
{
        struct knote *kn = knx;
        int tticks;

        kn->kn_data++;
        KNOTE_ACTIVATE(kn);

        if ((kn->kn_flags & EV_ONESHOT) == 0) {
                tticks = mstohz(kn->kn_sdata);
                callout_schedule((struct callout *)kn->kn_hook, tticks);
        }
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
        struct callout *calloutp;
        int tticks;

        tticks = mstohz(kn->kn_sdata);

        /* if the supplied value is under our resolution, use 1 tick */
        if (tticks == 0) {
                if (kn->kn_sdata == 0)
                        return (EINVAL);
                tticks = 1;
        }

        /*
         * Check the callout limit (and take our slot) only once we know
         * the arguments are valid, so an EINVAL return can't leak a
         * kq_ncallouts increment.
         */
        if (kq_ncallouts >= kq_calloutmax)
                return (ENOMEM);
        kq_ncallouts++;

        kn->kn_flags |= EV_CLEAR;               /* automatically set */
        MALLOC(calloutp, struct callout *, sizeof(*calloutp),
            M_KEVENT, M_WAITOK);
        callout_init(calloutp);
        callout_reset(calloutp, tticks, filt_timerexpire, kn);
        kn->kn_hook = calloutp;

        return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
        struct callout *calloutp;

        calloutp = (struct callout *)kn->kn_hook;
        callout_stop(calloutp);
        FREE(calloutp, M_KEVENT);
        kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{
        return (kn->kn_data != 0);
}
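
/*
 * A minimal userland sketch of EVFILT_TIMER: arm a periodic 500 ms
 * timer on `kq'; data is in milliseconds as noted above, and EV_CLEAR
 * is set automatically by filt_timerattach():
 *
 *      struct kevent ev;
 *
 *      EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *      kevent(kq, &ev, 1, NULL, 0, NULL);
 *      kevent(kq, NULL, 0, &ev, 1, NULL);
 *
 * On return, ev.data counts the expirations since the last retrieval.
 */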

/*
 * filt_seltrue:
 *
 *      This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

        /*
         * We don't know how much data can be read/written,
         * but we know that it *can* be.  This is about as
         * good as select/poll does.
         */
        kn->kn_data = 0;
        return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, with
 * the same effect as a filter using filt_seltrue() as its event method.
 */
static void
filt_seltruedetach(struct knote *kn)
{
        /* Nothing to do */
}

static const struct filterops seltrue_filtops =
        { 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
        switch (kn->kn_filter) {
        case EVFILT_READ:
        case EVFILT_WRITE:
                kn->kn_fop = &seltrue_filtops;
                break;
        default:
                return (1);
        }

        /* Nothing more to do */
        return (0);
}
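
/*
 * A hedged sketch of wiring seltrue_kqfilter() into a driver's kqfilter
 * entry point ("mydev" is hypothetical), so that EVFILT_READ and
 * EVFILT_WRITE on the device always report ready, matching what
 * seltrue() reports for poll(2):
 *
 *      static int
 *      mydev_kqfilter(dev_t dev, struct knote *kn)
 *      {
 *              return (seltrue_kqfilter(dev, kn));
 *      }
 */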

/*
 * kqueue(2) system call.
 */
int
sys_kqueue(struct lwp *l, void *v, register_t *retval)
{
        struct filedesc *fdp;
        struct kqueue   *kq;
        struct file     *fp;
        int             fd, error;

        fdp = l->l_proc->p_fd;
        error = falloc(l, &fp, &fd);    /* setup a new file descriptor */
        if (error)
                return (error);
        fp->f_flag = FREAD | FWRITE;
        fp->f_type = DTYPE_KQUEUE;
        fp->f_ops = &kqueueops;
        kq = pool_get(&kqueue_pool, PR_WAITOK);
        memset((char *)kq, 0, sizeof(struct kqueue));
        simple_lock_init(&kq->kq_lock);
        TAILQ_INIT(&kq->kq_head);
        fp->f_data = (caddr_t)kq;       /* store the kqueue with the fp */
        *retval = fd;
        if (fdp->fd_knlistsize < 0)
                fdp->fd_knlistsize = 0; /* this process has a kq */
        kq->kq_fdp = fdp;
        FILE_SET_MATURE(fp);
        FILE_UNUSE(fp, l);              /* falloc() does FILE_USE() */
        return (error);
}
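
/*
 * A minimal userland sketch of the kqueue(2)/kevent(2) pair implemented
 * here: create a queue, register interest in an already-open descriptor
 * `fd', and wait for it to become readable (error handling omitted):
 *
 *      struct kevent ev;
 *      int kq = kqueue();
 *
 *      EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *      kevent(kq, &ev, 1, NULL, 0, NULL);
 *      kevent(kq, NULL, 0, &ev, 1, NULL);
 *
 * The first kevent() call registers the change; the second blocks until
 * the event fires, with ev.data holding the number of bytes readable.
 */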

/*
 * kevent(2) system call.
 */
static int
kevent_fetch_changes(void *private, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{
        return copyin(changelist + index, changes, n * sizeof(*changes));
}

static int
kevent_put_events(void *private, struct kevent *events,
    struct kevent *eventlist, size_t index, int n)
{
        return copyout(events, eventlist + index, n * sizeof(*events));
}

static const struct kevent_ops kevent_native_ops = {
        keo_private: NULL,
        keo_fetch_timeout: copyin,
        keo_fetch_changes: kevent_fetch_changes,
        keo_put_events: kevent_put_events,
};
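
/*
 * kevent1() below is also usable with caller-supplied kevent_ops, e.g.
 * by a compat layer that must translate a foreign struct kevent layout.
 * A hedged sketch (the compat_* helpers are hypothetical):
 *
 *      static const struct kevent_ops compat_kevent_ops = {
 *              keo_private: NULL,
 *              keo_fetch_timeout: compat_fetch_timeout,
 *              keo_fetch_changes: compat_fetch_changes,
 *              keo_put_events: compat_put_events,
 *      };
 *
 *      error = kevent1(l, retval, fd, changelist, nchanges,
 *          eventlist, nevents, timeout, &compat_kevent_ops);
 */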

int
sys_kevent(struct lwp *l, void *v, register_t *retval)
{
        struct sys_kevent_args /* {
                syscallarg(int) fd;
                syscallarg(const struct kevent *) changelist;
                syscallarg(size_t) nchanges;
                syscallarg(struct kevent *) eventlist;
                syscallarg(size_t) nevents;
                syscallarg(const struct timespec *) timeout;
        } */ *uap = v;

        return kevent1(l, retval, SCARG(uap, fd), SCARG(uap, changelist),
            SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
            SCARG(uap, timeout), &kevent_native_ops);
}

int
kevent1(struct lwp *l, register_t *retval, int fd,
    const struct kevent *changelist, size_t nchanges, struct kevent *eventlist,
    size_t nevents, const struct timespec *timeout,
    const struct kevent_ops *keops)
{
        struct kevent   *kevp;
        struct kqueue   *kq;
        struct file     *fp;
        struct timespec ts;
        struct proc     *p;
        size_t          i, n, ichange;
        int             nerrors, error;

        p = l->l_proc;
        /* check that we're dealing with a kq */
        fp = fd_getfile(p->p_fd, fd);
        if (fp == NULL)
                return (EBADF);

        if (fp->f_type != DTYPE_KQUEUE) {
                simple_unlock(&fp->f_slock);
                return (EBADF);
        }

        FILE_USE(fp);

        if (timeout != NULL) {
                error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
                if (error)
                        goto done;
                timeout = &ts;
        }

        kq = (struct kqueue *)fp->f_data;
        nerrors = 0;
        ichange = 0;

        /* traverse list of events to register */
        while (nchanges > 0) {
                /* copyin a maximum of KQ_NEVENTS at each pass */
                n = MIN(nchanges, KQ_NEVENTS);
                error = (*keops->keo_fetch_changes)(keops->keo_private,
                    changelist, kq->kq_kev, ichange, n);
                if (error)
                        goto done;
                for (i = 0; i < n; i++) {
                        kevp = &kq->kq_kev[i];
                        kevp->flags &= ~EV_SYSFLAGS;
                        /* register each knote */
                        error = kqueue_register(kq, kevp, l);
                        if (error) {
                                if (nevents != 0) {
                                        kevp->flags = EV_ERROR;
                                        kevp->data = error;
                                        error = (*keops->keo_put_events)
                                            (keops->keo_private, kevp,
                                            eventlist, nerrors, 1);
                                        if (error)
                                                goto done;
                                        nevents--;
                                        nerrors++;
                                } else {
                                        goto done;
                                }
                        }
                }
                nchanges -= n;  /* update the results */
                ichange += n;
        }
        if (nerrors) {
                *retval = nerrors;
                error = 0;
                goto done;
        }

        /* actually scan through the events */
        error = kqueue_scan(fp, nevents, eventlist, timeout, l, retval, keops);
 done:
        FILE_UNUSE(fp, l);
        return (error);
}

/*
 * Register a given kevent kev onto the kqueue
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct lwp *l)
{
        const struct kfilter *kfilter;
        struct filedesc *fdp;
        struct file     *fp;
        struct knote    *kn;
        int             s, error;

        fdp = kq->kq_fdp;
        fp = NULL;
        kn = NULL;
        error = 0;
        kfilter = kfilter_byfilter(kev->filter);
        if (kfilter == NULL || kfilter->filtops == NULL) {
                /* filter not found nor implemented */
                return (EINVAL);
        }

        /* search if knote already exists */
        if (kfilter->filtops->f_isfd) {
                /* monitoring a file descriptor */
                if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
                        return (EBADF); /* validate descriptor */
                FILE_USE(fp);

                if (kev->ident < fdp->fd_knlistsize) {
                        SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
                                if (kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                }
        } else {
                /*
                 * not monitoring a file descriptor, so
                 * lookup knotes in internal hash table
                 */
                if (fdp->fd_knhashmask != 0) {
                        struct klist *list;

                        list = &fdp->fd_knhash[
                            KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
                        SLIST_FOREACH(kn, list, kn_link)
                                if (kev->ident == kn->kn_id &&
                                    kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                }
        }

        if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
                error = ENOENT;         /* matching knote not found */
                goto done;
        }

        /*
         * kn now contains the matching knote, or NULL if no match
         */
        if (kev->flags & EV_ADD) {
                /* add knote */

                if (kn == NULL) {
                        /* create new knote */
                        kn = pool_get(&knote_pool, PR_WAITOK);
                        if (kn == NULL) {
                                error = ENOMEM;
                                goto done;
                        }
                        kn->kn_fp = fp;
                        kn->kn_kq = kq;
                        kn->kn_fop = kfilter->filtops;

                        /*
                         * apply reference count to knote structure, and
                         * do not release it at the end of this routine.
                         */
                        fp = NULL;

                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kev->fflags = 0;
                        kev->data = 0;
                        kn->kn_kevent = *kev;

                        knote_attach(kn, fdp);
                        if ((error = kfilter->filtops->f_attach(kn)) != 0) {
                                knote_drop(kn, l, fdp);
                                goto done;
                        }
                } else {
                        /* modify existing knote */

                        /*
                         * The user may change some filter values after the
                         * initial EV_ADD, but doing so will not reset any
                         * filter which has already been triggered.
                         */
                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kn->kn_kevent.udata = kev->udata;
                }

                s = splsched();
                if (kn->kn_fop->f_event(kn, 0))
                        KNOTE_ACTIVATE(kn);
                splx(s);

        } else if (kev->flags & EV_DELETE) {    /* delete knote */
                kn->kn_fop->f_detach(kn);
                knote_drop(kn, l, fdp);
                goto done;
        }

        /* disable knote */
        if ((kev->flags & EV_DISABLE) &&
            ((kn->kn_status & KN_DISABLED) == 0)) {
                s = splsched();
                kn->kn_status |= KN_DISABLED;
                splx(s);
        }

        /* enable knote */
        if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
                s = splsched();
                kn->kn_status &= ~KN_DISABLED;
                if ((kn->kn_status & KN_ACTIVE) &&
                    ((kn->kn_status & KN_QUEUED) == 0))
                        knote_enqueue(kn);
                splx(s);
        }

 done:
        if (fp != NULL)
                FILE_UNUSE(fp, l);
        return (error);
}

/*
 * Scan through the list of events on fp (for a maximum of maxevents),
 * returning the results into ulistp. Timeout is determined by tsp; if
 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
 * as appropriate.
 */
static int
kqueue_scan(struct file *fp, size_t maxevents, struct kevent *ulistp,
    const struct timespec *tsp, struct lwp *l, register_t *retval,
    const struct kevent_ops *keops)
{
        struct proc     *p = l->l_proc;
        struct kqueue   *kq;
        struct kevent   *kevp;
        struct timeval  atv, sleeptv;
        struct knote    *kn, *marker = NULL;
        size_t          count, nkev, nevents;
        int             s, timeout, error;

        kq = (struct kqueue *)fp->f_data;
        count = maxevents;
        nkev = nevents = error = 0;
        if (count == 0)
                goto done;

        if (tsp) {                              /* timeout supplied */
                TIMESPEC_TO_TIMEVAL(&atv, tsp);
                if (inittimeleft(&atv, &sleeptv) == -1) {
                        error = EINVAL;
                        goto done;
                }
                timeout = tvtohz(&atv);
                if (timeout <= 0)
                        timeout = -1;           /* do poll */
        } else {
                /* no timeout, wait forever */
                timeout = 0;
        }

        MALLOC(marker, struct knote *, sizeof(*marker), M_KEVENT, M_WAITOK);
        memset(marker, 0, sizeof(*marker));

        goto start;

 retry:
        if (tsp && (timeout = gettimeleft(&atv, &sleeptv)) <= 0) {
                goto done;
        }

 start:
        kevp = kq->kq_kev;
        s = splsched();
        simple_lock(&kq->kq_lock);
        if (kq->kq_count == 0) {
                if (timeout < 0) {
                        error = EWOULDBLOCK;
                        simple_unlock(&kq->kq_lock);
                } else {
                        kq->kq_state |= KQ_SLEEP;
                        error = ltsleep(kq, PSOCK | PCATCH | PNORELOCK,
                                        "kqread", timeout, &kq->kq_lock);
                }
                splx(s);
                if (error == 0)
                        goto retry;
                /* don't restart after signals... */
                if (error == ERESTART)
                        error = EINTR;
                else if (error == EWOULDBLOCK)
                        error = 0;
                goto done;
        }

        /* mark end of knote list */
        TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
        simple_unlock(&kq->kq_lock);

        while (count) {                         /* while user wants data ... */
                simple_lock(&kq->kq_lock);
                kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                if (kn == marker) {             /* if it's our marker, stop */
                        /* What if it's someone else's marker? */
                        simple_unlock(&kq->kq_lock);
                        splx(s);
                        if (count == maxevents)
                                goto retry;
                        goto done;
                }
                kq->kq_count--;
                simple_unlock(&kq->kq_lock);

                if (kn->kn_status & KN_DISABLED) {
                        /* don't want disabled events */
                        kn->kn_status &= ~KN_QUEUED;
                        continue;
                }
                if ((kn->kn_flags & EV_ONESHOT) == 0 &&
                    kn->kn_fop->f_event(kn, 0) == 0) {
                        /*
                         * non-ONESHOT event that hasn't
                         * triggered again, so de-queue.
                         */
                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                        continue;
                }
                *kevp = kn->kn_kevent;
                kevp++;
                nkev++;
                if (kn->kn_flags & EV_ONESHOT) {
                        /* delete ONESHOT events after retrieval */
                        kn->kn_status &= ~KN_QUEUED;
                        splx(s);
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, l, p->p_fd);
                        s = splsched();
                } else if (kn->kn_flags & EV_CLEAR) {
                        /* clear state after retrieval */
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                } else {
                        /* add event back on list */
                        simple_lock(&kq->kq_lock);
                        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
                        kq->kq_count++;
                        simple_unlock(&kq->kq_lock);
                }
                count--;
                if (nkev == KQ_NEVENTS) {
                        /* do copyouts in KQ_NEVENTS chunks */
                        splx(s);
                        error = (*keops->keo_put_events)(keops->keo_private,
                            &kq->kq_kev[0], ulistp, nevents, nkev);
                        nevents += nkev;
                        nkev = 0;
                        kevp = kq->kq_kev;
                        s = splsched();
                        if (error)
                                break;
                }
        }

        /* remove marker */
        simple_lock(&kq->kq_lock);
        TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
        simple_unlock(&kq->kq_lock);
        splx(s);
 done:
        if (marker)
                FREE(marker, M_KEVENT);

        if (nkev != 0)
                /* copyout remaining events */
                error = (*keops->keo_put_events)(keops->keo_private,
                    &kq->kq_kev[0], ulistp, nevents, nkev);
        *retval = maxevents - count;

        return (error);
}

/*
 * struct fileops read method for a kqueue descriptor.
 * Not implemented.
 * XXX: This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{

        return (ENXIO);
}

/*
 * struct fileops write method for a kqueue descriptor.
 * Not implemented.
 */
/*ARGSUSED*/
static int
kqueue_write(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{

        return (ENXIO);
}

/*
 * struct fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported. They both use struct kfilter_mapping:
 *      KFILTER_BYFILTER        find name for filter, and return result in
 *                              name, which is of size len.
 *      KFILTER_BYNAME          find filter for name. len is ignored.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, void *data, struct lwp *l)
{
        struct kfilter_mapping  *km;
        const struct kfilter    *kfilter;
        char                    *name;
        int                     error;

        km = (struct kfilter_mapping *)data;
        error = 0;

        switch (com) {
        case KFILTER_BYFILTER:  /* convert filter -> name */
                kfilter = kfilter_byfilter(km->filter);
                if (kfilter != NULL)
                        error = copyoutstr(kfilter->name, km->name, km->len,
                            NULL);
                else
                        error = ENOENT;
                break;

        case KFILTER_BYNAME:    /* convert name -> filter */
                MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK);
                error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
                if (error) {
                        FREE(name, M_KEVENT);
                        break;
                }
                kfilter = kfilter_byname(name);
                if (kfilter != NULL)
                        km->filter = kfilter->filter;
                else
                        error = ENOENT;
                FREE(name, M_KEVENT);
                break;

        default:
                error = ENOTTY;
        }
        return (error);
}
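
/*
 * A minimal userland sketch of the KFILTER_BYNAME mapping above, run
 * against a kqueue descriptor `kq':
 *
 *      struct kfilter_mapping km;
 *      char name[] = "EVFILT_READ";
 *
 *      km.name = name;
 *      km.len = sizeof(name);          (used only by KFILTER_BYFILTER)
 *      ioctl(kq, KFILTER_BYNAME, &km);
 *
 * On success, km.filter holds the EVFILT_READ filter id.
 */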

/*
 * struct fileops fcntl method for a kqueue descriptor.
 * Not implemented.
 */
/*ARGSUSED*/
static int
kqueue_fcntl(struct file *fp, u_int com, void *data, struct lwp *l)
{

        return (ENOTTY);
}

/*
 * struct fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
 */
static int
kqueue_poll(struct file *fp, int events, struct lwp *l)
{
        struct kqueue   *kq;
        int             revents;

        kq = (struct kqueue *)fp->f_data;
        revents = 0;
        if (events & (POLLIN | POLLRDNORM)) {
                if (kq->kq_count) {
                        revents |= events & (POLLIN | POLLRDNORM);
                } else {
                        selrecord(l, &kq->kq_sel);
                }
        }
        return (revents);
}
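
/*
 * Because of the method above, a kqueue descriptor can itself be
 * multiplexed with poll(2); a minimal userland sketch:
 *
 *      struct pollfd pfd;
 *
 *      pfd.fd = kq;
 *      pfd.events = POLLIN;
 *      if (poll(&pfd, 1, INFTIM) > 0)
 *              ... collect the pending events with kevent(2) ...
 */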

/*
 * struct fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct lwp *l)
{
        struct kqueue   *kq;

        kq = (struct kqueue *)fp->f_data;
        memset((void *)st, 0, sizeof(*st));
        st->st_size = kq->kq_count;
        st->st_blksize = sizeof(struct kevent);
        st->st_mode = S_IFIFO;
        return (0);
}

/*
 * struct fileops close method for a kqueue descriptor.
 * Cleans up kqueue.
 */
static int
kqueue_close(struct file *fp, struct lwp *l)
{
        struct proc     *p = l->l_proc;
        struct kqueue   *kq;
        struct filedesc *fdp;
        struct knote    **knp, *kn, *kn0;
        int             i;

        kq = (struct kqueue *)fp->f_data;
        fdp = p->p_fd;
        for (i = 0; i < fdp->fd_knlistsize; i++) {
                knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
                kn = *knp;
                while (kn != NULL) {
                        kn0 = SLIST_NEXT(kn, kn_link);
                        if (kq == kn->kn_kq) {
                                kn->kn_fop->f_detach(kn);
                                FILE_UNUSE(kn->kn_fp, l);
                                pool_put(&knote_pool, kn);
                                *knp = kn0;
                        } else {
                                knp = &SLIST_NEXT(kn, kn_link);
                        }
                        kn = kn0;
                }
        }
        if (fdp->fd_knhashmask != 0) {
                for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
                        knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
                        kn = *knp;
                        while (kn != NULL) {
                                kn0 = SLIST_NEXT(kn, kn_link);
                                if (kq == kn->kn_kq) {
                                        kn->kn_fop->f_detach(kn);
                                        /* XXX non-fd release of kn->kn_ptr */
                                        pool_put(&knote_pool, kn);
                                        *knp = kn0;
                                } else {
                                        knp = &SLIST_NEXT(kn, kn_link);
                                }
                                kn = kn0;
                        }
                }
        }
        pool_put(&kqueue_pool, kq);
        fp->f_data = NULL;

        return (0);
}

/*
 * wakeup a kqueue
 */
static void
kqueue_wakeup(struct kqueue *kq)
{
        int s;

        s = splsched();
        simple_lock(&kq->kq_lock);
        if (kq->kq_state & KQ_SLEEP) {          /* if currently sleeping ...  */
                kq->kq_state &= ~KQ_SLEEP;
                wakeup(kq);                     /* ... wakeup */
        }

        /* Notify select/poll and kevent. */
        selnotify(&kq->kq_sel, 0);
        simple_unlock(&kq->kq_lock);
        splx(s);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
        struct kqueue *kq;

        KASSERT(fp == kn->kn_fp);
        kq = (struct kqueue *)kn->kn_fp->f_data;
        if (kn->kn_filter != EVFILT_READ)
                return (1);
        kn->kn_fop = &kqread_filtops;
        SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
        return (0);
}


/*
 * Walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
        struct knote *kn;

        SLIST_FOREACH(kn, list, kn_selnext)
                if (kn->kn_fop->f_event(kn, hint))
                        KNOTE_ACTIVATE(kn);
}
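
/*
 * A hedged sketch of the producer side: a driver whose knotes hang off
 * its struct selinfo activates them when new data arrives; selnotify()
 * runs KNOTE() on the selinfo's sel_klist and also wakes select/poll
 * waiters.  The softc `sc' and its fields are hypothetical:
 *
 *      sc->sc_buflen = n;
 *      selnotify(&sc->sc_rsel, 0);
 *
 * A klist not embedded in a selinfo can be activated directly with
 * KNOTE(&list, hint), which calls knote() above.
 */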

/*
 * Remove all knotes from a specified klist
 */
void
knote_remove(struct lwp *l, struct klist *list)
{
        struct knote *kn;

        while ((kn = SLIST_FIRST(list)) != NULL) {
                kn->kn_fop->f_detach(kn);
                knote_drop(kn, l, l->l_proc->p_fd);
        }
}

/*
 * Remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct lwp *l, int fd)
{
        struct filedesc *fdp;
        struct klist    *list;

        fdp = l->l_proc->p_fd;
        list = &fdp->fd_knlist[fd];
        knote_remove(l, list);
}

/*
 * Attach a new knote to a file descriptor
 */
static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
        struct klist    *list;
        int             size;

        if (!kn->kn_fop->f_isfd) {
                /* if knote is not on an fd, store on internal hash table */
                if (fdp->fd_knhashmask == 0)
                        fdp->fd_knhash = hashinit(KN_HASHSIZE, HASH_LIST,
                            M_KEVENT, M_WAITOK, &fdp->fd_knhashmask);
                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                goto done;
        }

        /*
         * otherwise, knote is on an fd.
         * knotes are stored in fd_knlist indexed by kn->kn_id.
         */
        if (fdp->fd_knlistsize <= kn->kn_id) {
                /* expand list, it's too small */
                size = fdp->fd_knlistsize;
                while (size <= kn->kn_id) {
                        /* grow in KQ_EXTENT chunks */
                        size += KQ_EXTENT;
                }
                list = malloc(size * sizeof(struct klist), M_KEVENT, M_WAITOK);
                if (fdp->fd_knlist) {
                        /* copy existing knlist */
                        memcpy((caddr_t)list, (caddr_t)fdp->fd_knlist,
                            fdp->fd_knlistsize * sizeof(struct klist));
                }
                /*
                 * Zero new memory. Stylistically, SLIST_INIT() should be
                 * used here, but that does the same thing as the memset().
                 */
                memset(&list[fdp->fd_knlistsize], 0,
                    (size - fdp->fd_knlistsize) * sizeof(struct klist));

                /* switch to new knlist */
                if (fdp->fd_knlist != NULL)
                        free(fdp->fd_knlist, M_KEVENT);
                fdp->fd_knlistsize = size;
                fdp->fd_knlist = list;
        }

        /* get list head for this fd */
        list = &fdp->fd_knlist[kn->kn_id];
 done:
        /* add new knote */
        SLIST_INSERT_HEAD(list, kn, kn_link);
        kn->kn_status = 0;
}

/*
 * Drop knote.
 * Should be called at spl == 0, since we don't want to hold spl
 * while calling FILE_UNUSE and free.
 */
static void
knote_drop(struct knote *kn, struct lwp *l, struct filedesc *fdp)
{
        struct klist    *list;

        if (kn->kn_fop->f_isfd)
                list = &fdp->fd_knlist[kn->kn_id];
        else
                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

        SLIST_REMOVE(list, kn, knote, kn_link);
        if (kn->kn_status & KN_QUEUED)
                knote_dequeue(kn);
        if (kn->kn_fop->f_isfd)
                FILE_UNUSE(kn->kn_fp, l);
        pool_put(&knote_pool, kn);
}


/*
 * Queue new event for knote.
 */
static void
knote_enqueue(struct knote *kn)
{
        struct kqueue   *kq;
        int             s;

        kq = kn->kn_kq;
        KASSERT((kn->kn_status & KN_QUEUED) == 0);

        s = splsched();
        simple_lock(&kq->kq_lock);
        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        kn->kn_status |= KN_QUEUED;
        kq->kq_count++;
        simple_unlock(&kq->kq_lock);
        splx(s);
        kqueue_wakeup(kq);
}

/*
 * Dequeue event for knote.
 */
static void
knote_dequeue(struct knote *kn)
{
        struct kqueue   *kq;
        int             s;

        KASSERT(kn->kn_status & KN_QUEUED);
        kq = kn->kn_kq;

        s = splsched();
        simple_lock(&kq->kq_lock);
        TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
        kn->kn_status &= ~KN_QUEUED;
        kq->kq_count--;
        simple_unlock(&kq->kq_lock);
        splx(s);
}
