The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_event.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
    3  * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include <sys/param.h>
   32 #include <sys/systm.h>
   33 #include <sys/kernel.h>
   34 #include <sys/lock.h>
   35 #include <sys/mutex.h>
   36 #include <sys/proc.h>
   37 #include <sys/malloc.h>
   38 #include <sys/unistd.h>
   39 #include <sys/file.h>
   40 #include <sys/filedesc.h>
   41 #include <sys/filio.h>
   42 #include <sys/fcntl.h>
   43 #include <sys/kthread.h>
   44 #include <sys/selinfo.h>
   45 #include <sys/queue.h>
   46 #include <sys/event.h>
   47 #include <sys/eventvar.h>
   48 #include <sys/poll.h>
   49 #include <sys/protosw.h>
   50 #include <sys/sigio.h>
   51 #include <sys/signalvar.h>
   52 #include <sys/socket.h>
   53 #include <sys/socketvar.h>
   54 #include <sys/stat.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/sysproto.h>
   57 #include <sys/syscallsubr.h>
   58 #include <sys/taskqueue.h>
   59 #include <sys/uio.h>
   60 
   61 #include <vm/uma.h>
   62 
   63 static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
   64 
   65 /*
   66  * This lock is used if multiple kq locks are required.  This possibly
   67  * should be made into a per proc lock.
   68  */
   69 static struct mtx       kq_global;
   70 MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
   71 #define KQ_GLOBAL_LOCK(lck, haslck)     do {    \
   72         if (!haslck)                            \
   73                 mtx_lock(lck);                  \
   74         haslck = 1;                             \
   75 } while (0)
   76 #define KQ_GLOBAL_UNLOCK(lck, haslck)   do {    \
   77         if (haslck)                             \
   78                 mtx_unlock(lck);                        \
   79         haslck = 0;                             \
   80 } while (0)
   81 
   82 TASKQUEUE_DEFINE_THREAD(kqueue);
   83 
   84 static int      kevent_copyout(void *arg, struct kevent *kevp, int count);
   85 static int      kevent_copyin(void *arg, struct kevent *kevp, int count);
   86 static int      kqueue_aquire(struct file *fp, struct kqueue **kqp);
   87 static void     kqueue_release(struct kqueue *kq, int locked);
   88 static int      kqueue_expand(struct kqueue *kq, struct filterops *fops,
   89                     uintptr_t ident, int waitok);
   90 static void     kqueue_task(void *arg, int pending);
   91 static int      kqueue_scan(struct kqueue *kq, int maxevents,
   92                     struct kevent_copyops *k_ops,
   93                     const struct timespec *timeout,
   94                     struct kevent *keva, struct thread *td);
   95 static void     kqueue_wakeup(struct kqueue *kq);
   96 static struct filterops *kqueue_fo_find(int filt);
   97 static void     kqueue_fo_release(int filt);
   98 
   99 static fo_rdwr_t        kqueue_read;
  100 static fo_rdwr_t        kqueue_write;
  101 static fo_ioctl_t       kqueue_ioctl;
  102 static fo_poll_t        kqueue_poll;
  103 static fo_kqfilter_t    kqueue_kqfilter;
  104 static fo_stat_t        kqueue_stat;
  105 static fo_close_t       kqueue_close;
  106 
  107 static struct fileops kqueueops = {
  108         .fo_read = kqueue_read,
  109         .fo_write = kqueue_write,
  110         .fo_ioctl = kqueue_ioctl,
  111         .fo_poll = kqueue_poll,
  112         .fo_kqfilter = kqueue_kqfilter,
  113         .fo_stat = kqueue_stat,
  114         .fo_close = kqueue_close,
  115 };
  116 
  117 static int      knote_attach(struct knote *kn, struct kqueue *kq);
  118 static void     knote_drop(struct knote *kn, struct thread *td);
  119 static void     knote_enqueue(struct knote *kn);
  120 static void     knote_dequeue(struct knote *kn);
  121 static void     knote_init(void);
  122 static struct   knote *knote_alloc(int waitok);
  123 static void     knote_free(struct knote *kn);
  124 
  125 static void     filt_kqdetach(struct knote *kn);
  126 static int      filt_kqueue(struct knote *kn, long hint);
  127 static int      filt_procattach(struct knote *kn);
  128 static void     filt_procdetach(struct knote *kn);
  129 static int      filt_proc(struct knote *kn, long hint);
  130 static int      filt_fileattach(struct knote *kn);
  131 static void     filt_timerexpire(void *knx);
  132 static int      filt_timerattach(struct knote *kn);
  133 static void     filt_timerdetach(struct knote *kn);
  134 static int      filt_timer(struct knote *kn, long hint);
  135 
  136 static struct filterops file_filtops =
  137         { 1, filt_fileattach, NULL, NULL };
  138 static struct filterops kqread_filtops =
  139         { 1, NULL, filt_kqdetach, filt_kqueue };
  140 /* XXX - move to kern_proc.c?  */
  141 static struct filterops proc_filtops =
  142         { 0, filt_procattach, filt_procdetach, filt_proc };
  143 static struct filterops timer_filtops =
  144         { 0, filt_timerattach, filt_timerdetach, filt_timer };
  145 
  146 static uma_zone_t       knote_zone;
  147 static int              kq_ncallouts = 0;
  148 static int              kq_calloutmax = (4 * 1024);
  149 SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
  150     &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
  151 
  152 /* XXX - ensure not KN_INFLUX?? */
  153 #define KNOTE_ACTIVATE(kn, islock) do {                                 \
  154         if ((islock))                                                   \
  155                 mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);            \
  156         else                                                            \
  157                 KQ_LOCK((kn)->kn_kq);                                   \
  158         (kn)->kn_status |= KN_ACTIVE;                                   \
  159         if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)         \
  160                 knote_enqueue((kn));                                    \
  161         if (!(islock))                                                  \
  162                 KQ_UNLOCK((kn)->kn_kq);                                 \
  163 } while(0)
  164 #define KQ_LOCK(kq) do {                                                \
  165         mtx_lock(&(kq)->kq_lock);                                       \
  166 } while (0)
  167 #define KQ_FLUX_WAKEUP(kq) do {                                         \
  168         if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {            \
  169                 (kq)->kq_state &= ~KQ_FLUXWAIT;                         \
  170                 wakeup((kq));                                           \
  171         }                                                               \
  172 } while (0)
  173 #define KQ_UNLOCK_FLUX(kq) do {                                         \
  174         KQ_FLUX_WAKEUP(kq);                                             \
  175         mtx_unlock(&(kq)->kq_lock);                                     \
  176 } while (0)
  177 #define KQ_UNLOCK(kq) do {                                              \
  178         mtx_unlock(&(kq)->kq_lock);                                     \
  179 } while (0)
  180 #define KQ_OWNED(kq) do {                                               \
  181         mtx_assert(&(kq)->kq_lock, MA_OWNED);                           \
  182 } while (0)
  183 #define KQ_NOTOWNED(kq) do {                                            \
  184         mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);                        \
  185 } while (0)
  186 #define KN_LIST_LOCK(kn) do {                                           \
  187         if (kn->kn_knlist != NULL)                                      \
  188                 kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);      \
  189 } while (0)
  190 #define KN_LIST_UNLOCK(kn) do {                                         \
  191         if (kn->kn_knlist != NULL)                                      \
  192                 kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);    \
  193 } while (0)
  194 #define KNL_ASSERT_LOCK(knl, islocked) do {                             \
  195         if (islocked)                                                   \
  196                 KNL_ASSERT_LOCKED(knl);                         \
  197         else                                                            \
  198                 KNL_ASSERT_UNLOCKED(knl);                               \
  199 } while (0)
  200 #ifdef INVARIANTS
  201 #define KNL_ASSERT_LOCKED(knl) do {                                     \
  202         if (!knl->kl_locked((knl)->kl_lockarg))                         \
  203                         panic("knlist not locked, but should be");      \
  204 } while (0)
  205 #define KNL_ASSERT_UNLOCKED(knl) do {                           \
  206         if (knl->kl_locked((knl)->kl_lockarg))                          \
  207                 panic("knlist locked, but should not be");              \
  208 } while (0)
  209 #else /* !INVARIANTS */
  210 #define KNL_ASSERT_LOCKED(knl) do {} while(0)
  211 #define KNL_ASSERT_UNLOCKED(knl) do {} while (0)
  212 #endif /* INVARIANTS */
  213 
  214 #define KN_HASHSIZE             64              /* XXX should be tunable */
  215 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
  216 
  217 static int
  218 filt_nullattach(struct knote *kn)
  219 {
  220 
  221         return (ENXIO);
  222 };
  223 
  224 struct filterops null_filtops =
  225         { 0, filt_nullattach, NULL, NULL };
  226 
  227 /* XXX - make SYSINIT to add these, and move into respective modules. */
  228 extern struct filterops sig_filtops;
  229 extern struct filterops fs_filtops;
  230 
  231 /*
  232  * Table for for all system-defined filters.
  233  */
  234 static struct mtx       filterops_lock;
  235 MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
  236         MTX_DEF);
  237 static struct {
  238         struct filterops *for_fop;
  239         int for_refcnt;
  240 } sysfilt_ops[EVFILT_SYSCOUNT] = {
  241         { &file_filtops },                      /* EVFILT_READ */
  242         { &file_filtops },                      /* EVFILT_WRITE */
  243         { &null_filtops },                      /* EVFILT_AIO */
  244         { &file_filtops },                      /* EVFILT_VNODE */
  245         { &proc_filtops },                      /* EVFILT_PROC */
  246         { &sig_filtops },                       /* EVFILT_SIGNAL */
  247         { &timer_filtops },                     /* EVFILT_TIMER */
  248         { &file_filtops },                      /* EVFILT_NETDEV */
  249         { &fs_filtops },                        /* EVFILT_FS */
  250 };
  251 
  252 /*
  253  * Simple redirection for all cdevsw style objects to call their fo_kqfilter
  254  * method.
  255  */
  256 static int
  257 filt_fileattach(struct knote *kn)
  258 {
  259 
  260         return (fo_kqfilter(kn->kn_fp, kn));
  261 }
  262 
  263 /*ARGSUSED*/
  264 static int
  265 kqueue_kqfilter(struct file *fp, struct knote *kn)
  266 {
  267         struct kqueue *kq = kn->kn_fp->f_data;
  268 
  269         if (kn->kn_filter != EVFILT_READ)
  270                 return (EINVAL);
  271 
  272         kn->kn_status |= KN_KQUEUE;
  273         kn->kn_fop = &kqread_filtops;
  274         knlist_add(&kq->kq_sel.si_note, kn, 0);
  275 
  276         return (0);
  277 }
  278 
  279 static void
  280 filt_kqdetach(struct knote *kn)
  281 {
  282         struct kqueue *kq = kn->kn_fp->f_data;
  283 
  284         knlist_remove(&kq->kq_sel.si_note, kn, 0);
  285 }
  286 
  287 /*ARGSUSED*/
  288 static int
  289 filt_kqueue(struct knote *kn, long hint)
  290 {
  291         struct kqueue *kq = kn->kn_fp->f_data;
  292 
  293         kn->kn_data = kq->kq_count;
  294         return (kn->kn_data > 0);
  295 }
  296 
  297 /* XXX - move to kern_proc.c?  */
  298 static int
  299 filt_procattach(struct knote *kn)
  300 {
  301         struct proc *p;
  302         int immediate;
  303         int error;
  304 
  305         immediate = 0;
  306         p = pfind(kn->kn_id);
  307         if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
  308                 p = zpfind(kn->kn_id);
  309                 immediate = 1;
  310         } else if (p != NULL && (p->p_flag & P_WEXIT)) {
  311                 immediate = 1;
  312         }
  313 
  314         if (p == NULL)
  315                 return (ESRCH);
  316         if ((error = p_cansee(curthread, p)))
  317                 return (error);
  318 
  319         kn->kn_ptr.p_proc = p;
  320         kn->kn_flags |= EV_CLEAR;               /* automatically set */
  321 
  322         /*
  323          * internal flag indicating registration done by kernel
  324          */
  325         if (kn->kn_flags & EV_FLAG1) {
  326                 kn->kn_data = kn->kn_sdata;             /* ppid */
  327                 kn->kn_fflags = NOTE_CHILD;
  328                 kn->kn_flags &= ~EV_FLAG1;
  329         }
  330 
  331         if (immediate == 0)
  332                 knlist_add(&p->p_klist, kn, 1);
  333 
  334         /*
  335          * Immediately activate any exit notes if the target process is a
  336          * zombie.  This is necessary to handle the case where the target
  337          * process, e.g. a child, dies before the kevent is registered.
  338          */
  339         if (immediate && filt_proc(kn, NOTE_EXIT))
  340                 KNOTE_ACTIVATE(kn, 0);
  341 
  342         PROC_UNLOCK(p);
  343 
  344         return (0);
  345 }
  346 
  347 /*
  348  * The knote may be attached to a different process, which may exit,
  349  * leaving nothing for the knote to be attached to.  So when the process
  350  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
  351  * it will be deleted when read out.  However, as part of the knote deletion,
  352  * this routine is called, so a check is needed to avoid actually performing
  353  * a detach, because the original process does not exist any more.
  354  */
  355 /* XXX - move to kern_proc.c?  */
  356 static void
  357 filt_procdetach(struct knote *kn)
  358 {
  359         struct proc *p;
  360 
  361         p = kn->kn_ptr.p_proc;
  362         knlist_remove(&p->p_klist, kn, 0);
  363         kn->kn_ptr.p_proc = NULL;
  364 }
  365 
  366 /* XXX - move to kern_proc.c?  */
  367 static int
  368 filt_proc(struct knote *kn, long hint)
  369 {
  370         struct proc *p = kn->kn_ptr.p_proc;
  371         u_int event;
  372 
  373         /*
  374          * mask off extra data
  375          */
  376         event = (u_int)hint & NOTE_PCTRLMASK;
  377 
  378         /*
  379          * if the user is interested in this event, record it.
  380          */
  381         if (kn->kn_sfflags & event)
  382                 kn->kn_fflags |= event;
  383 
  384         /*
  385          * process is gone, so flag the event as finished.
  386          */
  387         if (event == NOTE_EXIT) {
  388                 if (!(kn->kn_status & KN_DETACHED))
  389                         knlist_remove_inevent(&p->p_klist, kn);
  390                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
  391                 kn->kn_ptr.p_proc = NULL;
  392                 return (1);
  393         }
  394 
  395         return (kn->kn_fflags != 0);
  396 }
  397 
  398 /*
  399  * Called when the process forked. It mostly does the same as the
  400  * knote(), activating all knotes registered to be activated when the
  401  * process forked. Additionally, for each knote attached to the
  402  * parent, check whether user wants to track the new process. If so
  403  * attach a new knote to it, and immediately report an event with the
  404  * child's pid.
  405  */
  406 void
  407 knote_fork(struct knlist *list, int pid)
  408 {
  409         struct kqueue *kq;
  410         struct knote *kn;
  411         struct kevent kev;
  412         int error;
  413 
  414         if (list == NULL)
  415                 return;
  416         list->kl_lock(list->kl_lockarg);
  417 
  418         SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
  419                 if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
  420                         continue;
  421                 kq = kn->kn_kq;
  422                 KQ_LOCK(kq);
  423                 if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
  424                         KQ_UNLOCK(kq);
  425                         continue;
  426                 }
  427 
  428                 /*
  429                  * The same as knote(), activate the event.
  430                  */
  431                 if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
  432                         kn->kn_status |= KN_HASKQLOCK;
  433                         if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
  434                                 KNOTE_ACTIVATE(kn, 1);
  435                         kn->kn_status &= ~KN_HASKQLOCK;
  436                         KQ_UNLOCK(kq);
  437                         continue;
  438                 }
  439 
  440                 /*
  441                  * The NOTE_TRACK case. In addition to the activation
  442                  * of the event, we need to register new event to
  443                  * track the child. Drop the locks in preparation for
  444                  * the call to kqueue_register().
  445                  */
  446                 kn->kn_status |= KN_INFLUX;
  447                 KQ_UNLOCK(kq);
  448                 list->kl_unlock(list->kl_lockarg);
  449 
  450                 /*
  451                  * Activate existing knote and register a knote with
  452                  * new process.
  453                  */
  454                 kev.ident = pid;
  455                 kev.filter = kn->kn_filter;
  456                 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
  457                 kev.fflags = kn->kn_sfflags;
  458                 kev.data = kn->kn_id;           /* parent */
  459                 kev.udata = kn->kn_kevent.udata;/* preserve udata */
  460                 error = kqueue_register(kq, &kev, NULL, 0);
  461                 if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
  462                         KNOTE_ACTIVATE(kn, 0);
  463                 if (error)
  464                         kn->kn_fflags |= NOTE_TRACKERR;
  465                 KQ_LOCK(kq);
  466                 kn->kn_status &= ~KN_INFLUX;
  467                 KQ_UNLOCK_FLUX(kq);
  468                 list->kl_lock(list->kl_lockarg);
  469         }
  470         list->kl_unlock(list->kl_lockarg);
  471 }
  472 
  473 static int
  474 timertoticks(intptr_t data)
  475 {
  476         struct timeval tv;
  477         int tticks;
  478 
  479         tv.tv_sec = data / 1000;
  480         tv.tv_usec = (data % 1000) * 1000;
  481         tticks = tvtohz(&tv);
  482 
  483         return tticks;
  484 }
  485 
  486 /* XXX - move to kern_timeout.c? */
  487 static void
  488 filt_timerexpire(void *knx)
  489 {
  490         struct knote *kn = knx;
  491         struct callout *calloutp;
  492 
  493         kn->kn_data++;
  494         KNOTE_ACTIVATE(kn, 0);  /* XXX - handle locking */
  495 
  496         if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
  497                 calloutp = (struct callout *)kn->kn_hook;
  498                 callout_reset(calloutp, timertoticks(kn->kn_sdata),
  499                     filt_timerexpire, kn);
  500         }
  501 }
  502 
  503 /*
  504  * data contains amount of time to sleep, in milliseconds
  505  */
  506 /* XXX - move to kern_timeout.c? */
  507 static int
  508 filt_timerattach(struct knote *kn)
  509 {
  510         struct callout *calloutp;
  511 
  512         atomic_add_int(&kq_ncallouts, 1);
  513 
  514         if (kq_ncallouts >= kq_calloutmax) {
  515                 atomic_add_int(&kq_ncallouts, -1);
  516                 return (ENOMEM);
  517         }
  518 
  519         kn->kn_flags |= EV_CLEAR;               /* automatically set */
  520         kn->kn_status &= ~KN_DETACHED;          /* knlist_add usually sets it */
  521         MALLOC(calloutp, struct callout *, sizeof(*calloutp),
  522             M_KQUEUE, M_WAITOK);
  523         callout_init(calloutp, CALLOUT_MPSAFE);
  524         kn->kn_hook = calloutp;
  525         callout_reset(calloutp, timertoticks(kn->kn_sdata), filt_timerexpire,
  526             kn);
  527 
  528         return (0);
  529 }
  530 
  531 /* XXX - move to kern_timeout.c? */
  532 static void
  533 filt_timerdetach(struct knote *kn)
  534 {
  535         struct callout *calloutp;
  536 
  537         calloutp = (struct callout *)kn->kn_hook;
  538         callout_drain(calloutp);
  539         FREE(calloutp, M_KQUEUE);
  540         atomic_add_int(&kq_ncallouts, -1);
  541         kn->kn_status |= KN_DETACHED;   /* knlist_remove usually clears it */
  542 }
  543 
  544 /* XXX - move to kern_timeout.c? */
  545 static int
  546 filt_timer(struct knote *kn, long hint)
  547 {
  548 
  549         return (kn->kn_data != 0);
  550 }
  551 
  552 /*
  553  * MPSAFE
  554  */
  555 int
  556 kqueue(struct thread *td, struct kqueue_args *uap)
  557 {
  558         struct filedesc *fdp;
  559         struct kqueue *kq;
  560         struct file *fp;
  561         int fd, error;
  562 
  563         fdp = td->td_proc->p_fd;
  564         error = falloc(td, &fp, &fd);
  565         if (error)
  566                 goto done2;
  567 
  568         /* An extra reference on `nfp' has been held for us by falloc(). */
  569         kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
  570         mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
  571         TAILQ_INIT(&kq->kq_head);
  572         kq->kq_fdp = fdp;
  573         knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
  574         TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
  575 
  576         FILEDESC_LOCK_FAST(fdp);
  577         SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
  578         FILEDESC_UNLOCK_FAST(fdp);
  579 
  580         FILE_LOCK(fp);
  581         fp->f_flag = FREAD | FWRITE;
  582         fp->f_type = DTYPE_KQUEUE;
  583         fp->f_ops = &kqueueops;
  584         fp->f_data = kq;
  585         FILE_UNLOCK(fp);
  586         fdrop(fp, td);
  587 
  588         td->td_retval[0] = fd;
  589 done2:
  590         return (error);
  591 }
  592 
  593 #ifndef _SYS_SYSPROTO_H_
  594 struct kevent_args {
  595         int     fd;
  596         const struct kevent *changelist;
  597         int     nchanges;
  598         struct  kevent *eventlist;
  599         int     nevents;
  600         const struct timespec *timeout;
  601 };
  602 #endif
  603 /*
  604  * MPSAFE
  605  */
  606 int
  607 kevent(struct thread *td, struct kevent_args *uap)
  608 {
  609         struct timespec ts, *tsp;
  610         struct kevent_copyops k_ops = { uap,
  611                                         kevent_copyout,
  612                                         kevent_copyin};
  613         int error;
  614 
  615         if (uap->timeout != NULL) {
  616                 error = copyin(uap->timeout, &ts, sizeof(ts));
  617                 if (error)
  618                         return (error);
  619                 tsp = &ts;
  620         } else
  621                 tsp = NULL;
  622 
  623         return (kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
  624             &k_ops, tsp));
  625 }
  626 
  627 /*
  628  * Copy 'count' items into the destination list pointed to by uap->eventlist.
  629  */
  630 static int
  631 kevent_copyout(void *arg, struct kevent *kevp, int count)
  632 {
  633         struct kevent_args *uap;
  634         int error;
  635 
  636         KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
  637         uap = (struct kevent_args *)arg;
  638 
  639         error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
  640         if (error == 0)
  641                 uap->eventlist += count;
  642         return (error);
  643 }
  644 
  645 /*
  646  * Copy 'count' items from the list pointed to by uap->changelist.
  647  */
  648 static int
  649 kevent_copyin(void *arg, struct kevent *kevp, int count)
  650 {
  651         struct kevent_args *uap;
  652         int error;
  653 
  654         KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
  655         uap = (struct kevent_args *)arg;
  656 
  657         error = copyin(uap->changelist, kevp, count * sizeof *kevp);
  658         if (error == 0)
  659                 uap->changelist += count;
  660         return (error);
  661 }
  662 
  663 int
  664 kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
  665     struct kevent_copyops *k_ops, const struct timespec *timeout)
  666 {
  667         struct kevent keva[KQ_NEVENTS];
  668         struct kevent *kevp, *changes;
  669         struct kqueue *kq;
  670         struct file *fp;
  671         int i, n, nerrors, error;
  672 
  673         if ((error = fget(td, fd, &fp)) != 0)
  674                 return (error);
  675         if ((error = kqueue_aquire(fp, &kq)) != 0)
  676                 goto done_norel;
  677 
  678         nerrors = 0;
  679 
  680         while (nchanges > 0) {
  681                 n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
  682                 error = k_ops->k_copyin(k_ops->arg, keva, n);
  683                 if (error)
  684                         goto done;
  685                 changes = keva;
  686                 for (i = 0; i < n; i++) {
  687                         kevp = &changes[i];
  688                         kevp->flags &= ~EV_SYSFLAGS;
  689                         error = kqueue_register(kq, kevp, td, 1);
  690                         if (error) {
  691                                 if (nevents != 0) {
  692                                         kevp->flags = EV_ERROR;
  693                                         kevp->data = error;
  694                                         (void) k_ops->k_copyout(k_ops->arg,
  695                                             kevp, 1);
  696                                         nevents--;
  697                                         nerrors++;
  698                                 } else {
  699                                         goto done;
  700                                 }
  701                         }
  702                 }
  703                 nchanges -= n;
  704         }
  705         if (nerrors) {
  706                 td->td_retval[0] = nerrors;
  707                 error = 0;
  708                 goto done;
  709         }
  710 
  711         error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
  712 done:
  713         kqueue_release(kq, 0);
  714 done_norel:
  715         if (fp != NULL)
  716                 fdrop(fp, td);
  717         return (error);
  718 }
  719 
  720 int
  721 kqueue_add_filteropts(int filt, struct filterops *filtops)
  722 {
  723         int error;
  724 
  725         if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) {
  726                 printf(
  727 "trying to add a filterop that is out of range: %d is beyond %d\n",
  728                     ~filt, EVFILT_SYSCOUNT);
  729                 return EINVAL;
  730         }
  731         mtx_lock(&filterops_lock);
  732         if (sysfilt_ops[~filt].for_fop != &null_filtops &&
  733             sysfilt_ops[~filt].for_fop != NULL)
  734                 error = EEXIST;
  735         else {
  736                 sysfilt_ops[~filt].for_fop = filtops;
  737                 sysfilt_ops[~filt].for_refcnt = 0;
  738         }
  739         mtx_unlock(&filterops_lock);
  740 
  741         return (0);
  742 }
  743 
  744 int
  745 kqueue_del_filteropts(int filt)
  746 {
  747         int error;
  748 
  749         error = 0;
  750         if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
  751                 return EINVAL;
  752 
  753         mtx_lock(&filterops_lock);
  754         if (sysfilt_ops[~filt].for_fop == &null_filtops ||
  755             sysfilt_ops[~filt].for_fop == NULL)
  756                 error = EINVAL;
  757         else if (sysfilt_ops[~filt].for_refcnt != 0)
  758                 error = EBUSY;
  759         else {
  760                 sysfilt_ops[~filt].for_fop = &null_filtops;
  761                 sysfilt_ops[~filt].for_refcnt = 0;
  762         }
  763         mtx_unlock(&filterops_lock);
  764 
  765         return error;
  766 }
  767 
  768 static struct filterops *
  769 kqueue_fo_find(int filt)
  770 {
  771 
  772         if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
  773                 return NULL;
  774 
  775         mtx_lock(&filterops_lock);
  776         sysfilt_ops[~filt].for_refcnt++;
  777         if (sysfilt_ops[~filt].for_fop == NULL)
  778                 sysfilt_ops[~filt].for_fop = &null_filtops;
  779         mtx_unlock(&filterops_lock);
  780 
  781         return sysfilt_ops[~filt].for_fop;
  782 }
  783 
  784 static void
  785 kqueue_fo_release(int filt)
  786 {
  787 
  788         if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
  789                 return;
  790 
  791         mtx_lock(&filterops_lock);
  792         KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
  793             ("filter object refcount not valid on release"));
  794         sysfilt_ops[~filt].for_refcnt--;
  795         mtx_unlock(&filterops_lock);
  796 }
  797 
  798 /*
       * Apply the single change request `kev' to `kq': add a new knote, update
       * or delete an existing one, and/or enable/disable it.
       *
  799  * A ref to kq (obtained via kqueue_aquire) should be held.  waitok will
  800  * influence if memory allocation should wait.  Make sure it is 0 if you
  801  * hold any mutexes.
       *
       * td must be non-NULL when the filter is descriptor based (f_isfd), since
       * the descriptor is looked up in td's process file table.
       *
       * When a new knote is created, kev->fflags and kev->data are cleared in
       * the caller's structure after being saved in the knote.
       *
       * Returns 0 on success, otherwise:
       *   EINVAL  no filter ops for kev->filter, or the descriptor is kq itself
       *   EBADF   kev->ident is not an open descriptor (f_isfd filters only)
       *   ENOENT  no matching knote exists and EV_ADD was not requested
       *   ENOMEM  a new knote is needed but none could be preallocated
       *   or whatever kqueue_expand(), knote_attach() or the filter's
       *   f_attach() returned.
  802  */
  803 int
  804 kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok)
  805 {
  806         struct filedesc *fdp;
  807         struct filterops *fops;
  808         struct file *fp;
  809         struct knote *kn, *tkn;
  810         int error, filt, event;
  811         int haskqglobal;
  812         int fd;
  813 
  814         fdp = NULL;
  815         fp = NULL;
  816         kn = NULL;
  817         error = 0;
  818         haskqglobal = 0;
  819 
              /* Takes a reference on the filter slot; dropped at done: unless
               * ownership moves to a newly created knote (fops set to NULL). */
  820         filt = kev->filter;
  821         fops = kqueue_fo_find(filt);
  822         if (fops == NULL)
  823                 return EINVAL;
  824 
  825         tkn = knote_alloc(waitok);              /* prevent waiting with locks */
  826 
              /*
               * Lookup of an existing knote for (ident, filter).  We come back
               * here after growing the kq tables or after sleeping on a knote
               * that was in flux.
               */
  827 findkn:
  828         if (fops->f_isfd) {
  829                 KASSERT(td != NULL, ("td is NULL"));
  830                 fdp = td->td_proc->p_fd;
  831                 FILEDESC_LOCK(fdp);
  832                 /* validate descriptor */
  833                 fd = kev->ident;
  834                 if (fd < 0 || fd >= fdp->fd_nfiles ||
  835                     (fp = fdp->fd_ofiles[fd]) == NULL) {
  836                         FILEDESC_UNLOCK(fdp);
  837                         error = EBADF;
  838                         goto done;
  839                 }
                      /* Reference on fp; dropped at done: or handed to the knote. */
  840                 fhold(fp);
  841 
                      /*
                       * First try to grow the per-fd knote list with waitok == 0
                       * because the filedesc lock is held.  If that fails, drop
                       * the lock and the file reference, retry with the caller's
                       * waitok and start the lookup over.
                       */
  842                 if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
  843                     kev->ident, 0) != 0) {
  844                         /* unlock and try again */
  845                         FILEDESC_UNLOCK(fdp);
  846                         fdrop(fp, td);
  847                         fp = NULL;
  848                         error = kqueue_expand(kq, fops, kev->ident, waitok);
  849                         if (error)
  850                                 goto done;
  851                         goto findkn;
  852                 }
  853 
  854                 if (fp->f_type == DTYPE_KQUEUE) {
  855                         /*
  856                          * if we add some intelligence about what we are doing,
  857                          * we should be able to support events on ourselves.
  858                          * We need to know when we are doing this to prevent
  859                          * getting both the knlist lock and the kq lock since
  860                          * they are the same thing.
  861                          */
  862                         if (fp->f_data == kq) {
  863                                 FILEDESC_UNLOCK(fdp);
  864                                 error = EINVAL;
  865                                 goto done;
  866                         }
  867 
                              /* Watching another kqueue: take the global kq lock
                               * (released at done: or before sleeping below). */
  868                         KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
  869                 }
  870 
  871                 FILEDESC_UNLOCK(fdp);
  872                 KQ_LOCK(kq);
                      /* fd filters: knotes live in kq_knlist[], indexed by ident. */
  873                 if (kev->ident < kq->kq_knlistsize) {
  874                         SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
  875                                 if (kev->filter == kn->kn_filter)
  876                                         break;
  877                 }
  878         } else {
                      /*
                       * NOTE(review): the return value of kqueue_expand() is not
                       * checked on this path; presumably a failure is caught
                       * later by knote_attach() -- confirm.
                       */
  879                 if ((kev->flags & EV_ADD) == EV_ADD)
  880                         kqueue_expand(kq, fops, kev->ident, waitok);
  881 
  882                 KQ_LOCK(kq);
                      /* non-fd filters: knotes live in the kq_knhash hash table. */
  883                 if (kq->kq_knhashmask != 0) {
  884                         struct klist *list;
  885 
  886                         list = &kq->kq_knhash[
  887                             KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
  888                         SLIST_FOREACH(kn, list, kn_link)
  889                                 if (kev->ident == kn->kn_id &&
  890                                     kev->filter == kn->kn_filter)
  891                                         break;
  892                 }
  893         }
  894 
              /*
               * From here on the kq lock is held; kn is the matching knote or
               * NULL (SLIST_FOREACH leaves kn NULL when nothing matched).
               */
  895         /* knote is in the process of changing, wait for it to stabilize. */
  896         if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
  897                 if (fp != NULL) {
  898                         fdrop(fp, td);
  899                         fp = NULL;
  900                 }
  901                 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
  902                 kq->kq_state |= KQ_FLUXWAIT;
                      /* PDROP: the kq lock is not held when msleep() returns. */
  903                 msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
  904                 goto findkn;
  905         }
  906 
  907         if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
  908                 KQ_UNLOCK(kq);
  909                 error = ENOENT;
  910                 goto done;
  911         }
  912 
  913         /*
  914          * kn now contains the matching knote, or NULL if no match
  915          */
  916         if (kev->flags & EV_ADD) {
  917                 if (kn == NULL) {
                              /* Use the knote preallocated before taking locks. */
  918                         kn = tkn;
  919                         tkn = NULL;
  920                         if (kn == NULL) {
  921                                 KQ_UNLOCK(kq);
  922                                 error = ENOMEM;
  923                                 goto done;
  924                         }
  925                         kn->kn_fp = fp;
  926                         kn->kn_kq = kq;
  927                         kn->kn_fop = fops;
  928                         /*
  929                          * apply reference counts to knote structure, and
  930                          * do not release it at the end of this routine.
  931                          */
  932                         fops = NULL;
  933                         fp = NULL;
  934 
                              /*
                               * Save the requested fflags/data in the knote and
                               * clear them in the caller's kev before copying it.
                               */
  935                         kn->kn_sfflags = kev->fflags;
  936                         kn->kn_sdata = kev->data;
  937                         kev->fflags = 0;
  938                         kev->data = 0;
  939                         kn->kn_kevent = *kev;
  940                         kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
  941                             EV_ENABLE | EV_DISABLE);
  942                         kn->kn_status = KN_INFLUX|KN_DETACHED;
  943 
  944                         error = knote_attach(kn, kq);
  945                         KQ_UNLOCK(kq);
                              /*
                               * NOTE(review): on this failure path the knote is
                               * freed through tkn at done:, but fops and fp were
                               * already set to NULL above, so done: does not
                               * release them -- confirm the filter-slot and file
                               * references are not leaked here.
                               */
  946                         if (error != 0) {
  947                                 tkn = kn;
  948                                 goto done;
  949                         }
  950 
  951                         if ((error = kn->kn_fop->f_attach(kn)) != 0) {
  952                                 knote_drop(kn, td);
  953                                 goto done;
  954                         }
  955                         KN_LIST_LOCK(kn);
  956                 } else {
  957                         /*
  958                          * The user may change some filter values after the
  959                          * initial EV_ADD, but doing so will not reset any
  960                          * filter which has already been triggered.
  961                          */
  962                         kn->kn_status |= KN_INFLUX;
  963                         KQ_UNLOCK(kq);
  964                         KN_LIST_LOCK(kn);
  965                         kn->kn_sfflags = kev->fflags;
  966                         kn->kn_sdata = kev->data;
  967                         kn->kn_kevent.udata = kev->udata;
  968                 }
  969 
  970                 /*
  971                  * We can get here with kn->kn_knlist == NULL.
  972                  * This can happen when the initial attach event decides that
  973                  * the event is "completed" already.  i.e. filt_procattach
  974                  * is called on a zombie process.  It will call filt_proc
  975                  * which will remove it from the list, and NULL kn_knlist.
  976                  */
                      /*
                       * Poll the filter once (kq lock dropped, knote marked
                       * KN_INFLUX), then retake the kq lock and activate the
                       * knote if the event has already fired.
                       */
  977                 event = kn->kn_fop->f_event(kn, 0);
  978                 KQ_LOCK(kq);
  979                 if (event)
  980                         KNOTE_ACTIVATE(kn, 1);
  981                 kn->kn_status &= ~KN_INFLUX;
  982                 KN_LIST_UNLOCK(kn);
  983         } else if (kev->flags & EV_DELETE) {
                      /* kn is non-NULL here: the ENOENT check above covered NULL. */
  984                 kn->kn_status |= KN_INFLUX;
  985                 KQ_UNLOCK(kq);
  986                 if (!(kn->kn_status & KN_DETACHED))
  987                         kn->kn_fop->f_detach(kn);
  988                 knote_drop(kn, td);
  989                 goto done;
  990         }
  991 
              /* The kq lock is held on every path that reaches this point. */
  992         if ((kev->flags & EV_DISABLE) &&
  993             ((kn->kn_status & KN_DISABLED) == 0)) {
  994                 kn->kn_status |= KN_DISABLED;
  995         }
  996 
              /* Re-enabling an active knote that is not queued queues it now. */
  997         if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
  998                 kn->kn_status &= ~KN_DISABLED;
  999                 if ((kn->kn_status & KN_ACTIVE) &&
 1000                     ((kn->kn_status & KN_QUEUED) == 0))
 1001                         knote_enqueue(kn);
 1002         }
 1003         KQ_UNLOCK_FLUX(kq);
 1004 
              /*
               * Common exit, entered with the kq lock released: drop whatever
               * this call still owns (global kq lock, file reference, unused
               * preallocated knote, filter-slot reference).
               */
 1005 done:
 1006         KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 1007         if (fp != NULL)
 1008                 fdrop(fp, td);
 1009         if (tkn != NULL)
 1010                 knote_free(tkn);
 1011         if (fops != NULL)
 1012                 kqueue_fo_release(filt);
 1013         return (error);
 1014 }
 1015 
 1016 static int
 1017 kqueue_aquire(struct file *fp, struct kqueue **kqp)
 1018 {
 1019         int error;
 1020         struct kqueue *kq;
 1021 
 1022         error = 0;
 1023 
 1024         FILE_LOCK(fp);
 1025         do {
 1026                 kq = fp->f_data;
 1027                 if (fp->f_type != DTYPE_KQUEUE || kq == NULL) {
 1028                         error = EBADF;
 1029                         break;
 1030                 }
 1031                 *kqp = kq;
 1032                 KQ_LOCK(kq);
 1033                 if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
 1034                         KQ_UNLOCK(kq);
 1035                         error = EBADF;
 1036                         break;
 1037                 }
 1038                 kq->kq_refcnt++;
 1039                 KQ_UNLOCK(kq);
 1040         } while (0);
 1041         FILE_UNLOCK(fp);
 1042 
 1043         return error;
 1044 }
 1045 
 1046 static void
 1047 kqueue_release(struct kqueue *kq, int locked)
 1048 {
 1049         if (locked)
 1050                 KQ_OWNED(kq);
 1051         else
 1052                 KQ_LOCK(kq);
 1053         kq->kq_refcnt--;
 1054         if (kq->kq_refcnt == 1)
 1055                 wakeup(&kq->kq_refcnt);
 1056         if (!locked)
 1057                 KQ_UNLOCK(kq);
 1058 }
 1059 
 1060 static void
 1061 kqueue_schedtask(struct kqueue *kq)
 1062 {
 1063 
 1064         KQ_OWNED(kq);
 1065         KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
 1066             ("scheduling kqueue task while draining"));
 1067 
 1068         if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
 1069                 taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
 1070                 kq->kq_state |= KQ_TASKSCHED;
 1071         }
 1072 }
 1073 
 1074 /*
 1075  * Expand the kq to make sure we have storage for fops/ident pair.
 1076  *
 1077  * Return 0 on success (or no work necessary), return errno on failure.
 1078  *
 1079  * Not calling hashinit w/ waitok (proper malloc flag) should be safe.
 1080  * If kqueue_register is called from a non-fd context, there usually/should
 1081  * be no locks held.
 1082  */
 1083 static int
 1084 kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
 1085         int waitok)
 1086 {
 1087         struct klist *list, *tmp_knhash;
 1088         u_long tmp_knhashmask;
 1089         int size;
 1090         int fd;
 1091         int mflag = waitok ? M_WAITOK : M_NOWAIT;
 1092 
 1093         KQ_NOTOWNED(kq);
 1094 
 1095         if (fops->f_isfd) {
 1096                 fd = ident;
 1097                 if (kq->kq_knlistsize <= fd) {
 1098                         size = kq->kq_knlistsize;
 1099                         while (size <= fd)
 1100                                 size += KQEXTENT;
 1101                         MALLOC(list, struct klist *,
 1102                             size * sizeof list, M_KQUEUE, mflag);
 1103                         if (list == NULL)
 1104                                 return ENOMEM;
 1105                         KQ_LOCK(kq);
 1106                         if (kq->kq_knlistsize > fd) {
 1107                                 FREE(list, M_KQUEUE);
 1108                                 list = NULL;
 1109                         } else {
 1110                                 if (kq->kq_knlist != NULL) {
 1111                                         bcopy(kq->kq_knlist, list,
 1112                                             kq->kq_knlistsize * sizeof list);
 1113                                         FREE(kq->kq_knlist, M_KQUEUE);
 1114                                         kq->kq_knlist = NULL;
 1115                                 }
 1116                                 bzero((caddr_t)list +
 1117                                     kq->kq_knlistsize * sizeof list,
 1118                                     (size - kq->kq_knlistsize) * sizeof list);
 1119                                 kq->kq_knlistsize = size;
 1120                                 kq->kq_knlist = list;
 1121                         }
 1122                         KQ_UNLOCK(kq);
 1123                 }
 1124         } else {
 1125                 if (kq->kq_knhashmask == 0) {
 1126                         tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
 1127                             &tmp_knhashmask);
 1128                         if (tmp_knhash == NULL)
 1129                                 return ENOMEM;
 1130                         KQ_LOCK(kq);
 1131                         if (kq->kq_knhashmask == 0) {
 1132                                 kq->kq_knhash = tmp_knhash;
 1133                                 kq->kq_knhashmask = tmp_knhashmask;
 1134                         } else {
 1135                                 free(tmp_knhash, M_KQUEUE);
 1136                         }
 1137                         KQ_UNLOCK(kq);
 1138                 }
 1139         }
 1140 
 1141         KQ_NOTOWNED(kq);
 1142         return 0;
 1143 }
 1144 
 1145 static void
 1146 kqueue_task(void *arg, int pending)
 1147 {
 1148         struct kqueue *kq;
 1149         int haskqglobal;
 1150 
 1151         haskqglobal = 0;
 1152         kq = arg;
 1153 
 1154         KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
 1155         KQ_LOCK(kq);
 1156 
 1157         KNOTE_LOCKED(&kq->kq_sel.si_note, 0);
 1158 
 1159         kq->kq_state &= ~KQ_TASKSCHED;
 1160         if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
 1161                 wakeup(&kq->kq_state);
 1162         }
 1163         KQ_UNLOCK(kq);
 1164         KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 1165 }
 1166 
 1167 /*
 1168  * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
 1169  * We treat KN_MARKER knotes as if they are INFLUX.
       *
       * Arguments:
       *   kq         kqueue to harvest events from
       *   maxevents  upper bound on the number of events returned; 0 returns
       *              immediately
       *   k_ops      copy-out callback; k_copyout(arg, keva, n) is called for
       *              every full batch of KQ_NEVENTS events and for the final
       *              partial batch
       *   tsp        NULL: sleep without a deadline; zero timespec: do not
       *              sleep at all; otherwise the maximum time to wait
       *   keva       staging buffer for events (presumably at least
       *              KQ_NEVENTS entries -- it is flushed at that count)
       *   td         calling thread; td_retval[0] receives the event count
       *
       * Returns 0 or an errno: EINVAL when itimerfix() rejects the timeout,
       * ENOMEM when no marker knote could be allocated, EINTR when the sleep
       * was interrupted (ERESTART is mapped to EINTR), or the error returned
       * by k_copyout.
 1170  */
 1171 static int
 1172 kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
 1173     const struct timespec *tsp, struct kevent *keva, struct thread *td)
 1174 {
 1175         struct kevent *kevp;
 1176         struct timeval atv, rtv, ttv;
 1177         struct knote *kn, *marker;
 1178         int count, timeout, nkev, error, influx;
 1179         int haskqglobal;
 1180 
              /* count: events still wanted; nkev: events staged in keva. */
 1181         count = maxevents;
 1182         nkev = 0;
 1183         error = 0;
 1184         haskqglobal = 0;
 1185 
 1186         if (maxevents == 0)
 1187                 goto done_nl;
 1188 
 1189         if (tsp != NULL) {
 1190                 TIMESPEC_TO_TIMEVAL(&atv, tsp);
 1191                 if (itimerfix(&atv)) {
 1192                         error = EINVAL;
 1193                         goto done_nl;
 1194                 }
                      /*
                       * A zero timespec means "poll": the negative timeout makes
                       * the start: path return instead of sleeping.  Otherwise
                       * the sleep is capped at 24 hours worth of ticks.
                       */
 1195                 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
 1196                         timeout = -1;
 1197                 else
 1198                         timeout = atv.tv_sec > 24 * 60 * 60 ?
 1199                             24 * 60 * 60 * hz : tvtohz(&atv);
                      /* Turn atv into an absolute deadline on the uptime clock. */
 1200                 getmicrouptime(&rtv);
 1201                 timevaladd(&atv, &rtv);
 1202         } else {
                      /* No deadline: a zero atv disables the check at retry:. */
 1203                 atv.tv_sec = 0;
 1204                 atv.tv_usec = 0;
 1205                 timeout = 0;
 1206         }
              /* Private marker knote used to delimit one pass over the queue. */
 1207         marker = knote_alloc(1);
 1208         if (marker == NULL) {
 1209                 error = ENOMEM;
 1210                 goto done_nl;
 1211         }
 1212         marker->kn_status = KN_MARKER;
 1213         KQ_LOCK(kq);
 1214         goto start;
 1215 
              /*
               * Re-entered (kq lock held) after a wakeup or an empty pass: if a
               * deadline exists, give up once it has passed, otherwise recompute
               * the remaining sleep time.
               */
 1216 retry:
 1217         if (atv.tv_sec || atv.tv_usec) {
 1218                 getmicrouptime(&rtv);
 1219                 if (timevalcmp(&rtv, &atv, >=))
 1220                         goto done;
 1221                 ttv = atv;
 1222                 timevalsub(&ttv, &rtv);
 1223                 timeout = ttv.tv_sec > 24 * 60 * 60 ?
 1224                         24 * 60 * 60 * hz : tvtohz(&ttv);
 1225         }
 1226 
 1227 start:
 1228         kevp = keva;
 1229         if (kq->kq_count == 0) {
                      /* Nothing queued: either report "would block" or sleep. */
 1230                 if (timeout < 0) {
 1231                         error = EWOULDBLOCK;
 1232                 } else {
 1233                         kq->kq_state |= KQ_SLEEP;
 1234                         error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
 1235                             "kqread", timeout);
 1236                 }
 1237                 if (error == 0)
 1238                         goto retry;
 1239                 /* don't restart after signals... */
 1240                 if (error == ERESTART)
 1241                         error = EINTR;
                      /* EWOULDBLOCK is reported to the caller as 0 events. */
 1242                 else if (error == EWOULDBLOCK)
 1243                         error = 0;
 1244                 goto done;
 1245         }
 1246 
              /*
               * Append our marker so this pass stops at the entries that were
               * queued when it began.  influx records that we cleared KN_INFLUX
               * on some knote and may owe sleepers a wakeup.
               */
 1247         TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
 1248         influx = 0;
 1249         while (count) {
 1250                 KQ_OWNED(kq);
 1251                 kn = TAILQ_FIRST(&kq->kq_head);
 1252 
                      /*
                       * Head is a marker other than ours, or a knote in flux:
                       * wake anyone waiting on us first, then sleep until the
                       * state changes and look at the head again.
                       */
 1253                 if ((kn->kn_status == KN_MARKER && kn != marker) ||
 1254                     (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
 1255                         if (influx) {
 1256                                 influx = 0;
 1257                                 KQ_FLUX_WAKEUP(kq);
 1258                         }
 1259                         kq->kq_state |= KQ_FLUXWAIT;
 1260                         error = msleep(kq, &kq->kq_lock, PSOCK,
 1261                             "kqflxwt", 0);
 1262                         continue;
 1263                 }
 1264 
 1265                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                      /* Disabled knotes are dequeued without being reported. */
 1266                 if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
 1267                         kn->kn_status &= ~KN_QUEUED;
 1268                         kq->kq_count--;
 1269                         continue;
 1270                 }
                      /*
                       * Reached our own marker: the pass is complete.  If nothing
                       * was collected go back and wait, otherwise finish up.
                       */
 1271                 if (kn == marker) {
 1272                         KQ_FLUX_WAKEUP(kq);
 1273                         if (count == maxevents)
 1274                                 goto retry;
 1275                         goto done;
 1276                 }
 1277                 KASSERT((kn->kn_status & KN_INFLUX) == 0,
 1278                     ("KN_INFLUX set when not suppose to be"));
 1279 
 1280                 if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
                              /* One-shot: report the event, then destroy the knote. */
 1281                         kn->kn_status &= ~KN_QUEUED;
 1282                         kn->kn_status |= KN_INFLUX;
 1283                         kq->kq_count--;
 1284                         KQ_UNLOCK(kq);
 1285                         /*
 1286                          * We don't need to lock the list since we've marked
 1287                          * it _INFLUX.
 1288                          */
 1289                         *kevp = kn->kn_kevent;
 1290                         if (!(kn->kn_status & KN_DETACHED))
 1291                                 kn->kn_fop->f_detach(kn);
 1292                         knote_drop(kn, td);
 1293                         KQ_LOCK(kq);
 1294                         kn = NULL;
 1295                 } else {
 1296                         kn->kn_status |= KN_INFLUX;
 1297                         KQ_UNLOCK(kq);
 1298                         if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
 1299                                 KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
 1300                         KN_LIST_LOCK(kn);
                              /*
                               * Ask the filter again whether the event still
                               * holds; if not, dequeue and deactivate the knote
                               * without reporting it.
                               */
 1301                         if (kn->kn_fop->f_event(kn, 0) == 0) {
 1302                                 KQ_LOCK(kq);
 1303                                 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 1304                                 kn->kn_status &=
 1305                                     ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
 1306                                 kq->kq_count--;
 1307                                 KN_LIST_UNLOCK(kn);
 1308                                 influx = 1;
 1309                                 continue;
 1310                         }
 1311                         *kevp = kn->kn_kevent;
 1312                         KQ_LOCK(kq);
 1313                         KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
                              /*
                               * EV_CLEAR knotes are reset and dequeued after being
                               * reported; all others go back to the queue tail.
                               */
 1314                         if (kn->kn_flags & EV_CLEAR) {
 1315                                 kn->kn_data = 0;
 1316                                 kn->kn_fflags = 0;
 1317                                 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
 1318                                 kq->kq_count--;
 1319                         } else
 1320                                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 1321                         
 1322                         kn->kn_status &= ~(KN_INFLUX);
 1323                         KN_LIST_UNLOCK(kn);
 1324                         influx = 1;
 1325                 }
 1326 
 1327                 /* we are returning a copy to the user */
 1328                 kevp++;
 1329                 nkev++;
 1330                 count--;
 1331 
                      /*
                       * Staging buffer full: drop the kq lock, hand the batch to
                       * the copy-out callback and start filling keva again.
                       */
 1332                 if (nkev == KQ_NEVENTS) {
 1333                         influx = 0;
 1334                         KQ_UNLOCK_FLUX(kq);
 1335                         error = k_ops->k_copyout(k_ops->arg, keva, nkev);
 1336                         nkev = 0;
 1337                         kevp = keva;
 1338                         KQ_LOCK(kq);
 1339                         if (error)
 1340                                 break;
 1341                 }
 1342         }
              /* Loop left by count reaching 0 or a copy-out error: the marker
               * is still on the queue and must be taken off. */
 1343         TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
 1344 done:
 1345         KQ_OWNED(kq);
 1346         KQ_UNLOCK_FLUX(kq);
 1347         knote_free(marker);
 1348 done_nl:
 1349         KQ_NOTOWNED(kq);
              /* Flush the final partial batch and report the event count. */
 1350         if (nkev != 0)
 1351                 error = k_ops->k_copyout(k_ops->arg, keva, nkev);
 1352         td->td_retval[0] = maxevents - count;
 1353         return (error);
 1354 }
 1355 
 1356 /*
 1357  * XXX
 1358  * This could be expanded to call kqueue_scan, if desired.
 1359  */
 1360 /*ARGSUSED*/
 1361 static int
 1362 kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
 1363         int flags, struct thread *td)
 1364 {
 1365         return (ENXIO);
 1366 }
 1367 
 1368 /*ARGSUSED*/
 1369 static int
 1370 kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
 1371          int flags, struct thread *td)
 1372 {
 1373         return (ENXIO);
 1374 }
 1375 
 1376 /*ARGSUSED*/
 1377 static int
 1378 kqueue_ioctl(struct file *fp, u_long cmd, void *data,
 1379         struct ucred *active_cred, struct thread *td)
 1380 {
 1381         /*
 1382          * Enabling sigio causes two major problems:
 1383          * 1) infinite recursion:
 1384          * Synopsys: kevent is being used to track signals and have FIOASYNC
 1385          * set.  On receipt of a signal this will cause a kqueue to recurse
 1386          * into itself over and over.  Sending the sigio causes the kqueue
 1387          * to become ready, which in turn posts sigio again, forever.
 1388          * Solution: this can be solved by setting a flag in the kqueue that
 1389          * we have a SIGIO in progress.
 1390          * 2) locking problems:
 1391          * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
 1392          * us above the proc and pgrp locks.
 1393          * Solution: Post a signal using an async mechanism, being sure to
 1394          * record a generation count in the delivery so that we do not deliver
 1395          * a signal to the wrong process.
 1396          *
 1397          * Note, these two mechanisms are somewhat mutually exclusive!
 1398          */
 1399 #if 0
 1400         struct kqueue *kq;
 1401 
 1402         kq = fp->f_data;
 1403         switch (cmd) {
 1404         case FIOASYNC:
 1405                 if (*(int *)data) {
 1406                         kq->kq_state |= KQ_ASYNC;
 1407                 } else {
 1408                         kq->kq_state &= ~KQ_ASYNC;
 1409                 }
 1410                 return (0);
 1411 
 1412         case FIOSETOWN:
 1413                 return (fsetown(*(int *)data, &kq->kq_sigio));
 1414 
 1415         case FIOGETOWN:
 1416                 *(int *)data = fgetown(&kq->kq_sigio);
 1417                 return (0);
 1418         }
 1419 #endif
 1420 
 1421         return (ENOTTY);
 1422 }
 1423 
 1424 /*ARGSUSED*/
 1425 static int
 1426 kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
 1427         struct thread *td)
 1428 {
 1429         struct kqueue *kq;
 1430         int revents = 0;
 1431         int error;
 1432 
 1433         if ((error = kqueue_aquire(fp, &kq)))
 1434                 return POLLERR;
 1435 
 1436         KQ_LOCK(kq);
 1437         if (events & (POLLIN | POLLRDNORM)) {
 1438                 if (kq->kq_count) {
 1439                         revents |= events & (POLLIN | POLLRDNORM);
 1440                 } else {
 1441                         selrecord(td, &kq->kq_sel);
 1442                         kq->kq_state |= KQ_SEL;
 1443                 }
 1444         }
 1445         kqueue_release(kq, 1);
 1446         KQ_UNLOCK(kq);
 1447         return (revents);
 1448 }
 1449 
 1450 /*ARGSUSED*/
 1451 static int
 1452 kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
 1453         struct thread *td)
 1454 {
 1455 
 1456         bzero((void *)st, sizeof *st);
 1457         /*
 1458          * We no longer return kq_count because the unlocked value is useless.
 1459          * If you spent all this time getting the count, why not spend your
 1460          * syscall better by calling kevent?
 1461          *
 1462          * XXX - This is needed for libc_r.
 1463          */
 1464         st->st_mode = S_IFIFO;
 1465         return (0);
 1466 }
 1467 
      /*
       * Close handler for kqueue files: tears the kqueue down and frees it.
       *
       * In order: take a reference, mark the kqueue KQ_CLOSING, wait for other
       * references to go away, detach and drop every knote (descriptor list and
       * hash table), wait for a scheduled task to finish, wake pollers, unlink
       * the kqueue from the owning filedesc's fd_kqlist, then destroy the lock
       * and free all memory.  fp->f_data is cleared at the end.
       *
       * Returns the error from kqueue_aquire() if no reference could be taken,
       * otherwise 0.
       */
 1468 /*ARGSUSED*/
 1469 static int
 1470 kqueue_close(struct file *fp, struct thread *td)
 1471 {
 1472         struct kqueue *kq = fp->f_data;
 1473         struct filedesc *fdp;
 1474         struct knote *kn;
 1475         int i;
 1476         int error;
 1477 
 1478         if ((error = kqueue_aquire(fp, &kq)))
 1479                 return error;
 1480 
 1481         KQ_LOCK(kq);
 1482 
 1483         KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
 1484             ("kqueue already closing"));
 1485         kq->kq_state |= KQ_CLOSING;
              /*
               * Our own reference accounts for one; kqueue_release() issues the
               * wakeup on &kq->kq_refcnt when the count drops back to 1.
               */
 1486         if (kq->kq_refcnt > 1)
 1487                 msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);
 1488 
 1489         KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
 1490         fdp = kq->kq_fdp;
 1491 
 1492         KASSERT(knlist_empty(&kq->kq_sel.si_note),
 1493             ("kqueue's knlist not empty"));
 1494 
              /*
               * Drop all descriptor based knotes.  A knote in flux is waited for
               * and the list head re-read; otherwise it is marked in flux, and
               * detached/dropped with the kq lock released.
               */
 1495         for (i = 0; i < kq->kq_knlistsize; i++) {
 1496                 while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
 1497                         if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
 1498                                 kq->kq_state |= KQ_FLUXWAIT;
 1499                                 msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
 1500                                 continue;
 1501                         }
 1502                         kn->kn_status |= KN_INFLUX;
 1503                         KQ_UNLOCK(kq);
 1504                         if (!(kn->kn_status & KN_DETACHED))
 1505                                 kn->kn_fop->f_detach(kn);
 1506                         knote_drop(kn, td);
 1507                         KQ_LOCK(kq);
 1508                 }
 1509         }
              /* Same procedure for the knotes kept in the hash table. */
 1510         if (kq->kq_knhashmask != 0) {
 1511                 for (i = 0; i <= kq->kq_knhashmask; i++) {
 1512                         while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
 1513                                 if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
 1514                                         kq->kq_state |= KQ_FLUXWAIT;
 1515                                         msleep(kq, &kq->kq_lock, PSOCK,
 1516                                                "kqclo2", 0);
 1517                                         continue;
 1518                                 }
 1519                                 kn->kn_status |= KN_INFLUX;
 1520                                 KQ_UNLOCK(kq);
 1521                                 if (!(kn->kn_status & KN_DETACHED))
 1522                                         kn->kn_fop->f_detach(kn);
 1523                                 knote_drop(kn, td);
 1524                                 KQ_LOCK(kq);
 1525                         }
 1526                 }
 1527         }
 1528 
              /*
               * A task is still scheduled: flag the drain and sleep until
               * kqueue_task() has run and issues the wakeup on &kq->kq_state.
               */
 1529         if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
 1530                 kq->kq_state |= KQ_TASKDRAIN;
 1531                 msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
 1532         }
 1533 
              /* Wake anybody recorded on kq_sel by kqueue_poll(). */
 1534         if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
 1535                 kq->kq_state &= ~KQ_SEL;
 1536                 selwakeuppri(&kq->kq_sel, PSOCK);
 1537         }
 1538 
 1539         KQ_UNLOCK(kq);
 1540 
              /* Unlink from the filedesc's list of kqueues. */
 1541         FILEDESC_LOCK_FAST(fdp);
 1542         SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
 1543         FILEDESC_UNLOCK_FAST(fdp);
 1544 
              /* Nothing can reach the kqueue any more: destroy and free it. */
 1545         knlist_destroy(&kq->kq_sel.si_note);
 1546         mtx_destroy(&kq->kq_lock);
 1547         kq->kq_fdp = NULL;
 1548 
 1549         if (kq->kq_knhash != NULL)
 1550                 free(kq->kq_knhash, M_KQUEUE);
 1551         if (kq->kq_knlist != NULL)
 1552                 free(kq->kq_knlist, M_KQUEUE);
 1553 
 1554         funsetown(&kq->kq_sigio);
 1555         free(kq, M_KQUEUE);
 1556         fp->f_data = NULL;
 1557 
 1558         return (0);
 1559 }
 1560 
 1561 static void
 1562 kqueue_wakeup(struct kqueue *kq)
 1563 {
 1564         KQ_OWNED(kq);
 1565 
 1566         if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
 1567                 kq->kq_state &= ~KQ_SLEEP;
 1568                 wakeup(kq);
 1569         }
 1570         if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
 1571                 kq->kq_state &= ~KQ_SEL;
 1572                 selwakeuppri(&kq->kq_sel, PSOCK);
 1573         }
 1574         if (!knlist_empty(&kq->kq_sel.si_note))
 1575                 kqueue_schedtask(kq);
 1576         if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
 1577                 pgsigio(&kq->kq_sigio, SIGIO, 0);
 1578         }
 1579 }
 1580 
 1581 /*
 1582  * Walk down a list of knotes, activating them if their event has triggered.
 1583  *
 1584  * There is a possibility to optimize in the case of one kq watching another.
 1585  * Instead of scheduling a task to wake it up, you could pass enough state
 1586  * down the chain to make up the parent kqueue.  Make this code functional
 1587  * first.
 1588  */
 1589 void
 1590 knote(struct knlist *list, long hint, int islocked)
 1591 {
 1592         struct kqueue *kq;
 1593         struct knote *kn;
 1594 
 1595         if (list == NULL)
 1596                 return;
 1597 
 1598         KNL_ASSERT_LOCK(list, islocked);
 1599 
 1600         if (!islocked) 
 1601                 list->kl_lock(list->kl_lockarg); 
 1602 
 1603         /*
 1604          * If we unlock the list lock (and set KN_INFLUX), we can eliminate
 1605          * the kqueue scheduling, but this will introduce four
 1606          * lock/unlock's for each knote to test.  If we do, continue to use
 1607          * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is
 1608          * only safe if you want to remove the current item, which we are
 1609          * not doing.
 1610          */
 1611         SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
 1612                 kq = kn->kn_kq;
 1613                 if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
 1614                         KQ_LOCK(kq);
 1615                         if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
 1616                                 kn->kn_status |= KN_HASKQLOCK;
 1617                                 if (kn->kn_fop->f_event(kn, hint))
 1618                                         KNOTE_ACTIVATE(kn, 1);
 1619                                 kn->kn_status &= ~KN_HASKQLOCK;
 1620                         }
 1621                         KQ_UNLOCK(kq);
 1622                 }
 1623                 kq = NULL;
 1624         }
 1625         if (!islocked)
 1626                 list->kl_unlock(list->kl_lockarg); 
 1627 }
 1628 
 1629 /*
 1630  * add a knote to a knlist
 1631  */
 1632 void
 1633 knlist_add(struct knlist *knl, struct knote *kn, int islocked)
 1634 {
 1635         KNL_ASSERT_LOCK(knl, islocked);
 1636         KQ_NOTOWNED(kn->kn_kq);
 1637         KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
 1638             (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
 1639         if (!islocked)
 1640                 knl->kl_lock(knl->kl_lockarg);
 1641         SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
 1642         if (!islocked)
 1643                 knl->kl_unlock(knl->kl_lockarg);
 1644         KQ_LOCK(kn->kn_kq);
 1645         kn->kn_knlist = knl;
 1646         kn->kn_status &= ~KN_DETACHED;
 1647         KQ_UNLOCK(kn->kn_kq);
 1648 }
 1649 
 1650 static void
 1651 knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
 1652 {
 1653         KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
 1654         KNL_ASSERT_LOCK(knl, knlislocked);
 1655         mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
 1656         if (!kqislocked)
 1657                 KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
 1658     ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
 1659         if (!knlislocked)
 1660                 knl->kl_lock(knl->kl_lockarg);
 1661         SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
 1662         kn->kn_knlist = NULL;
 1663         if (!knlislocked)
 1664                 knl->kl_unlock(knl->kl_lockarg);
 1665         if (!kqislocked)
 1666                 KQ_LOCK(kn->kn_kq);
 1667         kn->kn_status |= KN_DETACHED;
 1668         if (!kqislocked)
 1669                 KQ_UNLOCK(kn->kn_kq);
 1670 }
 1671 
 1672 /*
 1673  * remove a single knote from the specified knlist.
       *
       * islocked is handed to knlist_remove_kq() as the "list lock already
       * held" flag; the knote's kqueue lock is always reported as not held.
 1674  */
 1675 void
 1676 knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
 1677 {
 1678 
 1679         knlist_remove_kq(knl, kn, islocked, 0);
 1680 }
 1681 
 1682 /*
 1683  * remove knote from a specified klist while in f_event handler.
 1684  */
 1685 void
 1686 knlist_remove_inevent(struct knlist *knl, struct knote *kn)
 1687 {
 1688 
 1689         knlist_remove_kq(knl, kn, 1,
 1690             (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
 1691 }
 1692 
 1693 int
 1694 knlist_empty(struct knlist *knl)
 1695 {
 1696         KNL_ASSERT_LOCKED(knl);
 1697         return SLIST_EMPTY(&knl->kl_list);
 1698 }
 1699 
 1700 static struct mtx       knlist_lock;
      /*
       * knlist_lock is the mutex that knlist_init() installs as the lock
       * argument when its caller passes a NULL lock.
       */
 1701 MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
 1702         MTX_DEF);
      /*
       * Mutex-based lock, unlock and lock-owned callbacks; knlist_init() uses
       * them for whichever callback its caller leaves NULL.
       */
 1703 static void knlist_mtx_lock(void *arg);
 1704 static void knlist_mtx_unlock(void *arg);
 1705 static int knlist_mtx_locked(void *arg);
 1706 
 /*
  * Mutex-based knlist lock callback: arg is treated as a struct mtx and
  * acquired.
  */
 static void
 knlist_mtx_lock(void *arg)
 {
         struct mtx *m = arg;

         mtx_lock(m);
 }
 1712 
 /*
  * Mutex-based knlist unlock callback: arg is treated as a struct mtx and
  * released.
  */
 static void
 knlist_mtx_unlock(void *arg)
 {
         struct mtx *m = arg;

         mtx_unlock(m);
 }
 1718 
 /*
  * Mutex-based knlist "is it locked" callback: returns the result of
  * mtx_owned() on arg, treated as a struct mtx.
  */
 static int
 knlist_mtx_locked(void *arg)
 {
         struct mtx *m = arg;

         return (mtx_owned(m));
 }
 1724 
 1725 void
 1726 knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
 1727     void (*kl_unlock)(void *), int (*kl_locked)(void *))
 1728 {
 1729 
 1730         if (lock == NULL)
 1731                 knl->kl_lockarg = &knlist_lock;
 1732         else
 1733                 knl->kl_lockarg = lock;
 1734 
 1735         if (kl_lock == NULL)
 1736                 knl->kl_lock = knlist_mtx_lock;
 1737         else
 1738                 knl->kl_lock = kl_lock;
 1739         if (kl_unlock == NULL)
 1740                 knl->kl_unlock = knlist_mtx_unlock;
 1741         else
 1742                 knl->kl_unlock = kl_unlock;
 1743         if (kl_locked == NULL)
 1744                 knl->kl_locked = knlist_mtx_locked;
 1745         else
 1746                 knl->kl_locked = kl_locked;
 1747 
 1748         SLIST_INIT(&knl->kl_list);
 1749 }
 1750 
 1751 void
 1752 knlist_destroy(struct knlist *knl)
 1753 {
 1754 
 1755 #ifdef INVARIANTS
 1756         /*
 1757          * if we run across this error, we need to find the offending
 1758          * driver and have it call knlist_clear.
 1759          */
 1760         if (!SLIST_EMPTY(&knl->kl_list))
 1761                 printf("WARNING: destroying knlist w/ knotes on it!\n");
 1762 #endif
 1763 
 1764         knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL;
 1765         SLIST_INIT(&knl->kl_list);
 1766 }
 1767 
 1768 /*
 1769  * Even if we are locked, we may need to drop the lock to allow any influx
 1770  * knotes time to "settle".
       *
       * Takes every knote off knl.  With killkn set each knote is also
       * released through knote_drop(); otherwise it is only flagged
       * EV_EOF | EV_ONESHOT.  islocked says whether the caller already holds
       * the list lock; the lock is held on return exactly when it was held
       * on entry.
 1771  */
 1772 void
 1773 knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
 1774 {
 1775         struct knote *kn, *kn2;
 1776         struct kqueue *kq;
 1777 
 1778         if (islocked)
 1779                 KNL_ASSERT_LOCKED(knl);
 1780         else {
 1781                 KNL_ASSERT_UNLOCKED(knl);
              /*
               * The retry path below jumps here after it has released the list
               * lock, so the lock is retaken even when islocked was set.
               */
 1782 again:          /* need to reacquire lock since we have dropped it */
 1783                 knl->kl_lock(knl->kl_lockarg);
 1784         }
 1785 
              /* Pass over the list, removing every knote that is not in flux. */
 1786         SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
 1787                 kq = kn->kn_kq;
 1788                 KQ_LOCK(kq);
 1789                 if ((kn->kn_status & KN_INFLUX)) {
                              /* Left on the list; handled by the retry below. */
 1790                         KQ_UNLOCK(kq);
 1791                         continue;
 1792                 }
                      /* Both the list lock and the kqueue lock are held here. */
 1793                 knlist_remove_kq(knl, kn, 1, 1);
 1794                 if (killkn) {
                              /* knote_drop() asserts KN_INFLUX and takes the kq lock. */
 1795                         kn->kn_status |= KN_INFLUX | KN_DETACHED;
 1796                         KQ_UNLOCK(kq);
 1797                         knote_drop(kn, td);
 1798                 } else {
 1799                         /* Make sure cleared knotes disappear soon */
 1800                         kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 1801                         KQ_UNLOCK(kq);
 1802                 }
 1803                 kq = NULL;
 1804         }
 1805 
 1806         if (!SLIST_EMPTY(&knl->kl_list)) {
 1807                 /* there are still KN_INFLUX remaining */
 1808                 kn = SLIST_FIRST(&knl->kl_list);
 1809                 kq = kn->kn_kq;
 1810                 KQ_LOCK(kq);
 1811                 KASSERT(kn->kn_status & KN_INFLUX,
 1812                     ("knote removed w/o list lock"));
                      /*
                       * Release the list lock, flag the kqueue with KQ_FLUXWAIT
                       * and sleep on it, then start over.  msleep() is given
                       * PDROP; NOTE(review): presumably the kq lock is therefore
                       * not held once the sleep returns -- confirm in msleep(9).
                       */
 1813                 knl->kl_unlock(knl->kl_lockarg);
 1814                 kq->kq_state |= KQ_FLUXWAIT;
 1815                 msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
 1816                 kq = NULL;
 1817                 goto again;
 1818         }
 1819 
              /* Hand the list lock back in the state the caller supplied it. */
 1820         if (islocked)
 1821                 KNL_ASSERT_LOCKED(knl);
 1822         else {
 1823                 knl->kl_unlock(knl->kl_lockarg);
 1824                 KNL_ASSERT_UNLOCKED(knl);
 1825         }
 1826 }
 1827 
 1828 /*
 1829  * Remove all knotes referencing a specified fd.
 1830  * Must be called with the FILEDESC lock held.  This prevents a race where
 1831  * a new fd comes along and occupies the entry and we attach a knote to
       * the fd.
       *
       * Every kqueue on the calling thread's process fd_kqlist is visited, and
       * for each one the knotes in the per-fd list for fd are detached (when
       * not already KN_DETACHED) and dropped one at a time.
 1832  */
 1833 void
 1834 knote_fdclose(struct thread *td, int fd)
 1835 {
 1836         struct filedesc *fdp = td->td_proc->p_fd;
 1837         struct kqueue *kq;
 1838         struct knote *kn;
 1839         int influx;
 1840 
 1841         FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
 1842 
 1843         /*
 1844          * We shouldn't have to worry about new kevents appearing on fd
 1845          * since filedesc is locked.
 1846          */
 1847         SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
 1848                 KQ_LOCK(kq);
 1849 
 1850 again:
                      /* influx records whether a knote was dropped since `again'. */
 1851                 influx = 0;
                      /*
                       * Keep taking the first knote of the fd's list until the
                       * list is empty; kqueues whose table does not reach fd are
                       * skipped by the size test.
                       */
 1852                 while (kq->kq_knlistsize > fd &&
 1853                     (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
 1854                         if (kn->kn_status & KN_INFLUX) {
 1855                                 /* someone else might be waiting on our knote */
 1856                                 if (influx)
 1857                                         wakeup(kq);
                                      /*
                                       * Another thread owns this knote: flag the
                                       * kqueue, sleep on it, and rescan from the
                                       * head of the list.
                                       */
 1858                                 kq->kq_state |= KQ_FLUXWAIT;
 1859                                 msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
 1860                                 goto again;
 1861                         }
                              /*
                               * Claim the knote with KN_INFLUX, then release the
                               * kq lock; knote_drop() asserts that the lock is
                               * not held.
                               */
 1862                         kn->kn_status |= KN_INFLUX;
 1863                         KQ_UNLOCK(kq);
 1864                         if (!(kn->kn_status & KN_DETACHED))
 1865                                 kn->kn_fop->f_detach(kn);
 1866                         knote_drop(kn, td);
 1867                         influx = 1;
 1868                         KQ_LOCK(kq);
 1869                 }
 1870                 KQ_UNLOCK_FLUX(kq);
 1871         }
 1872 }
 1873 
 1874 static int
 1875 knote_attach(struct knote *kn, struct kqueue *kq)
 1876 {
 1877         struct klist *list;
 1878 
 1879         KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
 1880         KQ_OWNED(kq);
 1881 
 1882         if (kn->kn_fop->f_isfd) {
 1883                 if (kn->kn_id >= kq->kq_knlistsize)
 1884                         return ENOMEM;
 1885                 list = &kq->kq_knlist[kn->kn_id];
 1886         } else {
 1887                 if (kq->kq_knhash == NULL)
 1888                         return ENOMEM;
 1889                 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
 1890         }
 1891 
 1892         SLIST_INSERT_HEAD(list, kn, kn_link);
 1893 
 1894         return 0;
 1895 }
 1896 
 1897 /*
 1898  * knote must already have been detatched using the f_detach method.
 1899  * no lock need to be held, it is assumed that the KN_INFLUX flag is set
 1900  * to prevent other removal.
 1901  */
 1902 static void
 1903 knote_drop(struct knote *kn, struct thread *td)
 1904 {
 1905         struct kqueue *kq;
 1906         struct klist *list;
 1907 
 1908         kq = kn->kn_kq;
 1909 
 1910         KQ_NOTOWNED(kq);
 1911         KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
 1912             ("knote_drop called without KN_INFLUX set in kn_status"));
 1913 
 1914         KQ_LOCK(kq);
 1915         if (kn->kn_fop->f_isfd)
 1916                 list = &kq->kq_knlist[kn->kn_id];
 1917         else
 1918                 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
 1919 
 1920         SLIST_REMOVE(list, kn, knote, kn_link);
 1921         if (kn->kn_status & KN_QUEUED)
 1922                 knote_dequeue(kn);
 1923         KQ_UNLOCK_FLUX(kq);
 1924 
 1925         if (kn->kn_fop->f_isfd) {
 1926                 fdrop(kn->kn_fp, td);
 1927                 kn->kn_fp = NULL;
 1928         }
 1929         kqueue_fo_release(kn->kn_kevent.filter);
 1930         kn->kn_fop = NULL;
 1931         knote_free(kn);
 1932 }
 1933 
 1934 static void
 1935 knote_enqueue(struct knote *kn)
 1936 {
 1937         struct kqueue *kq = kn->kn_kq;
 1938 
 1939         KQ_OWNED(kn->kn_kq);
 1940         KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
 1941 
 1942         TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 1943         kn->kn_status |= KN_QUEUED;
 1944         kq->kq_count++;
 1945         kqueue_wakeup(kq);
 1946 }
 1947 
 1948 static void
 1949 knote_dequeue(struct knote *kn)
 1950 {
 1951         struct kqueue *kq = kn->kn_kq;
 1952 
 1953         KQ_OWNED(kn->kn_kq);
 1954         KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
 1955 
 1956         TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
 1957         kn->kn_status &= ~KN_QUEUED;
 1958         kq->kq_count--;
 1959 }
 1960 
 1961 static void
 1962 knote_init(void)
 1963 {
 1964 
              /*
               * Create the "KNOTE" UMA zone, sized for struct knote, that
               * knote_alloc() and knote_free() allocate from and return to.
               */
 1965         knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
 1966             NULL, NULL, UMA_ALIGN_PTR, 0);
 1967 }
      /* Register knote_init() with SYSINIT at SI_SUB_PSEUDO, SI_ORDER_ANY. */
 1968 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 1969 
 1970 static struct knote *
 1971 knote_alloc(int waitok)
 1972 {
 1973         return ((struct knote *)uma_zalloc(knote_zone,
 1974             (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
 1975 }
 1976 
 1977 static void
 1978 knote_free(struct knote *kn)
 1979 {
 1980         if (kn != NULL)
 1981                 uma_zfree(knote_zone, kn);
 1982 }

Cache object: d0475733071fa8c236e2378c941f7496


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.