FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_event.c
1 /*-
2 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: releng/5.2/sys/kern/kern_event.c 122686 2003-11-14 18:49:01Z cognet $");
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/proc.h>
36 #include <sys/malloc.h>
37 #include <sys/unistd.h>
38 #include <sys/file.h>
39 #include <sys/filedesc.h>
40 #include <sys/fcntl.h>
41 #include <sys/selinfo.h>
42 #include <sys/queue.h>
43 #include <sys/event.h>
44 #include <sys/eventvar.h>
45 #include <sys/poll.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/stat.h>
50 #include <sys/sysctl.h>
51 #include <sys/sysproto.h>
52 #include <sys/uio.h>
53
54 #include <vm/uma.h>
55
56 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
57
58 static int kqueue_scan(struct file *fp, int maxevents,
59 struct kevent *ulistp, const struct timespec *timeout,
60 struct thread *td);
61 static void kqueue_wakeup(struct kqueue *kq);
62
63 static fo_rdwr_t kqueue_read;
64 static fo_rdwr_t kqueue_write;
65 static fo_ioctl_t kqueue_ioctl;
66 static fo_poll_t kqueue_poll;
67 static fo_kqfilter_t kqueue_kqfilter;
68 static fo_stat_t kqueue_stat;
69 static fo_close_t kqueue_close;
70
71 static struct fileops kqueueops = {
72 .fo_read = kqueue_read,
73 .fo_write = kqueue_write,
74 .fo_ioctl = kqueue_ioctl,
75 .fo_poll = kqueue_poll,
76 .fo_kqfilter = kqueue_kqfilter,
77 .fo_stat = kqueue_stat,
78 .fo_close = kqueue_close,
79 };
80
/* knote bookkeeping: list membership, queueing and allocation. */
static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct thread *td);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

/* Filter callbacks: kqueue-on-kqueue, process, file and timer filters. */
static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);
99
100 static struct filterops file_filtops =
101 { 1, filt_fileattach, NULL, NULL };
102 static struct filterops kqread_filtops =
103 { 1, NULL, filt_kqdetach, filt_kqueue };
104 static struct filterops proc_filtops =
105 { 0, filt_procattach, filt_procdetach, filt_proc };
106 static struct filterops timer_filtops =
107 { 0, filt_timerattach, filt_timerdetach, filt_timer };
108
109 static uma_zone_t knote_zone;
110 static int kq_ncallouts = 0;
111 static int kq_calloutmax = (4 * 1024);
112 SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
113 &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
114
/*
 * Mark a knote active and, unless it is already queued or has been
 * disabled, put it on its kqueue via knote_enqueue().
 *
 * The argument is parenthesized so that any pointer-valued expression can
 * be passed.  It is still expanded more than once, so it must not have
 * side effects.
 */
#define KNOTE_ACTIVATE(kn) do {						\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)	\
		knote_enqueue(kn);					\
} while(0)
120
/* Number of buckets requested for the non-descriptor knote hash table. */
#define KN_HASHSIZE		64		/* XXX should be tunable */

/*
 * Fold an identifier into a bucket index: XOR the value with itself
 * shifted right by 8 bits and mask the result.  Both uses of `val' are
 * parenthesized so that an argument such as `a | b' is hashed as a whole.
 */
#define KN_HASH(val, mask)	(((val) ^ ((val) >> 8)) & (mask))
123
/*
 * Attach routine for filter slots that have no implementation: every
 * attach attempt fails with ENXIO.  The knote argument is not examined.
 */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}
130
131 struct filterops null_filtops =
132 { 0, filt_nullattach, NULL, NULL };
133
134 extern struct filterops sig_filtops;
135
136 /*
137 * Table for for all system-defined filters.
138 */
139 static struct filterops *sysfilt_ops[] = {
140 &file_filtops, /* EVFILT_READ */
141 &file_filtops, /* EVFILT_WRITE */
142 &null_filtops, /* EVFILT_AIO */
143 &file_filtops, /* EVFILT_VNODE */
144 &proc_filtops, /* EVFILT_PROC */
145 &sig_filtops, /* EVFILT_SIGNAL */
146 &timer_filtops, /* EVFILT_TIMER */
147 &file_filtops, /* EVFILT_NETDEV */
148 };
149
150 static int
151 filt_fileattach(struct knote *kn)
152 {
153
154 return (fo_kqfilter(kn->kn_fp, kn));
155 }
156
157 /*ARGSUSED*/
158 static int
159 kqueue_kqfilter(struct file *fp, struct knote *kn)
160 {
161 struct kqueue *kq = kn->kn_fp->f_data;
162
163 if (kn->kn_filter != EVFILT_READ)
164 return (1);
165
166 kn->kn_fop = &kqread_filtops;
167 SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
168 return (0);
169 }
170
171 static void
172 filt_kqdetach(struct knote *kn)
173 {
174 struct kqueue *kq = kn->kn_fp->f_data;
175
176 SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
177 }
178
179 /*ARGSUSED*/
180 static int
181 filt_kqueue(struct knote *kn, long hint)
182 {
183 struct kqueue *kq = kn->kn_fp->f_data;
184
185 kn->kn_data = kq->kq_count;
186 return (kn->kn_data > 0);
187 }
188
189 static int
190 filt_procattach(struct knote *kn)
191 {
192 struct proc *p;
193 int immediate;
194 int error;
195
196 immediate = 0;
197 p = pfind(kn->kn_id);
198 if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
199 p = zpfind(kn->kn_id);
200 immediate = 1;
201 }
202 if (p == NULL)
203 return (ESRCH);
204 if ((error = p_cansee(curthread, p))) {
205 PROC_UNLOCK(p);
206 return (error);
207 }
208
209 kn->kn_ptr.p_proc = p;
210 kn->kn_flags |= EV_CLEAR; /* automatically set */
211
212 /*
213 * internal flag indicating registration done by kernel
214 */
215 if (kn->kn_flags & EV_FLAG1) {
216 kn->kn_data = kn->kn_sdata; /* ppid */
217 kn->kn_fflags = NOTE_CHILD;
218 kn->kn_flags &= ~EV_FLAG1;
219 }
220
221 if (immediate == 0)
222 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
223
224 /*
225 * Immediately activate any exit notes if the target process is a
226 * zombie. This is necessary to handle the case where the target
227 * process, e.g. a child, dies before the kevent is registered.
228 */
229 if (immediate && filt_proc(kn, NOTE_EXIT))
230 KNOTE_ACTIVATE(kn);
231
232 PROC_UNLOCK(p);
233
234 return (0);
235 }
236
237 /*
238 * The knote may be attached to a different process, which may exit,
239 * leaving nothing for the knote to be attached to. So when the process
240 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
241 * it will be deleted when read out. However, as part of the knote deletion,
242 * this routine is called, so a check is needed to avoid actually performing
243 * a detach, because the original process does not exist any more.
244 */
245 static void
246 filt_procdetach(struct knote *kn)
247 {
248 struct proc *p = kn->kn_ptr.p_proc;
249
250 if (kn->kn_status & KN_DETACHED)
251 return;
252
253 PROC_LOCK(p);
254 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
255 PROC_UNLOCK(p);
256 }
257
258 static int
259 filt_proc(struct knote *kn, long hint)
260 {
261 u_int event;
262
263 /*
264 * mask off extra data
265 */
266 event = (u_int)hint & NOTE_PCTRLMASK;
267
268 /*
269 * if the user is interested in this event, record it.
270 */
271 if (kn->kn_sfflags & event)
272 kn->kn_fflags |= event;
273
274 /*
275 * process is gone, so flag the event as finished.
276 */
277 if (event == NOTE_EXIT) {
278 kn->kn_status |= KN_DETACHED;
279 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
280 return (1);
281 }
282
283 /*
284 * process forked, and user wants to track the new process,
285 * so attach a new knote to it, and immediately report an
286 * event with the parent's pid.
287 */
288 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
289 struct kevent kev;
290 int error;
291
292 /*
293 * register knote with new process.
294 */
295 kev.ident = hint & NOTE_PDATAMASK; /* pid */
296 kev.filter = kn->kn_filter;
297 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
298 kev.fflags = kn->kn_sfflags;
299 kev.data = kn->kn_id; /* parent */
300 kev.udata = kn->kn_kevent.udata; /* preserve udata */
301 error = kqueue_register(kn->kn_kq, &kev, NULL);
302 if (error)
303 kn->kn_fflags |= NOTE_TRACKERR;
304 }
305
306 return (kn->kn_fflags != 0);
307 }
308
309 static void
310 filt_timerexpire(void *knx)
311 {
312 struct knote *kn = knx;
313 struct callout *calloutp;
314 struct timeval tv;
315 int tticks;
316
317 kn->kn_data++;
318 KNOTE_ACTIVATE(kn);
319
320 if ((kn->kn_flags & EV_ONESHOT) == 0) {
321 tv.tv_sec = kn->kn_sdata / 1000;
322 tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
323 tticks = tvtohz(&tv);
324 calloutp = (struct callout *)kn->kn_hook;
325 callout_reset(calloutp, tticks, filt_timerexpire, kn);
326 }
327 }
328
329 /*
330 * data contains amount of time to sleep, in milliseconds
331 */
332 static int
333 filt_timerattach(struct knote *kn)
334 {
335 struct callout *calloutp;
336 struct timeval tv;
337 int tticks;
338
339 if (kq_ncallouts >= kq_calloutmax)
340 return (ENOMEM);
341 kq_ncallouts++;
342
343 tv.tv_sec = kn->kn_sdata / 1000;
344 tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
345 tticks = tvtohz(&tv);
346
347 kn->kn_flags |= EV_CLEAR; /* automatically set */
348 MALLOC(calloutp, struct callout *, sizeof(*calloutp),
349 M_KQUEUE, M_WAITOK);
350 callout_init(calloutp, 0);
351 callout_reset(calloutp, tticks, filt_timerexpire, kn);
352 kn->kn_hook = calloutp;
353
354 return (0);
355 }
356
357 static void
358 filt_timerdetach(struct knote *kn)
359 {
360 struct callout *calloutp;
361
362 calloutp = (struct callout *)kn->kn_hook;
363 callout_stop(calloutp);
364 FREE(calloutp, M_KQUEUE);
365 kq_ncallouts--;
366 }
367
368 static int
369 filt_timer(struct knote *kn, long hint)
370 {
371
372 return (kn->kn_data != 0);
373 }
374
375 /*
376 * MPSAFE
377 */
378 int
379 kqueue(struct thread *td, struct kqueue_args *uap)
380 {
381 struct filedesc *fdp;
382 struct kqueue *kq;
383 struct file *fp;
384 int fd, error;
385
386 mtx_lock(&Giant);
387 fdp = td->td_proc->p_fd;
388 error = falloc(td, &fp, &fd);
389 if (error)
390 goto done2;
391 /* An extra reference on `nfp' has been held for us by falloc(). */
392 kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
393 TAILQ_INIT(&kq->kq_head);
394 FILE_LOCK(fp);
395 fp->f_flag = FREAD | FWRITE;
396 fp->f_type = DTYPE_KQUEUE;
397 fp->f_ops = &kqueueops;
398 TAILQ_INIT(&kq->kq_head);
399 fp->f_data = kq;
400 FILE_UNLOCK(fp);
401 fdrop(fp, td);
402 FILEDESC_LOCK(fdp);
403 td->td_retval[0] = fd;
404 if (fdp->fd_knlistsize < 0)
405 fdp->fd_knlistsize = 0; /* this process has a kq */
406 FILEDESC_UNLOCK(fdp);
407 kq->kq_fdp = fdp;
408 done2:
409 mtx_unlock(&Giant);
410 return (error);
411 }
412
#ifndef _SYS_SYSPROTO_H_
/* Argument block of the kevent system call, as consumed by kevent(). */
struct kevent_args {
	int	fd;				/* kqueue descriptor */
	const struct kevent *changelist;	/* user array of changes */
	int	nchanges;			/* entries in changelist */
	struct kevent *eventlist;		/* user array for results */
	int	nevents;			/* capacity of eventlist */
	const struct timespec *timeout;		/* user timeout, may be NULL */
};
#endif
423 /*
424 * MPSAFE
425 */
426 int
427 kevent(struct thread *td, struct kevent_args *uap)
428 {
429 struct kevent *kevp;
430 struct kqueue *kq;
431 struct file *fp;
432 struct timespec ts;
433 int i, n, nerrors, error;
434
435 if ((error = fget(td, uap->fd, &fp)) != 0)
436 return (error);
437 if (fp->f_type != DTYPE_KQUEUE) {
438 fdrop(fp, td);
439 return (EBADF);
440 }
441 if (uap->timeout != NULL) {
442 error = copyin(uap->timeout, &ts, sizeof(ts));
443 if (error)
444 goto done_nogiant;
445 uap->timeout = &ts;
446 }
447 mtx_lock(&Giant);
448
449 kq = fp->f_data;
450 nerrors = 0;
451
452 while (uap->nchanges > 0) {
453 n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
454 error = copyin(uap->changelist, kq->kq_kev,
455 n * sizeof(struct kevent));
456 if (error)
457 goto done;
458 for (i = 0; i < n; i++) {
459 kevp = &kq->kq_kev[i];
460 kevp->flags &= ~EV_SYSFLAGS;
461 error = kqueue_register(kq, kevp, td);
462 if (error) {
463 if (uap->nevents != 0) {
464 kevp->flags = EV_ERROR;
465 kevp->data = error;
466 (void) copyout(kevp,
467 uap->eventlist,
468 sizeof(*kevp));
469 uap->eventlist++;
470 uap->nevents--;
471 nerrors++;
472 } else {
473 goto done;
474 }
475 }
476 }
477 uap->nchanges -= n;
478 uap->changelist += n;
479 }
480 if (nerrors) {
481 td->td_retval[0] = nerrors;
482 error = 0;
483 goto done;
484 }
485
486 error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td);
487 done:
488 mtx_unlock(&Giant);
489 done_nogiant:
490 if (fp != NULL)
491 fdrop(fp, td);
492 return (error);
493 }
494
495 int
496 kqueue_add_filteropts(int filt, struct filterops *filtops)
497 {
498
499 if (filt > 0)
500 panic("filt(%d) > 0", filt);
501 if (filt + EVFILT_SYSCOUNT < 0)
502 panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
503 filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
504 if (sysfilt_ops[~filt] != &null_filtops)
505 panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
506 sysfilt_ops[~filt] = filtops;
507 return (0);
508 }
509
510 int
511 kqueue_del_filteropts(int filt)
512 {
513
514 if (filt > 0)
515 panic("filt(%d) > 0", filt);
516 if (filt + EVFILT_SYSCOUNT < 0)
517 panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
518 filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
519 if (sysfilt_ops[~filt] == &null_filtops)
520 panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
521 sysfilt_ops[~filt] = &null_filtops;
522 return (0);
523 }
524
525 int
526 kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
527 {
528 struct filedesc *fdp = kq->kq_fdp;
529 struct filterops *fops;
530 struct file *fp = NULL;
531 struct knote *kn = NULL;
532 int s, error = 0;
533
534 if (kev->filter < 0) {
535 if (kev->filter + EVFILT_SYSCOUNT < 0)
536 return (EINVAL);
537 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */
538 } else {
539 /*
540 * XXX
541 * filter attach routine is responsible for insuring that
542 * the identifier can be attached to it.
543 */
544 printf("unknown filter: %d\n", kev->filter);
545 return (EINVAL);
546 }
547
548 FILEDESC_LOCK(fdp);
549 if (fops->f_isfd) {
550 /* validate descriptor */
551 if ((u_int)kev->ident >= fdp->fd_nfiles ||
552 (fp = fdp->fd_ofiles[kev->ident]) == NULL) {
553 FILEDESC_UNLOCK(fdp);
554 return (EBADF);
555 }
556 fhold(fp);
557
558 if (kev->ident < fdp->fd_knlistsize) {
559 SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
560 if (kq == kn->kn_kq &&
561 kev->filter == kn->kn_filter)
562 break;
563 }
564 } else {
565 if (fdp->fd_knhashmask != 0) {
566 struct klist *list;
567
568 list = &fdp->fd_knhash[
569 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
570 SLIST_FOREACH(kn, list, kn_link)
571 if (kev->ident == kn->kn_id &&
572 kq == kn->kn_kq &&
573 kev->filter == kn->kn_filter)
574 break;
575 }
576 }
577 FILEDESC_UNLOCK(fdp);
578
579 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
580 error = ENOENT;
581 goto done;
582 }
583
584 /*
585 * kn now contains the matching knote, or NULL if no match
586 */
587 if (kev->flags & EV_ADD) {
588
589 if (kn == NULL) {
590 kn = knote_alloc();
591 if (kn == NULL) {
592 error = ENOMEM;
593 goto done;
594 }
595 kn->kn_fp = fp;
596 kn->kn_kq = kq;
597 kn->kn_fop = fops;
598
599 /*
600 * apply reference count to knote structure, and
601 * do not release it at the end of this routine.
602 */
603 fp = NULL;
604
605 kn->kn_sfflags = kev->fflags;
606 kn->kn_sdata = kev->data;
607 kev->fflags = 0;
608 kev->data = 0;
609 kn->kn_kevent = *kev;
610
611 knote_attach(kn, fdp);
612 if ((error = fops->f_attach(kn)) != 0) {
613 knote_drop(kn, td);
614 goto done;
615 }
616 } else {
617 /*
618 * The user may change some filter values after the
619 * initial EV_ADD, but doing so will not reset any
620 * filter which has already been triggered.
621 */
622 kn->kn_sfflags = kev->fflags;
623 kn->kn_sdata = kev->data;
624 kn->kn_kevent.udata = kev->udata;
625 }
626
627 s = splhigh();
628 if (kn->kn_fop->f_event(kn, 0))
629 KNOTE_ACTIVATE(kn);
630 splx(s);
631
632 } else if (kev->flags & EV_DELETE) {
633 kn->kn_fop->f_detach(kn);
634 knote_drop(kn, td);
635 goto done;
636 }
637
638 if ((kev->flags & EV_DISABLE) &&
639 ((kn->kn_status & KN_DISABLED) == 0)) {
640 s = splhigh();
641 kn->kn_status |= KN_DISABLED;
642 splx(s);
643 }
644
645 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
646 s = splhigh();
647 kn->kn_status &= ~KN_DISABLED;
648 if ((kn->kn_status & KN_ACTIVE) &&
649 ((kn->kn_status & KN_QUEUED) == 0))
650 knote_enqueue(kn);
651 splx(s);
652 }
653
654 done:
655 if (fp != NULL)
656 fdrop(fp, td);
657 return (error);
658 }
659
660 static int
661 kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
662 const struct timespec *tsp, struct thread *td)
663 {
664 struct kqueue *kq;
665 struct kevent *kevp;
666 struct timeval atv, rtv, ttv;
667 struct knote *kn, marker;
668 int s, count, timeout, nkev = 0, error = 0;
669
670 FILE_LOCK_ASSERT(fp, MA_NOTOWNED);
671
672 kq = fp->f_data;
673 count = maxevents;
674 if (count == 0)
675 goto done;
676
677 if (tsp != NULL) {
678 TIMESPEC_TO_TIMEVAL(&atv, tsp);
679 if (itimerfix(&atv)) {
680 error = EINVAL;
681 goto done;
682 }
683 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
684 timeout = -1;
685 else
686 timeout = atv.tv_sec > 24 * 60 * 60 ?
687 24 * 60 * 60 * hz : tvtohz(&atv);
688 getmicrouptime(&rtv);
689 timevaladd(&atv, &rtv);
690 } else {
691 atv.tv_sec = 0;
692 atv.tv_usec = 0;
693 timeout = 0;
694 }
695 goto start;
696
697 retry:
698 if (atv.tv_sec || atv.tv_usec) {
699 getmicrouptime(&rtv);
700 if (timevalcmp(&rtv, &atv, >=))
701 goto done;
702 ttv = atv;
703 timevalsub(&ttv, &rtv);
704 timeout = ttv.tv_sec > 24 * 60 * 60 ?
705 24 * 60 * 60 * hz : tvtohz(&ttv);
706 }
707
708 start:
709 kevp = kq->kq_kev;
710 s = splhigh();
711 if (kq->kq_count == 0) {
712 if (timeout < 0) {
713 error = EWOULDBLOCK;
714 } else {
715 kq->kq_state |= KQ_SLEEP;
716 error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
717 }
718 splx(s);
719 if (error == 0)
720 goto retry;
721 /* don't restart after signals... */
722 if (error == ERESTART)
723 error = EINTR;
724 else if (error == EWOULDBLOCK)
725 error = 0;
726 goto done;
727 }
728
729 TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
730 while (count) {
731 kn = TAILQ_FIRST(&kq->kq_head);
732 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
733 if (kn == &marker) {
734 splx(s);
735 if (count == maxevents)
736 goto retry;
737 goto done;
738 }
739 if (kn->kn_status & KN_DISABLED) {
740 kn->kn_status &= ~KN_QUEUED;
741 kq->kq_count--;
742 continue;
743 }
744 if ((kn->kn_flags & EV_ONESHOT) == 0 &&
745 kn->kn_fop->f_event(kn, 0) == 0) {
746 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
747 kq->kq_count--;
748 continue;
749 }
750 *kevp = kn->kn_kevent;
751 kevp++;
752 nkev++;
753 if (kn->kn_flags & EV_ONESHOT) {
754 kn->kn_status &= ~KN_QUEUED;
755 kq->kq_count--;
756 splx(s);
757 kn->kn_fop->f_detach(kn);
758 knote_drop(kn, td);
759 s = splhigh();
760 } else if (kn->kn_flags & EV_CLEAR) {
761 kn->kn_data = 0;
762 kn->kn_fflags = 0;
763 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
764 kq->kq_count--;
765 } else {
766 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
767 }
768 count--;
769 if (nkev == KQ_NEVENTS) {
770 splx(s);
771 error = copyout(&kq->kq_kev, ulistp,
772 sizeof(struct kevent) * nkev);
773 ulistp += nkev;
774 nkev = 0;
775 kevp = kq->kq_kev;
776 s = splhigh();
777 if (error)
778 break;
779 }
780 }
781 TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
782 splx(s);
783 done:
784 if (nkev != 0)
785 error = copyout(&kq->kq_kev, ulistp,
786 sizeof(struct kevent) * nkev);
787 td->td_retval[0] = maxevents - count;
788 return (error);
789 }
790
/*
 * Read method of a kqueue file.  Reading is not supported, so the call
 * always fails with ENXIO.
 *
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	int error = ENXIO;

	return (error);
}
802
/*
 * Write method of a kqueue file.  Writing is not supported, so the call
 * always fails with ENXIO.
 */
/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	int error = ENXIO;

	return (error);
}
810
811 /*ARGSUSED*/
812 static int
813 kqueue_ioctl(struct file *fp, u_long com, void *data,
814 struct ucred *active_cred, struct thread *td)
815 {
816 return (ENOTTY);
817 }
818
819 /*ARGSUSED*/
820 static int
821 kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
822 struct thread *td)
823 {
824 struct kqueue *kq;
825 int revents = 0;
826 int s = splnet();
827
828 kq = fp->f_data;
829 if (events & (POLLIN | POLLRDNORM)) {
830 if (kq->kq_count) {
831 revents |= events & (POLLIN | POLLRDNORM);
832 } else {
833 selrecord(td, &kq->kq_sel);
834 kq->kq_state |= KQ_SEL;
835 }
836 }
837 splx(s);
838 return (revents);
839 }
840
841 /*ARGSUSED*/
842 static int
843 kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
844 struct thread *td)
845 {
846 struct kqueue *kq;
847
848 kq = fp->f_data;
849 bzero((void *)st, sizeof(*st));
850 st->st_size = kq->kq_count;
851 st->st_blksize = sizeof(struct kevent);
852 st->st_mode = S_IFIFO;
853 return (0);
854 }
855
856 /*ARGSUSED*/
857 static int
858 kqueue_close(struct file *fp, struct thread *td)
859 {
860 struct kqueue *kq = fp->f_data;
861 struct filedesc *fdp = kq->kq_fdp;
862 struct knote **knp, *kn, *kn0;
863 int i;
864
865 FILEDESC_LOCK(fdp);
866 for (i = 0; i < fdp->fd_knlistsize; i++) {
867 knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
868 kn = *knp;
869 while (kn != NULL) {
870 kn0 = SLIST_NEXT(kn, kn_link);
871 if (kq == kn->kn_kq) {
872 kn->kn_fop->f_detach(kn);
873 *knp = kn0;
874 FILE_LOCK(kn->kn_fp);
875 FILEDESC_UNLOCK(fdp);
876 fdrop_locked(kn->kn_fp, td);
877 knote_free(kn);
878 FILEDESC_LOCK(fdp);
879 } else {
880 knp = &SLIST_NEXT(kn, kn_link);
881 }
882 kn = kn0;
883 }
884 }
885 if (fdp->fd_knhashmask != 0) {
886 for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
887 knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
888 kn = *knp;
889 while (kn != NULL) {
890 kn0 = SLIST_NEXT(kn, kn_link);
891 if (kq == kn->kn_kq) {
892 kn->kn_fop->f_detach(kn);
893 *knp = kn0;
894 /* XXX non-fd release of kn->kn_ptr */
895 FILEDESC_UNLOCK(fdp);
896 knote_free(kn);
897 FILEDESC_LOCK(fdp);
898 } else {
899 knp = &SLIST_NEXT(kn, kn_link);
900 }
901 kn = kn0;
902 }
903 }
904 }
905 FILEDESC_UNLOCK(fdp);
906 free(kq, M_KQUEUE);
907 fp->f_data = NULL;
908
909 return (0);
910 }
911
912 static void
913 kqueue_wakeup(struct kqueue *kq)
914 {
915
916 if (kq->kq_state & KQ_SLEEP) {
917 kq->kq_state &= ~KQ_SLEEP;
918 wakeup(kq);
919 }
920 if (kq->kq_state & KQ_SEL) {
921 kq->kq_state &= ~KQ_SEL;
922 selwakeuppri(&kq->kq_sel, PSOCK);
923 }
924 KNOTE(&kq->kq_sel.si_note, 0);
925 }
926
927 /*
928 * walk down a list of knotes, activating them if their event has triggered.
929 */
930 void
931 knote(struct klist *list, long hint)
932 {
933 struct knote *kn;
934
935 SLIST_FOREACH(kn, list, kn_selnext)
936 if (kn->kn_fop->f_event(kn, hint))
937 KNOTE_ACTIVATE(kn);
938 }
939
940 /*
941 * remove all knotes from a specified klist
942 */
943 void
944 knote_remove(struct thread *td, struct klist *list)
945 {
946 struct knote *kn;
947
948 while ((kn = SLIST_FIRST(list)) != NULL) {
949 kn->kn_fop->f_detach(kn);
950 knote_drop(kn, td);
951 }
952 }
953
954 /*
955 * remove all knotes referencing a specified fd
956 */
957 void
958 knote_fdclose(struct thread *td, int fd)
959 {
960 struct filedesc *fdp = td->td_proc->p_fd;
961 struct klist *list;
962
963 FILEDESC_LOCK(fdp);
964 list = &fdp->fd_knlist[fd];
965 FILEDESC_UNLOCK(fdp);
966 knote_remove(td, list);
967 }
968
969 static void
970 knote_attach(struct knote *kn, struct filedesc *fdp)
971 {
972 struct klist *list, *tmp_knhash;
973 u_long tmp_knhashmask;
974 int size;
975
976 FILEDESC_LOCK(fdp);
977
978 if (! kn->kn_fop->f_isfd) {
979 if (fdp->fd_knhashmask == 0) {
980 FILEDESC_UNLOCK(fdp);
981 tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
982 &tmp_knhashmask);
983 FILEDESC_LOCK(fdp);
984 if (fdp->fd_knhashmask == 0) {
985 fdp->fd_knhash = tmp_knhash;
986 fdp->fd_knhashmask = tmp_knhashmask;
987 } else {
988 free(tmp_knhash, M_KQUEUE);
989 }
990 }
991 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
992 goto done;
993 }
994
995 if (fdp->fd_knlistsize <= kn->kn_id) {
996 size = fdp->fd_knlistsize;
997 while (size <= kn->kn_id)
998 size += KQEXTENT;
999 FILEDESC_UNLOCK(fdp);
1000 MALLOC(list, struct klist *,
1001 size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1002 FILEDESC_LOCK(fdp);
1003 if (fdp->fd_knlistsize > kn->kn_id) {
1004 FREE(list, M_KQUEUE);
1005 goto bigenough;
1006 }
1007 if (fdp->fd_knlist != NULL) {
1008 bcopy(fdp->fd_knlist, list,
1009 fdp->fd_knlistsize * sizeof(struct klist *));
1010 FREE(fdp->fd_knlist, M_KQUEUE);
1011 }
1012 bzero((caddr_t)list +
1013 fdp->fd_knlistsize * sizeof(struct klist *),
1014 (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1015 fdp->fd_knlistsize = size;
1016 fdp->fd_knlist = list;
1017 }
1018 bigenough:
1019 list = &fdp->fd_knlist[kn->kn_id];
1020 done:
1021 FILEDESC_UNLOCK(fdp);
1022 SLIST_INSERT_HEAD(list, kn, kn_link);
1023 kn->kn_status = 0;
1024 }
1025
1026 /*
1027 * should be called at spl == 0, since we don't want to hold spl
1028 * while calling fdrop and free.
1029 */
1030 static void
1031 knote_drop(struct knote *kn, struct thread *td)
1032 {
1033 struct filedesc *fdp = td->td_proc->p_fd;
1034 struct klist *list;
1035
1036 FILEDESC_LOCK(fdp);
1037 if (kn->kn_fop->f_isfd)
1038 list = &fdp->fd_knlist[kn->kn_id];
1039 else
1040 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1041 if (kn->kn_fop->f_isfd)
1042 FILE_LOCK(kn->kn_fp);
1043 FILEDESC_UNLOCK(fdp);
1044
1045 SLIST_REMOVE(list, kn, knote, kn_link);
1046 if (kn->kn_status & KN_QUEUED)
1047 knote_dequeue(kn);
1048 if (kn->kn_fop->f_isfd)
1049 fdrop_locked(kn->kn_fp, td);
1050 knote_free(kn);
1051 }
1052
1053
1054 static void
1055 knote_enqueue(struct knote *kn)
1056 {
1057 struct kqueue *kq = kn->kn_kq;
1058 int s = splhigh();
1059
1060 KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
1061
1062 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1063 kn->kn_status |= KN_QUEUED;
1064 kq->kq_count++;
1065 splx(s);
1066 kqueue_wakeup(kq);
1067 }
1068
1069 static void
1070 knote_dequeue(struct knote *kn)
1071 {
1072 struct kqueue *kq = kn->kn_kq;
1073 int s = splhigh();
1074
1075 KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
1076
1077 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1078 kn->kn_status &= ~KN_QUEUED;
1079 kq->kq_count--;
1080 splx(s);
1081 }
1082
1083 static void
1084 knote_init(void)
1085 {
1086 knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
1087 NULL, NULL, UMA_ALIGN_PTR, 0);
1088
1089 }
1090 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1091
1092 static struct knote *
1093 knote_alloc(void)
1094 {
1095 return ((struct knote *)uma_zalloc(knote_zone, M_WAITOK));
1096 }
1097
1098 static void
1099 knote_free(struct knote *kn)
1100 {
1101 uma_zfree(knote_zone, kn);
1102 }
Cache object: ab1bead06771429a761f895d4dbc2827
|