1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007 Roman Divacky
5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include "opt_compat.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/imgact.h>
37 #include <sys/kernel.h>
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/callout.h>
42 #include <sys/capsicum.h>
43 #include <sys/types.h>
44 #include <sys/user.h>
45 #include <sys/file.h>
46 #include <sys/filedesc.h>
47 #include <sys/filio.h>
48 #include <sys/errno.h>
49 #include <sys/event.h>
50 #include <sys/poll.h>
51 #include <sys/proc.h>
52 #include <sys/selinfo.h>
53 #include <sys/specialfd.h>
54 #include <sys/sx.h>
55 #include <sys/syscallsubr.h>
56 #include <sys/timespec.h>
57 #include <sys/eventfd.h>
58
59 #ifdef COMPAT_LINUX32
60 #include <machine/../linux32/linux.h>
61 #include <machine/../linux32/linux32_proto.h>
62 #else
63 #include <machine/../linux/linux.h>
64 #include <machine/../linux/linux_proto.h>
65 #endif
66
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_event.h>
69 #include <compat/linux/linux_file.h>
70 #include <compat/linux/linux_signal.h>
71 #include <compat/linux/linux_timer.h>
72 #include <compat/linux/linux_util.h>
73
74 typedef uint64_t epoll_udata_t;
75
76 struct epoll_event {
77 uint32_t events;
78 epoll_udata_t data;
79 }
80 #if defined(__amd64__)
81 __attribute__((packed))
82 #endif
83 ;
84
85 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
86
/* Forward declarations of the epoll <-> kevent helpers defined below. */
static int	epoll_to_kevent(struct thread *td, int fd,
		    struct epoll_event *l_event, struct kevent *kevent,
		    int *nkevents);
static void	kevent_to_epoll(struct kevent *kevent,
		    struct epoll_event *l_event);
static int	epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
static int	epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
static int	epoll_register_kevent(struct thread *td, struct file *epfp,
		    int fd, int filter, unsigned int flags);
static int	epoll_fd_registered(struct thread *td, struct file *epfp,
		    int fd);
static int	epoll_delete_all_events(struct thread *td, struct file *epfp,
		    int fd);
99
/* Cursor handed to epoll_kev_copyin() through the kevent copy ops. */
struct epoll_copyin_args {
	struct kevent	*changelist;	/* next in-kernel change to hand over */
};
103
104 struct epoll_copyout_args {
105 struct epoll_event *leventlist;
106 struct proc *p;
107 uint32_t count;
108 int error;
109 };
110
111 /* timerfd */
112 typedef uint64_t timerfd_t;
113
114 static fo_rdwr_t timerfd_read;
115 static fo_ioctl_t timerfd_ioctl;
116 static fo_poll_t timerfd_poll;
117 static fo_kqfilter_t timerfd_kqfilter;
118 static fo_stat_t timerfd_stat;
119 static fo_close_t timerfd_close;
120 static fo_fill_kinfo_t timerfd_fill_kinfo;
121
122 static struct fileops timerfdops = {
123 .fo_read = timerfd_read,
124 .fo_write = invfo_rdwr,
125 .fo_truncate = invfo_truncate,
126 .fo_ioctl = timerfd_ioctl,
127 .fo_poll = timerfd_poll,
128 .fo_kqfilter = timerfd_kqfilter,
129 .fo_stat = timerfd_stat,
130 .fo_close = timerfd_close,
131 .fo_chmod = invfo_chmod,
132 .fo_chown = invfo_chown,
133 .fo_sendfile = invfo_sendfile,
134 .fo_fill_kinfo = timerfd_fill_kinfo,
135 .fo_flags = DFLAG_PASSABLE
136 };
137
138 static void filt_timerfddetach(struct knote *kn);
139 static int filt_timerfdread(struct knote *kn, long hint);
140
141 static struct filterops timerfd_rfiltops = {
142 .f_isfd = 1,
143 .f_detach = filt_timerfddetach,
144 .f_event = filt_timerfdread
145 };
146
147 struct timerfd {
148 clockid_t tfd_clockid;
149 struct itimerspec tfd_time;
150 struct callout tfd_callout;
151 timerfd_t tfd_count;
152 bool tfd_canceled;
153 struct selinfo tfd_sel;
154 struct mtx tfd_lock;
155 };
156
157 static void linux_timerfd_expire(void *);
158 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *);
159
160 static int
161 epoll_create_common(struct thread *td, int flags)
162 {
163
164 return (kern_kqueue(td, flags, NULL));
165 }
166
167 #ifdef LINUX_LEGACY_SYSCALLS
168 int
169 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
170 {
171
172 /*
173 * args->size is unused. Linux just tests it
174 * and then forgets it as well.
175 */
176 if (args->size <= 0)
177 return (EINVAL);
178
179 return (epoll_create_common(td, 0));
180 }
181 #endif
182
183 int
184 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
185 {
186 int flags;
187
188 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
189 return (EINVAL);
190
191 flags = 0;
192 if ((args->flags & LINUX_O_CLOEXEC) != 0)
193 flags |= O_CLOEXEC;
194
195 return (epoll_create_common(td, flags));
196 }
197
198 /* Structure converting function from epoll to kevent. */
199 static int
200 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
201 struct kevent *kevent, int *nkevents)
202 {
203 uint32_t levents = l_event->events;
204 struct linux_pemuldata *pem;
205 struct proc *p;
206 unsigned short kev_flags = EV_ADD | EV_ENABLE;
207
208 /* flags related to how event is registered */
209 if ((levents & LINUX_EPOLLONESHOT) != 0)
210 kev_flags |= EV_DISPATCH;
211 if ((levents & LINUX_EPOLLET) != 0)
212 kev_flags |= EV_CLEAR;
213 if ((levents & LINUX_EPOLLERR) != 0)
214 kev_flags |= EV_ERROR;
215 if ((levents & LINUX_EPOLLRDHUP) != 0)
216 kev_flags |= EV_EOF;
217
218 /* flags related to what event is registered */
219 if ((levents & LINUX_EPOLL_EVRD) != 0) {
220 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
221 kevent->ext[0] = l_event->data;
222 ++kevent;
223 ++(*nkevents);
224 }
225 if ((levents & LINUX_EPOLL_EVWR) != 0) {
226 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
227 kevent->ext[0] = l_event->data;
228 ++kevent;
229 ++(*nkevents);
230 }
231 /* zero event mask is legal */
232 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
233 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
234 ++(*nkevents);
235 }
236
237 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
238 p = td->td_proc;
239
240 pem = pem_find(p);
241 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
242
243 LINUX_PEM_XLOCK(pem);
244 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
245 pem->flags |= LINUX_XUNSUP_EPOLL;
246 LINUX_PEM_XUNLOCK(pem);
247 linux_msg(td, "epoll_ctl unsupported flags: 0x%x",
248 levents);
249 } else
250 LINUX_PEM_XUNLOCK(pem);
251 return (EINVAL);
252 }
253
254 return (0);
255 }
256
257 /*
258 * Structure converting function from kevent to epoll. In a case
259 * this is called on error in registration we store the error in
260 * event->data and pick it up later in linux_epoll_ctl().
261 */
262 static void
263 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
264 {
265
266 l_event->data = kevent->ext[0];
267
268 if ((kevent->flags & EV_ERROR) != 0) {
269 l_event->events = LINUX_EPOLLERR;
270 return;
271 }
272
273 /* XXX EPOLLPRI, EPOLLHUP */
274 switch (kevent->filter) {
275 case EVFILT_READ:
276 l_event->events = LINUX_EPOLLIN;
277 if ((kevent->flags & EV_EOF) != 0)
278 l_event->events |= LINUX_EPOLLRDHUP;
279 break;
280 case EVFILT_WRITE:
281 l_event->events = LINUX_EPOLLOUT;
282 break;
283 }
284 }
285
286 /*
287 * Copyout callback used by kevent. This converts kevent
288 * events to epoll events and copies them back to the
289 * userspace. This is also called on error on registering
290 * of the filter.
291 */
292 static int
293 epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
294 {
295 struct epoll_copyout_args *args;
296 struct epoll_event *eep;
297 int error, i;
298
299 args = (struct epoll_copyout_args*) arg;
300 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
301
302 for (i = 0; i < count; i++)
303 kevent_to_epoll(&kevp[i], &eep[i]);
304
305 error = copyout(eep, args->leventlist, count * sizeof(*eep));
306 if (error == 0) {
307 args->leventlist += count;
308 args->count += count;
309 } else if (args->error == 0)
310 args->error = error;
311
312 free(eep, M_EPOLL);
313 return (error);
314 }
315
316 /*
317 * Copyin callback used by kevent. This copies already
318 * converted filters from kernel memory to the kevent
319 * internal kernel memory. Hence the memcpy instead of
320 * copyin.
321 */
322 static int
323 epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
324 {
325 struct epoll_copyin_args *args;
326
327 args = (struct epoll_copyin_args*) arg;
328
329 memcpy(kevp, args->changelist, count * sizeof(*kevp));
330 args->changelist += count;
331
332 return (0);
333 }
334
335 /*
336 * Load epoll filter, convert it to kevent filter
337 * and load it into kevent subsystem.
338 */
339 int
340 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
341 {
342 struct file *epfp, *fp;
343 struct epoll_copyin_args ciargs;
344 struct kevent kev[2];
345 struct kevent_copyops k_ops = { &ciargs,
346 NULL,
347 epoll_kev_copyin};
348 struct epoll_event le;
349 cap_rights_t rights;
350 int nchanges = 0;
351 int error;
352
353 if (args->op != LINUX_EPOLL_CTL_DEL) {
354 error = copyin(args->event, &le, sizeof(le));
355 if (error != 0)
356 return (error);
357 }
358
359 error = fget(td, args->epfd,
360 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp);
361 if (error != 0)
362 return (error);
363 if (epfp->f_type != DTYPE_KQUEUE) {
364 error = EINVAL;
365 goto leave1;
366 }
367
368 /* Protect user data vector from incorrectly supplied fd. */
369 error = fget(td, args->fd,
370 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp);
371 if (error != 0)
372 goto leave1;
373
374 /* Linux disallows spying on himself */
375 if (epfp == fp) {
376 error = EINVAL;
377 goto leave0;
378 }
379
380 ciargs.changelist = kev;
381
382 if (args->op != LINUX_EPOLL_CTL_DEL) {
383 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
384 if (error != 0)
385 goto leave0;
386 }
387
388 switch (args->op) {
389 case LINUX_EPOLL_CTL_MOD:
390 error = epoll_delete_all_events(td, epfp, args->fd);
391 if (error != 0)
392 goto leave0;
393 break;
394
395 case LINUX_EPOLL_CTL_ADD:
396 if (epoll_fd_registered(td, epfp, args->fd)) {
397 error = EEXIST;
398 goto leave0;
399 }
400 break;
401
402 case LINUX_EPOLL_CTL_DEL:
403 /* CTL_DEL means unregister this fd with this epoll */
404 error = epoll_delete_all_events(td, epfp, args->fd);
405 goto leave0;
406
407 default:
408 error = EINVAL;
409 goto leave0;
410 }
411
412 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
413
414 leave0:
415 fdrop(fp, td);
416
417 leave1:
418 fdrop(epfp, td);
419 return (error);
420 }
421
422 /*
423 * Wait for a filter to be triggered on the epoll file descriptor.
424 */
425
426 static int
427 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events,
428 int maxevents, struct timespec *tsp, sigset_t *uset)
429 {
430 struct epoll_copyout_args coargs;
431 struct kevent_copyops k_ops = { &coargs,
432 epoll_kev_copyout,
433 NULL};
434 cap_rights_t rights;
435 struct file *epfp;
436 sigset_t omask;
437 int error;
438
439 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
440 return (EINVAL);
441
442 error = fget(td, epfd,
443 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp);
444 if (error != 0)
445 return (error);
446 if (epfp->f_type != DTYPE_KQUEUE) {
447 error = EINVAL;
448 goto leave;
449 }
450 if (uset != NULL) {
451 error = kern_sigprocmask(td, SIG_SETMASK, uset,
452 &omask, 0);
453 if (error != 0)
454 goto leave;
455 td->td_pflags |= TDP_OLDMASK;
456 /*
457 * Make sure that ast() is called on return to
458 * usermode and TDP_OLDMASK is cleared, restoring old
459 * sigmask.
460 */
461 thread_lock(td);
462 td->td_flags |= TDF_ASTPENDING;
463 thread_unlock(td);
464 }
465
466 coargs.leventlist = events;
467 coargs.p = td->td_proc;
468 coargs.count = 0;
469 coargs.error = 0;
470
471 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
472 if (error == 0 && coargs.error != 0)
473 error = coargs.error;
474
475 /*
476 * kern_kevent might return ENOMEM which is not expected from epoll_wait.
477 * Maybe we should translate that but I don't think it matters at all.
478 */
479 if (error == 0)
480 td->td_retval[0] = coargs.count;
481
482 if (uset != NULL)
483 error = kern_sigprocmask(td, SIG_SETMASK, &omask,
484 NULL, 0);
485 leave:
486 fdrop(epfp, td);
487 return (error);
488 }
489
490 static int
491 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
492 int maxevents, int timeout, sigset_t *uset)
493 {
494 struct timespec ts, *tsp;
495
496 /*
497 * Linux epoll_wait(2) man page states that timeout of -1 causes caller
498 * to block indefinitely. Real implementation does it if any negative
499 * timeout value is passed.
500 */
501 if (timeout >= 0) {
502 /* Convert from milliseconds to timespec. */
503 ts.tv_sec = timeout / 1000;
504 ts.tv_nsec = (timeout % 1000) * 1000000;
505 tsp = &ts;
506 } else {
507 tsp = NULL;
508 }
509 return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset));
510
511 }
512
513 #ifdef LINUX_LEGACY_SYSCALLS
514 int
515 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
516 {
517
518 return (linux_epoll_wait_common(td, args->epfd, args->events,
519 args->maxevents, args->timeout, NULL));
520 }
521 #endif
522
523 int
524 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
525 {
526 sigset_t mask, *pmask;
527 int error;
528
529 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
530 &mask, &pmask);
531 if (error != 0)
532 return (error);
533
534 return (linux_epoll_wait_common(td, args->epfd, args->events,
535 args->maxevents, args->timeout, pmask));
536 }
537
538 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
539 int
540 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args)
541 {
542 struct timespec ts, *tsa;
543 sigset_t mask, *pmask;
544 int error;
545
546 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
547 &mask, &pmask);
548 if (error != 0)
549 return (error);
550
551 if (args->timeout) {
552 error = linux_get_timespec64(&ts, args->timeout);
553 if (error != 0)
554 return (error);
555 tsa = &ts;
556 } else
557 tsa = NULL;
558
559 return (linux_epoll_wait_ts(td, args->epfd, args->events,
560 args->maxevents, tsa, pmask));
561 }
562 #else
563 int
564 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args)
565 {
566 struct timespec ts, *tsa;
567 sigset_t mask, *pmask;
568 int error;
569
570 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
571 &mask, &pmask);
572 if (error != 0)
573 return (error);
574
575 if (args->timeout) {
576 error = linux_get_timespec(&ts, args->timeout);
577 if (error != 0)
578 return (error);
579 tsa = &ts;
580 } else
581 tsa = NULL;
582
583 return (linux_epoll_wait_ts(td, args->epfd, args->events,
584 args->maxevents, tsa, pmask));
585 }
586 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
587
588 static int
589 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
590 unsigned int flags)
591 {
592 struct epoll_copyin_args ciargs;
593 struct kevent kev;
594 struct kevent_copyops k_ops = { &ciargs,
595 NULL,
596 epoll_kev_copyin};
597
598 ciargs.changelist = &kev;
599 EV_SET(&kev, fd, filter, flags, 0, 0, 0);
600
601 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
602 }
603
604 static int
605 epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
606 {
607 /*
608 * Set empty filter flags to avoid accidental modification of already
609 * registered events. In the case of event re-registration:
610 * 1. If event does not exists kevent() does nothing and returns ENOENT
611 * 2. If event does exists, it's enabled/disabled state is preserved
612 * but fflags, data and udata fields are overwritten. So we can not
613 * set socket lowats and store user's context pointer in udata.
614 */
615 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
616 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
617 return (1);
618
619 return (0);
620 }
621
622 static int
623 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
624 {
625 int error1, error2;
626
627 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
628 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
629
630 /* return 0 if at least one result positive */
631 return (error1 == 0 ? 0 : error2);
632 }
633
634 #ifdef LINUX_LEGACY_SYSCALLS
635 int
636 linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
637 {
638 struct specialfd_eventfd ae;
639
640 bzero(&ae, sizeof(ae));
641 ae.initval = args->initval;
642 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
643 }
644 #endif
645
646 int
647 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
648 {
649 struct specialfd_eventfd ae;
650 int flags;
651
652 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
653 LINUX_EFD_SEMAPHORE)) != 0)
654 return (EINVAL);
655 flags = 0;
656 if ((args->flags & LINUX_O_CLOEXEC) != 0)
657 flags |= EFD_CLOEXEC;
658 if ((args->flags & LINUX_O_NONBLOCK) != 0)
659 flags |= EFD_NONBLOCK;
660 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
661 flags |= EFD_SEMAPHORE;
662
663 bzero(&ae, sizeof(ae));
664 ae.flags = flags;
665 ae.initval = args->initval;
666 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
667 }
668
669 int
670 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
671 {
672 struct timerfd *tfd;
673 struct file *fp;
674 clockid_t clockid;
675 int fflags, fd, error;
676
677 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
678 return (EINVAL);
679
680 error = linux_to_native_clockid(&clockid, args->clockid);
681 if (error != 0)
682 return (error);
683 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
684 return (EINVAL);
685
686 fflags = 0;
687 if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
688 fflags |= O_CLOEXEC;
689
690 error = falloc(td, &fp, &fd, fflags);
691 if (error != 0)
692 return (error);
693
694 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
695 tfd->tfd_clockid = clockid;
696 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
697
698 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
699 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
700
701 fflags = FREAD;
702 if ((args->flags & LINUX_O_NONBLOCK) != 0)
703 fflags |= FNONBLOCK;
704
705 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
706 fdrop(fp, td);
707
708 td->td_retval[0] = fd;
709 return (error);
710 }
711
712 static int
713 timerfd_close(struct file *fp, struct thread *td)
714 {
715 struct timerfd *tfd;
716
717 tfd = fp->f_data;
718 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
719 return (EINVAL);
720
721 timespecclear(&tfd->tfd_time.it_value);
722 timespecclear(&tfd->tfd_time.it_interval);
723
724 callout_drain(&tfd->tfd_callout);
725
726 seldrain(&tfd->tfd_sel);
727 knlist_destroy(&tfd->tfd_sel.si_note);
728
729 fp->f_ops = &badfileops;
730 mtx_destroy(&tfd->tfd_lock);
731 free(tfd, M_EPOLL);
732
733 return (0);
734 }
735
736 static int
737 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
738 int flags, struct thread *td)
739 {
740 struct timerfd *tfd;
741 timerfd_t count;
742 int error;
743
744 tfd = fp->f_data;
745 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
746 return (EINVAL);
747
748 if (uio->uio_resid < sizeof(timerfd_t))
749 return (EINVAL);
750
751 error = 0;
752 mtx_lock(&tfd->tfd_lock);
753 retry:
754 if (tfd->tfd_canceled) {
755 tfd->tfd_count = 0;
756 mtx_unlock(&tfd->tfd_lock);
757 return (ECANCELED);
758 }
759 if (tfd->tfd_count == 0) {
760 if ((fp->f_flag & FNONBLOCK) != 0) {
761 mtx_unlock(&tfd->tfd_lock);
762 return (EAGAIN);
763 }
764 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
765 if (error == 0)
766 goto retry;
767 }
768 if (error == 0) {
769 count = tfd->tfd_count;
770 tfd->tfd_count = 0;
771 mtx_unlock(&tfd->tfd_lock);
772 error = uiomove(&count, sizeof(timerfd_t), uio);
773 } else
774 mtx_unlock(&tfd->tfd_lock);
775
776 return (error);
777 }
778
779 static int
780 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
781 struct thread *td)
782 {
783 struct timerfd *tfd;
784 int revents = 0;
785
786 tfd = fp->f_data;
787 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
788 return (POLLERR);
789
790 mtx_lock(&tfd->tfd_lock);
791 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
792 revents |= events & (POLLIN|POLLRDNORM);
793 if (revents == 0)
794 selrecord(td, &tfd->tfd_sel);
795 mtx_unlock(&tfd->tfd_lock);
796
797 return (revents);
798 }
799
800 static int
801 timerfd_kqfilter(struct file *fp, struct knote *kn)
802 {
803 struct timerfd *tfd;
804
805 tfd = fp->f_data;
806 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
807 return (EINVAL);
808
809 if (kn->kn_filter == EVFILT_READ)
810 kn->kn_fop = &timerfd_rfiltops;
811 else
812 return (EINVAL);
813
814 kn->kn_hook = tfd;
815 knlist_add(&tfd->tfd_sel.si_note, kn, 0);
816
817 return (0);
818 }
819
820 static void
821 filt_timerfddetach(struct knote *kn)
822 {
823 struct timerfd *tfd = kn->kn_hook;
824
825 mtx_lock(&tfd->tfd_lock);
826 knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
827 mtx_unlock(&tfd->tfd_lock);
828 }
829
830 static int
831 filt_timerfdread(struct knote *kn, long hint)
832 {
833 struct timerfd *tfd = kn->kn_hook;
834
835 return (tfd->tfd_count > 0);
836 }
837
838 static int
839 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
840 struct ucred *active_cred, struct thread *td)
841 {
842
843 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
844 return (EINVAL);
845
846 switch (cmd) {
847 case FIONBIO:
848 case FIOASYNC:
849 return (0);
850 }
851
852 return (ENOTTY);
853 }
854
855 static int
856 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
857 struct thread *td)
858 {
859
860 return (ENXIO);
861 }
862
863 static int
864 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
865 {
866
867 kif->kf_type = KF_TYPE_UNKNOWN;
868 return (0);
869 }
870
871 static void
872 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
873 {
874
875 if (tfd->tfd_clockid == CLOCK_REALTIME)
876 getnanotime(ts);
877 else /* CLOCK_MONOTONIC */
878 getnanouptime(ts);
879 }
880
881 static void
882 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
883 {
884 struct timespec cts;
885
886 linux_timerfd_clocktime(tfd, &cts);
887 *ots = tfd->tfd_time;
888 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
889 timespecsub(&ots->it_value, &cts, &ots->it_value);
890 if (ots->it_value.tv_sec < 0 ||
891 (ots->it_value.tv_sec == 0 &&
892 ots->it_value.tv_nsec == 0)) {
893 ots->it_value.tv_sec = 0;
894 ots->it_value.tv_nsec = 1;
895 }
896 }
897 }
898
899 static int
900 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
901 {
902 struct timerfd *tfd;
903 struct file *fp;
904 int error;
905
906 error = fget(td, fd, &cap_read_rights, &fp);
907 if (error != 0)
908 return (error);
909 tfd = fp->f_data;
910 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
911 error = EINVAL;
912 goto out;
913 }
914
915 mtx_lock(&tfd->tfd_lock);
916 linux_timerfd_curval(tfd, ots);
917 mtx_unlock(&tfd->tfd_lock);
918
919 out:
920 fdrop(fp, td);
921 return (error);
922 }
923
924 int
925 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args)
926 {
927 struct l_itimerspec lots;
928 struct itimerspec ots;
929 int error;
930
931 error = linux_timerfd_gettime_common(td, args->fd, &ots);
932 if (error != 0)
933 return (error);
934 error = native_to_linux_itimerspec(&lots, &ots);
935 if (error == 0)
936 error = copyout(&lots, args->old_value, sizeof(lots));
937 return (error);
938 }
939
940 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
941 int
942 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
943 {
944 struct l_itimerspec64 lots;
945 struct itimerspec ots;
946 int error;
947
948 error = linux_timerfd_gettime_common(td, args->fd, &ots);
949 if (error != 0)
950 return (error);
951 error = native_to_linux_itimerspec64(&lots, &ots);
952 if (error == 0)
953 error = copyout(&lots, args->old_value, sizeof(lots));
954 return (error);
955 }
956 #endif
957
958 static int
959 linux_timerfd_settime_common(struct thread *td, int fd, int flags,
960 struct itimerspec *nts, struct itimerspec *oval)
961 {
962 struct timespec cts, ts;
963 struct timerfd *tfd;
964 struct timeval tv;
965 struct file *fp;
966 int error;
967
968 if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
969 return (EINVAL);
970
971 error = fget(td, fd, &cap_write_rights, &fp);
972 if (error != 0)
973 return (error);
974 tfd = fp->f_data;
975 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
976 error = EINVAL;
977 goto out;
978 }
979
980 mtx_lock(&tfd->tfd_lock);
981 if (!timespecisset(&nts->it_value))
982 timespecclear(&nts->it_interval);
983 if (oval != NULL)
984 linux_timerfd_curval(tfd, oval);
985
986 bcopy(nts, &tfd->tfd_time, sizeof(*nts));
987 tfd->tfd_count = 0;
988 if (timespecisset(&nts->it_value)) {
989 linux_timerfd_clocktime(tfd, &cts);
990 ts = nts->it_value;
991 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
992 timespecadd(&tfd->tfd_time.it_value, &cts,
993 &tfd->tfd_time.it_value);
994 } else {
995 timespecsub(&ts, &cts, &ts);
996 }
997 TIMESPEC_TO_TIMEVAL(&tv, &ts);
998 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
999 linux_timerfd_expire, tfd);
1000 tfd->tfd_canceled = false;
1001 } else {
1002 tfd->tfd_canceled = true;
1003 callout_stop(&tfd->tfd_callout);
1004 }
1005 mtx_unlock(&tfd->tfd_lock);
1006
1007 out:
1008 fdrop(fp, td);
1009 return (error);
1010 }
1011
1012 int
1013 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
1014 {
1015 struct l_itimerspec lots;
1016 struct itimerspec nts, ots, *pots;
1017 int error;
1018
1019 error = copyin(args->new_value, &lots, sizeof(lots));
1020 if (error != 0)
1021 return (error);
1022 error = linux_to_native_itimerspec(&nts, &lots);
1023 if (error != 0)
1024 return (error);
1025 pots = (args->old_value != NULL ? &ots : NULL);
1026 error = linux_timerfd_settime_common(td, args->fd, args->flags,
1027 &nts, pots);
1028 if (error == 0 && args->old_value != NULL) {
1029 error = native_to_linux_itimerspec(&lots, &ots);
1030 if (error == 0)
1031 error = copyout(&lots, args->old_value, sizeof(lots));
1032 }
1033 return (error);
1034 }
1035
1036 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1037 int
1038 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args)
1039 {
1040 struct l_itimerspec64 lots;
1041 struct itimerspec nts, ots, *pots;
1042 int error;
1043
1044 error = copyin(args->new_value, &lots, sizeof(lots));
1045 if (error != 0)
1046 return (error);
1047 error = linux_to_native_itimerspec64(&nts, &lots);
1048 if (error != 0)
1049 return (error);
1050 pots = (args->old_value != NULL ? &ots : NULL);
1051 error = linux_timerfd_settime_common(td, args->fd, args->flags,
1052 &nts, pots);
1053 if (error == 0 && args->old_value != NULL) {
1054 error = native_to_linux_itimerspec64(&lots, &ots);
1055 if (error == 0)
1056 error = copyout(&lots, args->old_value, sizeof(lots));
1057 }
1058 return (error);
1059 }
1060 #endif
1061
1062 static void
1063 linux_timerfd_expire(void *arg)
1064 {
1065 struct timespec cts, ts;
1066 struct timeval tv;
1067 struct timerfd *tfd;
1068
1069 tfd = (struct timerfd *)arg;
1070
1071 linux_timerfd_clocktime(tfd, &cts);
1072 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
1073 if (timespecisset(&tfd->tfd_time.it_interval))
1074 timespecadd(&tfd->tfd_time.it_value,
1075 &tfd->tfd_time.it_interval,
1076 &tfd->tfd_time.it_value);
1077 else
1078 /* single shot timer */
1079 timespecclear(&tfd->tfd_time.it_value);
1080 if (timespecisset(&tfd->tfd_time.it_value)) {
1081 timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
1082 TIMESPEC_TO_TIMEVAL(&tv, &ts);
1083 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
1084 linux_timerfd_expire, tfd);
1085 }
1086 tfd->tfd_count++;
1087 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
1088 selwakeup(&tfd->tfd_sel);
1089 wakeup(&tfd->tfd_count);
1090 } else if (timespecisset(&tfd->tfd_time.it_value)) {
1091 timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
1092 TIMESPEC_TO_TIMEVAL(&tv, &ts);
1093 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
1094 linux_timerfd_expire, tfd);
1095 }
1096 }
Cache object: 1dd96c9557bdb2ce0428136b906e1b58
|