FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_event.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2007 Roman Divacky
    5  * Copyright (c) 2014 Dmitry Chagin
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_compat.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/imgact.h>
   38 #include <sys/kernel.h>
   39 #include <sys/limits.h>
   40 #include <sys/lock.h>
   41 #include <sys/mutex.h>
   42 #include <sys/callout.h>
   43 #include <sys/capsicum.h>
   44 #include <sys/types.h>
   45 #include <sys/user.h>
   46 #include <sys/file.h>
   47 #include <sys/filedesc.h>
   48 #include <sys/filio.h>
   49 #include <sys/errno.h>
   50 #include <sys/event.h>
   51 #include <sys/poll.h>
   52 #include <sys/proc.h>
   53 #include <sys/selinfo.h>
   54 #include <sys/specialfd.h>
   55 #include <sys/sx.h>
   56 #include <sys/syscallsubr.h>
   57 #include <sys/timespec.h>
   58 #include <sys/eventfd.h>
   59 
   60 #ifdef COMPAT_LINUX32
   61 #include <machine/../linux32/linux.h>
   62 #include <machine/../linux32/linux32_proto.h>
   63 #else
   64 #include <machine/../linux/linux.h>
   65 #include <machine/../linux/linux_proto.h>
   66 #endif
   67 
   68 #include <compat/linux/linux_emul.h>
   69 #include <compat/linux/linux_event.h>
   70 #include <compat/linux/linux_file.h>
   71 #include <compat/linux/linux_timer.h>
   72 #include <compat/linux/linux_util.h>
   73 
   74 typedef uint64_t        epoll_udata_t;
   75 
   76 struct epoll_event {
   77         uint32_t        events;
   78         epoll_udata_t   data;
   79 }
   80 #if defined(__amd64__)
   81 __attribute__((packed))
   82 #endif
   83 ;
   84 
   85 #define LINUX_MAX_EVENTS        (INT_MAX / sizeof(struct epoll_event))
   86 
   87 static int      epoll_to_kevent(struct thread *td, int fd,
   88                     struct epoll_event *l_event, struct kevent *kevent,
   89                     int *nkevents);
   90 static void     kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
   91 static int      epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
   92 static int      epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
   93 static int      epoll_register_kevent(struct thread *td, struct file *epfp,
   94                     int fd, int filter, unsigned int flags);
   95 static int      epoll_fd_registered(struct thread *td, struct file *epfp,
   96                     int fd);
   97 static int      epoll_delete_all_events(struct thread *td, struct file *epfp,
   98                     int fd);
   99 
  100 struct epoll_copyin_args {
  101         struct kevent   *changelist;
  102 };
  103 
  104 struct epoll_copyout_args {
  105         struct epoll_event      *leventlist;
  106         struct proc             *p;
  107         uint32_t                count;
  108         int                     error;
  109 };
  110 
  111 /* timerfd */
  112 typedef uint64_t        timerfd_t;
  113 
  114 static fo_rdwr_t        timerfd_read;
  115 static fo_ioctl_t       timerfd_ioctl;
  116 static fo_poll_t        timerfd_poll;
  117 static fo_kqfilter_t    timerfd_kqfilter;
  118 static fo_stat_t        timerfd_stat;
  119 static fo_close_t       timerfd_close;
  120 static fo_fill_kinfo_t  timerfd_fill_kinfo;
  121 
  122 static struct fileops timerfdops = {
  123         .fo_read = timerfd_read,
  124         .fo_write = invfo_rdwr,
  125         .fo_truncate = invfo_truncate,
  126         .fo_ioctl = timerfd_ioctl,
  127         .fo_poll = timerfd_poll,
  128         .fo_kqfilter = timerfd_kqfilter,
  129         .fo_stat = timerfd_stat,
  130         .fo_close = timerfd_close,
  131         .fo_chmod = invfo_chmod,
  132         .fo_chown = invfo_chown,
  133         .fo_sendfile = invfo_sendfile,
  134         .fo_fill_kinfo = timerfd_fill_kinfo,
  135         .fo_flags = DFLAG_PASSABLE
  136 };
  137 
  138 static void     filt_timerfddetach(struct knote *kn);
  139 static int      filt_timerfdread(struct knote *kn, long hint);
  140 
  141 static struct filterops timerfd_rfiltops = {
  142         .f_isfd = 1,
  143         .f_detach = filt_timerfddetach,
  144         .f_event = filt_timerfdread
  145 };
  146 
  147 struct timerfd {
  148         clockid_t       tfd_clockid;
  149         struct itimerspec tfd_time;
  150         struct callout  tfd_callout;
  151         timerfd_t       tfd_count;
  152         bool            tfd_canceled;
  153         struct selinfo  tfd_sel;
  154         struct mtx      tfd_lock;
  155 };
  156 
  157 static void     linux_timerfd_expire(void *);
  158 static void     linux_timerfd_curval(struct timerfd *, struct itimerspec *);
  159 
  160 static int
  161 epoll_create_common(struct thread *td, int flags)
  162 {
  163 
  164         return (kern_kqueue(td, flags, NULL));
  165 }
  166 
  167 #ifdef LINUX_LEGACY_SYSCALLS
  168 int
  169 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
  170 {
  171 
   172         /*
   173          * args->size is unused. Linux only checks that it is
   174          * positive and then ignores it.
   175          */
  176         if (args->size <= 0)
  177                 return (EINVAL);
  178 
  179         return (epoll_create_common(td, 0));
  180 }
  181 #endif
  182 
  183 int
  184 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
  185 {
  186         int flags;
  187 
  188         if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
  189                 return (EINVAL);
  190 
  191         flags = 0;
  192         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  193                 flags |= O_CLOEXEC;
  194 
  195         return (epoll_create_common(td, flags));
  196 }
  197 
   198 /* Conversion function from epoll to kevent. */
  199 static int
  200 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
  201     struct kevent *kevent, int *nkevents)
  202 {
  203         uint32_t levents = l_event->events;
  204         struct linux_pemuldata *pem;
  205         struct proc *p;
  206         unsigned short kev_flags = EV_ADD | EV_ENABLE;
  207 
  208         /* flags related to how event is registered */
  209         if ((levents & LINUX_EPOLLONESHOT) != 0)
  210                 kev_flags |= EV_DISPATCH;
  211         if ((levents & LINUX_EPOLLET) != 0)
  212                 kev_flags |= EV_CLEAR;
  213         if ((levents & LINUX_EPOLLERR) != 0)
  214                 kev_flags |= EV_ERROR;
  215         if ((levents & LINUX_EPOLLRDHUP) != 0)
  216                 kev_flags |= EV_EOF;
  217 
  218         /* flags related to what event is registered */
  219         if ((levents & LINUX_EPOLL_EVRD) != 0) {
  220                 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
  221                 kevent->ext[0] = l_event->data;
  222                 ++kevent;
  223                 ++(*nkevents);
  224         }
  225         if ((levents & LINUX_EPOLL_EVWR) != 0) {
  226                 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
  227                 kevent->ext[0] = l_event->data;
  228                 ++kevent;
  229                 ++(*nkevents);
  230         }
  231         /* zero event mask is legal */
  232         if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
  233                 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
  234                 ++(*nkevents);
  235         }
  236 
  237         if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
  238                 p = td->td_proc;
  239 
  240                 pem = pem_find(p);
  241                 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
  242 
  243                 LINUX_PEM_XLOCK(pem);
  244                 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
  245                         pem->flags |= LINUX_XUNSUP_EPOLL;
  246                         LINUX_PEM_XUNLOCK(pem);
  247                         linux_msg(td, "epoll_ctl unsupported flags: 0x%x",
  248                             levents);
  249                 } else
  250                         LINUX_PEM_XUNLOCK(pem);
  251                 return (EINVAL);
  252         }
  253 
  254         return (0);
  255 }
  256 
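/*
 * Worked example of the mapping above (illustrative, and assuming that
 * LINUX_EPOLL_EVRD covers LINUX_EPOLLIN and LINUX_EPOLL_EVWR covers
 * LINUX_EPOLLOUT, as the macro names suggest): registering
 * events = LINUX_EPOLLIN | LINUX_EPOLLOUT | LINUX_EPOLLET with
 * data = 0x1234 on fd 7 produces two kevents and advances *nkevents by 2:
 *
 *   { ident = 7, filter = EVFILT_READ,  flags = EV_ADD|EV_ENABLE|EV_CLEAR, ext[0] = 0x1234 }
 *   { ident = 7, filter = EVFILT_WRITE, flags = EV_ADD|EV_ENABLE|EV_CLEAR, ext[0] = 0x1234 }
 */
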
   257 /*
   258  * Conversion function from kevent to epoll. In case this is
   259  * called on an error during registration, we store the error in
   260  * event->data and pick it up later in linux_epoll_ctl().
   261  */
  262 static void
  263 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
  264 {
  265 
  266         l_event->data = kevent->ext[0];
  267 
  268         if ((kevent->flags & EV_ERROR) != 0) {
  269                 l_event->events = LINUX_EPOLLERR;
  270                 return;
  271         }
  272 
  273         /* XXX EPOLLPRI, EPOLLHUP */
  274         switch (kevent->filter) {
  275         case EVFILT_READ:
  276                 l_event->events = LINUX_EPOLLIN;
  277                 if ((kevent->flags & EV_EOF) != 0)
  278                         l_event->events |= LINUX_EPOLLRDHUP;
   279                 break;
   280         case EVFILT_WRITE:
   281                 l_event->events = LINUX_EPOLLOUT;
   282                 break;
  283         }
  284 }
  285 
   286 /*
   287  * Copyout callback used by kevent. This converts kevent
   288  * events to epoll events and copies them back to
   289  * userspace. It is also called on an error when
   290  * registering a filter.
   291  */
  292 static int
  293 epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
  294 {
  295         struct epoll_copyout_args *args;
  296         struct epoll_event *eep;
  297         int error, i;
  298 
  299         args = (struct epoll_copyout_args*) arg;
  300         eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
  301 
  302         for (i = 0; i < count; i++)
  303                 kevent_to_epoll(&kevp[i], &eep[i]);
  304 
  305         error = copyout(eep, args->leventlist, count * sizeof(*eep));
  306         if (error == 0) {
  307                 args->leventlist += count;
  308                 args->count += count;
  309         } else if (args->error == 0)
  310                 args->error = error;
  311 
  312         free(eep, M_EPOLL);
  313         return (error);
  314 }
  315 
  316 /*
  317  * Copyin callback used by kevent. This copies already
  318  * converted filters from kernel memory to the kevent
  319  * internal kernel memory. Hence the memcpy instead of
  320  * copyin.
  321  */
  322 static int
  323 epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
  324 {
  325         struct epoll_copyin_args *args;
  326 
  327         args = (struct epoll_copyin_args*) arg;
  328 
  329         memcpy(kevp, args->changelist, count * sizeof(*kevp));
  330         args->changelist += count;
  331 
  332         return (0);
  333 }
  334 
   335 /*
   336  * Load an epoll filter, convert it to a kevent filter,
   337  * and load it into the kevent subsystem.
   338  */
  339 int
  340 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
  341 {
  342         struct file *epfp, *fp;
  343         struct epoll_copyin_args ciargs;
  344         struct kevent kev[2];
  345         struct kevent_copyops k_ops = { &ciargs,
  346                                         NULL,
  347                                         epoll_kev_copyin};
  348         struct epoll_event le;
  349         cap_rights_t rights;
  350         int nchanges = 0;
  351         int error;
  352 
  353         if (args->op != LINUX_EPOLL_CTL_DEL) {
  354                 error = copyin(args->event, &le, sizeof(le));
  355                 if (error != 0)
  356                         return (error);
  357         }
  358 
  359         error = fget(td, args->epfd,
  360             cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp);
  361         if (error != 0)
  362                 return (error);
  363         if (epfp->f_type != DTYPE_KQUEUE) {
  364                 error = EINVAL;
  365                 goto leave1;
  366         }
  367 
   368         /* Protect user data vector from incorrectly supplied fd. */
  369         error = fget(td, args->fd,
  370                      cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp);
  371         if (error != 0)
  372                 goto leave1;
  373 
   374         /* Linux disallows an epoll instance from monitoring itself. */
  375         if (epfp == fp) {
  376                 error = EINVAL;
  377                 goto leave0;
  378         }
  379 
  380         ciargs.changelist = kev;
  381 
  382         if (args->op != LINUX_EPOLL_CTL_DEL) {
  383                 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
  384                 if (error != 0)
  385                         goto leave0;
  386         }
  387 
  388         switch (args->op) {
  389         case LINUX_EPOLL_CTL_MOD:
  390                 error = epoll_delete_all_events(td, epfp, args->fd);
  391                 if (error != 0)
  392                         goto leave0;
  393                 break;
  394 
  395         case LINUX_EPOLL_CTL_ADD:
  396                 if (epoll_fd_registered(td, epfp, args->fd)) {
  397                         error = EEXIST;
  398                         goto leave0;
  399                 }
  400                 break;
  401 
  402         case LINUX_EPOLL_CTL_DEL:
   403                 /* CTL_DEL means unregister this fd from this epoll instance. */
  404                 error = epoll_delete_all_events(td, epfp, args->fd);
  405                 goto leave0;
  406 
  407         default:
  408                 error = EINVAL;
  409                 goto leave0;
  410         }
  411 
  412         error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
  413 
  414 leave0:
  415         fdrop(fp, td);
  416 
  417 leave1:
  418         fdrop(epfp, td);
  419         return (error);
  420 }
  421 
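For reference, a minimal Linux-side sketch of the sequence that the handlers above service under the Linuxulator (standard Linux epoll API; the program itself is illustrative and not part of this file):

#include <sys/epoll.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct epoll_event ev, out[8];
        int epfd, n;

        epfd = epoll_create1(EPOLL_CLOEXEC);    /* serviced by linux_epoll_create1() */
        if (epfd < 0)
                return (1);

        ev.events = EPOLLIN | EPOLLET;          /* edge-triggered read */
        ev.data.u64 = 0x1234;                   /* opaque cookie, carried in kevent ext[0] */
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) != 0)     /* linux_epoll_ctl() */
                return (1);

        /* linux_epoll_wait() or linux_epoll_pwait(), depending on the libc */
        n = epoll_wait(epfd, out, 8, 1000);     /* 1000 ms timeout */
        if (n > 0)
                printf("cookie %#llx is readable\n", (unsigned long long)out[0].data.u64);

        close(epfd);
        return (0);
}
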
  422 /*
  423  * Wait for a filter to be triggered on the epoll file descriptor.
  424  */
  425 static int
  426 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
  427     int maxevents, int timeout, sigset_t *uset)
  428 {
  429         struct epoll_copyout_args coargs;
  430         struct kevent_copyops k_ops = { &coargs,
  431                                         epoll_kev_copyout,
  432                                         NULL};
  433         struct timespec ts, *tsp;
  434         cap_rights_t rights;
  435         struct file *epfp;
  436         sigset_t omask;
  437         int error;
  438 
  439         if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
  440                 return (EINVAL);
  441 
  442         error = fget(td, epfd,
  443             cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp);
  444         if (error != 0)
  445                 return (error);
  446         if (epfp->f_type != DTYPE_KQUEUE) {
  447                 error = EINVAL;
  448                 goto leave;
  449         }
  450         if (uset != NULL) {
  451                 error = kern_sigprocmask(td, SIG_SETMASK, uset,
  452                     &omask, 0);
  453                 if (error != 0)
  454                         goto leave;
  455                 td->td_pflags |= TDP_OLDMASK;
  456                 /*
  457                  * Make sure that ast() is called on return to
  458                  * usermode and TDP_OLDMASK is cleared, restoring old
  459                  * sigmask.
  460                  */
  461                 thread_lock(td);
  462                 td->td_flags |= TDF_ASTPENDING;
  463                 thread_unlock(td);
  464         }
  465 
  466         coargs.leventlist = events;
  467         coargs.p = td->td_proc;
  468         coargs.count = 0;
  469         coargs.error = 0;
  470 
   471         /*
   472          * The Linux epoll_wait(2) man page states that a timeout of -1 causes
   473          * the caller to block indefinitely. The real implementation blocks for
   474          * any negative timeout value.
   475          */
  476         if (timeout >= 0) {
  477                 /* Convert from milliseconds to timespec. */
  478                 ts.tv_sec = timeout / 1000;
  479                 ts.tv_nsec = (timeout % 1000) * 1000000;
  480                 tsp = &ts;
  481         } else {
  482                 tsp = NULL;
  483         }
  484 
  485         error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
  486         if (error == 0 && coargs.error != 0)
  487                 error = coargs.error;
  488 
   489         /*
   490          * kern_kevent() might return ENOMEM, which epoll_wait(2) is not expected
   491          * to return. We could translate that, but it is unlikely to matter.
   492          */
  493         if (error == 0)
  494                 td->td_retval[0] = coargs.count;
  495 
  496         if (uset != NULL)
  497                 error = kern_sigprocmask(td, SIG_SETMASK, &omask,
  498                     NULL, 0);
  499 leave:
  500         fdrop(epfp, td);
  501         return (error);
  502 }
  503 
  504 #ifdef LINUX_LEGACY_SYSCALLS
  505 int
  506 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
  507 {
  508 
  509         return (linux_epoll_wait_common(td, args->epfd, args->events,
  510             args->maxevents, args->timeout, NULL));
  511 }
  512 #endif
  513 
  514 int
  515 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
  516 {
  517         sigset_t mask, *pmask;
  518         l_sigset_t lmask;
  519         int error;
  520 
  521         if (args->mask != NULL) {
  522                 if (args->sigsetsize != sizeof(l_sigset_t))
  523                         return (EINVAL);
  524                 error = copyin(args->mask, &lmask, sizeof(l_sigset_t));
  525                 if (error != 0)
  526                         return (error);
  527                 linux_to_bsd_sigset(&lmask, &mask);
  528                 pmask = &mask;
  529         } else
  530                 pmask = NULL;
  531         return (linux_epoll_wait_common(td, args->epfd, args->events,
  532             args->maxevents, args->timeout, pmask));
  533 }
  534 
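The pwait variant only adds the atomically installed signal mask. A minimal userland sketch of its intended use (the helper name is illustrative):

#include <sys/epoll.h>
#include <signal.h>

/*
 * Keep SIGINT blocked in normal operation but let it interrupt the wait:
 * epoll_pwait() installs the saved (pre-block) mask for the duration of
 * the wait, much as linux_epoll_wait_common() does via kern_sigprocmask().
 */
int
wait_interruptible(int epfd, struct epoll_event *out, int maxevents)
{
        sigset_t block, oldmask;

        sigemptyset(&block);
        sigaddset(&block, SIGINT);
        sigprocmask(SIG_BLOCK, &block, &oldmask);

        return (epoll_pwait(epfd, out, maxevents, -1, &oldmask));
}
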
  535 static int
  536 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
  537     unsigned int flags)
  538 {
  539         struct epoll_copyin_args ciargs;
  540         struct kevent kev;
  541         struct kevent_copyops k_ops = { &ciargs,
  542                                         NULL,
  543                                         epoll_kev_copyin};
  544 
  545         ciargs.changelist = &kev;
  546         EV_SET(&kev, fd, filter, flags, 0, 0, 0);
  547 
  548         return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
  549 }
  550 
  551 static int
  552 epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
  553 {
   554         /*
   555          * Set empty filter flags to avoid accidental modification of already
   556          * registered events. In the case of event re-registration:
   557          * 1. If the event does not exist, kevent() does nothing and returns ENOENT.
   558          * 2. If the event does exist, its enabled/disabled state is preserved,
   559          *    but the fflags, data and udata fields are overwritten. So we cannot
   560          *    set socket low-water marks or store the user's context pointer in udata.
   561          */
  562         if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
  563             epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
  564                 return (1);
  565 
  566         return (0);
  567 }
  568 
  569 static int
  570 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
  571 {
  572         int error1, error2;
  573 
  574         error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
  575         error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
  576 
   577         /* Return 0 if at least one of the two deletions succeeded. */
  578         return (error1 == 0 ? 0 : error2);
  579 }
  580 
  581 #ifdef LINUX_LEGACY_SYSCALLS
  582 int
  583 linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
  584 {
  585         struct specialfd_eventfd ae;
  586 
  587         bzero(&ae, sizeof(ae));
  588         ae.initval = args->initval;
  589         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  590 }
  591 #endif
  592 
  593 int
  594 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
  595 {
  596         struct specialfd_eventfd ae;
  597         int flags;
  598 
  599         if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
  600             LINUX_EFD_SEMAPHORE)) != 0)
  601                 return (EINVAL);
  602         flags = 0;
  603         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  604                 flags |= EFD_CLOEXEC;
  605         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  606                 flags |= EFD_NONBLOCK;
  607         if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
  608                 flags |= EFD_SEMAPHORE;
  609 
  610         bzero(&ae, sizeof(ae));
  611         ae.flags = flags;
  612         ae.initval = args->initval;
  613         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  614 }
  615 
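Since linux_eventfd() and linux_eventfd2() only translate flags and defer to kern_specialfd(), the visible semantics are those of the ordinary eventfd(2) counter. A minimal Linux-side sketch (illustrative only; glibc's eventfd() wrapper is typically routed to linux_eventfd2()):

#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        uint64_t val;
        int efd;

        efd = eventfd(3, EFD_CLOEXEC | EFD_NONBLOCK);   /* initval = 3 */
        if (efd < 0)
                return (1);

        val = 2;
        write(efd, &val, sizeof(val));  /* counter becomes 5 */
        read(efd, &val, sizeof(val));   /* returns 5 and resets the counter to 0 */
        printf("counter was %ju\n", (uintmax_t)val);

        close(efd);
        return (0);
}
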
  616 int
  617 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
  618 {
  619         struct filedesc *fdp;
  620         struct timerfd *tfd;
  621         struct file *fp;
  622         clockid_t clockid;
  623         int fflags, fd, error;
  624 
  625         if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
  626                 return (EINVAL);
  627 
  628         error = linux_to_native_clockid(&clockid, args->clockid);
  629         if (error != 0)
  630                 return (error);
  631         if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
  632                 return (EINVAL);
  633 
  634         fflags = 0;
  635         if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
  636                 fflags |= O_CLOEXEC;
  637 
  638         fdp = td->td_proc->p_fd;
  639         error = falloc(td, &fp, &fd, fflags);
  640         if (error != 0)
  641                 return (error);
  642 
  643         tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
  644         tfd->tfd_clockid = clockid;
  645         mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
  646 
  647         callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
  648         knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
  649 
  650         fflags = FREAD;
  651         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  652                 fflags |= FNONBLOCK;
  653 
  654         finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
  655         fdrop(fp, td);
  656 
  657         td->td_retval[0] = fd;
  658         return (error);
  659 }
  660 
  661 static int
  662 timerfd_close(struct file *fp, struct thread *td)
  663 {
  664         struct timerfd *tfd;
  665 
  666         tfd = fp->f_data;
  667         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  668                 return (EINVAL);
  669 
  670         timespecclear(&tfd->tfd_time.it_value);
  671         timespecclear(&tfd->tfd_time.it_interval);
  672 
  673         mtx_lock(&tfd->tfd_lock);
  674         callout_drain(&tfd->tfd_callout);
  675         mtx_unlock(&tfd->tfd_lock);
  676 
  677         seldrain(&tfd->tfd_sel);
  678         knlist_destroy(&tfd->tfd_sel.si_note);
  679 
  680         fp->f_ops = &badfileops;
  681         mtx_destroy(&tfd->tfd_lock);
  682         free(tfd, M_EPOLL);
  683 
  684         return (0);
  685 }
  686 
  687 static int
  688 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
  689     int flags, struct thread *td)
  690 {
  691         struct timerfd *tfd;
  692         timerfd_t count;
  693         int error;
  694 
  695         tfd = fp->f_data;
  696         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  697                 return (EINVAL);
  698 
  699         if (uio->uio_resid < sizeof(timerfd_t))
  700                 return (EINVAL);
  701 
  702         error = 0;
  703         mtx_lock(&tfd->tfd_lock);
  704 retry:
  705         if (tfd->tfd_canceled) {
  706                 tfd->tfd_count = 0;
  707                 mtx_unlock(&tfd->tfd_lock);
  708                 return (ECANCELED);
  709         }
  710         if (tfd->tfd_count == 0) {
  711                 if ((fp->f_flag & FNONBLOCK) != 0) {
  712                         mtx_unlock(&tfd->tfd_lock);
  713                         return (EAGAIN);
  714                 }
  715                 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
  716                 if (error == 0)
  717                         goto retry;
  718         }
  719         if (error == 0) {
  720                 count = tfd->tfd_count;
  721                 tfd->tfd_count = 0;
  722                 mtx_unlock(&tfd->tfd_lock);
  723                 error = uiomove(&count, sizeof(timerfd_t), uio);
  724         } else
  725                 mtx_unlock(&tfd->tfd_lock);
  726 
  727         return (error);
  728 }
  729 
  730 static int
  731 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
  732     struct thread *td)
  733 {
  734         struct timerfd *tfd;
  735         int revents = 0;
  736 
  737         tfd = fp->f_data;
  738         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  739                 return (POLLERR);
  740 
  741         mtx_lock(&tfd->tfd_lock);
  742         if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
  743                 revents |= events & (POLLIN|POLLRDNORM);
  744         if (revents == 0)
  745                 selrecord(td, &tfd->tfd_sel);
  746         mtx_unlock(&tfd->tfd_lock);
  747 
  748         return (revents);
  749 }
  750 
  751 static int
  752 timerfd_kqfilter(struct file *fp, struct knote *kn)
  753 {
  754         struct timerfd *tfd;
  755 
  756         tfd = fp->f_data;
  757         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  758                 return (EINVAL);
  759 
  760         if (kn->kn_filter == EVFILT_READ)
  761                 kn->kn_fop = &timerfd_rfiltops;
  762         else
  763                 return (EINVAL);
  764 
  765         kn->kn_hook = tfd;
  766         knlist_add(&tfd->tfd_sel.si_note, kn, 0);
  767 
  768         return (0);
  769 }
  770 
  771 static void
  772 filt_timerfddetach(struct knote *kn)
  773 {
  774         struct timerfd *tfd = kn->kn_hook;
  775 
  776         mtx_lock(&tfd->tfd_lock);
  777         knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
  778         mtx_unlock(&tfd->tfd_lock);
  779 }
  780 
  781 static int
  782 filt_timerfdread(struct knote *kn, long hint)
  783 {
  784         struct timerfd *tfd = kn->kn_hook;
  785 
  786         return (tfd->tfd_count > 0);
  787 }
  788 
  789 static int
  790 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
  791     struct ucred *active_cred, struct thread *td)
  792 {
  793 
  794         if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
  795                 return (EINVAL);
  796 
  797         switch (cmd) {
  798         case FIONBIO:
  799         case FIOASYNC:
  800                 return (0);
  801         }
  802 
  803         return (ENOTTY);
  804 }
  805 
  806 static int
  807 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
  808     struct thread *td)
  809 {
  810 
  811         return (ENXIO);
  812 }
  813 
  814 static int
  815 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
  816 {
  817 
  818         kif->kf_type = KF_TYPE_UNKNOWN;
  819         return (0);
  820 }
  821 
  822 static void
  823 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
  824 {
  825 
  826         if (tfd->tfd_clockid == CLOCK_REALTIME)
  827                 getnanotime(ts);
  828         else    /* CLOCK_MONOTONIC */
  829                 getnanouptime(ts);
  830 }
  831 
  832 static void
  833 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
  834 {
  835         struct timespec cts;
  836 
  837         linux_timerfd_clocktime(tfd, &cts);
  838         *ots = tfd->tfd_time;
  839         if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
  840                 timespecsub(&ots->it_value, &cts, &ots->it_value);
  841                 if (ots->it_value.tv_sec < 0 ||
  842                     (ots->it_value.tv_sec == 0 &&
  843                      ots->it_value.tv_nsec == 0)) {
  844                         ots->it_value.tv_sec  = 0;
  845                         ots->it_value.tv_nsec = 1;
  846                 }
  847         }
  848 }
  849 
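/*
 * Note that an armed timer whose expiration time has already passed is
 * reported with it_value = { 0, 1 } (one nanosecond) rather than { 0, 0 },
 * presumably so that callers of timerfd_gettime(2) can still tell it apart
 * from a disarmed timer, which reports { 0, 0 }.
 */
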
  850 int
  851 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args)
  852 {
  853         struct l_itimerspec lots;
  854         struct itimerspec ots;
  855         struct timerfd *tfd;
  856         struct file *fp;
  857         int error;
  858 
  859         error = fget(td, args->fd, &cap_read_rights, &fp);
  860         if (error != 0)
  861                 return (error);
  862         tfd = fp->f_data;
  863         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  864                 error = EINVAL;
  865                 goto out;
  866         }
  867 
  868         mtx_lock(&tfd->tfd_lock);
  869         linux_timerfd_curval(tfd, &ots);
  870         mtx_unlock(&tfd->tfd_lock);
  871 
  872         error = native_to_linux_itimerspec(&lots, &ots);
  873         if (error == 0)
  874                 error = copyout(&lots, args->old_value, sizeof(lots));
  875 
  876 out:
  877         fdrop(fp, td);
  878         return (error);
  879 }
  880 
  881 int
  882 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
  883 {
  884         struct l_itimerspec lots;
  885         struct itimerspec nts, ots;
  886         struct timespec cts, ts;
  887         struct timerfd *tfd;
  888         struct timeval tv;
  889         struct file *fp;
  890         int error;
  891 
  892         if ((args->flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
  893                 return (EINVAL);
  894 
  895         error = copyin(args->new_value, &lots, sizeof(lots));
  896         if (error != 0)
  897                 return (error);
  898         error = linux_to_native_itimerspec(&nts, &lots);
  899         if (error != 0)
  900                 return (error);
  901 
  902         error = fget(td, args->fd, &cap_write_rights, &fp);
  903         if (error != 0)
  904                 return (error);
  905         tfd = fp->f_data;
  906         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  907                 error = EINVAL;
  908                 goto out;
  909         }
  910 
  911         mtx_lock(&tfd->tfd_lock);
  912         if (!timespecisset(&nts.it_value))
  913                 timespecclear(&nts.it_interval);
  914         if (args->old_value != NULL)
  915                 linux_timerfd_curval(tfd, &ots);
  916 
  917         tfd->tfd_time = nts;
  918         tfd->tfd_count = 0;
  919         if (timespecisset(&nts.it_value)) {
  920                 linux_timerfd_clocktime(tfd, &cts);
  921                 ts = nts.it_value;
  922                 if ((args->flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
  923                         timespecadd(&tfd->tfd_time.it_value, &cts,
  924                                 &tfd->tfd_time.it_value);
  925                 } else {
  926                         timespecsub(&ts, &cts, &ts);
  927                 }
  928                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
  929                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
  930                         linux_timerfd_expire, tfd);
  931                 tfd->tfd_canceled = false;
  932         } else {
  933                 tfd->tfd_canceled = true;
  934                 callout_stop(&tfd->tfd_callout);
  935         }
  936         mtx_unlock(&tfd->tfd_lock);
  937 
  938         if (args->old_value != NULL) {
  939                 error = native_to_linux_itimerspec(&lots, &ots);
  940                 if (error == 0)
  941                         error = copyout(&lots, args->old_value, sizeof(lots));
  942         }
  943 
  944 out:
  945         fdrop(fp, td);
  946         return (error);
  947 }
  948 
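For reference, a minimal Linux-side sketch of the create/arm/read sequence serviced by linux_timerfd_create(), linux_timerfd_settime() and timerfd_read() above (standard Linux timerfd API; the program is illustrative and not part of this file):

#include <sys/timerfd.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int
main(void)
{
        struct itimerspec its = {
                .it_value    = { .tv_sec = 1, .tv_nsec = 0 },   /* first expiry in 1 s */
                .it_interval = { .tv_sec = 1, .tv_nsec = 0 },   /* then every 1 s */
        };
        uint64_t expirations;
        int tfd;

        tfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
        if (tfd < 0)
                return (1);

        /* Relative arming; LINUX_TFD_TIMER_ABSTIME corresponds to TFD_TIMER_ABSTIME. */
        if (timerfd_settime(tfd, 0, &its, NULL) != 0)
                return (1);

        /* Blocks until at least one expiry, then returns the expiration count. */
        read(tfd, &expirations, sizeof(expirations));
        printf("%ju expiration(s)\n", (uintmax_t)expirations);

        close(tfd);
        return (0);
}
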
  949 static void
  950 linux_timerfd_expire(void *arg)
  951 {
  952         struct timespec cts, ts;
  953         struct timeval tv;
  954         struct timerfd *tfd;
  955 
  956         tfd = (struct timerfd *)arg;
  957 
  958         linux_timerfd_clocktime(tfd, &cts);
  959         if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
  960                 if (timespecisset(&tfd->tfd_time.it_interval))
  961                         timespecadd(&tfd->tfd_time.it_value,
  962                                     &tfd->tfd_time.it_interval,
  963                                     &tfd->tfd_time.it_value);
  964                 else
  965                         /* single shot timer */
  966                         timespecclear(&tfd->tfd_time.it_value);
  967                 if (timespecisset(&tfd->tfd_time.it_value)) {
  968                         timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
  969                         TIMESPEC_TO_TIMEVAL(&tv, &ts);
  970                         callout_reset(&tfd->tfd_callout, tvtohz(&tv),
  971                                 linux_timerfd_expire, tfd);
  972                 }
  973                 tfd->tfd_count++;
  974                 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
  975                 selwakeup(&tfd->tfd_sel);
  976                 wakeup(&tfd->tfd_count);
  977         } else if (timespecisset(&tfd->tfd_time.it_value)) {
  978                 timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
  979                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
  980                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
  981                     linux_timerfd_expire, tfd);
  982         }
  983 }
