The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_event.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2007 Roman Divacky
    5  * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_compat.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/imgact.h>
   37 #include <sys/kernel.h>
   38 #include <sys/limits.h>
   39 #include <sys/lock.h>
   40 #include <sys/mutex.h>
   41 #include <sys/callout.h>
   42 #include <sys/capsicum.h>
   43 #include <sys/types.h>
   44 #include <sys/user.h>
   45 #include <sys/file.h>
   46 #include <sys/filedesc.h>
   47 #include <sys/filio.h>
   48 #include <sys/errno.h>
   49 #include <sys/event.h>
   50 #include <sys/poll.h>
   51 #include <sys/proc.h>
   52 #include <sys/selinfo.h>
   53 #include <sys/specialfd.h>
   54 #include <sys/sx.h>
   55 #include <sys/syscallsubr.h>
   56 #include <sys/timespec.h>
   57 #include <sys/eventfd.h>
   58 
   59 #ifdef COMPAT_LINUX32
   60 #include <machine/../linux32/linux.h>
   61 #include <machine/../linux32/linux32_proto.h>
   62 #else
   63 #include <machine/../linux/linux.h>
   64 #include <machine/../linux/linux_proto.h>
   65 #endif
   66 
   67 #include <compat/linux/linux_emul.h>
   68 #include <compat/linux/linux_event.h>
   69 #include <compat/linux/linux_file.h>
   70 #include <compat/linux/linux_signal.h>
   71 #include <compat/linux/linux_timer.h>
   72 #include <compat/linux/linux_util.h>
   73 
   74 typedef uint64_t        epoll_udata_t;
   75 
   76 struct epoll_event {
   77         uint32_t        events;
   78         epoll_udata_t   data;
   79 }
   80 #if defined(__amd64__)
   81 __attribute__((packed))
   82 #endif
   83 ;
   84 
   85 #define LINUX_MAX_EVENTS        (INT_MAX / sizeof(struct epoll_event))
   86 
   87 static int      epoll_to_kevent(struct thread *td, int fd,
   88                     struct epoll_event *l_event, struct kevent *kevent,
   89                     int *nkevents);
   90 static void     kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
   91 static int      epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
   92 static int      epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
   93 static int      epoll_register_kevent(struct thread *td, struct file *epfp,
   94                     int fd, int filter, unsigned int flags);
   95 static int      epoll_fd_registered(struct thread *td, struct file *epfp,
   96                     int fd);
   97 static int      epoll_delete_all_events(struct thread *td, struct file *epfp,
   98                     int fd);
   99 
  100 struct epoll_copyin_args {
  101         struct kevent   *changelist;
  102 };
  103 
  104 struct epoll_copyout_args {
  105         struct epoll_event      *leventlist;
  106         struct proc             *p;
  107         uint32_t                count;
  108         int                     error;
  109 };
  110 
  111 /* timerfd */
  112 typedef uint64_t        timerfd_t;
  113 
  114 static fo_rdwr_t        timerfd_read;
  115 static fo_ioctl_t       timerfd_ioctl;
  116 static fo_poll_t        timerfd_poll;
  117 static fo_kqfilter_t    timerfd_kqfilter;
  118 static fo_stat_t        timerfd_stat;
  119 static fo_close_t       timerfd_close;
  120 static fo_fill_kinfo_t  timerfd_fill_kinfo;
  121 
  122 static struct fileops timerfdops = {
  123         .fo_read = timerfd_read,
  124         .fo_write = invfo_rdwr,
  125         .fo_truncate = invfo_truncate,
  126         .fo_ioctl = timerfd_ioctl,
  127         .fo_poll = timerfd_poll,
  128         .fo_kqfilter = timerfd_kqfilter,
  129         .fo_stat = timerfd_stat,
  130         .fo_close = timerfd_close,
  131         .fo_chmod = invfo_chmod,
  132         .fo_chown = invfo_chown,
  133         .fo_sendfile = invfo_sendfile,
  134         .fo_fill_kinfo = timerfd_fill_kinfo,
  135         .fo_flags = DFLAG_PASSABLE
  136 };
  137 
  138 static void     filt_timerfddetach(struct knote *kn);
  139 static int      filt_timerfdread(struct knote *kn, long hint);
  140 
  141 static struct filterops timerfd_rfiltops = {
  142         .f_isfd = 1,
  143         .f_detach = filt_timerfddetach,
  144         .f_event = filt_timerfdread
  145 };
  146 
  147 struct timerfd {
  148         clockid_t       tfd_clockid;
  149         struct itimerspec tfd_time;
  150         struct callout  tfd_callout;
  151         timerfd_t       tfd_count;
  152         bool            tfd_canceled;
  153         struct selinfo  tfd_sel;
  154         struct mtx      tfd_lock;
  155 };
  156 
  157 static void     linux_timerfd_expire(void *);
  158 static void     linux_timerfd_curval(struct timerfd *, struct itimerspec *);
  159 
  160 static int
  161 epoll_create_common(struct thread *td, int flags)
  162 {
  163 
  164         return (kern_kqueue(td, flags, NULL));
  165 }
  166 
  167 #ifdef LINUX_LEGACY_SYSCALLS
  168 int
  169 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
  170 {
  171 
  172         /*
  173          * args->size is unused. Linux just tests it
  174          * and then forgets it as well.
  175          */
  176         if (args->size <= 0)
  177                 return (EINVAL);
  178 
  179         return (epoll_create_common(td, 0));
  180 }
  181 #endif
  182 
  183 int
  184 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
  185 {
  186         int flags;
  187 
  188         if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
  189                 return (EINVAL);
  190 
  191         flags = 0;
  192         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  193                 flags |= O_CLOEXEC;
  194 
  195         return (epoll_create_common(td, flags));
  196 }
  197 
  198 /* Structure converting function from epoll to kevent. */
  199 static int
  200 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
  201     struct kevent *kevent, int *nkevents)
  202 {
  203         uint32_t levents = l_event->events;
  204         struct linux_pemuldata *pem;
  205         struct proc *p;
  206         unsigned short kev_flags = EV_ADD | EV_ENABLE;
  207 
  208         /* flags related to how event is registered */
  209         if ((levents & LINUX_EPOLLONESHOT) != 0)
  210                 kev_flags |= EV_DISPATCH;
  211         if ((levents & LINUX_EPOLLET) != 0)
  212                 kev_flags |= EV_CLEAR;
  213         if ((levents & LINUX_EPOLLERR) != 0)
  214                 kev_flags |= EV_ERROR;
  215         if ((levents & LINUX_EPOLLRDHUP) != 0)
  216                 kev_flags |= EV_EOF;
  217 
  218         /* flags related to what event is registered */
  219         if ((levents & LINUX_EPOLL_EVRD) != 0) {
  220                 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
  221                 kevent->ext[0] = l_event->data;
  222                 ++kevent;
  223                 ++(*nkevents);
  224         }
  225         if ((levents & LINUX_EPOLL_EVWR) != 0) {
  226                 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
  227                 kevent->ext[0] = l_event->data;
  228                 ++kevent;
  229                 ++(*nkevents);
  230         }
  231         /* zero event mask is legal */
  232         if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
  233                 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
  234                 ++(*nkevents);
  235         }
  236 
  237         if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
  238                 p = td->td_proc;
  239 
  240                 pem = pem_find(p);
  241                 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
  242 
  243                 LINUX_PEM_XLOCK(pem);
  244                 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
  245                         pem->flags |= LINUX_XUNSUP_EPOLL;
  246                         LINUX_PEM_XUNLOCK(pem);
  247                         linux_msg(td, "epoll_ctl unsupported flags: 0x%x",
  248                             levents);
  249                 } else
  250                         LINUX_PEM_XUNLOCK(pem);
  251                 return (EINVAL);
  252         }
  253 
  254         return (0);
  255 }
  256 
  257 /*
  258  * Structure converting function from kevent to epoll. In a case
  259  * this is called on error in registration we store the error in
  260  * event->data and pick it up later in linux_epoll_ctl().
  261  */
  262 static void
  263 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
  264 {
  265 
  266         l_event->data = kevent->ext[0];
  267 
  268         if ((kevent->flags & EV_ERROR) != 0) {
  269                 l_event->events = LINUX_EPOLLERR;
  270                 return;
  271         }
  272 
  273         /* XXX EPOLLPRI, EPOLLHUP */
  274         switch (kevent->filter) {
  275         case EVFILT_READ:
  276                 l_event->events = LINUX_EPOLLIN;
  277                 if ((kevent->flags & EV_EOF) != 0)
  278                         l_event->events |= LINUX_EPOLLRDHUP;
  279         break;
  280         case EVFILT_WRITE:
  281                 l_event->events = LINUX_EPOLLOUT;
  282         break;
  283         }
  284 }
  285 
  286 /*
  287  * Copyout callback used by kevent. This converts kevent
  288  * events to epoll events and copies them back to the
  289  * userspace. This is also called on error on registering
  290  * of the filter.
  291  */
  292 static int
  293 epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
  294 {
  295         struct epoll_copyout_args *args;
  296         struct epoll_event *eep;
  297         int error, i;
  298 
  299         args = (struct epoll_copyout_args*) arg;
  300         eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
  301 
  302         for (i = 0; i < count; i++)
  303                 kevent_to_epoll(&kevp[i], &eep[i]);
  304 
  305         error = copyout(eep, args->leventlist, count * sizeof(*eep));
  306         if (error == 0) {
  307                 args->leventlist += count;
  308                 args->count += count;
  309         } else if (args->error == 0)
  310                 args->error = error;
  311 
  312         free(eep, M_EPOLL);
  313         return (error);
  314 }
  315 
  316 /*
  317  * Copyin callback used by kevent. This copies already
  318  * converted filters from kernel memory to the kevent
  319  * internal kernel memory. Hence the memcpy instead of
  320  * copyin.
  321  */
  322 static int
  323 epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
  324 {
  325         struct epoll_copyin_args *args;
  326 
  327         args = (struct epoll_copyin_args*) arg;
  328 
  329         memcpy(kevp, args->changelist, count * sizeof(*kevp));
  330         args->changelist += count;
  331 
  332         return (0);
  333 }
  334 
  335 /*
  336  * Load epoll filter, convert it to kevent filter
  337  * and load it into kevent subsystem.
  338  */
  339 int
  340 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
  341 {
  342         struct file *epfp, *fp;
  343         struct epoll_copyin_args ciargs;
  344         struct kevent kev[2];
  345         struct kevent_copyops k_ops = { &ciargs,
  346                                         NULL,
  347                                         epoll_kev_copyin};
  348         struct epoll_event le;
  349         cap_rights_t rights;
  350         int nchanges = 0;
  351         int error;
  352 
  353         if (args->op != LINUX_EPOLL_CTL_DEL) {
  354                 error = copyin(args->event, &le, sizeof(le));
  355                 if (error != 0)
  356                         return (error);
  357         }
  358 
  359         error = fget(td, args->epfd,
  360             cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp);
  361         if (error != 0)
  362                 return (error);
  363         if (epfp->f_type != DTYPE_KQUEUE) {
  364                 error = EINVAL;
  365                 goto leave1;
  366         }
  367 
  368          /* Protect user data vector from incorrectly supplied fd. */
  369         error = fget(td, args->fd,
  370                      cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp);
  371         if (error != 0)
  372                 goto leave1;
  373 
  374         /* Linux disallows spying on himself */
  375         if (epfp == fp) {
  376                 error = EINVAL;
  377                 goto leave0;
  378         }
  379 
  380         ciargs.changelist = kev;
  381 
  382         if (args->op != LINUX_EPOLL_CTL_DEL) {
  383                 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
  384                 if (error != 0)
  385                         goto leave0;
  386         }
  387 
  388         switch (args->op) {
  389         case LINUX_EPOLL_CTL_MOD:
  390                 error = epoll_delete_all_events(td, epfp, args->fd);
  391                 if (error != 0)
  392                         goto leave0;
  393                 break;
  394 
  395         case LINUX_EPOLL_CTL_ADD:
  396                 if (epoll_fd_registered(td, epfp, args->fd)) {
  397                         error = EEXIST;
  398                         goto leave0;
  399                 }
  400                 break;
  401 
  402         case LINUX_EPOLL_CTL_DEL:
  403                 /* CTL_DEL means unregister this fd with this epoll */
  404                 error = epoll_delete_all_events(td, epfp, args->fd);
  405                 goto leave0;
  406 
  407         default:
  408                 error = EINVAL;
  409                 goto leave0;
  410         }
  411 
  412         error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
  413 
  414 leave0:
  415         fdrop(fp, td);
  416 
  417 leave1:
  418         fdrop(epfp, td);
  419         return (error);
  420 }
  421 
  422 /*
  423  * Wait for a filter to be triggered on the epoll file descriptor.
  424  */
  425 
  426 static int
  427 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events,
  428     int maxevents, struct timespec *tsp, sigset_t *uset)
  429 {
  430         struct epoll_copyout_args coargs;
  431         struct kevent_copyops k_ops = { &coargs,
  432                                         epoll_kev_copyout,
  433                                         NULL};
  434         cap_rights_t rights;
  435         struct file *epfp;
  436         sigset_t omask;
  437         int error;
  438 
  439         if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
  440                 return (EINVAL);
  441 
  442         error = fget(td, epfd,
  443             cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp);
  444         if (error != 0)
  445                 return (error);
  446         if (epfp->f_type != DTYPE_KQUEUE) {
  447                 error = EINVAL;
  448                 goto leave;
  449         }
  450         if (uset != NULL) {
  451                 error = kern_sigprocmask(td, SIG_SETMASK, uset,
  452                     &omask, 0);
  453                 if (error != 0)
  454                         goto leave;
  455                 td->td_pflags |= TDP_OLDMASK;
  456                 /*
  457                  * Make sure that ast() is called on return to
  458                  * usermode and TDP_OLDMASK is cleared, restoring old
  459                  * sigmask.
  460                  */
  461                 ast_sched(td, TDA_SIGSUSPEND);
  462         }
  463 
  464         coargs.leventlist = events;
  465         coargs.p = td->td_proc;
  466         coargs.count = 0;
  467         coargs.error = 0;
  468 
  469         error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
  470         if (error == 0 && coargs.error != 0)
  471                 error = coargs.error;
  472 
  473         /*
  474          * kern_kevent might return ENOMEM which is not expected from epoll_wait.
  475          * Maybe we should translate that but I don't think it matters at all.
  476          */
  477         if (error == 0)
  478                 td->td_retval[0] = coargs.count;
  479 
  480         if (uset != NULL)
  481                 error = kern_sigprocmask(td, SIG_SETMASK, &omask,
  482                     NULL, 0);
  483 leave:
  484         fdrop(epfp, td);
  485         return (error);
  486 }
  487 
  488 static int
  489 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
  490     int maxevents, int timeout, sigset_t *uset)
  491 {
  492         struct timespec ts, *tsp;
  493 
  494         /*
  495          * Linux epoll_wait(2) man page states that timeout of -1 causes caller
  496          * to block indefinitely. Real implementation does it if any negative
  497          * timeout value is passed.
  498          */
  499         if (timeout >= 0) {
  500                 /* Convert from milliseconds to timespec. */
  501                 ts.tv_sec = timeout / 1000;
  502                 ts.tv_nsec = (timeout % 1000) * 1000000;
  503                 tsp = &ts;
  504         } else {
  505                 tsp = NULL;
  506         }
  507         return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset));
  508 
  509 }
  510 
  511 #ifdef LINUX_LEGACY_SYSCALLS
  512 int
  513 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
  514 {
  515 
  516         return (linux_epoll_wait_common(td, args->epfd, args->events,
  517             args->maxevents, args->timeout, NULL));
  518 }
  519 #endif
  520 
  521 int
  522 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
  523 {
  524         sigset_t mask, *pmask;
  525         int error;
  526 
  527         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  528             &mask, &pmask);
  529         if (error != 0)
  530                 return (error);
  531 
  532         return (linux_epoll_wait_common(td, args->epfd, args->events,
  533             args->maxevents, args->timeout, pmask));
  534 }
  535 
  536 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  537 int
  538 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args)
  539 {
  540         struct timespec ts, *tsa;
  541         sigset_t mask, *pmask;
  542         int error;
  543 
  544         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  545             &mask, &pmask);
  546         if (error != 0)
  547                 return (error);
  548 
  549         if (args->timeout) {
  550                 error = linux_get_timespec64(&ts, args->timeout);
  551                 if (error != 0)
  552                         return (error);
  553                 tsa = &ts;
  554         } else
  555                 tsa = NULL;
  556 
  557         return (linux_epoll_wait_ts(td, args->epfd, args->events,
  558             args->maxevents, tsa, pmask));
  559 }
  560 #else
  561 int
  562 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args)
  563 {
  564         struct timespec ts, *tsa;
  565         sigset_t mask, *pmask;
  566         int error;
  567 
  568         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  569             &mask, &pmask);
  570         if (error != 0)
  571                 return (error);
  572 
  573         if (args->timeout) {
  574                 error = linux_get_timespec(&ts, args->timeout);
  575                 if (error != 0)
  576                         return (error);
  577                 tsa = &ts;
  578         } else
  579                 tsa = NULL;
  580 
  581         return (linux_epoll_wait_ts(td, args->epfd, args->events,
  582             args->maxevents, tsa, pmask));
  583 }
  584 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
  585 
  586 static int
  587 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
  588     unsigned int flags)
  589 {
  590         struct epoll_copyin_args ciargs;
  591         struct kevent kev;
  592         struct kevent_copyops k_ops = { &ciargs,
  593                                         NULL,
  594                                         epoll_kev_copyin};
  595 
  596         ciargs.changelist = &kev;
  597         EV_SET(&kev, fd, filter, flags, 0, 0, 0);
  598 
  599         return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
  600 }
  601 
  602 static int
  603 epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
  604 {
  605         /*
  606          * Set empty filter flags to avoid accidental modification of already
  607          * registered events. In the case of event re-registration:
  608          * 1. If event does not exists kevent() does nothing and returns ENOENT
  609          * 2. If event does exists, it's enabled/disabled state is preserved
  610          *    but fflags, data and udata fields are overwritten. So we can not
  611          *    set socket lowats and store user's context pointer in udata.
  612          */
  613         if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
  614             epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
  615                 return (1);
  616 
  617         return (0);
  618 }
  619 
  620 static int
  621 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
  622 {
  623         int error1, error2;
  624 
  625         error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
  626         error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
  627 
  628         /* return 0 if at least one result positive */
  629         return (error1 == 0 ? 0 : error2);
  630 }
  631 
  632 #ifdef LINUX_LEGACY_SYSCALLS
  633 int
  634 linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
  635 {
  636         struct specialfd_eventfd ae;
  637 
  638         bzero(&ae, sizeof(ae));
  639         ae.initval = args->initval;
  640         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  641 }
  642 #endif
  643 
  644 int
  645 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
  646 {
  647         struct specialfd_eventfd ae;
  648         int flags;
  649 
  650         if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
  651             LINUX_EFD_SEMAPHORE)) != 0)
  652                 return (EINVAL);
  653         flags = 0;
  654         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  655                 flags |= EFD_CLOEXEC;
  656         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  657                 flags |= EFD_NONBLOCK;
  658         if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
  659                 flags |= EFD_SEMAPHORE;
  660 
  661         bzero(&ae, sizeof(ae));
  662         ae.flags = flags;
  663         ae.initval = args->initval;
  664         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  665 }
  666 
  667 int
  668 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
  669 {
  670         struct timerfd *tfd;
  671         struct file *fp;
  672         clockid_t clockid;
  673         int fflags, fd, error;
  674 
  675         if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
  676                 return (EINVAL);
  677 
  678         error = linux_to_native_clockid(&clockid, args->clockid);
  679         if (error != 0)
  680                 return (error);
  681         if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
  682                 return (EINVAL);
  683 
  684         fflags = 0;
  685         if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
  686                 fflags |= O_CLOEXEC;
  687 
  688         error = falloc(td, &fp, &fd, fflags);
  689         if (error != 0)
  690                 return (error);
  691 
  692         tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
  693         tfd->tfd_clockid = clockid;
  694         mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
  695 
  696         callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
  697         knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
  698 
  699         fflags = FREAD;
  700         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  701                 fflags |= FNONBLOCK;
  702 
  703         finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
  704         fdrop(fp, td);
  705 
  706         td->td_retval[0] = fd;
  707         return (error);
  708 }
  709 
  710 static int
  711 timerfd_close(struct file *fp, struct thread *td)
  712 {
  713         struct timerfd *tfd;
  714 
  715         tfd = fp->f_data;
  716         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  717                 return (EINVAL);
  718 
  719         timespecclear(&tfd->tfd_time.it_value);
  720         timespecclear(&tfd->tfd_time.it_interval);
  721 
  722         callout_drain(&tfd->tfd_callout);
  723 
  724         seldrain(&tfd->tfd_sel);
  725         knlist_destroy(&tfd->tfd_sel.si_note);
  726 
  727         fp->f_ops = &badfileops;
  728         mtx_destroy(&tfd->tfd_lock);
  729         free(tfd, M_EPOLL);
  730 
  731         return (0);
  732 }
  733 
  734 static int
  735 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
  736     int flags, struct thread *td)
  737 {
  738         struct timerfd *tfd;
  739         timerfd_t count;
  740         int error;
  741 
  742         tfd = fp->f_data;
  743         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  744                 return (EINVAL);
  745 
  746         if (uio->uio_resid < sizeof(timerfd_t))
  747                 return (EINVAL);
  748 
  749         error = 0;
  750         mtx_lock(&tfd->tfd_lock);
  751 retry:
  752         if (tfd->tfd_canceled) {
  753                 tfd->tfd_count = 0;
  754                 mtx_unlock(&tfd->tfd_lock);
  755                 return (ECANCELED);
  756         }
  757         if (tfd->tfd_count == 0) {
  758                 if ((fp->f_flag & FNONBLOCK) != 0) {
  759                         mtx_unlock(&tfd->tfd_lock);
  760                         return (EAGAIN);
  761                 }
  762                 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
  763                 if (error == 0)
  764                         goto retry;
  765         }
  766         if (error == 0) {
  767                 count = tfd->tfd_count;
  768                 tfd->tfd_count = 0;
  769                 mtx_unlock(&tfd->tfd_lock);
  770                 error = uiomove(&count, sizeof(timerfd_t), uio);
  771         } else
  772                 mtx_unlock(&tfd->tfd_lock);
  773 
  774         return (error);
  775 }
  776 
  777 static int
  778 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
  779     struct thread *td)
  780 {
  781         struct timerfd *tfd;
  782         int revents = 0;
  783 
  784         tfd = fp->f_data;
  785         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  786                 return (POLLERR);
  787 
  788         mtx_lock(&tfd->tfd_lock);
  789         if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
  790                 revents |= events & (POLLIN|POLLRDNORM);
  791         if (revents == 0)
  792                 selrecord(td, &tfd->tfd_sel);
  793         mtx_unlock(&tfd->tfd_lock);
  794 
  795         return (revents);
  796 }
  797 
  798 static int
  799 timerfd_kqfilter(struct file *fp, struct knote *kn)
  800 {
  801         struct timerfd *tfd;
  802 
  803         tfd = fp->f_data;
  804         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  805                 return (EINVAL);
  806 
  807         if (kn->kn_filter == EVFILT_READ)
  808                 kn->kn_fop = &timerfd_rfiltops;
  809         else
  810                 return (EINVAL);
  811 
  812         kn->kn_hook = tfd;
  813         knlist_add(&tfd->tfd_sel.si_note, kn, 0);
  814 
  815         return (0);
  816 }
  817 
  818 static void
  819 filt_timerfddetach(struct knote *kn)
  820 {
  821         struct timerfd *tfd = kn->kn_hook;
  822 
  823         mtx_lock(&tfd->tfd_lock);
  824         knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
  825         mtx_unlock(&tfd->tfd_lock);
  826 }
  827 
  828 static int
  829 filt_timerfdread(struct knote *kn, long hint)
  830 {
  831         struct timerfd *tfd = kn->kn_hook;
  832 
  833         return (tfd->tfd_count > 0);
  834 }
  835 
  836 static int
  837 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
  838     struct ucred *active_cred, struct thread *td)
  839 {
  840 
  841         if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
  842                 return (EINVAL);
  843 
  844         switch (cmd) {
  845         case FIONBIO:
  846         case FIOASYNC:
  847                 return (0);
  848         }
  849 
  850         return (ENOTTY);
  851 }
  852 
  853 static int
  854 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
  855 {
  856 
  857         return (ENXIO);
  858 }
  859 
  860 static int
  861 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
  862 {
  863 
  864         kif->kf_type = KF_TYPE_UNKNOWN;
  865         return (0);
  866 }
  867 
  868 static void
  869 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
  870 {
  871 
  872         if (tfd->tfd_clockid == CLOCK_REALTIME)
  873                 getnanotime(ts);
  874         else    /* CLOCK_MONOTONIC */
  875                 getnanouptime(ts);
  876 }
  877 
  878 static void
  879 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
  880 {
  881         struct timespec cts;
  882 
  883         linux_timerfd_clocktime(tfd, &cts);
  884         *ots = tfd->tfd_time;
  885         if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
  886                 timespecsub(&ots->it_value, &cts, &ots->it_value);
  887                 if (ots->it_value.tv_sec < 0 ||
  888                     (ots->it_value.tv_sec == 0 &&
  889                      ots->it_value.tv_nsec == 0)) {
  890                         ots->it_value.tv_sec  = 0;
  891                         ots->it_value.tv_nsec = 1;
  892                 }
  893         }
  894 }
  895 
  896 static int
  897 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
  898 {
  899         struct timerfd *tfd;
  900         struct file *fp;
  901         int error;
  902 
  903         error = fget(td, fd, &cap_read_rights, &fp);
  904         if (error != 0)
  905                 return (error);
  906         tfd = fp->f_data;
  907         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  908                 error = EINVAL;
  909                 goto out;
  910         }
  911 
  912         mtx_lock(&tfd->tfd_lock);
  913         linux_timerfd_curval(tfd, ots);
  914         mtx_unlock(&tfd->tfd_lock);
  915 
  916 out:
  917         fdrop(fp, td);
  918         return (error);
  919 }
  920 
  921 int
  922 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args)
  923 {
  924         struct l_itimerspec lots;
  925         struct itimerspec ots;
  926         int error;
  927 
  928         error = linux_timerfd_gettime_common(td, args->fd, &ots);
  929         if (error != 0)
  930                 return (error);
  931         error = native_to_linux_itimerspec(&lots, &ots);
  932         if (error == 0)
  933                 error = copyout(&lots, args->old_value, sizeof(lots));
  934         return (error);
  935 }
  936 
  937 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  938 int
  939 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
  940 {
  941         struct l_itimerspec64 lots;
  942         struct itimerspec ots;
  943         int error;
  944 
  945         error = linux_timerfd_gettime_common(td, args->fd, &ots);
  946         if (error != 0)
  947                 return (error);
  948         error = native_to_linux_itimerspec64(&lots, &ots);
  949         if (error == 0)
  950                 error = copyout(&lots, args->old_value, sizeof(lots));
  951         return (error);
  952 }
  953 #endif
  954 
  955 static int
  956 linux_timerfd_settime_common(struct thread *td, int fd, int flags,
  957     struct itimerspec *nts, struct itimerspec *oval)
  958 {
  959         struct timespec cts, ts;
  960         struct timerfd *tfd;
  961         struct timeval tv;
  962         struct file *fp;
  963         int error;
  964 
  965         if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
  966                 return (EINVAL);
  967 
  968         error = fget(td, fd, &cap_write_rights, &fp);
  969         if (error != 0)
  970                 return (error);
  971         tfd = fp->f_data;
  972         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  973                 error = EINVAL;
  974                 goto out;
  975         }
  976 
  977         mtx_lock(&tfd->tfd_lock);
  978         if (!timespecisset(&nts->it_value))
  979                 timespecclear(&nts->it_interval);
  980         if (oval != NULL)
  981                 linux_timerfd_curval(tfd, oval);
  982 
  983         bcopy(nts, &tfd->tfd_time, sizeof(*nts));
  984         tfd->tfd_count = 0;
  985         if (timespecisset(&nts->it_value)) {
  986                 linux_timerfd_clocktime(tfd, &cts);
  987                 ts = nts->it_value;
  988                 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
  989                         timespecadd(&tfd->tfd_time.it_value, &cts,
  990                                 &tfd->tfd_time.it_value);
  991                 } else {
  992                         timespecsub(&ts, &cts, &ts);
  993                 }
  994                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
  995                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
  996                         linux_timerfd_expire, tfd);
  997                 tfd->tfd_canceled = false;
  998         } else {
  999                 tfd->tfd_canceled = true;
 1000                 callout_stop(&tfd->tfd_callout);
 1001         }
 1002         mtx_unlock(&tfd->tfd_lock);
 1003 
 1004 out:
 1005         fdrop(fp, td);
 1006         return (error);
 1007 }
 1008 
 1009 int
 1010 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
 1011 {
 1012         struct l_itimerspec lots;
 1013         struct itimerspec nts, ots, *pots;
 1014         int error;
 1015 
 1016         error = copyin(args->new_value, &lots, sizeof(lots));
 1017         if (error != 0)
 1018                 return (error);
 1019         error = linux_to_native_itimerspec(&nts, &lots);
 1020         if (error != 0)
 1021                 return (error);
 1022         pots = (args->old_value != NULL ? &ots : NULL);
 1023         error = linux_timerfd_settime_common(td, args->fd, args->flags,
 1024             &nts, pots);
 1025         if (error == 0 && args->old_value != NULL) {
 1026                 error = native_to_linux_itimerspec(&lots, &ots);
 1027                 if (error == 0)
 1028                         error = copyout(&lots, args->old_value, sizeof(lots));
 1029         }
 1030         return (error);
 1031 }
 1032 
 1033 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 1034 int
 1035 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args)
 1036 {
 1037         struct l_itimerspec64 lots;
 1038         struct itimerspec nts, ots, *pots;
 1039         int error;
 1040 
 1041         error = copyin(args->new_value, &lots, sizeof(lots));
 1042         if (error != 0)
 1043                 return (error);
 1044         error = linux_to_native_itimerspec64(&nts, &lots);
 1045         if (error != 0)
 1046                 return (error);
 1047         pots = (args->old_value != NULL ? &ots : NULL);
 1048         error = linux_timerfd_settime_common(td, args->fd, args->flags,
 1049             &nts, pots);
 1050         if (error == 0 && args->old_value != NULL) {
 1051                 error = native_to_linux_itimerspec64(&lots, &ots);
 1052                 if (error == 0)
 1053                         error = copyout(&lots, args->old_value, sizeof(lots));
 1054         }
 1055         return (error);
 1056 }
 1057 #endif
 1058 
 1059 static void
 1060 linux_timerfd_expire(void *arg)
 1061 {
 1062         struct timespec cts, ts;
 1063         struct timeval tv;
 1064         struct timerfd *tfd;
 1065 
 1066         tfd = (struct timerfd *)arg;
 1067 
 1068         linux_timerfd_clocktime(tfd, &cts);
 1069         if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
 1070                 if (timespecisset(&tfd->tfd_time.it_interval))
 1071                         timespecadd(&tfd->tfd_time.it_value,
 1072                                     &tfd->tfd_time.it_interval,
 1073                                     &tfd->tfd_time.it_value);
 1074                 else
 1075                         /* single shot timer */
 1076                         timespecclear(&tfd->tfd_time.it_value);
 1077                 if (timespecisset(&tfd->tfd_time.it_value)) {
 1078                         timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
 1079                         TIMESPEC_TO_TIMEVAL(&tv, &ts);
 1080                         callout_reset(&tfd->tfd_callout, tvtohz(&tv),
 1081                                 linux_timerfd_expire, tfd);
 1082                 }
 1083                 tfd->tfd_count++;
 1084                 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
 1085                 selwakeup(&tfd->tfd_sel);
 1086                 wakeup(&tfd->tfd_count);
 1087         } else if (timespecisset(&tfd->tfd_time.it_value)) {
 1088                 timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
 1089                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
 1090                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
 1091                     linux_timerfd_expire, tfd);
 1092         }
 1093 }

Cache object: d959b1aec703bd25b8370ae42babad0b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.