The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_event.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2007 Roman Divacky
    5  * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_compat.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/imgact.h>
   37 #include <sys/kernel.h>
   38 #include <sys/limits.h>
   39 #include <sys/lock.h>
   40 #include <sys/mutex.h>
   41 #include <sys/callout.h>
   42 #include <sys/capsicum.h>
   43 #include <sys/types.h>
   44 #include <sys/user.h>
   45 #include <sys/file.h>
   46 #include <sys/filedesc.h>
   47 #include <sys/filio.h>
   48 #include <sys/errno.h>
   49 #include <sys/event.h>
   50 #include <sys/poll.h>
   51 #include <sys/proc.h>
   52 #include <sys/selinfo.h>
   53 #include <sys/specialfd.h>
   54 #include <sys/sx.h>
   55 #include <sys/syscallsubr.h>
   56 #include <sys/timespec.h>
   57 #include <sys/eventfd.h>
   58 
   59 #ifdef COMPAT_LINUX32
   60 #include <machine/../linux32/linux.h>
   61 #include <machine/../linux32/linux32_proto.h>
   62 #else
   63 #include <machine/../linux/linux.h>
   64 #include <machine/../linux/linux_proto.h>
   65 #endif
   66 
   67 #include <compat/linux/linux_emul.h>
   68 #include <compat/linux/linux_event.h>
   69 #include <compat/linux/linux_file.h>
   70 #include <compat/linux/linux_signal.h>
   71 #include <compat/linux/linux_timer.h>
   72 #include <compat/linux/linux_util.h>
   73 
   74 typedef uint64_t        epoll_udata_t;
   75 
      /*
       * Event record exchanged with the Linux process by the epoll calls in
       * this file: a 32-bit event mask followed by 64 bits of opaque caller
       * data.  The structure is packed on amd64 only.
       * NOTE(review): presumably this matches the Linux x86-64 layout of
       * struct epoll_event -- confirm against the Linux headers.
       */
   76 struct epoll_event {
   77         uint32_t        events;
   78         epoll_udata_t   data;
   79 }
   80 #if defined(__amd64__)
   81 __attribute__((packed))
   82 #endif
   83 ;
   84 
      /*
       * Largest maxevents value linux_epoll_wait_ts() accepts; it keeps
       * maxevents * sizeof(struct epoll_event) within INT_MAX.
       */
   85 #define LINUX_MAX_EVENTS        (INT_MAX / sizeof(struct epoll_event))
   86 
   87 static int      epoll_to_kevent(struct thread *td, int fd,
   88                     struct epoll_event *l_event, struct kevent *kevent,
   89                     int *nkevents);
   90 static void     kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
   91 static int      epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
   92 static int      epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
   93 static int      epoll_register_kevent(struct thread *td, struct file *epfp,
   94                     int fd, int filter, unsigned int flags);
   95 static int      epoll_fd_registered(struct thread *td, struct file *epfp,
   96                     int fd);
   97 static int      epoll_delete_all_events(struct thread *td, struct file *epfp,
   98                     int fd);
   99 
      /*
       * State handed to epoll_kev_copyin(): a cursor into a kernel-resident
       * array of kevent changes.  The callback advances it past every batch
       * it copies.
       */
  100 struct epoll_copyin_args {
  101         struct kevent   *changelist;
  102 };
  103 
      /*
       * State handed to epoll_kev_copyout().
       *   leventlist  next user-space epoll_event slot to fill; advanced
       *               after each successful copyout.
       *   p           process of the waiting thread (set by
       *               linux_epoll_wait_ts(); not read by the callback).
       *   count       number of events copied out so far.
       *   error       first copyout error seen, 0 if none.
       */
  104 struct epoll_copyout_args {
  105         struct epoll_event      *leventlist;
  106         struct proc             *p;
  107         uint32_t                count;
  108         int                     error;
  109 };
  110 
  111 /* timerfd */
      /*
       * Counter type of a timerfd.  timerfd_read() transfers exactly
       * sizeof(timerfd_t) bytes and rejects shorter buffers.
       */
  112 typedef uint64_t        timerfd_t;
  113 
  114 static fo_rdwr_t        timerfd_read;
  115 static fo_ioctl_t       timerfd_ioctl;
  116 static fo_poll_t        timerfd_poll;
  117 static fo_kqfilter_t    timerfd_kqfilter;
  118 static fo_stat_t        timerfd_stat;
  119 static fo_close_t       timerfd_close;
  120 static fo_fill_kinfo_t  timerfd_fill_kinfo;
  121 
      /*
       * File operations installed on timerfd descriptors by
       * linux_timerfd_create().  Read, ioctl, poll, kqfilter, stat, close
       * and fill_kinfo are implemented in this file; write, truncate,
       * chmod, chown and sendfile are routed to the generic invfo_*
       * handlers.
       */
  122 static struct fileops timerfdops = {
  123         .fo_read = timerfd_read,
  124         .fo_write = invfo_rdwr,
  125         .fo_truncate = invfo_truncate,
  126         .fo_ioctl = timerfd_ioctl,
  127         .fo_poll = timerfd_poll,
  128         .fo_kqfilter = timerfd_kqfilter,
  129         .fo_stat = timerfd_stat,
  130         .fo_close = timerfd_close,
  131         .fo_chmod = invfo_chmod,
  132         .fo_chown = invfo_chown,
  133         .fo_sendfile = invfo_sendfile,
  134         .fo_fill_kinfo = timerfd_fill_kinfo,
  135         .fo_flags = DFLAG_PASSABLE
  136 };
  137 
  138 static void     filt_timerfddetach(struct knote *kn);
  139 static int      filt_timerfdread(struct knote *kn, long hint);
  140 
      /*
       * Filter operations attached by timerfd_kqfilter() to EVFILT_READ
       * knotes on a timerfd.  No attach hook is set here; timerfd_kqfilter()
       * links the knote itself.
       */
  141 static struct filterops timerfd_rfiltops = {
  142         .f_isfd = 1,
  143         .f_detach = filt_timerfddetach,
  144         .f_event = filt_timerfdread
  145 };
  146 
      /*
       * Per-descriptor timerfd state, allocated zeroed by
       * linux_timerfd_create() and released by timerfd_close().
       */
  147 struct timerfd {
              /* Clock chosen at creation: CLOCK_REALTIME or CLOCK_MONOTONIC. */
  148         clockid_t       tfd_clockid;
              /*
               * Current timer setting.  linux_timerfd_curval() subtracts the
               * current clock reading from it_value to report time remaining.
               */
  149         struct itimerspec tfd_time;
              /* Callout initialised against tfd_lock. */
  150         struct callout  tfd_callout;
              /* Counter returned by read and reset to zero once consumed. */
  151         timerfd_t       tfd_count;
              /* When set, timerfd_read() fails with ECANCELED. */
  152         bool            tfd_canceled;
              /* select/poll record; its si_note list holds kqueue knotes. */
  153         struct selinfo  tfd_sel;
              /* Mutex taken around accesses to the fields above. */
  154         struct mtx      tfd_lock;
  155 };
  156 
  157 static void     linux_timerfd_expire(void *);
  158 static void     linux_timerfd_curval(struct timerfd *, struct itimerspec *);
  159 
  160 static int
  161 epoll_create_common(struct thread *td, int flags)
  162 {
  163 
  164         return (kern_kqueue(td, flags, NULL));
  165 }
  166 
  167 #ifdef LINUX_LEGACY_SYSCALLS
  168 int
  169 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
  170 {
  171 
  172         /*
  173          * args->size is unused. Linux just tests it
  174          * and then forgets it as well.
  175          */
  176         if (args->size <= 0)
  177                 return (EINVAL);
  178 
  179         return (epoll_create_common(td, 0));
  180 }
  181 #endif
  182 
  183 int
  184 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
  185 {
  186         int flags;
  187 
  188         if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
  189                 return (EINVAL);
  190 
  191         flags = 0;
  192         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  193                 flags |= O_CLOEXEC;
  194 
  195         return (epoll_create_common(td, flags));
  196 }
  197 
  198 /* Structure converting function from epoll to kevent. */
  199 static int
  200 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
  201     struct kevent *kevent, int *nkevents)
  202 {
  203         uint32_t levents = l_event->events;
  204         struct linux_pemuldata *pem;
  205         struct proc *p;
  206         unsigned short kev_flags = EV_ADD | EV_ENABLE;
  207 
  208         /* flags related to how event is registered */
  209         if ((levents & LINUX_EPOLLONESHOT) != 0)
  210                 kev_flags |= EV_DISPATCH;
  211         if ((levents & LINUX_EPOLLET) != 0)
  212                 kev_flags |= EV_CLEAR;
  213         if ((levents & LINUX_EPOLLERR) != 0)
  214                 kev_flags |= EV_ERROR;
  215         if ((levents & LINUX_EPOLLRDHUP) != 0)
  216                 kev_flags |= EV_EOF;
  217 
  218         /* flags related to what event is registered */
  219         if ((levents & LINUX_EPOLL_EVRD) != 0) {
  220                 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
  221                 kevent->ext[0] = l_event->data;
  222                 ++kevent;
  223                 ++(*nkevents);
  224         }
  225         if ((levents & LINUX_EPOLL_EVWR) != 0) {
  226                 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
  227                 kevent->ext[0] = l_event->data;
  228                 ++kevent;
  229                 ++(*nkevents);
  230         }
  231         /* zero event mask is legal */
  232         if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
  233                 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
  234                 ++(*nkevents);
  235         }
  236 
  237         if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
  238                 p = td->td_proc;
  239 
  240                 pem = pem_find(p);
  241                 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
  242 
  243                 LINUX_PEM_XLOCK(pem);
  244                 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
  245                         pem->flags |= LINUX_XUNSUP_EPOLL;
  246                         LINUX_PEM_XUNLOCK(pem);
  247                         linux_msg(td, "epoll_ctl unsupported flags: 0x%x",
  248                             levents);
  249                 } else
  250                         LINUX_PEM_XUNLOCK(pem);
  251                 return (EINVAL);
  252         }
  253 
  254         return (0);
  255 }
  256 
  257 /*
  258  * Structure converting function from kevent to epoll. In a case
  259  * this is called on error in registration we store the error in
  260  * event->data and pick it up later in linux_epoll_ctl().
  261  */
  262 static void
  263 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
  264 {
  265 
  266         l_event->data = kevent->ext[0];
  267 
  268         if ((kevent->flags & EV_ERROR) != 0) {
  269                 l_event->events = LINUX_EPOLLERR;
  270                 return;
  271         }
  272 
  273         /* XXX EPOLLPRI, EPOLLHUP */
  274         switch (kevent->filter) {
  275         case EVFILT_READ:
  276                 l_event->events = LINUX_EPOLLIN;
  277                 if ((kevent->flags & EV_EOF) != 0)
  278                         l_event->events |= LINUX_EPOLLRDHUP;
  279         break;
  280         case EVFILT_WRITE:
  281                 l_event->events = LINUX_EPOLLOUT;
  282         break;
  283         }
  284 }
  285 
  286 /*
  287  * Copyout callback used by kevent. This converts kevent
  288  * events to epoll events and copies them back to the
  289  * userspace. This is also called on error on registering
  290  * of the filter.
  291  */
  292 static int
  293 epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
  294 {
  295         struct epoll_copyout_args *args;
  296         struct epoll_event *eep;
  297         int error, i;
  298 
  299         args = (struct epoll_copyout_args*) arg;
  300         eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
  301 
  302         for (i = 0; i < count; i++)
  303                 kevent_to_epoll(&kevp[i], &eep[i]);
  304 
  305         error = copyout(eep, args->leventlist, count * sizeof(*eep));
  306         if (error == 0) {
  307                 args->leventlist += count;
  308                 args->count += count;
  309         } else if (args->error == 0)
  310                 args->error = error;
  311 
  312         free(eep, M_EPOLL);
  313         return (error);
  314 }
  315 
  316 /*
  317  * Copyin callback used by kevent. This copies already
  318  * converted filters from kernel memory to the kevent
  319  * internal kernel memory. Hence the memcpy instead of
  320  * copyin.
  321  */
  322 static int
  323 epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
  324 {
  325         struct epoll_copyin_args *args;
  326 
  327         args = (struct epoll_copyin_args*) arg;
  328 
  329         memcpy(kevp, args->changelist, count * sizeof(*kevp));
  330         args->changelist += count;
  331 
  332         return (0);
  333 }
  334 
  335 /*
  336  * Load epoll filter, convert it to kevent filter
  337  * and load it into kevent subsystem.
  338  */
  339 int
  340 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
  341 {
  342         struct file *epfp, *fp;
  343         struct epoll_copyin_args ciargs;
  344         struct kevent kev[2];
  345         struct kevent_copyops k_ops = { &ciargs,
  346                                         NULL,
  347                                         epoll_kev_copyin};
  348         struct epoll_event le;
  349         cap_rights_t rights;
  350         int nchanges = 0;
  351         int error;
  352 
  353         if (args->op != LINUX_EPOLL_CTL_DEL) {
  354                 error = copyin(args->event, &le, sizeof(le));
  355                 if (error != 0)
  356                         return (error);
  357         }
  358 
  359         error = fget(td, args->epfd,
  360             cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp);
  361         if (error != 0)
  362                 return (error);
  363         if (epfp->f_type != DTYPE_KQUEUE) {
  364                 error = EINVAL;
  365                 goto leave1;
  366         }
  367 
  368          /* Protect user data vector from incorrectly supplied fd. */
  369         error = fget(td, args->fd,
  370                      cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp);
  371         if (error != 0)
  372                 goto leave1;
  373 
  374         /* Linux disallows spying on himself */
  375         if (epfp == fp) {
  376                 error = EINVAL;
  377                 goto leave0;
  378         }
  379 
  380         ciargs.changelist = kev;
  381 
  382         if (args->op != LINUX_EPOLL_CTL_DEL) {
  383                 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
  384                 if (error != 0)
  385                         goto leave0;
  386         }
  387 
  388         switch (args->op) {
  389         case LINUX_EPOLL_CTL_MOD:
  390                 error = epoll_delete_all_events(td, epfp, args->fd);
  391                 if (error != 0)
  392                         goto leave0;
  393                 break;
  394 
  395         case LINUX_EPOLL_CTL_ADD:
  396                 if (epoll_fd_registered(td, epfp, args->fd)) {
  397                         error = EEXIST;
  398                         goto leave0;
  399                 }
  400                 break;
  401 
  402         case LINUX_EPOLL_CTL_DEL:
  403                 /* CTL_DEL means unregister this fd with this epoll */
  404                 error = epoll_delete_all_events(td, epfp, args->fd);
  405                 goto leave0;
  406 
  407         default:
  408                 error = EINVAL;
  409                 goto leave0;
  410         }
  411 
  412         error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
  413 
  414 leave0:
  415         fdrop(fp, td);
  416 
  417 leave1:
  418         fdrop(epfp, td);
  419         return (error);
  420 }
  421 
  422 /*
  423  * Wait for a filter to be triggered on the epoll file descriptor.
  424  */
  425 
  426 static int
  427 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events,
  428     int maxevents, struct timespec *tsp, sigset_t *uset)
  429 {
  430         struct epoll_copyout_args coargs;
  431         struct kevent_copyops k_ops = { &coargs,
  432                                         epoll_kev_copyout,
  433                                         NULL};
  434         cap_rights_t rights;
  435         struct file *epfp;
  436         sigset_t omask;
  437         int error;
  438 
  439         if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
  440                 return (EINVAL);
  441 
  442         error = fget(td, epfd,
  443             cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp);
  444         if (error != 0)
  445                 return (error);
  446         if (epfp->f_type != DTYPE_KQUEUE) {
  447                 error = EINVAL;
  448                 goto leave;
  449         }
  450         if (uset != NULL) {
  451                 error = kern_sigprocmask(td, SIG_SETMASK, uset,
  452                     &omask, 0);
  453                 if (error != 0)
  454                         goto leave;
  455                 td->td_pflags |= TDP_OLDMASK;
  456                 /*
  457                  * Make sure that ast() is called on return to
  458                  * usermode and TDP_OLDMASK is cleared, restoring old
  459                  * sigmask.
  460                  */
  461                 thread_lock(td);
  462                 td->td_flags |= TDF_ASTPENDING;
  463                 thread_unlock(td);
  464         }
  465 
  466         coargs.leventlist = events;
  467         coargs.p = td->td_proc;
  468         coargs.count = 0;
  469         coargs.error = 0;
  470 
  471         error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
  472         if (error == 0 && coargs.error != 0)
  473                 error = coargs.error;
  474 
  475         /*
  476          * kern_kevent might return ENOMEM which is not expected from epoll_wait.
  477          * Maybe we should translate that but I don't think it matters at all.
  478          */
  479         if (error == 0)
  480                 td->td_retval[0] = coargs.count;
  481 
  482         if (uset != NULL)
  483                 error = kern_sigprocmask(td, SIG_SETMASK, &omask,
  484                     NULL, 0);
  485 leave:
  486         fdrop(epfp, td);
  487         return (error);
  488 }
  489 
  490 static int
  491 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
  492     int maxevents, int timeout, sigset_t *uset)
  493 {
  494         struct timespec ts, *tsp;
  495 
  496         /*
  497          * Linux epoll_wait(2) man page states that timeout of -1 causes caller
  498          * to block indefinitely. Real implementation does it if any negative
  499          * timeout value is passed.
  500          */
  501         if (timeout >= 0) {
  502                 /* Convert from milliseconds to timespec. */
  503                 ts.tv_sec = timeout / 1000;
  504                 ts.tv_nsec = (timeout % 1000) * 1000000;
  505                 tsp = &ts;
  506         } else {
  507                 tsp = NULL;
  508         }
  509         return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset));
  510 
  511 }
  512 
  513 #ifdef LINUX_LEGACY_SYSCALLS
  514 int
  515 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
  516 {
  517 
  518         return (linux_epoll_wait_common(td, args->epfd, args->events,
  519             args->maxevents, args->timeout, NULL));
  520 }
  521 #endif
  522 
  523 int
  524 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
  525 {
  526         sigset_t mask, *pmask;
  527         int error;
  528 
  529         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  530             &mask, &pmask);
  531         if (error != 0)
  532                 return (error);
  533 
  534         return (linux_epoll_wait_common(td, args->epfd, args->events,
  535             args->maxevents, args->timeout, pmask));
  536 }
  537 
  538 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  539 int
  540 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args)
  541 {
  542         struct timespec ts, *tsa;
  543         sigset_t mask, *pmask;
  544         int error;
  545 
  546         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  547             &mask, &pmask);
  548         if (error != 0)
  549                 return (error);
  550 
  551         if (args->timeout) {
  552                 error = linux_get_timespec64(&ts, args->timeout);
  553                 if (error != 0)
  554                         return (error);
  555                 tsa = &ts;
  556         } else
  557                 tsa = NULL;
  558 
  559         return (linux_epoll_wait_ts(td, args->epfd, args->events,
  560             args->maxevents, tsa, pmask));
  561 }
  562 #else
  563 int
  564 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args)
  565 {
  566         struct timespec ts, *tsa;
  567         sigset_t mask, *pmask;
  568         int error;
  569 
  570         error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
  571             &mask, &pmask);
  572         if (error != 0)
  573                 return (error);
  574 
  575         if (args->timeout) {
  576                 error = linux_get_timespec(&ts, args->timeout);
  577                 if (error != 0)
  578                         return (error);
  579                 tsa = &ts;
  580         } else
  581                 tsa = NULL;
  582 
  583         return (linux_epoll_wait_ts(td, args->epfd, args->events,
  584             args->maxevents, tsa, pmask));
  585 }
  586 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
  587 
  588 static int
  589 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
  590     unsigned int flags)
  591 {
  592         struct epoll_copyin_args ciargs;
  593         struct kevent kev;
  594         struct kevent_copyops k_ops = { &ciargs,
  595                                         NULL,
  596                                         epoll_kev_copyin};
  597 
  598         ciargs.changelist = &kev;
  599         EV_SET(&kev, fd, filter, flags, 0, 0, 0);
  600 
  601         return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
  602 }
  603 
  604 static int
  605 epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
  606 {
  607         /*
  608          * Set empty filter flags to avoid accidental modification of already
  609          * registered events. In the case of event re-registration:
  610          * 1. If event does not exists kevent() does nothing and returns ENOENT
  611          * 2. If event does exists, it's enabled/disabled state is preserved
  612          *    but fflags, data and udata fields are overwritten. So we can not
  613          *    set socket lowats and store user's context pointer in udata.
  614          */
  615         if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
  616             epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
  617                 return (1);
  618 
  619         return (0);
  620 }
  621 
  622 static int
  623 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
  624 {
  625         int error1, error2;
  626 
  627         error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
  628         error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
  629 
  630         /* return 0 if at least one result positive */
  631         return (error1 == 0 ? 0 : error2);
  632 }
  633 
  634 #ifdef LINUX_LEGACY_SYSCALLS
  635 int
  636 linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
  637 {
  638         struct specialfd_eventfd ae;
  639 
  640         bzero(&ae, sizeof(ae));
  641         ae.initval = args->initval;
  642         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  643 }
  644 #endif
  645 
  646 int
  647 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
  648 {
  649         struct specialfd_eventfd ae;
  650         int flags;
  651 
  652         if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
  653             LINUX_EFD_SEMAPHORE)) != 0)
  654                 return (EINVAL);
  655         flags = 0;
  656         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  657                 flags |= EFD_CLOEXEC;
  658         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  659                 flags |= EFD_NONBLOCK;
  660         if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
  661                 flags |= EFD_SEMAPHORE;
  662 
  663         bzero(&ae, sizeof(ae));
  664         ae.flags = flags;
  665         ae.initval = args->initval;
  666         return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
  667 }
  668 
  669 int
  670 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
  671 {
  672         struct timerfd *tfd;
  673         struct file *fp;
  674         clockid_t clockid;
  675         int fflags, fd, error;
  676 
  677         if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
  678                 return (EINVAL);
  679 
  680         error = linux_to_native_clockid(&clockid, args->clockid);
  681         if (error != 0)
  682                 return (error);
  683         if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
  684                 return (EINVAL);
  685 
  686         fflags = 0;
  687         if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
  688                 fflags |= O_CLOEXEC;
  689 
  690         error = falloc(td, &fp, &fd, fflags);
  691         if (error != 0)
  692                 return (error);
  693 
  694         tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
  695         tfd->tfd_clockid = clockid;
  696         mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
  697 
  698         callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
  699         knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
  700 
  701         fflags = FREAD;
  702         if ((args->flags & LINUX_O_NONBLOCK) != 0)
  703                 fflags |= FNONBLOCK;
  704 
  705         finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
  706         fdrop(fp, td);
  707 
  708         td->td_retval[0] = fd;
  709         return (error);
  710 }
  711 
  712 static int
  713 timerfd_close(struct file *fp, struct thread *td)
  714 {
  715         struct timerfd *tfd;
  716 
  717         tfd = fp->f_data;
  718         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  719                 return (EINVAL);
  720 
  721         timespecclear(&tfd->tfd_time.it_value);
  722         timespecclear(&tfd->tfd_time.it_interval);
  723 
  724         callout_drain(&tfd->tfd_callout);
  725 
  726         seldrain(&tfd->tfd_sel);
  727         knlist_destroy(&tfd->tfd_sel.si_note);
  728 
  729         fp->f_ops = &badfileops;
  730         mtx_destroy(&tfd->tfd_lock);
  731         free(tfd, M_EPOLL);
  732 
  733         return (0);
  734 }
  735 
  736 static int
  737 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
  738     int flags, struct thread *td)
  739 {
  740         struct timerfd *tfd;
  741         timerfd_t count;
  742         int error;
  743 
  744         tfd = fp->f_data;
  745         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  746                 return (EINVAL);
  747 
  748         if (uio->uio_resid < sizeof(timerfd_t))
  749                 return (EINVAL);
  750 
  751         error = 0;
  752         mtx_lock(&tfd->tfd_lock);
  753 retry:
  754         if (tfd->tfd_canceled) {
  755                 tfd->tfd_count = 0;
  756                 mtx_unlock(&tfd->tfd_lock);
  757                 return (ECANCELED);
  758         }
  759         if (tfd->tfd_count == 0) {
  760                 if ((fp->f_flag & FNONBLOCK) != 0) {
  761                         mtx_unlock(&tfd->tfd_lock);
  762                         return (EAGAIN);
  763                 }
  764                 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
  765                 if (error == 0)
  766                         goto retry;
  767         }
  768         if (error == 0) {
  769                 count = tfd->tfd_count;
  770                 tfd->tfd_count = 0;
  771                 mtx_unlock(&tfd->tfd_lock);
  772                 error = uiomove(&count, sizeof(timerfd_t), uio);
  773         } else
  774                 mtx_unlock(&tfd->tfd_lock);
  775 
  776         return (error);
  777 }
  778 
  779 static int
  780 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
  781     struct thread *td)
  782 {
  783         struct timerfd *tfd;
  784         int revents = 0;
  785 
  786         tfd = fp->f_data;
  787         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  788                 return (POLLERR);
  789 
  790         mtx_lock(&tfd->tfd_lock);
  791         if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
  792                 revents |= events & (POLLIN|POLLRDNORM);
  793         if (revents == 0)
  794                 selrecord(td, &tfd->tfd_sel);
  795         mtx_unlock(&tfd->tfd_lock);
  796 
  797         return (revents);
  798 }
  799 
  800 static int
  801 timerfd_kqfilter(struct file *fp, struct knote *kn)
  802 {
  803         struct timerfd *tfd;
  804 
  805         tfd = fp->f_data;
  806         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
  807                 return (EINVAL);
  808 
  809         if (kn->kn_filter == EVFILT_READ)
  810                 kn->kn_fop = &timerfd_rfiltops;
  811         else
  812                 return (EINVAL);
  813 
  814         kn->kn_hook = tfd;
  815         knlist_add(&tfd->tfd_sel.si_note, kn, 0);
  816 
  817         return (0);
  818 }
  819 
  820 static void
  821 filt_timerfddetach(struct knote *kn)
  822 {
  823         struct timerfd *tfd = kn->kn_hook;
  824 
  825         mtx_lock(&tfd->tfd_lock);
  826         knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
  827         mtx_unlock(&tfd->tfd_lock);
  828 }
  829 
  830 static int
  831 filt_timerfdread(struct knote *kn, long hint)
  832 {
  833         struct timerfd *tfd = kn->kn_hook;
  834 
  835         return (tfd->tfd_count > 0);
  836 }
  837 
  838 static int
  839 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
  840     struct ucred *active_cred, struct thread *td)
  841 {
  842 
  843         if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
  844                 return (EINVAL);
  845 
  846         switch (cmd) {
  847         case FIONBIO:
  848         case FIOASYNC:
  849                 return (0);
  850         }
  851 
  852         return (ENOTTY);
  853 }
  854 
  855 static int
  856 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
  857     struct thread *td)
  858 {
  859 
  860         return (ENXIO);
  861 }
  862 
  863 static int
  864 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
  865 {
  866 
  867         kif->kf_type = KF_TYPE_UNKNOWN;
  868         return (0);
  869 }
  870 
  871 static void
  872 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
  873 {
  874 
  875         if (tfd->tfd_clockid == CLOCK_REALTIME)
  876                 getnanotime(ts);
  877         else    /* CLOCK_MONOTONIC */
  878                 getnanouptime(ts);
  879 }
  880 
  881 static void
  882 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
  883 {
  884         struct timespec cts;
  885 
  886         linux_timerfd_clocktime(tfd, &cts);
  887         *ots = tfd->tfd_time;
  888         if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
  889                 timespecsub(&ots->it_value, &cts, &ots->it_value);
  890                 if (ots->it_value.tv_sec < 0 ||
  891                     (ots->it_value.tv_sec == 0 &&
  892                      ots->it_value.tv_nsec == 0)) {
  893                         ots->it_value.tv_sec  = 0;
  894                         ots->it_value.tv_nsec = 1;
  895                 }
  896         }
  897 }
  898 
  899 static int
  900 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
  901 {
  902         struct timerfd *tfd;
  903         struct file *fp;
  904         int error;
  905 
  906         error = fget(td, fd, &cap_read_rights, &fp);
  907         if (error != 0)
  908                 return (error);
  909         tfd = fp->f_data;
  910         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  911                 error = EINVAL;
  912                 goto out;
  913         }
  914 
  915         mtx_lock(&tfd->tfd_lock);
  916         linux_timerfd_curval(tfd, ots);
  917         mtx_unlock(&tfd->tfd_lock);
  918 
  919 out:
  920         fdrop(fp, td);
  921         return (error);
  922 }
  923 
  924 int
  925 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args)
  926 {
  927         struct l_itimerspec lots;
  928         struct itimerspec ots;
  929         int error;
  930 
  931         error = linux_timerfd_gettime_common(td, args->fd, &ots);
  932         if (error != 0)
  933                 return (error);
  934         error = native_to_linux_itimerspec(&lots, &ots);
  935         if (error == 0)
  936                 error = copyout(&lots, args->old_value, sizeof(lots));
  937         return (error);
  938 }
  939 
  940 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  941 int
  942 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
  943 {
  944         struct l_itimerspec64 lots;
  945         struct itimerspec ots;
  946         int error;
  947 
  948         error = linux_timerfd_gettime_common(td, args->fd, &ots);
  949         if (error != 0)
  950                 return (error);
  951         error = native_to_linux_itimerspec64(&lots, &ots);
  952         if (error == 0)
  953                 error = copyout(&lots, args->old_value, sizeof(lots));
  954         return (error);
  955 }
  956 #endif
  957 
  958 static int
  959 linux_timerfd_settime_common(struct thread *td, int fd, int flags,
  960     struct itimerspec *nts, struct itimerspec *oval)
  961 {
  962         struct timespec cts, ts;
  963         struct timerfd *tfd;
  964         struct timeval tv;
  965         struct file *fp;
  966         int error;
  967 
  968         if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
  969                 return (EINVAL);
  970 
  971         error = fget(td, fd, &cap_write_rights, &fp);
  972         if (error != 0)
  973                 return (error);
  974         tfd = fp->f_data;
  975         if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
  976                 error = EINVAL;
  977                 goto out;
  978         }
  979 
  980         mtx_lock(&tfd->tfd_lock);
  981         if (!timespecisset(&nts->it_value))
  982                 timespecclear(&nts->it_interval);
  983         if (oval != NULL)
  984                 linux_timerfd_curval(tfd, oval);
  985 
  986         bcopy(nts, &tfd->tfd_time, sizeof(*nts));
  987         tfd->tfd_count = 0;
  988         if (timespecisset(&nts->it_value)) {
  989                 linux_timerfd_clocktime(tfd, &cts);
  990                 ts = nts->it_value;
  991                 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
  992                         timespecadd(&tfd->tfd_time.it_value, &cts,
  993                                 &tfd->tfd_time.it_value);
  994                 } else {
  995                         timespecsub(&ts, &cts, &ts);
  996                 }
  997                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
  998                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
  999                         linux_timerfd_expire, tfd);
 1000                 tfd->tfd_canceled = false;
 1001         } else {
 1002                 tfd->tfd_canceled = true;
 1003                 callout_stop(&tfd->tfd_callout);
 1004         }
 1005         mtx_unlock(&tfd->tfd_lock);
 1006 
 1007 out:
 1008         fdrop(fp, td);
 1009         return (error);
 1010 }
 1011 
 1012 int
 1013 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
 1014 {
 1015         struct l_itimerspec lots;
 1016         struct itimerspec nts, ots, *pots;
 1017         int error;
 1018 
 1019         error = copyin(args->new_value, &lots, sizeof(lots));
 1020         if (error != 0)
 1021                 return (error);
 1022         error = linux_to_native_itimerspec(&nts, &lots);
 1023         if (error != 0)
 1024                 return (error);
 1025         pots = (args->old_value != NULL ? &ots : NULL);
 1026         error = linux_timerfd_settime_common(td, args->fd, args->flags,
 1027             &nts, pots);
 1028         if (error == 0 && args->old_value != NULL) {
 1029                 error = native_to_linux_itimerspec(&lots, &ots);
 1030                 if (error == 0)
 1031                         error = copyout(&lots, args->old_value, sizeof(lots));
 1032         }
 1033         return (error);
 1034 }
 1035 
 1036 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 1037 int
 1038 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args)
 1039 {
 1040         struct l_itimerspec64 lots;
 1041         struct itimerspec nts, ots, *pots;
 1042         int error;
 1043 
 1044         error = copyin(args->new_value, &lots, sizeof(lots));
 1045         if (error != 0)
 1046                 return (error);
 1047         error = linux_to_native_itimerspec64(&nts, &lots);
 1048         if (error != 0)
 1049                 return (error);
 1050         pots = (args->old_value != NULL ? &ots : NULL);
 1051         error = linux_timerfd_settime_common(td, args->fd, args->flags,
 1052             &nts, pots);
 1053         if (error == 0 && args->old_value != NULL) {
 1054                 error = native_to_linux_itimerspec64(&lots, &ots);
 1055                 if (error == 0)
 1056                         error = copyout(&lots, args->old_value, sizeof(lots));
 1057         }
 1058         return (error);
 1059 }
 1060 #endif
 1061 
 1062 static void
 1063 linux_timerfd_expire(void *arg)
 1064 {
 1065         struct timespec cts, ts;
 1066         struct timeval tv;
 1067         struct timerfd *tfd;
 1068 
 1069         tfd = (struct timerfd *)arg;
 1070 
 1071         linux_timerfd_clocktime(tfd, &cts);
 1072         if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
 1073                 if (timespecisset(&tfd->tfd_time.it_interval))
 1074                         timespecadd(&tfd->tfd_time.it_value,
 1075                                     &tfd->tfd_time.it_interval,
 1076                                     &tfd->tfd_time.it_value);
 1077                 else
 1078                         /* single shot timer */
 1079                         timespecclear(&tfd->tfd_time.it_value);
 1080                 if (timespecisset(&tfd->tfd_time.it_value)) {
 1081                         timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
 1082                         TIMESPEC_TO_TIMEVAL(&tv, &ts);
 1083                         callout_reset(&tfd->tfd_callout, tvtohz(&tv),
 1084                                 linux_timerfd_expire, tfd);
 1085                 }
 1086                 tfd->tfd_count++;
 1087                 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
 1088                 selwakeup(&tfd->tfd_sel);
 1089                 wakeup(&tfd->tfd_count);
 1090         } else if (timespecisset(&tfd->tfd_time.it_value)) {
 1091                 timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
 1092                 TIMESPEC_TO_TIMEVAL(&tv, &ts);
 1093                 callout_reset(&tfd->tfd_callout, tvtohz(&tv),
 1094                     linux_timerfd_expire, tfd);
 1095         }
 1096 }

Cache object: 1dd96c9557bdb2ce0428136b906e1b58


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.