The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_event.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2007 Roman Divacky
    3  * Copyright (c) 2014 Dmitry Chagin
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_compat.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/imgact.h>
   36 #include <sys/kernel.h>
   37 #include <sys/limits.h>
   38 #include <sys/lock.h>
   39 #include <sys/mutex.h>
   40 #include <sys/capsicum.h>
   41 #include <sys/types.h>
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/filio.h>
   45 #include <sys/errno.h>
   46 #include <sys/event.h>
   47 #include <sys/poll.h>
   48 #include <sys/proc.h>
   49 #include <sys/selinfo.h>
   50 #include <sys/sx.h>
   51 #include <sys/syscallsubr.h>
   52 #include <sys/timespec.h>
   53 
   54 #ifdef COMPAT_LINUX32
   55 #include <machine/../linux32/linux.h>
   56 #include <machine/../linux32/linux32_proto.h>
   57 #else
   58 #include <machine/../linux/linux.h>
   59 #include <machine/../linux/linux_proto.h>
   60 #endif
   61 
   62 #include <compat/linux/linux_emul.h>
   63 #include <compat/linux/linux_event.h>
   64 #include <compat/linux/linux_file.h>
   65 #include <compat/linux/linux_util.h>
   66 
/*
 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
 * has a 32 bit opaque pointer as its 'udata' field. So we can't pass the
 * epoll-supplied data verbatim. Therefore we allocate a 64-bit memory block
 * to hold the user-supplied data for every file descriptor.
 */
   74 
/* Opaque 64-bit user data value attached to every epoll event. */
typedef uint64_t        epoll_udata_t;

/* Per-process table mapping fd index -> saved epoll user data. */
struct epoll_emuldata {
        uint32_t        fdc;            /* epoll udata max index */
        epoll_udata_t   udata[1];       /* epoll user data vector */
};

/* Initial largest index; the table grows on demand in epoll_fd_install(). */
#define EPOLL_DEF_SZ            16
/* Bytes needed for a table whose largest valid index is fdn
 * (the udata[1] member supplies the extra slot, so indices 0..fdn fit). */
#define EPOLL_SIZE(fdn)                 \
        (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))

/*
 * Linux ABI layout of struct epoll_event; packed on amd64 where Linux
 * uses 4-byte alignment for the 64-bit 'data' member.
 */
struct epoll_event {
        uint32_t        events;
        epoll_udata_t   data;
}
#if defined(__amd64__)
__attribute__((packed))
#endif
;

/* Cap on maxevents so count * sizeof(struct epoll_event) cannot overflow. */
#define LINUX_MAX_EVENTS        (INT_MAX / sizeof(struct epoll_event))
   96 
/* Forward declarations for the epoll emulation helpers below. */
static void     epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
static int      epoll_to_kevent(struct thread *td, struct file *epfp,
                    int fd, struct epoll_event *l_event, int *kev_flags,
                    struct kevent *kevent, int *nkevents);
static void     kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
static int      epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
static int      epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
static int      epoll_delete_event(struct thread *td, struct file *epfp,
                    int fd, int filter);
static int      epoll_delete_all_events(struct thread *td, struct file *epfp,
                    int fd);

/* Cookie passed to epoll_kev_copyin() via struct kevent_copyops. */
struct epoll_copyin_args {
        struct kevent   *changelist;    /* next kevent to hand to the kernel */
};

/* Cookie passed to epoll_kev_copyout() via struct kevent_copyops. */
struct epoll_copyout_args {
        struct epoll_event      *leventlist;    /* user buffer cursor */
        struct proc             *p;             /* proc owning the udata table */
        uint32_t                count;          /* events copied out so far */
        int                     error;          /* first copyout error, if any */
};
  119 
/* eventfd */
/* 64-bit counter value read from and written to a Linux eventfd. */
typedef uint64_t        eventfd_t;

static fo_rdwr_t        eventfd_read;
static fo_rdwr_t        eventfd_write;
static fo_truncate_t    eventfd_truncate;
static fo_ioctl_t       eventfd_ioctl;
static fo_poll_t        eventfd_poll;
static fo_kqfilter_t    eventfd_kqfilter;
static fo_stat_t        eventfd_stat;
static fo_close_t       eventfd_close;

/* File operations vector installed on DTYPE_LINUXEFD files by eventfd_create(). */
static struct fileops eventfdops = {
        .fo_read = eventfd_read,
        .fo_write = eventfd_write,
        .fo_truncate = eventfd_truncate,
        .fo_ioctl = eventfd_ioctl,
        .fo_poll = eventfd_poll,
        .fo_kqfilter = eventfd_kqfilter,
        .fo_stat = eventfd_stat,
        .fo_close = eventfd_close,
        .fo_chmod = invfo_chmod,
        .fo_chown = invfo_chown,
        .fo_sendfile = invfo_sendfile,
        .fo_flags = DFLAG_PASSABLE
};

static void     filt_eventfddetach(struct knote *kn);
static int      filt_eventfdread(struct knote *kn, long hint);
static int      filt_eventfdwrite(struct knote *kn, long hint);

/* kqueue filter ops for EVFILT_READ on an eventfd. */
static struct filterops eventfd_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_eventfddetach,
        .f_event = filt_eventfdread
};
/* kqueue filter ops for EVFILT_WRITE on an eventfd. */
static struct filterops eventfd_wfiltops = {
        .f_isfd = 1,
        .f_detach = filt_eventfddetach,
        .f_event = filt_eventfdwrite
};

/* Per-descriptor eventfd state; efd_lock protects the other fields. */
struct eventfd {
        eventfd_t       efd_count;      /* the 64-bit counter */
        uint32_t        efd_flags;      /* LINUX_O_* / LINUX_EFD_* flags */
        struct selinfo  efd_sel;        /* select/poll/kevent bookkeeping */
        struct mtx      efd_lock;
};

static int      eventfd_create(struct thread *td, uint32_t initval, int flags);
  171 
  172 static void
  173 epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
  174 {
  175         struct linux_pemuldata *pem;
  176         struct epoll_emuldata *emd;
  177         struct proc *p;
  178 
  179         p = td->td_proc;
  180 
  181         pem = pem_find(p);
  182         KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
  183 
  184         LINUX_PEM_XLOCK(pem);
  185         if (pem->epoll == NULL) {
  186                 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
  187                 emd->fdc = fd;
  188                 pem->epoll = emd;
  189         } else {
  190                 emd = pem->epoll;
  191                 if (fd > emd->fdc) {
  192                         emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
  193                         emd->fdc = fd;
  194                         pem->epoll = emd;
  195                 }
  196         }
  197         emd->udata[fd] = udata;
  198         LINUX_PEM_XUNLOCK(pem);
  199 }
  200 
  201 static int
  202 epoll_create_common(struct thread *td, int flags)
  203 {
  204         int error;
  205 
  206         error = kern_kqueue(td, flags);
  207         if (error)
  208                 return (error);
  209 
  210         epoll_fd_install(td, EPOLL_DEF_SZ, 0);
  211 
  212         return (0);
  213 }
  214 
  215 int
  216 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
  217 {
  218 
  219         /*
  220          * args->size is unused. Linux just tests it
  221          * and then forgets it as well.
  222          */
  223         if (args->size <= 0)
  224                 return (EINVAL);
  225 
  226         return (epoll_create_common(td, 0));
  227 }
  228 
  229 int
  230 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
  231 {
  232         int flags;
  233 
  234         if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
  235                 return (EINVAL);
  236 
  237         flags = 0;
  238         if ((args->flags & LINUX_O_CLOEXEC) != 0)
  239                 flags |= O_CLOEXEC;
  240 
  241         return (epoll_create_common(td, flags));
  242 }
  243 
/*
 * Structure converting function from epoll to kevent.
 * Builds up to two kevents (EVFILT_READ/EVFILT_WRITE) in 'kevent' and
 * bumps *nkevents accordingly.  Returns EINVAL for epoll flags that
 * have no kevent equivalent, warning once per process.
 */
static int
epoll_to_kevent(struct thread *td, struct file *epfp,
    int fd, struct epoll_event *l_event, int *kev_flags,
    struct kevent *kevent, int *nkevents)
{
        uint32_t levents = l_event->events;
        struct linux_pemuldata *pem;
        struct proc *p;

        /* flags related to how event is registered */
        if ((levents & LINUX_EPOLLONESHOT) != 0)
                *kev_flags |= EV_ONESHOT;
        if ((levents & LINUX_EPOLLET) != 0)
                *kev_flags |= EV_CLEAR;         /* edge-triggered */
        if ((levents & LINUX_EPOLLERR) != 0)
                *kev_flags |= EV_ERROR;
        if ((levents & LINUX_EPOLLRDHUP) != 0)
                *kev_flags |= EV_EOF;

        /* flags related to what event is registered */
        if ((levents & LINUX_EPOLL_EVRD) != 0) {
                EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
                ++(*nkevents);
        }
        if ((levents & LINUX_EPOLL_EVWR) != 0) {
                EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
                ++(*nkevents);
        }

        /*
         * Any flag outside the supported set is rejected; the warning
         * is printed only once per process (LINUX_XUNSUP_EPOLL latches)
         * to avoid log spam.
         */
        if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
                p = td->td_proc;

                pem = pem_find(p);
                KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
                KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));

                LINUX_PEM_XLOCK(pem);
                if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
                        pem->flags |= LINUX_XUNSUP_EPOLL;
                        LINUX_PEM_XUNLOCK(pem);
                        linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n",
                            levents);
                } else
                        LINUX_PEM_XUNLOCK(pem);
                return (EINVAL);
        }

        return (0);
}
  294 
  295 /* 
  296  * Structure converting function from kevent to epoll. In a case
  297  * this is called on error in registration we store the error in
  298  * event->data and pick it up later in linux_epoll_ctl().
  299  */
  300 static void
  301 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
  302 {
  303 
  304         if ((kevent->flags & EV_ERROR) != 0) {
  305                 l_event->events = LINUX_EPOLLERR;
  306                 return;
  307         }
  308 
  309         switch (kevent->filter) {
  310         case EVFILT_READ:
  311                 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
  312                 if ((kevent->flags & EV_EOF) != 0)
  313                         l_event->events |= LINUX_EPOLLRDHUP;
  314         break;
  315         case EVFILT_WRITE:
  316                 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
  317         break;
  318         }
  319 }
  320 
/* 
 * Copyout callback used by kevent. This converts kevent
 * events to epoll events and copies them back to the
 * userspace. This is also called on error on registering
 * of the filter.
 */
static int
epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
{
        struct epoll_copyout_args *args;
        struct linux_pemuldata *pem;
        struct epoll_emuldata *emd;
        struct epoll_event *eep;
        int error, fd, i;

        args = (struct epoll_copyout_args*) arg;
        eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);

        pem = pem_find(args->p);
        KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
        LINUX_PEM_SLOCK(pem);
        emd = pem->epoll;
        KASSERT(emd != NULL, ("epoll proc epolldata not found.\n"));

        /* Convert each kevent and attach the stored 64-bit user data. */
        for (i = 0; i < count; i++) {
                kevent_to_epoll(&kevp[i], &eep[i]);

                fd = kevp[i].ident;
                KASSERT(fd <= emd->fdc, ("epoll user data vector"
                                                    " is too small.\n"));
                eep[i].data = emd->udata[fd];
        }
        LINUX_PEM_SUNLOCK(pem);

        error = copyout(eep, args->leventlist, count * sizeof(*eep));
        if (error == 0) {
                /* Advance the user buffer cursor past what we wrote. */
                args->leventlist += count;
                args->count += count;
        } else if (args->error == 0)
                args->error = error;    /* remember only the first failure */

        free(eep, M_EPOLL);
        return (error);
}
  365 
  366 /*
  367  * Copyin callback used by kevent. This copies already
  368  * converted filters from kernel memory to the kevent 
  369  * internal kernel memory. Hence the memcpy instead of
  370  * copyin.
  371  */
  372 static int
  373 epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
  374 {
  375         struct epoll_copyin_args *args;
  376 
  377         args = (struct epoll_copyin_args*) arg;
  378         
  379         memcpy(kevp, args->changelist, count * sizeof(*kevp));
  380         args->changelist += count;
  381 
  382         return (0);
  383 }
  384 
  385 /*
  386  * Load epoll filter, convert it to kevent filter
  387  * and load it into kevent subsystem.
  388  */
  389 int
  390 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
  391 {
  392         struct file *epfp, *fp;
  393         struct epoll_copyin_args ciargs;
  394         struct kevent kev[2];
  395         struct kevent_copyops k_ops = { &ciargs,
  396                                         NULL,
  397                                         epoll_kev_copyin};
  398         struct epoll_event le;
  399         cap_rights_t rights;
  400         int kev_flags;
  401         int nchanges = 0;
  402         int error;
  403 
  404         if (args->op != LINUX_EPOLL_CTL_DEL) {
  405                 error = copyin(args->event, &le, sizeof(le));
  406                 if (error != 0)
  407                         return (error);
  408         }
  409 
  410         error = fget(td, args->epfd,
  411             cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp);
  412         if (error != 0)
  413                 return (error);
  414         if (epfp->f_type != DTYPE_KQUEUE)
  415                 goto leave1;
  416 
  417          /* Protect user data vector from incorrectly supplied fd. */
  418         error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp);
  419         if (error != 0)
  420                 goto leave1;
  421 
  422         /* Linux disallows spying on himself */
  423         if (epfp == fp) {
  424                 error = EINVAL;
  425                 goto leave0;
  426         }
  427 
  428         ciargs.changelist = kev;
  429 
  430         switch (args->op) {
  431         case LINUX_EPOLL_CTL_MOD:
  432                 /*
  433                  * We don't memorize which events were set for this FD
  434                  * on this level, so just delete all we could have set:
  435                  * EVFILT_READ and EVFILT_WRITE, ignoring any errors
  436                  */
  437                 error = epoll_delete_all_events(td, epfp, args->fd);
  438                 if (error)
  439                         goto leave0;
  440                 /* FALLTHROUGH */
  441 
  442         case LINUX_EPOLL_CTL_ADD:
  443                         kev_flags = EV_ADD | EV_ENABLE;
  444                 break;
  445 
  446         case LINUX_EPOLL_CTL_DEL:
  447                 /* CTL_DEL means unregister this fd with this epoll */
  448                 error = epoll_delete_all_events(td, epfp, args->fd);
  449                 goto leave0;
  450 
  451         default:
  452                 error = EINVAL;
  453                 goto leave0;
  454         }
  455 
  456         error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags,
  457             kev, &nchanges);
  458         if (error)
  459                 goto leave0;
  460 
  461         epoll_fd_install(td, args->fd, le.data);
  462 
  463         error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
  464 
  465 leave0:
  466         fdrop(fp, td);
  467 
  468 leave1:
  469         fdrop(epfp, td);
  470         return (error);
  471 }
  472 
/*
 * Wait for a filter to be triggered on the epoll file descriptor.
 *
 * 'timeout' is in milliseconds; -1 means wait forever.  When 'uset' is
 * non-NULL the signal mask is swapped for the duration of the call and
 * restored on return to usermode via TDP_OLDMASK/ast().  On success
 * td->td_retval[0] holds the number of events copied out.
 */
static int
linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
    int maxevents, int timeout, sigset_t *uset)
{
        struct file *epfp;
        struct timespec ts, *tsp;
        cap_rights_t rights;
        struct epoll_copyout_args coargs;
        struct kevent_copyops k_ops = { &coargs,
                                        epoll_kev_copyout,
                                        NULL};
        int error;

        if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
                return (EINVAL);

        if (uset != NULL) {
                error = kern_sigprocmask(td, SIG_SETMASK, uset,
                    &td->td_oldsigmask, 0);
                if (error != 0)
                        return (error);
                td->td_pflags |= TDP_OLDMASK;
                /*
                 * Make sure that ast() is called on return to
                 * usermode and TDP_OLDMASK is cleared, restoring old
                 * sigmask.
                 */
                thread_lock(td);
                td->td_flags |= TDF_ASTPENDING;
                thread_unlock(td);
        }

        error = fget(td, epfd,
            cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp);
        if (error != 0)
                return (error);

        /* Copyout state consumed by epoll_kev_copyout(). */
        coargs.leventlist = events;
        coargs.p = td->td_proc;
        coargs.count = 0;
        coargs.error = 0;

        if (timeout != -1) {
                if (timeout < 0) {
                        error = EINVAL;
                        goto leave;
                }
                /* Convert from milliseconds to timespec. */
                ts.tv_sec = timeout / 1000;
                ts.tv_nsec = (timeout % 1000) * 1000000;
                tsp = &ts;
        } else {
                tsp = NULL;
        }

        error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
        if (error == 0 && coargs.error != 0)
                error = coargs.error;

        /* 
         * kern_kevent might return ENOMEM which is not expected from epoll_wait.
         * Maybe we should translate that but I don't think it matters at all.
         */
        if (error == 0)
                td->td_retval[0] = coargs.count;
leave:
        fdrop(epfp, td);
        return (error);
}
  545 
  546 int
  547 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
  548 {
  549 
  550         return (linux_epoll_wait_common(td, args->epfd, args->events,
  551             args->maxevents, args->timeout, NULL));
  552 }
  553 
  554 int
  555 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
  556 {
  557         sigset_t mask, *pmask;
  558         l_sigset_t lmask;
  559         int error;
  560 
  561         if (args->mask != NULL) {
  562                 error = copyin(args->mask, &lmask, sizeof(l_sigset_t));
  563                 if (error != 0)
  564                         return (error);
  565                 linux_to_bsd_sigset(&lmask, &mask);
  566                 pmask = &mask;
  567         } else
  568                 pmask = NULL;
  569         return (linux_epoll_wait_common(td, args->epfd, args->events,
  570             args->maxevents, args->timeout, pmask));
  571 }
  572 
  573 static int
  574 epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
  575 {
  576         struct epoll_copyin_args ciargs;
  577         struct kevent kev;
  578         struct kevent_copyops k_ops = { &ciargs,
  579                                         NULL,
  580                                         epoll_kev_copyin};
  581         int error;
  582 
  583         ciargs.changelist = &kev;
  584         EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
  585 
  586         error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL);
  587 
  588         /*
  589          * here we ignore ENONT, because we don't keep track of events here
  590          */
  591         if (error == ENOENT)
  592                 error = 0;
  593         return (error);
  594 }
  595 
  596 static int
  597 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
  598 {
  599         int error1, error2;
  600 
  601         error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ);
  602         error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
  603 
  604         /* report any errors we got */
  605         return (error1 == 0 ? error2 : error1);
  606 }
  607 
  608 static int
  609 eventfd_create(struct thread *td, uint32_t initval, int flags)
  610 {
  611         struct filedesc *fdp;
  612         struct eventfd *efd;
  613         struct file *fp;
  614         int fflags, fd, error;
  615 
  616         fflags = 0;
  617         if ((flags & LINUX_O_CLOEXEC) != 0)
  618                 fflags |= O_CLOEXEC;
  619 
  620         fdp = td->td_proc->p_fd;
  621         error = falloc(td, &fp, &fd, fflags);
  622         if (error)
  623                 return (error);
  624 
  625         efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO);
  626         efd->efd_flags = flags;
  627         efd->efd_count = initval;
  628         mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
  629 
  630         knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
  631 
  632         fflags = FREAD | FWRITE; 
  633         if ((flags & LINUX_O_NONBLOCK) != 0)
  634                 fflags |= FNONBLOCK;
  635 
  636         finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops);
  637         fdrop(fp, td);
  638 
  639         td->td_retval[0] = fd;
  640         return (error);
  641 }
  642 
  643 int
  644 linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
  645 {
  646 
  647         return (eventfd_create(td, args->initval, 0));
  648 }
  649 
  650 int
  651 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
  652 {
  653 
  654         if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0)
  655                 return (EINVAL);
  656 
  657         return (eventfd_create(td, args->initval, args->flags));
  658 }
  659 
  660 static int
  661 eventfd_close(struct file *fp, struct thread *td)
  662 {
  663         struct eventfd *efd;
  664 
  665         efd = fp->f_data;
  666         if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
  667                 return (EBADF);
  668 
  669         seldrain(&efd->efd_sel);
  670         knlist_destroy(&efd->efd_sel.si_note);
  671 
  672         fp->f_ops = &badfileops;
  673         mtx_destroy(&efd->efd_lock);
  674         free(efd, M_EPOLL);
  675 
  676         return (0);
  677 }
  678 
/*
 * fo_read for eventfd: deliver the 64-bit counter to the caller.
 * With LINUX_EFD_SEMAPHORE set, deliver 1 and decrement; otherwise
 * deliver the whole counter and reset it to zero.  Blocks while the
 * counter is zero unless LINUX_O_NONBLOCK is set.
 */
static int
eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
        int flags, struct thread *td)
{
        struct eventfd *efd;
        eventfd_t count;
        int error;

        efd = fp->f_data;
        if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
                return (EBADF);

        /* Reads must be at least the size of the 64-bit counter. */
        if (uio->uio_resid < sizeof(eventfd_t))
                return (EINVAL);

        error = 0;
        mtx_lock(&efd->efd_lock);
retry:
        if (efd->efd_count == 0) {
                if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
                        mtx_unlock(&efd->efd_lock);
                        return (EAGAIN);
                }
                /* Sleep until a writer bumps the counter; PCATCH allows signals. */
                error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
                if (error == 0)
                        goto retry;
        }
        if (error == 0) {
                if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
                        count = 1;
                        --efd->efd_count;
                } else {
                        count = efd->efd_count;
                        efd->efd_count = 0;
                }
                /* Counter shrank: notify poll/kevent and blocked writers. */
                KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
                selwakeup(&efd->efd_sel);
                wakeup(&efd->efd_count);
                mtx_unlock(&efd->efd_lock);
                error = uiomove(&count, sizeof(eventfd_t), uio);
        } else
                mtx_unlock(&efd->efd_lock);

        return (error);
}
  724 
/*
 * fo_write for eventfd: add the written 64-bit value to the counter.
 * A value of UINT64_MAX is invalid.  Blocks while the addition would
 * push the counter past UINT64_MAX - 1, unless LINUX_O_NONBLOCK is set.
 */
static int
eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
         int flags, struct thread *td)
{
        struct eventfd *efd;
        eventfd_t count;
        int error;

        efd = fp->f_data;
        if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
                return (EBADF);

        /* Writes must be at least the size of the 64-bit counter. */
        if (uio->uio_resid < sizeof(eventfd_t))
                return (EINVAL);

        error = uiomove(&count, sizeof(eventfd_t), uio);
        if (error)
                return (error);
        if (count == UINT64_MAX)
                return (EINVAL);

        mtx_lock(&efd->efd_lock);
retry:
        if (UINT64_MAX - efd->efd_count <= count) {
                if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
                        mtx_unlock(&efd->efd_lock);
                        /* Do not return the number of bytes written. */
                        uio->uio_resid += sizeof(eventfd_t);
                        return (EAGAIN);
                }
                /* Sleep until a reader makes room; PCATCH allows signals. */
                error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
                    PCATCH, "lefdwr", 0);
                if (error == 0)
                        goto retry;
        }
        if (error == 0) {
                efd->efd_count += count;
                /* Counter grew: notify poll/kevent and blocked readers. */
                KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
                selwakeup(&efd->efd_sel);
                wakeup(&efd->efd_count);
        }
        mtx_unlock(&efd->efd_lock);

        return (error);
}
  770 
  771 static int
  772 eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
  773         struct thread *td)
  774 {
  775         struct eventfd *efd;
  776         int revents = 0;
  777 
  778         efd = fp->f_data;
  779         if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
  780                 return (POLLERR);
  781 
  782         mtx_lock(&efd->efd_lock);
  783         if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
  784                 revents |= events & (POLLIN|POLLRDNORM);
  785         if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
  786                 revents |= events & (POLLOUT|POLLWRNORM);
  787         if (revents == 0)
  788                 selrecord(td, &efd->efd_sel);
  789         mtx_unlock(&efd->efd_lock);
  790 
  791         return (revents);
  792 }
  793 
  794 /*ARGSUSED*/
  795 static int
  796 eventfd_kqfilter(struct file *fp, struct knote *kn)
  797 {
  798         struct eventfd *efd;
  799 
  800         efd = fp->f_data;
  801         if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
  802                 return (EINVAL);
  803 
  804         mtx_lock(&efd->efd_lock);
  805         switch (kn->kn_filter) {
  806         case EVFILT_READ:
  807                 kn->kn_fop = &eventfd_rfiltops;
  808                 break;
  809         case EVFILT_WRITE:
  810                 kn->kn_fop = &eventfd_wfiltops;
  811                 break;
  812         default:
  813                 mtx_unlock(&efd->efd_lock);
  814                 return (EINVAL);
  815         }
  816 
  817         kn->kn_hook = efd;
  818         knlist_add(&efd->efd_sel.si_note, kn, 1);
  819         mtx_unlock(&efd->efd_lock);
  820 
  821         return (0);
  822 }
  823 
  824 static void
  825 filt_eventfddetach(struct knote *kn)
  826 {
  827         struct eventfd *efd = kn->kn_hook;
  828 
  829         mtx_lock(&efd->efd_lock);
  830         knlist_remove(&efd->efd_sel.si_note, kn, 1);
  831         mtx_unlock(&efd->efd_lock);
  832 }
  833 
  834 /*ARGSUSED*/
  835 static int
  836 filt_eventfdread(struct knote *kn, long hint)
  837 {
  838         struct eventfd *efd = kn->kn_hook;
  839         int ret;
  840 
  841         mtx_assert(&efd->efd_lock, MA_OWNED);
  842         ret = (efd->efd_count > 0);
  843 
  844         return (ret);
  845 }
  846 
  847 /*ARGSUSED*/
  848 static int
  849 filt_eventfdwrite(struct knote *kn, long hint)
  850 {
  851         struct eventfd *efd = kn->kn_hook;
  852         int ret;
  853 
  854         mtx_assert(&efd->efd_lock, MA_OWNED);
  855         ret = (UINT64_MAX - 1 > efd->efd_count);
  856 
  857         return (ret);
  858 }
  859 
  860 /*ARGSUSED*/
  861 static int
  862 eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred,
  863         struct thread *td)
  864 {
  865 
  866         return (ENXIO);
  867 }
  868 
/*ARGSUSED*/
/*
 * fo_ioctl for eventfd: FIONBIO toggles LINUX_O_NONBLOCK, FIOASYNC is
 * accepted as a no-op, everything else fails with ENXIO.
 */
static int
eventfd_ioctl(struct file *fp, u_long cmd, void *data,
        struct ucred *active_cred, struct thread *td)
{
        struct eventfd *efd;

        efd = fp->f_data;
        if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
                return (EINVAL);

        switch (cmd)
        {
        case FIONBIO:
                if (*(int *)data)
                        efd->efd_flags |= LINUX_O_NONBLOCK;
                else
                        efd->efd_flags &= ~LINUX_O_NONBLOCK;
                /* FALLTHROUGH */
        case FIOASYNC:
                return (0);
        default:
                return (ENXIO);
        }
}
  893 
  894 /*ARGSUSED*/
  895 static int
  896 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
  897         struct thread *td)
  898 {
  899 
  900         return (ENXIO);
  901 }

Cache object: 1b54d9e2b2315834ee84424dc13fc61c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.