FreeBSD/Linux Kernel Cross Reference
sys/fs/eventfd.c

    1 /*
    2  *  fs/eventfd.c
    3  *
    4  *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
    5  *
    6  */
    7 
    8 #include <linux/file.h>
    9 #include <linux/poll.h>
   10 #include <linux/init.h>
   11 #include <linux/fs.h>
   12 #include <linux/sched.h>
   13 #include <linux/kernel.h>
   14 #include <linux/slab.h>
   15 #include <linux/list.h>
   16 #include <linux/spinlock.h>
   17 #include <linux/anon_inodes.h>
   18 #include <linux/syscalls.h>
   19 #include <linux/export.h>
   20 #include <linux/kref.h>
   21 #include <linux/eventfd.h>
   22 #include <linux/proc_fs.h>
   23 #include <linux/seq_file.h>
   24 
   25 struct eventfd_ctx {
   26         struct kref kref;
   27         wait_queue_head_t wqh;
   28         /*
   29          * Every time that a write(2) is performed on an eventfd, the
   30          * value of the __u64 being written is added to "count" and a
   31          * wakeup is performed on "wqh". A read(2) will return the "count"
   32          * value to userspace, and will reset "count" to zero. The kernel
   33          * side eventfd_signal() also adds to the "count" counter and
   34          * issues a wakeup.
   35          */
   36         __u64 count;
   37         unsigned int flags;
   38 };
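
The comment above describes the userspace-visible semantics of the counter. As a
minimal userspace sketch (assuming a Linux host and glibc's <sys/eventfd.h>
wrapper, which are not part of this file), writes add to the counter and a plain
read returns the accumulated value and resets it:

#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t add = 3, got;
        int efd = eventfd(0, 0);                /* counter starts at 0 */

        if (efd < 0)
                return 1;
        write(efd, &add, sizeof(add));          /* "count" becomes 3, pollers woken */
        write(efd, &add, sizeof(add));          /* "count" becomes 6 */
        read(efd, &got, sizeof(got));           /* returns 6 and resets "count" to 0 */
        printf("read %llu\n", (unsigned long long)got);
        close(efd);
        return 0;
}
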
   39 
   40 /**
   41  * eventfd_signal - Adds @n to the eventfd counter.
   42  * @ctx: [in] Pointer to the eventfd context.
   43  * @n: [in] Value to be added to the eventfd internal counter.
   44  *          The value cannot be negative.
   45  *
   46  * This function is supposed to be called by the kernel in paths that do not
   47  * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
   48  * value, and we signal this as an overflow condition by returning POLLERR
   49  * to poll(2).
   50  *
   51  * Returns the amount by which the counter was incremented.  This will be less
   52  * than @n if the counter has overflowed.
   53  */
   54 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
   55 {
   56         unsigned long flags;
   57 
   58         spin_lock_irqsave(&ctx->wqh.lock, flags);
   59         if (ULLONG_MAX - ctx->count < n)
   60                 n = ULLONG_MAX - ctx->count;
   61         ctx->count += n;
   62         if (waitqueue_active(&ctx->wqh))
   63                 wake_up_locked_poll(&ctx->wqh, POLLIN);
   64         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
   65 
   66         return n;
   67 }
   68 EXPORT_SYMBOL_GPL(eventfd_signal);
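
Since eventfd_signal() never sleeps, a typical caller is an interrupt handler. A
hedged sketch under assumed names (struct my_dev and its efd_ctx field are
hypothetical and not part of this file; the context would have been pinned
earlier with eventfd_ctx_fdget(), and irqreturn_t comes from <linux/interrupt.h>):

/* Illustrative only: my_dev and efd_ctx are hypothetical. */
static irqreturn_t my_dev_irq(int irq, void *data)
{
        struct my_dev *dev = data;

        /* Safe in atomic context: adds 1 to the counter and wakes any pollers. */
        if (dev->efd_ctx)
                eventfd_signal(dev->efd_ctx, 1);
        return IRQ_HANDLED;
}
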
   69 
   70 static void eventfd_free_ctx(struct eventfd_ctx *ctx)
   71 {
   72         kfree(ctx);
   73 }
   74 
   75 static void eventfd_free(struct kref *kref)
   76 {
   77         struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
   78 
   79         eventfd_free_ctx(ctx);
   80 }
   81 
   82 /**
   83  * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
   84  * @ctx: [in] Pointer to the eventfd context.
   85  *
   86  * Returns: A pointer to the eventfd context.
   87  */
   88 struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
   89 {
   90         kref_get(&ctx->kref);
   91         return ctx;
   92 }
   93 EXPORT_SYMBOL_GPL(eventfd_ctx_get);
   94 
   95 /**
   96  * eventfd_ctx_put - Releases a reference to the internal eventfd context.
   97  * @ctx: [in] Pointer to eventfd context.
   98  *
   99  * The eventfd context reference must have been previously acquired either
  100  * with eventfd_ctx_get() or eventfd_ctx_fdget().
  101  */
  102 void eventfd_ctx_put(struct eventfd_ctx *ctx)
  103 {
  104         kref_put(&ctx->kref, eventfd_free);
  105 }
  106 EXPORT_SYMBOL_GPL(eventfd_ctx_put);
  107 
  108 static int eventfd_release(struct inode *inode, struct file *file)
  109 {
  110         struct eventfd_ctx *ctx = file->private_data;
  111 
  112         wake_up_poll(&ctx->wqh, POLLHUP);
  113         eventfd_ctx_put(ctx);
  114         return 0;
  115 }
  116 
  117 static unsigned int eventfd_poll(struct file *file, poll_table *wait)
  118 {
  119         struct eventfd_ctx *ctx = file->private_data;
  120         unsigned int events = 0;
  121         unsigned long flags;
  122 
  123         poll_wait(file, &ctx->wqh, wait);
  124 
  125         spin_lock_irqsave(&ctx->wqh.lock, flags);
  126         if (ctx->count > 0)
  127                 events |= POLLIN;
  128         if (ctx->count == ULLONG_MAX)
  129                 events |= POLLERR;
  130         if (ULLONG_MAX - 1 > ctx->count)
  131                 events |= POLLOUT;
  132         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
  133 
  134         return events;
  135 }
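
For reference, a userspace sketch of how these poll bits surface (assuming the
standard <poll.h> and <sys/eventfd.h> interfaces, which are not part of this
file): POLLIN means the counter is non-zero, POLLERR reports the overflow state,
and POLLOUT means a write of at least 1 would not block.

/* Userspace sketch: wait until the eventfd counter becomes non-zero. */
#include <poll.h>
#include <sys/eventfd.h>
#include <stdint.h>
#include <unistd.h>

int wait_and_drain(int efd)
{
        struct pollfd pfd = { .fd = efd, .events = POLLIN };
        uint64_t cnt;

        if (poll(&pfd, 1, -1) < 0)
                return -1;
        if (pfd.revents & POLLERR)      /* counter saturated at ULLONG_MAX */
                return -1;
        if (pfd.revents & POLLIN)       /* count > 0: read returns and resets it */
                return read(efd, &cnt, sizeof(cnt)) == sizeof(cnt) ? 0 : -1;
        return 0;
}
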
  136 
  137 static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
  138 {
  139         *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
  140         ctx->count -= *cnt;
  141 }
  142 
  143 /**
  144  * eventfd_ctx_remove_wait_queue - Reads the current counter and removes the wait queue.
  145  * @ctx: [in] Pointer to eventfd context.
  146  * @wait: [in] Wait queue to be removed.
  147  * @cnt: [out] Pointer to the 64-bit counter value.
  148  *
  149  * Returns %0 if successful, or the following error codes:
  150  *
  151  * -EAGAIN      : The operation would have blocked.
  152  *
  153  * This is used to atomically remove a wait queue entry from the eventfd wait
  154  * queue head, and read/reset the counter value.
  155  */
  156 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
  157                                   __u64 *cnt)
  158 {
  159         unsigned long flags;
  160 
  161         spin_lock_irqsave(&ctx->wqh.lock, flags);
  162         eventfd_ctx_do_read(ctx, cnt);
  163         __remove_wait_queue(&ctx->wqh, wait);
  164         if (*cnt != 0 && waitqueue_active(&ctx->wqh))
  165                 wake_up_locked_poll(&ctx->wqh, POLLOUT);
  166         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
  167 
  168         return *cnt != 0 ? 0 : -EAGAIN;
  169 }
  170 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
  171 
  172 /**
  173  * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
  174  * @ctx: [in] Pointer to eventfd context.
  175  * @no_wait: [in] Non-zero if the operation should not block.
  176  * @cnt: [out] Pointer to the 64-bit counter value.
  177  *
  178  * Returns %0 if successful, or the following error codes:
  179  *
  180  * -EAGAIN      : The operation would have blocked but @no_wait was non-zero.
  181  * -ERESTARTSYS : A signal interrupted the wait operation.
  182  *
  183  * If @no_wait is zero, the function might sleep until the eventfd internal
  184  * counter becomes greater than zero.
  185  */
  186 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
  187 {
  188         ssize_t res;
  189         DECLARE_WAITQUEUE(wait, current);
  190 
  191         spin_lock_irq(&ctx->wqh.lock);
  192         *cnt = 0;
  193         res = -EAGAIN;
  194         if (ctx->count > 0)
  195                 res = 0;
  196         else if (!no_wait) {
  197                 __add_wait_queue(&ctx->wqh, &wait);
  198                 for (;;) {
  199                         set_current_state(TASK_INTERRUPTIBLE);
  200                         if (ctx->count > 0) {
  201                                 res = 0;
  202                                 break;
  203                         }
  204                         if (signal_pending(current)) {
  205                                 res = -ERESTARTSYS;
  206                                 break;
  207                         }
  208                         spin_unlock_irq(&ctx->wqh.lock);
  209                         schedule();
  210                         spin_lock_irq(&ctx->wqh.lock);
  211                 }
  212                 __remove_wait_queue(&ctx->wqh, &wait);
  213                 __set_current_state(TASK_RUNNING);
  214         }
  215         if (likely(res == 0)) {
  216                 eventfd_ctx_do_read(ctx, cnt);
  217                 if (waitqueue_active(&ctx->wqh))
  218                         wake_up_locked_poll(&ctx->wqh, POLLOUT);
  219         }
  220         spin_unlock_irq(&ctx->wqh.lock);
  221 
  222         return res;
  223 }
  224 EXPORT_SYMBOL_GPL(eventfd_ctx_read);
  225 
  226 static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
  227                             loff_t *ppos)
  228 {
  229         struct eventfd_ctx *ctx = file->private_data;
  230         ssize_t res;
  231         __u64 cnt;
  232 
  233         if (count < sizeof(cnt))
  234                 return -EINVAL;
  235         res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
  236         if (res < 0)
  237                 return res;
  238 
  239         return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
  240 }
  241 
  242 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
  243                              loff_t *ppos)
  244 {
  245         struct eventfd_ctx *ctx = file->private_data;
  246         ssize_t res;
  247         __u64 ucnt;
  248         DECLARE_WAITQUEUE(wait, current);
  249 
  250         if (count < sizeof(ucnt))
  251                 return -EINVAL;
  252         if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
  253                 return -EFAULT;
  254         if (ucnt == ULLONG_MAX)
  255                 return -EINVAL;
  256         spin_lock_irq(&ctx->wqh.lock);
  257         res = -EAGAIN;
  258         if (ULLONG_MAX - ctx->count > ucnt)
  259                 res = sizeof(ucnt);
  260         else if (!(file->f_flags & O_NONBLOCK)) {
  261                 __add_wait_queue(&ctx->wqh, &wait);
  262                 for (res = 0;;) {
  263                         set_current_state(TASK_INTERRUPTIBLE);
  264                         if (ULLONG_MAX - ctx->count > ucnt) {
  265                                 res = sizeof(ucnt);
  266                                 break;
  267                         }
  268                         if (signal_pending(current)) {
  269                                 res = -ERESTARTSYS;
  270                                 break;
  271                         }
  272                         spin_unlock_irq(&ctx->wqh.lock);
  273                         schedule();
  274                         spin_lock_irq(&ctx->wqh.lock);
  275                 }
  276                 __remove_wait_queue(&ctx->wqh, &wait);
  277                 __set_current_state(TASK_RUNNING);
  278         }
  279         if (likely(res > 0)) {
  280                 ctx->count += ucnt;
  281                 if (waitqueue_active(&ctx->wqh))
  282                         wake_up_locked_poll(&ctx->wqh, POLLIN);
  283         }
  284         spin_unlock_irq(&ctx->wqh.lock);
  285 
  286         return res;
  287 }
  288 
  289 #ifdef CONFIG_PROC_FS
  290 static int eventfd_show_fdinfo(struct seq_file *m, struct file *f)
  291 {
  292         struct eventfd_ctx *ctx = f->private_data;
  293         int ret;
  294 
  295         spin_lock_irq(&ctx->wqh.lock);
  296         ret = seq_printf(m, "eventfd-count: %16llx\n",
  297                          (unsigned long long)ctx->count);
  298         spin_unlock_irq(&ctx->wqh.lock);
  299 
  300         return ret;
  301 }
  302 #endif
  303 
  304 static const struct file_operations eventfd_fops = {
  305 #ifdef CONFIG_PROC_FS
  306         .show_fdinfo    = eventfd_show_fdinfo,
  307 #endif
  308         .release        = eventfd_release,
  309         .poll           = eventfd_poll,
  310         .read           = eventfd_read,
  311         .write          = eventfd_write,
  312         .llseek         = noop_llseek,
  313 };
  314 
  315 /**
  316  * eventfd_fget - Acquires a reference to an eventfd file descriptor.
  317  * @fd: [in] Eventfd file descriptor.
  318  *
  319  * Returns a pointer to the eventfd file structure in case of success, or one
  320  * of the following error pointers:
  321  *
  322  * -EBADF    : Invalid @fd file descriptor.
  323  * -EINVAL   : The @fd file descriptor is not an eventfd file.
  324  */
  325 struct file *eventfd_fget(int fd)
  326 {
  327         struct file *file;
  328 
  329         file = fget(fd);
  330         if (!file)
  331                 return ERR_PTR(-EBADF);
  332         if (file->f_op != &eventfd_fops) {
  333                 fput(file);
  334                 return ERR_PTR(-EINVAL);
  335         }
  336 
  337         return file;
  338 }
  339 EXPORT_SYMBOL_GPL(eventfd_fget);
  340 
  341 /**
  342  * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
  343  * @fd: [in] Eventfd file descriptor.
  344  *
  345  * Returns a pointer to the internal eventfd context on success, otherwise an
  346  * error pointer as returned by the following function:
  347  *
  348  * eventfd_fget
  349  */
  350 struct eventfd_ctx *eventfd_ctx_fdget(int fd)
  351 {
  352         struct file *file;
  353         struct eventfd_ctx *ctx;
  354 
  355         file = eventfd_fget(fd);
  356         if (IS_ERR(file))
  357                 return (struct eventfd_ctx *) file;
  358         ctx = eventfd_ctx_get(file->private_data);
  359         fput(file);
  360 
  361         return ctx;
  362 }
  363 EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
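
The usual kernel-side pattern built on this helper: code handling an ioctl (or
similar) receives an eventfd file descriptor from userspace, pins the context,
signals it later with eventfd_signal(), and drops the reference on teardown. A
hedged sketch; my_attach_eventfd(), my_detach_eventfd(), and struct my_dev are
hypothetical and not part of this file:

/* Hypothetical helpers showing the fdget/signal/put life cycle. */
static int my_attach_eventfd(struct my_dev *dev, int fd)
{
        struct eventfd_ctx *ctx;

        ctx = eventfd_ctx_fdget(fd);            /* takes a context reference */
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
        dev->efd_ctx = ctx;                     /* eventfd_signal(dev->efd_ctx, 1) later */
        return 0;
}

static void my_detach_eventfd(struct my_dev *dev)
{
        if (dev->efd_ctx) {
                eventfd_ctx_put(dev->efd_ctx);  /* drop the reference taken above */
                dev->efd_ctx = NULL;
        }
}
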
  364 
  365 /**
  366  * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
  367  * @file: [in] Eventfd file pointer.
  368  *
  369  * Returns a pointer to the internal eventfd context, otherwise the error
  370  * pointer:
  371  *
  372  * -EINVAL   : The @file pointer does not refer to an eventfd file.
  373  */
  374 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
  375 {
  376         if (file->f_op != &eventfd_fops)
  377                 return ERR_PTR(-EINVAL);
  378 
  379         return eventfd_ctx_get(file->private_data);
  380 }
  381 EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
  382 
  383 /**
  384  * eventfd_file_create - Creates an eventfd file pointer.
  385  * @count: Initial eventfd counter value.
  386  * @flags: Flags for the eventfd file.
  387  *
  388  * This function creates an eventfd file pointer, without installing it into
  389  * the fd table. This is useful when the eventfd file is used during the
  390  * initialization of data structures that require extra setup after the eventfd
  391  * creation. So the eventfd creation is split into the file pointer creation
  392  * phase, and the file descriptor installation phase.
  393  * In this way races with userspace closing the newly installed file descriptor
  394  * can be avoided.
  395  * Returns an eventfd file pointer, or a proper error pointer.
  396  */
  397 struct file *eventfd_file_create(unsigned int count, int flags)
  398 {
  399         struct file *file;
  400         struct eventfd_ctx *ctx;
  401 
  402         /* Check the EFD_* constants for consistency.  */
  403         BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
  404         BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
  405 
  406         if (flags & ~EFD_FLAGS_SET)
  407                 return ERR_PTR(-EINVAL);
  408 
  409         ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
  410         if (!ctx)
  411                 return ERR_PTR(-ENOMEM);
  412 
  413         kref_init(&ctx->kref);
  414         init_waitqueue_head(&ctx->wqh);
  415         ctx->count = count;
  416         ctx->flags = flags;
  417 
  418         file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
  419                                   O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
  420         if (IS_ERR(file))
  421                 eventfd_free_ctx(ctx);
  422 
  423         return file;
  424 }
  425 
  426 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
  427 {
  428         int fd, error;
  429         struct file *file;
  430 
  431         error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
  432         if (error < 0)
  433                 return error;
  434         fd = error;
  435 
  436         file = eventfd_file_create(count, flags);
  437         if (IS_ERR(file)) {
  438                 error = PTR_ERR(file);
  439                 goto err_put_unused_fd;
  440         }
  441         fd_install(fd, file);
  442 
  443         return fd;
  444 
  445 err_put_unused_fd:
  446         put_unused_fd(fd);
  447 
  448         return error;
  449 }
  450 
  451 SYSCALL_DEFINE1(eventfd, unsigned int, count)
  452 {
  453         return sys_eventfd2(count, 0);
  454 }
  455 
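
A minimal userspace sketch of the eventfd2 entry point above with EFD_SEMAPHORE
(assuming glibc's eventfd(2) wrapper, which invokes eventfd2 with flags): in
semaphore mode each read returns 1 and decrements the counter by one instead of
resetting it, matching eventfd_ctx_do_read().

/* Userspace sketch: semaphore-mode eventfd. */
#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t post = 3, got;
        int efd = eventfd(0, EFD_SEMAPHORE);

        if (efd < 0)
                return 1;
        write(efd, &post, sizeof(post));        /* counter = 3 */
        read(efd, &got, sizeof(got));           /* got = 1, counter = 2 */
        read(efd, &got, sizeof(got));           /* got = 1, counter = 1 */
        printf("semaphore read: %llu\n", (unsigned long long)got);
        close(efd);
        return 0;
}
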

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.