The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/pipe.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/fs/pipe.c
    3  *
    4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
    5  */
    6 
    7 #include <linux/mm.h>
    8 #include <linux/file.h>
    9 #include <linux/poll.h>
   10 #include <linux/slab.h>
   11 #include <linux/module.h>
   12 #include <linux/init.h>
   13 #include <linux/fs.h>
   14 #include <linux/log2.h>
   15 #include <linux/mount.h>
   16 #include <linux/magic.h>
   17 #include <linux/pipe_fs_i.h>
   18 #include <linux/uio.h>
   19 #include <linux/highmem.h>
   20 #include <linux/pagemap.h>
   21 #include <linux/audit.h>
   22 #include <linux/syscalls.h>
   23 #include <linux/fcntl.h>
   24 
   25 #include <asm/uaccess.h>
   26 #include <asm/ioctls.h>
   27 
   28 /*
   29  * The max size that a non-root user is allowed to grow the pipe. Can
   30  * be set by root in /proc/sys/fs/pipe-max-size
   31  */
   32 unsigned int pipe_max_size = 1048576;
   33 
   34 /*
   35  * Minimum pipe size, as required by POSIX.
       * The default is a single page (PAGE_SIZE).
   36  */
   37 unsigned int pipe_min_size = PAGE_SIZE;
   38 
   39 /*
   40  * We use a start+len construction, which provides full use of the 
   41  * allocated memory.
   42  * -- Florian Coosmann (FGC)
   43  * 
   44  * Reads with count = 0 should always return 0.
   45  * -- Julian Bradfield 1999-06-07.
   46  *
   47  * FIFOs and Pipes now generate SIGIO for both readers and writers.
   48  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
   49  *
   50  * pipe_read & write cleanup
   51  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
   52  */
   53 
   54 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
   55 {
   56         if (pipe->inode)
   57                 mutex_lock_nested(&pipe->inode->i_mutex, subclass);
   58 }
   59 
   60 void pipe_lock(struct pipe_inode_info *pipe)
   61 {
   62         /*
   63          * pipe_lock() nests non-pipe inode locks (for writing to a file)
   64          */
   65         pipe_lock_nested(pipe, I_MUTEX_PARENT);
   66 }
   67 EXPORT_SYMBOL(pipe_lock);
   68 
   69 void pipe_unlock(struct pipe_inode_info *pipe)
   70 {
   71         if (pipe->inode)
   72                 mutex_unlock(&pipe->inode->i_mutex);
   73 }
   74 EXPORT_SYMBOL(pipe_unlock);
   75 
   76 void pipe_double_lock(struct pipe_inode_info *pipe1,
   77                       struct pipe_inode_info *pipe2)
   78 {
   79         BUG_ON(pipe1 == pipe2);
   80 
   81         if (pipe1 < pipe2) {
   82                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
   83                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
   84         } else {
   85                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
   86                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
   87         }
   88 }
   89 
   90 /* Drop the inode semaphore and wait for a pipe event, atomically */
   91 void pipe_wait(struct pipe_inode_info *pipe)
   92 {
   93         DEFINE_WAIT(wait);
   94 
   95         /*
   96          * Pipes are system-local resources, so sleeping on them
   97          * is considered a noninteractive wait:
   98          */
   99         prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
  100         pipe_unlock(pipe);
  101         schedule();
  102         finish_wait(&pipe->wait, &wait);
  103         pipe_lock(pipe);
  104 }
  105 
  106 static int
  107 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
  108                         int atomic)
  109 {
  110         unsigned long copy;
  111 
  112         while (len > 0) {
  113                 while (!iov->iov_len)
  114                         iov++;
  115                 copy = min_t(unsigned long, len, iov->iov_len);
  116 
  117                 if (atomic) {
  118                         if (__copy_from_user_inatomic(to, iov->iov_base, copy))
  119                                 return -EFAULT;
  120                 } else {
  121                         if (copy_from_user(to, iov->iov_base, copy))
  122                                 return -EFAULT;
  123                 }
  124                 to += copy;
  125                 len -= copy;
  126                 iov->iov_base += copy;
  127                 iov->iov_len -= copy;
  128         }
  129         return 0;
  130 }
  131 
  132 static int
  133 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
  134                       int atomic)
  135 {
  136         unsigned long copy;
  137 
  138         while (len > 0) {
  139                 while (!iov->iov_len)
  140                         iov++;
  141                 copy = min_t(unsigned long, len, iov->iov_len);
  142 
  143                 if (atomic) {
  144                         if (__copy_to_user_inatomic(iov->iov_base, from, copy))
  145                                 return -EFAULT;
  146                 } else {
  147                         if (copy_to_user(iov->iov_base, from, copy))
  148                                 return -EFAULT;
  149                 }
  150                 from += copy;
  151                 len -= copy;
  152                 iov->iov_base += copy;
  153                 iov->iov_len -= copy;
  154         }
  155         return 0;
  156 }
  157 
  158 /*
  159  * Attempt to pre-fault in the user memory, so we can use atomic copies.
  160  * Returns the number of bytes not faulted in.
  161  */
  162 static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
  163 {
  164         while (!iov->iov_len)
  165                 iov++;
  166 
  167         while (len > 0) {
  168                 unsigned long this_len;
  169 
  170                 this_len = min_t(unsigned long, len, iov->iov_len);
  171                 if (fault_in_pages_writeable(iov->iov_base, this_len))
  172                         break;
  173 
  174                 len -= this_len;
  175                 iov++;
  176         }
  177 
  178         return len;
  179 }
  180 
  181 /*
  182  * Pre-fault in the user memory, so we can use atomic copies.
  183  */
  184 static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
  185 {
  186         while (!iov->iov_len)
  187                 iov++;
  188 
  189         while (len > 0) {
  190                 unsigned long this_len;
  191 
  192                 this_len = min_t(unsigned long, len, iov->iov_len);
  193                 fault_in_pages_readable(iov->iov_base, this_len);
  194                 len -= this_len;
  195                 iov++;
  196         }
  197 }
  198 
  199 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  200                                   struct pipe_buffer *buf)
  201 {
  202         struct page *page = buf->page;
  203 
  204         /*
  205          * If nobody else uses this page, and we don't already have a
  206          * temporary page, let's keep track of it as a one-deep
  207          * allocation cache. (Otherwise just release our reference to it)
  208          */
  209         if (page_count(page) == 1 && !pipe->tmp_page)
  210                 pipe->tmp_page = page;
  211         else
  212                 page_cache_release(page);
  213 }
  214 
  215 /**
  216  * generic_pipe_buf_map - virtually map a pipe buffer
  217  * @pipe:       the pipe that the buffer belongs to
  218  * @buf:        the buffer that should be mapped
  219  * @atomic:     whether to use an atomic map
  220  *
  221  * Description:
  222  *      This function returns a kernel virtual address mapping for the
  223  *      pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
  224  *      and the caller has to be careful not to fault before calling
  225  *      the unmap function.
  226  *
  227  *      Note that this function calls kmap_atomic() if @atomic != 0.
  228  */
  229 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
  230                            struct pipe_buffer *buf, int atomic)
  231 {
  232         if (atomic) {
  233                 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
  234                 return kmap_atomic(buf->page);
  235         }
  236 
  237         return kmap(buf->page);
  238 }
  239 EXPORT_SYMBOL(generic_pipe_buf_map);
  240 
  241 /**
  242  * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
  243  * @pipe:       the pipe that the buffer belongs to
  244  * @buf:        the buffer that should be unmapped
  245  * @map_data:   the data that the mapping function returned
  246  *
  247  * Description:
  248  *      This function undoes the mapping that ->map() provided.
  249  */
  250 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
  251                             struct pipe_buffer *buf, void *map_data)
  252 {
  253         if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
  254                 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
  255                 kunmap_atomic(map_data);
  256         } else
  257                 kunmap(buf->page);
  258 }
  259 EXPORT_SYMBOL(generic_pipe_buf_unmap);
  260 
  261 /**
  262  * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
  263  * @pipe:       the pipe that the buffer belongs to
  264  * @buf:        the buffer to attempt to steal
  265  *
  266  * Description:
  267  *      This function attempts to steal the &struct page attached to
  268  *      @buf. If successful, this function returns 0 and returns with
  269  *      the page locked. The caller may then reuse the page for whatever
  270  *      he wishes; the typical use is insertion into a different file
  271  *      page cache.
  272  */
  273 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
  274                            struct pipe_buffer *buf)
  275 {
  276         struct page *page = buf->page;
  277 
  278         /*
  279          * A reference of one is golden, that means that the owner of this
  280          * page is the only one holding a reference to it. lock the page
  281          * and return OK.
  282          */
  283         if (page_count(page) == 1) {
  284                 lock_page(page);
  285                 return 0;
  286         }
  287 
  288         return 1;
  289 }
  290 EXPORT_SYMBOL(generic_pipe_buf_steal);
  291 
  292 /**
  293  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
  294  * @pipe:       the pipe that the buffer belongs to
  295  * @buf:        the buffer to get a reference to
  296  *
  297  * Description:
  298  *      This function grabs an extra reference to @buf. It's used in
  299  *      in the tee() system call, when we duplicate the buffers in one
  300  *      pipe into another.
  301  */
  302 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
  303 {
  304         page_cache_get(buf->page);
  305 }
  306 EXPORT_SYMBOL(generic_pipe_buf_get);
  307 
  308 /**
  309  * generic_pipe_buf_confirm - verify contents of the pipe buffer
  310  * @info:       the pipe that the buffer belongs to
  311  * @buf:        the buffer to confirm
  312  *
  313  * Description:
  314  *      This function does nothing, because the generic pipe code uses
  315  *      pages that are always good when inserted into the pipe.
  316  */
  317 int generic_pipe_buf_confirm(struct pipe_inode_info *info,
  318                              struct pipe_buffer *buf)
  319 {
  320         return 0;
  321 }
  322 EXPORT_SYMBOL(generic_pipe_buf_confirm);
  323 
  324 /**
  325  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
  326  * @pipe:       the pipe that the buffer belongs to
  327  * @buf:        the buffer to put a reference to
  328  *
  329  * Description:
  330  *      This function releases a reference to @buf.
  331  */
  332 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
  333                               struct pipe_buffer *buf)
  334 {
  335         page_cache_release(buf->page);
  336 }
  337 EXPORT_SYMBOL(generic_pipe_buf_release);
  338 
  339 static const struct pipe_buf_operations anon_pipe_buf_ops = {
  340         .can_merge = 1,
  341         .map = generic_pipe_buf_map,
  342         .unmap = generic_pipe_buf_unmap,
  343         .confirm = generic_pipe_buf_confirm,
  344         .release = anon_pipe_buf_release,
  345         .steal = generic_pipe_buf_steal,
  346         .get = generic_pipe_buf_get,
  347 };
  348 
  349 static const struct pipe_buf_operations packet_pipe_buf_ops = {
  350         .can_merge = 0,
  351         .map = generic_pipe_buf_map,
  352         .unmap = generic_pipe_buf_unmap,
  353         .confirm = generic_pipe_buf_confirm,
  354         .release = anon_pipe_buf_release,
  355         .steal = generic_pipe_buf_steal,
  356         .get = generic_pipe_buf_get,
  357 };
  358 
      /*
       * pipe_read - copy data queued in a pipe out to a user iovec array.
       *
       * @iocb:    kiocb whose ki_filp is the file being read
       * @_iov:    user buffers to fill; the const is cast away because the
       *           copy helper consumes the iovec entries in place
       * @nr_segs: number of entries in @_iov
       * @pos:     not used by this function
       *
       * Returns the number of bytes copied.  Returns 0 for a zero-length
       * request, or when the pipe is empty and has no writers.  If nothing
       * has been copied yet it returns -EAGAIN (empty pipe, O_NONBLOCK set,
       * no writer sleeping), -ERESTARTSYS (signal pending), or the error
       * reported by ->confirm() or by the copy to user space.
       */
  359 static ssize_t
  360 pipe_read(struct kiocb *iocb, const struct iovec *_iov,
  361            unsigned long nr_segs, loff_t pos)
  362 {
  363         struct file *filp = iocb->ki_filp;
  364         struct inode *inode = filp->f_path.dentry->d_inode;
  365         struct pipe_inode_info *pipe;
  366         int do_wakeup;
  367         ssize_t ret;
  368         struct iovec *iov = (struct iovec *)_iov;
  369         size_t total_len;
  370 
  371         total_len = iov_length(iov, nr_segs);
  372         /* Null read succeeds. */
  373         if (unlikely(total_len == 0))
  374                 return 0;
  375 
  376         do_wakeup = 0;
  377         ret = 0;
  378         mutex_lock(&inode->i_mutex);
  379         pipe = inode->i_pipe;
  380         for (;;) {
  381                 int bufs = pipe->nrbufs;
  382                 if (bufs) {
                              /* Consume from the buffer at the head of the ring. */
  383                         int curbuf = pipe->curbuf;
  384                         struct pipe_buffer *buf = pipe->bufs + curbuf;
  385                         const struct pipe_buf_operations *ops = buf->ops;
  386                         void *addr;
  387                         size_t chars = buf->len;
  388                         int error, atomic;
  389 
                              /* Never copy more than the caller still wants. */
  390                         if (chars > total_len)
  391                                 chars = total_len;
  392 
  393                         error = ops->confirm(pipe, buf);
  394                         if (error) {
                                      /* Only report the error if nothing was read so far. */
  395                                 if (!ret)
  396                                         ret = error;
  397                                 break;
  398                         }
  399 
                              /*
                               * Use an atomic mapping only when the whole
                               * destination range could be pre-faulted.
                               */
  400                         atomic = !iov_fault_in_pages_write(iov, chars);
  401 redo:
  402                         addr = ops->map(pipe, buf, atomic);
  403                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
  404                         ops->unmap(pipe, buf, addr);
  405                         if (unlikely(error)) {
  406                                 /*
  407                                  * Just retry with the slow path if we failed.
  408                                  */
  409                                 if (atomic) {
  410                                         atomic = 0;
  411                                         goto redo;
  412                                 }
  413                                 if (!ret)
  414                                         ret = error;
  415                                 break;
  416                         }
  417                         ret += chars;
  418                         buf->offset += chars;
  419                         buf->len -= chars;
  420 
  421                         /* Was it a packet buffer? Clean up and exit */
  422                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
                                      /*
                                       * Setting total_len to chars makes the
                                       * subtraction below reach zero, ending the
                                       * read; zeroing buf->len discards whatever
                                       * is left of the packet.
                                       */
  423                                 total_len = chars;
  424                                 buf->len = 0;
  425                         }
  426 
  427                         if (!buf->len) {
                                      /*
                                       * Buffer drained: release it and advance the
                                       * ring head.  The mask presumes pipe->buffers
                                       * is a power of two -- TODO confirm against
                                       * the code that sizes the ring.
                                       */
  428                                 buf->ops = NULL;
  429                                 ops->release(pipe, buf);
  430                                 curbuf = (curbuf + 1) & (pipe->buffers - 1);
  431                                 pipe->curbuf = curbuf;
  432                                 pipe->nrbufs = --bufs;
                                      /* A slot was freed, so writers need a wake-up. */
  433                                 do_wakeup = 1;
  434                         }
  435                         total_len -= chars;
  436                         if (!total_len)
  437                                 break;  /* common path: read succeeded */
  438                 }
  439                 if (bufs)       /* More to do? */
  440                         continue;
                      /* The pipe is empty from here on. */
  441                 if (!pipe->writers)
  442                         break;
  443                 if (!pipe->waiting_writers) {
  444                         /* syscall merging: Usually we must not sleep
  445                          * if O_NONBLOCK is set, or if we got some data.
  446                          * But if a writer sleeps in kernel space, then
  447                          * we can wait for that data without violating POSIX.
  448                          */
  449                         if (ret)
  450                                 break;
  451                         if (filp->f_flags & O_NONBLOCK) {
  452                                 ret = -EAGAIN;
  453                                 break;
  454                         }
  455                 }
  456                 if (signal_pending(current)) {
  457                         if (!ret)
  458                                 ret = -ERESTARTSYS;
  459                         break;
  460                 }
                      /* Let writers know about freed slots before going to sleep. */
  461                 if (do_wakeup) {
  462                         wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
  463                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  464                 }
                      /* pipe_wait() drops the pipe lock while sleeping and retakes it. */
  465                 pipe_wait(pipe);
  466         }
  467         mutex_unlock(&inode->i_mutex);
  468 
  469         /* Signal writers asynchronously that there is more room. */
  470         if (do_wakeup) {
  471                 wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
  472                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  473         }
  474         if (ret > 0)
  475                 file_accessed(filp);
  476         return ret;
  477 }
  478 
  479 static inline int is_packetized(struct file *file)
  480 {
  481         return (file->f_flags & O_DIRECT) != 0;
  482 }
  483 
  484 static ssize_t
  485 pipe_write(struct kiocb *iocb, const struct iovec *_iov,
  486             unsigned long nr_segs, loff_t ppos)
  487 {
  488         struct file *filp = iocb->ki_filp;
  489         struct inode *inode = filp->f_path.dentry->d_inode;
  490         struct pipe_inode_info *pipe;
  491         ssize_t ret;
  492         int do_wakeup;
  493         struct iovec *iov = (struct iovec *)_iov;
  494         size_t total_len;
  495         ssize_t chars;
  496 
  497         total_len = iov_length(iov, nr_segs);
  498         /* Null write succeeds. */
  499         if (unlikely(total_len == 0))
  500                 return 0;
  501 
  502         do_wakeup = 0;
  503         ret = 0;
  504         mutex_lock(&inode->i_mutex);
  505         pipe = inode->i_pipe;
  506 
  507         if (!pipe->readers) {
  508                 send_sig(SIGPIPE, current, 0);
  509                 ret = -EPIPE;
  510                 goto out;
  511         }
  512 
  513         /* We try to merge small writes */
  514         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
  515         if (pipe->nrbufs && chars != 0) {
  516                 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
  517                                                         (pipe->buffers - 1);
  518                 struct pipe_buffer *buf = pipe->bufs + lastbuf;
  519                 const struct pipe_buf_operations *ops = buf->ops;
  520                 int offset = buf->offset + buf->len;
  521 
  522                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
  523                         int error, atomic = 1;
  524                         void *addr;
  525 
  526                         error = ops->confirm(pipe, buf);
  527                         if (error)
  528                                 goto out;
  529 
  530                         iov_fault_in_pages_read(iov, chars);
  531 redo1:
  532                         addr = ops->map(pipe, buf, atomic);
  533                         error = pipe_iov_copy_from_user(offset + addr, iov,
  534                                                         chars, atomic);
  535                         ops->unmap(pipe, buf, addr);
  536                         ret = error;
  537                         do_wakeup = 1;
  538                         if (error) {
  539                                 if (atomic) {
  540                                         atomic = 0;
  541                                         goto redo1;
  542                                 }
  543                                 goto out;
  544                         }
  545                         buf->len += chars;
  546                         total_len -= chars;
  547                         ret = chars;
  548                         if (!total_len)
  549                                 goto out;
  550                 }
  551         }
  552 
  553         for (;;) {
  554                 int bufs;
  555 
  556                 if (!pipe->readers) {
  557                         send_sig(SIGPIPE, current, 0);
  558                         if (!ret)
  559                                 ret = -EPIPE;
  560                         break;
  561                 }
  562                 bufs = pipe->nrbufs;
  563                 if (bufs < pipe->buffers) {
  564                         int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
  565                         struct pipe_buffer *buf = pipe->bufs + newbuf;
  566                         struct page *page = pipe->tmp_page;
  567                         char *src;
  568                         int error, atomic = 1;
  569 
  570                         if (!page) {
  571                                 page = alloc_page(GFP_HIGHUSER);
  572                                 if (unlikely(!page)) {
  573                                         ret = ret ? : -ENOMEM;
  574                                         break;
  575                                 }
  576                                 pipe->tmp_page = page;
  577                         }
  578                         /* Always wake up, even if the copy fails. Otherwise
  579                          * we lock up (O_NONBLOCK-)readers that sleep due to
  580                          * syscall merging.
  581                          * FIXME! Is this really true?
  582                          */
  583                         do_wakeup = 1;
  584                         chars = PAGE_SIZE;
  585                         if (chars > total_len)
  586                                 chars = total_len;
  587 
  588                         iov_fault_in_pages_read(iov, chars);
  589 redo2:
  590                         if (atomic)
  591                                 src = kmap_atomic(page);
  592                         else
  593                                 src = kmap(page);
  594 
  595                         error = pipe_iov_copy_from_user(src, iov, chars,
  596                                                         atomic);
  597                         if (atomic)
  598                                 kunmap_atomic(src);
  599                         else
  600                                 kunmap(page);
  601 
  602                         if (unlikely(error)) {
  603                                 if (atomic) {
  604                                         atomic = 0;
  605                                         goto redo2;
  606                                 }
  607                                 if (!ret)
  608                                         ret = error;
  609                                 break;
  610                         }
  611                         ret += chars;
  612 
  613                         /* Insert it into the buffer array */
  614                         buf->page = page;
  615                         buf->ops = &anon_pipe_buf_ops;
  616                         buf->offset = 0;
  617                         buf->len = chars;
  618                         buf->flags = 0;
  619                         if (is_packetized(filp)) {
  620                                 buf->ops = &packet_pipe_buf_ops;
  621                                 buf->flags = PIPE_BUF_FLAG_PACKET;
  622                         }
  623                         pipe->nrbufs = ++bufs;
  624                         pipe->tmp_page = NULL;
  625 
  626                         total_len -= chars;
  627                         if (!total_len)
  628                                 break;
  629                 }
  630                 if (bufs < pipe->buffers)
  631                         continue;
  632                 if (filp->f_flags & O_NONBLOCK) {
  633                         if (!ret)
  634                                 ret = -EAGAIN;
  635                         break;
  636                 }
  637                 if (signal_pending(current)) {
  638                         if (!ret)
  639                                 ret = -ERESTARTSYS;
  640                         break;
  641                 }
  642                 if (do_wakeup) {
  643                         wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
  644                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  645                         do_wakeup = 0;
  646                 }
  647                 pipe->waiting_writers++;
  648                 pipe_wait(pipe);
  649                 pipe->waiting_writers--;
  650         }
  651 out:
  652         mutex_unlock(&inode->i_mutex);
  653         if (do_wakeup) {
  654                 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
  655                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  656         }
  657         if (ret > 0) {
  658                 int err = file_update_time(filp);
  659                 if (err)
  660                         ret = err;
  661         }
  662         return ret;
  663 }
  664 
  665 static ssize_t
  666 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
  667 {
  668         return -EBADF;
  669 }
  670 
  671 static ssize_t
  672 bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
  673            loff_t *ppos)
  674 {
  675         return -EBADF;
  676 }
  677 
  678 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  679 {
  680         struct inode *inode = filp->f_path.dentry->d_inode;
  681         struct pipe_inode_info *pipe;
  682         int count, buf, nrbufs;
  683 
  684         switch (cmd) {
  685                 case FIONREAD:
  686                         mutex_lock(&inode->i_mutex);
  687                         pipe = inode->i_pipe;
  688                         count = 0;
  689                         buf = pipe->curbuf;
  690                         nrbufs = pipe->nrbufs;
  691                         while (--nrbufs >= 0) {
  692                                 count += pipe->bufs[buf].len;
  693                                 buf = (buf+1) & (pipe->buffers - 1);
  694                         }
  695                         mutex_unlock(&inode->i_mutex);
  696 
  697                         return put_user(count, (int __user *)arg);
  698                 default:
  699                         return -ENOIOCTLCMD;
  700         }
  701 }
  702 
  703 /* No kernel lock held - fine */
  704 static unsigned int
  705 pipe_poll(struct file *filp, poll_table *wait)
  706 {
  707         unsigned int mask;
  708         struct inode *inode = filp->f_path.dentry->d_inode;
  709         struct pipe_inode_info *pipe = inode->i_pipe;
  710         int nrbufs;
  711 
  712         poll_wait(filp, &pipe->wait, wait);
  713 
  714         /* Reading only -- no need for acquiring the semaphore.  */
  715         nrbufs = pipe->nrbufs;
  716         mask = 0;
  717         if (filp->f_mode & FMODE_READ) {
  718                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
  719                 if (!pipe->writers && filp->f_version != pipe->w_counter)
  720                         mask |= POLLHUP;
  721         }
  722 
  723         if (filp->f_mode & FMODE_WRITE) {
  724                 mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
  725                 /*
  726                  * Most Unices do not set POLLERR for FIFOs but on Linux they
  727                  * behave exactly like pipes for poll().
  728                  */
  729                 if (!pipe->readers)
  730                         mask |= POLLERR;
  731         }
  732 
  733         return mask;
  734 }
  735 
  736 static int
  737 pipe_release(struct inode *inode, int decr, int decw)
  738 {
  739         struct pipe_inode_info *pipe;
  740 
  741         mutex_lock(&inode->i_mutex);
  742         pipe = inode->i_pipe;
  743         pipe->readers -= decr;
  744         pipe->writers -= decw;
  745 
  746         if (!pipe->readers && !pipe->writers) {
  747                 free_pipe_info(inode);
  748         } else {
  749                 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
  750                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  751                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  752         }
  753         mutex_unlock(&inode->i_mutex);
  754 
  755         return 0;
  756 }
  757 
  758 static int
  759 pipe_read_fasync(int fd, struct file *filp, int on)
  760 {
  761         struct inode *inode = filp->f_path.dentry->d_inode;
  762         int retval;
  763 
  764         mutex_lock(&inode->i_mutex);
  765         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
  766         mutex_unlock(&inode->i_mutex);
  767 
  768         return retval;
  769 }
  770 
  771 
  772 static int
  773 pipe_write_fasync(int fd, struct file *filp, int on)
  774 {
  775         struct inode *inode = filp->f_path.dentry->d_inode;
  776         int retval;
  777 
  778         mutex_lock(&inode->i_mutex);
  779         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
  780         mutex_unlock(&inode->i_mutex);
  781 
  782         return retval;
  783 }
  784 
  785 
  786 static int
  787 pipe_rdwr_fasync(int fd, struct file *filp, int on)
  788 {
  789         struct inode *inode = filp->f_path.dentry->d_inode;
  790         struct pipe_inode_info *pipe = inode->i_pipe;
  791         int retval;
  792 
  793         mutex_lock(&inode->i_mutex);
  794         retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
  795         if (retval >= 0) {
  796                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
  797                 if (retval < 0) /* this can happen only if on == T */
  798                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
  799         }
  800         mutex_unlock(&inode->i_mutex);
  801         return retval;
  802 }
  803 
  804 
  805 static int
  806 pipe_read_release(struct inode *inode, struct file *filp)
  807 {
  808         return pipe_release(inode, 1, 0);
  809 }
  810 
  811 static int
  812 pipe_write_release(struct inode *inode, struct file *filp)
  813 {
  814         return pipe_release(inode, 0, 1);
  815 }
  816 
  817 static int
  818 pipe_rdwr_release(struct inode *inode, struct file *filp)
  819 {
  820         int decr, decw;
  821 
  822         decr = (filp->f_mode & FMODE_READ) != 0;
  823         decw = (filp->f_mode & FMODE_WRITE) != 0;
  824         return pipe_release(inode, decr, decw);
  825 }
  826 
  827 static int
  828 pipe_read_open(struct inode *inode, struct file *filp)
  829 {
  830         int ret = -ENOENT;
  831 
  832         mutex_lock(&inode->i_mutex);
  833 
  834         if (inode->i_pipe) {
  835                 ret = 0;
  836                 inode->i_pipe->readers++;
  837         }
  838 
  839         mutex_unlock(&inode->i_mutex);
  840 
  841         return ret;
  842 }
  843 
  844 static int
  845 pipe_write_open(struct inode *inode, struct file *filp)
  846 {
  847         int ret = -ENOENT;
  848 
  849         mutex_lock(&inode->i_mutex);
  850 
  851         if (inode->i_pipe) {
  852                 ret = 0;
  853                 inode->i_pipe->writers++;
  854         }
  855 
  856         mutex_unlock(&inode->i_mutex);
  857 
  858         return ret;
  859 }
  860 
  861 static int
  862 pipe_rdwr_open(struct inode *inode, struct file *filp)
  863 {
  864         int ret = -ENOENT;
  865 
  866         mutex_lock(&inode->i_mutex);
  867 
  868         if (inode->i_pipe) {
  869                 ret = 0;
  870                 if (filp->f_mode & FMODE_READ)
  871                         inode->i_pipe->readers++;
  872                 if (filp->f_mode & FMODE_WRITE)
  873                         inode->i_pipe->writers++;
  874         }
  875 
  876         mutex_unlock(&inode->i_mutex);
  877 
  878         return ret;
  879 }
  880 
  881 /*
  882  * The file_operations structs are not static because they
  883  * are also used in linux/fs/fifo.c to do operations on FIFOs.
  884  *
  885  * Pipes reuse fifos' file_operations structs.
  886  */
  887 const struct file_operations read_pipefifo_fops = {
  888         .llseek         = no_llseek,
  889         .read           = do_sync_read,
  890         .aio_read       = pipe_read,
  891         .write          = bad_pipe_w,
  892         .poll           = pipe_poll,
  893         .unlocked_ioctl = pipe_ioctl,
  894         .open           = pipe_read_open,
  895         .release        = pipe_read_release,
  896         .fasync         = pipe_read_fasync,
  897 };
  898 
  899 const struct file_operations write_pipefifo_fops = {
  900         .llseek         = no_llseek,
  901         .read           = bad_pipe_r,
  902         .write          = do_sync_write,
  903         .aio_write      = pipe_write,
  904         .poll           = pipe_poll,
  905         .unlocked_ioctl = pipe_ioctl,
  906         .open           = pipe_write_open,
  907         .release        = pipe_write_release,
  908         .fasync         = pipe_write_fasync,
  909 };
  910 
  911 const struct file_operations rdwr_pipefifo_fops = {
  912         .llseek         = no_llseek,
  913         .read           = do_sync_read,
  914         .aio_read       = pipe_read,
  915         .write          = do_sync_write,
  916         .aio_write      = pipe_write,
  917         .poll           = pipe_poll,
  918         .unlocked_ioctl = pipe_ioctl,
  919         .open           = pipe_rdwr_open,
  920         .release        = pipe_rdwr_release,
  921         .fasync         = pipe_rdwr_fasync,
  922 };
  923 
  924 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
  925 {
  926         struct pipe_inode_info *pipe;
  927 
  928         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
  929         if (pipe) {
  930                 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
  931                 if (pipe->bufs) {
  932                         init_waitqueue_head(&pipe->wait);
  933                         pipe->r_counter = pipe->w_counter = 1;
  934                         pipe->inode = inode;
  935                         pipe->buffers = PIPE_DEF_BUFFERS;
  936                         return pipe;
  937                 }
  938                 kfree(pipe);
  939         }
  940 
  941         return NULL;
  942 }
  943 
  944 void __free_pipe_info(struct pipe_inode_info *pipe)
  945 {
  946         int i;
  947 
  948         for (i = 0; i < pipe->buffers; i++) {
  949                 struct pipe_buffer *buf = pipe->bufs + i;
  950                 if (buf->ops)
  951                         buf->ops->release(pipe, buf);
  952         }
  953         if (pipe->tmp_page)
  954                 __free_page(pipe->tmp_page);
  955         kfree(pipe->bufs);
  956         kfree(pipe);
  957 }
  958 
  959 void free_pipe_info(struct inode *inode)
  960 {
  961         __free_pipe_info(inode->i_pipe);
  962         inode->i_pipe = NULL;
  963 }
  964 
  965 static struct vfsmount *pipe_mnt __read_mostly;
  966 
  967 /*
  968  * pipefs_dname() is called from d_path().
  969  */
  970 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
  971 {
  972         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
  973                                 dentry->d_inode->i_ino);
  974 }
  975 
  976 static const struct dentry_operations pipefs_dentry_operations = {
  977         .d_dname        = pipefs_dname,
  978 };
  979 
  980 static struct inode * get_pipe_inode(void)
  981 {
  982         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
  983         struct pipe_inode_info *pipe;
  984 
  985         if (!inode)
  986                 goto fail_inode;
  987 
  988         inode->i_ino = get_next_ino();
  989 
  990         pipe = alloc_pipe_info(inode);
  991         if (!pipe)
  992                 goto fail_iput;
  993         inode->i_pipe = pipe;
  994 
  995         pipe->readers = pipe->writers = 1;
  996         inode->i_fop = &rdwr_pipefifo_fops;
  997 
  998         /*
  999          * Mark the inode dirty from the very beginning,
 1000          * that way it will never be moved to the dirty
 1001          * list because "mark_inode_dirty()" will think
 1002          * that it already _is_ on the dirty list.
 1003          */
 1004         inode->i_state = I_DIRTY;
 1005         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 1006         inode->i_uid = current_fsuid();
 1007         inode->i_gid = current_fsgid();
 1008         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 1009 
 1010         return inode;
 1011 
 1012 fail_iput:
 1013         iput(inode);
 1014 
 1015 fail_inode:
 1016         return NULL;
 1017 }
 1018 
 1019 int create_pipe_files(struct file **res, int flags)
 1020 {
 1021         int err;
 1022         struct inode *inode = get_pipe_inode();
 1023         struct file *f;
 1024         struct path path;
 1025         static struct qstr name = { .name = "" };
 1026 
 1027         if (!inode)
 1028                 return -ENFILE;
 1029 
 1030         err = -ENOMEM;
 1031         path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
 1032         if (!path.dentry)
 1033                 goto err_inode;
 1034         path.mnt = mntget(pipe_mnt);
 1035 
 1036         d_instantiate(path.dentry, inode);
 1037 
 1038         err = -ENFILE;
 1039         f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
 1040         if (!f)
 1041                 goto err_dentry;
 1042 
 1043         f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 1044 
 1045         res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
 1046         if (!res[0])
 1047                 goto err_file;
 1048 
 1049         path_get(&path);
 1050         res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 1051         res[1] = f;
 1052         return 0;
 1053 
 1054 err_file:
 1055         put_filp(f);
 1056 err_dentry:
 1057         free_pipe_info(inode);
 1058         path_put(&path);
 1059         return err;
 1060 
 1061 err_inode:
 1062         free_pipe_info(inode);
 1063         iput(inode);
 1064         return err;
 1065 }
 1066 
 1067 static int __do_pipe_flags(int *fd, struct file **files, int flags)
 1068 {
 1069         int error;
 1070         int fdw, fdr;
 1071 
 1072         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
 1073                 return -EINVAL;
 1074 
 1075         error = create_pipe_files(files, flags);
 1076         if (error)
 1077                 return error;
 1078 
 1079         error = get_unused_fd_flags(flags);
 1080         if (error < 0)
 1081                 goto err_read_pipe;
 1082         fdr = error;
 1083 
 1084         error = get_unused_fd_flags(flags);
 1085         if (error < 0)
 1086                 goto err_fdr;
 1087         fdw = error;
 1088 
 1089         audit_fd_pair(fdr, fdw);
 1090         fd[0] = fdr;
 1091         fd[1] = fdw;
 1092         return 0;
 1093 
 1094  err_fdr:
 1095         put_unused_fd(fdr);
 1096  err_read_pipe:
 1097         fput(files[0]);
 1098         fput(files[1]);
 1099         return error;
 1100 }
 1101 
 1102 int do_pipe_flags(int *fd, int flags)
 1103 {
 1104         struct file *files[2];
 1105         int error = __do_pipe_flags(fd, files, flags);
 1106         if (!error) {
 1107                 fd_install(fd[0], files[0]);
 1108                 fd_install(fd[1], files[1]);
 1109         }
 1110         return error;
 1111 }
 1112 
 1113 /*
 1114  * sys_pipe() is the normal C calling standard for creating
 1115  * a pipe. It's not the way Unix traditionally does this, though.
 1116  */
 1117 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
 1118 {
 1119         struct file *files[2];
 1120         int fd[2];
 1121         int error;
 1122 
 1123         error = __do_pipe_flags(fd, files, flags);
 1124         if (!error) {
 1125                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
 1126                         fput(files[0]);
 1127                         fput(files[1]);
 1128                         put_unused_fd(fd[0]);
 1129                         put_unused_fd(fd[1]);
 1130                         error = -EFAULT;
 1131                 } else {
 1132                         fd_install(fd[0], files[0]);
 1133                         fd_install(fd[1], files[1]);
 1134                 }
 1135         }
 1136         return error;
 1137 }
 1138 
 1139 SYSCALL_DEFINE1(pipe, int __user *, fildes)
 1140 {
 1141         return sys_pipe2(fildes, 0);
 1142 }
 1143 
 1144 /*
 1145  * Allocate a new array of pipe buffers and copy the info over. Returns the
 1146  * pipe size if successful, or return -ERROR on error.
 1147  */
 1148 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
 1149 {
 1150         struct pipe_buffer *bufs;
 1151 
 1152         /*
 1153          * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
 1154          * expect a lot of shrink+grow operations, just free and allocate
 1155          * again like we would do for growing. If the pipe currently
 1156          * contains more buffers than arg, then return busy.
 1157          */
 1158         if (nr_pages < pipe->nrbufs)
 1159                 return -EBUSY;
 1160 
 1161         bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
 1162         if (unlikely(!bufs))
 1163                 return -ENOMEM;
 1164 
 1165         /*
 1166          * The pipe array wraps around, so just start the new one at zero
 1167          * and adjust the indexes.
 1168          */
 1169         if (pipe->nrbufs) {
 1170                 unsigned int tail;
 1171                 unsigned int head;
 1172 
 1173                 tail = pipe->curbuf + pipe->nrbufs;
 1174                 if (tail < pipe->buffers)
 1175                         tail = 0;
 1176                 else
 1177                         tail &= (pipe->buffers - 1);
 1178 
 1179                 head = pipe->nrbufs - tail;
 1180                 if (head)
 1181                         memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
 1182                 if (tail)
 1183                         memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
 1184         }
 1185 
 1186         pipe->curbuf = 0;
 1187         kfree(pipe->bufs);
 1188         pipe->bufs = bufs;
 1189         pipe->buffers = nr_pages;
 1190         return nr_pages * PAGE_SIZE;
 1191 }
 1192 
 1193 /*
 1194  * Currently we rely on the pipe array holding a power-of-2 number
 1195  * of pages.
 1196  */
 1197 static inline unsigned int round_pipe_size(unsigned int size)
 1198 {
 1199         unsigned long nr_pages;
 1200 
 1201         nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 1202         return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
 1203 }
 1204 
 1205 /*
 1206  * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
 1207  * will return an error.
 1208  */
 1209 int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
 1210                  size_t *lenp, loff_t *ppos)
 1211 {
 1212         int ret;
 1213 
 1214         ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
 1215         if (ret < 0 || !write)
 1216                 return ret;
 1217 
 1218         pipe_max_size = round_pipe_size(pipe_max_size);
 1219         return ret;
 1220 }
 1221 
 1222 /*
 1223  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 1224  * location, so checking ->i_pipe is not enough to verify that this is a
 1225  * pipe.
 1226  */
 1227 struct pipe_inode_info *get_pipe_info(struct file *file)
 1228 {
 1229         struct inode *i = file->f_path.dentry->d_inode;
 1230 
 1231         return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
 1232 }
 1233 
 1234 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 1235 {
 1236         struct pipe_inode_info *pipe;
 1237         long ret;
 1238 
 1239         pipe = get_pipe_info(file);
 1240         if (!pipe)
 1241                 return -EBADF;
 1242 
 1243         mutex_lock(&pipe->inode->i_mutex);
 1244 
 1245         switch (cmd) {
 1246         case F_SETPIPE_SZ: {
 1247                 unsigned int size, nr_pages;
 1248 
 1249                 size = round_pipe_size(arg);
 1250                 nr_pages = size >> PAGE_SHIFT;
 1251 
 1252                 ret = -EINVAL;
 1253                 if (!nr_pages)
 1254                         goto out;
 1255 
 1256                 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
 1257                         ret = -EPERM;
 1258                         goto out;
 1259                 }
 1260                 ret = pipe_set_size(pipe, nr_pages);
 1261                 break;
 1262                 }
 1263         case F_GETPIPE_SZ:
 1264                 ret = pipe->buffers * PAGE_SIZE;
 1265                 break;
 1266         default:
 1267                 ret = -EINVAL;
 1268                 break;
 1269         }
 1270 
 1271 out:
 1272         mutex_unlock(&pipe->inode->i_mutex);
 1273         return ret;
 1274 }
 1275 
 1276 static const struct super_operations pipefs_ops = {
 1277         .destroy_inode = free_inode_nonrcu,
 1278         .statfs = simple_statfs,
 1279 };
 1280 
 1281 /*
 1282  * pipefs should _never_ be mounted by userland - too much of security hassle,
 1283  * no real gain from having the whole whorehouse mounted. So we don't need
 1284  * any operations on the root directory. However, we need a non-trivial
 1285  * d_name - pipe: will go nicely and kill the special-casing in procfs.
 1286  */
 1287 static struct dentry *pipefs_mount(struct file_system_type *fs_type,
 1288                          int flags, const char *dev_name, void *data)
 1289 {
 1290         return mount_pseudo(fs_type, "pipe:", &pipefs_ops,
 1291                         &pipefs_dentry_operations, PIPEFS_MAGIC);
 1292 }
 1293 
 1294 static struct file_system_type pipe_fs_type = {
 1295         .name           = "pipefs",
 1296         .mount          = pipefs_mount,
 1297         .kill_sb        = kill_anon_super,
 1298 };
 1299 
 1300 static int __init init_pipe_fs(void)
 1301 {
 1302         int err = register_filesystem(&pipe_fs_type);
 1303 
 1304         if (!err) {
 1305                 pipe_mnt = kern_mount(&pipe_fs_type);
 1306                 if (IS_ERR(pipe_mnt)) {
 1307                         err = PTR_ERR(pipe_mnt);
 1308                         unregister_filesystem(&pipe_fs_type);
 1309                 }
 1310         }
 1311         return err;
 1312 }
 1313 
 1314 fs_initcall(init_pipe_fs);

Cache object: 6d1e1e91c572e2a950db2c5fcb21ee54


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.