The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_pipe.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: sys_pipe.c,v 1.143 2022/12/05 23:18:37 deraadt Exp $  */
    2 
    3 /*
    4  * Copyright (c) 1996 John S. Dyson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice immediately at the beginning of the file, without modification,
   12  *    this list of conditions, and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Absolutely no warranty of function or purpose is made by the author
   17  *    John S. Dyson.
   18  * 4. Modifications may be freely made to this file if the above conditions
   19  *    are met.
   20  */
   21 
   22 /*
   23  * This file contains a high-performance replacement for the socket-based
   24  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
   25  * all features of sockets, but does do everything that pipes normally
   26  * do.
   27  */
   28 
   29 #include <sys/param.h>
   30 #include <sys/systm.h>
   31 #include <sys/proc.h>
   32 #include <sys/fcntl.h>
   33 #include <sys/file.h>
   34 #include <sys/filedesc.h>
   35 #include <sys/pool.h>
   36 #include <sys/ioctl.h>
   37 #include <sys/stat.h>
   38 #include <sys/signalvar.h>
   39 #include <sys/mount.h>
   40 #include <sys/syscallargs.h>
   41 #include <sys/event.h>
   42 #ifdef KTRACE
   43 #include <sys/ktrace.h>
   44 #endif
   45 
   46 #include <uvm/uvm_extern.h>
   47 
   48 #include <sys/pipe.h>
   49 
/*
 * Both pipe endpoints and their lock are allocated together as a
 * single pair (see pipe_pair_create()/pipe_pair_destroy()).
 */
struct pipe_pair {
        struct pipe pp_wpipe;   /* write end (see dopipe()) */
        struct pipe pp_rpipe;   /* read end */
        /* NOTE(review): presumably what both ends' pipe_lock point at
         * -- confirm in pipe_pair_create(), which is not in view here. */
        struct rwlock pp_lock;
};
   55 
   56 /*
   57  * interfaces to the outside world
   58  */
   59 int     pipe_read(struct file *, struct uio *, int);
   60 int     pipe_write(struct file *, struct uio *, int);
   61 int     pipe_close(struct file *, struct proc *);
   62 int     pipe_kqfilter(struct file *fp, struct knote *kn);
   63 int     pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
   64 int     pipe_stat(struct file *fp, struct stat *ub, struct proc *p);
   65 
/* File operations vector installed on both ends of a pipe by dopipe(). */
static const struct fileops pipeops = {
        .fo_read        = pipe_read,
        .fo_write       = pipe_write,
        .fo_ioctl       = pipe_ioctl,
        .fo_kqfilter    = pipe_kqfilter,
        .fo_stat        = pipe_stat,
        .fo_close       = pipe_close
};
   74 
   75 void    filt_pipedetach(struct knote *kn);
   76 int     filt_piperead(struct knote *kn, long hint);
   77 int     filt_pipewrite(struct knote *kn, long hint);
   78 int     filt_pipeexcept(struct knote *kn, long hint);
   79 int     filt_pipemodify(struct kevent *kev, struct knote *kn);
   80 int     filt_pipeprocess(struct knote *kn, struct kevent *kev);
   81 
/* knote filter ops for EVFILT_READ on a pipe (attached in pipe_kqfilter()). */
const struct filterops pipe_rfiltops = {
        .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_piperead,
        .f_modify       = filt_pipemodify,
        .f_process      = filt_pipeprocess,
};

/* knote filter ops for EVFILT_WRITE; hooked to the peer pipe. */
const struct filterops pipe_wfiltops = {
        .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_pipewrite,
        .f_modify       = filt_pipemodify,
        .f_process      = filt_pipeprocess,
};

/* knote filter ops for EVFILT_EXCEPT; poll(2) compatibility only
 * (pipe_kqfilter() rejects direct kevent(2) usage). */
const struct filterops pipe_efiltops = {
        .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_pipeexcept,
        .f_modify       = filt_pipemodify,
        .f_process      = filt_pipeprocess,
};
  108 
  109 /*
  110  * Default pipe buffer size(s), this can be kind-of large now because pipe
  111  * space is pageable.  The pipe code will try to maintain locality of
  112  * reference for performance reasons, so small amounts of outstanding I/O
  113  * will not wipe the cache.
  114  */
  115 #define MINPIPESIZE (PIPE_SIZE/3)
  116 
  117 /*
  118  * Limit the number of "big" pipes
  119  */
  120 #define LIMITBIGPIPES   32
  121 unsigned int nbigpipe;
  122 static unsigned int amountpipekva;
  123 
  124 struct pool pipe_pair_pool;
  125 
  126 int     dopipe(struct proc *, int *, int);
  127 void    pipeselwakeup(struct pipe *);
  128 
  129 int     pipe_create(struct pipe *);
  130 void    pipe_destroy(struct pipe *);
  131 int     pipe_rundown(struct pipe *);
  132 struct pipe *pipe_peer(struct pipe *);
  133 int     pipe_buffer_realloc(struct pipe *, u_int);
  134 void    pipe_buffer_free(struct pipe *);
  135 
  136 int     pipe_iolock(struct pipe *);
  137 void    pipe_iounlock(struct pipe *);
  138 int     pipe_iosleep(struct pipe *, const char *);
  139 
  140 struct pipe_pair *pipe_pair_create(void);
  141 void    pipe_pair_destroy(struct pipe_pair *);
  142 
  143 /*
  144  * The pipe system call for the DTYPE_PIPE type of pipes
  145  */
  146 
  147 int
  148 sys_pipe(struct proc *p, void *v, register_t *retval)
  149 {
  150         struct sys_pipe_args /* {
  151                 syscallarg(int *) fdp;
  152         } */ *uap = v;
  153 
  154         return (dopipe(p, SCARG(uap, fdp), 0));
  155 }
  156 
  157 int
  158 sys_pipe2(struct proc *p, void *v, register_t *retval)
  159 {
  160         struct sys_pipe2_args /* {
  161                 syscallarg(int *) fdp;
  162                 syscallarg(int) flags;
  163         } */ *uap = v;
  164 
  165         if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
  166                 return (EINVAL);
  167 
  168         return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
  169 }
  170 
/*
 * Common backend for pipe(2) and pipe2(2): allocate a pipe pair, wrap
 * each endpoint in a struct file, install both descriptors and copy
 * them out to `ufds' in userland.  `flags' may contain O_CLOEXEC and
 * FNONBLOCK (already validated by the callers).
 */
int
dopipe(struct proc *p, int *ufds, int flags)
{
        struct filedesc *fdp = p->p_fd;
        struct file *rf, *wf;
        struct pipe_pair *pp;
        struct pipe *rpipe, *wpipe = NULL;
        int fds[2], cloexec, error;

        cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

        pp = pipe_pair_create();
        if (pp == NULL)
                return (ENOMEM);
        wpipe = &pp->pp_wpipe;
        rpipe = &pp->pp_rpipe;

        fdplock(fdp);

        /* Read end becomes fds[0]. */
        error = falloc(p, &rf, &fds[0]);
        if (error != 0)
                goto free2;
        rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
        rf->f_type = DTYPE_PIPE;
        rf->f_data = rpipe;
        rf->f_ops = &pipeops;

        /* Write end becomes fds[1]. */
        error = falloc(p, &wf, &fds[1]);
        if (error != 0)
                goto free3;
        wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
        wf->f_type = DTYPE_PIPE;
        wf->f_data = wpipe;
        wf->f_ops = &pipeops;

        fdinsert(fdp, fds[0], cloexec, rf);
        fdinsert(fdp, fds[1], cloexec, wf);

        error = copyout(fds, ufds, sizeof(fds));
        if (error == 0) {
                fdpunlock(fdp);
#ifdef KTRACE
                if (KTRPOINT(p, KTR_STRUCT))
                        ktrfds(p, fds, 2);
#endif
        } else {
                /*
                 * copyout failed: undo both descriptors, which drops
                 * the file references and thus the pipes.
                 */
                /* fdrelease() unlocks fdp. */
                fdrelease(p, fds[0]);
                fdplock(fdp);
                fdrelease(p, fds[1]);
        }

        FRELE(rf, p);
        FRELE(wf, p);
        return (error);

free3:
        /*
         * Second falloc() failed.  closef() on the read-side file goes
         * through pipe_close() and destroys rpipe, so clear it to keep
         * the pipe_destroy() below from seeing it again.
         */
        fdremove(fdp, fds[0]);
        closef(rf, p);
        rpipe = NULL;
free2:
        fdpunlock(fdp);
        pipe_destroy(wpipe);
        pipe_destroy(rpipe);
        return (error);
}
  237 
/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 *
 * Caller must hold the pipe I/O lock when resizing an existing,
 * already-drained buffer (enforced by the assertions below).
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
        caddr_t buffer;

        /* buffer uninitialized or pipe locked */
        KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
            (cpipe->pipe_state & PIPE_LOCK));

        /* buffer should be empty */
        KASSERT(cpipe->pipe_buffer.cnt == 0);

        /* NOTE(review): km_alloc() of pageable kva is done under the
         * kernel lock here -- presumably still required; confirm
         * before removing. */
        KERNEL_LOCK();
        buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
        KERNEL_UNLOCK();
        if (buffer == NULL)
                return (ENOMEM);

        /* free old resources if we are resizing */
        pipe_buffer_free(cpipe);

        cpipe->pipe_buffer.buffer = buffer;
        cpipe->pipe_buffer.size = size;
        cpipe->pipe_buffer.in = 0;
        cpipe->pipe_buffer.out = 0;

        /* Global accounting of allocated pipe kva. */
        atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

        return (0);
}
  274 
  275 /*
  276  * initialize and allocate VM and memory for pipe
  277  */
  278 int
  279 pipe_create(struct pipe *cpipe)
  280 {
  281         int error;
  282 
  283         error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
  284         if (error != 0)
  285                 return (error);
  286 
  287         sigio_init(&cpipe->pipe_sigio);
  288 
  289         getnanotime(&cpipe->pipe_ctime);
  290         cpipe->pipe_atime = cpipe->pipe_ctime;
  291         cpipe->pipe_mtime = cpipe->pipe_ctime;
  292 
  293         return (0);
  294 }
  295 
  296 struct pipe *
  297 pipe_peer(struct pipe *cpipe)
  298 {
  299         struct pipe *peer;
  300 
  301         rw_assert_anylock(cpipe->pipe_lock);
  302 
  303         peer = cpipe->pipe_peer;
  304         if (peer == NULL || (peer->pipe_state & PIPE_EOF))
  305                 return (NULL);
  306         return (peer);
  307 }
  308 
/*
 * Lock a pipe for exclusive I/O access.
 *
 * Called with the pipe rwlock write-held.  Sleeps interruptibly until
 * PIPE_LOCK can be taken; returns 0 with the I/O lock held, or an
 * error if the sleep was broken by a signal (PCATCH).
 */
int
pipe_iolock(struct pipe *cpipe)
{
        int error;

        rw_assert_wrlock(cpipe->pipe_lock);

        while (cpipe->pipe_state & PIPE_LOCK) {
                /* Ask the current holder to wake us in pipe_iounlock(). */
                cpipe->pipe_state |= PIPE_LWANT;
                error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
                    "pipeiolk", INFSLP);
                if (error)
                        return (error);
        }
        cpipe->pipe_state |= PIPE_LOCK;
        return (0);
}
  329 
  330 /*
  331  * Unlock a pipe I/O lock.
  332  */
  333 void
  334 pipe_iounlock(struct pipe *cpipe)
  335 {
  336         rw_assert_wrlock(cpipe->pipe_lock);
  337         KASSERT(cpipe->pipe_state & PIPE_LOCK);
  338 
  339         cpipe->pipe_state &= ~PIPE_LOCK;
  340         if (cpipe->pipe_state & PIPE_LWANT) {
  341                 cpipe->pipe_state &= ~PIPE_LWANT;
  342                 wakeup(cpipe);
  343         }
  344 }
  345 
  346 /*
  347  * Unlock the pipe I/O lock and go to sleep. Returns 0 on success and the I/O
  348  * lock is relocked. Otherwise if a signal was caught, non-zero is returned and
  349  * the I/O lock is not locked.
  350  *
  351  * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
  352  * before calling this function in order ensure that the same pipe is not
  353  * destroyed while sleeping.
  354  */
  355 int
  356 pipe_iosleep(struct pipe *cpipe, const char *wmesg)
  357 {
  358         int error;
  359 
  360         pipe_iounlock(cpipe);
  361         error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
  362             INFSLP);
  363         if (error)
  364                 return (error);
  365         return (pipe_iolock(cpipe));
  366 }
  367 
/*
 * Notify waiters of pipe activity: post to the pipe's knote list and,
 * if async mode was enabled via FIOASYNC, deliver SIGIO to the
 * registered owner.  Pipe lock must be write-held.
 */
void
pipeselwakeup(struct pipe *cpipe)
{
        rw_assert_wrlock(cpipe->pipe_lock);

        KNOTE(&cpipe->pipe_klist, 0);

        if (cpipe->pipe_state & PIPE_ASYNC)
                pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}
  378 
/*
 * Read file operation for pipes.
 *
 * Runs with the pipe rwlock plus the exclusive I/O lock held; the
 * rwlock is dropped around uiomove() because the copy to userland may
 * sleep.  `pipe_busy' is held for the whole operation so the pipe is
 * not destroyed underneath us (see pipe_destroy()/pipe_rundown()).
 */
int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
        struct pipe *rpipe = fp->f_data;
        size_t nread = 0, size;
        int error;

        rw_enter_write(rpipe->pipe_lock);
        ++rpipe->pipe_busy;
        error = pipe_iolock(rpipe);
        if (error) {
                --rpipe->pipe_busy;
                /* Wake pipe_destroy() if it is waiting on us. */
                pipe_rundown(rpipe);
                rw_exit_write(rpipe->pipe_lock);
                return (error);
        }

        while (uio->uio_resid) {
                /* Normal pipe buffer receive. */
                if (rpipe->pipe_buffer.cnt > 0) {
                        /* Largest contiguous run up to end of buffer. */
                        size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
                        if (size > rpipe->pipe_buffer.cnt)
                                size = rpipe->pipe_buffer.cnt;
                        if (size > uio->uio_resid)
                                size = uio->uio_resid;
                        /* I/O lock keeps the buffer ours while unlocked. */
                        rw_exit_write(rpipe->pipe_lock);
                        error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
                                        size, uio);
                        rw_enter_write(rpipe->pipe_lock);
                        if (error) {
                                break;
                        }
                        rpipe->pipe_buffer.out += size;
                        if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
                                rpipe->pipe_buffer.out = 0;

                        rpipe->pipe_buffer.cnt -= size;
                        /*
                         * If there is no more to read in the pipe, reset
                         * its pointers to the beginning.  This improves
                         * cache hit stats.
                         */
                        if (rpipe->pipe_buffer.cnt == 0) {
                                rpipe->pipe_buffer.in = 0;
                                rpipe->pipe_buffer.out = 0;
                        }
                        nread += size;
                } else {
                        /*
                         * detect EOF condition
                         * read returns 0 on EOF, no need to set error
                         */
                        if (rpipe->pipe_state & PIPE_EOF)
                                break;

                        /* If the "write-side" has been blocked, wake it up. */
                        if (rpipe->pipe_state & PIPE_WANTW) {
                                rpipe->pipe_state &= ~PIPE_WANTW;
                                wakeup(rpipe);
                        }

                        /* Break if some data was read. */
                        if (nread > 0)
                                break;

                        /* Handle non-blocking mode operation. */
                        if (fp->f_flag & FNONBLOCK) {
                                error = EAGAIN;
                                break;
                        }

                        /* Wait for more data. */
                        rpipe->pipe_state |= PIPE_WANTR;
                        error = pipe_iosleep(rpipe, "piperd");
                        if (error)
                                goto unlocked_error;
                }
        }
        pipe_iounlock(rpipe);

        if (error == 0)
                getnanotime(&rpipe->pipe_atime);
unlocked_error:
        /* I/O lock is NOT held on this path (see pipe_iosleep()). */
        --rpipe->pipe_busy;

        if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
                /* Handle write blocking hysteresis. */
                if (rpipe->pipe_state & PIPE_WANTW) {
                        rpipe->pipe_state &= ~PIPE_WANTW;
                        wakeup(rpipe);
                }
        }

        /* Writers can make progress; notify kevent/poll/SIGIO waiters. */
        if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
                pipeselwakeup(rpipe);

        rw_exit_write(rpipe->pipe_lock);
        return (error);
}
  478 
/*
 * Write file operation for pipes.
 *
 * Data is deposited into the buffer of the peer pipe (the end readers
 * consume from); writing to a pipe whose peer is gone yields EPIPE.
 * Writes of size <= PIPE_BUF are atomic.  The pipe rwlock is dropped
 * around uiomove(); the I/O lock keeps the buffer consistent
 * meanwhile, and `pipe_busy' keeps the peer from being destroyed.
 */
int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
        struct pipe *rpipe = fp->f_data, *wpipe;
        struct rwlock *lock = rpipe->pipe_lock;
        size_t orig_resid;
        int error;

        rw_enter_write(lock);
        wpipe = pipe_peer(rpipe);

        /* Detect loss of pipe read side, issue SIGPIPE if lost. */
        if (wpipe == NULL) {
                rw_exit_write(lock);
                return (EPIPE);
        }

        ++wpipe->pipe_busy;
        error = pipe_iolock(wpipe);
        if (error) {
                --wpipe->pipe_busy;
                /* Wake pipe_destroy() if it is waiting on us. */
                pipe_rundown(wpipe);
                rw_exit_write(lock);
                return (error);
        }


        /* If it is advantageous to resize the pipe buffer, do so. */
        if (uio->uio_resid > PIPE_SIZE &&
            wpipe->pipe_buffer.size <= PIPE_SIZE &&
            wpipe->pipe_buffer.cnt == 0) {
                unsigned int npipe;

                /* Back out the count if over limit or realloc fails. */
                npipe = atomic_inc_int_nv(&nbigpipe);
                if (npipe > LIMITBIGPIPES ||
                    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
                        atomic_dec_int(&nbigpipe);
        }

        orig_resid = uio->uio_resid;

        while (uio->uio_resid) {
                size_t space;

                if (wpipe->pipe_state & PIPE_EOF) {
                        error = EPIPE;
                        break;
                }

                space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

                /* Writes of size <= PIPE_BUF must be atomic. */
                if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
                        space = 0;

                if (space > 0) {
                        size_t size;    /* Transfer size */
                        size_t segsize; /* first segment to transfer */

                        /*
                         * Transfer size is minimum of uio transfer
                         * and free space in pipe buffer.
                         */
                        if (space > uio->uio_resid)
                                size = uio->uio_resid;
                        else
                                size = space;
                        /*
                         * First segment to transfer is minimum of
                         * transfer size and contiguous space in
                         * pipe buffer.  If first segment to transfer
                         * is less than the transfer size, we've got
                         * a wraparound in the buffer.
                         */
                        segsize = wpipe->pipe_buffer.size -
                                wpipe->pipe_buffer.in;
                        if (segsize > size)
                                segsize = size;

                        /* Transfer first segment */

                        rw_exit_write(lock);
                        error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
                                        segsize, uio);
                        rw_enter_write(lock);

                        if (error == 0 && segsize < size) {
                                /*
                                 * Transfer remaining part now, to
                                 * support atomic writes.  Wraparound
                                 * happened.
                                 */
#ifdef DIAGNOSTIC
                                if (wpipe->pipe_buffer.in + segsize !=
                                    wpipe->pipe_buffer.size)
                                        panic("Expected pipe buffer wraparound disappeared");
#endif

                                rw_exit_write(lock);
                                error = uiomove(&wpipe->pipe_buffer.buffer[0],
                                                size - segsize, uio);
                                rw_enter_write(lock);
                        }
                        if (error == 0) {
                                wpipe->pipe_buffer.in += size;
                                if (wpipe->pipe_buffer.in >=
                                    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
                                        if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
                                                panic("Expected wraparound bad");
#endif
                                        wpipe->pipe_buffer.in = size - segsize;
                                }

                                wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
                                if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
                                        panic("Pipe buffer overflow");
#endif
                        }
                        if (error)
                                break;
                } else {
                        /* If the "read-side" has been blocked, wake it up. */
                        if (wpipe->pipe_state & PIPE_WANTR) {
                                wpipe->pipe_state &= ~PIPE_WANTR;
                                wakeup(wpipe);
                        }

                        /* Don't block on non-blocking I/O. */
                        if (fp->f_flag & FNONBLOCK) {
                                error = EAGAIN;
                                break;
                        }

                        /*
                         * We have no more space and have something to offer,
                         * wake up select/poll.
                         */
                        pipeselwakeup(wpipe);

                        wpipe->pipe_state |= PIPE_WANTW;
                        error = pipe_iosleep(wpipe, "pipewr");
                        if (error)
                                goto unlocked_error;

                        /*
                         * If read side wants to go away, we just issue a
                         * signal to ourselves.
                         */
                        if (wpipe->pipe_state & PIPE_EOF) {
                                error = EPIPE;
                                break;
                        }
                }
        }
        pipe_iounlock(wpipe);

unlocked_error:
        /* I/O lock is NOT held on this path (see pipe_iosleep()). */
        --wpipe->pipe_busy;

        if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
                /*
                 * If we have put any characters in the buffer, we wake up
                 * the reader.
                 */
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
        }

        /* Don't return EPIPE if I/O was successful. */
        if (wpipe->pipe_buffer.cnt == 0 &&
            uio->uio_resid == 0 &&
            error == EPIPE) {
                error = 0;
        }

        if (error == 0)
                getnanotime(&wpipe->pipe_mtime);
        /* We have something to offer, wake up select/poll. */
        if (wpipe->pipe_buffer.cnt)
                pipeselwakeup(wpipe);

        rw_exit_write(lock);
        return (error);
}
  667 
/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
        struct pipe *mpipe = fp->f_data;
        int error = 0;

        switch (cmd) {

        case FIONBIO:
                /*
                 * Nothing to do: pipe_read()/pipe_write() look at
                 * FNONBLOCK in fp->f_flag directly.
                 */
                break;

        case FIOASYNC:
                /* Toggle SIGIO delivery (see pipeselwakeup()). */
                rw_enter_write(mpipe->pipe_lock);
                if (*(int *)data) {
                        mpipe->pipe_state |= PIPE_ASYNC;
                } else {
                        mpipe->pipe_state &= ~PIPE_ASYNC;
                }
                rw_exit_write(mpipe->pipe_lock);
                break;

        case FIONREAD:
                /* Number of bytes currently buffered in this pipe. */
                rw_enter_read(mpipe->pipe_lock);
                *(int *)data = mpipe->pipe_buffer.cnt;
                rw_exit_read(mpipe->pipe_lock);
                break;

        case FIOSETOWN:
        case SIOCSPGRP:
        case TIOCSPGRP:
                error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
                break;

        case FIOGETOWN:
        case SIOCGPGRP:
        case TIOCGPGRP:
                sigio_getown(&mpipe->pipe_sigio, cmd, data);
                break;

        default:
                error = ENOTTY;
        }

        return (error);
}
  716 
/*
 * Stat file operation: synthesize FIFO-like stat data for a pipe.
 * Fields with no pipe equivalent are left zeroed (see trailer comment).
 */
int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
        struct pipe *pipe = fp->f_data;

        memset(ub, 0, sizeof(*ub));

        rw_enter_read(pipe->pipe_lock);
        ub->st_mode = S_IFIFO;
        ub->st_blksize = pipe->pipe_buffer.size;
        ub->st_size = pipe->pipe_buffer.cnt;    /* bytes awaiting read */
        ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
        ub->st_atim.tv_sec  = pipe->pipe_atime.tv_sec;
        ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
        ub->st_mtim.tv_sec  = pipe->pipe_mtime.tv_sec;
        ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
        ub->st_ctim.tv_sec  = pipe->pipe_ctime.tv_sec;
        ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
        ub->st_uid = fp->f_cred->cr_uid;
        ub->st_gid = fp->f_cred->cr_gid;
        rw_exit_read(pipe->pipe_lock);
        /*
         * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
         * XXX (st_dev, st_ino) should be unique.
         */
        return (0);
}
  744 
  745 int
  746 pipe_close(struct file *fp, struct proc *p)
  747 {
  748         struct pipe *cpipe = fp->f_data;
  749 
  750         fp->f_ops = NULL;
  751         fp->f_data = NULL;
  752         pipe_destroy(cpipe);
  753         return (0);
  754 }
  755 
/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and pipeclose()
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
        u_int size;

        if (cpipe->pipe_buffer.buffer == NULL)
                return;

        size = cpipe->pipe_buffer.size;

        /* Kernel lock mirrors the km_alloc() in pipe_buffer_realloc(). */
        KERNEL_LOCK();
        km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
        KERNEL_UNLOCK();

        cpipe->pipe_buffer.buffer = NULL;

        /* Undo the kva accounting; a big buffer also releases its
         * slot in the nbigpipe limit (see pipe_write()). */
        atomic_sub_int(&amountpipekva, size);
        if (size > PIPE_SIZE)
                atomic_dec_int(&nbigpipe);
}
  780 
/*
 * shutdown the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
        struct pipe *ppipe;

        if (cpipe == NULL)
                return;

        rw_enter_write(cpipe->pipe_lock);

        pipeselwakeup(cpipe);
        sigio_free(&cpipe->pipe_sigio);

        /*
         * If the other side is blocked, wake it up saying that
         * we want to close it down.
         */
        cpipe->pipe_state |= PIPE_EOF;
        /*
         * Wait for any in-flight read/write to drain; pipe_rundown()
         * posts the wakeup once `pipe_busy' drops to zero.
         */
        while (cpipe->pipe_busy) {
                wakeup(cpipe);
                cpipe->pipe_state |= PIPE_WANTD;
                rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
        }

        /* Disconnect from peer. */
        if ((ppipe = cpipe->pipe_peer) != NULL) {
                pipeselwakeup(ppipe);

                ppipe->pipe_state |= PIPE_EOF;
                wakeup(ppipe);
                ppipe->pipe_peer = NULL;
        }

        pipe_buffer_free(cpipe);

        rw_exit_write(cpipe->pipe_lock);

        /*
         * A NULL peer means the other endpoint was destroyed first
         * (it cleared our back-pointer above), so we are the last one
         * out and must free the shared pair.
         */
        if (ppipe == NULL)
                pipe_pair_destroy(cpipe->pipe_pair);
}
  824 
  825 /*
  826  * Returns non-zero if a rundown is currently ongoing.
  827  */
  828 int
  829 pipe_rundown(struct pipe *cpipe)
  830 {
  831         rw_assert_wrlock(cpipe->pipe_lock);
  832 
  833         if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
  834                 return (0);
  835 
  836         /* Only wakeup pipe_destroy() once the pipe is no longer busy. */
  837         cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
  838         wakeup(cpipe);
  839         return (1);
  840 }
  841 
/*
 * Attach a knote to a pipe.  EVFILT_READ and EVFILT_EXCEPT hook the
 * local endpoint; EVFILT_WRITE hooks the peer (where written data is
 * buffered) and fails with EPIPE if the peer is already gone.
 */
int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
        struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
        struct rwlock *lock = rpipe->pipe_lock;
        int error = 0;

        rw_enter_write(lock);
        wpipe = pipe_peer(rpipe);

        switch (kn->kn_filter) {
        case EVFILT_READ:
                kn->kn_fop = &pipe_rfiltops;
                kn->kn_hook = rpipe;
                klist_insert_locked(&rpipe->pipe_klist, kn);
                break;
        case EVFILT_WRITE:
                if (wpipe == NULL) {
                        /* other end of pipe has been closed */
                        error = EPIPE;
                        break;
                }
                kn->kn_fop = &pipe_wfiltops;
                kn->kn_hook = wpipe;
                klist_insert_locked(&wpipe->pipe_klist, kn);
                break;
        case EVFILT_EXCEPT:
                if (kn->kn_flags & __EV_SELECT) {
                        /* Prevent triggering exceptfds. */
                        error = EPERM;
                        break;
                }
                if ((kn->kn_flags & __EV_POLL) == 0) {
                        /* Disallow usage through kevent(2). */
                        error = EINVAL;
                        break;
                }
                kn->kn_fop = &pipe_efiltops;
                kn->kn_hook = rpipe;
                klist_insert_locked(&rpipe->pipe_klist, kn);
                break;
        default:
                error = EINVAL;
        }

        rw_exit_write(lock);

        return (error);
}
  891 
  892 void
  893 filt_pipedetach(struct knote *kn)
  894 {
  895         struct pipe *cpipe = kn->kn_hook;
  896 
  897         klist_remove(&cpipe->pipe_klist, kn);
  898 }
  899 
  900 int
  901 filt_piperead(struct knote *kn, long hint)
  902 {
  903         struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
  904 
  905         rw_assert_wrlock(rpipe->pipe_lock);
  906 
  907         wpipe = pipe_peer(rpipe);
  908 
  909         kn->kn_data = rpipe->pipe_buffer.cnt;
  910 
  911         if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
  912                 kn->kn_flags |= EV_EOF; 
  913                 if (kn->kn_flags & __EV_POLL)
  914                         kn->kn_flags |= __EV_HUP;
  915                 return (1);
  916         }
  917 
  918         return (kn->kn_data > 0);
  919 }
  920 
  921 int
  922 filt_pipewrite(struct knote *kn, long hint)
  923 {
  924         struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
  925 
  926         rw_assert_wrlock(rpipe->pipe_lock);
  927 
  928         wpipe = pipe_peer(rpipe);
  929 
  930         if (wpipe == NULL) {
  931                 kn->kn_data = 0;
  932                 kn->kn_flags |= EV_EOF; 
  933                 if (kn->kn_flags & __EV_POLL)
  934                         kn->kn_flags |= __EV_HUP;
  935                 return (1);
  936         }
  937         kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
  938 
  939         return (kn->kn_data >= PIPE_BUF);
  940 }
  941 
  942 int
  943 filt_pipeexcept(struct knote *kn, long hint)
  944 {
  945         struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
  946         int active = 0;
  947 
  948         rw_assert_wrlock(rpipe->pipe_lock);
  949 
  950         wpipe = pipe_peer(rpipe);
  951 
  952         if (kn->kn_flags & __EV_POLL) {
  953                 if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
  954                         kn->kn_flags |= __EV_HUP;
  955                         active = 1;
  956                 }
  957         }
  958 
  959         return (active);
  960 }
  961 
  962 int
  963 filt_pipemodify(struct kevent *kev, struct knote *kn)
  964 {
  965         struct pipe *rpipe = kn->kn_fp->f_data;
  966         int active;
  967 
  968         rw_enter_write(rpipe->pipe_lock);
  969         active = knote_modify(kev, kn);
  970         rw_exit_write(rpipe->pipe_lock);
  971 
  972         return (active);
  973 }
  974 
  975 int
  976 filt_pipeprocess(struct knote *kn, struct kevent *kev)
  977 {
  978         struct pipe *rpipe = kn->kn_fp->f_data;
  979         int active;
  980 
  981         rw_enter_write(rpipe->pipe_lock);
  982         active = knote_process(kn, kev);
  983         rw_exit_write(rpipe->pipe_lock);
  984 
  985         return (active);
  986 }
  987 
  988 void
  989 pipe_init(void)
  990 {
  991         pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
  992             PR_WAITOK, "pipepl", NULL);
  993 }
  994 
  995 struct pipe_pair *
  996 pipe_pair_create(void)
  997 {
  998         struct pipe_pair *pp;
  999 
 1000         pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
 1001         pp->pp_wpipe.pipe_pair = pp;
 1002         pp->pp_rpipe.pipe_pair = pp;
 1003         pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
 1004         pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
 1005         /*
 1006          * One lock is used per pipe pair in order to obtain exclusive access to
 1007          * the pipe pair.
 1008          */
 1009         rw_init(&pp->pp_lock, "pipelk");
 1010         pp->pp_wpipe.pipe_lock = &pp->pp_lock;
 1011         pp->pp_rpipe.pipe_lock = &pp->pp_lock;
 1012 
 1013         klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
 1014         klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);
 1015 
 1016         if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
 1017                 goto err;
 1018         return (pp);
 1019 err:
 1020         pipe_destroy(&pp->pp_wpipe);
 1021         pipe_destroy(&pp->pp_rpipe);
 1022         return (NULL);
 1023 }
 1024 
 1025 void
 1026 pipe_pair_destroy(struct pipe_pair *pp)
 1027 {
 1028         klist_free(&pp->pp_wpipe.pipe_klist);
 1029         klist_free(&pp->pp_rpipe.pipe_klist);
 1030         pool_put(&pipe_pair_pool, pp);
 1031 }

Cache object: f148c71d6d4b367822f7b30f1bde7fd2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.