FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_pipe.c
1 /* $OpenBSD: sys_pipe.c,v 1.143 2022/12/05 23:18:37 deraadt Exp $ */
2
3 /*
4 * Copyright (c) 1996 John S. Dyson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice immediately at the beginning of the file, without modification,
12 * this list of conditions, and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Absolutely no warranty of function or purpose is made by the author
17 * John S. Dyson.
18 * 4. Modifications may be freely made to this file if the above conditions
19 * are met.
20 */
21
22 /*
23 * This file contains a high-performance replacement for the socket-based
24 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
25 * all features of sockets, but does do everything that pipes normally
26 * do.
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/proc.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/filedesc.h>
35 #include <sys/pool.h>
36 #include <sys/ioctl.h>
37 #include <sys/stat.h>
38 #include <sys/signalvar.h>
39 #include <sys/mount.h>
40 #include <sys/syscallargs.h>
41 #include <sys/event.h>
42 #ifdef KTRACE
43 #include <sys/ktrace.h>
44 #endif
45
46 #include <uvm/uvm_extern.h>
47
48 #include <sys/pipe.h>
49
/*
 * Backing storage for one pipe(2) instance: both ends of the pipe plus
 * the single rwlock they share (see pipe_pair_create()).  Allocated as
 * a unit from pipe_pair_pool and freed by pipe_pair_destroy().
 */
struct pipe_pair {
	struct pipe pp_wpipe;		/* write end */
	struct pipe pp_rpipe;		/* read end */
	struct rwlock pp_lock;		/* serializes access to both ends */
};
55
/*
 * interfaces to the outside world
 */
int pipe_read(struct file *, struct uio *, int);
int pipe_write(struct file *, struct uio *, int);
int pipe_close(struct file *, struct proc *);
int pipe_kqfilter(struct file *fp, struct knote *kn);
int pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

/* File operations vector installed on DTYPE_PIPE descriptors. */
static const struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close
};
74
75 void filt_pipedetach(struct knote *kn);
76 int filt_piperead(struct knote *kn, long hint);
77 int filt_pipewrite(struct knote *kn, long hint);
78 int filt_pipeexcept(struct knote *kn, long hint);
79 int filt_pipemodify(struct kevent *kev, struct knote *kn);
80 int filt_pipeprocess(struct knote *kn, struct kevent *kev);
81
/* EVFILT_READ: fires when data is available or the write side is gone. */
const struct filterops pipe_rfiltops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
	.f_modify = filt_pipemodify,
	.f_process = filt_pipeprocess,
};

/* EVFILT_WRITE: fires when at least PIPE_BUF bytes of space exist. */
const struct filterops pipe_wfiltops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
	.f_modify = filt_pipemodify,
	.f_process = filt_pipeprocess,
};

/* EVFILT_EXCEPT: poll(2)-only hang-up reporting (see pipe_kqfilter()). */
const struct filterops pipe_efiltops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipeexcept,
	.f_modify = filt_pipemodify,
	.f_process = filt_pipeprocess,
};
108
109 /*
110 * Default pipe buffer size(s), this can be kind-of large now because pipe
111 * space is pageable. The pipe code will try to maintain locality of
112 * reference for performance reasons, so small amounts of outstanding I/O
113 * will not wipe the cache.
114 */
115 #define MINPIPESIZE (PIPE_SIZE/3)
116
117 /*
118 * Limit the number of "big" pipes
119 */
120 #define LIMITBIGPIPES 32
121 unsigned int nbigpipe;
122 static unsigned int amountpipekva;
123
124 struct pool pipe_pair_pool;
125
126 int dopipe(struct proc *, int *, int);
127 void pipeselwakeup(struct pipe *);
128
129 int pipe_create(struct pipe *);
130 void pipe_destroy(struct pipe *);
131 int pipe_rundown(struct pipe *);
132 struct pipe *pipe_peer(struct pipe *);
133 int pipe_buffer_realloc(struct pipe *, u_int);
134 void pipe_buffer_free(struct pipe *);
135
136 int pipe_iolock(struct pipe *);
137 void pipe_iounlock(struct pipe *);
138 int pipe_iosleep(struct pipe *, const char *);
139
140 struct pipe_pair *pipe_pair_create(void);
141 void pipe_pair_destroy(struct pipe_pair *);
142
143 /*
144 * The pipe system call for the DTYPE_PIPE type of pipes
145 */
146
147 int
148 sys_pipe(struct proc *p, void *v, register_t *retval)
149 {
150 struct sys_pipe_args /* {
151 syscallarg(int *) fdp;
152 } */ *uap = v;
153
154 return (dopipe(p, SCARG(uap, fdp), 0));
155 }
156
157 int
158 sys_pipe2(struct proc *p, void *v, register_t *retval)
159 {
160 struct sys_pipe2_args /* {
161 syscallarg(int *) fdp;
162 syscallarg(int) flags;
163 } */ *uap = v;
164
165 if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
166 return (EINVAL);
167
168 return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
169 }
170
/*
 * Common implementation of pipe(2)/pipe2(2): allocate a pipe pair,
 * wrap each end in a struct file, install both descriptors into the
 * calling process and copy the descriptor numbers out to userland.
 *
 * `flags' may contain O_CLOEXEC and/or FNONBLOCK; sys_pipe2() has
 * already validated them, sys_pipe() always passes 0.
 * Returns 0 or an errno (ENOMEM, a falloc() error, or a copyout()
 * fault).
 */
int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	/* Read end first; fds[0] is the read descriptor by convention. */
	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/*
		 * copyout() faulted: both descriptors are already
		 * visible, so tear them down through the fd table.
		 */
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	/* Second falloc() failed: undo the read-side descriptor. */
	fdremove(fdp, fds[0]);
	closef(rf, p);
	/* closef() released rf and thus rpipe; don't destroy it twice. */
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}
237
/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 *
 * The buffer must be empty, and the caller must either hold the pipe
 * I/O lock or be initializing a fresh (still unshared) pipe.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	/* km_alloc() still wants the kernel lock; may sleep (kd_waitok). */
	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	/* Track total pageable kva consumed by pipe buffers. */
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}
274
275 /*
276 * initialize and allocate VM and memory for pipe
277 */
278 int
279 pipe_create(struct pipe *cpipe)
280 {
281 int error;
282
283 error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
284 if (error != 0)
285 return (error);
286
287 sigio_init(&cpipe->pipe_sigio);
288
289 getnanotime(&cpipe->pipe_ctime);
290 cpipe->pipe_atime = cpipe->pipe_ctime;
291 cpipe->pipe_mtime = cpipe->pipe_ctime;
292
293 return (0);
294 }
295
296 struct pipe *
297 pipe_peer(struct pipe *cpipe)
298 {
299 struct pipe *peer;
300
301 rw_assert_anylock(cpipe->pipe_lock);
302
303 peer = cpipe->pipe_peer;
304 if (peer == NULL || (peer->pipe_state & PIPE_EOF))
305 return (NULL);
306 return (peer);
307 }
308
/*
 * Lock a pipe for exclusive I/O access.  Sleeps (interruptibly) while
 * another thread holds PIPE_LOCK; returns 0 with the I/O lock held, or
 * a signal-related errno with it not held.  Caller must hold the pair
 * rwlock, which is dropped and reacquired across the sleep.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		/* Tell the holder someone is waiting before sleeping. */
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}
329
330 /*
331 * Unlock a pipe I/O lock.
332 */
333 void
334 pipe_iounlock(struct pipe *cpipe)
335 {
336 rw_assert_wrlock(cpipe->pipe_lock);
337 KASSERT(cpipe->pipe_state & PIPE_LOCK);
338
339 cpipe->pipe_state &= ~PIPE_LOCK;
340 if (cpipe->pipe_state & PIPE_LWANT) {
341 cpipe->pipe_state &= ~PIPE_LWANT;
342 wakeup(cpipe);
343 }
344 }
345
346 /*
347 * Unlock the pipe I/O lock and go to sleep. Returns 0 on success and the I/O
348 * lock is relocked. Otherwise if a signal was caught, non-zero is returned and
349 * the I/O lock is not locked.
350 *
351 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
352 * before calling this function in order ensure that the same pipe is not
353 * destroyed while sleeping.
354 */
355 int
356 pipe_iosleep(struct pipe *cpipe, const char *wmesg)
357 {
358 int error;
359
360 pipe_iounlock(cpipe);
361 error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
362 INFSLP);
363 if (error)
364 return (error);
365 return (pipe_iolock(cpipe));
366 }
367
368 void
369 pipeselwakeup(struct pipe *cpipe)
370 {
371 rw_assert_wrlock(cpipe->pipe_lock);
372
373 KNOTE(&cpipe->pipe_klist, 0);
374
375 if (cpipe->pipe_state & PIPE_ASYNC)
376 pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
377 }
378
/*
 * fileops read for pipes: copy data from the ring buffer into `uio'.
 *
 * Runs under the pair rwlock, which is dropped around uiomove() (it
 * may fault and sleep); the I/O lock (PIPE_LOCK) keeps other readers
 * out meanwhile, and `pipe_busy' keeps pipe_destroy() from freeing
 * the pipe while we are off the lock.
 */
int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		/* A closing thread may be waiting for us to go away. */
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * Contiguous run from `out' to the end of the
			 * buffer, capped by available data and by the
			 * remaining request size.
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			/* uiomove() can fault: drop the pair lock. */
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning. This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				/* I/O lock already released by iosleep. */
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* Room for an atomic write again: notify poll/kevent/SIGIO. */
	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}
478
/*
 * fileops write for pipes: copy data from `uio' into the peer's ring
 * buffer, possibly in two segments when the write wraps around the
 * end of the buffer.
 *
 * `fp' refers to the write-end file, but data lands in the peer
 * (read) pipe's buffer.  The pair rwlock is dropped around uiomove();
 * PIPE_LOCK plus `pipe_busy' protect the buffer and the pipe's
 * lifetime across those windows.  Writes of size <= PIPE_BUF are
 * atomic, as POSIX requires.
 */
int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		/* A closing thread may be waiting for us to go away. */
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}


	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		/*
		 * Reserve a big-pipe slot first; back out if over the
		 * limit or if the reallocation fails.
		 */
		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer. If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				/* Advance `in', wrapping if we ran off the end. */
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				/* I/O lock already released by iosleep. */
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}
667
668 /*
669 * we implement a very minimal set of ioctls for compatibility with sockets.
670 */
671 int
672 pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
673 {
674 struct pipe *mpipe = fp->f_data;
675 int error = 0;
676
677 switch (cmd) {
678
679 case FIONBIO:
680 break;
681
682 case FIOASYNC:
683 rw_enter_write(mpipe->pipe_lock);
684 if (*(int *)data) {
685 mpipe->pipe_state |= PIPE_ASYNC;
686 } else {
687 mpipe->pipe_state &= ~PIPE_ASYNC;
688 }
689 rw_exit_write(mpipe->pipe_lock);
690 break;
691
692 case FIONREAD:
693 rw_enter_read(mpipe->pipe_lock);
694 *(int *)data = mpipe->pipe_buffer.cnt;
695 rw_exit_read(mpipe->pipe_lock);
696 break;
697
698 case FIOSETOWN:
699 case SIOCSPGRP:
700 case TIOCSPGRP:
701 error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
702 break;
703
704 case FIOGETOWN:
705 case SIOCGPGRP:
706 case TIOCGPGRP:
707 sigio_getown(&mpipe->pipe_sigio, cmd, data);
708 break;
709
710 default:
711 error = ENOTTY;
712 }
713
714 return (error);
715 }
716
717 int
718 pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
719 {
720 struct pipe *pipe = fp->f_data;
721
722 memset(ub, 0, sizeof(*ub));
723
724 rw_enter_read(pipe->pipe_lock);
725 ub->st_mode = S_IFIFO;
726 ub->st_blksize = pipe->pipe_buffer.size;
727 ub->st_size = pipe->pipe_buffer.cnt;
728 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
729 ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
730 ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
731 ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
732 ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
733 ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
734 ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
735 ub->st_uid = fp->f_cred->cr_uid;
736 ub->st_gid = fp->f_cred->cr_gid;
737 rw_exit_read(pipe->pipe_lock);
738 /*
739 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
740 * XXX (st_dev, st_ino) should be unique.
741 */
742 return (0);
743 }
744
745 int
746 pipe_close(struct file *fp, struct proc *p)
747 {
748 struct pipe *cpipe = fp->f_data;
749
750 fp->f_ops = NULL;
751 fp->f_data = NULL;
752 pipe_destroy(cpipe);
753 return (0);
754 }
755
/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and pipeclose()
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	/* Nothing allocated (never initialized, or already freed). */
	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	/* km_free() still requires the kernel lock. */
	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	/* Undo the accounting done in pipe_buffer_realloc(). */
	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}
780
781 /*
782 * shutdown the pipe, and free resources.
783 */
/*
 * shutdown the pipe, and free resources.
 *
 * Marks this end PIPE_EOF, waits for any in-flight readers/writers
 * (`pipe_busy') to drain via the pipe_rundown() handshake, detaches
 * from the peer, and frees the whole pair once the last end is gone.
 * Safe to call with NULL (used on dopipe()'s error path).
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		/* pipe_rundown() will wake us once pipe_busy hits zero. */
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	/*
	 * ppipe == NULL means the peer already detached: we are the
	 * last end standing, so the pair itself can be freed now.
	 */
	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}
824
825 /*
826 * Returns non-zero if a rundown is currently ongoing.
827 */
828 int
829 pipe_rundown(struct pipe *cpipe)
830 {
831 rw_assert_wrlock(cpipe->pipe_lock);
832
833 if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
834 return (0);
835
836 /* Only wakeup pipe_destroy() once the pipe is no longer busy. */
837 cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
838 wakeup(cpipe);
839 return (1);
840 }
841
/*
 * fileops kqfilter for pipes: attach a knote for EVFILT_READ,
 * EVFILT_WRITE, or EVFILT_EXCEPT.  Read/except knotes hang off this
 * end's klist; write knotes hang off the peer's (and fail with EPIPE
 * if the peer is gone).  EVFILT_EXCEPT is restricted to poll(2)
 * emulation (__EV_POLL), not direct kevent(2) use.
 */
int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}
891
892 void
893 filt_pipedetach(struct knote *kn)
894 {
895 struct pipe *cpipe = kn->kn_hook;
896
897 klist_remove(&cpipe->pipe_klist, kn);
898 }
899
900 int
901 filt_piperead(struct knote *kn, long hint)
902 {
903 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
904
905 rw_assert_wrlock(rpipe->pipe_lock);
906
907 wpipe = pipe_peer(rpipe);
908
909 kn->kn_data = rpipe->pipe_buffer.cnt;
910
911 if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
912 kn->kn_flags |= EV_EOF;
913 if (kn->kn_flags & __EV_POLL)
914 kn->kn_flags |= __EV_HUP;
915 return (1);
916 }
917
918 return (kn->kn_data > 0);
919 }
920
921 int
922 filt_pipewrite(struct knote *kn, long hint)
923 {
924 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
925
926 rw_assert_wrlock(rpipe->pipe_lock);
927
928 wpipe = pipe_peer(rpipe);
929
930 if (wpipe == NULL) {
931 kn->kn_data = 0;
932 kn->kn_flags |= EV_EOF;
933 if (kn->kn_flags & __EV_POLL)
934 kn->kn_flags |= __EV_HUP;
935 return (1);
936 }
937 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
938
939 return (kn->kn_data >= PIPE_BUF);
940 }
941
942 int
943 filt_pipeexcept(struct knote *kn, long hint)
944 {
945 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
946 int active = 0;
947
948 rw_assert_wrlock(rpipe->pipe_lock);
949
950 wpipe = pipe_peer(rpipe);
951
952 if (kn->kn_flags & __EV_POLL) {
953 if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
954 kn->kn_flags |= __EV_HUP;
955 active = 1;
956 }
957 }
958
959 return (active);
960 }
961
962 int
963 filt_pipemodify(struct kevent *kev, struct knote *kn)
964 {
965 struct pipe *rpipe = kn->kn_fp->f_data;
966 int active;
967
968 rw_enter_write(rpipe->pipe_lock);
969 active = knote_modify(kev, kn);
970 rw_exit_write(rpipe->pipe_lock);
971
972 return (active);
973 }
974
975 int
976 filt_pipeprocess(struct knote *kn, struct kevent *kev)
977 {
978 struct pipe *rpipe = kn->kn_fp->f_data;
979 int active;
980
981 rw_enter_write(rpipe->pipe_lock);
982 active = knote_process(kn, kev);
983 rw_exit_write(rpipe->pipe_lock);
984
985 return (active);
986 }
987
/*
 * One-time initialization: set up the pool that backs pipe_pair
 * allocations.
 */
void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}
994
/*
 * Allocate and wire up a pipe pair: cross-link the two ends, give them
 * a shared lock and klists, and allocate each end's buffer.  Returns
 * NULL (after cleaning up) if either buffer allocation fails.
 */
struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive access to
	 * the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	/*
	 * pipe_destroy() copes with a partially initialized end; the
	 * second call frees the pair itself once both ends detach.
	 */
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}
1024
1025 void
1026 pipe_pair_destroy(struct pipe_pair *pp)
1027 {
1028 klist_free(&pp->pp_wpipe.pipe_klist);
1029 klist_free(&pp->pp_rpipe.pipe_klist);
1030 pool_put(&pipe_pair_pool, pp);
1031 }
Cache object: f148c71d6d4b367822f7b30f1bde7fd2
|