FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_pipe.c
1 /*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19
20 /*
21 * This file contains a high-performance replacement for the socket-based
22 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
23 * all features of sockets, but does do everything that pipes normally
24 * do.
25 */
26
27 /*
28 * This code has two modes of operation, a small write mode and a large
29 * write mode. The small write mode acts like conventional pipes with
30 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the
31 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT
32 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
33 * the receiving process can copy it directly from the pages in the sending
34 * process.
35 *
36 * If the sending process receives a signal, it is possible that it will
37 * go away, and certainly its address space can change, because control
38 * is returned back to the user-mode side. In that case, the pipe code
39 * arranges to copy the buffer supplied by the user process, to a pageable
40 * kernel buffer, and the receiving process will grab the data from the
41 * pageable kernel buffer. Since signals don't happen all that often,
42 * the copy operation is normally eliminated.
43 *
44 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
45 * happen for small transfers so that the system will not spend all of
46 * its time context switching.
47 *
48 * In order to limit the resource use of pipes, two sysctls exist:
49 *
50 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
51 * address space available to us in pipe_map. Whenever the amount in use
52 * exceeds half of this value, all new pipes will be created with size
53 * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited
54 * as well. This value is loader tunable only.
55 *
56 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
57 * be wired in order to facilitate direct copies using page flipping.
58 * Whenever this value is exceeded, pipes will fall back to using regular
59 * copies. This value is sysctl controllable at all times.
60 *
61 * These values are autotuned in subr_param.c.
62 *
63 * Memory usage may be monitored through the sysctls
64 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
65 *
66 */
67
68 #include <sys/cdefs.h>
69 __FBSDID("$FreeBSD: releng/5.2/sys/kern/sys_pipe.c 122352 2003-11-09 09:17:26Z tanimura $");
70
71 #include "opt_mac.h"
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/fcntl.h>
76 #include <sys/file.h>
77 #include <sys/filedesc.h>
78 #include <sys/filio.h>
79 #include <sys/kernel.h>
80 #include <sys/lock.h>
81 #include <sys/mac.h>
82 #include <sys/mutex.h>
83 #include <sys/ttycom.h>
84 #include <sys/stat.h>
85 #include <sys/malloc.h>
86 #include <sys/poll.h>
87 #include <sys/selinfo.h>
88 #include <sys/signalvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/sysproto.h>
91 #include <sys/pipe.h>
92 #include <sys/proc.h>
93 #include <sys/vnode.h>
94 #include <sys/uio.h>
95 #include <sys/event.h>
96
97 #include <vm/vm.h>
98 #include <vm/vm_param.h>
99 #include <vm/vm_object.h>
100 #include <vm/vm_kern.h>
101 #include <vm/vm_extern.h>
102 #include <vm/pmap.h>
103 #include <vm/vm_map.h>
104 #include <vm/vm_page.h>
105 #include <vm/uma.h>
106
107 /*
108 * Use this define if you want to disable *fancy* VM things. Expect an
109 * approx 30% decrease in transfer rate. This could be useful for
110 * NetBSD or OpenBSD.
111 */
112 /* #define PIPE_NODIRECT */
113
114 /*
115 * interfaces to the outside world
116 */
117 static fo_rdwr_t pipe_read;
118 static fo_rdwr_t pipe_write;
119 static fo_ioctl_t pipe_ioctl;
120 static fo_poll_t pipe_poll;
121 static fo_kqfilter_t pipe_kqfilter;
122 static fo_stat_t pipe_stat;
123 static fo_close_t pipe_close;
124
/*
 * File-operations table for pipe descriptors.  pipe() stores a pointer to
 * this table in f_ops of both the read-side and the write-side struct file.
 * DFLAG_PASSABLE presumably marks the descriptor as eligible for passing
 * between processes -- confirm against the fileops definition.
 */
125 static struct fileops pipeops = {
126 .fo_read = pipe_read,
127 .fo_write = pipe_write,
128 .fo_ioctl = pipe_ioctl,
129 .fo_poll = pipe_poll,
130 .fo_kqfilter = pipe_kqfilter,
131 .fo_stat = pipe_stat,
132 .fo_close = pipe_close,
133 .fo_flags = DFLAG_PASSABLE
134 };
135
136 static void filt_pipedetach(struct knote *kn);
137 static int filt_piperead(struct knote *kn, long hint);
138 static int filt_pipewrite(struct knote *kn, long hint);
139
/*
 * kqueue filter tables for the readable and writable events of a pipe.
 * Both share filt_pipedetach and differ only in the event routine.
 * NOTE(review): the positional initializers are presumably
 * { f_isfd, f_attach, f_detach, f_event } -- confirm against sys/event.h.
 */
140 static struct filterops pipe_rfiltops =
141 { 1, NULL, filt_pipedetach, filt_piperead };
142 static struct filterops pipe_wfiltops =
143 { 1, NULL, filt_pipedetach, filt_pipewrite };
144
145 /*
146 * Default pipe buffer size(s), this can be kind-of large now because pipe
147 * space is pageable. The pipe code will try to maintain locality of
148 * reference for performance reasons, so small amounts of outstanding I/O
149 * will not wipe the cache.
150 */
151 #define MINPIPESIZE (PIPE_SIZE/3)
152 #define MAXPIPESIZE (2*PIPE_SIZE/3)
153
154 /*
155 * Limit the number of "big" pipes
156 */
157 #define LIMITBIGPIPES 32
/*
 * Global usage counters, exported read-only through sysctl below.
 * nbigpipe is bumped by pipe_write() when a resize to BIG_PIPE_SIZE
 * succeeds.
 */
158 static int nbigpipe;
159
/*
 * amountpipes and amountpipekva are bumped by pipespace();
 * amountpipekvawired is adjusted by pipe_build_write_buffer() and
 * pipe_destroy_write_buffer().
 */
160 static int amountpipes;
161 static int amountpipekva;
162 static int amountpipekvawired;
163
164 SYSCTL_DECL(_kern_ipc);
165
/*
 * Limits (maxpipekva, maxpipekvawired) are defined outside this file;
 * the remaining nodes expose the counters above under kern.ipc.
 */
166 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
167 &maxpipekva, 0, "Pipe KVA limit");
168 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
169 &maxpipekvawired, 0, "Pipe KVA wired limit");
170 SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
171 &amountpipes, 0, "Current # of pipes");
172 SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
173 &nbigpipe, 0, "Current # of big pipes");
174 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
175 &amountpipekva, 0, "Pipe KVA usage");
176 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
177 &amountpipekvawired, 0, "Pipe wired KVA usage");
178
179 static void pipeinit(void *dummy __unused);
180 static void pipeclose(struct pipe *cpipe);
181 static void pipe_free_kmem(struct pipe *cpipe);
182 static int pipe_create(struct pipe **cpipep);
183 static __inline int pipelock(struct pipe *cpipe, int catch);
184 static __inline void pipeunlock(struct pipe *cpipe);
185 static __inline void pipeselwakeup(struct pipe *cpipe);
186 #ifndef PIPE_NODIRECT
187 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
188 static void pipe_destroy_write_buffer(struct pipe *wpipe);
189 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
190 static void pipe_clone_write_buffer(struct pipe *wpipe);
191 #endif
192 static int pipespace(struct pipe *cpipe, int size);
193
194 static uma_zone_t pipe_zone;
195
196 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
197
198 static void
199 pipeinit(void *dummy __unused)
200 {
201
202 pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
203 NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
204 KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
205 }
206
207 /*
208 * The pipe system call for the DTYPE_PIPE type of pipes
209 */
210
211 /* ARGSUSED */
/*
 * Create a pipe: two struct pipe endpoints cross-linked through pipe_peer,
 * each attached to its own struct file.  The two descriptor numbers are
 * returned in td->td_retval[0] and td->td_retval[1].
 *
 * Returns 0 on success, ENFILE if either pipe_create() call fails, or the
 * error reported by falloc().
 */
212 int
213 pipe(td, uap)
214 struct thread *td;
215 struct pipe_args /* {
216 int dummy;
217 } */ *uap;
218 {
219 struct filedesc *fdp = td->td_proc->p_fd;
220 struct file *rf, *wf;
221 struct pipe *rpipe, *wpipe;
222 struct mtx *pmtx;
223 int fd, error;
224
225 rpipe = wpipe = NULL;
/* Either endpoint may still be NULL here; pipeclose() is handed both anyway. */
226 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
227 pipeclose(rpipe);
228 pipeclose(wpipe);
229 return (ENFILE);
230 }
231
232 rpipe->pipe_state |= PIPE_DIRECTOK;
233 wpipe->pipe_state |= PIPE_DIRECTOK;
234
235 error = falloc(td, &rf, &fd);
236 if (error) {
237 pipeclose(rpipe);
238 pipeclose(wpipe);
239 return (error);
240 }
241 /* An extra reference on `rf' has been held for us by falloc(). */
242 td->td_retval[0] = fd;
243
244 /*
245 * Warning: once we've gotten past allocation of the fd for the
246 * read-side, we can only drop the read side via fdrop() in order
247 * to avoid races against processes which manage to dup() the read
248 * side while we are blocked trying to allocate the write side.
249 */
250 FILE_LOCK(rf);
251 rf->f_flag = FREAD | FWRITE;
252 rf->f_type = DTYPE_PIPE;
253 rf->f_data = rpipe;
254 rf->f_ops = &pipeops;
255 FILE_UNLOCK(rf);
256 error = falloc(td, &wf, &fd);
257 if (error) {
/*
 * Undo the first descriptor: clear the table slot only if it still
 * refers to rf, dropping the table's reference in that case, then
 * drop the extra reference taken by falloc().
 */
258 FILEDESC_LOCK(fdp);
259 if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
260 fdp->fd_ofiles[td->td_retval[0]] = NULL;
261 FILEDESC_UNLOCK(fdp);
262 fdrop(rf, td);
263 } else
264 FILEDESC_UNLOCK(fdp);
265 fdrop(rf, td);
266 /* rpipe has been closed by fdrop(). */
267 pipeclose(wpipe);
268 return (error);
269 }
270 /* An extra reference on `wf' has been held for us by falloc(). */
271 FILE_LOCK(wf);
272 wf->f_flag = FREAD | FWRITE;
273 wf->f_type = DTYPE_PIPE;
274 wf->f_data = wpipe;
275 wf->f_ops = &pipeops;
276 FILE_UNLOCK(wf);
277 fdrop(wf, td);
278 td->td_retval[1] = fd;
279 rpipe->pipe_peer = wpipe;
280 wpipe->pipe_peer = rpipe;
281 #ifdef MAC
282 /*
283 * struct pipe represents a pipe endpoint. The MAC label is shared
284 * between the connected endpoints. As a result mac_init_pipe() and
285 * mac_create_pipe() should only be called on one of the endpoints
286 * after they have been connected.
287 */
288 mac_init_pipe(rpipe);
289 mac_create_pipe(td->td_ucred, rpipe);
290 #endif
/*
 * A single recursive mutex is shared by both endpoints.
 * NOTE(review): pipe_mtxp is only assigned here, after both files have
 * been set up with f_data/f_ops; confirm that no other thread can reach
 * the pipe through the new descriptors before this assignment.
 */
291 pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
292 mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
293 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
294 fdrop(rf, td);
295
296 return (0);
297 }
298
299 /*
300 * Allocate kva for pipe circular buffer, the space is pageable
301 * This routine will 'realloc' the size of a pipe safely, if it fails
302 * it will retain the old buffer.
303 * If it fails it will return ENOMEM.
304 */
305 static int
306 pipespace(cpipe, size)
307 struct pipe *cpipe;
308 int size;
309 {
310 caddr_t buffer;
311 int error;
312 static int curfail = 0;
313 static struct timeval lastfail;
314
315 KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
316 ("pipespace: pipe mutex locked"));
317
318 size = round_page(size);
319 /*
320 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
321 */
322 buffer = (caddr_t) vm_map_min(pipe_map);
323
324 /*
325 * The map entry is, by default, pageable.
326 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
327 */
328 error = vm_map_find(pipe_map, NULL, 0,
329 (vm_offset_t *) &buffer, size, 1,
330 VM_PROT_ALL, VM_PROT_ALL, 0);
331 if (error != KERN_SUCCESS) {
332 if (ppsratecheck(&lastfail, &curfail, 1))
333 printf("kern.maxpipekva exceeded, please see tuning(7).\n");
334 return (ENOMEM);
335 }
336
337 /* free old resources if we're resizing */
338 pipe_free_kmem(cpipe);
339 cpipe->pipe_buffer.buffer = buffer;
340 cpipe->pipe_buffer.size = size;
341 cpipe->pipe_buffer.in = 0;
342 cpipe->pipe_buffer.out = 0;
343 cpipe->pipe_buffer.cnt = 0;
344 atomic_add_int(&amountpipes, 1);
345 atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
346 return (0);
347 }
348
349 /*
350 * initialize and allocate VM and memory for pipe
351 */
352 static int
353 pipe_create(cpipep)
354 struct pipe **cpipep;
355 {
356 struct pipe *cpipe;
357 int error;
358
359 *cpipep = uma_zalloc(pipe_zone, M_WAITOK);
360 if (*cpipep == NULL)
361 return (ENOMEM);
362
363 cpipe = *cpipep;
364
365 /*
366 * protect so pipeclose() doesn't follow a junk pointer
367 * if pipespace() fails.
368 */
369 bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
370 cpipe->pipe_state = 0;
371 cpipe->pipe_peer = NULL;
372 cpipe->pipe_busy = 0;
373
374 #ifndef PIPE_NODIRECT
375 /*
376 * pipe data structure initializations to support direct pipe I/O
377 */
378 cpipe->pipe_map.cnt = 0;
379 cpipe->pipe_map.kva = 0;
380 cpipe->pipe_map.pos = 0;
381 cpipe->pipe_map.npages = 0;
382 /* cpipe->pipe_map.ms[] = invalid */
383 #endif
384
385 cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */
386 /*
387 * Reduce to 1/4th pipe size if we're over our global max.
388 */
389 if (amountpipekva > maxpipekva / 2)
390 error = pipespace(cpipe, SMALL_PIPE_SIZE);
391 else
392 error = pipespace(cpipe, PIPE_SIZE);
393 if (error)
394 return (error);
395
396 vfs_timestamp(&cpipe->pipe_ctime);
397 cpipe->pipe_atime = cpipe->pipe_ctime;
398 cpipe->pipe_mtime = cpipe->pipe_ctime;
399
400 return (0);
401 }
402
403
404 /*
405 * lock a pipe for I/O, blocking other access
406 */
407 static __inline int
408 pipelock(cpipe, catch)
409 struct pipe *cpipe;
410 int catch;
411 {
412 int error;
413
414 PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
415 while (cpipe->pipe_state & PIPE_LOCKFL) {
416 cpipe->pipe_state |= PIPE_LWANT;
417 error = msleep(cpipe, PIPE_MTX(cpipe),
418 catch ? (PRIBIO | PCATCH) : PRIBIO,
419 "pipelk", 0);
420 if (error != 0)
421 return (error);
422 }
423 cpipe->pipe_state |= PIPE_LOCKFL;
424 return (0);
425 }
426
427 /*
428 * unlock a pipe I/O lock
429 */
430 static __inline void
431 pipeunlock(cpipe)
432 struct pipe *cpipe;
433 {
434
435 PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
436 cpipe->pipe_state &= ~PIPE_LOCKFL;
437 if (cpipe->pipe_state & PIPE_LWANT) {
438 cpipe->pipe_state &= ~PIPE_LWANT;
439 wakeup(cpipe);
440 }
441 }
442
443 static __inline void
444 pipeselwakeup(cpipe)
445 struct pipe *cpipe;
446 {
447
448 if (cpipe->pipe_state & PIPE_SEL) {
449 cpipe->pipe_state &= ~PIPE_SEL;
450 selwakeuppri(&cpipe->pipe_sel, PSOCK);
451 }
452 if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
453 pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
454 KNOTE(&cpipe->pipe_sel.si_note, 0);
455 }
456
457 /* ARGSUSED */
/*
 * fo_read method: move data from the pipe into the caller's uio.
 *
 * Data is taken from the circular pipe_buffer when it holds bytes, or,
 * when PIPE_DIRECTW is set and pipe_map.cnt is non-zero, straight from
 * the range at pipe_map.kva + pipe_map.pos.  The pipe mutex is dropped
 * around each uiomove() while the pipelock() flag stays held.
 *
 * Returns 0 on success (including EOF, where nothing may have been
 * transferred), EAGAIN when the file is FNONBLOCK and no data is
 * available, or an error from pipelock(), msleep(), uiomove() or the MAC
 * check.
 */
458 static int
459 pipe_read(fp, uio, active_cred, flags, td)
460 struct file *fp;
461 struct uio *uio;
462 struct ucred *active_cred;
463 struct thread *td;
464 int flags;
465 {
466 struct pipe *rpipe = fp->f_data;
467 int error;
468 int nread = 0;
469 u_int size;
470
471 PIPE_LOCK(rpipe);
/* pipe_busy is raised for the whole call and dropped at unlocked_error. */
472 ++rpipe->pipe_busy;
473 error = pipelock(rpipe, 1);
474 if (error)
475 goto unlocked_error;
476
477 #ifdef MAC
478 error = mac_check_pipe_read(active_cred, rpipe);
479 if (error)
480 goto locked_error;
481 #endif
482
483 while (uio->uio_resid) {
484 /*
485 * normal pipe buffer receive
486 */
487 if (rpipe->pipe_buffer.cnt > 0) {
/* Clamp to the contiguous run up to the end of the buffer, the bytes queued, and the request. */
488 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
489 if (size > rpipe->pipe_buffer.cnt)
490 size = rpipe->pipe_buffer.cnt;
491 if (size > (u_int) uio->uio_resid)
492 size = (u_int) uio->uio_resid;
493
494 PIPE_UNLOCK(rpipe);
495 error = uiomove(
496 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
497 size, uio);
498 PIPE_LOCK(rpipe);
499 if (error)
500 break;
501
502 rpipe->pipe_buffer.out += size;
503 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
504 rpipe->pipe_buffer.out = 0;
505
506 rpipe->pipe_buffer.cnt -= size;
507
508 /*
509 * If there is no more to read in the pipe, reset
510 * its pointers to the beginning. This improves
511 * cache hit stats.
512 */
513 if (rpipe->pipe_buffer.cnt == 0) {
514 rpipe->pipe_buffer.in = 0;
515 rpipe->pipe_buffer.out = 0;
516 }
517 nread += size;
518 #ifndef PIPE_NODIRECT
519 /*
520 * Direct copy, bypassing a kernel buffer.
521 */
522 } else if ((size = rpipe->pipe_map.cnt) &&
523 (rpipe->pipe_state & PIPE_DIRECTW)) {
524 caddr_t va;
525 if (size > (u_int) uio->uio_resid)
526 size = (u_int) uio->uio_resid;
527
528 va = (caddr_t) rpipe->pipe_map.kva +
529 rpipe->pipe_map.pos;
530 PIPE_UNLOCK(rpipe);
531 error = uiomove(va, size, uio);
532 PIPE_LOCK(rpipe);
533 if (error)
534 break;
535 nread += size;
536 rpipe->pipe_map.pos += size;
537 rpipe->pipe_map.cnt -= size;
/* Whole direct transfer consumed: clear PIPE_DIRECTW and wake sleepers on the pipe. */
538 if (rpipe->pipe_map.cnt == 0) {
539 rpipe->pipe_state &= ~PIPE_DIRECTW;
540 wakeup(rpipe);
541 }
542 #endif
543 } else {
544 /*
545 * detect EOF condition
546 * read returns 0 on EOF, no need to set error
547 */
548 if (rpipe->pipe_state & PIPE_EOF)
549 break;
550
551 /*
552 * If the "write-side" has been blocked, wake it up now.
553 */
554 if (rpipe->pipe_state & PIPE_WANTW) {
555 rpipe->pipe_state &= ~PIPE_WANTW;
556 wakeup(rpipe);
557 }
558
559 /*
560 * Break if some data was read.
561 */
562 if (nread > 0)
563 break;
564
565 /*
566 * Unlock the pipe buffer for our remaining processing.
567 * We will either break out with an error or we will
568 * sleep and relock to loop.
569 */
570 pipeunlock(rpipe);
571
572 /*
573 * Handle non-blocking mode operation or
574 * wait for more data.
575 */
576 if (fp->f_flag & FNONBLOCK) {
577 error = EAGAIN;
578 } else {
579 rpipe->pipe_state |= PIPE_WANTR;
580 if ((error = msleep(rpipe, PIPE_MTX(rpipe),
581 PRIBIO | PCATCH,
582 "piperd", 0)) == 0)
583 error = pipelock(rpipe, 1);
584 }
/* The pipelock() flag is not held on this path, hence the jump past pipeunlock(). */
585 if (error)
586 goto unlocked_error;
587 }
588 }
589 #ifdef MAC
590 locked_error:
591 #endif
592 pipeunlock(rpipe);
593
594 /* XXX: should probably do this before getting any locks. */
595 if (error == 0)
596 vfs_timestamp(&rpipe->pipe_atime);
597 unlocked_error:
598 --rpipe->pipe_busy;
599
600 /*
601 * PIPE_WANT processing only makes sense if pipe_busy is 0.
602 */
603 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
604 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
605 wakeup(rpipe);
606 } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
607 /*
608 * Handle write blocking hysteresis.
609 */
610 if (rpipe->pipe_state & PIPE_WANTW) {
611 rpipe->pipe_state &= ~PIPE_WANTW;
612 wakeup(rpipe);
613 }
614 }
615
/* Notify select/poll/kqueue waiters once at least PIPE_BUF bytes of buffer space are free. */
616 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
617 pipeselwakeup(rpipe);
618
619 PIPE_UNLOCK(rpipe);
620 return (error);
621 }
622
623 #ifndef PIPE_NODIRECT
624 /*
625 * Map the sending processes' buffer into kernel space and wire it.
626 * This is similar to a physical write operation.
627 */
/*
 * Prepare a direct write: fault in and hold the pages backing the first
 * iovec of uio (clamped to the pipe buffer size), record them in
 * wpipe->pipe_map, map them at pipe_map.kva, and advance uio past the
 * bytes that were taken.  Must be called without the pipe mutex held.
 *
 * Returns 0 on success or EFAULT if a page cannot be faulted in; in the
 * latter case every page held so far is released and uio is untouched.
 */
628 static int
629 pipe_build_write_buffer(wpipe, uio)
630 struct pipe *wpipe;
631 struct uio *uio;
632 {
633 pmap_t pmap;
634 u_int size;
635 int i, j;
636 vm_offset_t addr, endaddr;
637
638 PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
639
640 size = (u_int) uio->uio_iov->iov_len;
641 if (size > wpipe->pipe_buffer.size)
642 size = wpipe->pipe_buffer.size;
643
644 pmap = vmspace_pmap(curproc->p_vmspace);
645 endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
646 addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
647 for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
648 /*
649 * vm_fault_quick() can sleep. Consequently,
650 * vm_page_lock_queue() and vm_page_unlock_queue()
651 * should not be performed outside of this loop.
652 */
653 race:
654 if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
655 vm_page_lock_queues();
656 for (j = 0; j < i; j++)
657 vm_page_unhold(wpipe->pipe_map.ms[j]);
658 vm_page_unlock_queues();
659 return (EFAULT);
660 }
661 wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
662 VM_PROT_READ);
/* No page came back for this address: fault it in again and retry. */
663 if (wpipe->pipe_map.ms[i] == NULL)
664 goto race;
665 }
666
667 /*
668 * set up the control block
669 */
670 wpipe->pipe_map.npages = i;
671 wpipe->pipe_map.pos =
672 ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
673 wpipe->pipe_map.cnt = size;
674
675 /*
676 * and map the buffer
677 */
678 if (wpipe->pipe_map.kva == 0) {
679 /*
680 * We need to allocate space for an extra page because the
681 * address range might (will) span pages at times.
682 */
/*
 * NOTE(review): the result of kmem_alloc_nofault() is used without a
 * check; confirm whether it can return 0 here.
 */
683 wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
684 wpipe->pipe_buffer.size + PAGE_SIZE);
685 atomic_add_int(&amountpipekvawired,
686 wpipe->pipe_buffer.size + PAGE_SIZE);
687 }
688 pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
689 wpipe->pipe_map.npages);
690
691 /*
692 * and update the uio data
693 */
694
695 uio->uio_iov->iov_len -= size;
696 uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
697 if (uio->uio_iov->iov_len == 0)
698 uio->uio_iov++;
699 uio->uio_resid -= size;
700 uio->uio_offset += size;
701 return (0);
702 }
703
704 /*
705 * unmap and unwire the process buffer
706 */
707 static void
708 pipe_destroy_write_buffer(wpipe)
709 struct pipe *wpipe;
710 {
711 int i;
712
713 PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
714 if (wpipe->pipe_map.kva) {
715 pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
716
717 if (amountpipekvawired > maxpipekvawired / 2) {
718 /* Conserve address space */
719 vm_offset_t kva = wpipe->pipe_map.kva;
720 wpipe->pipe_map.kva = 0;
721 kmem_free(kernel_map, kva,
722 wpipe->pipe_buffer.size + PAGE_SIZE);
723 atomic_subtract_int(&amountpipekvawired,
724 wpipe->pipe_buffer.size + PAGE_SIZE);
725 }
726 }
727 vm_page_lock_queues();
728 for (i = 0; i < wpipe->pipe_map.npages; i++) {
729 vm_page_unhold(wpipe->pipe_map.ms[i]);
730 }
731 vm_page_unlock_queues();
732 wpipe->pipe_map.npages = 0;
733 }
734
735 /*
736 * In the case of a signal, the writing process might go away. This
737 * code copies the data into the circular buffer so that the source
738 * pages can be freed without loss of data.
739 */
740 static void
741 pipe_clone_write_buffer(wpipe)
742 struct pipe *wpipe;
743 {
744 int size;
745 int pos;
746
747 PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
748 size = wpipe->pipe_map.cnt;
749 pos = wpipe->pipe_map.pos;
750
751 wpipe->pipe_buffer.in = size;
752 wpipe->pipe_buffer.out = 0;
753 wpipe->pipe_buffer.cnt = size;
754 wpipe->pipe_state &= ~PIPE_DIRECTW;
755
756 PIPE_UNLOCK(wpipe);
757 bcopy((caddr_t) wpipe->pipe_map.kva + pos,
758 wpipe->pipe_buffer.buffer, size);
759 pipe_destroy_write_buffer(wpipe);
760 PIPE_LOCK(wpipe);
761 }
762
763 /*
764 * This implements the pipe buffer write mechanism. Note that only
765 * a direct write OR a normal pipe write can be pending at any given time.
766 * If there are any characters in the pipe buffer, the direct write will
767 * be deferred until the receiving process grabs all of the bytes from
768 * the pipe buffer. Then the direct mapping write is set-up.
769 */
/*
 * Perform one direct (mapped) write of the first iovec in uio.
 * Called and returns with the pipe mutex held.
 *
 * Returns 0 when the transfer finished, EPIPE when PIPE_EOF is seen, or
 * the error from msleep() / pipe_build_write_buffer().  When the final
 * wait is interrupted while PIPE_DIRECTW is still set, the remaining
 * data is moved into the pipe buffer via pipe_clone_write_buffer()
 * before returning the msleep() error.
 */
770 static int
771 pipe_direct_write(wpipe, uio)
772 struct pipe *wpipe;
773 struct uio *uio;
774 {
775 int error;
776
777 retry:
778 PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
/* Wait for any direct write already in progress to finish. */
779 while (wpipe->pipe_state & PIPE_DIRECTW) {
780 if (wpipe->pipe_state & PIPE_WANTR) {
781 wpipe->pipe_state &= ~PIPE_WANTR;
782 wakeup(wpipe);
783 }
784 wpipe->pipe_state |= PIPE_WANTW;
785 error = msleep(wpipe, PIPE_MTX(wpipe),
786 PRIBIO | PCATCH, "pipdww", 0);
787 if (error)
788 goto error1;
789 if (wpipe->pipe_state & PIPE_EOF) {
790 error = EPIPE;
791 goto error1;
792 }
793 }
794 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */
/* Buffered data must be drained by the reader first; sleep, then start over. */
795 if (wpipe->pipe_buffer.cnt > 0) {
796 if (wpipe->pipe_state & PIPE_WANTR) {
797 wpipe->pipe_state &= ~PIPE_WANTR;
798 wakeup(wpipe);
799 }
800
801 wpipe->pipe_state |= PIPE_WANTW;
802 error = msleep(wpipe, PIPE_MTX(wpipe),
803 PRIBIO | PCATCH, "pipdwc", 0);
804 if (error)
805 goto error1;
806 if (wpipe->pipe_state & PIPE_EOF) {
807 error = EPIPE;
808 goto error1;
809 }
810 goto retry;
811 }
812
813 wpipe->pipe_state |= PIPE_DIRECTW;
814
/* pipelock() is called with catch == 0 here; its return value is not examined. */
815 pipelock(wpipe, 0);
816 PIPE_UNLOCK(wpipe);
817 error = pipe_build_write_buffer(wpipe, uio);
818 PIPE_LOCK(wpipe);
819 pipeunlock(wpipe);
820 if (error) {
821 wpipe->pipe_state &= ~PIPE_DIRECTW;
822 goto error1;
823 }
824
825 error = 0;
/* Sleep until PIPE_DIRECTW is cleared (pipe_read() does so once pipe_map.cnt reaches 0) or msleep() fails. */
826 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
827 if (wpipe->pipe_state & PIPE_EOF) {
828 pipelock(wpipe, 0);
829 PIPE_UNLOCK(wpipe);
830 pipe_destroy_write_buffer(wpipe);
831 PIPE_LOCK(wpipe);
832 pipeselwakeup(wpipe);
833 pipeunlock(wpipe);
834 error = EPIPE;
835 goto error1;
836 }
837 if (wpipe->pipe_state & PIPE_WANTR) {
838 wpipe->pipe_state &= ~PIPE_WANTR;
839 wakeup(wpipe);
840 }
841 pipeselwakeup(wpipe);
842 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
843 "pipdwt", 0);
844 }
845
846 pipelock(wpipe,0);
847 if (wpipe->pipe_state & PIPE_DIRECTW) {
848 /*
849 * this bit of trickery substitutes a kernel buffer for
850 * the process that might be going away.
851 */
852 pipe_clone_write_buffer(wpipe);
853 } else {
854 PIPE_UNLOCK(wpipe);
855 pipe_destroy_write_buffer(wpipe);
856 PIPE_LOCK(wpipe);
857 }
858 pipeunlock(wpipe);
859 return (error);
860
/* Common failure exit: wake any sleeper on the pipe and report the error. */
861 error1:
862 wakeup(wpipe);
863 return (error);
864 }
865 #endif
866
867 static int
868 pipe_write(fp, uio, active_cred, flags, td)
869 struct file *fp;
870 struct uio *uio;
871 struct ucred *active_cred;
872 struct thread *td;
873 int flags;
874 {
875 int error = 0;
876 int orig_resid;
877 struct pipe *wpipe, *rpipe;
878
879 rpipe = fp->f_data;
880 wpipe = rpipe->pipe_peer;
881
882 PIPE_LOCK(rpipe);
883 /*
884 * detect loss of pipe read side, issue SIGPIPE if lost.
885 */
886 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
887 PIPE_UNLOCK(rpipe);
888 return (EPIPE);
889 }
890 #ifdef MAC
891 error = mac_check_pipe_write(active_cred, wpipe);
892 if (error) {
893 PIPE_UNLOCK(rpipe);
894 return (error);
895 }
896 #endif
897 ++wpipe->pipe_busy;
898
899 /*
900 * If it is advantageous to resize the pipe buffer, do
901 * so.
902 */
903 if ((uio->uio_resid > PIPE_SIZE) &&
904 (amountpipekva < maxpipekva / 2) &&
905 (nbigpipe < LIMITBIGPIPES) &&
906 (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
907 (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
908 (wpipe->pipe_buffer.cnt == 0)) {
909
910 if ((error = pipelock(wpipe, 1)) == 0) {
911 PIPE_UNLOCK(wpipe);
912 if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
913 atomic_add_int(&nbigpipe, 1);
914 PIPE_LOCK(wpipe);
915 pipeunlock(wpipe);
916 }
917 }
918
919 /*
920 * If an early error occured unbusy and return, waking up any pending
921 * readers.
922 */
923 if (error) {
924 --wpipe->pipe_busy;
925 if ((wpipe->pipe_busy == 0) &&
926 (wpipe->pipe_state & PIPE_WANT)) {
927 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
928 wakeup(wpipe);
929 }
930 PIPE_UNLOCK(rpipe);
931 return(error);
932 }
933
934 orig_resid = uio->uio_resid;
935
936 while (uio->uio_resid) {
937 int space;
938
939 #ifndef PIPE_NODIRECT
940 /*
941 * If the transfer is large, we can gain performance if
942 * we do process-to-process copies directly.
943 * If the write is non-blocking, we don't use the
944 * direct write mechanism.
945 *
946 * The direct write mechanism will detect the reader going
947 * away on us.
948 */
949 if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
950 (fp->f_flag & FNONBLOCK) == 0 &&
951 amountpipekvawired + uio->uio_resid < maxpipekvawired) {
952 error = pipe_direct_write(wpipe, uio);
953 if (error)
954 break;
955 continue;
956 }
957 #endif
958
959 /*
960 * Pipe buffered writes cannot be coincidental with
961 * direct writes. We wait until the currently executing
962 * direct write is completed before we start filling the
963 * pipe buffer. We break out if a signal occurs or the
964 * reader goes away.
965 */
966 retrywrite:
967 while (wpipe->pipe_state & PIPE_DIRECTW) {
968 if (wpipe->pipe_state & PIPE_WANTR) {
969 wpipe->pipe_state &= ~PIPE_WANTR;
970 wakeup(wpipe);
971 }
972 error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
973 "pipbww", 0);
974 if (wpipe->pipe_state & PIPE_EOF)
975 break;
976 if (error)
977 break;
978 }
979 if (wpipe->pipe_state & PIPE_EOF) {
980 error = EPIPE;
981 break;
982 }
983
984 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
985
986 /* Writes of size <= PIPE_BUF must be atomic. */
987 if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
988 space = 0;
989
990 if (space > 0) {
991 if ((error = pipelock(wpipe,1)) == 0) {
992 int size; /* Transfer size */
993 int segsize; /* first segment to transfer */
994
995 /*
996 * It is possible for a direct write to
997 * slip in on us... handle it here...
998 */
999 if (wpipe->pipe_state & PIPE_DIRECTW) {
1000 pipeunlock(wpipe);
1001 goto retrywrite;
1002 }
1003 /*
1004 * If a process blocked in uiomove, our
1005 * value for space might be bad.
1006 *
1007 * XXX will we be ok if the reader has gone
1008 * away here?
1009 */
1010 if (space > wpipe->pipe_buffer.size -
1011 wpipe->pipe_buffer.cnt) {
1012 pipeunlock(wpipe);
1013 goto retrywrite;
1014 }
1015
1016 /*
1017 * Transfer size is minimum of uio transfer
1018 * and free space in pipe buffer.
1019 */
1020 if (space > uio->uio_resid)
1021 size = uio->uio_resid;
1022 else
1023 size = space;
1024 /*
1025 * First segment to transfer is minimum of
1026 * transfer size and contiguous space in
1027 * pipe buffer. If first segment to transfer
1028 * is less than the transfer size, we've got
1029 * a wraparound in the buffer.
1030 */
1031 segsize = wpipe->pipe_buffer.size -
1032 wpipe->pipe_buffer.in;
1033 if (segsize > size)
1034 segsize = size;
1035
1036 /* Transfer first segment */
1037
1038 PIPE_UNLOCK(rpipe);
1039 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1040 segsize, uio);
1041 PIPE_LOCK(rpipe);
1042
1043 if (error == 0 && segsize < size) {
1044 /*
1045 * Transfer remaining part now, to
1046 * support atomic writes. Wraparound
1047 * happened.
1048 */
1049 if (wpipe->pipe_buffer.in + segsize !=
1050 wpipe->pipe_buffer.size)
1051 panic("Expected pipe buffer "
1052 "wraparound disappeared");
1053
1054 PIPE_UNLOCK(rpipe);
1055 error = uiomove(
1056 &wpipe->pipe_buffer.buffer[0],
1057 size - segsize, uio);
1058 PIPE_LOCK(rpipe);
1059 }
1060 if (error == 0) {
1061 wpipe->pipe_buffer.in += size;
1062 if (wpipe->pipe_buffer.in >=
1063 wpipe->pipe_buffer.size) {
1064 if (wpipe->pipe_buffer.in !=
1065 size - segsize +
1066 wpipe->pipe_buffer.size)
1067 panic("Expected "
1068 "wraparound bad");
1069 wpipe->pipe_buffer.in = size -
1070 segsize;
1071 }
1072
1073 wpipe->pipe_buffer.cnt += size;
1074 if (wpipe->pipe_buffer.cnt >
1075 wpipe->pipe_buffer.size)
1076 panic("Pipe buffer overflow");
1077
1078 }
1079 pipeunlock(wpipe);
1080 }
1081 if (error)
1082 break;
1083
1084 } else {
1085 /*
1086 * If the "read-side" has been blocked, wake it up now.
1087 */
1088 if (wpipe->pipe_state & PIPE_WANTR) {
1089 wpipe->pipe_state &= ~PIPE_WANTR;
1090 wakeup(wpipe);
1091 }
1092
1093 /*
1094 * don't block on non-blocking I/O
1095 */
1096 if (fp->f_flag & FNONBLOCK) {
1097 error = EAGAIN;
1098 break;
1099 }
1100
1101 /*
1102 * We have no more space and have something to offer,
1103 * wake up select/poll.
1104 */
1105 pipeselwakeup(wpipe);
1106
1107 wpipe->pipe_state |= PIPE_WANTW;
1108 error = msleep(wpipe, PIPE_MTX(rpipe),
1109 PRIBIO | PCATCH, "pipewr", 0);
1110 if (error != 0)
1111 break;
1112 /*
1113 * If read side wants to go away, we just issue a signal
1114 * to ourselves.
1115 */
1116 if (wpipe->pipe_state & PIPE_EOF) {
1117 error = EPIPE;
1118 break;
1119 }
1120 }
1121 }
1122
1123 --wpipe->pipe_busy;
1124
1125 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1126 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1127 wakeup(wpipe);
1128 } else if (wpipe->pipe_buffer.cnt > 0) {
1129 /*
1130 * If we have put any characters in the buffer, we wake up
1131 * the reader.
1132 */
1133 if (wpipe->pipe_state & PIPE_WANTR) {
1134 wpipe->pipe_state &= ~PIPE_WANTR;
1135 wakeup(wpipe);
1136 }
1137 }
1138
1139 /*
1140 * Don't return EPIPE if I/O was successful
1141 */
1142 if ((wpipe->pipe_buffer.cnt == 0) &&
1143 (uio->uio_resid == 0) &&
1144 (error == EPIPE)) {
1145 error = 0;
1146 }
1147
1148 if (error == 0)
1149 vfs_timestamp(&wpipe->pipe_mtime);
1150
1151 /*
1152 * We have something to offer,
1153 * wake up select/poll.
1154 */
1155 if (wpipe->pipe_buffer.cnt)
1156 pipeselwakeup(wpipe);
1157
1158 PIPE_UNLOCK(rpipe);
1159 return (error);
1160 }
1161
1162 /*
1163 * we implement a very minimal set of ioctls for compatibility with sockets.
1164 */
1165 static int
1166 pipe_ioctl(fp, cmd, data, active_cred, td)
1167 struct file *fp;
1168 u_long cmd;
1169 void *data;
1170 struct ucred *active_cred;
1171 struct thread *td;
1172 {
1173 struct pipe *mpipe = fp->f_data;
1174 #ifdef MAC
1175 int error;
1176 #endif
1177
1178 PIPE_LOCK(mpipe);
1179
1180 #ifdef MAC
1181 error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1182 if (error) {
1183 PIPE_UNLOCK(mpipe);
1184 return (error);
1185 }
1186 #endif
1187
1188 switch (cmd) {
1189
1190 case FIONBIO:
1191 PIPE_UNLOCK(mpipe);
1192 return (0);
1193
1194 case FIOASYNC:
1195 if (*(int *)data) {
1196 mpipe->pipe_state |= PIPE_ASYNC;
1197 } else {
1198 mpipe->pipe_state &= ~PIPE_ASYNC;
1199 }
1200 PIPE_UNLOCK(mpipe);
1201 return (0);
1202
1203 case FIONREAD:
1204 if (mpipe->pipe_state & PIPE_DIRECTW)
1205 *(int *)data = mpipe->pipe_map.cnt;
1206 else
1207 *(int *)data = mpipe->pipe_buffer.cnt;
1208 PIPE_UNLOCK(mpipe);
1209 return (0);
1210
1211 case FIOSETOWN:
1212 PIPE_UNLOCK(mpipe);
1213 return (fsetown(*(int *)data, &mpipe->pipe_sigio));
1214
1215 case FIOGETOWN:
1216 PIPE_UNLOCK(mpipe);
1217 *(int *)data = fgetown(&mpipe->pipe_sigio);
1218 return (0);
1219
1220 /* This is deprecated, FIOSETOWN should be used instead. */
1221 case TIOCSPGRP:
1222 PIPE_UNLOCK(mpipe);
1223 return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
1224
1225 /* This is deprecated, FIOGETOWN should be used instead. */
1226 case TIOCGPGRP:
1227 PIPE_UNLOCK(mpipe);
1228 *(int *)data = -fgetown(&mpipe->pipe_sigio);
1229 return (0);
1230
1231 }
1232 PIPE_UNLOCK(mpipe);
1233 return (ENOTTY);
1234 }
1235
1236 static int
1237 pipe_poll(fp, events, active_cred, td)
1238 struct file *fp;
1239 int events;
1240 struct ucred *active_cred;
1241 struct thread *td;
1242 {
1243 struct pipe *rpipe = fp->f_data;
1244 struct pipe *wpipe;
1245 int revents = 0;
1246 #ifdef MAC
1247 int error;
1248 #endif
1249
1250 wpipe = rpipe->pipe_peer;
1251 PIPE_LOCK(rpipe);
1252 #ifdef MAC
1253 error = mac_check_pipe_poll(active_cred, rpipe);
1254 if (error)
1255 goto locked_error;
1256 #endif
1257 if (events & (POLLIN | POLLRDNORM))
1258 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1259 (rpipe->pipe_buffer.cnt > 0) ||
1260 (rpipe->pipe_state & PIPE_EOF))
1261 revents |= events & (POLLIN | POLLRDNORM);
1262
1263 if (events & (POLLOUT | POLLWRNORM))
1264 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
1265 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1266 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
1267 revents |= events & (POLLOUT | POLLWRNORM);
1268
1269 if ((rpipe->pipe_state & PIPE_EOF) ||
1270 (wpipe == NULL) ||
1271 (wpipe->pipe_state & PIPE_EOF))
1272 revents |= POLLHUP;
1273
1274 if (revents == 0) {
1275 if (events & (POLLIN | POLLRDNORM)) {
1276 selrecord(td, &rpipe->pipe_sel);
1277 rpipe->pipe_state |= PIPE_SEL;
1278 }
1279
1280 if (events & (POLLOUT | POLLWRNORM)) {
1281 selrecord(td, &wpipe->pipe_sel);
1282 wpipe->pipe_state |= PIPE_SEL;
1283 }
1284 }
1285 #ifdef MAC
1286 locked_error:
1287 #endif
1288 PIPE_UNLOCK(rpipe);
1289
1290 return (revents);
1291 }
1292
1293 /*
1294 * We shouldn't need locks here as we're doing a read and this should
1295 * be a natural race.
1296 */
1297 static int
1298 pipe_stat(fp, ub, active_cred, td)
1299 struct file *fp;
1300 struct stat *ub;
1301 struct ucred *active_cred;
1302 struct thread *td;
1303 {
1304 struct pipe *pipe = fp->f_data;
1305 #ifdef MAC
1306 int error;
1307
1308 PIPE_LOCK(pipe);
1309 error = mac_check_pipe_stat(active_cred, pipe);
1310 PIPE_UNLOCK(pipe);
1311 if (error)
1312 return (error);
1313 #endif
1314 bzero(ub, sizeof(*ub));
1315 ub->st_mode = S_IFIFO;
1316 ub->st_blksize = pipe->pipe_buffer.size;
1317 ub->st_size = pipe->pipe_buffer.cnt;
1318 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
1319 ub->st_atimespec = pipe->pipe_atime;
1320 ub->st_mtimespec = pipe->pipe_mtime;
1321 ub->st_ctimespec = pipe->pipe_ctime;
1322 ub->st_uid = fp->f_cred->cr_uid;
1323 ub->st_gid = fp->f_cred->cr_gid;
1324 /*
1325 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
1326 * XXX (st_dev, st_ino) should be unique.
1327 */
1328 return (0);
1329 }
1330
1331 /* ARGSUSED */
1332 static int
1333 pipe_close(fp, td)
1334 struct file *fp;
1335 struct thread *td;
1336 {
1337 struct pipe *cpipe = fp->f_data;
1338
1339 fp->f_ops = &badfileops;
1340 fp->f_data = NULL;
1341 funsetown(&cpipe->pipe_sigio);
1342 pipeclose(cpipe);
1343 return (0);
1344 }
1345
1346 static void
1347 pipe_free_kmem(cpipe)
1348 struct pipe *cpipe;
1349 {
1350
1351 KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
1352 ("pipespace: pipe mutex locked"));
1353
1354 if (cpipe->pipe_buffer.buffer != NULL) {
1355 if (cpipe->pipe_buffer.size > PIPE_SIZE)
1356 atomic_subtract_int(&nbigpipe, 1);
1357 atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
1358 atomic_subtract_int(&amountpipes, 1);
1359 vm_map_remove(pipe_map,
1360 (vm_offset_t)cpipe->pipe_buffer.buffer,
1361 (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
1362 cpipe->pipe_buffer.buffer = NULL;
1363 }
1364 #ifndef PIPE_NODIRECT
1365 if (cpipe->pipe_map.kva != 0) {
1366 atomic_subtract_int(&amountpipekvawired,
1367 cpipe->pipe_buffer.size + PAGE_SIZE);
1368 kmem_free(kernel_map,
1369 cpipe->pipe_map.kva,
1370 cpipe->pipe_buffer.size + PAGE_SIZE);
1371 cpipe->pipe_map.cnt = 0;
1372 cpipe->pipe_map.kva = 0;
1373 cpipe->pipe_map.pos = 0;
1374 cpipe->pipe_map.npages = 0;
1375 }
1376 #endif
1377 }
1378
1379 /*
1380 * shutdown the pipe
1381 */
1382 static void
1383 pipeclose(cpipe)
1384 struct pipe *cpipe;
1385 {
1386 struct pipe *ppipe;
1387 int hadpeer;
1388
1389 if (cpipe == NULL)
1390 return;
1391
1392 hadpeer = 0;
1393
1394 /* partially created pipes won't have a valid mutex. */
1395 if (PIPE_MTX(cpipe) != NULL)
1396 PIPE_LOCK(cpipe);
1397
1398 pipeselwakeup(cpipe);
1399
1400 /*
1401 * If the other side is blocked, wake it up saying that
1402 * we want to close it down.
1403 */
1404 while (cpipe->pipe_busy) {
1405 wakeup(cpipe);
1406 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
1407 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1408 }
1409
1410 #ifdef MAC
1411 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1412 mac_destroy_pipe(cpipe);
1413 #endif
1414
1415 /*
1416 * Disconnect from peer
1417 */
1418 if ((ppipe = cpipe->pipe_peer) != NULL) {
1419 hadpeer++;
1420 pipeselwakeup(ppipe);
1421
1422 ppipe->pipe_state |= PIPE_EOF;
1423 wakeup(ppipe);
1424 KNOTE(&ppipe->pipe_sel.si_note, 0);
1425 ppipe->pipe_peer = NULL;
1426 }
1427 /*
1428 * free resources
1429 */
1430 if (PIPE_MTX(cpipe) != NULL) {
1431 PIPE_UNLOCK(cpipe);
1432 if (!hadpeer) {
1433 mtx_destroy(PIPE_MTX(cpipe));
1434 free(PIPE_MTX(cpipe), M_TEMP);
1435 }
1436 }
1437 pipe_free_kmem(cpipe);
1438 uma_zfree(pipe_zone, cpipe);
1439 }
1440
1441 /*ARGSUSED*/
1442 static int
1443 pipe_kqfilter(struct file *fp, struct knote *kn)
1444 {
1445 struct pipe *cpipe;
1446
1447 cpipe = kn->kn_fp->f_data;
1448 switch (kn->kn_filter) {
1449 case EVFILT_READ:
1450 kn->kn_fop = &pipe_rfiltops;
1451 break;
1452 case EVFILT_WRITE:
1453 kn->kn_fop = &pipe_wfiltops;
1454 cpipe = cpipe->pipe_peer;
1455 if (cpipe == NULL)
1456 /* other end of pipe has been closed */
1457 return (EPIPE);
1458 break;
1459 default:
1460 return (1);
1461 }
1462
1463 PIPE_LOCK(cpipe);
1464 SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
1465 PIPE_UNLOCK(cpipe);
1466 return (0);
1467 }
1468
1469 static void
1470 filt_pipedetach(struct knote *kn)
1471 {
1472 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1473
1474 if (kn->kn_filter == EVFILT_WRITE) {
1475 if (cpipe->pipe_peer == NULL)
1476 return;
1477 cpipe = cpipe->pipe_peer;
1478 }
1479
1480 PIPE_LOCK(cpipe);
1481 SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
1482 PIPE_UNLOCK(cpipe);
1483 }
1484
1485 /*ARGSUSED*/
1486 static int
1487 filt_piperead(struct knote *kn, long hint)
1488 {
1489 struct pipe *rpipe = kn->kn_fp->f_data;
1490 struct pipe *wpipe = rpipe->pipe_peer;
1491
1492 PIPE_LOCK(rpipe);
1493 kn->kn_data = rpipe->pipe_buffer.cnt;
1494 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
1495 kn->kn_data = rpipe->pipe_map.cnt;
1496
1497 if ((rpipe->pipe_state & PIPE_EOF) ||
1498 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1499 kn->kn_flags |= EV_EOF;
1500 PIPE_UNLOCK(rpipe);
1501 return (1);
1502 }
1503 PIPE_UNLOCK(rpipe);
1504 return (kn->kn_data > 0);
1505 }
1506
1507 /*ARGSUSED*/
1508 static int
1509 filt_pipewrite(struct knote *kn, long hint)
1510 {
1511 struct pipe *rpipe = kn->kn_fp->f_data;
1512 struct pipe *wpipe = rpipe->pipe_peer;
1513
1514 PIPE_LOCK(rpipe);
1515 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1516 kn->kn_data = 0;
1517 kn->kn_flags |= EV_EOF;
1518 PIPE_UNLOCK(rpipe);
1519 return (1);
1520 }
1521 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1522 if (wpipe->pipe_state & PIPE_DIRECTW)
1523 kn->kn_data = 0;
1524
1525 PIPE_UNLOCK(rpipe);
1526 return (kn->kn_data >= PIPE_BUF);
1527 }
Cache object: e4a42aae56ab336a4ded8a014486bae9
|