FreeBSD/Linux Kernel Cross Reference
sys/fs/pipe.c
1 /*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13
14 #include <asm/uaccess.h>
15 #include <asm/ioctls.h>
16
17 /*
18 * We use a start+len construction, which provides full use of the
19 * allocated memory.
20 * -- Florian Coosmann (FGC)
21 *
22 * Reads with count = 0 should always return 0.
23 * -- Julian Bradfield 1999-06-07.
24 */
25
26 /* Drop the inode semaphore and wait for a pipe event, atomically */
27 void pipe_wait(struct inode * inode)
28 {
29 DECLARE_WAITQUEUE(wait, current);
30 current->state = TASK_INTERRUPTIBLE;
31 add_wait_queue(PIPE_WAIT(*inode), &wait);
32 up(PIPE_SEM(*inode));
33 schedule();
34 remove_wait_queue(PIPE_WAIT(*inode), &wait);
35 current->state = TASK_RUNNING;
36 down(PIPE_SEM(*inode));
37 }
38
39 static ssize_t
40 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
41 {
42 struct inode *inode = filp->f_dentry->d_inode;
43 ssize_t size, read, ret;
44
45 /* Seeks are not allowed on pipes. */
46 ret = -ESPIPE;
47 read = 0;
48 if (ppos != &filp->f_pos)
49 goto out_nolock;
50
51 /* Always return 0 on null read. */
52 ret = 0;
53 if (count == 0)
54 goto out_nolock;
55
56 /* Get the pipe semaphore */
57 ret = -ERESTARTSYS;
58 if (down_interruptible(PIPE_SEM(*inode)))
59 goto out_nolock;
60
61 if (PIPE_EMPTY(*inode)) {
62 do_more_read:
63 ret = 0;
64 if (!PIPE_WRITERS(*inode))
65 goto out;
66
67 ret = -EAGAIN;
68 if (filp->f_flags & O_NONBLOCK)
69 goto out;
70
71 for (;;) {
72 PIPE_WAITING_READERS(*inode)++;
73 pipe_wait(inode);
74 PIPE_WAITING_READERS(*inode)--;
75 ret = -ERESTARTSYS;
76 if (signal_pending(current))
77 goto out;
78 ret = 0;
79 if (!PIPE_EMPTY(*inode))
80 break;
81 if (!PIPE_WRITERS(*inode))
82 goto out;
83 }
84 }
85
86 /* Read what data is available. */
87 ret = -EFAULT;
88 while (count > 0 && (size = PIPE_LEN(*inode))) {
89 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
90 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
91
92 if (chars > count)
93 chars = count;
94 if (chars > size)
95 chars = size;
96
97 if (copy_to_user(buf, pipebuf, chars))
98 goto out;
99
100 read += chars;
101 PIPE_START(*inode) += chars;
102 PIPE_START(*inode) &= (PIPE_SIZE - 1);
103 PIPE_LEN(*inode) -= chars;
104 count -= chars;
105 buf += chars;
106 }
107
108 /* Cache behaviour optimization */
109 if (!PIPE_LEN(*inode))
110 PIPE_START(*inode) = 0;
111
112 if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
113 /*
114 * We know that we are going to sleep: signal
115 * writers synchronously that there is more
116 * room.
117 */
118 wake_up_interruptible_sync(PIPE_WAIT(*inode));
119 if (!PIPE_EMPTY(*inode))
120 BUG();
121 goto do_more_read;
122 }
123 /* Signal writers asynchronously that there is more room. */
124 wake_up_interruptible(PIPE_WAIT(*inode));
125
126 ret = read;
127 out:
128 up(PIPE_SEM(*inode));
129 out_nolock:
130 if (read)
131 ret = read;
132
133 UPDATE_ATIME(inode);
134 return ret;
135 }
136
137 static ssize_t
138 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
139 {
140 struct inode *inode = filp->f_dentry->d_inode;
141 ssize_t free, written, ret;
142
143 /* Seeks are not allowed on pipes. */
144 ret = -ESPIPE;
145 written = 0;
146 if (ppos != &filp->f_pos)
147 goto out_nolock;
148
149 /* Null write succeeds. */
150 ret = 0;
151 if (count == 0)
152 goto out_nolock;
153
154 ret = -ERESTARTSYS;
155 if (down_interruptible(PIPE_SEM(*inode)))
156 goto out_nolock;
157
158 /* No readers yields SIGPIPE. */
159 if (!PIPE_READERS(*inode))
160 goto sigpipe;
161
162 /* If count <= PIPE_BUF, we have to make it atomic. */
163 free = (count <= PIPE_BUF ? count : 1);
164
165 /* Wait, or check for, available space. */
166 if (filp->f_flags & O_NONBLOCK) {
167 ret = -EAGAIN;
168 if (PIPE_FREE(*inode) < free)
169 goto out;
170 } else {
171 while (PIPE_FREE(*inode) < free) {
172 PIPE_WAITING_WRITERS(*inode)++;
173 pipe_wait(inode);
174 PIPE_WAITING_WRITERS(*inode)--;
175 ret = -ERESTARTSYS;
176 if (signal_pending(current))
177 goto out;
178
179 if (!PIPE_READERS(*inode))
180 goto sigpipe;
181 }
182 }
183
184 /* Copy into available space. */
185 ret = -EFAULT;
186 while (count > 0) {
187 int space;
188 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
189 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
190
191 if ((space = PIPE_FREE(*inode)) != 0) {
192 if (chars > count)
193 chars = count;
194 if (chars > space)
195 chars = space;
196
197 if (copy_from_user(pipebuf, buf, chars))
198 goto out;
199
200 written += chars;
201 PIPE_LEN(*inode) += chars;
202 count -= chars;
203 buf += chars;
204 space = PIPE_FREE(*inode);
205 continue;
206 }
207
208 ret = written;
209 if (filp->f_flags & O_NONBLOCK)
210 break;
211
212 do {
213 /*
214 * Synchronous wake-up: it knows that this process
215 * is going to give up this CPU, so it doesn't have
216 * to do idle reschedules.
217 */
218 wake_up_interruptible_sync(PIPE_WAIT(*inode));
219 PIPE_WAITING_WRITERS(*inode)++;
220 pipe_wait(inode);
221 PIPE_WAITING_WRITERS(*inode)--;
222 if (signal_pending(current))
223 goto out;
224 if (!PIPE_READERS(*inode))
225 goto sigpipe;
226 } while (!PIPE_FREE(*inode));
227 ret = -EFAULT;
228 }
229
230 /* Signal readers asynchronously that there is more data. */
231 wake_up_interruptible(PIPE_WAIT(*inode));
232
233 update_mctime(inode);
234
235 out:
236 up(PIPE_SEM(*inode));
237 out_nolock:
238 if (written)
239 ret = written;
240 return ret;
241
242 sigpipe:
243 if (written)
244 goto out;
245 up(PIPE_SEM(*inode));
246 send_sig(SIGPIPE, current, 0);
247 return -EPIPE;
248 }
249
250 static ssize_t
251 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
252 {
253 return -EBADF;
254 }
255
256 static ssize_t
257 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
258 {
259 return -EBADF;
260 }
261
262 static int
263 pipe_ioctl(struct inode *pino, struct file *filp,
264 unsigned int cmd, unsigned long arg)
265 {
266 switch (cmd) {
267 case FIONREAD:
268 return put_user(PIPE_LEN(*pino), (int *)arg);
269 default:
270 return -EINVAL;
271 }
272 }
273
274 /* No kernel lock held - fine */
275 static unsigned int
276 pipe_poll(struct file *filp, poll_table *wait)
277 {
278 unsigned int mask;
279 struct inode *inode = filp->f_dentry->d_inode;
280
281 poll_wait(filp, PIPE_WAIT(*inode), wait);
282
283 /* Reading only -- no need for acquiring the semaphore. */
284 mask = POLLIN | POLLRDNORM;
285 if (PIPE_EMPTY(*inode))
286 mask = POLLOUT | POLLWRNORM;
287 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
288 mask |= POLLHUP;
289 if (!PIPE_READERS(*inode))
290 mask |= POLLERR;
291
292 return mask;
293 }
294
295 /* FIXME: most Unices do not set POLLERR for fifos */
296 #define fifo_poll pipe_poll
297
298 static int
299 pipe_release(struct inode *inode, int decr, int decw)
300 {
301 down(PIPE_SEM(*inode));
302 PIPE_READERS(*inode) -= decr;
303 PIPE_WRITERS(*inode) -= decw;
304 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
305 struct pipe_inode_info *info = inode->i_pipe;
306 inode->i_pipe = NULL;
307 free_page((unsigned long) info->base);
308 kfree(info);
309 } else {
310 wake_up_interruptible(PIPE_WAIT(*inode));
311 }
312 up(PIPE_SEM(*inode));
313
314 return 0;
315 }
316
/* release() for a read-only end: one reader fewer, writers unchanged. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	const int readers_gone = 1;
	const int writers_gone = 0;

	return pipe_release(inode, readers_gone, writers_gone);
}
322
/* release() for a write-only end: one writer fewer, readers unchanged. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	const int readers_gone = 0;
	const int writers_gone = 1;

	return pipe_release(inode, readers_gone, writers_gone);
}
328
329 static int
330 pipe_rdwr_release(struct inode *inode, struct file *filp)
331 {
332 int decr, decw;
333
334 decr = (filp->f_mode & FMODE_READ) != 0;
335 decw = (filp->f_mode & FMODE_WRITE) != 0;
336 return pipe_release(inode, decr, decw);
337 }
338
339 static int
340 pipe_read_open(struct inode *inode, struct file *filp)
341 {
342 /* We could have perhaps used atomic_t, but this and friends
343 below are the only places. So it doesn't seem worthwhile. */
344 down(PIPE_SEM(*inode));
345 PIPE_READERS(*inode)++;
346 up(PIPE_SEM(*inode));
347
348 return 0;
349 }
350
351 static int
352 pipe_write_open(struct inode *inode, struct file *filp)
353 {
354 down(PIPE_SEM(*inode));
355 PIPE_WRITERS(*inode)++;
356 up(PIPE_SEM(*inode));
357
358 return 0;
359 }
360
361 static int
362 pipe_rdwr_open(struct inode *inode, struct file *filp)
363 {
364 down(PIPE_SEM(*inode));
365 if (filp->f_mode & FMODE_READ)
366 PIPE_READERS(*inode)++;
367 if (filp->f_mode & FMODE_WRITE)
368 PIPE_WRITERS(*inode)++;
369 up(PIPE_SEM(*inode));
370
371 return 0;
372 }
373
374 /*
375 * The file_operations structs are not static because they
376 * are also used in linux/fs/fifo.c to do operations on FIFOs.
377 */
378 struct file_operations read_fifo_fops = {
379 llseek: no_llseek,
380 read: pipe_read,
381 write: bad_pipe_w,
382 poll: fifo_poll,
383 ioctl: pipe_ioctl,
384 open: pipe_read_open,
385 release: pipe_read_release,
386 };
387
388 struct file_operations write_fifo_fops = {
389 llseek: no_llseek,
390 read: bad_pipe_r,
391 write: pipe_write,
392 poll: fifo_poll,
393 ioctl: pipe_ioctl,
394 open: pipe_write_open,
395 release: pipe_write_release,
396 };
397
398 struct file_operations rdwr_fifo_fops = {
399 llseek: no_llseek,
400 read: pipe_read,
401 write: pipe_write,
402 poll: fifo_poll,
403 ioctl: pipe_ioctl,
404 open: pipe_rdwr_open,
405 release: pipe_rdwr_release,
406 };
407
408 struct file_operations read_pipe_fops = {
409 llseek: no_llseek,
410 read: pipe_read,
411 write: bad_pipe_w,
412 poll: pipe_poll,
413 ioctl: pipe_ioctl,
414 open: pipe_read_open,
415 release: pipe_read_release,
416 };
417
418 struct file_operations write_pipe_fops = {
419 llseek: no_llseek,
420 read: bad_pipe_r,
421 write: pipe_write,
422 poll: pipe_poll,
423 ioctl: pipe_ioctl,
424 open: pipe_write_open,
425 release: pipe_write_release,
426 };
427
428 struct file_operations rdwr_pipe_fops = {
429 llseek: no_llseek,
430 read: pipe_read,
431 write: pipe_write,
432 poll: pipe_poll,
433 ioctl: pipe_ioctl,
434 open: pipe_rdwr_open,
435 release: pipe_rdwr_release,
436 };
437
438 struct inode* pipe_new(struct inode* inode)
439 {
440 unsigned long page;
441
442 page = __get_free_page(GFP_USER);
443 if (!page)
444 return NULL;
445
446 inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
447 if (!inode->i_pipe)
448 goto fail_page;
449
450 init_waitqueue_head(PIPE_WAIT(*inode));
451 PIPE_BASE(*inode) = (char*) page;
452 PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
453 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
454 PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
455 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
456
457 return inode;
458 fail_page:
459 free_page(page);
460 return NULL;
461 }
462
463 static struct vfsmount *pipe_mnt;
464 static int pipefs_delete_dentry(struct dentry *dentry)
465 {
466 return 1;
467 }
468 static struct dentry_operations pipefs_dentry_operations = {
469 d_delete: pipefs_delete_dentry,
470 };
471
472 static struct inode * get_pipe_inode(void)
473 {
474 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
475
476 if (!inode)
477 goto fail_inode;
478
479 if(!pipe_new(inode))
480 goto fail_iput;
481 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
482 inode->i_fop = &rdwr_pipe_fops;
483
484 /*
485 * Mark the inode dirty from the very beginning,
486 * that way it will never be moved to the dirty
487 * list because "mark_inode_dirty()" will think
488 * that it already _is_ on the dirty list.
489 */
490 inode->i_state = I_DIRTY;
491 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
492 inode->i_uid = current->fsuid;
493 inode->i_gid = current->fsgid;
494 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
495 inode->i_blksize = PAGE_SIZE;
496 return inode;
497
498 fail_iput:
499 iput(inode);
500 fail_inode:
501 return NULL;
502 }
503
504 int do_pipe(int *fd)
505 {
506 struct qstr this;
507 char name[32];
508 struct dentry *dentry;
509 struct inode * inode;
510 struct file *f1, *f2;
511 int error;
512 int i,j;
513
514 error = -ENFILE;
515 f1 = get_empty_filp();
516 if (!f1)
517 goto no_files;
518
519 f2 = get_empty_filp();
520 if (!f2)
521 goto close_f1;
522
523 inode = get_pipe_inode();
524 if (!inode)
525 goto close_f12;
526
527 error = get_unused_fd();
528 if (error < 0)
529 goto close_f12_inode;
530 i = error;
531
532 error = get_unused_fd();
533 if (error < 0)
534 goto close_f12_inode_i;
535 j = error;
536
537 error = -ENOMEM;
538 sprintf(name, "[%lu]", inode->i_ino);
539 this.name = name;
540 this.len = strlen(name);
541 this.hash = inode->i_ino; /* will go */
542 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
543 if (!dentry)
544 goto close_f12_inode_i_j;
545 dentry->d_op = &pipefs_dentry_operations;
546 d_add(dentry, inode);
547 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
548 f1->f_dentry = f2->f_dentry = dget(dentry);
549
550 /* read file */
551 f1->f_pos = f2->f_pos = 0;
552 f1->f_flags = O_RDONLY;
553 f1->f_op = &read_pipe_fops;
554 f1->f_mode = 1;
555 f1->f_version = 0;
556
557 /* write file */
558 f2->f_flags = O_WRONLY;
559 f2->f_op = &write_pipe_fops;
560 f2->f_mode = 2;
561 f2->f_version = 0;
562
563 fd_install(i, f1);
564 fd_install(j, f2);
565 fd[0] = i;
566 fd[1] = j;
567 return 0;
568
569 close_f12_inode_i_j:
570 put_unused_fd(j);
571 close_f12_inode_i:
572 put_unused_fd(i);
573 close_f12_inode:
574 free_page((unsigned long) PIPE_BASE(*inode));
575 kfree(inode->i_pipe);
576 inode->i_pipe = NULL;
577 iput(inode);
578 close_f12:
579 put_filp(f2);
580 close_f1:
581 put_filp(f1);
582 no_files:
583 return error;
584 }
585
586 /*
587 * pipefs should _never_ be mounted by userland - too much of security hassle,
588 * no real gain from having the whole whorehouse mounted. So we don't need
589 * any operations on the root directory. However, we need a non-trivial
590 * d_name - pipe: will go nicely and kill the special-casing in procfs.
591 */
592 static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
593 {
594 buf->f_type = PIPEFS_MAGIC;
595 buf->f_bsize = 1024;
596 buf->f_namelen = 255;
597 return 0;
598 }
599
600 static struct super_operations pipefs_ops = {
601 statfs: pipefs_statfs,
602 };
603
604 static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
605 {
606 struct inode *root = new_inode(sb);
607 if (!root)
608 return NULL;
609 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
610 root->i_uid = root->i_gid = 0;
611 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
612 sb->s_blocksize = 1024;
613 sb->s_blocksize_bits = 10;
614 sb->s_magic = PIPEFS_MAGIC;
615 sb->s_op = &pipefs_ops;
616 sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
617 if (!sb->s_root) {
618 iput(root);
619 return NULL;
620 }
621 sb->s_root->d_sb = sb;
622 sb->s_root->d_parent = sb->s_root;
623 d_instantiate(sb->s_root, root);
624 return sb;
625 }
626
627 static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
628
629 static int __init init_pipe_fs(void)
630 {
631 int err = register_filesystem(&pipe_fs_type);
632 if (!err) {
633 pipe_mnt = kern_mount(&pipe_fs_type);
634 err = PTR_ERR(pipe_mnt);
635 if (IS_ERR(pipe_mnt))
636 unregister_filesystem(&pipe_fs_type);
637 else
638 err = 0;
639 }
640 return err;
641 }
642
643 static void __exit exit_pipe_fs(void)
644 {
645 unregister_filesystem(&pipe_fs_type);
646 mntput(pipe_mnt);
647 }
648
649 module_init(init_pipe_fs)
650 module_exit(exit_pipe_fs)
Cache object: 6493931219a42b02beaea3f736fccb30
|