1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: src/sys/kern/kern_descrip.c,v 1.337 2008/08/20 08:31:58 ed Exp $");
39
40 #include "opt_compat.h"
41 #include "opt_ddb.h"
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46
47 #include <sys/conf.h>
48 #include <sys/domain.h>
49 #include <sys/fcntl.h>
50 #include <sys/file.h>
51 #include <sys/filedesc.h>
52 #include <sys/filio.h>
53 #include <sys/jail.h>
54 #include <sys/kernel.h>
55 #include <sys/limits.h>
56 #include <sys/lock.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/mqueue.h>
60 #include <sys/mutex.h>
61 #include <sys/namei.h>
62 #include <sys/priv.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/socketvar.h>
68 #include <sys/stat.h>
69 #include <sys/sx.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #include <sys/sysproto.h>
73 #include <sys/tty.h>
74 #include <sys/unistd.h>
75 #include <sys/user.h>
76 #include <sys/vnode.h>
77 #ifdef KTRACE
78 #include <sys/ktrace.h>
79 #endif
80
81 #include <security/audit/audit.h>
82
83 #include <vm/uma.h>
84
85 #include <ddb/ddb.h>
86
/*
 * malloc(9) type tags declared by this file.  The two strings handed to
 * MALLOC_DEFINE() are the short name and the longer description of each
 * type.  M_SIGIO is the tag used for the struct sigio objects freed in
 * funsetown() and funsetownlst().
 */
87 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
88 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
89 "file desc to leader structures");
90 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
91
/*
 * NOTE(review): presumably the UMA zone that struct file objects are
 * allocated from -- its users are outside this excerpt; confirm.
 */
92 static uma_zone_t file_zone;
93
94
/*
 * DUP_FIXED is passed by dup2() and fcntl(F_DUP2FD): the caller names the
 * exact target descriptor.  DUP_FCNTL is passed by fcntl(F_DUPFD) and makes
 * do_dup() report an out-of-range target as EINVAL instead of EBADF/EMFILE.
 */
95 /* Flags for do_dup() */
96 #define DUP_FIXED 0x1 /* Force fixed allocation */
97 #define DUP_FCNTL 0x2 /* fcntl()-style errors */
98
/* Forward declarations of file-local helpers defined further down. */
99 static int do_dup(struct thread *td, int flags, int old, int new,
100 register_t *retval);
101 static int fd_first_free(struct filedesc *, int, int);
102 static int fd_last_used(struct filedesc *, int, int);
103 static void fdgrowtable(struct filedesc *, int);
104 static void fdunused(struct filedesc *fdp, int fd);
105 static void fdused(struct filedesc *fdp, int fd);
106
107 /*
108 * A process is initially started out with NDFILE descriptors stored within
109 * this structure, selected to be enough for typical applications based on
110 * the historical limit of 20 open files (and the usage of descriptors by
111 * shells). If these descriptors are exhausted, a larger descriptor table
112 * may be allocated, up to a process' resource limit; the internal arrays
113 * are then unused.
114 */
115 #define NDFILE 20
/*
 * Helpers for the descriptor bitmap (fd_map), an array of NDSLOTTYPE words
 * with one bit per descriptor:
 *   NDSLOTSIZE  - size of one bitmap word in bytes
 *   NDENTRIES   - number of descriptor bits held by one word
 *   NDSLOT(x)   - index of the word that holds the bit for descriptor x
 *   NDBIT(x)    - mask selecting descriptor x's bit inside its word
 *   NDSLOTS(x)  - number of words needed to hold x bits (rounded up)
 */
116 #define NDSLOTSIZE sizeof(NDSLOTTYPE)
117 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT)
118 #define NDSLOT(x) ((x) / NDENTRIES)
119 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES))
120 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES)
121
122 /*
123 * Storage required per open file descriptor.
124 */
/* One struct file pointer plus one byte of per-descriptor flags. */
125 #define OFILESIZE (sizeof(struct file *) + sizeof(char))
126
127 /*
128 * Basic allocation of descriptors:
129 * one of the above, plus arrays for NDFILE descriptors.
130 */
131 struct filedesc0 {
/* The generic descriptor table header. */
132 struct filedesc fd_fd;
133 /*
134 * These arrays are used when the number of open files is
135 * <= NDFILE, and are then pointed to by the pointers above.
136 */
/* Built-in storage for the first NDFILE file pointers. */
137 struct file *fd_dfiles[NDFILE];
/* Built-in per-descriptor flag bytes, one per entry of fd_dfiles. */
138 char fd_dfileflags[NDFILE];
/* Built-in in-use bitmap, sized to cover NDFILE descriptors. */
139 NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
140 };
141
142 /*
143 * Descriptor management.
144 */
145 volatile int openfiles; /* actual number of open files */
146 struct mtx sigio_lock; /* mtx to protect pointers to sigio */
/*
 * Hook called by do_dup() when the descriptor being overwritten refers to a
 * DTYPE_MQUEUE file.  NOTE(review): presumably installed by the message
 * queue code; the assignment is not visible in this excerpt.
 */
147 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
148
149 /* A mutex to protect the association between a proc and filedesc. */
150 static struct mtx fdesc_mtx;
151
152 /*
153 * Find the first zero bit in the given bitmap, starting at low and not
154 * exceeding size - 1.
155 */
156 static int
157 fd_first_free(struct filedesc *fdp, int low, int size)
158 {
159 NDSLOTTYPE *map = fdp->fd_map;
160 NDSLOTTYPE mask;
161 int off, maxoff;
162
163 if (low >= size)
164 return (low);
165
166 off = NDSLOT(low);
167 if (low % NDENTRIES) {
168 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
169 if ((mask &= ~map[off]) != 0UL)
170 return (off * NDENTRIES + ffsl(mask) - 1);
171 ++off;
172 }
173 for (maxoff = NDSLOTS(size); off < maxoff; ++off)
174 if (map[off] != ~0UL)
175 return (off * NDENTRIES + ffsl(~map[off]) - 1);
176 return (size);
177 }
178
179 /*
180 * Find the highest non-zero bit in the given bitmap, starting at low and
181 * not exceeding size - 1.
182 */
183 static int
184 fd_last_used(struct filedesc *fdp, int low, int size)
185 {
186 NDSLOTTYPE *map = fdp->fd_map;
187 NDSLOTTYPE mask;
188 int off, minoff;
189
190 if (low >= size)
191 return (-1);
192
193 off = NDSLOT(size);
194 if (size % NDENTRIES) {
195 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
196 if ((mask &= map[off]) != 0)
197 return (off * NDENTRIES + flsl(mask) - 1);
198 --off;
199 }
200 for (minoff = NDSLOT(low); off >= minoff; --off)
201 if (map[off] != 0)
202 return (off * NDENTRIES + flsl(map[off]) - 1);
203 return (low - 1);
204 }
205
206 static int
207 fdisused(struct filedesc *fdp, int fd)
208 {
209 KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
210 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
211 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
212 }
213
214 /*
215 * Mark a file descriptor as used.
216 */
217 static void
218 fdused(struct filedesc *fdp, int fd)
219 {
220
221 FILEDESC_XLOCK_ASSERT(fdp);
222 KASSERT(!fdisused(fdp, fd),
223 ("fd already used"));
224
225 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
226 if (fd > fdp->fd_lastfile)
227 fdp->fd_lastfile = fd;
228 if (fd == fdp->fd_freefile)
229 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
230 }
231
232 /*
233 * Mark a file descriptor as unused.
234 */
235 static void
236 fdunused(struct filedesc *fdp, int fd)
237 {
238
239 FILEDESC_XLOCK_ASSERT(fdp);
240 KASSERT(fdisused(fdp, fd),
241 ("fd is already unused"));
242 KASSERT(fdp->fd_ofiles[fd] == NULL,
243 ("fd is still in use"));
244
245 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
246 if (fd < fdp->fd_freefile)
247 fdp->fd_freefile = fd;
248 if (fd == fdp->fd_lastfile)
249 fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
250 }
251
252 /*
253 * System calls on descriptors.
254 */
255 #ifndef _SYS_SYSPROTO_H_
256 struct getdtablesize_args {
257 int dummy;
258 };
259 #endif
260 /* ARGSUSED */
261 int
262 getdtablesize(struct thread *td, struct getdtablesize_args *uap)
263 {
264 struct proc *p = td->td_proc;
265
266 PROC_LOCK(p);
267 td->td_retval[0] =
268 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
269 PROC_UNLOCK(p);
270 return (0);
271 }
272
273 /*
274 * Duplicate a file descriptor to a particular value.
275 *
276 * Note: keep in mind that a potential race condition exists when closing
277 * descriptors from a shared descriptor table (via rfork).
278 */
279 #ifndef _SYS_SYSPROTO_H_
280 struct dup2_args {
281 u_int from;
282 u_int to;
283 };
284 #endif
285 /* ARGSUSED */
286 int
287 dup2(struct thread *td, struct dup2_args *uap)
288 {
289
290 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
291 td->td_retval));
292 }
293
294 /*
295 * Duplicate a file descriptor.
296 */
297 #ifndef _SYS_SYSPROTO_H_
298 struct dup_args {
299 u_int fd;
300 };
301 #endif
302 /* ARGSUSED */
303 int
304 dup(struct thread *td, struct dup_args *uap)
305 {
306
307 return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
308 }
309
310 /*
311 * The file control system call.
312 */
313 #ifndef _SYS_SYSPROTO_H_
314 struct fcntl_args {
315 int fd;
316 int cmd;
317 long arg;
318 };
319 #endif
320 /* ARGSUSED */
321 int
322 fcntl(struct thread *td, struct fcntl_args *uap)
323 {
324 struct flock fl;
325 struct oflock ofl;
326 intptr_t arg;
327 int error;
328 int cmd;
329
330 error = 0;
331 cmd = uap->cmd;
332 switch (uap->cmd) {
333 case F_OGETLK:
334 case F_OSETLK:
335 case F_OSETLKW:
336 /*
337 * Convert old flock structure to new.
338 */
339 error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
340 fl.l_start = ofl.l_start;
341 fl.l_len = ofl.l_len;
342 fl.l_pid = ofl.l_pid;
343 fl.l_type = ofl.l_type;
344 fl.l_whence = ofl.l_whence;
345 fl.l_sysid = 0;
346
347 switch (uap->cmd) {
348 case F_OGETLK:
349 cmd = F_GETLK;
350 break;
351 case F_OSETLK:
352 cmd = F_SETLK;
353 break;
354 case F_OSETLKW:
355 cmd = F_SETLKW;
356 break;
357 }
358 arg = (intptr_t)&fl;
359 break;
360 case F_GETLK:
361 case F_SETLK:
362 case F_SETLKW:
363 case F_SETLK_REMOTE:
364 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
365 arg = (intptr_t)&fl;
366 break;
367 default:
368 arg = uap->arg;
369 break;
370 }
371 if (error)
372 return (error);
373 error = kern_fcntl(td, uap->fd, cmd, arg);
374 if (error)
375 return (error);
376 if (uap->cmd == F_OGETLK) {
377 ofl.l_start = fl.l_start;
378 ofl.l_len = fl.l_len;
379 ofl.l_pid = fl.l_pid;
380 ofl.l_type = fl.l_type;
381 ofl.l_whence = fl.l_whence;
382 error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
383 } else if (uap->cmd == F_GETLK) {
384 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
385 }
386 return (error);
387 }
388
389 static inline struct file *
390 fdtofp(int fd, struct filedesc *fdp)
391 {
392 struct file *fp;
393
394 FILEDESC_LOCK_ASSERT(fdp);
395 if ((unsigned)fd >= fdp->fd_nfiles ||
396 (fp = fdp->fd_ofiles[fd]) == NULL)
397 return (NULL);
398 return (fp);
399 }
400
/*
 * Kernel-side implementation of fcntl(2), operating on descriptor fd of the
 * calling thread's process.
 *
 * arg is already a kernel value: for the locking commands (F_GETLK, F_SETLK,
 * F_SETLKW, F_SETLK_REMOTE) it is a pointer to a struct flock in kernel
 * memory, which may be modified here (l_start is made absolute for SEEK_CUR,
 * and F_GETLK results are written into it); for the other commands it is
 * the integer argument itself.
 *
 * Returns 0 or an errno value.  Integer results (F_GETFD, F_GETFL,
 * F_GETOWN, and the new descriptor for the dup commands) are delivered
 * through td->td_retval[0].
 *
 * Locking pattern used throughout: the file is looked up under the filedesc
 * lock; before calling anything that is made outside that lock (fo_ioctl,
 * VOP_ADVLOCK) a reference is taken with fhold(), the filedesc lock is
 * dropped, and the reference is released with fdrop() afterwards.
 */
401 int
402 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
403 {
404 struct filedesc *fdp;
405 struct flock *flp;
406 struct file *fp;
407 struct proc *p;
408 char *pop;
409 struct vnode *vp;
410 int error, flg, tmp;
411 int vfslocked;
412
413 vfslocked = 0;
414 error = 0;
/* Default lock flavour; F_SETLKW adds F_WAIT, F_SETLK_REMOTE replaces it. */
415 flg = F_POSIX;
416 p = td->td_proc;
417 fdp = p->p_fd;
418
419 switch (cmd) {
/* arg is the requested descriptor number; DUP_FCNTL selects EINVAL errors. */
420 case F_DUPFD:
421 tmp = arg;
422 error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
423 break;
424
/* Same as dup2(fd, arg). */
425 case F_DUP2FD:
426 tmp = arg;
427 error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
428 break;
429
/* Report FD_CLOEXEC according to the descriptor's UF_EXCLOSE flag. */
430 case F_GETFD:
431 FILEDESC_SLOCK(fdp);
432 if ((fp = fdtofp(fd, fdp)) == NULL) {
433 FILEDESC_SUNLOCK(fdp);
434 error = EBADF;
435 break;
436 }
437 pop = &fdp->fd_ofileflags[fd];
438 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
439 FILEDESC_SUNLOCK(fdp);
440 break;
441
/* Set or clear UF_EXCLOSE from arg's FD_CLOEXEC bit; needs the exclusive lock. */
442 case F_SETFD:
443 FILEDESC_XLOCK(fdp);
444 if ((fp = fdtofp(fd, fdp)) == NULL) {
445 FILEDESC_XUNLOCK(fdp);
446 error = EBADF;
447 break;
448 }
449 pop = &fdp->fd_ofileflags[fd];
450 *pop = (*pop &~ UF_EXCLOSE) |
451 (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
452 FILEDESC_XUNLOCK(fdp);
453 break;
454
/* Return the file status flags, converted with OFLAGS(). */
455 case F_GETFL:
456 FILEDESC_SLOCK(fdp);
457 if ((fp = fdtofp(fd, fdp)) == NULL) {
458 FILEDESC_SUNLOCK(fdp);
459 error = EBADF;
460 break;
461 }
462 td->td_retval[0] = OFLAGS(fp->f_flag);
463 FILEDESC_SUNLOCK(fdp);
464 break;
465
/*
 * Replace the FCNTLFLAGS bits of f_flag with those derived from arg, then
 * push the resulting FNONBLOCK and FASYNC state to the object via ioctls.
 */
466 case F_SETFL:
467 FILEDESC_SLOCK(fdp);
468 if ((fp = fdtofp(fd, fdp)) == NULL) {
469 FILEDESC_SUNLOCK(fdp);
470 error = EBADF;
471 break;
472 }
473 fhold(fp);
474 FILEDESC_SUNLOCK(fdp);
/* Retry until f_flag is swapped without having changed underneath us. */
475 do {
476 tmp = flg = fp->f_flag;
477 tmp &= ~FCNTLFLAGS;
478 tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
479 } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
480 tmp = fp->f_flag & FNONBLOCK;
481 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
482 if (error) {
483 fdrop(fp, td);
484 break;
485 }
486 tmp = fp->f_flag & FASYNC;
487 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
488 if (error == 0) {
489 fdrop(fp, td);
490 break;
491 }
/* FIOASYNC failed: clear FNONBLOCK again and tell the object; error is kept. */
492 atomic_clear_int(&fp->f_flag, FNONBLOCK);
493 tmp = 0;
494 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
495 fdrop(fp, td);
496 break;
497
/* Query the owner through the FIOGETOWN ioctl. */
498 case F_GETOWN:
499 FILEDESC_SLOCK(fdp);
500 if ((fp = fdtofp(fd, fdp)) == NULL) {
501 FILEDESC_SUNLOCK(fdp);
502 error = EBADF;
503 break;
504 }
505 fhold(fp);
506 FILEDESC_SUNLOCK(fdp);
507 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
508 if (error == 0)
509 td->td_retval[0] = tmp;
510 fdrop(fp, td);
511 break;
512
/* Set the owner (arg) through the FIOSETOWN ioctl. */
513 case F_SETOWN:
514 FILEDESC_SLOCK(fdp);
515 if ((fp = fdtofp(fd, fdp)) == NULL) {
516 FILEDESC_SUNLOCK(fdp);
517 error = EBADF;
518 break;
519 }
520 fhold(fp);
521 FILEDESC_SUNLOCK(fdp);
522 tmp = arg;
523 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
524 fdrop(fp, td);
525 break;
526
/* Requires PRIV_NFS_LOCKD; no lock is held yet, so returning directly is safe. */
527 case F_SETLK_REMOTE:
528 error = priv_check(td, PRIV_NFS_LOCKD);
529 if (error)
530 return (error);
531 flg = F_REMOTE;
532 goto do_setlk;
533
534 case F_SETLKW:
535 flg |= F_WAIT;
536 /* FALLTHROUGH F_SETLK */
537
538 case F_SETLK:
539 do_setlk:
540 FILEDESC_SLOCK(fdp);
541 if ((fp = fdtofp(fd, fdp)) == NULL) {
542 FILEDESC_SUNLOCK(fdp);
543 error = EBADF;
544 break;
545 }
/* Advisory locks are only supported on vnode-backed files. */
546 if (fp->f_type != DTYPE_VNODE) {
547 FILEDESC_SUNLOCK(fdp);
548 error = EBADF;
549 break;
550 }
551 flp = (struct flock *)arg;
/* Make a SEEK_CUR start absolute, refusing offsets that would overflow. */
552 if (flp->l_whence == SEEK_CUR) {
553 if (fp->f_offset < 0 ||
554 (flp->l_start > 0 &&
555 fp->f_offset > OFF_MAX - flp->l_start)) {
556 FILEDESC_SUNLOCK(fdp);
557 error = EOVERFLOW;
558 break;
559 }
560 flp->l_start += fp->f_offset;
561 }
562
563 /*
564 * VOP_ADVLOCK() may block.
565 */
566 fhold(fp);
567 FILEDESC_SUNLOCK(fdp);
568 vp = fp->f_vnode;
569 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
/*
 * Read and write locks require the file to be open for reading or writing
 * respectively, and mark the process leader with P_ADVLOCK before the lock
 * is requested.  Locks are taken on behalf of p->p_leader.
 */
570 switch (flp->l_type) {
571 case F_RDLCK:
572 if ((fp->f_flag & FREAD) == 0) {
573 error = EBADF;
574 break;
575 }
576 PROC_LOCK(p->p_leader);
577 p->p_leader->p_flag |= P_ADVLOCK;
578 PROC_UNLOCK(p->p_leader);
579 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
580 flp, flg);
581 break;
582 case F_WRLCK:
583 if ((fp->f_flag & FWRITE) == 0) {
584 error = EBADF;
585 break;
586 }
587 PROC_LOCK(p->p_leader);
588 p->p_leader->p_flag |= P_ADVLOCK;
589 PROC_UNLOCK(p->p_leader);
590 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
591 flp, flg);
592 break;
593 case F_UNLCK:
594 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
595 flp, flg);
596 break;
597 case F_UNLCKSYS:
598 /*
599 * Temporary api for testing remote lock
600 * infrastructure.
601 */
602 if (flg != F_REMOTE) {
603 error = EINVAL;
604 break;
605 }
606 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
607 F_UNLCKSYS, flp, flg);
608 break;
609 default:
610 error = EINVAL;
611 break;
612 }
613 VFS_UNLOCK_GIANT(vfslocked);
614 vfslocked = 0;
/*
 * If fd no longer refers to fp, the descriptor was closed or replaced while
 * the filedesc lock was dropped.  In that case issue an unlock covering the
 * whole file (start 0, len 0) for this process leader; the error from the
 * original request is still what gets returned.
 */
615 /* Check for race with close */
616 FILEDESC_SLOCK(fdp);
617 if ((unsigned) fd >= fdp->fd_nfiles ||
618 fp != fdp->fd_ofiles[fd]) {
619 FILEDESC_SUNLOCK(fdp);
620 flp->l_whence = SEEK_SET;
621 flp->l_start = 0;
622 flp->l_len = 0;
623 flp->l_type = F_UNLCK;
624 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
625 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
626 F_UNLCK, flp, F_POSIX);
627 VFS_UNLOCK_GIANT(vfslocked);
628 vfslocked = 0;
629 } else
630 FILEDESC_SUNLOCK(fdp);
631 fdrop(fp, td);
632 break;
633
/* Query for a conflicting lock; the answer is written into *flp. */
634 case F_GETLK:
635 FILEDESC_SLOCK(fdp);
636 if ((fp = fdtofp(fd, fdp)) == NULL) {
637 FILEDESC_SUNLOCK(fdp);
638 error = EBADF;
639 break;
640 }
641 if (fp->f_type != DTYPE_VNODE) {
642 FILEDESC_SUNLOCK(fdp);
643 error = EBADF;
644 break;
645 }
646 flp = (struct flock *)arg;
647 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
648 flp->l_type != F_UNLCK) {
649 FILEDESC_SUNLOCK(fdp);
650 error = EINVAL;
651 break;
652 }
/* Make a SEEK_CUR start absolute, checking overflow in both directions. */
653 if (flp->l_whence == SEEK_CUR) {
654 if ((flp->l_start > 0 &&
655 fp->f_offset > OFF_MAX - flp->l_start) ||
656 (flp->l_start < 0 &&
657 fp->f_offset < OFF_MIN - flp->l_start)) {
658 FILEDESC_SUNLOCK(fdp);
659 error = EOVERFLOW;
660 break;
661 }
662 flp->l_start += fp->f_offset;
663 }
664 /*
665 * VOP_ADVLOCK() may block.
666 */
667 fhold(fp);
668 FILEDESC_SUNLOCK(fdp);
669 vp = fp->f_vnode;
670 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
671 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
672 F_POSIX);
673 VFS_UNLOCK_GIANT(vfslocked);
674 vfslocked = 0;
675 fdrop(fp, td);
676 break;
677 default:
678 error = EINVAL;
679 break;
680 }
/*
 * Every case above that locked Giant has already unlocked it and reset
 * vfslocked to 0, so this call is always made with 0.
 */
681 VFS_UNLOCK_GIANT(vfslocked);
682 return (error);
683 }
684
685 /*
686 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
687 */
/*
 * Duplicate descriptor old of the calling thread's process.
 *
 * flags: DUP_FIXED - new is the exact target descriptor; whatever is open
 *                    there is closed as a side effect.
 *        otherwise - new is passed to fdalloc() as the requested number and
 *                    is overwritten with the descriptor actually allocated.
 *        DUP_FCNTL - report an out-of-range new as EINVAL.
 * retval: receives the resulting descriptor number on success.
 *
 * Returns 0 on success, otherwise:
 *   EBADF  - old is negative, not open, or changed while the lock was
 *            dropped; or new is negative (without DUP_FCNTL)
 *   EINVAL - new is negative or >= the descriptor limit (with DUP_FCNTL)
 *   EMFILE - new is >= the descriptor limit (without DUP_FCNTL)
 *   or whatever error fdalloc() returns.
 *
 * The duplicate refers to the same struct file (an extra reference is taken
 * with fhold()) and inherits old's descriptor flags with UF_EXCLOSE cleared.
 */
688 static int
689 do_dup(struct thread *td, int flags, int old, int new,
690 register_t *retval)
691 {
692 struct filedesc *fdp;
693 struct proc *p;
694 struct file *fp;
695 struct file *delfp;
696 int error, holdleaders, maxfd;
697
698 p = td->td_proc;
699 fdp = p->p_fd;
700
701 /*
702 * Verify we have a valid descriptor to dup from and possibly to
703 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
704 * return EINVAL when the new descriptor is out of bounds.
705 */
706 if (old < 0)
707 return (EBADF);
708 if (new < 0)
709 return (flags & DUP_FCNTL ? EINVAL : EBADF);
/* Same limit computation as getdtablesize(). */
710 PROC_LOCK(p);
711 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
712 PROC_UNLOCK(p);
713 if (new >= maxfd)
714 return (flags & DUP_FCNTL ? EINVAL : EMFILE);
715
716 FILEDESC_XLOCK(fdp);
717 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
718 FILEDESC_XUNLOCK(fdp);
719 return (EBADF);
720 }
/* dup2(fd, fd) on a valid descriptor is a successful no-op. */
721 if (flags & DUP_FIXED && old == new) {
722 *retval = new;
723 FILEDESC_XUNLOCK(fdp);
724 return (0);
725 }
/* Take our own reference; it becomes the reference owned by slot new. */
726 fp = fdp->fd_ofiles[old];
727 fhold(fp);
728
729 /*
730 * If the caller specified a file descriptor, make sure the file
731 * table is large enough to hold it, and grab it. Otherwise, just
732 * allocate a new descriptor the usual way. Since the filedesc
733 * lock may be temporarily dropped in the process, we have to look
734 * out for a race.
735 */
736 if (flags & DUP_FIXED) {
737 if (new >= fdp->fd_nfiles)
738 fdgrowtable(fdp, new + 1);
739 if (fdp->fd_ofiles[new] == NULL)
740 fdused(fdp, new);
741 } else {
742 if ((error = fdalloc(td, new, &new)) != 0) {
743 FILEDESC_XUNLOCK(fdp);
744 fdrop(fp, td);
745 return (error);
746 }
747 }
748
749 /*
750 * If the old file changed out from under us then treat it as a
751 * bad file descriptor. Userland should do its own locking to
752 * avoid this case.
753 */
754 if (fdp->fd_ofiles[old] != fp) {
755 /* we've allocated a descriptor which we won't use */
756 if (fdp->fd_ofiles[new] == NULL)
757 fdunused(fdp, new);
758 FILEDESC_XUNLOCK(fdp);
759 fdrop(fp, td);
760 return (EBADF);
761 }
762 KASSERT(old != new,
763 ("new fd is same as old"));
764
765 /*
766 * Save info on the descriptor being overwritten. We cannot close
767 * it without introducing an ownership race for the slot, since we
768 * need to drop the filedesc lock to call closef().
769 *
770 * XXX this duplicates parts of close().
771 */
772 delfp = fdp->fd_ofiles[new];
773 holdleaders = 0;
774 if (delfp != NULL) {
775 if (td->td_proc->p_fdtol != NULL) {
776 /*
777 * Ask fdfree() to sleep to ensure that all relevant
778 * process leaders can be traversed in closef().
779 */
780 fdp->fd_holdleaderscount++;
781 holdleaders = 1;
782 }
783 }
784
785 /*
786 * Duplicate the source descriptor
787 */
788 fdp->fd_ofiles[new] = fp;
/* The duplicate never inherits close-on-exec. */
789 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
790 if (new > fdp->fd_lastfile)
791 fdp->fd_lastfile = new;
792 *retval = new;
793
794 /*
795 * If we dup'd over a valid file, we now own the reference to it
796 * and must dispose of it using closef() semantics (as if a
797 * close() were performed on it).
798 *
799 * XXX this duplicates parts of close().
800 */
801 if (delfp != NULL) {
802 knote_fdclose(td, new);
803 if (delfp->f_type == DTYPE_MQUEUE)
804 mq_fdclose(td, new, delfp);
/* closef() is called without the filedesc lock held. */
805 FILEDESC_XUNLOCK(fdp);
806 (void) closef(delfp, td);
807 if (holdleaders) {
/* Drop the hold taken above and wake a waiter once the count reaches zero. */
808 FILEDESC_XLOCK(fdp);
809 fdp->fd_holdleaderscount--;
810 if (fdp->fd_holdleaderscount == 0 &&
811 fdp->fd_holdleaderswakeup != 0) {
812 fdp->fd_holdleaderswakeup = 0;
813 wakeup(&fdp->fd_holdleaderscount);
814 }
815 FILEDESC_XUNLOCK(fdp);
816 }
817 } else {
818 FILEDESC_XUNLOCK(fdp);
819 }
820 return (0);
821 }
822
823 /*
824 * If sigio is on the list associated with a process or process group,
825 * disable signalling from the device, remove sigio from the list and
826 * free sigio.
827 */
828 void
829 funsetown(struct sigio **sigiop)
830 {
831 struct sigio *sigio;
832
833 SIGIO_LOCK();
834 sigio = *sigiop;
835 if (sigio == NULL) {
836 SIGIO_UNLOCK();
837 return;
838 }
839 *(sigio->sio_myref) = NULL;
840 if ((sigio)->sio_pgid < 0) {
841 struct pgrp *pg = (sigio)->sio_pgrp;
842 PGRP_LOCK(pg);
843 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
844 sigio, sio_pgsigio);
845 PGRP_UNLOCK(pg);
846 } else {
847 struct proc *p = (sigio)->sio_proc;
848 PROC_LOCK(p);
849 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
850 sigio, sio_pgsigio);
851 PROC_UNLOCK(p);
852 }
853 SIGIO_UNLOCK();
854 crfree(sigio->sio_ucred);
855 FREE(sigio, M_SIGIO);
856 }
857
858 /*
859 * Free a list of sigio structures.
860 * We only need to lock the SIGIO_LOCK because we have made ourselves
861 * inaccessible to callers of fsetown and therefore do not need to lock
862 * the proc or pgrp struct for the list manipulation.
863 */
864 void
865 funsetownlst(struct sigiolst *sigiolst)
866 {
867 struct proc *p;
868 struct pgrp *pg;
869 struct sigio *sigio;
870
871 sigio = SLIST_FIRST(sigiolst);
872 if (sigio == NULL)
873 return;
874 p = NULL;
875 pg = NULL;
876
877 /*
878 * Every entry of the list should belong
879 * to a single proc or pgrp.
880 */
881 if (sigio->sio_pgid < 0) {
882 pg = sigio->sio_pgrp;
883 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
884 } else /* if (sigio->sio_pgid > 0) */ {
885 p = sigio->sio_proc;
886 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
887 }
888
889 SIGIO_LOCK();
890 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
891 *(sigio->sio_myref) = NULL;
892 if (pg != NULL) {
893 KASSERT(sigio->sio_pgid < 0,
894 ("Proc sigio in pgrp sigio list"));
895 KASSERT(sigio->sio_pgrp == pg,
896 ("Bogus pgrp in sigio list"));
897 PGRP_LOCK(pg);
898 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
899 sio_pgsigio);
900 PGRP_UNLOCK(pg);
901 } else /* if (p != NULL) */ {
902 KASSERT(sigio->sio_pgid > 0,
903 ("Pgrp sigio in proc sigio list"));
904 KASSERT(sigio->sio_proc == p,
905 ("Bogus proc in sigio list"));
906 PROC_LOCK(p);
907 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
908 sio_pgsigio);
909 PROC_UNLOCK(p);
910 }
911 SIGIO_UNLOCK();
912 crfree(sigio->sio_ucred);
913 FREE(sigio, M_SIGIO);
914 SIGIO_LOCK();
915 }
916 SIGIO_UNLOCK();
917 }
918
919 /*
920 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
921 *
922 * After permission checking, add a sigio structure to the sigio list for
923 * the process or process group.
924 */
925 int
926 fsetown(pid_t pgid, struct sigio **sigiop)
927 {
928 struct proc *proc;
929 struct pgrp *pgrp;
930 struct sigio *sigio;
931 int ret;
932
933 if (pgid == 0) {
934 funsetown(sigiop);
935 return (0);
936 }
937
938 ret = 0;
939
940 /* Allocate and fill in the new sigio out of locks. */
941 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
942 sigio->sio_pgid = pgid;
943 sigio->sio_ucred = crhold(curthread->td_ucred);
944 sigio->sio_myref = sigiop;
945
946 sx_slock(&proctree_lock);
947 if (pgid > 0) {
948 proc = pfind(pgid);
949 if (proc == NULL) {
950 ret = ESRCH;
951 goto fail;
952 }
953
954 /*
955 * Policy - Don't allow a process to FSETOWN a process
956 * in another session.
957 *
958 * Remove this test to allow maximum flexibility or
959 * restrict FSETOWN to the current process or process
960 * group for maximum safety.
961 */
962 PROC_UNLOCK(proc);
963 if (proc->p_session != curthread->td_proc->p_session) {
964 ret = EPERM;
965 goto fail;
966 }
967
968 pgrp = NULL;
969 } else /* if (pgid < 0) */ {
970 pgrp = pgfind(-pgid);
971 if (pgrp == |