The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_descrip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)kern_descrip.c      8.6 (Berkeley) 4/19/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_capsicum.h"
   43 #include "opt_ddb.h"
   44 #include "opt_ktrace.h"
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 
   49 #include <sys/capsicum.h>
   50 #include <sys/conf.h>
   51 #include <sys/fcntl.h>
   52 #include <sys/file.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/filio.h>
   55 #include <sys/jail.h>
   56 #include <sys/kernel.h>
   57 #include <sys/limits.h>
   58 #include <sys/lock.h>
   59 #include <sys/malloc.h>
   60 #include <sys/mount.h>
   61 #include <sys/mutex.h>
   62 #include <sys/namei.h>
   63 #include <sys/selinfo.h>
   64 #include <sys/priv.h>
   65 #include <sys/proc.h>
   66 #include <sys/protosw.h>
   67 #include <sys/racct.h>
   68 #include <sys/resourcevar.h>
   69 #include <sys/sbuf.h>
   70 #include <sys/signalvar.h>
   71 #include <sys/kdb.h>
   72 #include <sys/stat.h>
   73 #include <sys/sx.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #include <sys/sysproto.h>
   77 #include <sys/unistd.h>
   78 #include <sys/user.h>
   79 #include <sys/vnode.h>
   80 #ifdef KTRACE
   81 #include <sys/ktrace.h>
   82 #endif
   83 
   84 #include <net/vnet.h>
   85 
   86 #include <security/audit/audit.h>
   87 
   88 #include <vm/uma.h>
   89 #include <vm/vm.h>
   90 
   91 #include <ddb/ddb.h>
   92 
/*
 * malloc(9) types used by the descriptor code.  Each MALLOC_DEFINE takes
 * the type identifier followed by a short name and a longer description.
 * The first three are private to this file (static); M_FILECAPS is not.
 */
static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
    "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities");

/* Only declared here; the definition of M_FADVISE is not in this part of the file. */
MALLOC_DECLARE(M_FADVISE);

/*
 * UMA zone handles.  NOTE(review): presumably these back struct file and
 * struct filedesc0 allocations; the zone creation is outside this excerpt.
 */
static __read_mostly uma_zone_t file_zone;
static __read_mostly uma_zone_t filedesc0_zone;

/* Forward declarations of file-local helpers. */
static int	closefp(struct filedesc *fdp, int fd, struct file *fp,
		    struct thread *td, int holdleaders);
static int	fd_first_free(struct filedesc *fdp, int low, int size);
static int	fd_last_used(struct filedesc *fdp, int size);
static void	fdgrowtable(struct filedesc *fdp, int nfd);
static void	fdgrowtable_exp(struct filedesc *fdp, int nfd);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);
static int	getmaxfd(struct thread *td);
static u_long	*filecaps_copy_prep(const struct filecaps *src);
static void	filecaps_copy_finish(const struct filecaps *src,
		    struct filecaps *dst, u_long *ioctls);
static u_long	*filecaps_free_prep(struct filecaps *fcaps);
static void	filecaps_free_finish(u_long *ioctls);
  118 
/*
 * Each process has:
 *
 * - An array of open file descriptors (fd_ofiles)
 * - An array of file flags (fd_ofileflags)
 * - A bitmap recording which descriptors are in use (fd_map)
 *
 * A process starts out with NDFILE descriptors.  The value of NDFILE has
 * been selected based on the historical limit of 20 open files, and an
 * assumption that the majority of processes, especially short-lived
 * processes like shells, will never need more.
 *
 * If this initial allocation is exhausted, a larger descriptor table and
 * map are allocated dynamically, and the pointers in the process's struct
 * filedesc are updated to point to those.  This is repeated every time
 * the process runs out of file descriptors (provided it hasn't hit its
 * resource limit).
 *
 * Since threads may hold references to individual descriptor table
 * entries, the tables are never freed.  Instead, they are placed on a
 * linked list and freed only when the struct filedesc is released.
 *
 * The ND* macros below describe the layout of the fd_map bitmap, which is
 * an array of NDSLOTTYPE words:
 *
 * NDSLOTSIZE	size of one bitmap word, in bytes
 * NDENTRIES	number of descriptors (bits) tracked by one word
 * NDSLOT(x)	index of the word that holds the bit for descriptor x
 * NDBIT(x)	single-bit mask for descriptor x within its word
 * NDSLOTS(x)	number of words needed to track x descriptors (rounds up)
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)
  147 
/*
 * SLIST entry used to keep track of ofiles which must be reclaimed when
 * the process exits.
 *
 * ft_table points at the superseded descriptor table; ft_next links the
 * entry into the list headed by fd_free in struct filedesc0.
 */
struct freetable {
	struct fdescenttbl *ft_table;
	SLIST_ENTRY(freetable) ft_next;
};
  156 
/*
 * Initial allocation: a filedesc structure + the head of SLIST used to
 * keep track of old ofiles + enough space for NDFILE descriptors.
 */

/*
 * Fixed-size descriptor table with room for exactly NDFILE entries.
 * NOTE(review): the field names suggest this mirrors the layout of
 * struct fdescenttbl so it can stand in for one; confirm against
 * sys/filedesc.h.
 */
struct fdescenttbl0 {
	int	fdt_nfiles;
	struct	filedescent fdt_ofiles[NDFILE];
};

/*
 * struct filedesc bundled with its initial storage: the list of retired
 * tables (fd_free), the built-in NDFILE-entry table (fd_dfiles) and the
 * bitmap words that cover those NDFILE descriptors (fd_dmap).
 */
struct filedesc0 {
	struct filedesc fd_fd;
	SLIST_HEAD(, freetable) fd_free;
	struct	fdescenttbl0 fd_dfiles;
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};
  173 
/*
 * Descriptor management.
 */
volatile int __exclusive_cache_line openfiles; /* actual number of open files */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
/*
 * Optional hook called with (td, fd, fp).  NOTE(review): presumably
 * installed by the message-queue code and invoked on descriptor close;
 * neither the assignment nor the call site is in this excerpt.
 */
void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
  180 
  181 /*
  182  * If low >= size, just return low. Otherwise find the first zero bit in the
  183  * given bitmap, starting at low and not exceeding size - 1. Return size if
  184  * not found.
  185  */
  186 static int
  187 fd_first_free(struct filedesc *fdp, int low, int size)
  188 {
  189         NDSLOTTYPE *map = fdp->fd_map;
  190         NDSLOTTYPE mask;
  191         int off, maxoff;
  192 
  193         if (low >= size)
  194                 return (low);
  195 
  196         off = NDSLOT(low);
  197         if (low % NDENTRIES) {
  198                 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
  199                 if ((mask &= ~map[off]) != 0UL)
  200                         return (off * NDENTRIES + ffsl(mask) - 1);
  201                 ++off;
  202         }
  203         for (maxoff = NDSLOTS(size); off < maxoff; ++off)
  204                 if (map[off] != ~0UL)
  205                         return (off * NDENTRIES + ffsl(~map[off]) - 1);
  206         return (size);
  207 }
  208 
  209 /*
  210  * Find the highest non-zero bit in the given bitmap, starting at 0 and
  211  * not exceeding size - 1. Return -1 if not found.
  212  */
  213 static int
  214 fd_last_used(struct filedesc *fdp, int size)
  215 {
  216         NDSLOTTYPE *map = fdp->fd_map;
  217         NDSLOTTYPE mask;
  218         int off, minoff;
  219 
  220         off = NDSLOT(size);
  221         if (size % NDENTRIES) {
  222                 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
  223                 if ((mask &= map[off]) != 0)
  224                         return (off * NDENTRIES + flsl(mask) - 1);
  225                 --off;
  226         }
  227         for (minoff = NDSLOT(0); off >= minoff; --off)
  228                 if (map[off] != 0)
  229                         return (off * NDENTRIES + flsl(map[off]) - 1);
  230         return (-1);
  231 }
  232 
  233 static int
  234 fdisused(struct filedesc *fdp, int fd)
  235 {
  236 
  237         KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
  238             ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
  239 
  240         return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
  241 }
  242 
  243 /*
  244  * Mark a file descriptor as used.
  245  */
  246 static void
  247 fdused_init(struct filedesc *fdp, int fd)
  248 {
  249 
  250         KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));
  251 
  252         fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
  253 }
  254 
  255 static void
  256 fdused(struct filedesc *fdp, int fd)
  257 {
  258 
  259         FILEDESC_XLOCK_ASSERT(fdp);
  260 
  261         fdused_init(fdp, fd);
  262         if (fd > fdp->fd_lastfile)
  263                 fdp->fd_lastfile = fd;
  264         if (fd == fdp->fd_freefile)
  265                 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
  266 }
  267 
  268 /*
  269  * Mark a file descriptor as unused.
  270  */
  271 static void
  272 fdunused(struct filedesc *fdp, int fd)
  273 {
  274 
  275         FILEDESC_XLOCK_ASSERT(fdp);
  276 
  277         KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
  278         KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
  279             ("fd=%d is still in use", fd));
  280 
  281         fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
  282         if (fd < fdp->fd_freefile)
  283                 fdp->fd_freefile = fd;
  284         if (fd == fdp->fd_lastfile)
  285                 fdp->fd_lastfile = fd_last_used(fdp, fd);
  286 }
  287 
  288 /*
  289  * Free a file descriptor.
  290  *
  291  * Avoid some work if fdp is about to be destroyed.
  292  */
  293 static inline void
  294 fdefree_last(struct filedescent *fde)
  295 {
  296 
  297         filecaps_free(&fde->fde_caps);
  298 }
  299 
  300 static inline void
  301 fdfree(struct filedesc *fdp, int fd)
  302 {
  303         struct filedescent *fde;
  304 
  305         fde = &fdp->fd_ofiles[fd];
  306 #ifdef CAPABILITIES
  307         seq_write_begin(&fde->fde_seq);
  308 #endif
  309         fde->fde_file = NULL;
  310 #ifdef CAPABILITIES
  311         seq_write_end(&fde->fde_seq);
  312 #endif
  313         fdefree_last(fde);
  314         fdunused(fdp, fd);
  315 }
  316 
  317 void
  318 pwd_ensure_dirs(void)
  319 {
  320         struct filedesc *fdp;
  321 
  322         fdp = curproc->p_fd;
  323         FILEDESC_XLOCK(fdp);
  324         if (fdp->fd_cdir == NULL) {
  325                 fdp->fd_cdir = rootvnode;
  326                 vrefact(rootvnode);
  327         }
  328         if (fdp->fd_rdir == NULL) {
  329                 fdp->fd_rdir = rootvnode;
  330                 vrefact(rootvnode);
  331         }
  332         FILEDESC_XUNLOCK(fdp);
  333 }
  334 
  335 /*
  336  * System calls on descriptors.
  337  */
  338 #ifndef _SYS_SYSPROTO_H_
  339 struct getdtablesize_args {
  340         int     dummy;
  341 };
  342 #endif
  343 /* ARGSUSED */
  344 int
  345 sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap)
  346 {
  347 #ifdef  RACCT
  348         uint64_t lim;
  349 #endif
  350 
  351         td->td_retval[0] =
  352             min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc);
  353 #ifdef  RACCT
  354         PROC_LOCK(td->td_proc);
  355         lim = racct_get_limit(td->td_proc, RACCT_NOFILE);
  356         PROC_UNLOCK(td->td_proc);
  357         if (lim < td->td_retval[0])
  358                 td->td_retval[0] = lim;
  359 #endif
  360         return (0);
  361 }
  362 
  363 /*
  364  * Duplicate a file descriptor to a particular value.
  365  *
  366  * Note: keep in mind that a potential race condition exists when closing
  367  * descriptors from a shared descriptor table (via rfork).
  368  */
  369 #ifndef _SYS_SYSPROTO_H_
  370 struct dup2_args {
  371         u_int   from;
  372         u_int   to;
  373 };
  374 #endif
  375 /* ARGSUSED */
  376 int
  377 sys_dup2(struct thread *td, struct dup2_args *uap)
  378 {
  379 
  380         return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to));
  381 }
  382 
  383 /*
  384  * Duplicate a file descriptor.
  385  */
  386 #ifndef _SYS_SYSPROTO_H_
  387 struct dup_args {
  388         u_int   fd;
  389 };
  390 #endif
  391 /* ARGSUSED */
  392 int
  393 sys_dup(struct thread *td, struct dup_args *uap)
  394 {
  395 
  396         return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0));
  397 }
  398 
  399 /*
  400  * The file control system call.
  401  */
  402 #ifndef _SYS_SYSPROTO_H_
  403 struct fcntl_args {
  404         int     fd;
  405         int     cmd;
  406         long    arg;
  407 };
  408 #endif
  409 /* ARGSUSED */
  410 int
  411 sys_fcntl(struct thread *td, struct fcntl_args *uap)
  412 {
  413 
  414         return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg));
  415 }
  416 
  417 int
  418 kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg)
  419 {
  420         struct flock fl;
  421         struct __oflock ofl;
  422         intptr_t arg1;
  423         int error, newcmd;
  424 
  425         error = 0;
  426         newcmd = cmd;
  427         switch (cmd) {
  428         case F_OGETLK:
  429         case F_OSETLK:
  430         case F_OSETLKW:
  431                 /*
  432                  * Convert old flock structure to new.
  433                  */
  434                 error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl));
  435                 fl.l_start = ofl.l_start;
  436                 fl.l_len = ofl.l_len;
  437                 fl.l_pid = ofl.l_pid;
  438                 fl.l_type = ofl.l_type;
  439                 fl.l_whence = ofl.l_whence;
  440                 fl.l_sysid = 0;
  441 
  442                 switch (cmd) {
  443                 case F_OGETLK:
  444                         newcmd = F_GETLK;
  445                         break;
  446                 case F_OSETLK:
  447                         newcmd = F_SETLK;
  448                         break;
  449                 case F_OSETLKW:
  450                         newcmd = F_SETLKW;
  451                         break;
  452                 }
  453                 arg1 = (intptr_t)&fl;
  454                 break;
  455         case F_GETLK:
  456         case F_SETLK:
  457         case F_SETLKW:
  458         case F_SETLK_REMOTE:
  459                 error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl));
  460                 arg1 = (intptr_t)&fl;
  461                 break;
  462         default:
  463                 arg1 = arg;
  464                 break;
  465         }
  466         if (error)
  467                 return (error);
  468         error = kern_fcntl(td, fd, newcmd, arg1);
  469         if (error)
  470                 return (error);
  471         if (cmd == F_OGETLK) {
  472                 ofl.l_start = fl.l_start;
  473                 ofl.l_len = fl.l_len;
  474                 ofl.l_pid = fl.l_pid;
  475                 ofl.l_type = fl.l_type;
  476                 ofl.l_whence = fl.l_whence;
  477                 error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl));
  478         } else if (cmd == F_GETLK) {
  479                 error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl));
  480         }
  481         return (error);
  482 }
  483 
  484 int
  485 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
  486 {
  487         struct filedesc *fdp;
  488         struct flock *flp;
  489         struct file *fp, *fp2;
  490         struct filedescent *fde;
  491         struct proc *p;
  492         struct vnode *vp;
  493         int error, flg, tmp;
  494         uint64_t bsize;
  495         off_t foffset;
  496 
  497         error = 0;
  498         flg = F_POSIX;
  499         p = td->td_proc;
  500         fdp = p->p_fd;
  501 
  502         AUDIT_ARG_FD(cmd);
  503         AUDIT_ARG_CMD(cmd);
  504         switch (cmd) {
  505         case F_DUPFD:
  506                 tmp = arg;
  507                 error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp);
  508                 break;
  509 
  510         case F_DUPFD_CLOEXEC:
  511                 tmp = arg;
  512                 error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp);
  513                 break;
  514 
  515         case F_DUP2FD:
  516                 tmp = arg;
  517                 error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp);
  518                 break;
  519 
  520         case F_DUP2FD_CLOEXEC:
  521                 tmp = arg;
  522                 error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp);
  523                 break;
  524 
  525         case F_GETFD:
  526                 error = EBADF;
  527                 FILEDESC_SLOCK(fdp);
  528                 fde = fdeget_locked(fdp, fd);
  529                 if (fde != NULL) {
  530                         td->td_retval[0] =
  531                             (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
  532                         error = 0;
  533                 }
  534                 FILEDESC_SUNLOCK(fdp);
  535                 break;
  536 
  537         case F_SETFD:
  538                 error = EBADF;
  539                 FILEDESC_XLOCK(fdp);
  540                 fde = fdeget_locked(fdp, fd);
  541                 if (fde != NULL) {
  542                         fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
  543                             (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
  544                         error = 0;
  545                 }
  546                 FILEDESC_XUNLOCK(fdp);
  547                 break;
  548 
  549         case F_GETFL:
  550                 error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETFL, &fp);
  551                 if (error != 0)
  552                         break;
  553                 td->td_retval[0] = OFLAGS(fp->f_flag);
  554                 fdrop(fp, td);
  555                 break;
  556 
  557         case F_SETFL:
  558                 error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp);
  559                 if (error != 0)
  560                         break;
  561                 do {
  562                         tmp = flg = fp->f_flag;
  563                         tmp &= ~FCNTLFLAGS;
  564                         tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
  565                 } while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
  566                 tmp = fp->f_flag & FNONBLOCK;
  567                 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  568                 if (error != 0) {
  569                         fdrop(fp, td);
  570                         break;
  571                 }
  572                 tmp = fp->f_flag & FASYNC;
  573                 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
  574                 if (error == 0) {
  575                         fdrop(fp, td);
  576                         break;
  577                 }
  578                 atomic_clear_int(&fp->f_flag, FNONBLOCK);
  579                 tmp = 0;
  580                 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  581                 fdrop(fp, td);
  582                 break;
  583 
  584         case F_GETOWN:
  585                 error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETOWN, &fp);
  586                 if (error != 0)
  587                         break;
  588                 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
  589                 if (error == 0)
  590                         td->td_retval[0] = tmp;
  591                 fdrop(fp, td);
  592                 break;
  593 
  594         case F_SETOWN:
  595                 error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETOWN, &fp);
  596                 if (error != 0)
  597                         break;
  598                 tmp = arg;
  599                 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
  600                 fdrop(fp, td);
  601                 break;
  602 
  603         case F_SETLK_REMOTE:
  604                 error = priv_check(td, PRIV_NFS_LOCKD);
  605                 if (error != 0)
  606                         return (error);
  607                 flg = F_REMOTE;
  608                 goto do_setlk;
  609 
  610         case F_SETLKW:
  611                 flg |= F_WAIT;
  612                 /* FALLTHROUGH F_SETLK */
  613 
  614         case F_SETLK:
  615         do_setlk:
  616                 flp = (struct flock *)arg;
  617                 if ((flg & F_REMOTE) != 0 && flp->l_sysid == 0) {
  618                         error = EINVAL;
  619                         break;
  620                 }
  621 
  622                 error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp, NULL);
  623                 if (error != 0)
  624                         break;
  625                 if (fp->f_type != DTYPE_VNODE) {
  626                         error = EBADF;
  627                         fdrop(fp, td);
  628                         break;
  629                 }
  630 
  631                 if (flp->l_whence == SEEK_CUR) {
  632                         foffset = foffset_get(fp);
  633                         if (foffset < 0 ||
  634                             (flp->l_start > 0 &&
  635                              foffset > OFF_MAX - flp->l_start)) {
  636                                 error = EOVERFLOW;
  637                                 fdrop(fp, td);
  638                                 break;
  639                         }
  640                         flp->l_start += foffset;
  641                 }
  642 
  643                 vp = fp->f_vnode;
  644                 switch (flp->l_type) {
  645                 case F_RDLCK:
  646                         if ((fp->f_flag & FREAD) == 0) {
  647                                 error = EBADF;
  648                                 break;
  649                         }
  650                         if ((p->p_leader->p_flag & P_ADVLOCK) == 0) {
  651                                 PROC_LOCK(p->p_leader);
  652                                 p->p_leader->p_flag |= P_ADVLOCK;
  653                                 PROC_UNLOCK(p->p_leader);
  654                         }
  655                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  656                             flp, flg);
  657                         break;
  658                 case F_WRLCK:
  659                         if ((fp->f_flag & FWRITE) == 0) {
  660                                 error = EBADF;
  661                                 break;
  662                         }
  663                         if ((p->p_leader->p_flag & P_ADVLOCK) == 0) {
  664                                 PROC_LOCK(p->p_leader);
  665                                 p->p_leader->p_flag |= P_ADVLOCK;
  666                                 PROC_UNLOCK(p->p_leader);
  667                         }
  668                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  669                             flp, flg);
  670                         break;
  671                 case F_UNLCK:
  672                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
  673                             flp, flg);
  674                         break;
  675                 case F_UNLCKSYS:
  676                         if (flg != F_REMOTE) {
  677                                 error = EINVAL;
  678                                 break;
  679                         }
  680                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  681                             F_UNLCKSYS, flp, flg);
  682                         break;
  683                 default:
  684                         error = EINVAL;
  685                         break;
  686                 }
  687                 if (error != 0 || flp->l_type == F_UNLCK ||
  688                     flp->l_type == F_UNLCKSYS) {
  689                         fdrop(fp, td);
  690                         break;
  691                 }
  692 
  693                 /*
  694                  * Check for a race with close.
  695                  *
  696                  * The vnode is now advisory locked (or unlocked, but this case
  697                  * is not really important) as the caller requested.
  698                  * We had to drop the filedesc lock, so we need to recheck if
  699                  * the descriptor is still valid, because if it was closed
  700                  * in the meantime we need to remove advisory lock from the
  701                  * vnode - close on any descriptor leading to an advisory
  702                  * locked vnode, removes that lock.
  703                  * We will return 0 on purpose in that case, as the result of
  704                  * successful advisory lock might have been externally visible
  705                  * already. This is fine - effectively we pretend to the caller
  706                  * that the closing thread was a bit slower and that the
  707                  * advisory lock succeeded before the close.
  708                  */
  709                 error = fget_unlocked(fdp, fd, &cap_no_rights, &fp2, NULL);
  710                 if (error != 0) {
  711                         fdrop(fp, td);
  712                         break;
  713                 }
  714                 if (fp != fp2) {
  715                         flp->l_whence = SEEK_SET;
  716                         flp->l_start = 0;
  717                         flp->l_len = 0;
  718                         flp->l_type = F_UNLCK;
  719                         (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  720                             F_UNLCK, flp, F_POSIX);
  721                 }
  722                 fdrop(fp, td);
  723                 fdrop(fp2, td);
  724                 break;
  725 
  726         case F_GETLK:
  727                 error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp, NULL);
  728                 if (error != 0)
  729                         break;
  730                 if (fp->f_type != DTYPE_VNODE) {
  731                         error = EBADF;
  732                         fdrop(fp, td);
  733                         break;
  734                 }
  735                 flp = (struct flock *)arg;
  736                 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
  737                     flp->l_type != F_UNLCK) {
  738                         error = EINVAL;
  739                         fdrop(fp, td);
  740                         break;
  741                 }
  742                 if (flp->l_whence == SEEK_CUR) {
  743                         foffset = foffset_get(fp);
  744                         if ((flp->l_start > 0 &&
  745                             foffset > OFF_MAX - flp->l_start) ||
  746                             (flp->l_start < 0 &&
  747                             foffset < OFF_MIN - flp->l_start)) {
  748                                 error = EOVERFLOW;
  749                                 fdrop(fp, td);
  750                                 break;
  751                         }
  752                         flp->l_start += foffset;
  753                 }
  754                 vp = fp->f_vnode;
  755                 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
  756                     F_POSIX);
  757                 fdrop(fp, td);
  758                 break;
  759 
  760         case F_RDAHEAD:
  761                 arg = arg ? 128 * 1024: 0;
  762                 /* FALLTHROUGH */
  763         case F_READAHEAD:
  764                 error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL);
  765                 if (error != 0)
  766                         break;
  767                 if (fp->f_type != DTYPE_VNODE) {
  768                         fdrop(fp, td);
  769                         error = EBADF;
  770                         break;
  771                 }
  772                 vp = fp->f_vnode;
  773                 if (vp->v_type != VREG) {
  774                         fdrop(fp, td);
  775                         error = ENOTTY;
  776                         break;
  777                 }
  778 
  779                 /*
  780                  * Exclusive lock synchronizes against f_seqcount reads and
  781                  * writes in sequential_heuristic().
  782                  */
  783                 error = vn_lock(vp, LK_EXCLUSIVE);
  784                 if (error != 0) {
  785                         fdrop(fp, td);
  786                         break;
  787                 }
  788                 if (arg >= 0) {
  789                         bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
  790                         arg = MIN(arg, INT_MAX - bsize + 1);
  791                         fp->f_seqcount = MIN(IO_SEQMAX,
  792                             (arg + bsize - 1) / bsize);
  793                         atomic_set_int(&fp->f_flag, FRDAHEAD);
  794                 } else {
  795                         atomic_clear_int(&fp->f_flag, FRDAHEAD);
  796                 }
  797                 VOP_UNLOCK(vp, 0);
  798                 fdrop(fp, td);
  799                 break;
  800 
  801         default:
  802                 error = EINVAL;
  803                 break;
  804         }
  805         return (error);
  806 }
  807 
  808 static int
  809 getmaxfd(struct thread *td)
  810 {
  811 
  812         return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc));
  813 }
  814 
  815 /*
  816  * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
  817  */
  818 int
  819 kern_dup(struct thread *td, u_int mode, int flags, int old, int new)
  820 {
  821         struct filedesc *fdp;
  822         struct filedescent *oldfde, *newfde;
  823         struct proc *p;
  824         struct file *delfp, *oldfp;
  825         u_long *oioctls, *nioctls;
  826         int error, maxfd;
  827 
  828         p = td->td_proc;
  829         fdp = p->p_fd;
  830 
  831         MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0);
  832         MPASS(mode < FDDUP_LASTMODE);
  833 
  834         AUDIT_ARG_FD(old);
  835         /* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */
  836 
  837         /*
  838          * Verify we have a valid descriptor to dup from and possibly to
  839          * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
  840          * return EINVAL when the new descriptor is out of bounds.
  841          */
  842         if (old < 0)
  843                 return (EBADF);
  844         if (new < 0)
  845                 return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
  846         maxfd = getmaxfd(td);
  847         if (new >= maxfd)
  848                 return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
  849 
  850         error = EBADF;
  851         FILEDESC_XLOCK(fdp);
  852         if (fget_locked(fdp, old) == NULL)
  853                 goto unlock;
  854         if ((mode == FDDUP_FIXED || mode == FDDUP_MUSTREPLACE) && old == new) {
  855                 td->td_retval[0] = new;
  856                 if (flags & FDDUP_FLAG_CLOEXEC)
  857                         fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE;
  858                 error = 0;
  859                 goto unlock;
  860         }
  861 
  862         oldfde = &fdp->fd_ofiles[old];
  863         oldfp = oldfde->fde_file;
  864         if (!fhold(oldfp))
  865                 goto unlock;
  866 
  867         /*
  868          * If the caller specified a file descriptor, make sure the file
  869          * table is large enough to hold it, and grab it.  Otherwise, just
  870          * allocate a new descriptor the usual way.
  871          */
  872         switch (mode) {
  873         case FDDUP_NORMAL:
  874         case FDDUP_FCNTL:
  875                 if ((error = fdalloc(td, new, &new)) != 0) {
  876                         fdrop(oldfp, td);
  877                         goto unlock;
  878                 }
  879                 break;
  880         case FDDUP_MUSTREPLACE:
  881                 /* Target file descriptor must exist. */
  882                 if (fget_locked(fdp, new) == NULL) {
  883                         fdrop(oldfp, td);
  884                         goto unlock;
  885                 }
  886                 break;
  887         case FDDUP_FIXED:
  888                 if (new >= fdp->fd_nfiles) {
  889                         /*
  890                          * The resource limits are here instead of e.g.
  891                          * fdalloc(), because the file descriptor table may be
  892                          * shared between processes, so we can't really use
  893                          * racct_add()/racct_sub().  Instead of counting the
  894                          * number of actually allocated descriptors, just put
  895                          * the limit on the size of the file descriptor table.
  896                          */
  897 #ifdef RACCT
  898                         if (racct_enable) {
  899                                 PROC_LOCK(p);
  900                                 error = racct_set(p, RACCT_NOFILE, new + 1);
  901                                 PROC_UNLOCK(p);
  902                                 if (error != 0) {
  903                                         error = EMFILE;
  904                                         fdrop(oldfp, td);
  905                                         goto unlock;
  906                                 }
  907                         }
  908 #endif
  909                         fdgrowtable_exp(fdp, new + 1);
  910                 }
  911                 if (!fdisused(fdp, new))
  912                         fdused(fdp, new);
  913                 break;
  914         default:
  915                 KASSERT(0, ("%s unsupported mode %d", __func__, mode));
  916         }
  917 
  918         KASSERT(old != new, ("new fd is same as old"));
  919 
  920         /* Refetch oldfde because the table may have grown and old one freed. */
  921         oldfde = &fdp->fd_ofiles[old];
  922         KASSERT(oldfp == oldfde->fde_file,
  923             ("fdt_ofiles shift from growth observed at fd %d",
  924             old));
  925 
  926         newfde = &fdp->fd_ofiles[new];
  927         delfp = newfde->fde_file;
  928 
  929         nioctls = filecaps_copy_prep(&oldfde->fde_caps);
  930 
  931         /*
  932          * Duplicate the source descriptor.
  933          */
  934 #ifdef CAPABILITIES
  935         seq_write_begin(&newfde->fde_seq);
  936 #endif
  937         oioctls = filecaps_free_prep(&newfde->fde_caps);
  938         memcpy(newfde, oldfde, fde_change_size);
  939         filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps,
  940             nioctls);
  941         if ((flags & FDDUP_FLAG_CLOEXEC) != 0)
  942                 newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE;
  943         else
  944                 newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE;
  945 #ifdef CAPABILITIES
  946         seq_write_end(&newfde->fde_seq);
  947 #endif
  948         filecaps_free_finish(oioctls);
  949         td->td_retval[0] = new;
  950 
  951         error = 0;
  952 
  953         if (delfp != NULL) {
  954                 (void) closefp(fdp, new, delfp, td, 1);
  955                 FILEDESC_UNLOCK_ASSERT(fdp);
  956         } else {
  957 unlock:
  958                 FILEDESC_XUNLOCK(fdp);
  959         }
  960 
  961         return (error);
  962 }
  963 
  964 static void
  965 sigiofree(struct sigio *sigio)
  966 {
  967         crfree(sigio->sio_ucred);
  968         free(sigio, M_SIGIO);
  969 }
  970 
  971 static struct sigio *
  972 funsetown_locked(struct sigio *sigio)
  973 {
  974         struct proc *p;
  975         struct pgrp *pg;
  976 
  977         SIGIO_ASSERT_LOCKED();
  978 
  979         if (sigio == NULL)
  980                 return (NULL);
  981         *(sigio->sio_myref) = NULL;
  982         if (sigio->sio_pgid < 0) {
  983                 pg = sigio->sio_pgrp;
  984                 PGRP_LOCK(pg);
  985                 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
  986                     sigio, sio_pgsigio);
  987                 PGRP_UNLOCK(pg);
  988         } else {
  989                 p = sigio->sio_proc;
  990                 PROC_LOCK(p);
  991                 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
  992                     sigio, sio_pgsigio);
  993                 PROC_UNLOCK(p);
  994         }
  995         return (sigio);
  996 }
  997 
  998 /*
  999  * If sigio is on the list associated with a process or process group,
 1000  * disable signalling from the device, remove sigio from the list and
 1001  * free sigio.
 1002  */
 1003 void
 1004 funsetown(struct sigio **sigiop)
 1005 {
 1006         struct sigio *sigio;
 1007 
 1008         /* Racy check, consumers must provide synchronization. */
 1009         if (*sigiop == NULL)
 1010                 return;
 1011 
 1012         SIGIO_LOCK();
 1013         sigio = funsetown_locked(*sigiop);
 1014         SIGIO_UNLOCK();
 1015         if (sigio != NULL)
 1016                 sigiofree(sigio);
 1017 }
 1018 
 1019 /*
 1020  * Free a list of sigio structures.  The caller must ensure that new sigio
 1021  * structures cannot be added after this point.  For process groups this is
 1022  * guaranteed using the proctree lock; for processes, the P_WEXIT flag serves
 1023  * as an interlock.
 1024  */
 1025 void
 1026 funsetownlst(struct sigiolst *sigiolst)
 1027 {
 1028         struct proc *p;
 1029         struct pgrp *pg;
 1030         struct sigio *sigio, *tmp;
 1031 
 1032         /* Racy check. */
 1033         sigio = SLIST_FIRST(sigiolst);
 1034         if (sigio == NULL)
 1035                 return;
 1036 
 1037         p = NULL;
 1038         pg = NULL;
 1039 
 1040         SIGIO_LOCK();
 1041         sigio = SLIST_FIRST(sigiolst);
 1042         if (sigio == NULL) {
 1043                 SIGIO_UNLOCK();
 1044                 return;
 1045         }
 1046 
 1047         /*
 1048          * Every entry of the list should belong to a single proc or pgrp.
 1049          */
 1050         if (sigio->sio_pgid < 0) {
 1051                 pg = sigio->sio_pgrp;
 1052                 sx_assert(&proctree_lock, SX_XLOCKED);
 1053                 PGRP_LOCK(pg);
 1054         } else /* if (sigio->sio_pgid > 0) */ {
 1055                 p = sigio->sio_proc;
 1056                 PROC_LOCK(p);
 1057                 KASSERT((p->p_flag & P_WEXIT) != 0,
 1058                     ("%s: process %p is not exiting", __func__, p));
 1059         }
 1060 
 1061         SLIST_FOREACH(sigio, sigiolst, sio_pgsigio) {
 1062                 *sigio->sio_myref = NULL;
 1063                 if (pg != NULL) {
 1064                         KASSERT(sigio->sio_pgid < 0,
 1065                             ("Proc sigio in pgrp sigio list"));
 1066                         KASSERT(sigio->sio_pgrp == pg,
 1067                             ("Bogus pgrp in sigio list"));
 1068                 } else /* if (p != NULL) */ {
 1069                         KASSERT(sigio->sio_pgid > 0,
 1070                             ("Pgrp sigio in proc sigio list"));
 1071                         KASSERT(sigio->sio_proc == p,
 1072                             ("Bogus proc in sigio list"));
 1073                 }
 1074         }
 1075 
 1076         if (pg != NULL)
 1077                 PGRP_UNLOCK(pg);
 1078         else
 1079                 PROC_UNLOCK(p);
 1080         SIGIO_UNLOCK();
 1081 
 1082         SLIST_FOREACH_SAFE(sigio, sigiolst, sio_pgsigio, tmp)
 1083                 sigiofree(sigio);
 1084 }
 1085 
 1086 /*
 1087  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 1088  *
 1089  * After permission checking, add a sigio structure to the sigio list for
 1090  * the process or process group.
 1091  */
 1092 int
 1093 fsetown(pid_t pgid, struct sigio **sigiop)
 1094 {
 1095         struct proc *proc;
 1096         struct pgrp *pgrp;
 1097         struct sigio *osigio, *sigio;
 1098         int ret;
 1099 
 1100         if (pgid == 0) {
 1101                 funsetown(sigiop);
 1102                 return (0);
 1103         }
 1104 
 1105         ret = 0;
 1106 
 1107         sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
 1108         sigio->sio_pgid = pgid;
 1109         sigio->sio_ucred = crhold(curthread->td_ucred);
 1110         sigio->sio_myref = sigiop;
 1111 
 1112         sx_slock(&proctree_lock);
 1113         SIGIO_LOCK();
 1114         osigio = funsetown_locked(*sigiop);
 1115         if (pgid > 0) {
 1116                 proc = pfind(pgid);
 1117                 if (proc == NULL) {
 1118                         ret = ESRCH;
 1119                         goto fail;
 1120                 }
 1121 
 1122                 /*
 1123                  * Policy - Don't allow a process to FSETOWN a process
 1124                  * in another session.
 1125                  *
 1126                  * Remove this test to allow maximum flexibility or
 1127                  * restrict FSETOWN to the current process or process
 1128                  * group for maximum safety.
 1129                  */
 1130                 if (proc->p_session != curthread->td_proc->p_session) {
 1131                         PROC_UNLOCK(proc);
 1132                         ret = EPERM;
 1133                         goto fail;
 1134                 }
 1135 
 1136                 sigio->sio_proc = proc;
 1137                 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
 1138                 PROC_UNLOCK(proc);
 1139         } else /* if (pgid < 0) */ {
 1140                 pgrp = pgfind(-pgid);
 1141                 if (pgrp == NULL) {
 1142                         ret = ESRCH;
 1143                         goto fail;
 1144                 }
 1145 
 1146                 /*
 1147                  * Policy - Don't allow a process to FSETOWN a process
 1148                  * in another session.
 1149                  *
 1150                  * Remove this test to allow maximum flexibility or
 1151                  * restrict FSETOWN to the current process or process
 1152                  * group for maximum safety.
 1153                  */
 1154                 if (pgrp->pg_session != curthread->td_proc->p_session) {
 1155                         PGRP_UNLOCK(pgrp);
 1156                         ret = EPERM;
 1157                         goto fail;
 1158                 }
 1159 
 1160                 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
 1161                 sigio->sio_pgrp = pgrp;
 1162                 PGRP_UNLOCK(pgrp);
 1163         }
 1164         sx_sunlock(&proctree_lock);
 1165         *sigiop = sigio;
 1166         SIGIO_UNLOCK();
 1167         if (osigio != NULL)
 1168                 sigiofree(osigio);
 1169         return (0);
 1170 
 1171 fail:
 1172         SIGIO_UNLOCK();
 1173         sx_sunlock(&proctree_lock);
 1174         sigiofree(sigio);
 1175         if (osigio != NULL)
 1176                 sigiofree(osigio);
 1177         return (ret);
 1178 }
 1179 
 1180 /*
 1181  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 1182  */
 1183 pid_t
 1184 fgetown(struct sigio **sigiop)
 1185 {
 1186         pid_t pgid;
 1187 
 1188         SIGIO_LOCK();
 1189         pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
 1190         SIGIO_UNLOCK();
 1191         return (pgid);
 1192 }
 1193 
 1194 /*
 1195  * Function drops the filedesc lock on return.
 1196  */
 1197 static int
 1198 closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
 1199     int holdleaders)
 1200 {
 1201         int error;
 1202 
 1203         FILEDESC_XLOCK_ASSERT(fdp);
 1204 
 1205         if (holdleaders) {
 1206                 if (td->td_proc->p_fdtol != NULL) {
 1207                         /*
 1208                          * Ask fdfree() to sleep to ensure that all relevant
 1209                          * process leaders can be traversed in closef().
 1210                          */
 1211                         fdp->fd_holdleaderscount++;
 1212                 } else {
 1213                         holdleaders = 0;
 1214                 }
 1215         }
 1216 
 1217         /*
 1218          * We now hold the fp reference that used to be owned by the
 1219          * descriptor array.  We have to unlock the FILEDESC *AFTER*
 1220          * knote_fdclose to prevent a race of the fd getting opened, a knote
 1221          * added, and deleteing a knote for the new fd.
 1222          */
 1223         knote_fdclose(td, fd);
 1224 
 1225         /*
 1226          * We need to notify mqueue if the object is of type mqueue.
 1227          */
 1228         if (fp->f_type == DTYPE_MQUEUE)
 1229                 mq_fdclose(td, fd, fp);
 1230         FILEDESC_XUNLOCK(fdp);
 1231 
 1232         error = closef(fp, td);
 1233 
 1234         /*
 1235          * All paths leading up to closefp() will have already removed or
 1236          * replaced the fd in the filedesc table, so a restart would not
 1237          * operate on the same file.
 1238          */
 1239         if (error == ERESTART)
 1240                 error = EINTR;
 1241 
 1242         if (holdleaders) {
 1243                 FILEDESC_XLOCK(fdp);
 1244                 fdp->fd_holdleaderscount--;
 1245                 if (fdp->fd_holdleaderscount == 0 &&
 1246                     fdp->fd_holdleaderswakeup != 0) {
 1247                         fdp->fd_holdleaderswakeup = 0;
 1248                         wakeup(&fdp->fd_holdleaderscount);
 1249                 }
 1250                 FILEDESC_XUNLOCK(fdp);
 1251         }
 1252         return (error);
 1253 }
 1254 
 1255 /*
 1256  * Close a file descriptor.
 1257  */
 1258 #ifndef _SYS_SYSPROTO_H_
 1259 struct close_args {
 1260         int     fd;
 1261 };
 1262 #endif
 1263 /* ARGSUSED */
 1264 int
 1265 sys_close(struct thread *td, struct close_args *uap)
 1266 {
 1267 
 1268         return (kern_close(td, uap->fd));
 1269 }
 1270 
 1271 int
 1272 kern_close(struct thread *td, int fd)
 1273 {
 1274         struct filedesc *fdp;
 1275         struct file *fp;
 1276 
 1277         fdp = td->td_proc->p_fd;
 1278 
 1279         AUDIT_SYSCLOSE(td, fd);
 1280 
 1281         FILEDESC_XLOCK(fdp);
 1282         if ((fp = fget_locked(fdp, fd)) == NULL) {
 1283                 FILEDESC_XUNLOCK(fdp);
 1284                 return (EBADF);
 1285         }
 1286         fdfree(fdp, fd);
 1287 
 1288         /* closefp() drops the FILEDESC lock for us. */
 1289         return (closefp(fdp, fd, fp, td, 1));
 1290 }
 1291 
 1292 int
 1293 kern_close_range(struct thread *td, u_int lowfd, u_int highfd)
 1294 {
 1295         struct filedesc *fdp;
 1296         int fd, ret;
 1297 
 1298         ret = 0;
 1299         fdp = td->td_proc->p_fd;
 1300         FILEDESC_SLOCK(fdp);
 1301 
 1302         /*
 1303          * Check this prior to clamping; closefrom(3) with only fd 0, 1, and 2
 1304          * open should not be a usage error.  From a close_range() perspective,
 1305          * close_range(3, ~0U, 0) in the same scenario should also likely not
 1306          * be a usage error as all fd above 3 are in-fact already closed.
 1307          */
 1308         if (highfd < lowfd) {
 1309                 ret = EINVAL;
 1310                 goto out;
 1311         }
 1312 
 1313         /*
 1314          * If fdp->fd_lastfile == -1, we're dealing with either a fresh file
 1315          * table or one in which every fd has been closed.  Just return
 1316          * successful; there's nothing left to do.
 1317          */
 1318         if (fdp->fd_lastfile == -1)
 1319                 goto out;
 1320         /* Clamped to [lowfd, fd_lastfile] */
 1321         highfd = MIN(highfd, fdp->fd_lastfile);
 1322         for (fd = lowfd; fd <= highfd; fd++) {
 1323                 if (fdp->fd_ofiles[fd].fde_file != NULL) {
 1324                         FILEDESC_SUNLOCK(fdp);
 1325                         (void)kern_close(td, fd);
 1326                         FILEDESC_SLOCK(fdp);
 1327                 }
 1328         }
 1329 out:
 1330         FILEDESC_SUNLOCK(fdp);
 1331         return (ret);
 1332 }
 1333 
 1334 #ifndef _SYS_SYSPROTO_H_
 1335 struct close_range_args {
 1336         u_int   lowfd;
 1337         u_int   highfd;
 1338         int     flags;
 1339 };
 1340 #endif
 1341 int
 1342 sys_close_range(struct thread *td, struct close_range_args *uap)
 1343 {
 1344 
 1345         /* No flags currently defined */
 1346         if (uap->flags != 0)
 1347                 return (EINVAL);
 1348         return (kern_close_range(td, uap->lowfd, uap->highfd));
 1349 }
 1350 
 1351 /*
 1352  * Close open file descriptors.
 1353  */
 1354 #ifndef _SYS_SYSPROTO_H_
 1355 struct closefrom_args {
 1356         int     lowfd;
 1357 };
 1358 #endif
 1359 /* ARGSUSED */
 1360 int
 1361 sys_closefrom(struct thread *td, struct closefrom_args *uap)
 1362 {
 1363         u_int lowfd;
 1364 
 1365         AUDIT_ARG_FD(uap->lowfd);
 1366 
 1367         /*
 1368          * Treat negative starting file descriptor values identical to
 1369          * closefrom(0) which closes all files.
 1370          */
 1371         lowfd = MAX(0, uap->lowfd);
 1372         return (kern_close_range(td, lowfd, ~0U));
 1373 }
 1374 
 1375 #if defined(COMPAT_43)
 1376 /*
 1377  * Return status information about a file descriptor.
 1378  */
 1379 #ifndef _SYS_SYSPROTO_H_
 1380 struct ofstat_args {
 1381         int     fd;
 1382         struct  ostat *sb;
 1383 };
 1384 #endif
 1385 /* ARGSUSED */
 1386 int
 1387 ofstat(struct thread *td, struct ofstat_args *uap)
 1388 {
 1389         struct ostat oub;
 1390         struct stat ub;
 1391         int error;
 1392 
 1393         error = kern_fstat(td, uap->fd, &ub);
 1394         if (error == 0) {
 1395                 cvtstat(&ub, &oub);
 1396                 error = copyout(&oub, uap->sb, sizeof(oub));
 1397         }
 1398         return (error);
 1399 }
 1400 #endif /* COMPAT_43 */
 1401 
 1402 #if defined(COMPAT_FREEBSD11)
 1403 int
 1404 freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap)
 1405 {
 1406         struct stat sb;
 1407         struct freebsd11_stat osb;
 1408         int error;
 1409 
 1410         error = kern_fstat(td, uap->fd, &sb);
 1411         if (error != 0)
 1412                 return (error);
 1413         error = freebsd11_cvtstat(&sb, &osb);
 1414         if (error == 0)
 1415                 error = copyout(&osb, uap->sb, sizeof(osb));
 1416         return (error);
 1417 }
 1418 #endif  /* COMPAT_FREEBSD11 */
 1419 
 1420 /*
 1421  * Return status information about a file descriptor.
 1422  */
 1423 #ifndef _SYS_SYSPROTO_H_
 1424 struct fstat_args {
 1425         int     fd;
 1426         struct  stat *sb;
 1427 };
 1428 #endif
 1429 /* ARGSUSED */
 1430 int
 1431 sys_fstat(struct thread *td, struct fstat_args *uap)
 1432 {
 1433         struct stat ub;
 1434         int error;
 1435 
 1436         error = kern_fstat(td, uap->fd, &ub);
 1437         if (error == 0)
 1438                 error = copyout(&ub, uap->sb, sizeof(ub));
 1439         return (error);
 1440 }
 1441 
 1442 int
 1443 kern_fstat(struct thread *td, int fd, struct stat *sbp)
 1444 {
 1445         struct file *fp;
 1446         int error;
 1447 
 1448         AUDIT_ARG_FD(fd);
 1449 
 1450         error = fget(td, fd, &cap_fstat_rights, &fp);
 1451         if (error != 0)
 1452                 return (error);
 1453 
 1454         AUDIT_ARG_FILE(td->td_proc, fp);
 1455 
 1456         error = fo_stat(fp, sbp, td->td_ucred, td);
 1457         fdrop(fp, td);
 1458 #ifdef __STAT_TIME_T_EXT
 1459         if (error == 0) {
 1460                 sbp->st_atim_ext = 0;
 1461                 sbp->st_mtim_ext = 0;
 1462                 sbp->st_ctim_ext = 0;
 1463                 sbp->st_btim_ext = 0;
 1464         }
 1465 #endif
 1466 #ifdef KTRACE
 1467         if (error == 0 && KTRPOINT(td, KTR_STRUCT))
 1468                 ktrstat(sbp);
 1469 #endif
 1470         return (error);
 1471 }
 1472 
 1473 #if defined(COMPAT_FREEBSD11)
 1474 /*
 1475  * Return status information about a file descriptor.
 1476  */
 1477 #ifndef _SYS_SYSPROTO_H_
 1478 struct freebsd11_nfstat_args {
 1479         int     fd;
 1480         struct  nstat *sb;
 1481 };
 1482 #endif
 1483 /* ARGSUSED */
 1484 int
 1485 freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap)
 1486 {
 1487         struct nstat nub;
 1488         struct stat ub;
 1489         int error;
 1490 
 1491         error = kern_fstat(td, uap->fd, &ub);
 1492         if (error == 0) {
 1493                 freebsd11_cvtnstat(&ub, &nub);
 1494                 error = copyout(&nub, uap->sb, sizeof(nub));
 1495         }
 1496         return (error);
 1497 }
 1498 #endif /* COMPAT_FREEBSD11 */
 1499 
 1500 /*
 1501  * Return pathconf information about a file descriptor.
 1502  */
 1503 #ifndef _SYS_SYSPROTO_H_
 1504 struct fpathconf_args {
 1505         int     fd;
 1506         int     name;
 1507 };
 1508 #endif
 1509 /* ARGSUSED */
 1510 int
 1511 sys_fpathconf(struct thread *td, struct fpathconf_args *uap)
 1512 {
 1513         long value;
 1514         int error;
 1515 
 1516         error = kern_fpathconf(td, uap->fd, uap->name, &value);
 1517         if (error == 0)
 1518                 td->td_retval[0] = value;
 1519         return (error);
 1520 }
 1521 
 1522 int
 1523 kern_fpathconf(struct thread *td, int fd, int name, long *valuep)
 1524 {
 1525         struct file *fp;
 1526         struct vnode *vp;
 1527         int error;
 1528 
 1529         error = fget(td, fd, &cap_fpathconf_rights, &fp);
 1530         if (error != 0)
 1531                 return (error);
 1532 
 1533         if (name == _PC_ASYNC_IO) {
 1534                 *valuep = _POSIX_ASYNCHRONOUS_IO;
 1535                 goto out;
 1536         }
 1537         vp = fp->f_vnode;
 1538         if (vp != NULL) {
 1539                 vn_lock(vp, LK_SHARED | LK_RETRY);
 1540                 error = VOP_PATHCONF(vp, name, valuep);
 1541                 VOP_UNLOCK(vp, 0);
 1542         } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
 1543                 if (name != _PC_PIPE_BUF) {
 1544                         error = EINVAL;
 1545                 } else {
 1546                         *valuep = PIPE_BUF;
 1547                         error = 0;
 1548                 }
 1549         } else {
 1550                 error = EOPNOTSUPP;
 1551         }
 1552 out:
 1553         fdrop(fp, td);
 1554         return (error);
 1555 }
 1556 
 1557 /*
 1558  * Initialize filecaps structure.
 1559  */
 1560 void
 1561 filecaps_init(struct filecaps *fcaps)
 1562 {
 1563 
 1564         bzero(fcaps, sizeof(*fcaps));
 1565         fcaps->fc_nioctls = -1;
 1566 }
 1567 
 1568 /*
 1569  * Copy filecaps structure allocating memory for ioctls array if needed.
 1570  *
 1571  * The last parameter indicates whether the fdtable is locked. If it is not and
 1572  * ioctls are encountered, copying fails and the caller must lock the table.
 1573  *
 1574  * Note that if the table was not locked, the caller has to check the relevant
 1575  * sequence counter to determine whether the operation was successful.
 1576  */
 1577 bool
 1578 filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked)
 1579 {
 1580         size_t size;
 1581 
 1582         if (src->fc_ioctls != NULL && !locked)
 1583                 return (false);
 1584         memcpy(dst, src, sizeof(*src));
 1585         if (src->fc_ioctls == NULL)
 1586                 return (true);
 1587 
 1588         KASSERT(src->fc_nioctls > 0,
 1589             ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
 1590 
 1591         size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
 1592         dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK);
 1593         memcpy(dst->fc_ioctls, src->fc_ioctls, size);
 1594         return (true);
 1595 }
 1596 
 1597 static u_long *
 1598 filecaps_copy_prep(const struct filecaps *src)
 1599 {
 1600         u_long *ioctls;
 1601         size_t size;
 1602 
 1603         if (src->fc_ioctls == NULL)
 1604                 return (NULL);
 1605 
 1606         KASSERT(src->fc_nioctls > 0,
 1607             ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
 1608 
 1609         size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
 1610         ioctls = malloc(size, M_FILECAPS, M_WAITOK);
 1611         return (ioctls);
 1612 }
 1613 
 1614 static void
 1615 filecaps_copy_finish(const struct filecaps *src, struct filecaps *dst,
 1616     u_long *ioctls)
 1617 {
 1618         size_t size;
 1619 
 1620         *dst = *src;
 1621         if (src->fc_ioctls == NULL) {
 1622                 MPASS(ioctls == NULL);
 1623                 return;
 1624         }
 1625 
 1626         size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
 1627         dst->fc_ioctls = ioctls;
 1628         bcopy(src->fc_ioctls, dst->fc_ioctls, size);
 1629 }
 1630 
 1631 /*
 1632  * Move filecaps structure to the new place and clear the old place.
 1633  */
 1634 void
 1635 filecaps_move(struct filecaps *src, struct filecaps *dst)
 1636 {
 1637 
 1638         *dst = *src;
 1639         bzero(src, sizeof(*src));
 1640 }
 1641 
 1642 /*
 1643  * Fill the given filecaps structure with full rights.
 1644  */
 1645 static void
 1646 filecaps_fill(struct filecaps *fcaps)
 1647 {
 1648 
 1649         CAP_ALL(&fcaps->fc_rights);
 1650         fcaps->fc_ioctls = NULL;
 1651         fcaps->fc_nioctls = -1;
 1652         fcaps->fc_fcntls = CAP_FCNTL_ALL;
 1653 }
 1654 
 1655 /*
 1656  * Free memory allocated within filecaps structure.
 1657  */
 1658 void
 1659 filecaps_free(struct filecaps *fcaps)
 1660 {
 1661 
 1662         free(fcaps->fc_ioctls, M_FILECAPS);
 1663         bzero(fcaps, sizeof(*fcaps));
 1664 }
 1665 
 1666 static u_long *
 1667 filecaps_free_prep(struct filecaps *fcaps)
 1668 {
 1669         u_long *ioctls;
 1670 
 1671         ioctls = fcaps->fc_ioctls;
 1672         bzero(fcaps, sizeof(*fcaps));
 1673         return (ioctls);
 1674 }
 1675 
 1676 static void
 1677 filecaps_free_finish(u_long *ioctls)
 1678 {
 1679 
 1680         free(ioctls, M_FILECAPS);
 1681 }
 1682 
 1683 /*
 1684  * Validate the given filecaps structure.
 1685  */
 1686 static void
 1687 filecaps_validate(const struct filecaps *fcaps, const char *func)
 1688 {
 1689 
 1690         KASSERT(cap_rights_is_valid(&fcaps->fc_rights),
 1691             ("%s: invalid rights", func));
 1692         KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0,
 1693             ("%s: invalid fcntls", func));
 1694         KASSERT(fcaps->fc_fcntls == 0 ||
 1695             cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL),
 1696             ("%s: fcntls without CAP_FCNTL", func));
 1697         KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 :
 1698             (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0),
 1699             ("%s: invalid ioctls", func));
 1700         KASSERT(fcaps->fc_nioctls == 0 ||
 1701             cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL),
 1702             ("%s: ioctls without CAP_IOCTL", func));
 1703 }
 1704 
 1705 static void
 1706 fdgrowtable_exp(struct filedesc *fdp, int nfd)
 1707 {
 1708         int nfd1;
 1709 
 1710         FILEDESC_XLOCK_ASSERT(fdp);
 1711 
 1712         nfd1 = fdp->fd_nfiles * 2;
 1713         if (nfd1 < nfd)
 1714                 nfd1 = nfd;
 1715         fdgrowtable(fdp, nfd1);
 1716 }
 1717 
 1718 /*
 1719  * Grow the file table to accommodate (at least) nfd descriptors.
 1720  */
 1721 static void
 1722 fdgrowtable(struct filedesc *fdp, int nfd)
 1723 {
 1724         struct filedesc0 *fdp0;
 1725         struct freetable *ft;
 1726         struct fdescenttbl *ntable;
 1727         struct fdescenttbl *otable;
 1728         int nnfiles, onfiles;
 1729         NDSLOTTYPE *nmap, *omap;
 1730 
 1731         /*
 1732          * If lastfile is -1 this struct filedesc was just allocated and we are
 1733          * growing it to accommodate for the one we are going to copy from. There
 1734          * is no need to have a lock on this one as it's not visible to anyone.
 1735          */
 1736         if (fdp->fd_lastfile != -1)
 1737                 FILEDESC_XLOCK_ASSERT(fdp);
 1738 
 1739         KASSERT(fdp->fd_nfiles > 0, ("zero-length file table"));
 1740 
 1741         /* save old values */
 1742         onfiles = fdp->fd_nfiles;
 1743         otable = fdp->fd_files;
 1744         omap = fdp->fd_map;
 1745 
 1746         /* compute the size of the new table */
 1747         nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
 1748         if (nnfiles <= onfiles)
 1749                 /* the table is already large enough */
 1750                 return;
 1751 
 1752         /*
 1753          * Allocate a new table.  We need enough space for the number of
 1754          * entries, file entries themselves and the struct freetable we will use
 1755          * when we decommission the table and place it on the freelist.
 1756          * We place the struct freetable in the middle so we don't have
 1757          * to worry about padding.
 1758          */
 1759         ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) +
 1760             nnfiles * sizeof(ntable->fdt_ofiles[0]) +
 1761             sizeof(struct freetable),
 1762             M_FILEDESC, M_ZERO | M_WAITOK);
 1763         /* copy the old data */
 1764         ntable->fdt_nfiles = nnfiles;
 1765         memcpy(ntable->fdt_ofiles, otable->fdt_ofiles,
 1766             onfiles * sizeof(ntable->fdt_ofiles[0]));
 1767 
 1768         /*
 1769          * Allocate a new map only if the old is not large enough.  It will
 1770          * grow at a slower rate than the table as it can map more
 1771          * entries than the table can hold.
 1772          */
 1773         if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
 1774                 nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
 1775                     M_ZERO | M_WAITOK);
 1776                 /* copy over the old data and update the pointer */
 1777                 memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
 1778                 fdp->fd_map = nmap;
 1779         }
 1780 
 1781         /*
 1782          * Make sure that ntable is correctly initialized before we replace
 1783          * fd_files poiner. Otherwise fget_unlocked() may see inconsistent
 1784          * data.
 1785          */
 1786         atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable);
 1787 
 1788         /*
 1789          * Do not free the old file table, as some threads may still
 1790          * reference entries within it.  Instead, place it on a freelist
 1791          * which will be processed when the struct filedesc is released.
 1792          *
 1793          * Note that if onfiles == NDFILE, we're dealing with the original
 1794          * static allocation contained within (struct filedesc0 *)fdp,
 1795          * which must not be freed.
 1796          */
 1797         if (onfiles > NDFILE) {
 1798                 ft = (struct freetable *)&otable->fdt_ofiles[onfiles];
 1799                 fdp0 = (struct filedesc0 *)fdp;
 1800                 ft->ft_table = otable;
 1801                 SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next);
 1802         }
 1803         /*
 1804          * The map does not have the same possibility of threads still
 1805          * holding references to it.  So always free it as long as it
 1806          * does not reference the original static allocation.
 1807          */
 1808         if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
 1809                 free(omap, M_FILEDESC);
 1810 }
 1811 
 1812 /*
 1813  * Allocate a file descriptor for the process.
 1814  */
 1815 int
 1816 fdalloc(struct thread *td, int minfd, int *result)
 1817 {
 1818         struct proc *p = td->td_proc;
 1819         struct filedesc *fdp = p->p_fd;
 1820         int fd, maxfd, allocfd;
 1821 #ifdef RACCT
 1822         int error;
 1823 #endif
 1824 
 1825         FILEDESC_XLOCK_ASSERT(fdp);
 1826 
 1827         if (fdp->fd_freefile > minfd)
 1828                 minfd = fdp->fd_freefile;
 1829 
 1830         maxfd = getmaxfd(td);
 1831 
 1832         /*
 1833          * Search the bitmap for a free descriptor starting at minfd.
 1834          * If none is found, grow the file table.
 1835          */
 1836         fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
 1837         if (fd >= maxfd)
 1838                 return (EMFILE);
 1839         if (fd >= fdp->fd_nfiles) {
 1840                 allocfd = min(fd * 2, maxfd);
 1841 #ifdef RACCT
 1842                 if (racct_enable) {
 1843                         PROC_LOCK(p);
 1844                         error = racct_set(p, RACCT_NOFILE, allocfd);
 1845                         PROC_UNLOCK(p);
 1846                         if (error != 0)
 1847                                 return (EMFILE);
 1848                 }
 1849 #endif
 1850                 /*
 1851                  * fd is already equal to first free descriptor >= minfd, so
 1852                  * we only need to grow the table and we are done.
 1853                  */
 1854                 fdgrowtable_exp(fdp, allocfd);
 1855         }
 1856 
 1857         /*
 1858          * Perform some sanity checks, then mark the file descriptor as
 1859          * used and return it to the caller.
 1860          */
 1861         KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles),
 1862             ("invalid descriptor %d", fd));
 1863         KASSERT(!fdisused(fdp, fd),
 1864             ("fd_first_free() returned non-free descriptor"));
 1865         KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
 1866             ("file descriptor isn't free"));
 1867         fdused(fdp, fd);
 1868         *result = fd;
 1869         return (0);
 1870 }
 1871 
 1872 /*
 1873  * Allocate n file descriptors for the process.
 1874  */
 1875 int
 1876 fdallocn(struct thread *td, int minfd, int *fds, int n)
 1877 {
 1878         struct proc *p = td->td_proc;
 1879         struct filedesc *fdp = p->p_fd;
 1880         int i;
 1881 
 1882         FILEDESC_XLOCK_ASSERT(fdp);
 1883 
 1884         for (i = 0; i < n; i++)
 1885                 if (fdalloc(td, 0, &fds[i]) != 0)
 1886                         break;
 1887 
 1888         if (i < n) {
 1889                 for (i--; i >= 0; i--)
 1890                         fdunused(fdp, fds[i]);
 1891                 return (EMFILE);
 1892         }
 1893 
 1894         return (0);
 1895 }
 1896 
 1897 /*
 1898  * Create a new open file structure and allocate a file descriptor for the
 1899  * process that refers to it.  We add one reference to the file for the
 1900  * descriptor table and one reference for resultfp. This is to prevent us
 1901  * being preempted and the entry in the descriptor table closed after we
 1902  * release the FILEDESC lock.
 1903  */
 1904 int
 1905 falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags,
 1906     struct filecaps *fcaps)
 1907 {
 1908         struct file *fp;
 1909         int error, fd;
 1910 
 1911         error = falloc_noinstall(td, &fp);
 1912         if (error)
 1913                 return (error);         /* no reference held on error */
 1914 
 1915         error = finstall(td, fp, &fd, flags, fcaps);
 1916         if (error) {
 1917                 fdrop(fp, td);          /* one reference (fp only) */
 1918                 return (error);
 1919         }
 1920 
 1921         if (resultfp != NULL)
 1922                 *resultfp = fp;         /* copy out result */
 1923         else
 1924                 fdrop(fp, td);          /* release local reference */
 1925 
 1926         if (resultfd != NULL)
 1927                 *resultfd = fd;
 1928 
 1929         return (0);
 1930 }
 1931 
 1932 /*
 1933  * Create a new open file structure without allocating a file descriptor.
 1934  */
 1935 int
 1936 falloc_noinstall(struct thread *td, struct file **resultfp)
 1937 {
 1938         struct file *fp;
 1939         int maxuserfiles = maxfiles - (maxfiles / 20);
 1940         int openfiles_new;
 1941         static struct timeval lastfail;
 1942         static int curfail;
 1943 
 1944         KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__));
 1945 
 1946         openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1;
 1947         if ((openfiles_new >= maxuserfiles &&
 1948             priv_check(td, PRIV_MAXFILES) != 0) ||
 1949             openfiles_new >= maxfiles) {
 1950                 atomic_subtract_int(&openfiles, 1);
 1951                 if (ppsratecheck(&lastfail, &curfail, 1)) {
 1952                         printf("kern.maxfiles limit exceeded by uid %i, (%s) "
 1953                             "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm);
 1954                 }
 1955                 return (ENFILE);
 1956         }
 1957         fp = uma_zalloc(file_zone, M_WAITOK);
 1958         bzero(fp, sizeof(*fp));
 1959         refcount_init(&fp->f_count, 1);
 1960         fp->f_cred = crhold(td->td_ucred);
 1961         fp->f_ops = &badfileops;
 1962         *resultfp = fp;
 1963         return (0);
 1964 }
 1965 
 1966 /*
 1967  * Install a file in a file descriptor table.
 1968  */
 1969 void
 1970 _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
 1971     struct filecaps *fcaps)
 1972 {
 1973         struct filedescent *fde;
 1974 
 1975         MPASS(fp != NULL);
 1976         if (fcaps != NULL)
 1977                 filecaps_validate(fcaps, __func__);
 1978         FILEDESC_XLOCK_ASSERT(fdp);
 1979 
 1980         fde = &fdp->fd_ofiles[fd];
 1981 #ifdef CAPABILITIES
 1982         seq_write_begin(&fde->fde_seq);
 1983 #endif
 1984         fde->fde_file = fp;
 1985         fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0;
 1986         if (fcaps != NULL)
 1987                 filecaps_move(fcaps, &fde->fde_caps);
 1988         else
 1989                 filecaps_fill(&fde->fde_caps);
 1990 #ifdef CAPABILITIES
 1991         seq_write_end(&fde->fde_seq);
 1992 #endif
 1993 }
 1994 
 1995 int
 1996 finstall(struct thread *td, struct file *fp, int *fd, int flags,
 1997     struct filecaps *fcaps)
 1998 {
 1999         struct filedesc *fdp = td->td_proc->p_fd;
 2000         int error;
 2001 
 2002         MPASS(fd != NULL);
 2003 
 2004         if (!fhold(fp))
 2005                 return (EBADF);
 2006         FILEDESC_XLOCK(fdp);
 2007         if ((error = fdalloc(td, 0, fd))) {
 2008                 FILEDESC_XUNLOCK(fdp);
 2009                 fdrop(fp, td);
 2010                 return (error);
 2011         }
 2012         _finstall(fdp, fp, *fd, flags, fcaps);
 2013         FILEDESC_XUNLOCK(fdp);
 2014         return (0);
 2015 }
 2016 
 2017 /*
 2018  * Build a new filedesc structure from another.
 2019  * Copy the current, root, and jail root vnode references.
 2020  *
 2021  * If fdp is not NULL, return with it shared locked.
 2022  */
 2023 struct filedesc *
 2024 fdinit(struct filedesc *fdp, bool prepfiles)
 2025 {
 2026         struct filedesc0 *newfdp0;
 2027         struct filedesc *newfdp;
 2028 
 2029         newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
 2030         newfdp = &newfdp0->fd_fd;
 2031 
 2032         /* Create the file descriptor table. */
 2033         FILEDESC_LOCK_INIT(newfdp);
 2034         refcount_init(&newfdp->fd_refcnt, 1);
 2035         refcount_init(&newfdp->fd_holdcnt, 1);
 2036         newfdp->fd_cmask = CMASK;
 2037         newfdp->fd_map = newfdp0->fd_dmap;
 2038         newfdp->fd_lastfile = -1;
 2039         newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
 2040         newfdp->fd_files->fdt_nfiles = NDFILE;
 2041 
 2042         if (fdp == NULL)
 2043                 return (newfdp);
 2044 
 2045         if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles)
 2046                 fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 2047 
 2048         FILEDESC_SLOCK(fdp);
 2049         newfdp->fd_cdir = fdp->fd_cdir;
 2050         if (newfdp->fd_cdir)
 2051                 vrefact(newfdp->fd_cdir);
 2052         newfdp->fd_rdir = fdp->fd_rdir;
 2053         if (newfdp->fd_rdir)
 2054                 vrefact(newfdp->fd_rdir);
 2055         newfdp->fd_jdir = fdp->fd_jdir;
 2056         if (newfdp->fd_jdir)
 2057                 vrefact(newfdp->fd_jdir);
 2058 
 2059         if (!prepfiles) {
 2060                 FILEDESC_SUNLOCK(fdp);
 2061         } else {
 2062                 while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
 2063                         FILEDESC_SUNLOCK(fdp);
 2064                         fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 2065                         FILEDESC_SLOCK(fdp);
 2066                 }
 2067         }
 2068 
 2069         return (newfdp);
 2070 }
 2071 
 2072 static struct filedesc *
 2073 fdhold(struct proc *p)
 2074 {
 2075         struct filedesc *fdp;
 2076 
 2077         PROC_LOCK_ASSERT(p, MA_OWNED);
 2078         fdp = p->p_fd;
 2079         if (fdp != NULL)
 2080                 refcount_acquire(&fdp->fd_holdcnt);
 2081         return (fdp);
 2082 }
 2083 
 2084 static void
 2085 fddrop(struct filedesc *fdp)
 2086 {
 2087 
 2088         if (fdp->fd_holdcnt > 1) {
 2089                 if (refcount_release(&fdp->fd_holdcnt) == 0)
 2090                         return;
 2091         }
 2092 
 2093         FILEDESC_LOCK_DESTROY(fdp);
 2094         uma_zfree(filedesc0_zone, fdp);
 2095 }
 2096 
 2097 /*
 2098  * Share a filedesc structure.
 2099  */
 2100 struct filedesc *
 2101 fdshare(struct filedesc *fdp)
 2102 {
 2103 
 2104         refcount_acquire(&fdp->fd_refcnt);
 2105         return (fdp);
 2106 }
 2107 
 2108 /*
 2109  * Unshare a filedesc structure, if necessary by making a copy
 2110  */
 2111 void
 2112 fdunshare(struct thread *td)
 2113 {
 2114         struct filedesc *tmp;
 2115         struct proc *p = td->td_proc;
 2116 
 2117         if (p->p_fd->fd_refcnt == 1)
 2118                 return;
 2119 
 2120         tmp = fdcopy(p->p_fd);
 2121         fdescfree(td);
 2122         p->p_fd = tmp;
 2123 }
 2124 
 2125 void
 2126 fdinstall_remapped(struct thread *td, struct filedesc *fdp)
 2127 {
 2128 
 2129         fdescfree(td);
 2130         td->td_proc->p_fd = fdp;
 2131 }
 2132 
 2133 /*
 2134  * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
 2135  * this is to ease callers, not catch errors.
 2136  */
 2137 struct filedesc *
 2138 fdcopy(struct filedesc *fdp)
 2139 {
 2140         struct filedesc *newfdp;
 2141         struct filedescent *nfde, *ofde;
 2142         int i;
 2143 
 2144         MPASS(fdp != NULL);
 2145 
 2146         newfdp = fdinit(fdp, true);
 2147         /* copy all passable descriptors (i.e. not kqueue) */
 2148         newfdp->fd_freefile = -1;
 2149         for (i = 0; i <= fdp->fd_lastfile; ++i) {
 2150                 ofde = &fdp->fd_ofiles[i];
 2151                 if (ofde->fde_file == NULL ||
 2152                     (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 ||
 2153                     !fhold(ofde->fde_file)) {
 2154                         if (newfdp->fd_freefile == -1)
 2155                                 newfdp->fd_freefile = i;
 2156                         continue;
 2157                 }
 2158                 nfde = &newfdp->fd_ofiles[i];
 2159                 *nfde = *ofde;
 2160                 filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
 2161                 fdused_init(newfdp, i);
 2162                 newfdp->fd_lastfile = i;
 2163         }
 2164         if (newfdp->fd_freefile == -1)
 2165                 newfdp->fd_freefile = i;
 2166         newfdp->fd_cmask = fdp->fd_cmask;
 2167         FILEDESC_SUNLOCK(fdp);
 2168         return (newfdp);
 2169 }
 2170 
 2171 /*
 2172  * Copies a filedesc structure, while remapping all file descriptors
 2173  * stored inside using a translation table.
 2174  *
 2175  * File descriptors are copied over to the new file descriptor table,
 2176  * regardless of whether the close-on-exec flag is set.
 2177  */
 2178 int
 2179 fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
 2180     struct filedesc **ret)
 2181 {
 2182         struct filedesc *newfdp;
 2183         struct filedescent *nfde, *ofde;
 2184         int error, i;
 2185 
 2186         MPASS(fdp != NULL);
 2187 
 2188         newfdp = fdinit(fdp, true);
 2189         if (nfds > fdp->fd_lastfile + 1) {
 2190                 /* New table cannot be larger than the old one. */
 2191                 error = E2BIG;
 2192                 goto bad;
 2193         }
 2194         /* Copy all passable descriptors (i.e. not kqueue). */
 2195         newfdp->fd_freefile = nfds;
 2196         for (i = 0; i < nfds; ++i) {
 2197                 if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) {
 2198                         /* File descriptor out of bounds. */
 2199                         error = EBADF;
 2200                         goto bad;
 2201                 }
 2202                 ofde = &fdp->fd_ofiles[fds[i]];
 2203                 if (ofde->fde_file == NULL) {
 2204                         /* Unused file descriptor. */
 2205                         error = EBADF;
 2206                         goto bad;
 2207                 }
 2208                 if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
 2209                         /* File descriptor cannot be passed. */
 2210                         error = EINVAL;
 2211                         goto bad;
 2212                 }
 2213                 if (!fhold(nfde->fde_file)) {
 2214                         error = EBADF;
 2215                         goto bad;
 2216                 }
 2217                 nfde = &newfdp->fd_ofiles[i];
 2218                 *nfde = *ofde;
 2219                 filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
 2220                 fdused_init(newfdp, i);
 2221                 newfdp->fd_lastfile = i;
 2222         }
 2223         newfdp->fd_cmask = fdp->fd_cmask;
 2224         FILEDESC_SUNLOCK(fdp);
 2225         *ret = newfdp;
 2226         return (0);
 2227 bad:
 2228         FILEDESC_SUNLOCK(fdp);
 2229         fdescfree_remapped(newfdp);
 2230         return (error);
 2231 }
 2232 
 2233 /*
 2234  * Clear POSIX style locks. This is only used when fdp looses a reference (i.e.
 2235  * one of processes using it exits) and the table used to be shared.
 2236  */
 2237 static void
 2238 fdclearlocks(struct thread *td)
 2239 {
 2240         struct filedesc *fdp;
 2241         struct filedesc_to_leader *fdtol;
 2242         struct flock lf;
 2243         struct file *fp;
 2244         struct proc *p;
 2245         struct vnode *vp;
 2246         int i;
 2247 
 2248         p = td->td_proc;
 2249         fdp = p->p_fd;
 2250         fdtol = p->p_fdtol;
 2251         MPASS(fdtol != NULL);
 2252 
 2253         FILEDESC_XLOCK(fdp);
 2254         KASSERT(fdtol->fdl_refcount > 0,
 2255             ("filedesc_to_refcount botch: fdl_refcount=%d",
 2256             fdtol->fdl_refcount));
 2257         if (fdtol->fdl_refcount == 1 &&
 2258             (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 2259                 for (i = 0; i <= fdp->fd_lastfile; i++) {
 2260                         fp = fdp->fd_ofiles[i].fde_file;
 2261                         if (fp == NULL || fp->f_type != DTYPE_VNODE ||
 2262                             !fhold(fp))
 2263                                 continue;
 2264                         FILEDESC_XUNLOCK(fdp);
 2265                         lf.l_whence = SEEK_SET;
 2266                         lf.l_start = 0;
 2267                         lf.l_len = 0;
 2268                         lf.l_type = F_UNLCK;
 2269                         vp = fp->f_vnode;
 2270                         (void) VOP_ADVLOCK(vp,
 2271                             (caddr_t)p->p_leader, F_UNLCK,
 2272                             &lf, F_POSIX);
 2273                         FILEDESC_XLOCK(fdp);
 2274                         fdrop(fp, td);
 2275                 }
 2276         }
 2277 retry:
 2278         if (fdtol->fdl_refcount == 1) {
 2279                 if (fdp->fd_holdleaderscount > 0 &&
 2280                     (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 2281                         /*
 2282                          * close() or kern_dup() has cleared a reference
 2283                          * in a shared file descriptor table.
 2284                          */
 2285                         fdp->fd_holdleaderswakeup = 1;
 2286                         sx_sleep(&fdp->fd_holdleaderscount,
 2287                             FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
 2288                         goto retry;
 2289                 }
 2290                 if (fdtol->fdl_holdcount > 0) {
 2291                         /*
 2292                          * Ensure that fdtol->fdl_leader remains
 2293                          * valid in closef().
 2294                          */
 2295                         fdtol->fdl_wakeup = 1;
 2296                         sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
 2297                             "fdlhold", 0);
 2298                         goto retry;
 2299                 }
 2300         }
 2301         fdtol->fdl_refcount--;
 2302         if (fdtol->fdl_refcount == 0 &&
 2303             fdtol->fdl_holdcount == 0) {
 2304                 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
 2305                 fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
 2306         } else
 2307                 fdtol = NULL;
 2308         p->p_fdtol = NULL;
 2309         FILEDESC_XUNLOCK(fdp);
 2310         if (fdtol != NULL)
 2311                 free(fdtol, M_FILEDESC_TO_LEADER);
 2312 }
 2313 
 2314 /*
 2315  * Release a filedesc structure.
 2316  */
 2317 static void
 2318 fdescfree_fds(struct thread *td, struct filedesc *fdp, bool needclose)
 2319 {
 2320         struct filedesc0 *fdp0;
 2321         struct freetable *ft, *tft;
 2322         struct filedescent *fde;
 2323         struct file *fp;
 2324         int i;
 2325 
 2326         for (i = 0; i <= fdp->fd_lastfile; i++) {
 2327                 fde = &fdp->fd_ofiles[i];
 2328                 fp = fde->fde_file;
 2329                 if (fp != NULL) {
 2330                         fdefree_last(fde);
 2331                         if (needclose)
 2332                                 (void) closef(fp, td);
 2333                         else
 2334                                 fdrop(fp, td);
 2335                 }
 2336         }
 2337 
 2338         if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
 2339                 free(fdp->fd_map, M_FILEDESC);
 2340         if (fdp->fd_nfiles > NDFILE)
 2341                 free(fdp->fd_files, M_FILEDESC);
 2342 
 2343         fdp0 = (struct filedesc0 *)fdp;
 2344         SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft)
 2345                 free(ft->ft_table, M_FILEDESC);
 2346 
 2347         fddrop(fdp);
 2348 }
 2349 
 2350 void
 2351 fdescfree(struct thread *td)
 2352 {
 2353         struct proc *p;
 2354         struct filedesc *fdp;
 2355         struct vnode *cdir, *jdir, *rdir;
 2356 
 2357         p = td->td_proc;
 2358         fdp = p->p_fd;
 2359         MPASS(fdp != NULL);
 2360 
 2361 #ifdef RACCT
 2362         if (racct_enable) {
 2363                 PROC_LOCK(p);
 2364                 racct_set(p, RACCT_NOFILE, 0);
 2365                 PROC_UNLOCK(p);
 2366         }
 2367 #endif
 2368 
 2369         if (p->p_fdtol != NULL)
 2370                 fdclearlocks(td);
 2371 
 2372         PROC_LOCK(p);
 2373         p->p_fd = NULL;
 2374         PROC_UNLOCK(p);
 2375 
 2376         if (refcount_release(&fdp->fd_refcnt) == 0)
 2377                 return;
 2378 
 2379         FILEDESC_XLOCK(fdp);
 2380         cdir = fdp->fd_cdir;
 2381         fdp->fd_cdir = NULL;
 2382         rdir = fdp->fd_rdir;
 2383         fdp->fd_rdir = NULL;
 2384         jdir = fdp->fd_jdir;
 2385         fdp->fd_jdir = NULL;
 2386         FILEDESC_XUNLOCK(fdp);
 2387 
 2388         if (cdir != NULL)
 2389                 vrele(cdir);
 2390         if (rdir != NULL)
 2391                 vrele(rdir);
 2392         if (jdir != NULL)
 2393                 vrele(jdir);
 2394 
 2395         fdescfree_fds(td, fdp, 1);
 2396 }
 2397 
 2398 void
 2399 fdescfree_remapped(struct filedesc *fdp)
 2400 {
 2401 
 2402         if (fdp->fd_cdir != NULL)
 2403                 vrele(fdp->fd_cdir);
 2404         if (fdp->fd_rdir != NULL)
 2405                 vrele(fdp->fd_rdir);
 2406         if (fdp->fd_jdir != NULL)
 2407                 vrele(fdp->fd_jdir);
 2408 
 2409         fdescfree_fds(curthread, fdp, 0);
 2410 }
 2411 
 2412 /*
 2413  * For setugid programs, we don't want to people to use that setugidness
 2414  * to generate error messages which write to a file which otherwise would
 2415  * otherwise be off-limits to the process.  We check for filesystems where
 2416  * the vnode can change out from under us after execve (like [lin]procfs).
 2417  *
 2418  * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is
 2419  * sufficient.  We also don't check for setugidness since we know we are.
 2420  */
 2421 static bool
 2422 is_unsafe(struct file *fp)
 2423 {
 2424         struct vnode *vp;
 2425 
 2426         if (fp->f_type != DTYPE_VNODE)
 2427                 return (false);
 2428 
 2429         vp = fp->f_vnode;
 2430         return ((vp->v_vflag & VV_PROCDEP) != 0);
 2431 }
 2432 
 2433 /*
 2434  * Make this setguid thing safe, if at all possible.
 2435  */
 2436 void
 2437 fdsetugidsafety(struct thread *td)
 2438 {
 2439         struct filedesc *fdp;
 2440         struct file *fp;
 2441         int i;
 2442 
 2443         fdp = td->td_proc->p_fd;
 2444         KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 2445         MPASS(fdp->fd_nfiles >= 3);
 2446         for (i = 0; i <= 2; i++) {
 2447                 fp = fdp->fd_ofiles[i].fde_file;
 2448                 if (fp != NULL && is_unsafe(fp)) {
 2449                         FILEDESC_XLOCK(fdp);
 2450                         knote_fdclose(td, i);
 2451                         /*
 2452                          * NULL-out descriptor prior to close to avoid
 2453                          * a race while close blocks.
 2454                          */
 2455                         fdfree(fdp, i);
 2456                         FILEDESC_XUNLOCK(fdp);
 2457                         (void) closef(fp, td);
 2458                 }
 2459         }
 2460 }
 2461 
 2462 /*
 2463  * If a specific file object occupies a specific file descriptor, close the
 2464  * file descriptor entry and drop a reference on the file object.  This is a
 2465  * convenience function to handle a subsequent error in a function that calls
 2466  * falloc() that handles the race that another thread might have closed the
 2467  * file descriptor out from under the thread creating the file object.
 2468  */
 2469 void
 2470 fdclose(struct thread *td, struct file *fp, int idx)
 2471 {
 2472         struct filedesc *fdp = td->td_proc->p_fd;
 2473 
 2474         FILEDESC_XLOCK(fdp);
 2475         if (fdp->fd_ofiles[idx].fde_file == fp) {
 2476                 fdfree(fdp, idx);
 2477                 FILEDESC_XUNLOCK(fdp);
 2478                 fdrop(fp, td);
 2479         } else
 2480                 FILEDESC_XUNLOCK(fdp);
 2481 }
 2482 
 2483 /*
 2484  * Close any files on exec?
 2485  */
 2486 void
 2487 fdcloseexec(struct thread *td)
 2488 {
 2489         struct filedesc *fdp;
 2490         struct filedescent *fde;
 2491         struct file *fp;
 2492         int i;
 2493 
 2494         fdp = td->td_proc->p_fd;
 2495         KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 2496         for (i = 0; i <= fdp->fd_lastfile; i++) {
 2497                 fde = &fdp->fd_ofiles[i];
 2498                 fp = fde->fde_file;
 2499                 if (fp != NULL && (fp->f_type == DTYPE_MQUEUE ||
 2500                     (fde->fde_flags & UF_EXCLOSE))) {
 2501                         FILEDESC_XLOCK(fdp);
 2502                         fdfree(fdp, i);
 2503                         (void) closefp(fdp, i, fp, td, 0);
 2504                         FILEDESC_UNLOCK_ASSERT(fdp);
 2505                 }
 2506         }
 2507 }
 2508 
 2509 /*
 2510  * It is unsafe for set[ug]id processes to be started with file
 2511  * descriptors 0..2 closed, as these descriptors are given implicit
 2512  * significance in the Standard C library.  fdcheckstd() will create a
 2513  * descriptor referencing /dev/null for each of stdin, stdout, and
 2514  * stderr that is not already open.
 2515  */
 2516 int
 2517 fdcheckstd(struct thread *td)
 2518 {
 2519         struct filedesc *fdp;
 2520         register_t save;
 2521         int i, error, devnull;
 2522 
 2523         fdp = td->td_proc->p_fd;
 2524         KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 2525         MPASS(fdp->fd_nfiles >= 3);
 2526         devnull = -1;
 2527         for (i = 0; i <= 2; i++) {
 2528                 if (fdp->fd_ofiles[i].fde_file != NULL)
 2529                         continue;
 2530 
 2531                 save = td->td_retval[0];
 2532                 if (devnull != -1) {
 2533                         error = kern_dup(td, FDDUP_FIXED, 0, devnull, i);
 2534                 } else {
 2535                         error = kern_openat(td, AT_FDCWD, "/dev/null",
 2536                             UIO_SYSSPACE, O_RDWR, 0);
 2537                         if (error == 0) {
 2538                                 devnull = td->td_retval[0];
 2539                                 KASSERT(devnull == i, ("we didn't get our fd"));
 2540                         }
 2541                 }
 2542                 td->td_retval[0] = save;
 2543                 if (error != 0)
 2544                         return (error);
 2545         }
 2546         return (0);
 2547 }
 2548 
 2549 /*
 2550  * Internal form of close.  Decrement reference count on file structure.
 2551  * Note: td may be NULL when closing a file that was being passed in a
 2552  * message.
 2553  */
 2554 int
 2555 closef(struct file *fp, struct thread *td)
 2556 {
 2557         struct vnode *vp;
 2558         struct flock lf;
 2559         struct filedesc_to_leader *fdtol;
 2560         struct filedesc *fdp;
 2561 
 2562         /*
 2563          * POSIX record locking dictates that any close releases ALL
 2564          * locks owned by this process.  This is handled by setting
 2565          * a flag in the unlock to free ONLY locks obeying POSIX
 2566          * semantics, and not to free BSD-style file locks.
 2567          * If the descriptor was in a message, POSIX-style locks
 2568          * aren't passed with the descriptor, and the thread pointer
 2569          * will be NULL.  Callers should be careful only to pass a
 2570          * NULL thread pointer when there really is no owning
 2571          * context that might have locks, or the locks will be
 2572          * leaked.
 2573          */
 2574         if (fp->f_type == DTYPE_VNODE && td != NULL) {
 2575                 vp = fp->f_vnode;
 2576                 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
 2577                         lf.l_whence = SEEK_SET;
 2578                         lf.l_start = 0;
 2579                         lf.l_len = 0;
 2580                         lf.l_type = F_UNLCK;
 2581                         (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
 2582                             F_UNLCK, &lf, F_POSIX);
 2583                 }
 2584                 fdtol = td->td_proc->p_fdtol;
 2585                 if (fdtol != NULL) {
 2586                         /*
 2587                          * Handle special case where file descriptor table is
 2588                          * shared between multiple process leaders.
 2589                          */
 2590                         fdp = td->td_proc->p_fd;
 2591                         FILEDESC_XLOCK(fdp);
 2592                         for (fdtol = fdtol->fdl_next;
 2593                             fdtol != td->td_proc->p_fdtol;
 2594                             fdtol = fdtol->fdl_next) {
 2595                                 if ((fdtol->fdl_leader->p_flag &
 2596                                     P_ADVLOCK) == 0)
 2597                                         continue;
 2598                                 fdtol->fdl_holdcount++;
 2599                                 FILEDESC_XUNLOCK(fdp);
 2600                                 lf.l_whence = SEEK_SET;
 2601                                 lf.l_start = 0;
 2602                                 lf.l_len = 0;
 2603                                 lf.l_type = F_UNLCK;
 2604                                 vp = fp->f_vnode;
 2605                                 (void) VOP_ADVLOCK(vp,
 2606                                     (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
 2607                                     F_POSIX);
 2608                                 FILEDESC_XLOCK(fdp);
 2609                                 fdtol->fdl_holdcount--;
 2610                                 if (fdtol->fdl_holdcount == 0 &&
 2611                                     fdtol->fdl_wakeup != 0) {
 2612                                         fdtol->fdl_wakeup = 0;
 2613                                         wakeup(fdtol);
 2614                                 }
 2615                         }
 2616                         FILEDESC_XUNLOCK(fdp);
 2617                 }
 2618         }
 2619         return (fdrop(fp, td));
 2620 }
 2621 
 2622 /*
 2623  * Initialize the file pointer with the specified properties.
 2624  *
 2625  * The ops are set with release semantics to be certain that the flags, type,
 2626  * and data are visible when ops is.  This is to prevent ops methods from being
 2627  * called with bad data.
 2628  */
 2629 void
 2630 finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
 2631 {
 2632         fp->f_data = data;
 2633         fp->f_flag = flag;
 2634         fp->f_type = type;
 2635         atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
 2636 }
 2637 
 2638 int
 2639 fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
 2640     struct file **fpp, struct filecaps *havecapsp)
 2641 {
 2642         struct filedescent *fde;
 2643         int error;
 2644 
 2645         FILEDESC_LOCK_ASSERT(fdp);
 2646 
 2647         fde = fdeget_locked(fdp, fd);
 2648         if (fde == NULL) {
 2649                 error = EBADF;
 2650                 goto out;
 2651         }
 2652 
 2653 #ifdef CAPABILITIES
 2654         error = cap_check(cap_rights_fde_inline(fde), needrightsp);
 2655         if (error != 0)
 2656                 goto out;
 2657 #endif
 2658 
 2659         if (havecapsp != NULL)
 2660                 filecaps_copy(&fde->fde_caps, havecapsp, true);
 2661 
 2662         *fpp = fde->fde_file;
 2663 
 2664         error = 0;
 2665 out:
 2666         return (error);
 2667 }
 2668 
 2669 int
 2670 fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp,
 2671     struct file **fpp, struct filecaps *havecapsp)
 2672 {
 2673         struct filedesc *fdp = td->td_proc->p_fd;
 2674         int error;
 2675 #ifndef CAPABILITIES
 2676         error = fget_unlocked(fdp, fd, needrightsp, fpp, NULL);
 2677         if (error == 0 && havecapsp != NULL)
 2678                 filecaps_fill(havecapsp);
 2679 #else
 2680         struct file *fp;
 2681         seq_t seq;
 2682 
 2683         for (;;) {
 2684                 error = fget_unlocked(fdp, fd, needrightsp, &fp, &seq);
 2685                 if (error != 0)
 2686                         return (error);
 2687 
 2688                 if (havecapsp != NULL) {
 2689                         if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps,
 2690                             havecapsp, false)) {
 2691                                 fdrop(fp, td);
 2692                                 goto get_locked;
 2693                         }
 2694                 }
 2695 
 2696                 if (!fd_modified(fdp, fd, seq))
 2697                         break;
 2698                 fdrop(fp, td);
 2699         }
 2700 
 2701         *fpp = fp;
 2702         return (0);
 2703 
 2704 get_locked:
 2705         FILEDESC_SLOCK(fdp);
 2706         error = fget_cap_locked(fdp, fd, needrightsp, fpp, havecapsp);
 2707         if (error == 0 && !fhold(*fpp))
 2708                 error = EBADF;
 2709         FILEDESC_SUNLOCK(fdp);
 2710 #endif
 2711         return (error);
 2712 }
 2713 
 2714 int
 2715 fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
 2716     struct file **fpp, seq_t *seqp)
 2717 {
 2718 #ifdef CAPABILITIES
 2719         const struct filedescent *fde;
 2720 #endif
 2721         const struct fdescenttbl *fdt;
 2722         struct file *fp;
 2723         u_int count;
 2724 #ifdef CAPABILITIES
 2725         seq_t seq;
 2726         cap_rights_t haverights;
 2727         int error;
 2728 #endif
 2729 
 2730         fdt = fdp->fd_files;
 2731         if ((u_int)fd >= fdt->fdt_nfiles)
 2732                 return (EBADF);
 2733         /*
 2734          * Fetch the descriptor locklessly.  We avoid fdrop() races by
 2735          * never raising a refcount above 0.  To accomplish this we have
 2736          * to use a cmpset loop rather than an atomic_add.  The descriptor
 2737          * must be re-verified once we acquire a reference to be certain
 2738          * that the identity is still correct and we did not lose a race
 2739          * due to preemption.
 2740          */
 2741         for (;;) {
 2742 #ifdef CAPABILITIES
 2743                 seq = seq_load(fd_seq(fdt, fd));
 2744                 fde = &fdt->fdt_ofiles[fd];
 2745                 haverights = *cap_rights_fde_inline(fde);
 2746                 fp = fde->fde_file;
 2747                 if (!seq_consistent(fd_seq(fdt, fd), seq))
 2748                         continue;
 2749 #else
 2750                 fp = fdt->fdt_ofiles[fd].fde_file;
 2751 #endif
 2752                 if (fp == NULL)
 2753                         return (EBADF);
 2754 #ifdef CAPABILITIES
 2755                 error = cap_check(&haverights, needrightsp);
 2756                 if (error != 0)
 2757                         return (error);
 2758 #endif
 2759                 count = fp->f_count;
 2760         retry:
 2761                 if (count == 0) {
 2762                         /*
 2763                          * Force a reload. Other thread could reallocate the
 2764                          * table before this fd was closed, so it possible that
 2765                          * there is a stale fp pointer in cached version.
 2766                          */
 2767                         fdt = *(const struct fdescenttbl * const volatile *)
 2768                             &(fdp->fd_files);
 2769                         continue;
 2770                 }
 2771                 if (__predict_false(count + 1 < count))
 2772                         return (EBADF);
 2773 
 2774                 /*
 2775                  * Use an acquire barrier to force re-reading of fdt so it is
 2776                  * refreshed for verification.
 2777                  */
 2778                 if (__predict_false(atomic_fcmpset_acq_int(&fp->f_count,
 2779                     &count, count + 1) == 0))
 2780                         goto retry;
 2781                 fdt = fdp->fd_files;
 2782 #ifdef  CAPABILITIES
 2783                 if (seq_consistent_nomb(fd_seq(fdt, fd), seq))
 2784 #else
 2785                 if (fp == fdt->fdt_ofiles[fd].fde_file)
 2786 #endif
 2787                         break;
 2788                 fdrop(fp, curthread);
 2789         }
 2790         *fpp = fp;
 2791         if (seqp != NULL) {
 2792 #ifdef CAPABILITIES
 2793                 *seqp = seq;
 2794 #endif
 2795         }
 2796         return (0);
 2797 }
 2798 
 2799 /*
 2800  * Extract the file pointer associated with the specified descriptor for the
 2801  * current user process.
 2802  *
 2803  * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
 2804  * returned.
 2805  *
 2806  * File's rights will be checked against the capability rights mask.
 2807  *
 2808  * If an error occurred the non-zero error is returned and *fpp is set to
 2809  * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
 2810  * responsible for fdrop().
 2811  */
 2812 static __inline int
 2813 _fget(struct thread *td, int fd, struct file **fpp, int flags,
 2814     cap_rights_t *needrightsp, seq_t *seqp)
 2815 {
 2816         struct filedesc *fdp;
 2817         struct file *fp;
 2818         int error;
 2819 
 2820         *fpp = NULL;
 2821         fdp = td->td_proc->p_fd;
 2822         error = fget_unlocked(fdp, fd, needrightsp, &fp, seqp);
 2823         if (error != 0)
 2824                 return (error);
 2825         if (fp->f_ops == &badfileops) {
 2826                 fdrop(fp, td);
 2827                 return (EBADF);
 2828         }
 2829 
 2830         /*
 2831          * FREAD and FWRITE failure return EBADF as per POSIX.
 2832          */
 2833         error = 0;
 2834         switch (flags) {
 2835         case FREAD:
 2836         case FWRITE:
 2837                 if ((fp->f_flag & flags) == 0)
 2838                         error = EBADF;
 2839                 break;
 2840         case FEXEC:
 2841                 if ((fp->f_flag & (FREAD | FEXEC)) == 0 ||
 2842                     ((fp->f_flag & FWRITE) != 0))
 2843                         error = EBADF;
 2844                 break;
 2845         case 0:
 2846                 break;
 2847         default:
 2848                 KASSERT(0, ("wrong flags"));
 2849         }
 2850 
 2851         if (error != 0) {
 2852                 fdrop(fp, td);
 2853                 return (error);
 2854         }
 2855 
 2856         *fpp = fp;
 2857         return (0);
 2858 }
 2859 
 2860 int
 2861 fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 2862 {
 2863 
 2864         return (_fget(td, fd, fpp, 0, rightsp, NULL));
 2865 }
 2866 
 2867 int
 2868 fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, vm_prot_t *maxprotp,
 2869     struct file **fpp)
 2870 {
 2871         int error;
 2872 #ifndef CAPABILITIES
 2873         error = _fget(td, fd, fpp, 0, rightsp, NULL);
 2874         if (maxprotp != NULL)
 2875                 *maxprotp = VM_PROT_ALL;
 2876 #else
 2877         cap_rights_t fdrights;
 2878         struct filedesc *fdp = td->td_proc->p_fd;
 2879         seq_t seq;
 2880 
 2881         MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
 2882         for (;;) {
 2883                 error = _fget(td, fd, fpp, 0, rightsp, &seq);
 2884                 if (error != 0)
 2885                         return (error);
 2886                 if (maxprotp != NULL)
 2887                         fdrights = *cap_rights(fdp, fd);
 2888                 if (!fd_modified(fdp, fd, seq))
 2889                         break;
 2890                 fdrop(*fpp, td);
 2891         }
 2892 
 2893         /*
 2894          * If requested, convert capability rights to access flags.
 2895          */
 2896         if (maxprotp != NULL)
 2897                 *maxprotp = cap_rights_to_vmprot(&fdrights);
 2898 #endif
 2899         return (error);
 2900 }
 2901 
 2902 int
 2903 fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 2904 {
 2905 
 2906         return (_fget(td, fd, fpp, FREAD, rightsp, NULL));
 2907 }
 2908 
 2909 int
 2910 fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 2911 {
 2912 
 2913         return (_fget(td, fd, fpp, FWRITE, rightsp, NULL));
 2914 }
 2915 
 2916 int
 2917 fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl,
 2918     struct file **fpp)
 2919 {
 2920         struct filedesc *fdp = td->td_proc->p_fd;
 2921 #ifndef CAPABILITIES
 2922         return (fget_unlocked(fdp, fd, rightsp, fpp, NULL));
 2923 #else
 2924         int error;
 2925         seq_t seq;
 2926 
 2927         MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
 2928         for (;;) {
 2929                 error = fget_unlocked(fdp, fd, rightsp, fpp, &seq);
 2930                 if (error != 0)
 2931                         return (error);
 2932                 error = cap_fcntl_check(fdp, fd, needfcntl);
 2933                 if (!fd_modified(fdp, fd, seq))
 2934                         break;
 2935                 fdrop(*fpp, td);
 2936         }
 2937         if (error != 0) {
 2938                 fdrop(*fpp, td);
 2939                 *fpp = NULL;
 2940         }
 2941         return (error);
 2942 #endif
 2943 }
 2944 
 2945 /*
 2946  * Like fget() but loads the underlying vnode, or returns an error if the
 2947  * descriptor does not represent a vnode.  Note that pipes use vnodes but
 2948  * never have VM objects.  The returned vnode will be vref()'d.
 2949  *
 2950  * XXX: what about the unused flags ?
 2951  */
 2952 static __inline int
 2953 _fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp,
 2954     struct vnode **vpp)
 2955 {
 2956         struct file *fp;
 2957         int error;
 2958 
 2959         *vpp = NULL;
 2960         error = _fget(td, fd, &fp, flags, needrightsp, NULL);
 2961         if (error != 0)
 2962                 return (error);
 2963         if (fp->f_vnode == NULL) {
 2964                 error = EINVAL;
 2965         } else {
 2966                 *vpp = fp->f_vnode;
 2967                 vrefact(*vpp);
 2968         }
 2969         fdrop(fp, td);
 2970 
 2971         return (error);
 2972 }
 2973 
 2974 int
 2975 fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 2976 {
 2977 
 2978         return (_fgetvp(td, fd, 0, rightsp, vpp));
 2979 }
 2980 
 2981 int
 2982 fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp,
 2983     struct filecaps *havecaps, struct vnode **vpp)
 2984 {
 2985         struct filedesc *fdp;
 2986         struct filecaps caps;
 2987         struct file *fp;
 2988         int error;
 2989 
 2990         fdp = td->td_proc->p_fd;
 2991         error = fget_cap_locked(fdp, fd, needrightsp, &fp, &caps);
 2992         if (error != 0)
 2993                 return (error);
 2994         if (fp->f_ops == &badfileops) {
 2995                 error = EBADF;
 2996                 goto out;
 2997         }
 2998         if (fp->f_vnode == NULL) {
 2999                 error = EINVAL;
 3000                 goto out;
 3001         }
 3002 
 3003         *havecaps = caps;
 3004         *vpp = fp->f_vnode;
 3005         vrefact(*vpp);
 3006 
 3007         return (0);
 3008 out:
 3009         filecaps_free(&caps);
 3010         return (error);
 3011 }
 3012 
 3013 int
 3014 fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 3015 {
 3016 
 3017         return (_fgetvp(td, fd, FREAD, rightsp, vpp));
 3018 }
 3019 
 3020 int
 3021 fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 3022 {
 3023 
 3024         return (_fgetvp(td, fd, FEXEC, rightsp, vpp));
 3025 }
 3026 
 3027 #ifdef notyet
 3028 int
 3029 fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
 3030     struct vnode **vpp)
 3031 {
 3032 
 3033         return (_fgetvp(td, fd, FWRITE, rightsp, vpp));
 3034 }
 3035 #endif
 3036 
 3037 /*
 3038  * Handle the last reference to a file being closed.
 3039  *
 3040  * Without the noinline attribute clang keeps inlining the func thorough this
 3041  * file when fdrop is used.
 3042  */
 3043 int __noinline
 3044 _fdrop(struct file *fp, struct thread *td)
 3045 {
 3046         int error;
 3047 
 3048         if (fp->f_count != 0)
 3049                 panic("fdrop: count %d", fp->f_count);
 3050         error = fo_close(fp, td);
 3051         atomic_subtract_int(&openfiles, 1);
 3052         crfree(fp->f_cred);
 3053         free(fp->f_advice, M_FADVISE);
 3054         uma_zfree(file_zone, fp);
 3055 
 3056         return (error);
 3057 }
 3058 
 3059 /*
 3060  * Apply an advisory lock on a file descriptor.
 3061  *
 3062  * Just attempt to get a record lock of the requested type on the entire file
 3063  * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 3064  */
 3065 #ifndef _SYS_SYSPROTO_H_
 3066 struct flock_args {
 3067         int     fd;
 3068         int     how;
 3069 };
 3070 #endif
 3071 /* ARGSUSED */
 3072 int
 3073 sys_flock(struct thread *td, struct flock_args *uap)
 3074 {
 3075         struct file *fp;
 3076         struct vnode *vp;
 3077         struct flock lf;
 3078         int error;
 3079 
 3080         error = fget(td, uap->fd, &cap_flock_rights, &fp);
 3081         if (error != 0)
 3082                 return (error);
 3083         if (fp->f_type != DTYPE_VNODE) {
 3084                 fdrop(fp, td);
 3085                 return (EOPNOTSUPP);
 3086         }
 3087 
 3088         vp = fp->f_vnode;
 3089         lf.l_whence = SEEK_SET;
 3090         lf.l_start = 0;
 3091         lf.l_len = 0;
 3092         if (uap->how & LOCK_UN) {
 3093                 lf.l_type = F_UNLCK;
 3094                 atomic_clear_int(&fp->f_flag, FHASLOCK);
 3095                 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 3096                 goto done2;
 3097         }
 3098         if (uap->how & LOCK_EX)
 3099                 lf.l_type = F_WRLCK;
 3100         else if (uap->how & LOCK_SH)
 3101                 lf.l_type = F_RDLCK;
 3102         else {
 3103                 error = EBADF;
 3104                 goto done2;
 3105         }
 3106         atomic_set_int(&fp->f_flag, FHASLOCK);
 3107         error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 3108             (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
 3109 done2:
 3110         fdrop(fp, td);
 3111         return (error);
 3112 }
 3113 /*
 3114  * Duplicate the specified descriptor to a free descriptor.
 3115  */
 3116 int
 3117 dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
 3118     int openerror, int *indxp)
 3119 {
 3120         struct filedescent *newfde, *oldfde;
 3121         struct file *fp;
 3122         u_long *ioctls;
 3123         int error, indx;
 3124 
 3125         KASSERT(openerror == ENODEV || openerror == ENXIO,
 3126             ("unexpected error %d in %s", openerror, __func__));
 3127 
 3128         /*
 3129          * If the to-be-dup'd fd number is greater than the allowed number
 3130          * of file descriptors, or the fd to be dup'd has already been
 3131          * closed, then reject.
 3132          */
 3133         FILEDESC_XLOCK(fdp);
 3134         if ((fp = fget_locked(fdp, dfd)) == NULL) {
 3135                 FILEDESC_XUNLOCK(fdp);
 3136                 return (EBADF);
 3137         }
 3138 
 3139         error = fdalloc(td, 0, &indx);
 3140         if (error != 0) {
 3141                 FILEDESC_XUNLOCK(fdp);
 3142                 return (error);
 3143         }
 3144 
 3145         /*
 3146          * There are two cases of interest here.
 3147          *
 3148          * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
 3149          *
 3150          * For ENXIO steal away the file structure from (dfd) and store it in
 3151          * (indx).  (dfd) is effectively closed by this operation.
 3152          */
 3153         switch (openerror) {
 3154         case ENODEV:
 3155                 /*
 3156                  * Check that the mode the file is being opened for is a
 3157                  * subset of the mode of the existing descriptor.
 3158                  */
 3159                 if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
 3160                         fdunused(fdp, indx);
 3161                         FILEDESC_XUNLOCK(fdp);
 3162                         return (EACCES);
 3163                 }
 3164                 if (!fhold(fp)) {
 3165                         fdunused(fdp, indx);
 3166                         FILEDESC_XUNLOCK(fdp);
 3167                         return (EBADF);
 3168                 }
 3169                 newfde = &fdp->fd_ofiles[indx];
 3170                 oldfde = &fdp->fd_ofiles[dfd];
 3171                 ioctls = filecaps_copy_prep(&oldfde->fde_caps);
 3172 #ifdef CAPABILITIES
 3173                 seq_write_begin(&newfde->fde_seq);
 3174 #endif
 3175                 memcpy(newfde, oldfde, fde_change_size);
 3176                 filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps,
 3177                     ioctls);
 3178 #ifdef CAPABILITIES
 3179                 seq_write_end(&newfde->fde_seq);
 3180 #endif
 3181                 break;
 3182         case ENXIO:
 3183                 /*
 3184                  * Steal away the file pointer from dfd and stuff it into indx.
 3185                  */
 3186                 newfde = &fdp->fd_ofiles[indx];
 3187                 oldfde = &fdp->fd_ofiles[dfd];
 3188 #ifdef CAPABILITIES
 3189                 seq_write_begin(&newfde->fde_seq);
 3190 #endif
 3191                 memcpy(newfde, oldfde, fde_change_size);
 3192                 oldfde->fde_file = NULL;
 3193                 fdunused(fdp, dfd);
 3194 #ifdef CAPABILITIES
 3195                 seq_write_end(&newfde->fde_seq);
 3196 #endif
 3197                 break;
 3198         }
 3199         FILEDESC_XUNLOCK(fdp);
 3200         *indxp = indx;
 3201         return (0);
 3202 }
 3203 
 3204 /*
 3205  * This sysctl determines if we will allow a process to chroot(2) if it
 3206  * has a directory open:
 3207  *      0: disallowed for all processes.
 3208  *      1: allowed for processes that were not already chroot(2)'ed.
 3209  *      2: allowed for all processes.
 3210  */
 3211 
 3212 static int chroot_allow_open_directories = 1;
 3213 
 3214 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
 3215     &chroot_allow_open_directories, 0,
 3216     "Allow a process to chroot(2) if it has a directory open");
 3217 
 3218 /*
 3219  * Helper function for raised chroot(2) security function:  Refuse if
 3220  * any filedescriptors are open directories.
 3221  */
 3222 static int
 3223 chroot_refuse_vdir_fds(struct filedesc *fdp)
 3224 {
 3225         struct vnode *vp;
 3226         struct file *fp;
 3227         int fd;
 3228 
 3229         FILEDESC_LOCK_ASSERT(fdp);
 3230 
 3231         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
 3232                 fp = fget_locked(fdp, fd);
 3233                 if (fp == NULL)
 3234                         continue;
 3235                 if (fp->f_type == DTYPE_VNODE) {
 3236                         vp = fp->f_vnode;
 3237                         if (vp->v_type == VDIR)
 3238                                 return (EPERM);
 3239                 }
 3240         }
 3241         return (0);
 3242 }
 3243 
 3244 /*
 3245 * The caller is responsible for invoking priv_check() and
 3246 * mac_vnode_check_chroot() to authorize this operation.
 3247 */
 3248 int
 3249 pwd_chroot(struct thread *td, struct vnode *vp)
 3250 {
 3251         struct filedesc *fdp;
 3252         struct vnode *oldvp;
 3253         int error;
 3254 
 3255         fdp = td->td_proc->p_fd;
 3256         FILEDESC_XLOCK(fdp);
 3257         if (chroot_allow_open_directories == 0 ||
 3258             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
 3259                 error = chroot_refuse_vdir_fds(fdp);
 3260                 if (error != 0) {
 3261                         FILEDESC_XUNLOCK(fdp);
 3262                         return (error);
 3263                 }
 3264         }
 3265         oldvp = fdp->fd_rdir;
 3266         vrefact(vp);
 3267         fdp->fd_rdir = vp;
 3268         if (fdp->fd_jdir == NULL) {
 3269                 vrefact(vp);
 3270                 fdp->fd_jdir = vp;
 3271         }
 3272         FILEDESC_XUNLOCK(fdp);
 3273         vrele(oldvp);
 3274         return (0);
 3275 }
 3276 
 3277 void
 3278 pwd_chdir(struct thread *td, struct vnode *vp)
 3279 {
 3280         struct filedesc *fdp;
 3281         struct vnode *oldvp;
 3282 
 3283         fdp = td->td_proc->p_fd;
 3284         FILEDESC_XLOCK(fdp);
 3285         VNASSERT(vp->v_usecount > 0, vp,
 3286             ("chdir to a vnode with zero usecount"));
 3287         oldvp = fdp->fd_cdir;
 3288         fdp->fd_cdir = vp;
 3289         FILEDESC_XUNLOCK(fdp);
 3290         vrele(oldvp);
 3291 }
 3292 
 3293 /*
 3294  * jail_attach(2) changes both root and working directories.
 3295  */
 3296 int
 3297 pwd_chroot_chdir(struct thread *td, struct vnode *vp)
 3298 {
 3299         struct filedesc *fdp;
 3300         struct vnode *oldvrp, *oldvcp;
 3301         int error;
 3302 
 3303         fdp = td->td_proc->p_fd;
 3304         FILEDESC_XLOCK(fdp);
 3305         error = chroot_refuse_vdir_fds(fdp);
 3306         if (error != 0) {
 3307                 FILEDESC_XUNLOCK(fdp);
 3308                 return (error);
 3309         }
 3310         oldvrp = fdp->fd_rdir;
 3311         vrefact(vp);
 3312         fdp->fd_rdir = vp;
 3313         oldvcp = fdp->fd_cdir;
 3314         vrefact(vp);
 3315         fdp->fd_cdir = vp;
 3316         if (fdp->fd_jdir == NULL) {
 3317                 vrefact(vp);
 3318                 fdp->fd_jdir = vp;
 3319         }
 3320         FILEDESC_XUNLOCK(fdp);
 3321         vrele(oldvrp);
 3322         vrele(oldvcp);
 3323         return (0);
 3324 }
 3325 
 3326 /*
 3327  * Scan all active processes and prisons to see if any of them have a current
 3328  * or root directory of `olddp'. If so, replace them with the new mount point.
 3329  */
 3330 void
 3331 mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
 3332 {
 3333         struct filedesc *fdp;
 3334         struct prison *pr;
 3335         struct proc *p;
 3336         int nrele;
 3337 
 3338         if (vrefcnt(olddp) == 1)
 3339                 return;
 3340         nrele = 0;
 3341         sx_slock(&allproc_lock);
 3342         FOREACH_PROC_IN_SYSTEM(p) {
 3343                 PROC_LOCK(p);
 3344                 fdp = fdhold(p);
 3345                 PROC_UNLOCK(p);
 3346                 if (fdp == NULL)
 3347                         continue;
 3348                 FILEDESC_XLOCK(fdp);
 3349                 if (fdp->fd_cdir == olddp) {
 3350                         vrefact(newdp);
 3351                         fdp->fd_cdir = newdp;
 3352                         nrele++;
 3353                 }
 3354                 if (fdp->fd_rdir == olddp) {
 3355                         vrefact(newdp);
 3356                         fdp->fd_rdir = newdp;
 3357                         nrele++;
 3358                 }
 3359                 if (fdp->fd_jdir == olddp) {
 3360                         vrefact(newdp);
 3361                         fdp->fd_jdir = newdp;
 3362                         nrele++;
 3363                 }
 3364                 FILEDESC_XUNLOCK(fdp);
 3365                 fddrop(fdp);
 3366         }
 3367         sx_sunlock(&allproc_lock);
 3368         if (rootvnode == olddp) {
 3369                 vrefact(newdp);
 3370                 rootvnode = newdp;
 3371                 nrele++;
 3372         }
 3373         mtx_lock(&prison0.pr_mtx);
 3374         if (prison0.pr_root == olddp) {
 3375                 vrefact(newdp);
 3376                 prison0.pr_root = newdp;
 3377                 nrele++;
 3378         }
 3379         mtx_unlock(&prison0.pr_mtx);
 3380         sx_slock(&allprison_lock);
 3381         TAILQ_FOREACH(pr, &allprison, pr_list) {
 3382                 mtx_lock(&pr->pr_mtx);
 3383                 if (pr->pr_root == olddp) {
 3384                         vrefact(newdp);
 3385                         pr->pr_root = newdp;
 3386                         nrele++;
 3387                 }
 3388                 mtx_unlock(&pr->pr_mtx);
 3389         }
 3390         sx_sunlock(&allprison_lock);
 3391         while (nrele--)
 3392                 vrele(olddp);
 3393 }
 3394 
 3395 struct filedesc_to_leader *
 3396 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
 3397 {
 3398         struct filedesc_to_leader *fdtol;
 3399 
 3400         fdtol = malloc(sizeof(struct filedesc_to_leader),
 3401             M_FILEDESC_TO_LEADER, M_WAITOK);
 3402         fdtol->fdl_refcount = 1;
 3403         fdtol->fdl_holdcount = 0;
 3404         fdtol->fdl_wakeup = 0;
 3405         fdtol->fdl_leader = leader;
 3406         if (old != NULL) {
 3407                 FILEDESC_XLOCK(fdp);
 3408                 fdtol->fdl_next = old->fdl_next;
 3409                 fdtol->fdl_prev = old;
 3410                 old->fdl_next = fdtol;
 3411                 fdtol->fdl_next->fdl_prev = fdtol;
 3412                 FILEDESC_XUNLOCK(fdp);
 3413         } else {
 3414                 fdtol->fdl_next = fdtol;
 3415                 fdtol->fdl_prev = fdtol;
 3416         }
 3417         return (fdtol);
 3418 }
 3419 
 3420 static int
 3421 sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS)
 3422 {
 3423         struct filedesc *fdp;
 3424         int i, count, slots;
 3425 
 3426         if (*(int *)arg1 != 0)
 3427                 return (EINVAL);
 3428 
 3429         fdp = curproc->p_fd;
 3430         count = 0;
 3431         FILEDESC_SLOCK(fdp);
 3432         slots = NDSLOTS(fdp->fd_lastfile + 1);
 3433         for (i = 0; i < slots; i++)
 3434                 count += bitcountl(fdp->fd_map[i]);
 3435         FILEDESC_SUNLOCK(fdp);
 3436 
 3437         return (SYSCTL_OUT(req, &count, sizeof(count)));
 3438 }
 3439 
 3440 static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds,
 3441     CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds,
 3442     "Number of open file descriptors");
 3443 
 3444 /*
 3445  * Get file structures globally.
 3446  */
 3447 static int
 3448 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 3449 {
 3450         struct xfile xf;
 3451         struct filedesc *fdp;
 3452         struct file *fp;
 3453         struct proc *p;
 3454         int error, n;
 3455 
 3456         error = sysctl_wire_old_buffer(req, 0);
 3457         if (error != 0)
 3458                 return (error);
 3459         if (req->oldptr == NULL) {
 3460                 n = 0;
 3461                 sx_slock(&allproc_lock);
 3462                 FOREACH_PROC_IN_SYSTEM(p) {
 3463                         PROC_LOCK(p);
 3464                         if (p->p_state == PRS_NEW) {
 3465                                 PROC_UNLOCK(p);
 3466                                 continue;
 3467                         }
 3468                         fdp = fdhold(p);
 3469                         PROC_UNLOCK(p);
 3470                         if (fdp == NULL)
 3471                                 continue;
 3472                         /* overestimates sparse tables. */
 3473                         if (fdp->fd_lastfile > 0)
 3474                                 n += fdp->fd_lastfile;
 3475                         fddrop(fdp);
 3476                 }
 3477                 sx_sunlock(&allproc_lock);
 3478                 return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
 3479         }
 3480         error = 0;
 3481         bzero(&xf, sizeof(xf));
 3482         xf.xf_size = sizeof(xf);
 3483         sx_slock(&allproc_lock);
 3484         FOREACH_PROC_IN_SYSTEM(p) {
 3485                 PROC_LOCK(p);
 3486                 if (p->p_state == PRS_NEW) {
 3487                         PROC_UNLOCK(p);
 3488                         continue;
 3489                 }
 3490                 if (p_cansee(req->td, p) != 0) {
 3491                         PROC_UNLOCK(p);
 3492                         continue;
 3493                 }
 3494                 xf.xf_pid = p->p_pid;
 3495                 xf.xf_uid = p->p_ucred->cr_uid;
 3496                 fdp = fdhold(p);
 3497                 PROC_UNLOCK(p);
 3498                 if (fdp == NULL)
 3499                         continue;
 3500                 FILEDESC_SLOCK(fdp);
 3501                 for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) {
 3502                         if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
 3503                                 continue;
 3504                         xf.xf_fd = n;
 3505                         xf.xf_file = (uintptr_t)fp;
 3506                         xf.xf_data = (uintptr_t)fp->f_data;
 3507                         xf.xf_vnode = (uintptr_t)fp->f_vnode;
 3508                         xf.xf_type = (uintptr_t)fp->f_type;
 3509                         xf.xf_count = fp->f_count;
 3510                         xf.xf_msgcount = 0;
 3511                         xf.xf_offset = foffset_get(fp);
 3512                         xf.xf_flag = fp->f_flag;
 3513                         error = SYSCTL_OUT(req, &xf, sizeof(xf));
 3514                         if (error)
 3515                                 break;
 3516                 }
 3517                 FILEDESC_SUNLOCK(fdp);
 3518                 fddrop(fdp);
 3519                 if (error)
 3520                         break;
 3521         }
 3522         sx_sunlock(&allproc_lock);
 3523         return (error);
 3524 }
 3525 
/*
 * kern.file (OID KERN_FILE): read-only, MP-safe opaque node whose contents
 * are produced by sysctl_kern_file() as a sequence of struct xfile records.
 */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
 3528 
/*
 * When KINFO_FILE_SIZE is defined, fail the build if struct kinfo_file
 * does not have exactly that size.
 */
#ifdef KINFO_FILE_SIZE
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
#endif
 3532 
 3533 static int
 3534 xlate_fflags(int fflags)
 3535 {
 3536         static const struct {
 3537                 int     fflag;
 3538                 int     kf_fflag;
 3539         } fflags_table[] = {
 3540                 { FAPPEND, KF_FLAG_APPEND },
 3541                 { FASYNC, KF_FLAG_ASYNC },
 3542                 { FFSYNC, KF_FLAG_FSYNC },
 3543                 { FHASLOCK, KF_FLAG_HASLOCK },
 3544                 { FNONBLOCK, KF_FLAG_NONBLOCK },
 3545                 { FREAD, KF_FLAG_READ },
 3546                 { FWRITE, KF_FLAG_WRITE },
 3547                 { O_CREAT, KF_FLAG_CREAT },
 3548                 { O_DIRECT, KF_FLAG_DIRECT },
 3549                 { O_EXCL, KF_FLAG_EXCL },
 3550                 { O_EXEC, KF_FLAG_EXEC },
 3551                 { O_EXLOCK, KF_FLAG_EXLOCK },
 3552                 { O_NOFOLLOW, KF_FLAG_NOFOLLOW },
 3553                 { O_SHLOCK, KF_FLAG_SHLOCK },
 3554                 { O_TRUNC, KF_FLAG_TRUNC }
 3555         };
 3556         unsigned int i;
 3557         int kflags;
 3558 
 3559         kflags = 0;
 3560         for (i = 0; i < nitems(fflags_table); i++)
 3561                 if (fflags & fflags_table[i].fflag)
 3562                         kflags |=  fflags_table[i].kf_fflag;
 3563         return (kflags);
 3564 }
 3565 
 3566 /* Trim unused data from kf_path by truncating the structure size. */
 3567 void
 3568 pack_kinfo(struct kinfo_file *kif)
 3569 {
 3570 
 3571         kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
 3572             strlen(kif->kf_path) + 1;
 3573         kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
 3574 }
 3575 
 3576 static void
 3577 export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
 3578     struct kinfo_file *kif, struct filedesc *fdp, int flags)
 3579 {
 3580         int error;
 3581 
 3582         bzero(kif, sizeof(*kif));
 3583 
 3584         /* Set a default type to allow for empty fill_kinfo() methods. */
 3585         kif->kf_type = KF_TYPE_UNKNOWN;
 3586         kif->kf_flags = xlate_fflags(fp->f_flag);
 3587         if (rightsp != NULL)
 3588                 kif->kf_cap_rights = *rightsp;
 3589         else
 3590                 cap_rights_init(&kif->kf_cap_rights);
 3591         kif->kf_fd = fd;
 3592         kif->kf_ref_count = fp->f_count;
 3593         kif->kf_offset = foffset_get(fp);
 3594 
 3595         /*
 3596          * This may drop the filedesc lock, so the 'fp' cannot be
 3597          * accessed after this call.
 3598          */
 3599         error = fo_fill_kinfo(fp, kif, fdp);
 3600         if (error == 0)
 3601                 kif->kf_status |= KF_ATTR_VALID;
 3602         if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
 3603                 pack_kinfo(kif);
 3604         else
 3605                 kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
 3606 }
 3607 
 3608 static void
 3609 export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
 3610     struct kinfo_file *kif, int flags)
 3611 {
 3612         int error;
 3613 
 3614         bzero(kif, sizeof(*kif));
 3615 
 3616         kif->kf_type = KF_TYPE_VNODE;
 3617         error = vn_fill_kinfo_vnode(vp, kif);
 3618         if (error == 0)
 3619                 kif->kf_status |= KF_ATTR_VALID;
 3620         kif->kf_flags = xlate_fflags(fflags);
 3621         cap_rights_init(&kif->kf_cap_rights);
 3622         kif->kf_fd = fd;
 3623         kif->kf_ref_count = -1;
 3624         kif->kf_offset = -1;
 3625         if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
 3626                 pack_kinfo(kif);
 3627         else
 3628                 kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
 3629         vrele(vp);
 3630 }
 3631 
/*
 * Working state shared by the export_*_to_sb() helpers while a sequence of
 * kinfo_file records is written to an sbuf.
 */
struct export_fd_buf {
        /*
         * Descriptor table whose shared lock is dropped around sbuf
         * output; may be NULL, in which case export_vnode_to_sb() does
         * no locking.
         */
        struct filedesc         *fdp;
        /* Destination of the exported records. */
        struct sbuf             *sb;
        /*
         * Output budget in bytes: -1 means unlimited, 0 means the export
         * has been terminated and further records are skipped.
         */
        ssize_t                 remainder;
        /* Scratch record rebuilt for every exported entry. */
        struct kinfo_file       kif;
        /* Passed to export_*_to_kinfo(), e.g. KERN_FILEDESC_PACK_KINFO. */
        int                     flags;
};
 3639 
 3640 static int
 3641 export_kinfo_to_sb(struct export_fd_buf *efbuf)
 3642 {
 3643         struct kinfo_file *kif;
 3644 
 3645         kif = &efbuf->kif;
 3646         if (efbuf->remainder != -1) {
 3647                 if (efbuf->remainder < kif->kf_structsize) {
 3648                         /* Terminate export. */
 3649                         efbuf->remainder = 0;
 3650                         return (0);
 3651                 }
 3652                 efbuf->remainder -= kif->kf_structsize;
 3653         }
 3654         return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 0 : ENOMEM);
 3655 }
 3656 
 3657 static int
 3658 export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp,
 3659     struct export_fd_buf *efbuf)
 3660 {
 3661         int error;
 3662 
 3663         if (efbuf->remainder == 0)
 3664                 return (0);
 3665         export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp,
 3666             efbuf->flags);
 3667         FILEDESC_SUNLOCK(efbuf->fdp);
 3668         error = export_kinfo_to_sb(efbuf);
 3669         FILEDESC_SLOCK(efbuf->fdp);
 3670         return (error);
 3671 }
 3672 
 3673 static int
 3674 export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
 3675     struct export_fd_buf *efbuf)
 3676 {
 3677         int error;
 3678 
 3679         if (efbuf->remainder == 0)
 3680                 return (0);
 3681         if (efbuf->fdp != NULL)
 3682                 FILEDESC_SUNLOCK(efbuf->fdp);
 3683         export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags);
 3684         error = export_kinfo_to_sb(efbuf);
 3685         if (efbuf->fdp != NULL)
 3686                 FILEDESC_SLOCK(efbuf->fdp);
 3687         return (error);
 3688 }
 3689 
/*
 * Store a process's file descriptor information in an sbuf.
 *
 * Takes a locked proc as argument, and returns with the proc unlocked.
 *
 * Records are emitted in this order: ktrace vnode, text vnode, controlling
 * tty vnode, then (if the process still has a descriptor table) the
 * current, root and jail directories followed by every open descriptor.
 * 'maxlen' seeds the output budget (-1 disables the limit, see
 * export_kinfo_to_sb()) and 'flags' is forwarded to the kinfo exporters.
 *
 * Only an error from export_file_to_sb() is returned; the results of the
 * export_vnode_to_sb() calls are not checked.
 */
int
kern_proc_filedesc_out(struct proc *p,  struct sbuf *sb, ssize_t maxlen,
    int flags)
{
        struct file *fp;
        struct filedesc *fdp;
        struct export_fd_buf *efbuf;
        struct vnode *cttyvp, *textvp, *tracevp;
        int error, i;
        cap_rights_t rights;

        PROC_LOCK_ASSERT(p, MA_OWNED);

        /*
         * Collect and reference the per-process vnodes while the proc
         * lock is still held; they are exported after it is dropped.
         */
        /* ktrace vnode */
        tracevp = p->p_tracevp;
        if (tracevp != NULL)
                vrefact(tracevp);
        /* text vnode */
        textvp = p->p_textvp;
        if (textvp != NULL)
                vrefact(textvp);
        /* Controlling tty. */
        cttyvp = NULL;
        if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) {
                cttyvp = p->p_pgrp->pg_session->s_ttyvp;
                if (cttyvp != NULL)
                        vrefact(cttyvp);
        }
        fdp = fdhold(p);
        PROC_UNLOCK(p);
        efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
        /*
         * No filedesc lock is held yet, so leave fdp NULL to keep
         * export_vnode_to_sb() from trying to drop one.
         */
        efbuf->fdp = NULL;
        efbuf->sb = sb;
        efbuf->remainder = maxlen;
        efbuf->flags = flags;
        if (tracevp != NULL)
                export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE,
                    efbuf);
        if (textvp != NULL)
                export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf);
        if (cttyvp != NULL)
                export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE,
                    efbuf);
        error = 0;
        /* Without a descriptor table there is nothing more to export. */
        if (fdp == NULL)
                goto fail;
        efbuf->fdp = fdp;
        FILEDESC_SLOCK(fdp);
        /*
         * Each directory vnode is referenced before the call because
         * export_vnode_to_kinfo() releases one reference with vrele().
         */
        /* working directory */
        if (fdp->fd_cdir != NULL) {
                vrefact(fdp->fd_cdir);
                export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
        }
        /* root directory */
        if (fdp->fd_rdir != NULL) {
                vrefact(fdp->fd_rdir);
                export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
        }
        /* jail directory */
        if (fdp->fd_jdir != NULL) {
                vrefact(fdp->fd_jdir);
                export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
        }
        for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
                if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
                        continue;
#ifdef CAPABILITIES
                rights = *cap_rights(fdp, i);
#else /* !CAPABILITIES */
                rights = cap_no_rights;
#endif
                /*
                 * Create sysctl entry.  It is OK to drop the filedesc
                 * lock inside of export_file_to_sb() as we will
                 * re-validate and re-evaluate its properties when the
                 * loop continues.
                 */
                error = export_file_to_sb(fp, i, &rights, efbuf);
                /* Stop on failure or once the output budget is used up. */
                if (error != 0 || efbuf->remainder == 0)
                        break;
        }
        FILEDESC_SUNLOCK(fdp);
        fddrop(fdp);
        /* Also reached with error == 0 when the process has no table. */
fail:
        free(efbuf, M_TEMP);
        return (error);
}
 3782 
/*
 * Length passed to sbuf_new_for_sysctl() by sysctl_kern_proc_filedesc():
 * the size of five full struct kinfo_file records.
 */
#define FILEDESC_SBUF_SIZE      (sizeof(struct kinfo_file) * 5)
 3784 
 3785 /*
 3786  * Get per-process file descriptors for use by procstat(1), et al.
 3787  */
 3788 static int
 3789 sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
 3790 {
 3791         struct sbuf sb;
 3792         struct proc *p;
 3793         ssize_t maxlen;
 3794         int error, error2, *name;
 3795 
 3796         name = (int *)arg1;
 3797 
 3798         sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
 3799         sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 3800         error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 3801         if (error != 0) {
 3802                 sbuf_delete(&sb);
 3803                 return (error);
 3804         }
 3805         maxlen = req->oldptr != NULL ? req->oldlen : -1;
 3806         error = kern_proc_filedesc_out(p, &sb, maxlen,
 3807             KERN_FILEDESC_PACK_KINFO);
 3808         error2 = sbuf_finish(&sb);
 3809         sbuf_delete(&sb);
 3810         return (error != 0 ? error : error2);
 3811 }
 3812 
 3813 #ifdef COMPAT_FREEBSD7
/*
 * When KINFO_OFILE_SIZE is defined, fail the build if struct kinfo_ofile
 * does not have exactly that size.
 */
#ifdef KINFO_OFILE_SIZE
CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
#endif
 3817 
 3818 static void
 3819 kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif)
 3820 {
 3821 
 3822         okif->kf_structsize = sizeof(*okif);
 3823         okif->kf_type = kif->kf_type;
 3824         okif->kf_fd = kif->kf_fd;
 3825         okif->kf_ref_count = kif->kf_ref_count;
 3826         okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE |
 3827             KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK |
 3828             KF_FLAG_DIRECT | KF_FLAG_HASLOCK);
 3829         okif->kf_offset = kif->kf_offset;
 3830         if (kif->kf_type == KF_TYPE_VNODE)
 3831                 okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type;
 3832         else
 3833                 okif->kf_vnode_type = KF_VTYPE_VNON;
 3834         strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path));
 3835         if (kif->kf_type == KF_TYPE_SOCKET) {
 3836                 okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0;
 3837                 okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0;
 3838                 okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0;
 3839                 okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local;
 3840                 okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer;
 3841         } else {
 3842                 okif->kf_sa_local.ss_family = AF_UNSPEC;
 3843                 okif->kf_sa_peer.ss_family = AF_UNSPEC;
 3844         }
 3845 }
 3846 
 3847 static int
 3848 export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif,
 3849     struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req)
 3850 {
 3851         int error;
 3852 
 3853         vrefact(vp);
 3854         FILEDESC_SUNLOCK(fdp);
 3855         export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO);
 3856         kinfo_to_okinfo(kif, okif);
 3857         error = SYSCTL_OUT(req, okif, sizeof(*okif));
 3858         FILEDESC_SLOCK(fdp);
 3859         return (error);
 3860 }
 3861 
/*
 * Get per-process file descriptors for use by procstat(1), et al.
 *
 * COMPAT_FREEBSD7 variant: every entry is converted to a fixed-size
 * struct kinfo_ofile and copied out with SYSCTL_OUT().  The first element
 * of the name vector (arg1) is the pid to report on.
 *
 * Returns the pget() error, ENOENT if the process has no descriptor
 * table, and 0 otherwise.
 *
 * NOTE(review): a SYSCTL_OUT() failure inside the loop stops the export,
 * but the function still returns 0; the results of the
 * export_vnode_for_osysctl() calls are not checked either.  Confirm
 * whether this is intentional before changing it.
 */
static int
sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
{
        struct kinfo_ofile *okif;
        struct kinfo_file *kif;
        struct filedesc *fdp;
        int error, i, *name;
        struct file *fp;
        struct proc *p;

        name = (int *)arg1;
        error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
        if (error != 0)
                return (error);
        /* Hold the descriptor table, then drop the proc lock. */
        fdp = fdhold(p);
        PROC_UNLOCK(p);
        if (fdp == NULL)
                return (ENOENT);
        /* Scratch buffers reused for every exported entry. */
        kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
        okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK);
        FILEDESC_SLOCK(fdp);
        /* Current, root and jail directories come first. */
        if (fdp->fd_cdir != NULL)
                export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
                    okif, fdp, req);
        if (fdp->fd_rdir != NULL)
                export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
                    okif, fdp, req);
        if (fdp->fd_jdir != NULL)
                export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
                    okif, fdp, req);
        for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
                if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
                        continue;
                export_file_to_kinfo(fp, i, NULL, kif, fdp,
                    KERN_FILEDESC_PACK_KINFO);
                /* The filedesc lock is not held across the copy-out. */
                FILEDESC_SUNLOCK(fdp);
                kinfo_to_okinfo(kif, okif);
                error = SYSCTL_OUT(req, okif, sizeof(*okif));
                FILEDESC_SLOCK(fdp);
                if (error)
                        break;
        }
        FILEDESC_SUNLOCK(fdp);
        fddrop(fdp);
        free(kif, M_TEMP);
        free(okif, M_TEMP);
        return (0);
}
 3913 
/*
 * kern.proc.ofiledesc (OID KERN_PROC_OFILEDESC): read-only, MP-safe node
 * handled by sysctl_kern_proc_ofiledesc().
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc,
    CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc,
    "Process ofiledesc entries");
 3917 #endif  /* COMPAT_FREEBSD7 */
 3918 
 3919 int
 3920 vntype_to_kinfo(int vtype)
 3921 {
 3922         struct {
 3923                 int     vtype;
 3924                 int     kf_vtype;
 3925         } vtypes_table[] = {
 3926                 { VBAD, KF_VTYPE_VBAD },
 3927                 { VBLK, KF_VTYPE_VBLK },
 3928                 { VCHR, KF_VTYPE_VCHR },
 3929                 { VDIR, KF_VTYPE_VDIR },
 3930                 { VFIFO, KF_VTYPE_VFIFO },
 3931                 { VLNK, KF_VTYPE_VLNK },
 3932                 { VNON, KF_VTYPE_VNON },
 3933                 { VREG, KF_VTYPE_VREG },
 3934                 { VSOCK, KF_VTYPE_VSOCK }
 3935         };
 3936         unsigned int i;
 3937 
 3938         /*
 3939          * Perform vtype translation.
 3940          */
 3941         for (i = 0; i < nitems(vtypes_table); i++)
 3942                 if (vtypes_table[i].vtype == vtype)
 3943                         return (vtypes_table[i].kf_vtype);
 3944 
 3945         return (KF_VTYPE_UNKNOWN);
 3946 }
 3947 
/*
 * kern.proc.filedesc (OID KERN_PROC_FILEDESC): read-only, MP-safe node
 * handled by sysctl_kern_proc_filedesc().
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc,
    CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc,
    "Process filedesc entries");
 3951 
 3952 /*
 3953  * Store a process current working directory information to sbuf.
 3954  *
 3955  * Takes a locked proc as argument, and returns with the proc unlocked.
 3956  */
 3957 int
 3958 kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ssize_t maxlen)
 3959 {
 3960         struct filedesc *fdp;
 3961         struct export_fd_buf *efbuf;
 3962         int error;
 3963 
 3964         PROC_LOCK_ASSERT(p, MA_OWNED);
 3965 
 3966         fdp = fdhold(p);
 3967         PROC_UNLOCK(p);
 3968         if (fdp == NULL)
 3969                 return (EINVAL);
 3970 
 3971         efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
 3972         efbuf->fdp = fdp;
 3973         efbuf->sb = sb;
 3974         efbuf->remainder = maxlen;
 3975 
 3976         FILEDESC_SLOCK(fdp);
 3977         if (fdp->fd_cdir == NULL)
 3978                 error = EINVAL;
 3979         else {
 3980                 vrefact(fdp->fd_cdir);
 3981                 error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD,
 3982                     FREAD, efbuf);
 3983         }
 3984         FILEDESC_SUNLOCK(fdp);
 3985         fddrop(fdp);
 3986         free(efbuf, M_TEMP);
 3987         return (error);
 3988 }
 3989 
 3990 /*
 3991  * Get per-process current working directory.
 3992  */
 3993 static int
 3994 sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
 3995 {
 3996         struct sbuf sb;
 3997         struct proc *p;
 3998         ssize_t maxlen;
 3999         int error, error2, *name;
 4000 
 4001         name = (int *)arg1;
 4002 
 4003         sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req);
 4004         sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 4005         error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 4006         if (error != 0) {
 4007                 sbuf_delete(&sb);
 4008                 return (error);
 4009         }
 4010         maxlen = req->oldptr != NULL ? req->oldlen : -1;
 4011         error = kern_proc_cwd_out(p, &sb, maxlen);
 4012         error2 = sbuf_finish(&sb);
 4013         sbuf_delete(&sb);
 4014         return (error != 0 ? error : error2);
 4015 }
 4016 
/*
 * kern.proc.cwd (OID KERN_PROC_CWD): read-only, MP-safe node handled by
 * sysctl_kern_proc_cwd().
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE,
    sysctl_kern_proc_cwd, "Process current working directory");
 4019 
 4020 #ifdef DDB
 4021 /*
 4022  * For the purposes of debugging, generate a human-readable string for the
 4023  * file type.
 4024  */
 4025 static const char *
 4026 file_type_to_name(short type)
 4027 {
 4028 
 4029         switch (type) {
 4030         case 0:
 4031                 return ("zero");
 4032         case DTYPE_VNODE:
 4033                 return ("vnode");
 4034         case DTYPE_SOCKET:
 4035                 return ("socket");
 4036         case DTYPE_PIPE:
 4037                 return ("pipe");
 4038         case DTYPE_FIFO:
 4039                 return ("fifo");
 4040         case DTYPE_KQUEUE:
 4041                 return ("kqueue");
 4042         case DTYPE_CRYPTO:
 4043                 return ("crypto");
 4044         case DTYPE_MQUEUE:
 4045                 return ("mqueue");
 4046         case DTYPE_SHM:
 4047                 return ("shm");
 4048         case DTYPE_SEM:
 4049                 return ("ksem");
 4050         case DTYPE_PTS:
 4051                 return ("pts");
 4052         case DTYPE_DEV:
 4053                 return ("dev");
 4054         case DTYPE_PROCDESC:
 4055                 return ("proc");
 4056         case DTYPE_LINUXEFD:
 4057                 return ("levent");
 4058         case DTYPE_LINUXTFD:
 4059                 return ("ltimer");
 4060         default:
 4061                 return ("unkn");
 4062         }
 4063 }
 4064 
 4065 /*
 4066  * For the purposes of debugging, identify a process (if any, perhaps one of
 4067  * many) that references the passed file in its file descriptor array. Return
 4068  * NULL if none.
 4069  */
 4070 static struct proc *
 4071 file_to_first_proc(struct file *fp)
 4072 {
 4073         struct filedesc *fdp;
 4074         struct proc *p;
 4075         int n;
 4076 
 4077         FOREACH_PROC_IN_SYSTEM(p) {
 4078                 if (p->p_state == PRS_NEW)
 4079                         continue;
 4080                 fdp = p->p_fd;
 4081                 if (fdp == NULL)
 4082                         continue;
 4083                 for (n = 0; n <= fdp->fd_lastfile; n++) {
 4084                         if (fp == fdp->fd_ofiles[n].fde_file)
 4085                                 return (p);
 4086                 }
 4087         }
 4088         return (NULL);
 4089 }
 4090 
 4091 static void
 4092 db_print_file(struct file *fp, int header)
 4093 {
 4094 #define XPTRWIDTH ((int)howmany(sizeof(void *) * NBBY, 4))
 4095         struct proc *p;
 4096 
 4097         if (header)
 4098                 db_printf("%*s %6s %*s %8s %4s %5s %6s %*s %5s %s\n",
 4099                     XPTRWIDTH, "File", "Type", XPTRWIDTH, "Data", "Flag",
 4100                     "GCFl", "Count", "MCount", XPTRWIDTH, "Vnode", "FPID",
 4101                     "FCmd");
 4102         p = file_to_first_proc(fp);
 4103         db_printf("%*p %6s %*p %08x %04x %5d %6d %*p %5d %s\n", XPTRWIDTH,
 4104             fp, file_type_to_name(fp->f_type), XPTRWIDTH, fp->f_data,
 4105             fp->f_flag, 0, fp->f_count, 0, XPTRWIDTH, fp->f_vnode,
 4106             p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
 4107 
 4108 #undef XPTRWIDTH
 4109 }
 4110 
 4111 DB_SHOW_COMMAND(file, db_show_file)
 4112 {
 4113         struct file *fp;
 4114 
 4115         if (!have_addr) {
 4116                 db_printf("usage: show file <addr>\n");
 4117                 return;
 4118         }
 4119         fp = (struct file *)addr;
 4120         db_print_file(fp, 1);
 4121 }
 4122 
 4123 DB_SHOW_COMMAND(files, db_show_files)
 4124 {
 4125         struct filedesc *fdp;
 4126         struct file *fp;
 4127         struct proc *p;
 4128         int header;
 4129         int n;
 4130 
 4131         header = 1;
 4132         FOREACH_PROC_IN_SYSTEM(p) {
 4133                 if (p->p_state == PRS_NEW)
 4134                         continue;
 4135                 if ((fdp = p->p_fd) == NULL)
 4136                         continue;
 4137                 for (n = 0; n <= fdp->fd_lastfile; ++n) {
 4138                         if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
 4139                                 continue;
 4140                         db_print_file(fp, header);
 4141                         header = 0;
 4142                 }
 4143         }
 4144 }
 4145 #endif
 4146 
/*
 * Integer sysctls exposing the file limits and the open-file counter:
 * kern.maxfilesperproc and kern.maxfiles are read-write, kern.openfiles
 * is read-only.
 */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
 4155 
/*
 * Boot-time initialization, run through the SYSINIT below: create the UMA
 * zones for struct file ("Files") and struct filedesc0 ("filedesc0") and
 * initialize sigio_lock.  The argument is unused.
 */
/* ARGSUSED*/
static void
filelistinit(void *dummy)
{

        /* The file zone is created with UMA_ZONE_NOFREE. */
        file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
        filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
 4168 
 4169 /*-------------------------------------------------------------------*/
 4170 
/*
 * Read and write method of badfileops: performs no I/O and always fails
 * with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

        return (EBADF);
}
 4178 
/*
 * Truncate method of badfileops: always fails with EINVAL.
 */
static int
badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}
 4186 
/*
 * Ioctl method of badfileops: always fails with EBADF.
 */
static int
badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}
 4194 
/*
 * Poll method of badfileops: ignores the requested events and always
 * returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

        return (0);
}
 4202 
/*
 * Kqfilter method of badfileops: always fails with EBADF.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

        return (EBADF);
}
 4209 
/*
 * Stat method of badfileops: leaves 'sb' untouched and always fails with
 * EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}
 4217 
/*
 * Close method of badfileops: does nothing and returns 0.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{

        return (0);
}
 4224 
/*
 * Chmod method of badfileops: always fails with EBADF.
 */
static int
badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}
 4232 
/*
 * Chown method of badfileops: always fails with EBADF.
 */
static int
badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}
 4240 
/*
 * Sendfile method of badfileops: transfers nothing and always fails with
 * EBADF.
 */
static int
badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
    struct thread *td)
{

        return (EBADF);
}
 4249 
/*
 * Fill_kinfo method of badfileops: leaves 'kif' untouched and returns 0.
 * export_file_to_kinfo() treats 0 as success, so such a file is exported
 * with its default KF_TYPE_UNKNOWN type and KF_ATTR_VALID set.
 */
static int
badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{

        return (0);
}
 4256 
/*
 * File operations vector built entirely from the badfo_*() stubs above:
 * close, poll and fill_kinfo return 0, truncate fails with EINVAL and
 * every other operation fails with EBADF.
 */
struct fileops badfileops = {
        .fo_read = badfo_readwrite,
        .fo_write = badfo_readwrite,
        .fo_truncate = badfo_truncate,
        .fo_ioctl = badfo_ioctl,
        .fo_poll = badfo_poll,
        .fo_kqfilter = badfo_kqfilter,
        .fo_stat = badfo_stat,
        .fo_close = badfo_close,
        .fo_chmod = badfo_chmod,
        .fo_chown = badfo_chown,
        .fo_sendfile = badfo_sendfile,
        .fo_fill_kinfo = badfo_fill_kinfo,
};
 4271 
/*
 * Exported read/write stub: performs no I/O and always fails with
 * EOPNOTSUPP.
 */
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

        return (EOPNOTSUPP);
}
 4279 
/*
 * Exported truncate stub: always fails with EINVAL.
 */
int
invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}
 4287 
/*
 * Exported ioctl stub: always fails with ENOTTY.
 */
int
invfo_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td)
{

        return (ENOTTY);
}
 4295 
/*
 * Exported poll stub: returns whatever poll_no_poll() reports for the
 * requested events.
 */
int
invfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

        return (poll_no_poll(events));
}
 4303 
/*
 * Exported kqfilter stub: always fails with EINVAL.
 */
int
invfo_kqfilter(struct file *fp, struct knote *kn)
{

        return (EINVAL);
}
 4310 
/*
 * Exported chmod stub: always fails with EINVAL.
 */
int
invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}
 4318 
/*
 * Exported chown stub: always fails with EINVAL.
 */
int
invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}
 4326 
/*
 * Exported sendfile stub: transfers nothing and always fails with EINVAL.
 */
int
invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
    struct thread *td)
{

        return (EINVAL);
}
 4335 
 4336 /*-------------------------------------------------------------------*/
 4337 
 4338 /*
 4339  * File Descriptor pseudo-device driver (/dev/fd/).
 4340  *
 4341  * Opening minor device N dup()s the file (if any) connected to file
 4342  * descriptor N belonging to the calling process.  Note that this driver
 4343  * consists of only the ``open()'' routine, because all subsequent
 4344  * references to this file will be direct to the other driver.
 4345  *
 4346  * XXX: we could give this one a cloning event handler if necessary.
 4347  */
 4348 
/*
 * d_open method of fildesc_cdevsw.  Records the device's unit number in
 * td->td_dupfd and always returns ENODEV; 'mode' and 'type' are unused.
 */
/* ARGSUSED */
static int
fdopen(struct cdev *dev, int mode, int type, struct thread *td)
{

        /*
         * XXX Kludge: set curthread->td_dupfd to contain the value of
         * the file descriptor being sought for duplication. The error
         * return ensures that the vnode for this device will be released
         * by vn_open. Open will detect this special error and take the
         * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
         * will simply report the error.
         */
        td->td_dupfd = dev2unit(dev);
        return (ENODEV);
}
 4365 
/*
 * Character device switch for the file descriptor pseudo-device, named
 * "FD".  Only the open method is provided.
 */
static struct cdevsw fildesc_cdevsw = {
        .d_version =    D_VERSION,
        .d_open =       fdopen,
        .d_name =       "FD",
};
 4371 
 4372 static void
 4373 fildesc_drvinit(void *unused)
 4374 {
 4375         struct cdev *dev;
 4376 
 4377         dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL,
 4378             UID_ROOT, GID_WHEEL, 0666, "fd/0");
 4379         make_dev_alias(dev, "stdin");
 4380         dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL,
 4381             UID_ROOT, GID_WHEEL, 0666, "fd/1");
 4382         make_dev_alias(dev, "stdout");
 4383         dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL,
 4384             UID_ROOT, GID_WHEEL, 0666, "fd/2");
 4385         make_dev_alias(dev, "stderr");
 4386 }
 4387 
 4388 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);

Cache object: 4406a6fd158c4cafa496270120b1c7ac


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.