The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_descrip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_descrip.c      8.6 (Berkeley) 4/19/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include "opt_compat.h"
   41 #include "opt_ddb.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 
   46 #include <sys/conf.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/file.h>
   49 #include <sys/filedesc.h>
   50 #include <sys/filio.h>
   51 #include <sys/jail.h>
   52 #include <sys/kernel.h>
   53 #include <sys/limits.h>
   54 #include <sys/lock.h>
   55 #include <sys/malloc.h>
   56 #include <sys/mount.h>
   57 #include <sys/mqueue.h>
   58 #include <sys/mutex.h>
   59 #include <sys/namei.h>
   60 #include <sys/priv.h>
   61 #include <sys/proc.h>
   62 #include <sys/resourcevar.h>
   63 #include <sys/signalvar.h>
   64 #include <sys/socketvar.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/syscallsubr.h>
   68 #include <sys/sysctl.h>
   69 #include <sys/sysproto.h>
   70 #include <sys/unistd.h>
   71 #include <sys/vnode.h>
   72 
   73 #include <security/audit/audit.h>
   74 
   75 #include <vm/uma.h>
   76 
   77 #include <ddb/ddb.h>
   78 
   79 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
   80 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
   81                      "file desc to leader structures");
   82 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
   83 
   84 static uma_zone_t file_zone;
   85 
   86 
   87 /* How to treat 'new' parameter when allocating a fd for do_dup(). */
   88 enum dup_type { DUP_VARIABLE, DUP_FIXED };
   89 
   90 static int do_dup(struct thread *td, enum dup_type type, int old, int new,
   91     register_t *retval);
   92 static int      fd_first_free(struct filedesc *, int, int);
   93 static int      fd_last_used(struct filedesc *, int, int);
   94 static void     fdgrowtable(struct filedesc *, int);
   95 static int      fdrop_locked(struct file *fp, struct thread *td);
   96 static void     fdunused(struct filedesc *fdp, int fd);
   97 static void     fdused(struct filedesc *fdp, int fd);
   98 
   99 /*
  100  * A process is initially started out with NDFILE descriptors stored within
  101  * this structure, selected to be enough for typical applications based on
  102  * the historical limit of 20 open files (and the usage of descriptors by
  103  * shells).  If these descriptors are exhausted, a larger descriptor table
  104  * may be allocated, up to a process' resource limit; the internal arrays
  105  * are then unused.
       *
       * The ND* macros below describe the in-use bitmap, an array of
       * NDSLOTTYPE words holding one bit per descriptor.
  106  */
  107 #define NDFILE          20      /* descriptors held inline in filedesc0 */
  108 #define NDSLOTSIZE      sizeof(NDSLOTTYPE)      /* bytes per bitmap word */
  109 #define NDENTRIES       (NDSLOTSIZE * __CHAR_BIT)       /* bits per bitmap word */
  110 #define NDSLOT(x)       ((x) / NDENTRIES)       /* index of the word holding bit x */
  111 #define NDBIT(x)        ((NDSLOTTYPE)1 << ((x) % NDENTRIES))    /* mask of bit x in its word */
  112 #define NDSLOTS(x)      (((x) + NDENTRIES - 1) / NDENTRIES)     /* words needed for x bits */
  113 
  114 /*
  115  * Storage required per open file descriptor.
  116  */
  117 #define OFILESIZE (sizeof(struct file *) + sizeof(char))
  118 
  119 /*
  120  * Basic allocation of descriptors:
  121  * a struct filedesc, plus inline arrays sized for NDFILE descriptors.
  122  */
  123 struct filedesc0 {
  124         struct  filedesc fd_fd;
  125         /*
  126          * These arrays are used when the number of open files is
  127          * <= NDFILE, and are then pointed to by the pointers above.
  128          */
  129         struct  file *fd_dfiles[NDFILE];        /* one file pointer per descriptor */
  130         char    fd_dfileflags[NDFILE];          /* one flag byte per descriptor */
  131         NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];    /* bitmap words covering NDFILE bits */
  132 };
  133 
  134 /*
  135  * Descriptor management.
  136  */
  137 struct filelist filehead;       /* head of list of open files */
  138 int openfiles;                  /* actual number of open files */
  139 struct sx filelist_lock;        /* sx to protect filelist */
  140 struct mtx sigio_lock;          /* mtx to protect pointers to sigio */
  141 void    (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
  142 
  143 /* A mutex to protect the association between a proc and filedesc. */
  144 static struct mtx       fdesc_mtx;
  145 
  146 /*
  147  * Find the first zero bit in the given bitmap, starting at low and not
  148  * exceeding size - 1.
  149  */
  150 static int
  151 fd_first_free(struct filedesc *fdp, int low, int size)
  152 {
  153         NDSLOTTYPE *map = fdp->fd_map;
  154         NDSLOTTYPE mask;
  155         int off, maxoff;
  156 
  157         if (low >= size)
  158                 return (low);
  159 
  160         off = NDSLOT(low);
  161         if (low % NDENTRIES) {
  162                 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
  163                 if ((mask &= ~map[off]) != 0UL)
  164                         return (off * NDENTRIES + ffsl(mask) - 1);
  165                 ++off;
  166         }
  167         for (maxoff = NDSLOTS(size); off < maxoff; ++off)
  168                 if (map[off] != ~0UL)
  169                         return (off * NDENTRIES + ffsl(~map[off]) - 1);
  170         return (size);
  171 }
  172 
  173 /*
  174  * Find the highest non-zero bit in the given bitmap, starting at low and
  175  * not exceeding size - 1.
  176  */
  177 static int
  178 fd_last_used(struct filedesc *fdp, int low, int size)
  179 {
  180         NDSLOTTYPE *map = fdp->fd_map;
  181         NDSLOTTYPE mask;
  182         int off, minoff;
  183 
  184         if (low >= size)
  185                 return (-1);
  186 
  187         off = NDSLOT(size);
  188         if (size % NDENTRIES) {
  189                 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
  190                 if ((mask &= map[off]) != 0)
  191                         return (off * NDENTRIES + flsl(mask) - 1);
  192                 --off;
  193         }
  194         for (minoff = NDSLOT(low); off >= minoff; --off)
  195                 if (map[off] != 0)
  196                         return (off * NDENTRIES + flsl(map[off]) - 1);
  197         return (low - 1);
  198 }
  199 
  200 static int
  201 fdisused(struct filedesc *fdp, int fd)
  202 {
  203         KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
  204             ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
  205         return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
  206 }
  207 
  208 /*
  209  * Mark a file descriptor as used.
  210  */
  211 static void
  212 fdused(struct filedesc *fdp, int fd)
  213 {
  214 
  215         FILEDESC_XLOCK_ASSERT(fdp);
  216         KASSERT(!fdisused(fdp, fd),
  217             ("fd already used"));
  218 
  219         fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
  220         if (fd > fdp->fd_lastfile)
  221                 fdp->fd_lastfile = fd;
  222         if (fd == fdp->fd_freefile)
  223                 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
  224 }
  225 
  226 /*
  227  * Mark a file descriptor as unused.
  228  */
  229 static void
  230 fdunused(struct filedesc *fdp, int fd)
  231 {
  232 
  233         FILEDESC_XLOCK_ASSERT(fdp);
  234         KASSERT(fdisused(fdp, fd),
  235             ("fd is already unused"));
  236         KASSERT(fdp->fd_ofiles[fd] == NULL,
  237             ("fd is still in use"));
  238 
  239         fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
  240         if (fd < fdp->fd_freefile)
  241                 fdp->fd_freefile = fd;
  242         if (fd == fdp->fd_lastfile)
  243                 fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
  244 }
  245 
  246 /*
  247  * System calls on descriptors.
  248  */
  249 #ifndef _SYS_SYSPROTO_H_
  250 struct getdtablesize_args {
  251         int     dummy;
  252 };
  253 #endif
  254 /* ARGSUSED */
  255 int
  256 getdtablesize(struct thread *td, struct getdtablesize_args *uap)
  257 {
  258         struct proc *p = td->td_proc;
  259 
  260         PROC_LOCK(p);
  261         td->td_retval[0] =
  262             min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
  263         PROC_UNLOCK(p);
  264         return (0);
  265 }
  266 
  267 /*
  268  * Duplicate a file descriptor to a particular value.
  269  *
  270  * Note: keep in mind that a potential race condition exists when closing
  271  * descriptors from a shared descriptor table (via rfork).
  272  */
  273 #ifndef _SYS_SYSPROTO_H_
  274 struct dup2_args {
  275         u_int   from;
  276         u_int   to;
  277 };
  278 #endif
  279 /* ARGSUSED */
  280 int
  281 dup2(struct thread *td, struct dup2_args *uap)
  282 {
  283 
  284         return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
  285                     td->td_retval));
  286 }
  287 
  288 /*
  289  * Duplicate a file descriptor.
  290  */
  291 #ifndef _SYS_SYSPROTO_H_
  292 struct dup_args {
  293         u_int   fd;
  294 };
  295 #endif
  296 /* ARGSUSED */
  297 int
  298 dup(struct thread *td, struct dup_args *uap)
  299 {
  300 
  301         return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
  302 }
  303 
  304 /*
  305  * The file control system call.
  306  */
  307 #ifndef _SYS_SYSPROTO_H_
  308 struct fcntl_args {
  309         int     fd;
  310         int     cmd;
  311         long    arg;
  312 };
  313 #endif
  314 /* ARGSUSED */
  315 int
  316 fcntl(struct thread *td, struct fcntl_args *uap)
  317 {
  318         struct flock fl;
  319         intptr_t arg;
  320         int error;
  321 
  322         error = 0;
  323         switch (uap->cmd) {
  324         case F_GETLK:
  325         case F_SETLK:
  326         case F_SETLKW:
  327                 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
  328                 arg = (intptr_t)&fl;
  329                 break;
  330         default:
  331                 arg = uap->arg;
  332                 break;
  333         }
  334         if (error)
  335                 return (error);
  336         error = kern_fcntl(td, uap->fd, uap->cmd, arg);
  337         if (error)
  338                 return (error);
  339         if (uap->cmd == F_GETLK)
  340                 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
  341         return (error);
  342 }
  343 
  344 static inline struct file *
  345 fdtofp(int fd, struct filedesc *fdp)
  346 {
  347         struct file *fp;
  348 
  349         FILEDESC_LOCK_ASSERT(fdp);
  350         if ((unsigned)fd >= fdp->fd_nfiles ||
  351             (fp = fdp->fd_ofiles[fd]) == NULL)
  352                 return (NULL);
  353         return (fp);
  354 }
  355 
      /*
       * Kernel-side worker for the fcntl system call: perform command cmd on
       * descriptor fd of the calling thread's process.
       *
       * arg is used as a plain integer by F_DUPFD, F_SETFD, F_SETFL and
       * F_SETOWN.  For F_GETLK, F_SETLK and F_SETLKW it is cast to a
       * struct flock pointer and dereferenced directly, so it must point at
       * kernel-accessible memory; fcntl() passes a copy it filled with
       * copyin().  The flock may be modified (l_start is adjusted for
       * SEEK_CUR requests, and the close-race path of F_SETLK rewrites it).
       *
       * Returns 0 or an errno value: EBADF when fd does not name an open file
       * (or, for the lock commands, a file whose type is not DTYPE_VNODE),
       * EINVAL for an unknown command.  Results of F_GETFD, F_GETFL and
       * F_GETOWN are stored in td->td_retval[0]; F_DUPFD hands td->td_retval
       * to do_dup().
       */
  356 int
  357 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
  358 {
  359         struct filedesc *fdp;
  360         struct flock *flp;
  361         struct file *fp;
  362         struct proc *p;
  363         char *pop;
  364         struct vnode *vp;
  365         u_int newmin;
  366         int error, flg, tmp;
  367         int vfslocked;
  368 
  369         vfslocked = 0;
  370         error = 0;
  371         flg = F_POSIX;
  372         p = td->td_proc;
  373         fdp = p->p_fd;
  374 
  375         switch (cmd) {
  376         case F_DUPFD:
  377                 FILEDESC_SLOCK(fdp);
  378                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  379                         FILEDESC_SUNLOCK(fdp);
  380                         error = EBADF;
  381                         break;
  382                 }
                      /*
                       * fd was only probed here; do_dup() looks it up again
                       * under its own lock.
                       */
  383                 FILEDESC_SUNLOCK(fdp);
  384                 newmin = arg;
  385                 PROC_LOCK(p);
  386                 if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
  387                     newmin >= maxfilesperproc) {
  388                         PROC_UNLOCK(p);
  389                         error = EINVAL;
  390                         break;
  391                 }
  392                 PROC_UNLOCK(p);
  393                 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
  394                 break;
  395 
  396         case F_GETFD:
  397                 FILEDESC_SLOCK(fdp);
  398                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  399                         FILEDESC_SUNLOCK(fdp);
  400                         error = EBADF;
  401                         break;
  402                 }
  403                 pop = &fdp->fd_ofileflags[fd];
  404                 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
  405                 FILEDESC_SUNLOCK(fdp);
  406                 break;
  407 
  408         case F_SETFD:
  409                 FILEDESC_XLOCK(fdp);
  410                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  411                         FILEDESC_XUNLOCK(fdp);
  412                         error = EBADF;
  413                         break;
  414                 }
                      /* Only the close-on-exec bit is replaced; other flag bits stay. */
  415                 pop = &fdp->fd_ofileflags[fd];
  416                 *pop = (*pop &~ UF_EXCLOSE) |
  417                     (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
  418                 FILEDESC_XUNLOCK(fdp);
  419                 break;
  420 
  421         case F_GETFL:
  422                 FILEDESC_SLOCK(fdp);
  423                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  424                         FILEDESC_SUNLOCK(fdp);
  425                         error = EBADF;
  426                         break;
  427                 }
  428                 FILE_LOCK(fp);
  429                 td->td_retval[0] = OFLAGS(fp->f_flag);
  430                 FILE_UNLOCK(fp);
  431                 FILEDESC_SUNLOCK(fdp);
  432                 break;
  433 
  434         case F_SETFL:
  435                 FILEDESC_SLOCK(fdp);
  436                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  437                         FILEDESC_SUNLOCK(fdp);
  438                         error = EBADF;
  439                         break;
  440                 }
                      /*
                       * Take a hold on fp (paired with fdrop() on every path
                       * below) before the table lock is released, then replace
                       * the FCNTLFLAGS bits of f_flag with those from arg.
                       */
  441                 FILE_LOCK(fp);
  442                 fhold_locked(fp);
  443                 fp->f_flag &= ~FCNTLFLAGS;
  444                 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
  445                 FILE_UNLOCK(fp);
  446                 FILEDESC_SUNLOCK(fdp);
                      /* Push the new non-blocking and async settings to the file. */
  447                 tmp = fp->f_flag & FNONBLOCK;
  448                 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  449                 if (error) {
  450                         fdrop(fp, td);
  451                         break;
  452                 }
  453                 tmp = fp->f_flag & FASYNC;
  454                 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
  455                 if (error == 0) {
  456                         fdrop(fp, td);
  457                         break;
  458                 }
                      /*
                       * FIOASYNC failed: clear FNONBLOCK again and tell the
                       * file so, ignoring any error from that second ioctl.
                       * The FIOASYNC error is what gets returned.
                       */
  459                 FILE_LOCK(fp);
  460                 fp->f_flag &= ~FNONBLOCK;
  461                 FILE_UNLOCK(fp);
  462                 tmp = 0;
  463                 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  464                 fdrop(fp, td);
  465                 break;
  466 
  467         case F_GETOWN:
  468                 FILEDESC_SLOCK(fdp);
  469                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  470                         FILEDESC_SUNLOCK(fdp);
  471                         error = EBADF;
  472                         break;
  473                 }
  474                 fhold(fp);
  475                 FILEDESC_SUNLOCK(fdp);
  476                 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
  477                 if (error == 0)
  478                         td->td_retval[0] = tmp;
  479                 fdrop(fp, td);
  480                 break;
  481 
  482         case F_SETOWN:
  483                 FILEDESC_SLOCK(fdp);
  484                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  485                         FILEDESC_SUNLOCK(fdp);
  486                         error = EBADF;
  487                         break;
  488                 }
  489                 fhold(fp);
  490                 FILEDESC_SUNLOCK(fdp);
  491                 tmp = arg;
  492                 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
  493                 fdrop(fp, td);
  494                 break;
  495 
  496         case F_SETLKW:
  497                 flg |= F_WAIT;
  498                 /* FALLTHROUGH F_SETLK */
  499 
  500         case F_SETLK:
  501                 FILEDESC_SLOCK(fdp);
  502                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  503                         FILEDESC_SUNLOCK(fdp);
  504                         error = EBADF;
  505                         break;
  506                 }
  507                 if (fp->f_type != DTYPE_VNODE) {
  508                         FILEDESC_SUNLOCK(fdp);
  509                         error = EBADF;
  510                         break;
  511                 }
  512                 flp = (struct flock *)arg;
                      /*
                       * For SEEK_CUR, fold the current file offset into l_start,
                       * refusing a negative offset or a sum above OFF_MAX.
                       */
  513                 if (flp->l_whence == SEEK_CUR) {
  514                         if (fp->f_offset < 0 ||
  515                             (flp->l_start > 0 &&
  516                              fp->f_offset > OFF_MAX - flp->l_start)) {
  517                                 FILEDESC_SUNLOCK(fdp);
  518                                 error = EOVERFLOW;
  519                                 break;
  520                         }
  521                         flp->l_start += fp->f_offset;
  522                 }
  523 
  524                 /*
  525                  * VOP_ADVLOCK() may block.
  526                  */
  527                 fhold(fp);
  528                 FILEDESC_SUNLOCK(fdp);
  529                 vp = fp->f_vnode;
  530                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                      /*
                       * A break inside this inner switch only leaves the inner
                       * switch; control continues at VFS_UNLOCK_GIANT() below.
                       */
  531                 switch (flp->l_type) {
  532                 case F_RDLCK:
  533                         if ((fp->f_flag & FREAD) == 0) {
  534                                 error = EBADF;
  535                                 break;
  536                         }
  537                         PROC_LOCK(p->p_leader);
  538                         p->p_leader->p_flag |= P_ADVLOCK;
  539                         PROC_UNLOCK(p->p_leader);
  540                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  541                             flp, flg);
  542                         break;
  543                 case F_WRLCK:
  544                         if ((fp->f_flag & FWRITE) == 0) {
  545                                 error = EBADF;
  546                                 break;
  547                         }
  548                         PROC_LOCK(p->p_leader);
  549                         p->p_leader->p_flag |= P_ADVLOCK;
  550                         PROC_UNLOCK(p->p_leader);
  551                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  552                             flp, flg);
  553                         break;
  554                 case F_UNLCK:
  555                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
  556                             flp, F_POSIX);
  557                         break;
  558                 default:
  559                         error = EINVAL;
  560                         break;
  561                 }
  562                 VFS_UNLOCK_GIANT(vfslocked);
  563                 vfslocked = 0;
  564                 /* Check for race with close */
                      /*
                       * If fd no longer refers to fp, the flock is rewritten to
                       * a whole-file F_UNLCK request and issued, result ignored.
                       * 'error' from the request above is still what is returned.
                       */
  565                 FILEDESC_SLOCK(fdp);
  566                 if ((unsigned) fd >= fdp->fd_nfiles ||
  567                     fp != fdp->fd_ofiles[fd]) {
  568                         FILEDESC_SUNLOCK(fdp);
  569                         flp->l_whence = SEEK_SET;
  570                         flp->l_start = 0;
  571                         flp->l_len = 0;
  572                         flp->l_type = F_UNLCK;
  573                         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  574                         (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  575                                            F_UNLCK, flp, F_POSIX);
  576                         VFS_UNLOCK_GIANT(vfslocked);
  577                         vfslocked = 0;
  578                 } else
  579                         FILEDESC_SUNLOCK(fdp);
  580                 fdrop(fp, td);
  581                 break;
  582 
  583         case F_GETLK:
  584                 FILEDESC_SLOCK(fdp);
  585                 if ((fp = fdtofp(fd, fdp)) == NULL) {
  586                         FILEDESC_SUNLOCK(fdp);
  587                         error = EBADF;
  588                         break;
  589                 }
  590                 if (fp->f_type != DTYPE_VNODE) {
  591                         FILEDESC_SUNLOCK(fdp);
  592                         error = EBADF;
  593                         break;
  594                 }
  595                 flp = (struct flock *)arg;
  596                 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
  597                     flp->l_type != F_UNLCK) {
  598                         FILEDESC_SUNLOCK(fdp);
  599                         error = EINVAL;
  600                         break;
  601                 }
                      /*
                       * Unlike F_SETLK, a negative f_offset is not rejected
                       * here; only overflow of the sum in either direction is.
                       */
  602                 if (flp->l_whence == SEEK_CUR) {
  603                         if ((flp->l_start > 0 &&
  604                             fp->f_offset > OFF_MAX - flp->l_start) ||
  605                             (flp->l_start < 0 &&
  606                              fp->f_offset < OFF_MIN - flp->l_start)) {
  607                                 FILEDESC_SUNLOCK(fdp);
  608                                 error = EOVERFLOW;
  609                                 break;
  610                         }
  611                         flp->l_start += fp->f_offset;
  612                 }
  613                 /*
  614                  * VOP_ADVLOCK() may block.
  615                  */
  616                 fhold(fp);
  617                 FILEDESC_SUNLOCK(fdp);
  618                 vp = fp->f_vnode;
  619                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  620                 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
  621                     F_POSIX);
  622                 VFS_UNLOCK_GIANT(vfslocked);
  623                 vfslocked = 0;
  624                 fdrop(fp, td);
  625                 break;
  626         default:
  627                 error = EINVAL;
  628                 break;
  629         }
              /*
               * vfslocked is reset to 0 after every VFS_UNLOCK_GIANT() above, so
               * it is 0 on every path that reaches this call.
               */
  630         VFS_UNLOCK_GIANT(vfslocked);
  631         return (error);
  632 }
  633 
  634 /*
  635  * Common code for dup, dup2, and fcntl(F_DUPFD).
       *
       * Makes descriptor 'new' refer to the same file as 'old' in the calling
       * thread's process and stores the resulting descriptor in *retval.
       *
       *   DUP_FIXED:    'new' is the exact target.  A file already open there
       *                 is replaced and then closed with closef().  If
       *                 old == new, nothing is changed and 'new' is returned.
       *   DUP_VARIABLE: 'new' is passed to fdalloc() (defined elsewhere) and
       *                 overwritten with the descriptor it picks -- presumably
       *                 the lowest free one >= new; confirm against fdalloc().
       *
       * The new descriptor's flags are copied from the old one with
       * UF_EXCLOSE cleared.
       *
       * Returns 0 on success; EBADF if old or new is negative, old is not
       * open, or old changed while the table was being prepared; EMFILE if
       * new is at or above min(RLIMIT_NOFILE, maxfilesperproc); otherwise the
       * error returned by fdalloc().
  636  */
  637 static int
  638 do_dup(struct thread *td, enum dup_type type, int old, int new,
  639     register_t *retval)
  640 {
  641         struct filedesc *fdp;
  642         struct proc *p;
  643         struct file *fp;
  644         struct file *delfp;
  645         int error, holdleaders, maxfd;
  646 
  647         KASSERT((type == DUP_VARIABLE || type == DUP_FIXED),
  648             ("invalid dup type %d", type));
  649 
  650         p = td->td_proc;
  651         fdp = p->p_fd;
  652 
  653         /*
  654          * Verify we have a valid descriptor to dup from and possibly to
  655          * dup to.
  656          */
  657         if (old < 0 || new < 0)
  658                 return (EBADF);
  659         PROC_LOCK(p);
  660         maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
  661         PROC_UNLOCK(p);
  662         if (new >= maxfd)
  663                 return (EMFILE);
  664 
  665         FILEDESC_XLOCK(fdp);
  666         if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
  667                 FILEDESC_XUNLOCK(fdp);
  668                 return (EBADF);
  669         }
  670         if (type == DUP_FIXED && old == new) {
  671                 *retval = new;
  672                 FILEDESC_XUNLOCK(fdp);
  673                 return (0);
  674         }
              /* Hold fp; the error paths below release it with fdrop(). */
  675         fp = fdp->fd_ofiles[old];
  676         fhold(fp);
  677 
  678         /*
  679          * If the caller specified a file descriptor, make sure the file
  680          * table is large enough to hold it, and grab it.  Otherwise, just
  681          * allocate a new descriptor the usual way.  Since the filedesc
  682          * lock may be temporarily dropped in the process, we have to look
  683          * out for a race.
  684          */
  685         if (type == DUP_FIXED) {
  686                 if (new >= fdp->fd_nfiles)
  687                         fdgrowtable(fdp, new + 1);
                      /* Mark the slot used only if it is empty; an open one already is. */
  688                 if (fdp->fd_ofiles[new] == NULL)
  689                         fdused(fdp, new);
  690         } else {
  691                 if ((error = fdalloc(td, new, &new)) != 0) {
  692                         FILEDESC_XUNLOCK(fdp);
  693                         fdrop(fp, td);
  694                         return (error);
  695                 }
  696         }
  697 
  698         /*
  699          * If the old file changed out from under us then treat it as a
  700          * bad file descriptor.  Userland should do its own locking to
  701          * avoid this case.
  702          */
  703         if (fdp->fd_ofiles[old] != fp) {
  704                 /* we've allocated a descriptor which we won't use */
  705                 if (fdp->fd_ofiles[new] == NULL)
  706                         fdunused(fdp, new);
  707                 FILEDESC_XUNLOCK(fdp);
  708                 fdrop(fp, td);
  709                 return (EBADF);
  710         }
  711         KASSERT(old != new,
  712             ("new fd is same as old"));
  713 
  714         /*
  715          * Save info on the descriptor being overwritten.  We cannot close
  716          * it without introducing an ownership race for the slot, since we
  717          * need to drop the filedesc lock to call closef().
  718          *
  719          * XXX this duplicates parts of close().
  720          */
  721         delfp = fdp->fd_ofiles[new];
  722         holdleaders = 0;
  723         if (delfp != NULL) {
  724                 if (td->td_proc->p_fdtol != NULL) {
  725                         /*
  726                          * Ask fdfree() to sleep to ensure that all relevant
  727                          * process leaders can be traversed in closef().
  728                          */
  729                         fdp->fd_holdleaderscount++;
  730                         holdleaders = 1;
  731                 }
  732         }
  733 
  734         /*
  735          * Duplicate the source descriptor
  736          */
  737         fdp->fd_ofiles[new] = fp;
              /* The duplicate does not inherit the close-on-exec flag. */
  738         fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
  739         if (new > fdp->fd_lastfile)
  740                 fdp->fd_lastfile = new;
  741         *retval = new;
  742 
  743         /*
  744          * If we dup'd over a valid file, we now own the reference to it
  745          * and must dispose of it using closef() semantics (as if a
  746          * close() were performed on it).
  747          *
  748          * XXX this duplicates parts of close().
  749          */
  750         if (delfp != NULL) {
  751                 knote_fdclose(td, new);
  752                 if (delfp->f_type == DTYPE_MQUEUE)
  753                         mq_fdclose(td, new, delfp);
  754                 FILEDESC_XUNLOCK(fdp);
                      /* The result of closef() is discarded; 0 is returned regardless. */
  755                 (void) closef(delfp, td);
  756                 if (holdleaders) {
                              /*
                               * Undo the fd_holdleaderscount increment made above
                               * and wake a waiter if one asked to be woken.
                               */
  757                         FILEDESC_XLOCK(fdp);
  758                         fdp->fd_holdleaderscount--;
  759                         if (fdp->fd_holdleaderscount == 0 &&
  760                             fdp->fd_holdleaderswakeup != 0) {
  761                                 fdp->fd_holdleaderswakeup = 0;
  762                                 wakeup(&fdp->fd_holdleaderscount);
  763                         }
  764                         FILEDESC_XUNLOCK(fdp);
  765                 }
  766         } else {
  767                 FILEDESC_XUNLOCK(fdp);
  768         }
  769         return (0);
  770 }
  771 
  772 /*
  773  * If sigio is on the list associated with a process or process group,
  774  * disable signalling from the device, remove sigio from the list and
  775  * free sigio.
  776  */
  777 void
  778 funsetown(struct sigio **sigiop)
  779 {
  780         struct sigio *sigio;
  781 
  782         SIGIO_LOCK();
  783         sigio = *sigiop;
  784         if (sigio == NULL) {
  785                 SIGIO_UNLOCK();
  786                 return;
  787         }
  788         *(sigio->sio_myref) = NULL;
  789         if ((sigio)->sio_pgid < 0) {
  790                 struct pgrp *pg = (sigio)->sio_pgrp;
  791                 PGRP_LOCK(pg);
  792                 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
  793                              sigio, sio_pgsigio);
  794                 PGRP_UNLOCK(pg);
  795         } else {
  796                 struct proc *p = (sigio)->sio_proc;
  797                 PROC_LOCK(p);
  798                 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
  799                              sigio, sio_pgsigio);
  800                 PROC_UNLOCK(p);
  801         }
  802         SIGIO_UNLOCK();
  803         crfree(sigio->sio_ucred);
  804         FREE(sigio, M_SIGIO);
  805 }
  806 
  807 /*
  808  * Free a list of sigio structures.
  809  * We only need to lock the SIGIO_LOCK because we have made ourselves
  810  * inaccessible to callers of fsetown and therefore do not need to lock
  811  * the proc or pgrp struct for the list manipulation.
  812  */
  813 void
  814 funsetownlst(struct sigiolst *sigiolst)
  815 {
  816         struct proc *p;
  817         struct pgrp *pg;
  818         struct sigio *sigio;
  819 
  820         sigio = SLIST_FIRST(sigiolst);
  821         if (sigio == NULL)
  822                 return;
  823         p = NULL;
  824         pg = NULL;
  825 
  826         /*
  827          * Every entry of the list should belong
  828          * to a single proc or pgrp.
  829          */
  830         if (sigio->sio_pgid < 0) {
  831                 pg = sigio->sio_pgrp;
  832                 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
  833         } else /* if (sigio->sio_pgid > 0) */ {
  834                 p = sigio->sio_proc;
  835                 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
  836         }
  837 
  838         SIGIO_LOCK();
  839         while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
  840                 *(sigio->sio_myref) = NULL;
  841                 if (pg != NULL) {
  842                         KASSERT(sigio->sio_pgid < 0,
  843                             ("Proc sigio in pgrp sigio list"));
  844                         KASSERT(sigio->sio_pgrp == pg,
  845                             ("Bogus pgrp in sigio list"));
  846                         PGRP_LOCK(pg);
  847                         SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
  848                             sio_pgsigio);
  849                         PGRP_UNLOCK(pg);
  850                 } else /* if (p != NULL) */ {
  851                         KASSERT(sigio->sio_pgid > 0,
  852                             ("Pgrp sigio in proc sigio list"));
  853                         KASSERT(sigio->sio_proc == p,
  854                             ("Bogus proc in sigio list"));
  855                         PROC_LOCK(p);
  856                         SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
  857                             sio_pgsigio);
  858                         PROC_UNLOCK(p);
  859                 }
  860                 SIGIO_UNLOCK();
  861                 crfree(sigio->sio_ucred);
  862                 FREE(sigio, M_SIGIO);
  863                 SIGIO_LOCK();
  864         }
  865         SIGIO_UNLOCK();
  866 }
  867 
  868 /*
  869  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
  870  *
  871  * After permission checking, add a sigio structure to the sigio list for
  872  * the process or process group.
  873  */
  874 int
  875 fsetown(pid_t pgid, struct sigio **sigiop)
  876 {
  877         struct proc *proc;
  878         struct pgrp *pgrp;
  879         struct sigio *sigio;
  880         int ret;
  881 
  882         if (pgid == 0) {
  883                 funsetown(sigiop);
  884                 return (0);
  885         }
  886 
  887         ret = 0;
  888 
  889         /* Allocate and fill in the new sigio out of locks. */
  890         MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
  891         sigio->sio_pgid = pgid;
  892         sigio->sio_ucred = crhold(curthread->td_ucred);
  893         sigio->sio_myref = sigiop;
  894 
  895         sx_slock(&proctree_lock);
  896         if (pgid > 0) {
  897                 proc = pfind(pgid);
  898                 if (proc == NULL) {
  899                         ret = ESRCH;
  900                         goto fail;
  901                 }
  902 
  903                 /*
  904                  * Policy - Don't allow a process to FSETOWN a process
  905                  * in another session.
  906                  *
  907                  * Remove this test to allow maximum flexibility or
  908                  * restrict FSETOWN to the current process or process
  909                  * group for maximum safety.
  910                  */
  911                 PROC_UNLOCK(proc);
  912                 if (proc->p_session != curthread->td_proc->p_session) {
  913                         ret = EPERM;
  914                         goto fail;
  915                 }
  916 
  917                 pgrp = NULL;
  918         } else /* if (pgid < 0) */ {
  919                 pgrp = pgfind(-pgid);
  920                 if (pgrp == NULL) {
  921                         ret = ESRCH;
  922                         goto fail;
  923                 }
  924                 PGRP_UNLOCK(pgrp);
  925 
  926                 /*
  927                  * Policy - Don't allow a process to FSETOWN a process
  928                  * in another session.
  929                  *
  930                  * Remove this test to allow maximum flexibility or
  931                  * restrict FSETOWN to the current process or process
  932                  * group for maximum safety.
  933                  */
  934                 if (pgrp->pg_session != curthread->td_proc->p_session) {
  935                         ret = EPERM;
  936                         goto fail;
  937                 }
  938 
  939                 proc = NULL;
  940         }
  941         funsetown(sigiop);
  942         if (pgid > 0) {
  943                 PROC_LOCK(proc);
  944                 /*
  945                  * Since funsetownlst() is called without the proctree
  946                  * locked, we need to check for P_WEXIT.
  947                  * XXX: is ESRCH correct?
  948                  */
  949                 if ((proc->p_flag & P_WEXIT) != 0) {
  950                         PROC_UNLOCK(proc);
  951                         ret = ESRCH;
  952                         goto fail;
  953                 }
  954                 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
  955                 sigio->sio_proc = proc;
  956                 PROC_UNLOCK(proc);
  957         } else {
  958                 PGRP_LOCK(pgrp);
  959                 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
  960                 sigio->sio_pgrp = pgrp;
  961                 PGRP_UNLOCK(pgrp);
  962         }
  963         sx_sunlock(&proctree_lock);
  964         SIGIO_LOCK();
  965         *sigiop = sigio;
  966         SIGIO_UNLOCK();
  967         return (0);
  968 
  969 fail:
  970         sx_sunlock(&proctree_lock);
  971         crfree(sigio->sio_ucred);
  972         FREE(sigio, M_SIGIO);
  973         return (ret);
  974 }
  975 
  976 /*
  977  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
  978  */
  979 pid_t
  980 fgetown(sigiop)
  981         struct sigio **sigiop;
  982 {
  983         pid_t pgid;
  984 
  985         SIGIO_LOCK();
  986         pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
  987         SIGIO_UNLOCK();
  988         return (pgid);
  989 }
  990 
  991 /*
  992  * Close a file descriptor.
  993  */
  994 #ifndef _SYS_SYSPROTO_H_
  995 struct close_args {
  996         int     fd;
  997 };
  998 #endif
  999 /* ARGSUSED */
 1000 int
 1001 close(td, uap)
 1002         struct thread *td;
 1003         struct close_args *uap;
 1004 {
 1005 
 1006         return (kern_close(td, uap->fd));
 1007 }
 1008 
 1009 int
 1010 kern_close(td, fd)
 1011         struct thread *td;
 1012         int fd;
 1013 {
 1014         struct filedesc *fdp;
 1015         struct file *fp;
 1016         int error;
 1017         int holdleaders;
 1018 
 1019         error = 0;
 1020         holdleaders = 0;
 1021         fdp = td->td_proc->p_fd;
 1022 
 1023         AUDIT_SYSCLOSE(td, fd);
 1024 
 1025         FILEDESC_XLOCK(fdp);
 1026         if ((unsigned)fd >= fdp->fd_nfiles ||
 1027             (fp = fdp->fd_ofiles[fd]) == NULL) {
 1028                 FILEDESC_XUNLOCK(fdp);
 1029                 return (EBADF);
 1030         }
 1031         fdp->fd_ofiles[fd] = NULL;
 1032         fdp->fd_ofileflags[fd] = 0;
 1033         fdunused(fdp, fd);
 1034         if (td->td_proc->p_fdtol != NULL) {
 1035                 /*
 1036                  * Ask fdfree() to sleep to ensure that all relevant
 1037                  * process leaders can be traversed in closef().
 1038                  */
 1039                 fdp->fd_holdleaderscount++;
 1040                 holdleaders = 1;
 1041         }
 1042 
 1043         /*
 1044          * We now hold the fp reference that used to be owned by the
 1045          * descriptor array.  We have to unlock the FILEDESC *AFTER*
 1046          * knote_fdclose to prevent a race of the fd getting opened, a knote
 1047          * added, and deleteing a knote for the new fd.
 1048          */
 1049         knote_fdclose(td, fd);
 1050         if (fp->f_type == DTYPE_MQUEUE)
 1051                 mq_fdclose(td, fd, fp);
 1052         FILEDESC_XUNLOCK(fdp);
 1053 
 1054         error = closef(fp, td);
 1055         if (holdleaders) {
 1056                 FILEDESC_XLOCK(fdp);
 1057                 fdp->fd_holdleaderscount--;
 1058                 if (fdp->fd_holdleaderscount == 0 &&
 1059                     fdp->fd_holdleaderswakeup != 0) {
 1060                         fdp->fd_holdleaderswakeup = 0;
 1061                         wakeup(&fdp->fd_holdleaderscount);
 1062                 }
 1063                 FILEDESC_XUNLOCK(fdp);
 1064         }
 1065         return (error);
 1066 }
 1067 
 1068 #if defined(COMPAT_43)
 1069 /*
 1070  * Return status information about a file descriptor.
 1071  */
 1072 #ifndef _SYS_SYSPROTO_H_
 1073 struct ofstat_args {
 1074         int     fd;
 1075         struct  ostat *sb;
 1076 };
 1077 #endif
 1078 /* ARGSUSED */
 1079 int
 1080 ofstat(struct thread *td, struct ofstat_args *uap)
 1081 {
 1082         struct ostat oub;
 1083         struct stat ub;
 1084         int error;
 1085 
 1086         error = kern_fstat(td, uap->fd, &ub);
 1087         if (error == 0) {
 1088                 cvtstat(&ub, &oub);
 1089                 error = copyout(&oub, uap->sb, sizeof(oub));
 1090         }
 1091         return (error);
 1092 }
 1093 #endif /* COMPAT_43 */
 1094 
 1095 /*
 1096  * Return status information about a file descriptor.
 1097  */
 1098 #ifndef _SYS_SYSPROTO_H_
 1099 struct fstat_args {
 1100         int     fd;
 1101         struct  stat *sb;
 1102 };
 1103 #endif
 1104 /* ARGSUSED */
 1105 int
 1106 fstat(struct thread *td, struct fstat_args *uap)
 1107 {
 1108         struct stat ub;
 1109         int error;
 1110 
 1111         error = kern_fstat(td, uap->fd, &ub);
 1112         if (error == 0)
 1113                 error = copyout(&ub, uap->sb, sizeof(ub));
 1114         return (error);
 1115 }
 1116 
 1117 int
 1118 kern_fstat(struct thread *td, int fd, struct stat *sbp)
 1119 {
 1120         struct file *fp;
 1121         int error;
 1122 
 1123         AUDIT_ARG(fd, fd);
 1124 
 1125         if ((error = fget(td, fd, &fp)) != 0)
 1126                 return (error);
 1127 
 1128         AUDIT_ARG(file, td->td_proc, fp);
 1129 
 1130         error = fo_stat(fp, sbp, td->td_ucred, td);
 1131         fdrop(fp, td);
 1132         return (error);
 1133 }
 1134 
 1135 /*
 1136  * Return status information about a file descriptor.
 1137  */
 1138 #ifndef _SYS_SYSPROTO_H_
 1139 struct nfstat_args {
 1140         int     fd;
 1141         struct  nstat *sb;
 1142 };
 1143 #endif
 1144 /* ARGSUSED */
 1145 int
 1146 nfstat(struct thread *td, struct nfstat_args *uap)
 1147 {
 1148         struct nstat nub;
 1149         struct stat ub;
 1150         int error;
 1151 
 1152         error = kern_fstat(td, uap->fd, &ub);
 1153         if (error == 0) {
 1154                 cvtnstat(&ub, &nub);
 1155                 error = copyout(&nub, uap->sb, sizeof(nub));
 1156         }
 1157         return (error);
 1158 }
 1159 
 1160 /*
 1161  * Return pathconf information about a file descriptor.
 1162  */
 1163 #ifndef _SYS_SYSPROTO_H_
 1164 struct fpathconf_args {
 1165         int     fd;
 1166         int     name;
 1167 };
 1168 #endif
 1169 /* ARGSUSED */
 1170 int
 1171 fpathconf(struct thread *td, struct fpathconf_args *uap)
 1172 {
 1173         struct file *fp;
 1174         struct vnode *vp;
 1175         int error;
 1176 
 1177         if ((error = fget(td, uap->fd, &fp)) != 0)
 1178                 return (error);
 1179 
 1180         /* If asynchronous I/O is available, it works for all descriptors. */
 1181         if (uap->name == _PC_ASYNC_IO) {
 1182                 td->td_retval[0] = async_io_version;
 1183                 goto out;
 1184         }
 1185         vp = fp->f_vnode;
 1186         if (vp != NULL) {
 1187                 int vfslocked;
 1188                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1189                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1190                 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
 1191                 VOP_UNLOCK(vp, 0, td);
 1192                 VFS_UNLOCK_GIANT(vfslocked);
 1193         } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
 1194                 if (uap->name != _PC_PIPE_BUF) {
 1195                         error = EINVAL;
 1196                 } else {
 1197                         td->td_retval[0] = PIPE_BUF;
 1198                 error = 0;
 1199                 }
 1200         } else {
 1201                 error = EOPNOTSUPP;
 1202         }
 1203 out:
 1204         fdrop(fp, td);
 1205         return (error);
 1206 }
 1207 
 1208 /*
 1209  * Grow the file table to accomodate (at least) nfd descriptors.  This may
 1210  * block and drop the filedesc lock, but it will reacquire it before
 1211  * returning.
 1212  */
 1213 static void
 1214 fdgrowtable(struct filedesc *fdp, int nfd)
 1215 {
 1216         struct file **ntable;
 1217         char *nfileflags;
 1218         int nnfiles, onfiles;
 1219         NDSLOTTYPE *nmap;
 1220 
 1221         FILEDESC_XLOCK_ASSERT(fdp);
 1222 
 1223         KASSERT(fdp->fd_nfiles > 0,
 1224             ("zero-length file table"));
 1225 
 1226         /* compute the size of the new table */
 1227         onfiles = fdp->fd_nfiles;
 1228         nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
 1229         if (nnfiles <= onfiles)
 1230                 /* the table is already large enough */
 1231                 return;
 1232 
 1233         /* allocate a new table and (if required) new bitmaps */
 1234         FILEDESC_XUNLOCK(fdp);
 1235         MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
 1236             M_FILEDESC, M_ZERO | M_WAITOK);
 1237         nfileflags = (char *)&ntable[nnfiles];
 1238         if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
 1239                 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
 1240                     M_FILEDESC, M_ZERO | M_WAITOK);
 1241         else
 1242                 nmap = NULL;
 1243         FILEDESC_XLOCK(fdp);
 1244 
 1245         /*
 1246          * We now have new tables ready to go.  Since we dropped the
 1247          * filedesc lock to call malloc(), watch out for a race.
 1248          */
 1249         onfiles = fdp->fd_nfiles;
 1250         if (onfiles >= nnfiles) {
 1251                 /* we lost the race, but that's OK */
 1252                 free(ntable, M_FILEDESC);
 1253                 if (nmap != NULL)
 1254                         free(nmap, M_FILEDESC);
 1255                 return;
 1256         }
 1257         bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
 1258         bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
 1259         if (onfiles > NDFILE)
 1260                 free(fdp->fd_ofiles, M_FILEDESC);
 1261         fdp->fd_ofiles = ntable;
 1262         fdp->fd_ofileflags = nfileflags;
 1263         if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
 1264                 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
 1265                 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
 1266                         free(fdp->fd_map, M_FILEDESC);
 1267                 fdp->fd_map = nmap;
 1268         }
 1269         fdp->fd_nfiles = nnfiles;
 1270 }
 1271 
 1272 /*
 1273  * Allocate a file descriptor for the process.
 1274  */
 1275 int
 1276 fdalloc(struct thread *td, int minfd, int *result)
 1277 {
 1278         struct proc *p = td->td_proc;
 1279         struct filedesc *fdp = p->p_fd;
 1280         int fd = -1, maxfd;
 1281 
 1282         FILEDESC_XLOCK_ASSERT(fdp);
 1283 
 1284         if (fdp->fd_freefile > minfd)
 1285                 minfd = fdp->fd_freefile;          
 1286 
 1287         PROC_LOCK(p);
 1288         maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
 1289         PROC_UNLOCK(p);
 1290 
 1291         /*
 1292          * Search the bitmap for a free descriptor.  If none is found, try
 1293          * to grow the file table.  Keep at it until we either get a file
 1294          * descriptor or run into process or system limits; fdgrowtable()
 1295          * may drop the filedesc lock, so we're in a race.
 1296          */
 1297         for (;;) {
 1298                 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
 1299                 if (fd >= maxfd)
 1300                         return (EMFILE);
 1301                 if (fd < fdp->fd_nfiles)
 1302                         break;
 1303                 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
 1304         }
 1305 
 1306         /*
 1307          * Perform some sanity checks, then mark the file descriptor as
 1308          * used and return it to the caller.
 1309          */
 1310         KASSERT(!fdisused(fdp, fd),
 1311             ("fd_first_free() returned non-free descriptor"));
 1312         KASSERT(fdp->fd_ofiles[fd] == NULL,
 1313             ("free descriptor isn't"));
 1314         fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
 1315         fdused(fdp, fd);
 1316         *result = fd;
 1317         return (0);
 1318 }
 1319 
 1320 /*
 1321  * Check to see whether n user file descriptors are available to the process
 1322  * p.
 1323  */
 1324 int
 1325 fdavail(struct thread *td, int n)
 1326 {
 1327         struct proc *p = td->td_proc;
 1328         struct filedesc *fdp = td->td_proc->p_fd;
 1329         struct file **fpp;
 1330         int i, lim, last;
 1331 
 1332         FILEDESC_LOCK_ASSERT(fdp);
 1333 
 1334         PROC_LOCK(p);
 1335         lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
 1336         PROC_UNLOCK(p);
 1337         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
 1338                 return (1);
 1339         last = min(fdp->fd_nfiles, lim);
 1340         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
 1341         for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
 1342                 if (*fpp == NULL && --n <= 0)
 1343                         return (1);
 1344         }
 1345         return (0);
 1346 }
 1347 
 1348 /*
 1349  * Create a new open file structure and allocate a file decriptor for the
 1350  * process that refers to it.  We add one reference to the file for the
 1351  * descriptor table and one reference for resultfp. This is to prevent us
 1352  * being preempted and the entry in the descriptor table closed after we
 1353  * release the FILEDESC lock.
 1354  */
 1355 int
 1356 falloc(struct thread *td, struct file **resultfp, int *resultfd)
 1357 {
 1358         struct proc *p = td->td_proc;
 1359         struct file *fp, *fq;
 1360         int error, i;
 1361         int maxuserfiles = maxfiles - (maxfiles / 20);
 1362         static struct timeval lastfail;
 1363         static int curfail;
 1364 
 1365         fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
 1366         sx_xlock(&filelist_lock);
 1367 
 1368         if ((openfiles >= maxuserfiles &&
 1369             priv_check(td, PRIV_MAXFILES) != 0) ||
 1370             openfiles >= maxfiles) {
 1371                 if (ppsratecheck(&lastfail, &curfail, 1)) {
 1372                         printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
 1373                                 td->td_ucred->cr_ruid);
 1374                 }
 1375                 sx_xunlock(&filelist_lock);
 1376                 uma_zfree(file_zone, fp);
 1377                 return (ENFILE);
 1378         }
 1379         openfiles++;
 1380 
 1381         /*
 1382          * If the process has file descriptor zero open, add the new file
 1383          * descriptor to the list of open files at that point, otherwise
 1384          * put it at the front of the list of open files.
 1385          */
 1386         fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
 1387         fp->f_count = 1;
 1388         if (resultfp)
 1389                 fp->f_count++;
 1390         fp->f_cred = crhold(td->td_ucred);
 1391         fp->f_ops = &badfileops;
 1392         fp->f_data = NULL;
 1393         fp->f_vnode = NULL;
 1394         FILEDESC_XLOCK(p->p_fd);
 1395         if ((fq = p->p_fd->fd_ofiles[0])) {
 1396                 LIST_INSERT_AFTER(fq, fp, f_list);
 1397         } else {
 1398                 LIST_INSERT_HEAD(&filehead, fp, f_list);
 1399         }
 1400         sx_xunlock(&filelist_lock);
 1401         if ((error = fdalloc(td, 0, &i))) {
 1402                 FILEDESC_XUNLOCK(p->p_fd);
 1403                 fdrop(fp, td);
 1404                 if (resultfp)
 1405                         fdrop(fp, td);
 1406                 return (error);
 1407         }
 1408         p->p_fd->fd_ofiles[i] = fp;
 1409         FILEDESC_XUNLOCK(p->p_fd);
 1410         if (resultfp)
 1411                 *resultfp = fp;
 1412         if (resultfd)
 1413                 *resultfd = i;
 1414         return (0);
 1415 }
 1416 
 1417 /*
 1418  * Build a new filedesc structure from another.
 1419  * Copy the current, root, and jail root vnode references.
 1420  */
 1421 struct filedesc *
 1422 fdinit(struct filedesc *fdp)
 1423 {
 1424         struct filedesc0 *newfdp;
 1425 
 1426         newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
 1427         FILEDESC_LOCK_INIT(&newfdp->fd_fd);
 1428         if (fdp != NULL) {
 1429                 FILEDESC_XLOCK(fdp);
 1430                 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
 1431                 if (newfdp->fd_fd.fd_cdir)
 1432                         VREF(newfdp->fd_fd.fd_cdir);
 1433                 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
 1434                 if (newfdp->fd_fd.fd_rdir)
 1435                         VREF(newfdp->fd_fd.fd_rdir);
 1436                 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
 1437                 if (newfdp->fd_fd.fd_jdir)
 1438                         VREF(newfdp->fd_fd.fd_jdir);
 1439                 FILEDESC_XUNLOCK(fdp);
 1440         }
 1441 
 1442         /* Create the file descriptor table. */
 1443         newfdp->fd_fd.fd_refcnt = 1;
 1444         newfdp->fd_fd.fd_holdcnt = 1;
 1445         newfdp->fd_fd.fd_cmask = CMASK;
 1446         newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
 1447         newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
 1448         newfdp->fd_fd.fd_nfiles = NDFILE;
 1449         newfdp->fd_fd.fd_map = newfdp->fd_dmap;
 1450         newfdp->fd_fd.fd_lastfile = -1;
 1451         return (&newfdp->fd_fd);
 1452 }
 1453 
 1454 static struct filedesc *
 1455 fdhold(struct proc *p)
 1456 {
 1457         struct filedesc *fdp;
 1458 
 1459         mtx_lock(&fdesc_mtx);
 1460         fdp = p->p_fd;
 1461         if (fdp != NULL)
 1462                 fdp->fd_holdcnt++;
 1463         mtx_unlock(&fdesc_mtx);
 1464         return (fdp);
 1465 }
 1466 
 1467 static void
 1468 fddrop(struct filedesc *fdp)
 1469 {
 1470         int i;
 1471 
 1472         mtx_lock(&fdesc_mtx);
 1473         i = --fdp->fd_holdcnt;
 1474         mtx_unlock(&fdesc_mtx);
 1475         if (i > 0)
 1476                 return;
 1477 
 1478         FILEDESC_LOCK_DESTROY(fdp);
 1479         FREE(fdp, M_FILEDESC);
 1480 }
 1481 
 1482 /*
 1483  * Share a filedesc structure.
 1484  */
 1485 struct filedesc *
 1486 fdshare(struct filedesc *fdp)
 1487 {
 1488 
 1489         FILEDESC_XLOCK(fdp);
 1490         fdp->fd_refcnt++;
 1491         FILEDESC_XUNLOCK(fdp);
 1492         return (fdp);
 1493 }
 1494 
 1495 /*
 1496  * Unshare a filedesc structure, if necessary by making a copy
 1497  */
 1498 void
 1499 fdunshare(struct proc *p, struct thread *td)
 1500 {
 1501 
 1502         FILEDESC_XLOCK(p->p_fd);
 1503         if (p->p_fd->fd_refcnt > 1) {
 1504                 struct filedesc *tmp;
 1505 
 1506                 FILEDESC_XUNLOCK(p->p_fd);
 1507                 tmp = fdcopy(p->p_fd);
 1508                 fdfree(td);
 1509                 p->p_fd = tmp;
 1510         } else
 1511                 FILEDESC_XUNLOCK(p->p_fd);
 1512 }
 1513 
 1514 /*
 1515  * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
 1516  * this is to ease callers, not catch errors.
 1517  */
 1518 struct filedesc *
 1519 fdcopy(struct filedesc *fdp)
 1520 {
 1521         struct filedesc *newfdp;
 1522         int i;
 1523 
 1524         /* Certain daemons might not have file descriptors. */
 1525         if (fdp == NULL)
 1526                 return (NULL);
 1527 
 1528         newfdp = fdinit(fdp);
 1529         FILEDESC_SLOCK(fdp);
 1530         while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
 1531                 FILEDESC_SUNLOCK(fdp);
 1532                 FILEDESC_XLOCK(newfdp);
 1533                 fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 1534                 FILEDESC_XUNLOCK(newfdp);
 1535                 FILEDESC_SLOCK(fdp);
 1536         }
 1537         /* copy everything except kqueue descriptors */
 1538         newfdp->fd_freefile = -1;
 1539         for (i = 0; i <= fdp->fd_lastfile; ++i) {
 1540                 if (fdisused(fdp, i) &&
 1541                     fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) {
 1542                         newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
 1543                         newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
 1544                         fhold(newfdp->fd_ofiles[i]);
 1545                         newfdp->fd_lastfile = i;
 1546                 } else {
 1547                         if (newfdp->fd_freefile == -1)
 1548                                 newfdp->fd_freefile = i;
 1549                 }
 1550         }
 1551         FILEDESC_SUNLOCK(fdp);
 1552         FILEDESC_XLOCK(newfdp);
 1553         for (i = 0; i <= newfdp->fd_lastfile; ++i)
 1554                 if (newfdp->fd_ofiles[i] != NULL)
 1555                         fdused(newfdp, i);
 1556         FILEDESC_XUNLOCK(newfdp);
 1557         FILEDESC_SLOCK(fdp);
 1558         if (newfdp->fd_freefile == -1)
 1559                 newfdp->fd_freefile = i;
 1560         newfdp->fd_cmask = fdp->fd_cmask;
 1561         FILEDESC_SUNLOCK(fdp);
 1562         return (newfdp);
 1563 }
 1564 
 1565 /*
 1566  * Release a filedesc structure.
 1567  */
 1568 void
 1569 fdfree(struct thread *td)
 1570 {
 1571         struct filedesc *fdp;
 1572         struct file **fpp;
 1573         int i, locked;
 1574         struct filedesc_to_leader *fdtol;
 1575         struct file *fp;
 1576         struct vnode *cdir, *jdir, *rdir, *vp;
 1577         struct flock lf;
 1578 
 1579         /* Certain daemons might not have file descriptors. */
 1580         fdp = td->td_proc->p_fd;
 1581         if (fdp == NULL)
 1582                 return;
 1583 
 1584         /* Check for special need to clear POSIX style locks */
 1585         fdtol = td->td_proc->p_fdtol;
 1586         if (fdtol != NULL) {
 1587                 FILEDESC_XLOCK(fdp);
 1588                 KASSERT(fdtol->fdl_refcount > 0,
 1589                         ("filedesc_to_refcount botch: fdl_refcount=%d",
 1590                          fdtol->fdl_refcount));
 1591                 if (fdtol->fdl_refcount == 1 &&
 1592                     (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
 1593                         for (i = 0, fpp = fdp->fd_ofiles;
 1594                              i <= fdp->fd_lastfile;
 1595                              i++, fpp++) {
 1596                                 if (*fpp == NULL ||
 1597                                     (*fpp)->f_type != DTYPE_VNODE)
 1598                                         continue;
 1599                                 fp = *fpp;
 1600                                 fhold(fp);
 1601                                 FILEDESC_XUNLOCK(fdp);
 1602                                 lf.l_whence = SEEK_SET;
 1603                                 lf.l_start = 0;
 1604                                 lf.l_len = 0;
 1605                                 lf.l_type = F_UNLCK;
 1606                                 vp = fp->f_vnode;
 1607                                 locked = VFS_LOCK_GIANT(vp->v_mount);
 1608                                 (void) VOP_ADVLOCK(vp,
 1609                                                    (caddr_t)td->td_proc->
 1610                                                    p_leader,
 1611                                                    F_UNLCK,
 1612                                                    &lf,
 1613                                                    F_POSIX);
 1614                                 VFS_UNLOCK_GIANT(locked);
 1615                                 FILEDESC_XLOCK(fdp);
 1616                                 fdrop(fp, td);
 1617                                 fpp = fdp->fd_ofiles + i;
 1618                         }
 1619                 }
 1620         retry:
 1621                 if (fdtol->fdl_refcount == 1) {
 1622                         if (fdp->fd_holdleaderscount > 0 &&
 1623                             (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
 1624                                 /*
 1625                                  * close() or do_dup() has cleared a reference
 1626                                  * in a shared file descriptor table.
 1627                                  */
 1628                                 fdp->fd_holdleaderswakeup = 1;
 1629                                 sx_sleep(&fdp->fd_holdleaderscount,
 1630                                     FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
 1631                                 goto retry;
 1632                         }
 1633                         if (fdtol->fdl_holdcount > 0) {
 1634                                 /*
 1635                                  * Ensure that fdtol->fdl_leader remains
 1636                                  * valid in closef().
 1637                                  */
 1638                                 fdtol->fdl_wakeup = 1;
 1639                                 sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
 1640                                     "fdlhold", 0);
 1641                                 goto retry;
 1642                         }
 1643                 }
 1644                 fdtol->fdl_refcount--;
 1645                 if (fdtol->fdl_refcount == 0 &&
 1646                     fdtol->fdl_holdcount == 0) {
 1647                         fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
 1648                         fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
 1649                 } else
 1650                         fdtol = NULL;
 1651                 td->td_proc->p_fdtol = NULL;
 1652                 FILEDESC_XUNLOCK(fdp);
 1653                 if (fdtol != NULL)
 1654                         FREE(fdtol, M_FILEDESC_TO_LEADER);
 1655         }
 1656         FILEDESC_XLOCK(fdp);
 1657         i = --fdp->fd_refcnt;
 1658         FILEDESC_XUNLOCK(fdp);
 1659         if (i > 0)
 1660                 return;
 1661         /*
 1662          * We are the last reference to the structure, so we can
 1663          * safely assume it will not change out from under us.
 1664          */
 1665         fpp = fdp->fd_ofiles;
 1666         for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
 1667                 if (*fpp)
 1668                         (void) closef(*fpp, td);
 1669         }
 1670         FILEDESC_XLOCK(fdp);
 1671 
 1672         /* XXX This should happen earlier. */
 1673         mtx_lock(&fdesc_mtx);
 1674         td->td_proc->p_fd = NULL;
 1675         mtx_unlock(&fdesc_mtx);
 1676 
 1677         if (fdp->fd_nfiles > NDFILE)
 1678                 FREE(fdp->fd_ofiles, M_FILEDESC);
 1679         if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
 1680                 FREE(fdp->fd_map, M_FILEDESC);
 1681 
 1682         fdp->fd_nfiles = 0;
 1683 
 1684         cdir = fdp->fd_cdir;
 1685         fdp->fd_cdir = NULL;
 1686         rdir = fdp->fd_rdir;
 1687         fdp->fd_rdir = NULL;
 1688         jdir = fdp->fd_jdir;
 1689         fdp->fd_jdir = NULL;
 1690         FILEDESC_XUNLOCK(fdp);
 1691 
 1692         if (cdir) {
 1693                 locked = VFS_LOCK_GIANT(cdir->v_mount);
 1694                 vrele(cdir);
 1695                 VFS_UNLOCK_GIANT(locked);
 1696         }
 1697         if (rdir) {
 1698                 locked = VFS_LOCK_GIANT(rdir->v_mount);
 1699                 vrele(rdir);
 1700                 VFS_UNLOCK_GIANT(locked);
 1701         }
 1702         if (jdir) {
 1703                 locked = VFS_LOCK_GIANT(jdir->v_mount);
 1704                 vrele(jdir);
 1705                 VFS_UNLOCK_GIANT(locked);
 1706         }
 1707 
 1708         fddrop(fdp);
 1709 }
 1710 
 1711 /*
 1712  * For setugid programs, we don't want people to use that setugidness
 1713  * to generate error messages which write to a file which would
 1714  * otherwise be off-limits to the process.  We check for filesystems where
 1715  * the vnode can change out from under us after execve (like [lin]procfs).
 1716  *
 1717  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 1718  * sufficient.  We also don't check for setugidness since we know we are.
       *
       * Returns 1 when fp is a DTYPE_VNODE file whose vnode has VV_PROCDEP set
       * in v_vflag, and 0 for every other file.
 1719  */
 1720 static int
 1721 is_unsafe(struct file *fp)
 1722 {
 1723         if (fp->f_type == DTYPE_VNODE) {
 1724                 struct vnode *vp = fp->f_vnode;
 1725 
 1726                 if ((vp->v_vflag & VV_PROCDEP) != 0)
 1727                         return (1);
 1728         }
 1729         return (0);
 1730 }
 1731 
 1732 /*
 1733  * Make this setugid thing safe, if at all possible.
       *
       * Walks descriptors 0, 1 and 2 of td's process (stopping earlier if
       * fd_lastfile is smaller) and closes each one that is open and for which
       * is_unsafe() returns non-zero.  Does nothing when the process has no
       * descriptor table.
 1734  */
 1735 void
 1736 setugidsafety(struct thread *td)
 1737 {
 1738         struct filedesc *fdp;
 1739         int i;
 1740 
 1741         /* Certain daemons might not have file descriptors. */
 1742         fdp = td->td_proc->p_fd;
 1743         if (fdp == NULL)
 1744                 return;
 1745 
 1746         /*
 1747          * Note: fdp->fd_ofiles may be reallocated out from under us while
 1748          * we are blocked in a close.  Be careful!
 1749          */
 1750         FILEDESC_XLOCK(fdp);
 1751         for (i = 0; i <= fdp->fd_lastfile; i++) {
                      /* Only stdin, stdout and stderr are of interest. */
 1752                 if (i > 2)
 1753                         break;
 1754                 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
 1755                         struct file *fp;
 1756 
 1757                         knote_fdclose(td, i);
 1758                         /*
 1759                          * NULL-out descriptor prior to close to avoid
 1760                          * a race while close blocks.
 1761                          */
 1762                         fp = fdp->fd_ofiles[i];
 1763                         fdp->fd_ofiles[i] = NULL;
 1764                         fdp->fd_ofileflags[i] = 0;
 1765                         fdunused(fdp, i);
                              /*
                               * closef() is called with the table lock
                               * dropped; the lock is retaken before the loop
                               * re-reads fd_lastfile and fd_ofiles.
                               */
 1766                         FILEDESC_XUNLOCK(fdp);
 1767                         (void) closef(fp, td);
 1768                         FILEDESC_XLOCK(fdp);
 1769                 }
 1770         }
 1771         FILEDESC_XUNLOCK(fdp);
 1772 }
 1773 
 1774 /*
 1775  * If a specific file object occupies a specific file descriptor, close the
 1776  * file descriptor entry and drop a reference on the file object.  This is a
 1777  * convenience function to handle a subsequent error in a function that calls
 1778  * falloc() that handles the race that another thread might have closed the
 1779  * file descriptor out from under the thread creating the file object.
       *
       * fdp is the descriptor table, idx the slot to test, fp the file expected
       * in that slot, and td the thread passed on to fdrop().  When the slot
       * holds a different file (or nothing), the table is left untouched and no
       * reference is dropped.
 1780  */
 1781 void
 1782 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
 1783 {
 1784 
 1785         FILEDESC_XLOCK(fdp);
 1786         if (fdp->fd_ofiles[idx] == fp) {
 1787                 fdp->fd_ofiles[idx] = NULL;
 1788                 fdunused(fdp, idx);
                      /* The reference is dropped only after the table lock is released. */
 1789                 FILEDESC_XUNLOCK(fdp);
 1790                 fdrop(fp, td);
 1791         } else
 1792                 FILEDESC_XUNLOCK(fdp);
 1793 }
 1794 
 1795 /*
 1796  * Close files on exec: every open descriptor of td's process that either
       * has UF_EXCLOSE set in its descriptor flags or refers to a DTYPE_MQUEUE
       * file is removed from the table and closed.  Does nothing when the
       * process has no descriptor table.
 1797  */
 1798 void
 1799 fdcloseexec(struct thread *td)
 1800 {
 1801         struct filedesc *fdp;
 1802         int i;
 1803 
 1804         /* Certain daemons might not have file descriptors. */
 1805         fdp = td->td_proc->p_fd;
 1806         if (fdp == NULL)
 1807                 return;
 1808 
 1809         FILEDESC_XLOCK(fdp);
 1810 
 1811         /*
 1812          * We cannot cache fd_ofiles or fd_ofileflags since operations
 1813          * may block and rip them out from under us.
 1814          */
 1815         for (i = 0; i <= fdp->fd_lastfile; i++) {
 1816                 if (fdp->fd_ofiles[i] != NULL &&
 1817                     (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
 1818                     (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
 1819                         struct file *fp;
 1820 
 1821                         knote_fdclose(td, i);
 1822                         /*
 1823                          * NULL-out descriptor prior to close to avoid
 1824                          * a race while close blocks.
 1825                          */
 1826                         fp = fdp->fd_ofiles[i];
 1827                         fdp->fd_ofiles[i] = NULL;
 1828                         fdp->fd_ofileflags[i] = 0;
 1829                         fdunused(fdp, i);
                              /* Message queues get their own hook, called with the table still locked. */
 1830                         if (fp->f_type == DTYPE_MQUEUE)
 1831                                 mq_fdclose(td, i, fp);
                              /* closef() runs unlocked; the lock is retaken for the next iteration. */
 1832                         FILEDESC_XUNLOCK(fdp);
 1833                         (void) closef(fp, td);
 1834                         FILEDESC_XLOCK(fdp);
 1835                 }
 1836         }
 1837         FILEDESC_XUNLOCK(fdp);
 1838 }
 1839 
 1840 /*
 1841  * It is unsafe for set[ug]id processes to be started with file
 1842  * descriptors 0..2 closed, as these descriptors are given implicit
 1843  * significance in the Standard C library.  fdcheckstd() will create a
 1844  * descriptor referencing /dev/null for each of stdin, stdout, and
 1845  * stderr that is not already open.
       *
       * The first missing descriptor is filled by opening /dev/null read/write;
       * any further missing descriptors are filled by duplicating that one with
       * do_dup(DUP_FIXED).  td->td_retval[0] is saved around the open and
       * restored afterwards.
       *
       * Returns 0 when the process has no descriptor table or when all three
       * descriptors end up open; otherwise returns the error from kern_open()
       * or do_dup() and stops at the first failure.
 1846  */
 1847 int
 1848 fdcheckstd(struct thread *td)
 1849 {
 1850         struct filedesc *fdp;
 1851         register_t retval, save;
 1852         int i, error, devnull;
 1853 
 1854         fdp = td->td_proc->p_fd;
 1855         if (fdp == NULL)
 1856                 return (0);
 1857         KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 1858         devnull = -1;
 1859         error = 0;
 1860         for (i = 0; i < 3; i++) {
 1861                 if (fdp->fd_ofiles[i] != NULL)
 1862                         continue;
 1863                 if (devnull < 0) {
 1864                         save = td->td_retval[0];
 1865                         error = kern_open(td, "/dev/null", UIO_SYSSPACE,
 1866                             O_RDWR, 0);
 1867                         devnull = td->td_retval[0];
 1868                         td->td_retval[0] = save;
 1869                         if (error)
 1870                                 break;
                              /*
                               * td_retval[0] is only known to hold the new
                               * descriptor once kern_open() has succeeded, so
                               * the assertion must come after the error check;
                               * otherwise a failed open could trip it instead
                               * of returning the error to the caller.
                               */
 1871                         KASSERT(devnull == i, ("oof, we didn't get our fd"));
 1872                 } else {
 1873                         error = do_dup(td, DUP_FIXED, devnull, i, &retval);
 1874                         if (error != 0)
 1875                                 break;
 1876                 }
 1877         }
 1878         return (error);
 1879 }
 1880 
 1881 /*
 1882  * Internal form of close.  Decrement reference count on file structure.
 1883  * Note: td may be NULL when closing a file that was being passed in a
 1884  * message.
 1885  *
 1886  * XXXRW: Giant is not required for the caller, but often will be held; this
 1887  * makes it moderately likely the Giant will be recursed in the VFS case.
       *
       * For a vnode-backed file closed with a non-NULL td, POSIX advisory locks
       * are released first: once on behalf of td's process leader (if it has
       * P_ADVLOCK set), and then once for every other leader on the p_fdtol
       * ring that has P_ADVLOCK set.  The return value is whatever fdrop()
       * returns.
 1888  */
 1889 int
 1890 closef(struct file *fp, struct thread *td)
 1891 {
 1892         struct vnode *vp;
 1893         struct flock lf;
 1894         struct filedesc_to_leader *fdtol;
 1895         struct filedesc *fdp;
 1896 
 1897         /*
 1898          * POSIX record locking dictates that any close releases ALL
 1899          * locks owned by this process.  This is handled by setting
 1900          * a flag in the unlock to free ONLY locks obeying POSIX
 1901          * semantics, and not to free BSD-style file locks.
 1902          * If the descriptor was in a message, POSIX-style locks
 1903          * aren't passed with the descriptor, and the thread pointer
 1904          * will be NULL.  Callers should be careful only to pass a
 1905          * NULL thread pointer when there really is no owning
 1906          * context that might have locks, or the locks will be
 1907          * leaked.
 1908          */
 1909         if (fp->f_type == DTYPE_VNODE && td != NULL) {
 1910                 int vfslocked;
 1911 
 1912                 vp = fp->f_vnode;
 1913                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1914                 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
                              /* Whole-file unlock: start 0, length 0, from SEEK_SET. */
 1915                         lf.l_whence = SEEK_SET;
 1916                         lf.l_start = 0;
 1917                         lf.l_len = 0;
 1918                         lf.l_type = F_UNLCK;
 1919                         (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
 1920                                            F_UNLCK, &lf, F_POSIX);
 1921                 }
 1922                 fdtol = td->td_proc->p_fdtol;
 1923                 if (fdtol != NULL) {
 1924                         /*
 1925                          * Handle special case where file descriptor table is
 1926                          * shared between multiple process leaders.
 1927                          */
 1928                         fdp = td->td_proc->p_fd;
 1929                         FILEDESC_XLOCK(fdp);
                              /*
                               * Walk the circular list starting after our own
                               * entry and stopping when we get back to it.
                               */
 1930                         for (fdtol = fdtol->fdl_next;
 1931                              fdtol != td->td_proc->p_fdtol;
 1932                              fdtol = fdtol->fdl_next) {
 1933                                 if ((fdtol->fdl_leader->p_flag &
 1934                                      P_ADVLOCK) == 0)
 1935                                         continue;
                                      /*
                                       * Take a hold on this entry before the
                                       * table lock is dropped around
                                       * VOP_ADVLOCK(); fdfree() sleeps while
                                       * fdl_holdcount is non-zero so that
                                       * fdl_leader remains valid here.
                                       */
 1936                                 fdtol->fdl_holdcount++;
 1937                                 FILEDESC_XUNLOCK(fdp);
 1938                                 lf.l_whence = SEEK_SET;
 1939                                 lf.l_start = 0;
 1940                                 lf.l_len = 0;
 1941                                 lf.l_type = F_UNLCK;
 1942                                 vp = fp->f_vnode;
 1943                                 (void) VOP_ADVLOCK(vp,
 1944                                                    (caddr_t)fdtol->fdl_leader,
 1945                                                    F_UNLCK, &lf, F_POSIX);
 1946                                 FILEDESC_XLOCK(fdp);
 1947                                 fdtol->fdl_holdcount--;
                                      /* Last hold gone and a sleeper asked to be woken. */
 1948                                 if (fdtol->fdl_holdcount == 0 &&
 1949                                     fdtol->fdl_wakeup != 0) {
 1950                                         fdtol->fdl_wakeup = 0;
 1951                                         wakeup(fdtol);
 1952                                 }
 1953                         }
 1954                         FILEDESC_XUNLOCK(fdp);
 1955                 }
 1956                 VFS_UNLOCK_GIANT(vfslocked);
 1957         }
 1958         return (fdrop(fp, td));
 1959 }
 1960 
 1961 /*
 1962  * Extract the file pointer associated with the specified descriptor for the
 1963  * current user process.
 1964  *
 1965  * If the descriptor doesn't exist, EBADF is returned.
 1966  *
 1967  * If the descriptor exists but doesn't match 'flags' then EBADF is returned
 1968  * for both read (FREAD) and write (FWRITE) attempts.
 1969  *
 1970  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 1971  * It should be dropped with fdrop().  If it is not set, then the refcount
 1972  * will not be bumped however the thread's filedesc struct will be returned
 1973  * locked (for fgetsock).
 1974  *
 1975  * If an error occurred the non-zero error is returned and *fpp is set to
 1976  * NULL.  Otherwise *fpp is set and zero is returned.
       *
       * EBADF is also returned when td is NULL, when the process has no
       * descriptor table, and when the file's f_ops is badfileops.  On every
       * error path the shared filedesc lock has already been released.
 1977  */
 1978 static __inline int
 1979 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
 1980 {
 1981         struct filedesc *fdp;
 1982         struct file *fp;
 1983 
 1984         *fpp = NULL;
 1985         if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
 1986                 return (EBADF);
 1987         FILEDESC_SLOCK(fdp);
 1988         if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
 1989                 FILEDESC_SUNLOCK(fdp);
 1990                 return (EBADF);
 1991         }
 1992 
 1993         /*
 1994          * FREAD and FWRITE failure return EBADF as per POSIX.
 1995          *
 1996          * Only one flag, or 0, may be specified.
 1997          */
 1998         if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
 1999                 FILEDESC_SUNLOCK(fdp);
 2000                 return (EBADF);
 2001         }
 2002         if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
 2003                 FILEDESC_SUNLOCK(fdp);
 2004                 return (EBADF);
 2005         }
              /*
               * With hold set, take a reference and release the lock; without
               * it the shared lock stays held and the caller must release it.
               */
 2006         if (hold) {
 2007                 fhold(fp);
 2008                 FILEDESC_SUNLOCK(fdp);
 2009         }
 2010         *fpp = fp;
 2011         return (0);
 2012 }
 2013 
 2014 int
 2015 fget(struct thread *td, int fd, struct file **fpp)
 2016 {
              /*
               * Look up fd with no access-mode check (flags 0) and return the
               * file held (hold 1) in *fpp; result is _fget()'s error code.
               */
 2017 
 2018         return(_fget(td, fd, fpp, 0, 1));
 2019 }
 2020 
 2021 int
 2022 fget_read(struct thread *td, int fd, struct file **fpp)
 2023 {
              /*
               * As fget(), but passes FREAD so that _fget() rejects a file that
               * was not opened for reading.
               */
 2024 
 2025         return(_fget(td, fd, fpp, FREAD, 1));
 2026 }
 2027 
 2028 int
 2029 fget_write(struct thread *td, int fd, struct file **fpp)
 2030 {
              /*
               * As fget(), but passes FWRITE so that _fget() rejects a file that
               * was not opened for writing.
               */
 2031 
 2032         return(_fget(td, fd, fpp, FWRITE, 1));
 2033 }
 2034 
 2035 /*
 2036  * Like fget() but loads the underlying vnode, or returns an error if the
 2037  * descriptor does not represent a vnode.  Note that pipes use vnodes but
 2038  * never have VM objects.  The returned vnode will be vref()'d.
 2039  *
 2040  * XXX: what about the unused flags ?
       *
       * On success *vpp is the file's f_vnode and 0 is returned.  *vpp is NULL
       * on failure: either the error from _fget(), or EINVAL when the file has
       * no vnode.
 2041  */
 2042 static __inline int
 2043 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
 2044 {
 2045         struct file *fp;
 2046         int error;
 2047 
 2048         *vpp = NULL;
              /* hold == 0: on success _fget() returns with the filedesc lock held. */
 2049         if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
 2050                 return (error);
 2051         if (fp->f_vnode == NULL) {
 2052                 error = EINVAL;
 2053         } else {
 2054                 *vpp = fp->f_vnode;
 2055                 vref(*vpp);
 2056         }
              /* Release the shared lock left held by _fget(). */
 2057         FILEDESC_SUNLOCK(td->td_proc->p_fd);
 2058         return (error);
 2059 }
 2060 
 2061 int
 2062 fgetvp(struct thread *td, int fd, struct vnode **vpp)
 2063 {
              /* Fetch the vnode behind fd with no access-mode check (flags 0). */
 2064 
 2065         return (_fgetvp(td, fd, vpp, 0));
 2066 }
 2067 
 2068 int
 2069 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
 2070 {
              /* As fgetvp(), but the file must have been opened with FREAD. */
 2071 
 2072         return (_fgetvp(td, fd, vpp, FREAD));
 2073 }
 2074 
 2075 #ifdef notyet
 2076 int
 2077 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
 2078 {
              /*
               * As fgetvp(), but the file must have been opened with FWRITE.
               * Only compiled when "notyet" is defined.
               */
 2079 
 2080         return (_fgetvp(td, fd, vpp, FWRITE));
 2081 }
 2082 #endif
 2083 
 2084 /*
 2085  * Like fget() but loads the underlying socket, or returns an error if the
 2086  * descriptor does not represent a socket.
 2087  *
 2088  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
 2089  * in the future.
 2090  *
 2091  * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
 2092  * on their file descriptor reference to prevent the socket from being free'd
 2093  * during use.
       *
       * On success *spp is the file's f_data and, when fflagp is non-NULL,
       * *fflagp is the file's f_flag.  On failure *spp is NULL, *fflagp (if
       * given) is 0, and the return value is the error from _fget() or
       * ENOTSOCK.
 2094  */
 2095 int
 2096 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
 2097 {
 2098         struct file *fp;
 2099         int error;
 2100 
 2101         *spp = NULL;
 2102         if (fflagp != NULL)
 2103                 *fflagp = 0;
              /* hold == 0: on success _fget() returns with the filedesc lock held. */
 2104         if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
 2105                 return (error);
 2106         if (fp->f_type != DTYPE_SOCKET) {
 2107                 error = ENOTSOCK;
 2108         } else {
 2109                 *spp = fp->f_data;
 2110                 if (fflagp)
 2111                         *fflagp = fp->f_flag;
                      /* The socket reference is taken under the socket lock. */
 2112                 SOCK_LOCK(*spp);
 2113                 soref(*spp);
 2114                 SOCK_UNLOCK(*spp);
 2115         }
              /* Release the shared lock left held by _fget(). */
 2116         FILEDESC_SUNLOCK(td->td_proc->p_fd);
 2117         return (error);
 2118 }
 2119 
 2120 /*
 2121  * Drop the reference count on the socket and XXX release the SX lock in the
 2122  * future.  The last reference closes the socket.
 2123  *
 2124  * XXXRW: fputsock() is deprecated, see comment for fgetsock().
 2125  */
 2126 void
 2127 fputsock(struct socket *so)
 2128 {
 2129 
              /*
               * NOTE(review): both locks are acquired here and no unlock is
               * visible; presumably sorele() releases them -- confirm against
               * its definition.
               */
 2130         ACCEPT_LOCK();
 2131         SOCK_LOCK(so);
 2132         sorele(so);
 2133 }
 2134 
 2135 int
 2136 fdrop(struct file *fp, struct thread *td)
 2137 {
              /*
               * Drop one reference on fp: take the file lock and hand off to
               * fdrop_locked(), which releases the lock itself.  Returns
               * fdrop_locked()'s result.
               */
 2138 
 2139         FILE_LOCK(fp);
 2140         return (fdrop_locked(fp, td));
 2141 }
 2142 
 2143 /*
 2144  * Drop reference on struct file passed in; when the reference count
 2145  * reaches zero the file is closed through fo_close() and freed.
 2146  * Expects struct file locked, and will unlock it.
       *
       * Returns 0 while other references remain.  For the last reference it
       * returns the result of fo_close(), or 0 when the file's f_ops is
       * badfileops and fo_close() is skipped.
 2147  */
 2148 static int
 2149 fdrop_locked(struct file *fp, struct thread *td)
 2150 {
 2151         int error;
 2152 
 2153         FILE_LOCK_ASSERT(fp, MA_OWNED);
 2154 
 2155         if (--fp->f_count > 0) {
 2156                 FILE_UNLOCK(fp);
 2157                 return (0);
 2158         }
 2159 
 2160         /*
 2161          * We might have just dropped the last reference to a file
 2162          * object that is for a UNIX domain socket whose message
 2163          * buffers are being examined in unp_gc().  If that is the
 2164          * case, FWAIT will be set in f_gcflag and we need to wait for
 2165          * unp_gc() to finish its scan.
 2166          */
 2167         while (fp->f_gcflag & FWAIT)
 2168                 msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0);
 2169 
 2170         /* We have the last ref so we can proceed without the file lock. */
 2171         FILE_UNLOCK(fp);
 2172         if (fp->f_count < 0)
 2173                 panic("fdrop: count < 0");
 2174         if (fp->f_ops != &badfileops)
 2175                 error = fo_close(fp, td);
 2176         else
 2177                 error = 0;
 2178 
              /* Unlink from the global file list and adjust the open-file count. */
 2179         sx_xlock(&filelist_lock);
 2180         LIST_REMOVE(fp, f_list);
 2181         openfiles--;
 2182         sx_xunlock(&filelist_lock);
 2183         crfree(fp->f_cred);
 2184         uma_zfree(file_zone, fp);
 2185 
 2186         return (error);
 2187 }
 2188 
 2189 /*
 2190  * Apply an advisory lock on a file descriptor.
 2191  *
 2192  * Just attempt to get a record lock of the requested type on the entire file
 2193  * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
       *
       * uap->how is tested in this order: LOCK_UN (unlock), LOCK_EX (write
       * lock), LOCK_SH (read lock).  If none is set, EBADF is returned.  A
       * descriptor that is not DTYPE_VNODE yields EOPNOTSUPP.  Unless LOCK_NB
       * is set, F_WAIT is added to the VOP_ADVLOCK() flags.  Otherwise the
       * return value is the error from fget() or from VOP_ADVLOCK().
 2194  */
 2195 #ifndef _SYS_SYSPROTO_H_
 2196 struct flock_args {
 2197         int     fd;
 2198         int     how;
 2199 };
 2200 #endif
 2201 /* ARGSUSED */
 2202 int
 2203 flock(struct thread *td, struct flock_args *uap)
 2204 {
 2205         struct file *fp;
 2206         struct vnode *vp;
 2207         struct flock lf;
 2208         int vfslocked;
 2209         int error;
 2210 
 2211         if ((error = fget(td, uap->fd, &fp)) != 0)
 2212                 return (error);
 2213         if (fp->f_type != DTYPE_VNODE) {
 2214                 fdrop(fp, td);
 2215                 return (EOPNOTSUPP);
 2216         }
 2217 
 2218         vp = fp->f_vnode;
 2219         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2220         lf.l_whence = SEEK_SET;
 2221         lf.l_start = 0;
 2222         lf.l_len = 0;
 2223         if (uap->how & LOCK_UN) {
 2224                 lf.l_type = F_UNLCK;
                      /* FHASLOCK is cleared under the file lock before the unlock request. */
 2225                 FILE_LOCK(fp);
 2226                 fp->f_flag &= ~FHASLOCK;
 2227                 FILE_UNLOCK(fp);
 2228                 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 2229                 goto done2;
 2230         }
 2231         if (uap->how & LOCK_EX)
 2232                 lf.l_type = F_WRLCK;
 2233         else if (uap->how & LOCK_SH)
 2234                 lf.l_type = F_RDLCK;
 2235         else {
 2236                 error = EBADF;
 2237                 goto done2;
 2238         }
              /* FHASLOCK is set before the lock request is issued, whatever its outcome. */
 2239         FILE_LOCK(fp);
 2240         fp->f_flag |= FHASLOCK;
 2241         FILE_UNLOCK(fp);
 2242         error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 2243             (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
 2244 done2:
              /* Common exit: drop the fget() reference, then release Giant if it was taken. */
 2245         fdrop(fp, td);
 2246         VFS_UNLOCK_GIANT(vfslocked);
 2247         return (error);
 2248 }
 2249 /*
 2250  * Duplicate the specified descriptor to a free descriptor.
       *
       * fdp is the descriptor table, dfd the existing descriptor and indx the
       * slot that receives it.  'error' selects the operation (see the switch
       * below) and 'mode' is the open mode checked in the ENODEV case.
       *
       * Returns 0 on success, EBADF when dfd is out of range or not open,
       * EACCES when 'mode' asks for access that dfd's file does not have, and
       * otherwise hands 'error' back unchanged.  indx itself is not
       * range-checked here.
 2251  */
 2252 int
 2253 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
 2254 {
 2255         struct file *wfp;
 2256         struct file *fp;
 2257 
 2258         /*
 2259          * If the to-be-dup'd fd number is greater than the allowed number
 2260          * of file descriptors, or the fd to be dup'd has already been
 2261          * closed, then reject.
 2262          */
 2263         FILEDESC_XLOCK(fdp);
 2264         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
 2265             (wfp = fdp->fd_ofiles[dfd]) == NULL) {
 2266                 FILEDESC_XUNLOCK(fdp);
 2267                 return (EBADF);
 2268         }
 2269 
 2270         /*
 2271          * There are two cases of interest here.
 2272          *
 2273          * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
 2274          *
 2275          * For ENXIO steal away the file structure from (dfd) and store it in
 2276          * (indx).  (dfd) is effectively closed by this operation.
 2277          *
 2278          * Any other error code is just returned.
 2279          */
 2280         switch (error) {
 2281         case ENODEV:
 2282                 /*
 2283                  * Check that the mode the file is being opened for is a
 2284                  * subset of the mode of the existing descriptor.
 2285                  */
 2286                 FILE_LOCK(wfp);
 2287                 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
 2288                         FILE_UNLOCK(wfp);
 2289                         FILEDESC_XUNLOCK(fdp);
 2290                         return (EACCES);
 2291                 }
                      /* Remember what indx held so its reference can be dropped after unlocking. */
 2292                 fp = fdp->fd_ofiles[indx];
 2293                 fdp->fd_ofiles[indx] = wfp;
 2294                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 2295                 if (fp == NULL)
 2296                         fdused(fdp, indx);
                      /* The new slot gets its own reference on wfp. */
 2297                 fhold_locked(wfp);
 2298                 FILE_UNLOCK(wfp);
 2299                 FILEDESC_XUNLOCK(fdp);
 2300                 if (fp != NULL)
 2301                         /*
 2302                          * We now own the reference to fp that the ofiles[]
 2303                          * array used to own.  Release it.
 2304                          */
 2305                         fdrop(fp, td);
 2306                 return (0);
 2307 
 2308         case ENXIO:
 2309                 /*
 2310                  * Steal away the file pointer from dfd and stuff it into indx.
 2311                  */
 2312                 fp = fdp->fd_ofiles[indx];
 2313                 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
 2314                 fdp->fd_ofiles[dfd] = NULL;
 2315                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 2316                 fdp->fd_ofileflags[dfd] = 0;
 2317                 fdunused(fdp, dfd);
 2318                 if (fp == NULL)
 2319                         fdused(fdp, indx);
 2320                 FILEDESC_XUNLOCK(fdp);
 2321 
 2322                 /*
 2323                  * We now own the reference to fp that the ofiles[] array
 2324                  * used to own.  Release it.
 2325                  */
 2326                 if (fp != NULL)
 2327                         fdrop(fp, td);
 2328                 return (0);
 2329 
 2330         default:
 2331                 FILEDESC_XUNLOCK(fdp);
 2332                 return (error);
 2333         }
 2334         /* NOTREACHED */
 2335 }
 2336 
 2337 /*
 2338  * Scan all active processes to see if any of them have a current or root
 2339  * directory of `olddp'. If so, replace them with the new mount point.
       *
       * Returns immediately when vrefcnt(olddp) is 1.  Each replacement takes a
       * reference on newdp and releases one on olddp; the global rootvnode is
       * switched in the same way when it equals olddp.
       *
       * NOTE(review): only fd_cdir and fd_rdir are examined; fd_jdir is not
       * touched here -- confirm whether that is intentional.
 2340  */
 2341 void
 2342 mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
 2343 {
 2344         struct filedesc *fdp;
 2345         struct proc *p;
 2346         int nrele;
 2347 
 2348         if (vrefcnt(olddp) == 1)
 2349                 return;
 2350         sx_slock(&allproc_lock);
 2351         FOREACH_PROC_IN_SYSTEM(p) {
 2352                 fdp = fdhold(p);
 2353                 if (fdp == NULL)
 2354                         continue;
                      /* nrele counts how many olddp references this table gave up. */
 2355                 nrele = 0;
 2356                 FILEDESC_XLOCK(fdp);
 2357                 if (fdp->fd_cdir == olddp) {
 2358                         vref(newdp);
 2359                         fdp->fd_cdir = newdp;
 2360                         nrele++;
 2361                 }
 2362                 if (fdp->fd_rdir == olddp) {
 2363                         vref(newdp);
 2364                         fdp->fd_rdir = newdp;
 2365                         nrele++;
 2366                 }
 2367                 FILEDESC_XUNLOCK(fdp);
 2368                 fddrop(fdp);
                      /* The vrele() calls are made after the filedesc lock is released. */
 2369                 while (nrele--)
 2370                         vrele(olddp);
 2371         }
 2372         sx_sunlock(&allproc_lock);
 2373         if (rootvnode == olddp) {
 2374                 vrele(rootvnode);
 2375                 vref(newdp);
 2376                 rootvnode = newdp;
 2377         }
 2378 }
 2379 
 2380 struct filedesc_to_leader *
 2381 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
 2382 {
              /*
               * Allocate a filedesc_to_leader entry for 'leader' with a
               * reference count of 1 and no holds.  When 'old' is non-NULL the
               * new entry is linked into old's circular list immediately after
               * it, under fdp's exclusive lock; otherwise the entry forms a
               * one-element ring pointing at itself.  The allocation uses
               * M_WAITOK and its result is not checked for NULL.
               */
 2383         struct filedesc_to_leader *fdtol;
 2384 
 2385         MALLOC(fdtol, struct filedesc_to_leader *,
 2386                sizeof(struct filedesc_to_leader),
 2387                M_FILEDESC_TO_LEADER,
 2388                M_WAITOK);
 2389         fdtol->fdl_refcount = 1;
 2390         fdtol->fdl_holdcount = 0;
 2391         fdtol->fdl_wakeup = 0;
 2392         fdtol->fdl_leader = leader;
 2393         if (old != NULL) {
 2394                 FILEDESC_XLOCK(fdp);
 2395                 fdtol->fdl_next = old->fdl_next;
 2396                 fdtol->fdl_prev = old;
 2397                 old->fdl_next = fdtol;
 2398                 fdtol->fdl_next->fdl_prev = fdtol;
 2399                 FILEDESC_XUNLOCK(fdp);
 2400         } else {
 2401                 fdtol->fdl_next = fdtol;
 2402                 fdtol->fdl_prev = fdtol;
 2403         }
 2404         return (fdtol);
 2405 }
 2406 
 2407 /*
 2408  * Get file structures.
       *
       * Sysctl handler that emits one struct xfile per open descriptor of every
       * process the requesting thread is allowed to see (p_cansee()).  When
       * the caller supplies no output buffer (req->oldptr == NULL) it instead
       * reports a size estimate: the sum of f_count over the global file list
       * plus 16, times sizeof(struct xfile).
       *
       * Returns 0 on success or the first error from sysctl_wire_old_buffer()
       * or SYSCTL_OUT().
 2409  */
 2410 static int
 2411 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 2412 {
 2413         struct xfile xf;
 2414         struct filedesc *fdp;
 2415         struct file *fp;
 2416         struct proc *p;
 2417         int error, n;
 2418 
 2419         /*
 2420          * Note: because the number of file descriptors is calculated
 2421          * in different ways for sizing vs returning the data,
 2422          * there is information leakage from the first loop.  However,
 2423          * it is of a similar order of magnitude to the leakage from
 2424          * global system statistics such as kern.openfiles.
 2425          */
 2426         error = sysctl_wire_old_buffer(req, 0);
 2427         if (error != 0)
 2428                 return (error);
 2429         if (req->oldptr == NULL) {
 2430                 n = 16;         /* A slight overestimate. */
 2431                 sx_slock(&filelist_lock);
 2432                 LIST_FOREACH(fp, &filehead, f_list) {
 2433                         /*
 2434                          * We should grab the lock, but this is an
 2435                          * estimate, so does it really matter?
 2436                          */
 2437                         /* mtx_lock(fp->f_mtxp); */
 2438                         n += fp->f_count;
 2439                         /* mtx_unlock(f->f_mtxp); */
 2440                 }
 2441                 sx_sunlock(&filelist_lock);
 2442                 return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
 2443         }
 2444         error = 0;
              /* xf is zeroed once and reused for every record. */
 2445         bzero(&xf, sizeof(xf));
 2446         xf.xf_size = sizeof(xf);
 2447         sx_slock(&allproc_lock);
 2448         FOREACH_PROC_IN_SYSTEM(p) {
 2449                 if (p->p_state == PRS_NEW)
 2450                         continue;
 2451                 PROC_LOCK(p);
 2452                 if (p_cansee(req->td, p) != 0) {
 2453                         PROC_UNLOCK(p);
 2454                         continue;
 2455                 }
 2456                 xf.xf_pid = p->p_pid;
 2457                 xf.xf_uid = p->p_ucred->cr_uid;
 2458                 PROC_UNLOCK(p);
 2459                 fdp = fdhold(p);
 2460                 if (fdp == NULL)
 2461                         continue;
 2462                 FILEDESC_SLOCK(fdp);
                      /* The scan stops early if the table's fd_refcnt is no longer positive. */
 2463                 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
 2464                         if ((fp = fdp->fd_ofiles[n]) == NULL)
 2465                                 continue;
 2466                         xf.xf_fd = n;
 2467                         xf.xf_file = fp;
 2468                         xf.xf_data = fp->f_data;
 2469                         xf.xf_vnode = fp->f_vnode;
 2470                         xf.xf_type = fp->f_type;
 2471                         xf.xf_count = fp->f_count;
 2472                         xf.xf_msgcount = fp->f_msgcount;
 2473                         xf.xf_offset = fp->f_offset;
 2474                         xf.xf_flag = fp->f_flag;
 2475                         error = SYSCTL_OUT(req, &xf, sizeof(xf));
 2476                         if (error)
 2477                                 break;
 2478                 }
 2479                 FILEDESC_SUNLOCK(fdp);
 2480                 fddrop(fdp);
                      /* An output error ends the process scan as well as the descriptor scan. */
 2481                 if (error)
 2482                         break;
 2483         }
 2484         sx_sunlock(&allproc_lock);
 2485         return (error);
 2486 }
 2487 
      /* Register the handler as the read-only opaque node KERN_FILE ("file") under _kern. */
 2488 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
 2489     0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
 2490 
 2491 #ifdef DDB
 2492 /*
 2493  * For the purposes of debugging, generate a human-readable string for the
 2494  * file type.
       *
       * Every returned name is a four-character string literal; type 0 maps to
       * "zero" and any type not listed maps to "unkn".
 2495  */
 2496 static const char *
 2497 file_type_to_name(short type)
 2498 {
 2499 
 2500         switch (type) {
 2501         case 0:
 2502                 return ("zero");
 2503         case DTYPE_VNODE:
 2504                 return ("vnod");
 2505         case DTYPE_SOCKET:
 2506                 return ("sock");
 2507         case DTYPE_PIPE:
 2508                 return ("pipe");
 2509         case DTYPE_FIFO:
 2510                 return ("fifo");
 2511         case DTYPE_KQUEUE:
 2512                 return ("kque");
 2513         case DTYPE_CRYPTO:
 2514                 return ("crpt");
 2515         case DTYPE_MQUEUE:
 2516                 return ("mque");
 2517         default:
 2518                 return ("unkn");
 2519         }
 2520 }
 2521 
 2522 /*
 2523  * For the purposes of debugging, identify a process (if any, perhaps one of
 2524  * many) that references the passed file in its file descriptor array. Return
 2525  * NULL if none.
       *
       * Processes in state PRS_NEW and processes without a descriptor table are
       * skipped.  No locks are taken during the scan.
 2526  */
 2527 static struct proc *
 2528 file_to_first_proc(struct file *fp)
 2529 {
 2530         struct filedesc *fdp;
 2531         struct proc *p;
 2532         int n;
 2533 
 2534         FOREACH_PROC_IN_SYSTEM(p) {
 2535                 if (p->p_state == PRS_NEW)
 2536                         continue;
 2537                 fdp = p->p_fd;
 2538                 if (fdp == NULL)
 2539                         continue;
                      /* Linear scan of every slot; the first match wins. */
 2540                 for (n = 0; n < fdp->fd_nfiles; n++) {
 2541                         if (fp == fdp->fd_ofiles[n])
 2542                                 return (p);
 2543                 }
 2544         }
 2545         return (NULL);
 2546 }
 2547 
 2548 static void
 2549 db_print_file(struct file *fp, int header)
 2550 {
 2551         struct proc *p;
 2552 
 2553         if (header)
 2554                 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
 2555                     "File", "Type", "Data", "Flag", "GCFl", "Count",
 2556                     "MCount", "Vnode", "FPID", "FCmd");
 2557         p = file_to_first_proc(fp);
 2558         db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
 2559             file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
 2560             fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode,
 2561             p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
 2562 }
 2563 
 2564 DB_SHOW_COMMAND(file, db_show_file)
 2565 {
 2566         struct file *fp;
 2567 
 2568         if (!have_addr) {
 2569                 db_printf("usage: show file <addr>\n");
 2570                 return;
 2571         }
 2572         fp = (struct file *)addr;
 2573         db_print_file(fp, 1);
 2574 }
 2575 
 2576 DB_SHOW_COMMAND(files, db_show_files)
 2577 {
 2578         struct file *fp;
 2579         int header;
 2580 
 2581         header = 1;
 2582         LIST_FOREACH(fp, &filehead, f_list) {
 2583                 db_print_file(fp, header);
 2584                 header = 0;
 2585         }
 2586 }
 2587 #endif
 2588 
 2589 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
 2590     &maxfilesperproc, 0, "Maximum files allowed open per process");
 2591 
 2592 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
 2593     &maxfiles, 0, "Maximum number of files");
 2594 
 2595 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
 2596     &openfiles, 0, "System-wide number of open files");
 2597 
 2598 /* ARGSUSED*/
 2599 static void
 2600 filelistinit(void *dummy)
 2601 {
 2602 
 2603         file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
 2604             NULL, NULL, UMA_ALIGN_PTR, 0);
 2605         sx_init(&filelist_lock, "filelist lock");
 2606         mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 2607         mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
 2608 }
 2609 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
 2610 
 2611 /*-------------------------------------------------------------------*/
 2612 
 2613 static int
 2614 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
 2615 {
 2616 
 2617         return (EBADF);
 2618 }
 2619 
 2620 static int
 2621 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
 2622 {
 2623 
 2624         return (EBADF);
 2625 }
 2626 
 2627 static int
 2628 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
 2629 {
 2630 
 2631         return (0);
 2632 }
 2633 
 2634 static int
 2635 badfo_kqfilter(struct file *fp, struct knote *kn)
 2636 {
 2637 
 2638         return (EBADF);
 2639 }
 2640 
 2641 static int
 2642 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
 2643 {
 2644 
 2645         return (EBADF);
 2646 }
 2647 
 2648 static int
 2649 badfo_close(struct file *fp, struct thread *td)
 2650 {
 2651 
 2652         return (EBADF);
 2653 }
 2654 
 2655 struct fileops badfileops = {
 2656         .fo_read = badfo_readwrite,
 2657         .fo_write = badfo_readwrite,
 2658         .fo_ioctl = badfo_ioctl,
 2659         .fo_poll = badfo_poll,
 2660         .fo_kqfilter = badfo_kqfilter,
 2661         .fo_stat = badfo_stat,
 2662         .fo_close = badfo_close,
 2663 };
 2664 
 2665 
 2666 /*-------------------------------------------------------------------*/
 2667 
 2668 /*
 2669  * File Descriptor pseudo-device driver (/dev/fd/).
 2670  *
 2671  * Opening minor device N dup()s the file (if any) connected to file
 2672  * descriptor N belonging to the calling process.  Note that this driver
 2673  * consists of only the ``open()'' routine, because all subsequent
 2674  * references to this file will be directed to the other driver.
 2675  *
 2676  * XXX: we could give this one a cloning event handler if necessary.
 2677  */
 2678 
 2679 /* ARGSUSED */
 2680 static int
 2681 fdopen(struct cdev *dev, int mode, int type, struct thread *td)
 2682 {
 2683 
 2684         /*
 2685          * XXX Kludge: set curthread->td_dupfd to contain the value of the
 2686          * the file descriptor being sought for duplication. The error
 2687          * return ensures that the vnode for this device will be released
 2688          * by vn_open. Open will detect this special error and take the
 2689          * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
 2690          * will simply report the error.
 2691          */
 2692         td->td_dupfd = dev2unit(dev);
 2693         return (ENODEV);
 2694 }
 2695 
 2696 static struct cdevsw fildesc_cdevsw = {
 2697         .d_version =    D_VERSION,
 2698         .d_flags =      D_NEEDGIANT,
 2699         .d_open =       fdopen,
 2700         .d_name =       "FD",
 2701 };
 2702 
 2703 static void
 2704 fildesc_drvinit(void *unused)
 2705 {
 2706         struct cdev *dev;
 2707 
 2708         dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
 2709         make_dev_alias(dev, "stdin");
 2710         dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
 2711         make_dev_alias(dev, "stdout");
 2712         dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
 2713         make_dev_alias(dev, "stderr");
 2714 }
 2715 
 2716 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL)

Cache object: a50c4cad2bb51980f8c2cb788aca4f3d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.