The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_descrip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)kern_descrip.c      8.6 (Berkeley) 4/19/94
   39  * $FreeBSD$
   40  */
   41 
   42 #include "opt_compat.h"
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/malloc.h>
   46 #include <sys/sysproto.h>
   47 #include <sys/conf.h>
   48 #include <sys/filedesc.h>
   49 #include <sys/kernel.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/vnode.h>
   52 #include <sys/proc.h>
   53 #include <sys/namei.h>
   54 #include <sys/file.h>
   55 #include <sys/stat.h>
   56 #include <sys/filio.h>
   57 #include <sys/fcntl.h>
   58 #include <sys/unistd.h>
   59 #include <sys/resourcevar.h>
   60 #include <sys/event.h>
   61 
   62 #include <vm/vm.h>
   63 #include <vm/vm_extern.h>
   64 
      /*
       * Kernel malloc types defined by this file.  M_FILE is the only one
       * that is not static, so it is the only one visible to other files.
       */
   65 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
   66 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
   67                      "file desc to leader structures");
   68 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
   69 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
   70 
      /* Open entry point of the "FD" character device declared below. */
   71 static   d_open_t  fdopen;
      /* NOTE(review): NUMFDESC is not referenced in the part of the file seen here. */
   72 #define NUMFDESC 64
   73 
      /*
       * Character device switch for the "FD" device (major CDEV_MAJOR).
       * Only the open entry is implemented by this file; every other entry
       * is one of the generic no*() routines.
       */
   74 #define CDEV_MAJOR 22
   75 static struct cdevsw fildesc_cdevsw = {
   76         /* open */      fdopen,
   77         /* close */     noclose,
   78         /* read */      noread,
   79         /* write */     nowrite,
   80         /* ioctl */     noioctl,
   81         /* poll */      nopoll,
   82         /* mmap */      nommap,
   83         /* strategy */  nostrategy,
   84         /* name */      "FD",
   85         /* maj */       CDEV_MAJOR,
   86         /* dump */      nodump,
   87         /* psize */     nopsize,
   88         /* flags */     0,
   89         /* bmaj */      -1
   90 };
   91 
      /*
       * Prototypes for file-local helpers.  do_dup() is the common back end
       * of dup(), dup2() and fcntl(F_DUPFD).  The badfo_*() routines are
       * defined outside the part of the file seen here.
       * NOTE(review): presumably they are the file operations installed on
       * an invalid struct file -- confirm against their definitions.
       */
   92 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
   93 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
   94     struct ucred *cred, int flags, struct proc *p));
   95 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
   96     struct proc *p));
   97 static int badfo_poll __P((struct file *fp, int events,
   98     struct ucred *cred, struct proc *p));
   99 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
  100 static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
  101 static int badfo_close __P((struct file *fp, struct proc *p));
  102 
  103 /*
  104  * Descriptor management.
  105  */
  106 struct filelist filehead;       /* head of list of open files */
  107 int nfiles;                     /* actual number of open files */
      /* Defined in another file; not used in the part of the file seen here. */
  108 extern int cmask;       
  109 
  110 /*
  111  * System calls on descriptors.
  112  */
  113 #ifndef _SYS_SYSPROTO_H_
  114 struct getdtablesize_args {
  115         int     dummy;
  116 };
  117 #endif
  118 /* ARGSUSED */
  119 int
  120 getdtablesize(p, uap)
  121         struct proc *p;
  122         struct getdtablesize_args *uap;
  123 {
  124 
  125         p->p_retval[0] = 
  126             min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
  127         return (0);
  128 }
  129 
  130 /*
  131  * Duplicate a file descriptor to a particular value.
  132  *
  133  * note: keep in mind that a potential race condition exists when closing
  134  * descriptors from a shared descriptor table (via rfork).
  135  */
  136 #ifndef _SYS_SYSPROTO_H_
  137 struct dup2_args {
  138         u_int   from;
  139         u_int   to;
  140 };
  141 #endif
  142 /* ARGSUSED */
  143 int
  144 dup2(p, uap)
  145         struct proc *p;
  146         struct dup2_args *uap;
  147 {
  148         register struct filedesc *fdp = p->p_fd;
  149         register u_int old = uap->from, new = uap->to;
  150         int i, error;
  151 
  152 retry:
  153         if (old >= fdp->fd_nfiles ||
  154             fdp->fd_ofiles[old] == NULL ||
  155             new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  156             new >= maxfilesperproc) {
  157                 return (EBADF);
  158         }
  159         if (old == new) {
  160                 p->p_retval[0] = new;
  161                 return (0);
  162         }
  163         if (new >= fdp->fd_nfiles) {
  164                 if ((error = fdalloc(p, new, &i)))
  165                         return (error);
  166                 /*
  167                  * fdalloc() may block, retest everything.
  168                  */
  169                 goto retry;
  170         }
  171         return (do_dup(fdp, (int)old, (int)new, p->p_retval, p));
  172 }
  173 
  174 /*
  175  * Duplicate a file descriptor.
  176  */
  177 #ifndef _SYS_SYSPROTO_H_
  178 struct dup_args {
  179         u_int   fd;
  180 };
  181 #endif
  182 /* ARGSUSED */
  183 int
  184 dup(p, uap)
  185         struct proc *p;
  186         struct dup_args *uap;
  187 {
  188         register struct filedesc *fdp;
  189         u_int old;
  190         int new, error;
  191 
  192         old = uap->fd;
  193         fdp = p->p_fd;
  194         if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
  195                 return (EBADF);
  196         if ((error = fdalloc(p, 0, &new)))
  197                 return (error);
  198         return (do_dup(fdp, (int)old, new, p->p_retval, p));
  199 }
  200 
  201 /*
  202  * The file control system call.
       *
       * Validates uap->fd against the process's descriptor table, then
       * dispatches on uap->cmd.  Results are returned through
       * p->p_retval[0]; the function value is 0 or an errno value.
       * Unknown commands yield EINVAL.
       *
       * Commands that call into the file (ioctl or advisory lock
       * operations) take an extra reference with fhold() first and drop it
       * with fdrop() on every exit path.
  203  */
  204 #ifndef _SYS_SYSPROTO_H_
  205 struct fcntl_args {
  206         int     fd;
  207         int     cmd;
  208         long    arg;
  209 };
  210 #endif
  211 /* ARGSUSED */
  212 int
  213 fcntl(p, uap)
  214         struct proc *p;
  215         register struct fcntl_args *uap;
  216 {
  217         register struct filedesc *fdp = p->p_fd;
  218         register struct file *fp;
              /* pop: the per-descriptor flag byte of uap->fd */
  219         register char *pop;
  220         struct vnode *vp;
              /* flg: flags for VOP_ADVLOCK(); F_SETLKW adds F_WAIT */
  221         int i, tmp, error, flg = F_POSIX;
  222         struct flock fl;
  223         u_int newmin;
  224 
  225         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
  226             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  227                 return (EBADF);
  228         pop = &fdp->fd_ofileflags[uap->fd];
  229 
  230         switch (uap->cmd) {
  231         case F_DUPFD:
                      /*
                       * uap->arg is the lowest descriptor number fdalloc()
                       * may hand out; reject it if it is at or above either
                       * limit.
                       */
  232                 newmin = uap->arg;
  233                 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  234                     newmin >= maxfilesperproc)
  235                         return (EINVAL);
  236                 if ((error = fdalloc(p, newmin, &i)))
  237                         return (error);
  238                 return (do_dup(fdp, uap->fd, i, p->p_retval, p));
  239 
  240         case F_GETFD:
                      /* Report only the close-on-exec bit. */
  241                 p->p_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
  242                 return (0);
  243 
  244         case F_SETFD:
                      /* Replace the close-on-exec bit, keep the other flags. */
  245                 *pop = (*pop &~ UF_EXCLOSE) |
  246                     (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
  247                 return (0);
  248 
  249         case F_GETFL:
  250                 p->p_retval[0] = OFLAGS(fp->f_flag);
  251                 return (0);
  252 
  253         case F_SETFL:
                      /*
                       * Replace the FCNTLFLAGS bits of f_flag with the ones
                       * given in uap->arg (access mode bits are masked out),
                       * then pass the new FNONBLOCK and FASYNC state to the
                       * file through FIONBIO and FIOASYNC.
                       * NOTE(review): when FIONBIO fails, the f_flag update
                       * made above is not rolled back.
                       */
  254                 fhold(fp);
  255                 fp->f_flag &= ~FCNTLFLAGS;
  256                 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
  257                 tmp = fp->f_flag & FNONBLOCK;
  258                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
  259                 if (error) {
  260                         fdrop(fp, p);
  261                         return (error);
  262                 }
  263                 tmp = fp->f_flag & FASYNC;
  264                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
  265                 if (!error) {
  266                         fdrop(fp, p);
  267                         return (0);
  268                 }
                      /*
                       * FIOASYNC failed: turn non-blocking mode back off,
                       * both in f_flag and in the file, and report the
                       * FIOASYNC error.
                       */
  269                 fp->f_flag &= ~FNONBLOCK;
  270                 tmp = 0;
  271                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
  272                 fdrop(fp, p);
  273                 return (error);
  274 
  275         case F_GETOWN:
                      /* The ioctl stores its result directly in p_retval. */
  276                 fhold(fp);
  277                 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
  278                 fdrop(fp, p);
  279                 return(error);
  280 
  281         case F_SETOWN:
  282                 fhold(fp);
  283                 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p);
  284                 fdrop(fp, p);
  285                 return(error);
  286 
  287         case F_SETLKW:
  288                 flg |= F_WAIT;
  289                 /* Fall into F_SETLK */
  290 
  291         case F_SETLK:
                      /* Advisory locks are only supported on vnodes. */
  292                 if (fp->f_type != DTYPE_VNODE)
  293                         return (EBADF);
  294                 vp = (struct vnode *)fp->f_data;
  295 
  296                 /*
  297                  * copyin/lockop may block
  298                  */
  299                 fhold(fp);
  300                 /* Copy in the lock structure */
  301                 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
  302                     sizeof(fl));
  303                 if (error) {
  304                         fdrop(fp, p);
  305                         return (error);
  306                 }
                      /*
                       * Make a SEEK_CUR start relative to the current file
                       * offset.
                       * NOTE(review): the addition is not checked for
                       * overflow.
                       */
  307                 if (fl.l_whence == SEEK_CUR)
  308                         fl.l_start += fp->f_offset;
  309 
                      /*
                       * Read locks need the file open for reading and write
                       * locks need it open for writing.  Setting a lock also
                       * sets P_ADVLOCK on the process leader, which is the
                       * lock owner passed to VOP_ADVLOCK().
                       */
  310                 switch (fl.l_type) {
  311                 case F_RDLCK:
  312                         if ((fp->f_flag & FREAD) == 0) {
  313                                 error = EBADF;
  314                                 break;
  315                         }
  316                         p->p_leader->p_flag |= P_ADVLOCK;
  317                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  318                             &fl, flg);
  319                         break;
  320                 case F_WRLCK:
  321                         if ((fp->f_flag & FWRITE) == 0) {
  322                                 error = EBADF;
  323                                 break;
  324                         }
  325                         p->p_leader->p_flag |= P_ADVLOCK;
  326                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
  327                             &fl, flg);
  328                         break;
  329                 case F_UNLCK:
  330                         error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
  331                                 &fl, F_POSIX);
  332                         break;
  333                 default:
  334                         error = EINVAL;
  335                         break;
  336                 }
  337                 /* Check for race with close */
                      /*
                       * If the descriptor no longer refers to fp, release
                       * any lock just obtained by unlocking the whole file
                       * (start 0, length 0) for this owner.
                       */
  338                 if ((unsigned) uap->fd >= fdp->fd_nfiles ||
  339                     fp != fdp->fd_ofiles[uap->fd]) {
  340                         fl.l_whence = SEEK_SET;
  341                         fl.l_start = 0;
  342                         fl.l_len = 0;
  343                         fl.l_type = F_UNLCK;
  344                         (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  345                                            F_UNLCK, &fl, F_POSIX);
  346                 }
  347                 fdrop(fp, p);
  348                 return(error);
  349 
  350         case F_GETLK:
                      /* Advisory locks are only supported on vnodes. */
  351                 if (fp->f_type != DTYPE_VNODE)
  352                         return (EBADF);
  353                 vp = (struct vnode *)fp->f_data;
  354                 /*
  355                  * copyin/lockop may block
  356                  */
  357                 fhold(fp);
  358                 /* Copy in the lock structure */
  359                 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
  360                     sizeof(fl));
  361                 if (error) {
  362                         fdrop(fp, p);
  363                         return (error);
  364                 }
  365                 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
  366                     fl.l_type != F_UNLCK) {
  367                         fdrop(fp, p);
  368                         return (EINVAL);
  369                 }
                      /* NOTE(review): unchecked addition, as in F_SETLK. */
  370                 if (fl.l_whence == SEEK_CUR)
  371                         fl.l_start += fp->f_offset;
  372                 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
  373                             &fl, F_POSIX);
  374                 fdrop(fp, p);
                      /* On success hand the filled-in structure back to the caller. */
  375                 if (error == 0) {
  376                         error = copyout((caddr_t)&fl,
  377                                     (caddr_t)(intptr_t)uap->arg, sizeof(fl));
  378                 }
  379                 return(error);
  380         default:
  381                 return (EINVAL);
  382         }
  383         /* NOTREACHED */
  384 }
  385 
  386 /*
  387  * Common code for dup, dup2, and fcntl(F_DUPFD).
  388  */
  389 static int
  390 do_dup(fdp, old, new, retval, p)
  391         register struct filedesc *fdp;
  392         register int old, new;
  393         register_t *retval;
  394         struct proc *p;
  395 {
  396         struct file *fp;
  397         struct file *delfp;
  398         int holdleaders;
  399 
  400         /*
  401          * Save info on the descriptor being overwritten.  We have
  402          * to do the unmap now, but we cannot close it without
  403          * introducing an ownership race for the slot.
  404          */
  405         delfp = fdp->fd_ofiles[new];
  406         if (delfp != NULL && p->p_fdtol != NULL) {
  407                 /*
  408                  * Ask fdfree() to sleep to ensure that all relevant
  409                  * process leaders can be traversed in closef().
  410                  */
  411                 fdp->fd_holdleaderscount++;
  412                 holdleaders = 1;
  413         } else
  414                 holdleaders = 0;
  415 #if 0
  416         if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
  417                 (void) munmapfd(p, new);
  418 #endif
  419 
  420         /*
  421          * Duplicate the source descriptor, update lastfile
  422          */
  423         fp = fdp->fd_ofiles[old];
  424         fdp->fd_ofiles[new] = fp;
  425         fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
  426         fhold(fp);
  427         if (new > fdp->fd_lastfile)
  428                 fdp->fd_lastfile = new;
  429         *retval = new;
  430 
  431         /*
  432          * If we dup'd over a valid file, we now own the reference to it
  433          * and must dispose of it using closef() semantics (as if a
  434          * close() were performed on it).
  435          */
  436         if (delfp) {
  437                 (void) closef(delfp, p);
  438                 if (holdleaders) {
  439                         fdp->fd_holdleaderscount--;
  440                         if (fdp->fd_holdleaderscount == 0 &&
  441                             fdp->fd_holdleaderswakeup != 0) {
  442                                 fdp->fd_holdleaderswakeup = 0;
  443                                 wakeup(&fdp->fd_holdleaderscount);
  444                         }
  445                 }
  446         }
  447         return (0);
  448 }
  449 
  450 /*
  451  * If sigio is on the list associated with a process or process group,
  452  * disable signalling from the device, remove sigio from the list and
  453  * free sigio.
  454  */
  455 void
  456 funsetown(sigio)
  457         struct sigio *sigio;
  458 {
  459         int s;
  460 
  461         if (sigio == NULL)
  462                 return;
  463         s = splhigh();
  464         *(sigio->sio_myref) = NULL;
  465         splx(s);
  466         if (sigio->sio_pgid < 0) {
  467                 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
  468                              sigio, sio_pgsigio);
  469         } else /* if ((*sigiop)->sio_pgid > 0) */ {
  470                 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
  471                              sigio, sio_pgsigio);
  472         }
  473         crfree(sigio->sio_ucred);
  474         FREE(sigio, M_SIGIO);
  475 }
  476 
  477 /* Free a list of sigio structures. */
  478 void
  479 funsetownlst(sigiolst)
  480         struct sigiolst *sigiolst;
  481 {
  482         struct sigio *sigio;
  483 
  484         while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
  485                 funsetown(sigio);
  486 }
  487 
  488 /*
  489  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
  490  *
  491  * After permission checking, add a sigio structure to the sigio list for
  492  * the process or process group.
  493  */
  494 int
  495 fsetown(pgid, sigiop)
  496         pid_t pgid;
  497         struct sigio **sigiop;
  498 {
  499         struct proc *proc;
  500         struct pgrp *pgrp;
  501         struct sigio *sigio;
  502         int s;
  503 
  504         if (pgid == 0) {
  505                 funsetown(*sigiop);
  506                 return (0);
  507         }
  508         if (pgid > 0) {
  509                 proc = pfind(pgid);
  510                 if (proc == NULL)
  511                         return (ESRCH);
  512 
  513                 /*
  514                  * Policy - Don't allow a process to FSETOWN a process
  515                  * in another session.
  516                  *
  517                  * Remove this test to allow maximum flexibility or
  518                  * restrict FSETOWN to the current process or process
  519                  * group for maximum safety.
  520                  */
  521                 if (proc->p_session != curproc->p_session)
  522                         return (EPERM);
  523 
  524                 pgrp = NULL;
  525         } else /* if (pgid < 0) */ {
  526                 pgrp = pgfind(-pgid);
  527                 if (pgrp == NULL)
  528                         return (ESRCH);
  529 
  530                 /*
  531                  * Policy - Don't allow a process to FSETOWN a process
  532                  * in another session.
  533                  *
  534                  * Remove this test to allow maximum flexibility or
  535                  * restrict FSETOWN to the current process or process
  536                  * group for maximum safety.
  537                  */
  538                 if (pgrp->pg_session != curproc->p_session)
  539                         return (EPERM);
  540 
  541                 proc = NULL;
  542         }
  543         funsetown(*sigiop);
  544         MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
  545         if (pgid > 0) {
  546                 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
  547                 sigio->sio_proc = proc;
  548         } else {
  549                 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
  550                 sigio->sio_pgrp = pgrp;
  551         }
  552         sigio->sio_pgid = pgid;
  553         crhold(curproc->p_ucred);
  554         sigio->sio_ucred = curproc->p_ucred;
  555         /* It would be convenient if p_ruid was in ucred. */
  556         sigio->sio_ruid = curproc->p_cred->p_ruid;
  557         sigio->sio_myref = sigiop;
  558         s = splhigh();
  559         *sigiop = sigio;
  560         splx(s);
  561         return (0);
  562 }
  563 
  564 /*
  565  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
  566  */
  567 pid_t
  568 fgetown(sigio)
  569         struct sigio *sigio;
  570 {
  571         return (sigio != NULL ? sigio->sio_pgid : 0);
  572 }
  573 
  574 /*
  575  * Close a file descriptor.
  576  */
  577 #ifndef _SYS_SYSPROTO_H_
  578 struct close_args {
  579         int     fd;
  580 };
  581 #endif
  582 /* ARGSUSED */
  583 int
  584 close(p, uap)
  585         struct proc *p;
  586         struct close_args *uap;
  587 {
  588         register struct filedesc *fdp = p->p_fd;
  589         register struct file *fp;
  590         register int fd = uap->fd;
  591         int error;
  592         int holdleaders;
  593 
  594         if ((unsigned)fd >= fdp->fd_nfiles ||
  595             (fp = fdp->fd_ofiles[fd]) == NULL)
  596                 return (EBADF);
  597 #if 0
  598         if (fdp->fd_ofileflags[fd] & UF_MAPPED)
  599                 (void) munmapfd(p, fd);
  600 #endif
  601         fdp->fd_ofiles[fd] = NULL;
  602         fdp->fd_ofileflags[fd] = 0;
  603         holdleaders = 0;
  604         if (p->p_fdtol != NULL) {
  605                 /*
  606                  * Ask fdfree() to sleep to ensure that all relevant
  607                  * process leaders can be traversed in closef().
  608                  */
  609                 fdp->fd_holdleaderscount++;
  610                 holdleaders = 1;
  611         }
  612 
  613         /*
  614          * we now hold the fp reference that used to be owned by the descriptor
  615          * array.
  616          */
  617         while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
  618                 fdp->fd_lastfile--;
  619         if (fd < fdp->fd_freefile)
  620                 fdp->fd_freefile = fd;
  621         if (fd < fdp->fd_knlistsize)
  622                 knote_fdclose(p, fd);
  623         error = closef(fp, p);
  624         if (holdleaders) {
  625                 fdp->fd_holdleaderscount--;
  626                 if (fdp->fd_holdleaderscount == 0 &&
  627                     fdp->fd_holdleaderswakeup != 0) {
  628                         fdp->fd_holdleaderswakeup = 0;
  629                         wakeup(&fdp->fd_holdleaderscount);
  630                 }
  631         }
  632         return (error);
  633 }
  634 
  635 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
  636 /*
  637  * Return status information about a file descriptor.
  638  */
  639 #ifndef _SYS_SYSPROTO_H_
  640 struct ofstat_args {
  641         int     fd;
  642         struct  ostat *sb;
  643 };
  644 #endif
  645 /* ARGSUSED */
  646 int
  647 ofstat(p, uap)
  648         struct proc *p;
  649         register struct ofstat_args *uap;
  650 {
  651         register struct filedesc *fdp = p->p_fd;
  652         register struct file *fp;
  653         struct stat ub;
  654         struct ostat oub;
  655         int error;
  656 
  657         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
  658             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  659                 return (EBADF);
  660         fhold(fp);
  661         error = fo_stat(fp, &ub, p);
  662         if (error == 0) {
  663                 cvtstat(&ub, &oub);
  664                 error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
  665         }
  666         fdrop(fp, p);
  667         return (error);
  668 }
  669 #endif /* COMPAT_43 || COMPAT_SUNOS */
  670 
  671 /*
  672  * Return status information about a file descriptor.
  673  */
  674 #ifndef _SYS_SYSPROTO_H_
  675 struct fstat_args {
  676         int     fd;
  677         struct  stat *sb;
  678 };
  679 #endif
  680 /* ARGSUSED */
  681 int
  682 fstat(p, uap)
  683         struct proc *p;
  684         register struct fstat_args *uap;
  685 {
  686         register struct filedesc *fdp = p->p_fd;
  687         register struct file *fp;
  688         struct stat ub;
  689         int error;
  690 
  691         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
  692             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  693                 return (EBADF);
  694         fhold(fp);
  695         error = fo_stat(fp, &ub, p);
  696         if (error == 0)
  697                 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
  698         fdrop(fp, p);
  699         return (error);
  700 }
  701 
  702 /*
  703  * Return status information about a file descriptor.
  704  */
  705 #ifndef _SYS_SYSPROTO_H_
  706 struct nfstat_args {
  707         int     fd;
  708         struct  nstat *sb;
  709 };
  710 #endif
  711 /* ARGSUSED */
  712 int
  713 nfstat(p, uap)
  714         struct proc *p;
  715         register struct nfstat_args *uap;
  716 {
  717         register struct filedesc *fdp = p->p_fd;
  718         register struct file *fp;
  719         struct stat ub;
  720         struct nstat nub;
  721         int error;
  722 
  723         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
  724             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  725                 return (EBADF);
  726         fhold(fp);
  727         error = fo_stat(fp, &ub, p);
  728         if (error == 0) {
  729                 cvtnstat(&ub, &nub);
  730                 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
  731         }
  732         fdrop(fp, p);
  733         return (error);
  734 }
  735 
  736 /*
  737  * Return pathconf information about a file descriptor.
  738  */
  739 #ifndef _SYS_SYSPROTO_H_
  740 struct fpathconf_args {
  741         int     fd;
  742         int     name;
  743 };
  744 #endif
  745 /* ARGSUSED */
  746 int
  747 fpathconf(p, uap)
  748         struct proc *p;
  749         register struct fpathconf_args *uap;
  750 {
  751         struct filedesc *fdp = p->p_fd;
  752         struct file *fp;
  753         struct vnode *vp;
  754         int error = 0;
  755 
  756         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
  757             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  758                 return (EBADF);
  759 
  760         fhold(fp);
  761 
  762         switch (fp->f_type) {
  763         case DTYPE_PIPE:
  764         case DTYPE_SOCKET:
  765                 if (uap->name != _PC_PIPE_BUF) {
  766                         error = EINVAL;
  767                 } else {
  768                         p->p_retval[0] = PIPE_BUF;
  769                         error = 0;
  770                 }
  771                 break;
  772         case DTYPE_FIFO:
  773         case DTYPE_VNODE:
  774                 vp = (struct vnode *)fp->f_data;
  775                 error = VOP_PATHCONF(vp, uap->name, p->p_retval);
  776                 break;
  777         default:
  778                 error = EOPNOTSUPP;
  779                 break;
  780         }
  781         fdrop(fp, p);
  782         return(error);
  783 }
  784 
  785 /*
  786  * Allocate a file descriptor for the process.
  787  */
  788 static int fdexpand;
  789 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
  790 
  791 int
  792 fdalloc(p, want, result)
  793         struct proc *p;
  794         int want;
  795         int *result;
  796 {
  797         register struct filedesc *fdp = p->p_fd;
  798         register int i;
  799         int lim, last, nfiles;
  800         struct file **newofile;
  801         char *newofileflags;
  802 
  803         /*
  804          * Search for a free descriptor starting at the higher
  805          * of want or fd_freefile.  If that fails, consider
  806          * expanding the ofile array.
  807          */
  808         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
  809         for (;;) {
  810                 last = min(fdp->fd_nfiles, lim);
  811                 if ((i = want) < fdp->fd_freefile)
  812                         i = fdp->fd_freefile;
  813                 for (; i < last; i++) {
  814                         if (fdp->fd_ofiles[i] == NULL) {
  815                                 fdp->fd_ofileflags[i] = 0;
  816                                 if (i > fdp->fd_lastfile)
  817                                         fdp->fd_lastfile = i;
  818                                 if (want <= fdp->fd_freefile)
  819                                         fdp->fd_freefile = i;
  820                                 *result = i;
  821                                 return (0);
  822                         }
  823                 }
  824 
  825                 /*
  826                  * No space in current array.  Expand?
  827                  */
  828                 if (fdp->fd_nfiles >= lim)
  829                         return (EMFILE);
  830                 if (fdp->fd_nfiles < NDEXTENT)
  831                         nfiles = NDEXTENT;
  832                 else
  833                         nfiles = 2 * fdp->fd_nfiles;
  834                 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
  835                     M_FILEDESC, M_WAITOK);
  836 
  837                 /*
  838                  * deal with file-table extend race that might have occured
  839                  * when malloc was blocked.
  840                  */
  841                 if (fdp->fd_nfiles >= nfiles) {
  842                         FREE(newofile, M_FILEDESC);
  843                         continue;
  844                 }
  845                 newofileflags = (char *) &newofile[nfiles];
  846                 /*
  847                  * Copy the existing ofile and ofileflags arrays
  848                  * and zero the new portion of each array.
  849                  */
  850                 bcopy(fdp->fd_ofiles, newofile,
  851                         (i = sizeof(struct file *) * fdp->fd_nfiles));
  852                 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
  853                 bcopy(fdp->fd_ofileflags, newofileflags,
  854                         (i = sizeof(char) * fdp->fd_nfiles));
  855                 bzero(newofileflags + i, nfiles * sizeof(char) - i);
  856                 if (fdp->fd_nfiles > NDFILE)
  857                         FREE(fdp->fd_ofiles, M_FILEDESC);
  858                 fdp->fd_ofiles = newofile;
  859                 fdp->fd_ofileflags = newofileflags;
  860                 fdp->fd_nfiles = nfiles;
  861                 fdexpand++;
  862         }
  863         return (0);
  864 }
  865 
  866 /*
  867  * Check to see whether n user file descriptors
  868  * are available to the process p.
  869  */
  870 int
  871 fdavail(p, n)
  872         struct proc *p;
  873         register int n;
  874 {
  875         register struct filedesc *fdp = p->p_fd;
  876         register struct file **fpp;
  877         register int i, lim, last;
  878 
  879         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
  880         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
  881                 return (1);
  882 
  883         last = min(fdp->fd_nfiles, lim);
  884         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
  885         for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
  886                 if (*fpp == NULL && --n <= 0)
  887                         return (1);
  888         }
  889         return (0);
  890 }
  891 
  892 /*
  893  * Create a new open file structure and allocate
  894  * a file decriptor for the process that refers to it.
  895  */
  896 int
  897 falloc(p, resultfp, resultfd)
  898         register struct proc *p;
  899         struct file **resultfp;
  900         int *resultfd;
  901 {
  902         register struct file *fp, *fq;
  903         int error, i;
  904         int maxuserfiles = maxfiles - (maxfiles / 20);
  905         static struct timeval lastfail;
  906         static int curfail;
  907 
  908         if ((nfiles >= maxuserfiles && p->p_cred->p_ruid != 0)
  909            || nfiles >= maxfiles) {
  910                 if (ppsratecheck(&lastfail, &curfail, 1)) {
  911                         printf("kern.maxfiles limit exceeded by uid %d, please see tuning(7).\n",
  912                                 p->p_cred->p_ruid);
  913                 }
  914                 return (ENFILE);
  915         }
  916         /*
  917          * Allocate a new file descriptor.
  918          * If the process has file descriptor zero open, add to the list
  919          * of open files at that point, otherwise put it at the front of
  920          * the list of open files.
  921          */
  922         nfiles++;
  923         MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
  924         bzero(fp, sizeof(struct file));
  925 
  926         /*
  927          * wait until after malloc (which may have blocked) returns before
  928          * allocating the slot, else a race might have shrunk it if we had
  929          * allocated it before the malloc.
  930          */
  931         if ((error = fdalloc(p, 0, &i))) {
  932                 nfiles--;
  933                 FREE(fp, M_FILE);
  934                 return (error);
  935         }
  936         fp->f_count = 1;
  937         fp->f_cred = p->p_ucred;
  938         fp->f_ops = &badfileops;
  939         fp->f_seqcount = 1;
  940         crhold(fp->f_cred);
  941         if ((fq = p->p_fd->fd_ofiles[0])) {
  942                 LIST_INSERT_AFTER(fq, fp, f_list);
  943         } else {
  944                 LIST_INSERT_HEAD(&filehead, fp, f_list);
  945         }
  946         p->p_fd->fd_ofiles[i] = fp;
  947         if (resultfp)
  948                 *resultfp = fp;
  949         if (resultfd)
  950                 *resultfd = i;
  951         return (0);
  952 }
  953 
  954 /*
  955  * Free a file descriptor.
  956  */
  957 void
  958 ffree(fp)
  959         register struct file *fp;
  960 {
  961         KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
  962         LIST_REMOVE(fp, f_list);
  963         crfree(fp->f_cred);
  964         nfiles--;
  965         FREE(fp, M_FILE);
  966 }
  967 
  968 /*
  969  * Build a new filedesc structure.
  970  */
  971 struct filedesc *
  972 fdinit(p)
  973         struct proc *p;
  974 {
  975         register struct filedesc0 *newfdp;
  976         register struct filedesc *fdp = p->p_fd;
  977 
  978         MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
  979             M_FILEDESC, M_WAITOK);
  980         bzero(newfdp, sizeof(struct filedesc0));
  981         newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
  982         if (newfdp->fd_fd.fd_cdir)
  983                 VREF(newfdp->fd_fd.fd_cdir);
  984         newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
  985         if (newfdp->fd_fd.fd_rdir)
  986                 VREF(newfdp->fd_fd.fd_rdir);
  987         newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
  988         if (newfdp->fd_fd.fd_jdir)
  989                 VREF(newfdp->fd_fd.fd_jdir);
  990 
  991         /* Create the file descriptor table. */
  992         newfdp->fd_fd.fd_refcnt = 1;
  993         newfdp->fd_fd.fd_cmask = cmask;
  994         newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
  995         newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
  996         newfdp->fd_fd.fd_nfiles = NDFILE;
  997         newfdp->fd_fd.fd_knlistsize = -1;
  998 
  999         return (&newfdp->fd_fd);
 1000 }
 1001 
 1002 /*
 1003  * Share a filedesc structure.
 1004  */
 1005 struct filedesc *
 1006 fdshare(p)
 1007         struct proc *p;
 1008 {
 1009         p->p_fd->fd_refcnt++;
 1010         return (p->p_fd);
 1011 }
 1012 
 1013 /*
 1014  * Copy a filedesc structure.
 1015  */
 1016 struct filedesc *
 1017 fdcopy(p)
 1018         struct proc *p;
 1019 {
 1020         register struct filedesc *newfdp, *fdp = p->p_fd;
 1021         register struct file **fpp;
 1022         register int i;
 1023 
 1024         /* Certain daemons might not have file descriptors. */
 1025         if (fdp == NULL)
 1026                 return (NULL);
 1027 
 1028         MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
 1029             M_FILEDESC, M_WAITOK);
 1030         bcopy(fdp, newfdp, sizeof(struct filedesc));
 1031         if (newfdp->fd_cdir)
 1032                 VREF(newfdp->fd_cdir);
 1033         if (newfdp->fd_rdir)
 1034                 VREF(newfdp->fd_rdir);
 1035         if (newfdp->fd_jdir)
 1036                 VREF(newfdp->fd_jdir);
 1037         newfdp->fd_refcnt = 1;
 1038 
 1039         /*
 1040          * If the number of open files fits in the internal arrays
 1041          * of the open file structure, use them, otherwise allocate
 1042          * additional memory for the number of descriptors currently
 1043          * in use.
 1044          */
 1045         if (newfdp->fd_lastfile < NDFILE) {
 1046                 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
 1047                 newfdp->fd_ofileflags =
 1048                     ((struct filedesc0 *) newfdp)->fd_dfileflags;
 1049                 i = NDFILE;
 1050         } else {
 1051                 /*
 1052                  * Compute the smallest multiple of NDEXTENT needed
 1053                  * for the file descriptors currently in use,
 1054                  * allowing the table to shrink.
 1055                  */
 1056                 i = newfdp->fd_nfiles;
 1057                 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
 1058                         i /= 2;
 1059                 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
 1060                     M_FILEDESC, M_WAITOK);
 1061                 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
 1062         }
 1063         newfdp->fd_nfiles = i;
 1064         bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
 1065         bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
 1066 
 1067         /*
 1068          * kq descriptors cannot be copied.
 1069          */
 1070         if (newfdp->fd_knlistsize != -1) {
 1071                 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
 1072                 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
 1073                         if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
 1074                                 *fpp = NULL;
 1075                                 if (i < newfdp->fd_freefile)
 1076                                         newfdp->fd_freefile = i;
 1077                         }
 1078                         if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
 1079                                 newfdp->fd_lastfile--;
 1080                 }
 1081                 newfdp->fd_knlist = NULL;
 1082                 newfdp->fd_knlistsize = -1;
 1083                 newfdp->fd_knhash = NULL;
 1084                 newfdp->fd_knhashmask = 0;
 1085         }
 1086 
 1087         fpp = newfdp->fd_ofiles;
 1088         for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
 1089                 if (*fpp != NULL)
 1090                         fhold(*fpp);
 1091         }
 1092         return (newfdp);
 1093 }
 1094 
 1095 /*
 1096  * Release a filedesc structure.
 1097  */
 1098 void
 1099 fdfree(p)
 1100         struct proc *p;
 1101 {
 1102         register struct filedesc *fdp = p->p_fd;
 1103         struct file **fpp;
 1104         register int i;
 1105         struct filedesc_to_leader *fdtol;
 1106         struct file *fp;
 1107         struct vnode *vp;
 1108         struct flock lf;
 1109 
 1110         /* Certain daemons might not have file descriptors. */
 1111         if (fdp == NULL)
 1112                 return;
 1113 
 1114         /* Check for special need to clear POSIX style locks */
 1115         fdtol = p->p_fdtol;
 1116         if (fdtol != NULL) {
 1117                 KASSERT(fdtol->fdl_refcount > 0,
 1118                         ("filedesc_to_refcount botch: fdl_refcount=%d",
 1119                          fdtol->fdl_refcount));
 1120                 if (fdtol->fdl_refcount == 1 &&
 1121                     (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 1122                         i = 0;
 1123                         fpp = fdp->fd_ofiles;
 1124                         for (i = 0, fpp = fdp->fd_ofiles;
 1125                              i <= fdp->fd_lastfile;
 1126                              i++, fpp++) {
 1127                                 if (*fpp == NULL ||
 1128                                     (*fpp)->f_type != DTYPE_VNODE)
 1129                                         continue;
 1130                                 fp = *fpp;
 1131                                 fhold(fp);
 1132                                 lf.l_whence = SEEK_SET;
 1133                                 lf.l_start = 0;
 1134                                 lf.l_len = 0;
 1135                                 lf.l_type = F_UNLCK;
 1136                                 vp = (struct vnode *)fp->f_data;
 1137                                 (void) VOP_ADVLOCK(vp,
 1138                                                    (caddr_t)p->p_leader,
 1139                                                    F_UNLCK,
 1140                                                    &lf,
 1141                                                    F_POSIX);
 1142                                 fdrop(fp, p);
 1143                                 fpp = fdp->fd_ofiles + i;
 1144                         }
 1145                 }
 1146         retry:
 1147                 if (fdtol->fdl_refcount == 1) {
 1148                         if (fdp->fd_holdleaderscount > 0 &&
 1149                             (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 1150                                 /*
 1151                                  * close() or do_dup() has cleared a reference
 1152                                  * in a shared file descriptor table.
 1153                                  */
 1154                                 fdp->fd_holdleaderswakeup = 1;
 1155                                 tsleep(&fdp->fd_holdleaderscount,
 1156                                        PLOCK, "fdlhold", 0);
 1157                                 goto retry;
 1158                         }
 1159                         if (fdtol->fdl_holdcount > 0) {
 1160                                 /* 
 1161                                  * Ensure that fdtol->fdl_leader
 1162                                  * remains valid in closef().
 1163                                  */
 1164                                 fdtol->fdl_wakeup = 1;
 1165                                 tsleep(fdtol, PLOCK, "fdlhold", 0);
 1166                                 goto retry;
 1167                         }
 1168                 }
 1169                 fdtol->fdl_refcount--;
 1170                 if (fdtol->fdl_refcount == 0 &&
 1171                     fdtol->fdl_holdcount == 0) {
 1172                         fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
 1173                         fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
 1174                 } else
 1175                         fdtol = NULL;
 1176                 p->p_fdtol = NULL;
 1177                 if (fdtol != NULL)
 1178                         FREE(fdtol, M_FILEDESC_TO_LEADER);
 1179         }
 1180         if (--fdp->fd_refcnt > 0)
 1181                 return;
 1182         /*
 1183          * we are the last reference to the structure, we can
 1184          * safely assume it will not change out from under us.
 1185          */
 1186         fpp = fdp->fd_ofiles;
 1187         for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
 1188                 if (*fpp)
 1189                         (void) closef(*fpp, p);
 1190         }
 1191         if (fdp->fd_nfiles > NDFILE)
 1192                 FREE(fdp->fd_ofiles, M_FILEDESC);
 1193         if (fdp->fd_cdir)
 1194                 vrele(fdp->fd_cdir);
 1195         if (fdp->fd_rdir)
 1196                 vrele(fdp->fd_rdir);
 1197         if (fdp->fd_jdir)
 1198                 vrele(fdp->fd_jdir);
 1199         if (fdp->fd_knlist)
 1200                 FREE(fdp->fd_knlist, M_KQUEUE);
 1201         if (fdp->fd_knhash)
 1202                 FREE(fdp->fd_knhash, M_KQUEUE);
 1203         FREE(fdp, M_FILEDESC);
 1204 }
 1205 
 1206 /*
 1207  * For setugid programs, we don't want to people to use that setugidness
 1208  * to generate error messages which write to a file which otherwise would
 1209  * otherwise be off-limits to the process.
 1210  *
 1211  * This is a gross hack to plug the hole.  A better solution would involve
 1212  * a special vop or other form of generalized access control mechanism.  We
 1213  * go ahead and just reject all procfs file systems accesses as dangerous.
 1214  *
 1215  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 1216  * sufficient.  We also don't for check setugidness since we know we are.
 1217  */
 1218 static int
 1219 is_unsafe(struct file *fp)
 1220 {
 1221         if (fp->f_type == DTYPE_VNODE && 
 1222             ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
 1223                 return (1);
 1224         return (0);
 1225 }
 1226 
 1227 /*
 1228  * Make this setguid thing safe, if at all possible.
 1229  */
 1230 void
 1231 setugidsafety(p)
 1232         struct proc *p;
 1233 {
 1234         struct filedesc *fdp = p->p_fd;
 1235         register int i;
 1236 
 1237         /* Certain daemons might not have file descriptors. */
 1238         if (fdp == NULL)
 1239                 return;
 1240 
 1241         /*
 1242          * note: fdp->fd_ofiles may be reallocated out from under us while
 1243          * we are blocked in a close.  Be careful!
 1244          */
 1245         for (i = 0; i <= fdp->fd_lastfile; i++) {
 1246                 if (i > 2)
 1247                         break;
 1248                 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
 1249                         struct file *fp;
 1250 
 1251 #if 0
 1252                         if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
 1253                                 (void) munmapfd(p, i);
 1254 #endif
 1255                         if (i < fdp->fd_knlistsize)
 1256                                 knote_fdclose(p, i);
 1257                         /*
 1258                          * NULL-out descriptor prior to close to avoid
 1259                          * a race while close blocks.
 1260                          */
 1261                         fp = fdp->fd_ofiles[i];
 1262                         fdp->fd_ofiles[i] = NULL;
 1263                         fdp->fd_ofileflags[i] = 0;
 1264                         if (i < fdp->fd_freefile)
 1265                                 fdp->fd_freefile = i;
 1266                         (void) closef(fp, p);
 1267                 }
 1268         }
 1269         while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
 1270                 fdp->fd_lastfile--;
 1271 }
 1272 
 1273 /*
 1274  * Close any files on exec?
 1275  */
 1276 void
 1277 fdcloseexec(p)
 1278         struct proc *p;
 1279 {
 1280         struct filedesc *fdp = p->p_fd;
 1281         register int i;
 1282 
 1283         /* Certain daemons might not have file descriptors. */
 1284         if (fdp == NULL)
 1285                 return;
 1286 
 1287         /*
 1288          * We cannot cache fd_ofiles or fd_ofileflags since operations
 1289          * may block and rip them out from under us.
 1290          */
 1291         for (i = 0; i <= fdp->fd_lastfile; i++) {
 1292                 if (fdp->fd_ofiles[i] != NULL &&
 1293                     (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
 1294                         struct file *fp;
 1295 
 1296 #if 0
 1297                         if (fdp->fd_ofileflags[i] & UF_MAPPED)
 1298                                 (void) munmapfd(p, i);
 1299 #endif
 1300                         if (i < fdp->fd_knlistsize)
 1301                                 knote_fdclose(p, i);
 1302                         /*
 1303                          * NULL-out descriptor prior to close to avoid
 1304                          * a race while close blocks.
 1305                          */
 1306                         fp = fdp->fd_ofiles[i];
 1307                         fdp->fd_ofiles[i] = NULL;
 1308                         fdp->fd_ofileflags[i] = 0;
 1309                         if (i < fdp->fd_freefile)
 1310                                 fdp->fd_freefile = i;
 1311                         (void) closef(fp, p);
 1312                 }
 1313         }
 1314         while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
 1315                 fdp->fd_lastfile--;
 1316 }
 1317 
 1318 /*
 1319  * It is unsafe for set[ug]id processes to be started with file
 1320  * descriptors 0..2 closed, as these descriptors are given implicit
 1321  * significance in the Standard C library.  fdcheckstd() will create a
 1322  * descriptor referencing /dev/null for each of stdin, stdout, and
 1323  * stderr that is not already open.
 1324  */
 1325 int
 1326 fdcheckstd(p)
 1327        struct proc *p;
 1328 {
 1329        struct nameidata nd;
 1330        struct filedesc *fdp;
 1331        struct file *fp;
 1332        register_t retval;
 1333        int fd, i, error, flags, devnull;
 1334 
 1335        fdp = p->p_fd;
 1336        if (fdp == NULL)
 1337                return (0);
 1338        devnull = -1;
 1339        error = 0;
 1340        for (i = 0; i < 3; i++) {
 1341                if (fdp->fd_ofiles[i] != NULL)
 1342                        continue;
 1343                if (devnull < 0) {
 1344                        error = falloc(p, &fp, &fd);
 1345                        if (error != 0)
 1346                                break;
 1347                        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
 1348                            p);
 1349                        flags = FREAD | FWRITE;
 1350                        error = vn_open(&nd, flags, 0);
 1351                        if (error != 0) {
 1352                                fdp->fd_ofiles[i] = NULL;
 1353                                fdrop(fp, p);
 1354                                break;
 1355                        }
 1356                        NDFREE(&nd, NDF_ONLY_PNBUF);
 1357                        fp->f_data = (caddr_t)nd.ni_vp;
 1358                        fp->f_flag = flags;
 1359                        fp->f_ops = &vnops;
 1360                        fp->f_type = DTYPE_VNODE;
 1361                        VOP_UNLOCK(nd.ni_vp, 0, p);
 1362                        devnull = fd;
 1363                } else {
 1364                        error = fdalloc(p, 0, &fd);
 1365                        if (error != 0)
 1366                                break;
 1367                        error = do_dup(fdp, devnull, fd, &retval, p);
 1368                        if (error != 0)
 1369                                break;
 1370                }
 1371        }
 1372        return (error);
 1373 }
 1374 
 1375 /*
 1376  * Internal form of close.
 1377  * Decrement reference count on file structure.
 1378  * Note: p may be NULL when closing a file
 1379  * that was being passed in a message.
 1380  */
 1381 int
 1382 closef(fp, p)
 1383         register struct file *fp;
 1384         register struct proc *p;
 1385 {
 1386         struct vnode *vp;
 1387         struct flock lf;
 1388         struct filedesc_to_leader *fdtol;
 1389 
 1390         if (fp == NULL)
 1391                 return (0);
 1392         /*
 1393          * POSIX record locking dictates that any close releases ALL
 1394          * locks owned by this process.  This is handled by setting
 1395          * a flag in the unlock to free ONLY locks obeying POSIX
 1396          * semantics, and not to free BSD-style file locks.
 1397          * If the descriptor was in a message, POSIX-style locks
 1398          * aren't passed with the descriptor.
 1399          */
 1400         if (p != NULL && 
 1401             fp->f_type == DTYPE_VNODE) {
 1402                 if ((p->p_leader->p_flag & P_ADVLOCK) != 0) {
 1403                         lf.l_whence = SEEK_SET;
 1404                         lf.l_start = 0;
 1405                         lf.l_len = 0;
 1406                         lf.l_type = F_UNLCK;
 1407                         vp = (struct vnode *)fp->f_data;
 1408                         (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
 1409                                            &lf, F_POSIX);
 1410                 }
 1411                 fdtol = p->p_fdtol;
 1412                 if (fdtol != NULL) {
 1413                         /*
 1414                          * Handle special case where file descriptor table
 1415                          * is shared between multiple process leaders.
 1416                          */
 1417                         for (fdtol = fdtol->fdl_next;
 1418                              fdtol != p->p_fdtol;
 1419                              fdtol = fdtol->fdl_next) {
 1420                                 if ((fdtol->fdl_leader->p_flag &
 1421                                      P_ADVLOCK) == 0)
 1422                                         continue;
 1423                                 fdtol->fdl_holdcount++;
 1424                                 lf.l_whence = SEEK_SET;
 1425                                 lf.l_start = 0;
 1426                                 lf.l_len = 0;
 1427                                 lf.l_type = F_UNLCK;
 1428                                 vp = (struct vnode *)fp->f_data;
 1429                                 (void) VOP_ADVLOCK(vp,
 1430                                                    (caddr_t)p->p_leader,
 1431                                                    F_UNLCK, &lf, F_POSIX);
 1432                                 fdtol->fdl_holdcount--;
 1433                                 if (fdtol->fdl_holdcount == 0 &&
 1434                                     fdtol->fdl_wakeup != 0) {
 1435                                         fdtol->fdl_wakeup = 0;
 1436                                         wakeup(fdtol);
 1437                                 }
 1438                         }
 1439                 }
 1440         }
 1441         return (fdrop(fp, p));
 1442 }
 1443 
 1444 int
 1445 fdrop(fp, p)
 1446         struct file *fp;
 1447         struct proc *p;
 1448 {
 1449         struct flock lf;
 1450         struct vnode *vp;
 1451         int error;
 1452 
 1453         if (--fp->f_count > 0)
 1454                 return (0);
 1455         if (fp->f_count < 0)
 1456                 panic("fdrop: count < 0");
 1457         if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
 1458                 lf.l_whence = SEEK_SET;
 1459                 lf.l_start = 0;
 1460                 lf.l_len = 0;
 1461                 lf.l_type = F_UNLCK;
 1462                 vp = (struct vnode *)fp->f_data;
 1463                 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 1464         }
 1465         if (fp->f_ops != &badfileops)
 1466                 error = fo_close(fp, p);
 1467         else
 1468                 error = 0;
 1469         ffree(fp);
 1470         return (error);
 1471 }
 1472 
 1473 /*
 1474  * Apply an advisory lock on a file descriptor.
 1475  *
 1476  * Just attempt to get a record lock of the requested type on
 1477  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 1478  */
 1479 #ifndef _SYS_SYSPROTO_H_
 1480 struct flock_args {
 1481         int     fd;
 1482         int     how;
 1483 };
 1484 #endif
 1485 /* ARGSUSED */
 1486 int
 1487 flock(p, uap)
 1488         struct proc *p;
 1489         register struct flock_args *uap;
 1490 {
 1491         register struct filedesc *fdp = p->p_fd;
 1492         register struct file *fp;
 1493         struct vnode *vp;
 1494         struct flock lf;
 1495 
 1496         if ((unsigned)uap->fd >= fdp->fd_nfiles ||
 1497             (fp = fdp->fd_ofiles[uap->fd]) == NULL)
 1498                 return (EBADF);
 1499         if (fp->f_type != DTYPE_VNODE)
 1500                 return (EOPNOTSUPP);
 1501         vp = (struct vnode *)fp->f_data;
 1502         lf.l_whence = SEEK_SET;
 1503         lf.l_start = 0;
 1504         lf.l_len = 0;
 1505         if (uap->how & LOCK_UN) {
 1506                 lf.l_type = F_UNLCK;
 1507                 fp->f_flag &= ~FHASLOCK;
 1508                 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
 1509         }
 1510         if (uap->how & LOCK_EX)
 1511                 lf.l_type = F_WRLCK;
 1512         else if (uap->how & LOCK_SH)
 1513                 lf.l_type = F_RDLCK;
 1514         else
 1515                 return (EBADF);
 1516         fp->f_flag |= FHASLOCK;
 1517         if (uap->how & LOCK_NB)
 1518                 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
 1519         return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
 1520 }
 1521 
 1522 /*
 1523  * File Descriptor pseudo-device driver (/dev/fd/).
 1524  *
 1525  * Opening minor device N dup()s the file (if any) connected to file
 1526  * descriptor N belonging to the calling process.  Note that this driver
 1527  * consists of only the ``open()'' routine, because all subsequent
 1528  * references to this file will be direct to the other driver.
 1529  */
 1530 /* ARGSUSED */
 1531 static int
 1532 fdopen(dev, mode, type, p)
 1533         dev_t dev;
 1534         int mode, type;
 1535         struct proc *p;
 1536 {
 1537 
 1538         /*
 1539          * XXX Kludge: set curproc->p_dupfd to contain the value of the
 1540          * the file descriptor being sought for duplication. The error
 1541          * return ensures that the vnode for this device will be released
 1542          * by vn_open. Open will detect this special error and take the
 1543          * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
 1544          * will simply report the error.
 1545          */
 1546         p->p_dupfd = minor(dev);
 1547         return (ENODEV);
 1548 }
 1549 
 1550 /*
 1551  * Duplicate the specified descriptor to a free descriptor.
 1552  */
 1553 int
 1554 dupfdopen(p, fdp, indx, dfd, mode, error)
 1555         struct proc *p;
 1556         struct filedesc *fdp;
 1557         int indx, dfd;
 1558         int mode;
 1559         int error;
 1560 {
 1561         register struct file *wfp;
 1562         struct file *fp;
 1563 
 1564         /*
 1565          * If the to-be-dup'd fd number is greater than the allowed number
 1566          * of file descriptors, or the fd to be dup'd has already been
 1567          * closed, then reject.
 1568          */
 1569         if ((u_int)dfd >= fdp->fd_nfiles ||
 1570             (wfp = fdp->fd_ofiles[dfd]) == NULL) {
 1571                 return (EBADF);
 1572         }
 1573 
 1574         /*
 1575          * There are two cases of interest here.
 1576          *
 1577          * For ENODEV simply dup (dfd) to file descriptor
 1578          * (indx) and return.
 1579          *
 1580          * For ENXIO steal away the file structure from (dfd) and
 1581          * store it in (indx).  (dfd) is effectively closed by
 1582          * this operation.
 1583          *
 1584          * Any other error code is just returned.
 1585          */
 1586         switch (error) {
 1587         case ENODEV:
 1588                 /*
 1589                  * Check that the mode the file is being opened for is a
 1590                  * subset of the mode of the existing descriptor.
 1591                  */
 1592                 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
 1593                         return (EACCES);
 1594                 fp = fdp->fd_ofiles[indx];
 1595 #if 0
 1596                 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
 1597                         (void) munmapfd(p, indx);
 1598 #endif
 1599                 fdp->fd_ofiles[indx] = wfp;
 1600                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 1601                 fhold(wfp);
 1602                 if (indx > fdp->fd_lastfile)
 1603                         fdp->fd_lastfile = indx;
 1604                 /*
 1605                  * we now own the reference to fp that the ofiles[] array
 1606                  * used to own.  Release it.
 1607                  */
 1608                 if (fp)
 1609                         fdrop(fp, p);
 1610                 return (0);
 1611 
 1612         case ENXIO:
 1613                 /*
 1614                  * Steal away the file pointer from dfd, and stuff it into indx.
 1615                  */
 1616                 fp = fdp->fd_ofiles[indx];
 1617 #if 0
 1618                 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
 1619                         (void) munmapfd(p, indx);
 1620 #endif
 1621                 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
 1622                 fdp->fd_ofiles[dfd] = NULL;
 1623                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 1624                 fdp->fd_ofileflags[dfd] = 0;
 1625 
 1626                 /*
 1627                  * we now own the reference to fp that the ofiles[] array
 1628                  * used to own.  Release it.
 1629                  */
 1630                 if (fp)
 1631                         fdrop(fp, p);
 1632                 /*
 1633                  * Complete the clean up of the filedesc structure by
 1634                  * recomputing the various hints.
 1635                  */
 1636                 if (indx > fdp->fd_lastfile) {
 1637                         fdp->fd_lastfile = indx;
 1638                 } else {
 1639                         while (fdp->fd_lastfile > 0 &&
 1640                            fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
 1641                                 fdp->fd_lastfile--;
 1642                         }
 1643                         if (dfd < fdp->fd_freefile)
 1644                                 fdp->fd_freefile = dfd;
 1645                 }
 1646                 return (0);
 1647 
 1648         default:
 1649                 return (error);
 1650         }
 1651         /* NOTREACHED */
 1652 }
 1653 
 1654 
 1655 struct filedesc_to_leader *
 1656 filedesc_to_leader_alloc(struct filedesc_to_leader *old,
 1657                          struct proc *leader)
 1658 {
 1659         struct filedesc_to_leader *fdtol;
 1660         
 1661         MALLOC(fdtol, struct filedesc_to_leader *,
 1662                sizeof(struct filedesc_to_leader),
 1663                M_FILEDESC_TO_LEADER,
 1664                M_WAITOK);
 1665         fdtol->fdl_refcount = 1;
 1666         fdtol->fdl_holdcount = 0;
 1667         fdtol->fdl_wakeup = 0;
 1668         fdtol->fdl_leader = leader;
 1669         if (old != NULL) {
 1670                 fdtol->fdl_next = old->fdl_next;
 1671                 fdtol->fdl_prev = old;
 1672                 old->fdl_next = fdtol;
 1673                 fdtol->fdl_next->fdl_prev = fdtol;
 1674         } else {
 1675                 fdtol->fdl_next = fdtol;
 1676                 fdtol->fdl_prev = fdtol;
 1677         }
 1678         return fdtol;
 1679 }
 1680 
 1681 /*
 1682  * Get file structures.
 1683  */
 1684 static int
 1685 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 1686 {
 1687         int error;
 1688         struct file *fp;
 1689 
 1690         if (!req->oldptr) {
 1691                 /*
 1692                  * overestimate by 10 files
 1693                  */
 1694                 return (SYSCTL_OUT(req, 0, sizeof(filehead) + 
 1695                                 (nfiles + 10) * sizeof(struct file)));
 1696         }
 1697 
 1698         error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
 1699         if (error)
 1700                 return (error);
 1701 
 1702         /*
 1703          * followed by an array of file structures
 1704          */
 1705         LIST_FOREACH(fp, &filehead, f_list) {
 1706                 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
 1707                 if (error)
 1708                         return (error);
 1709         }
 1710         return (0);
 1711 }
 1712 
 1713 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
 1714     0, 0, sysctl_kern_file, "S,file", "Entire file table");
 1715 
 1716 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 
 1717     &maxfilesperproc, 0, "Maximum files allowed open per process");
 1718 
 1719 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 
 1720     &maxfiles, 0, "Maximum number of files");
 1721 
 1722 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 
 1723         &nfiles, 0, "System-wide number of open files");
 1724 
 1725 static void
 1726 fildesc_drvinit(void *unused)
 1727 {
 1728         int fd;
 1729 
 1730         for (fd = 0; fd < NUMFDESC; fd++)
 1731                 make_dev(&fildesc_cdevsw, fd,
 1732                     UID_BIN, GID_BIN, 0666, "fd/%d", fd);
 1733         make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
 1734         make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
 1735         make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
 1736 }
 1737 
 1738 struct fileops badfileops = {
 1739         badfo_readwrite,
 1740         badfo_readwrite,
 1741         badfo_ioctl,
 1742         badfo_poll,
 1743         badfo_kqfilter,
 1744         badfo_stat,
 1745         badfo_close
 1746 };
 1747 
 1748 static int
 1749 badfo_readwrite(fp, uio, cred, flags, p)
 1750         struct file *fp;
 1751         struct uio *uio;
 1752         struct ucred *cred;
 1753         struct proc *p;
 1754         int flags;
 1755 {
 1756 
 1757         return (EBADF);
 1758 }
 1759 
 1760 static int
 1761 badfo_ioctl(fp, com, data, p)
 1762         struct file *fp;
 1763         u_long com;
 1764         caddr_t data;
 1765         struct proc *p;
 1766 {
 1767 
 1768         return (EBADF);
 1769 }
 1770 
 1771 static int
 1772 badfo_poll(fp, events, cred, p)
 1773         struct file *fp;
 1774         int events;
 1775         struct ucred *cred;
 1776         struct proc *p;
 1777 {
 1778 
 1779         return (0);
 1780 }
 1781 
 1782 static int
 1783 badfo_kqfilter(fp, kn)
 1784         struct file *fp;
 1785         struct knote *kn;
 1786 {
 1787 
 1788         return (0);
 1789 }
 1790 
 1791 static int
 1792 badfo_stat(fp, sb, p)
 1793         struct file *fp;
 1794         struct stat *sb;
 1795         struct proc *p;
 1796 {
 1797 
 1798         return (EBADF);
 1799 }
 1800 
 1801 static int
 1802 badfo_close(fp, p)
 1803         struct file *fp;
 1804         struct proc *p;
 1805 {
 1806 
 1807         return (EBADF);
 1808 }
 1809 
 1810 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
 1811                                         fildesc_drvinit,NULL)

Cache object: 898ad5abdc0f4cfaae80f3d7f996868d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.