The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_descrip.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_descrip.c,v 1.123.2.3 2005/05/24 19:38:34 riz Exp $       */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.123.2.3 2005/05/24 19:38:34 riz Exp $");
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/kernel.h>
   46 #include <sys/vnode.h>
   47 #include <sys/proc.h>
   48 #include <sys/file.h>
   49 #include <sys/namei.h>
   50 #include <sys/socket.h>
   51 #include <sys/socketvar.h>
   52 #include <sys/stat.h>
   53 #include <sys/ioctl.h>
   54 #include <sys/fcntl.h>
   55 #include <sys/malloc.h>
   56 #include <sys/pool.h>
   57 #include <sys/syslog.h>
   58 #include <sys/unistd.h>
   59 #include <sys/resourcevar.h>
   60 #include <sys/conf.h>
   61 #include <sys/event.h>
   62 
   63 #include <sys/mount.h>
   64 #include <sys/sa.h>
   65 #include <sys/syscallargs.h>
   66 
   67 /*
   68  * Descriptor management.
   69  */
   70 struct filelist filehead;       /* head of list of open files */
   71 int             nfiles;         /* actual number of open files */
   72 struct pool     file_pool;      /* memory pool for file structures */
   73 struct pool     cwdi_pool;      /* memory pool for cwdinfo structures */
   74 struct pool     filedesc0_pool; /* memory pool for filedesc0 structures */
   75 
   76 /* Global file list lock */
   77 static struct simplelock filelist_slock = SIMPLELOCK_INITIALIZER;
   78 
   79 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
   80 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
   81 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
   82 
   83 static __inline void    fd_used(struct filedesc *, int);
   84 static __inline void    fd_unused(struct filedesc *, int);
   85 static __inline int     find_next_zero(uint32_t *, int, u_int);
   86 int                     finishdup(struct proc *, int, int, register_t *);
   87 int                     find_last_set(struct filedesc *, int);
   88 int                     fcntl_forfs(int, struct proc *, int, void *);
   89 
   90 dev_type_open(filedescopen);
   91 
   92 const struct cdevsw filedesc_cdevsw = {
   93         filedescopen, noclose, noread, nowrite, noioctl,
   94         nostop, notty, nopoll, nommap, nokqfilter,
   95 };
   96 
   97 static __inline int
   98 find_next_zero(uint32_t *bitmap, int want, u_int bits)
   99 {
  100         int i, off, maxoff;
  101         uint32_t sub;
  102 
  103         if (want > bits)
  104                 return -1;
  105 
  106         off = want >> NDENTRYSHIFT;
  107         i = want & NDENTRYMASK;
  108         if (i) {
  109                 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
  110                 if (sub != ~0)
  111                         goto found;
  112                 off++;
  113         }
  114 
  115         maxoff = NDLOSLOTS(bits);
  116         while (off < maxoff) {
  117                 if ((sub = bitmap[off]) != ~0)
  118                         goto found;
  119                 off++;
  120         }
  121 
  122         return (-1);
  123 
  124  found:
  125         return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
  126 }
  127 
  128 int
  129 find_last_set(struct filedesc *fd, int last)
  130 {
  131         int off, i;
  132         struct file **ofiles = fd->fd_ofiles;
  133         uint32_t *bitmap = fd->fd_lomap;
  134 
  135         off = (last - 1) >> NDENTRYSHIFT;
  136 
  137         while (off >= 0 && !bitmap[off])
  138                 off--;
  139 
  140         if (off < 0)
  141                 return (-1);
  142        
  143         i = ((off + 1) << NDENTRYSHIFT) - 1;
  144         if (i >= last)
  145                 i = last - 1;
  146 
  147         while (i > 0 && ofiles[i] == NULL)
  148                 i--;
  149 
  150         return (i);
  151 }
  152 
  153 static __inline void
  154 fd_used(struct filedesc *fdp, int fd)
  155 {
  156         u_int off = fd >> NDENTRYSHIFT;
  157 
  158         KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0);
  159 
  160         fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
  161         if (fdp->fd_lomap[off] == ~0) {
  162                 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
  163                     (1 << (off & NDENTRYMASK))) == 0);
  164                 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
  165         }
  166 
  167         if (fd > fdp->fd_lastfile)
  168                 fdp->fd_lastfile = fd;
  169 }
  170 
  171 static __inline void
  172 fd_unused(struct filedesc *fdp, int fd)
  173 {
  174         u_int off = fd >> NDENTRYSHIFT;
  175 
  176         if (fd < fdp->fd_freefile)
  177                 fdp->fd_freefile = fd;
  178 
  179         if (fdp->fd_lomap[off] == ~0) {
  180                 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
  181                     (1 << (off & NDENTRYMASK))) != 0);
  182                 fdp->fd_himap[off >> NDENTRYSHIFT] &=
  183                     ~(1 << (off & NDENTRYMASK));
  184         }
  185         KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0);
  186         fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
  187 
  188 #ifdef DIAGNOSTIC
  189         if (fd > fdp->fd_lastfile)
  190                 panic("fd_unused: fd_lastfile inconsistent");
  191 #endif
  192         if (fd == fdp->fd_lastfile)
  193                 fdp->fd_lastfile = find_last_set(fdp, fd);
  194 }
  195 
  196 /*
  197  * Lookup the file structure corresponding to a file descriptor
  198  * and return it locked.
  199  * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);'
  200  * The locking strategy has been optimised for this case, i.e.
  201  * fd_getfile() returns the file locked while FILE_USE() will increment
  202  * the file's use count and unlock.
  203  */
  204 struct file *
  205 fd_getfile(struct filedesc *fdp, int fd)
  206 {
  207         struct file *fp;
  208 
  209         if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
  210                 return (NULL);
  211 
  212         simple_lock(&fp->f_slock);
  213         if (FILE_IS_USABLE(fp) == 0) {
  214                 simple_unlock(&fp->f_slock);
  215                 return (NULL);
  216         }
  217 
  218         return (fp);
  219 }
  220 
  221 /*
  222  * System calls on descriptors.
  223  */
  224 
  225 /*
  226  * Duplicate a file descriptor.
  227  */
  228 /* ARGSUSED */
  229 int
  230 sys_dup(struct lwp *l, void *v, register_t *retval)
  231 {
  232         struct sys_dup_args /* {
  233                 syscallarg(int) fd;
  234         } */ *uap = v;
  235         struct file     *fp;
  236         struct filedesc *fdp;
  237         struct proc     *p;
  238         int             old, new, error;
  239 
  240         p = l->l_proc;
  241         fdp = p->p_fd;
  242         old = SCARG(uap, fd);
  243 
  244  restart:
  245         if ((fp = fd_getfile(fdp, old)) == NULL)
  246                 return (EBADF);
  247 
  248         FILE_USE(fp);
  249 
  250         if ((error = fdalloc(p, 0, &new)) != 0) {
  251                 if (error == ENOSPC) {
  252                         fdexpand(p);
  253                         FILE_UNUSE(fp, p);
  254                         goto restart;
  255                 }
  256                 FILE_UNUSE(fp, p);
  257                 return (error);
  258         }
  259 
  260         /* finishdup() will unuse the descriptors for us */
  261         return (finishdup(p, old, new, retval));
  262 }
  263 
  264 /*
  265  * Duplicate a file descriptor to a particular value.
  266  */
  267 /* ARGSUSED */
  268 int
  269 sys_dup2(struct lwp *l, void *v, register_t *retval)
  270 {
  271         struct sys_dup2_args /* {
  272                 syscallarg(int) from;
  273                 syscallarg(int) to;
  274         } */ *uap = v;
  275         struct file     *fp;
  276         struct filedesc *fdp;
  277         struct proc     *p;
  278         int             old, new, i, error;
  279 
  280         p = l->l_proc;
  281         fdp = p->p_fd;
  282         old = SCARG(uap, from);
  283         new = SCARG(uap, to);
  284 
  285  restart:
  286         if ((fp = fd_getfile(fdp, old)) == NULL)
  287                 return (EBADF);
  288 
  289         if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  290             (u_int)new >= maxfiles) {
  291                 simple_unlock(&fp->f_slock);
  292                 return (EBADF);
  293         }
  294 
  295         if (old == new) {
  296                 simple_unlock(&fp->f_slock);
  297                 *retval = new;
  298                 return (0);
  299         }
  300 
  301         FILE_USE(fp);
  302 
  303         if (new >= fdp->fd_nfiles) {
  304                 if ((error = fdalloc(p, new, &i)) != 0) {
  305                         if (error == ENOSPC) {
  306                                 fdexpand(p);
  307                                 FILE_UNUSE(fp, p);
  308                                 goto restart;
  309                         }
  310                         FILE_UNUSE(fp, p);
  311                         return (error);
  312                 }
  313                 if (new != i)
  314                         panic("dup2: fdalloc");
  315         } else if (fdp->fd_ofiles[new] == NULL) {
  316                 /*
  317                  * Mark `new' slot "used" only if it was empty.
  318                  */
  319                 fd_used(fdp, new);
  320         }
  321 
  322         /*
  323          * finishdup() will close the file that's in the `new'
  324          * slot, if there's one there.
  325          */
  326 
  327         /* finishdup() will unuse the descriptors for us */
  328         return (finishdup(p, old, new, retval));
  329 }
  330 
  331 /*
  332  * The file control system call.
  333  */
  334 /* ARGSUSED */
  335 int
  336 sys_fcntl(struct lwp *l, void *v, register_t *retval)
  337 {
  338         struct sys_fcntl_args /* {
  339                 syscallarg(int)         fd;
  340                 syscallarg(int)         cmd;
  341                 syscallarg(void *)      arg;
  342         } */ *uap = v;
  343         struct filedesc *fdp;
  344         struct file     *fp;
  345         struct proc     *p;
  346         struct vnode    *vp;
  347         int             fd, i, tmp, error, flg, cmd, newmin;
  348         struct flock    fl;
  349 
  350         p = l->l_proc;
  351         fd = SCARG(uap, fd);
  352         cmd = SCARG(uap, cmd);
  353         fdp = p->p_fd;
  354         error = 0;
  355         flg = F_POSIX;
  356 
  357         switch (cmd) {
  358         case F_CLOSEM:
  359                 if (fd < 0)
  360                         return EBADF;
  361                 while (fdp->fd_lastfile >= fd)
  362                         fdrelease(p, fdp->fd_lastfile);
  363                 return 0;
  364 
  365         case F_MAXFD:
  366                 *retval = fdp->fd_lastfile;
  367                 return 0;
  368 
  369         default:
  370                 /* Handled below */
  371                 break;
  372         }
  373 
  374  restart:
  375         if ((fp = fd_getfile(fdp, fd)) == NULL)
  376                 return (EBADF);
  377 
  378         FILE_USE(fp);
  379 
  380         if ((cmd & F_FSCTL)) {
  381                 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg));
  382                 goto out;
  383         }
  384 
  385         switch (cmd) {
  386 
  387         case F_DUPFD:
  388                 newmin = (long)SCARG(uap, arg);
  389                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  390                     (u_int)newmin >= maxfiles) {
  391                         error = EINVAL;
  392                         goto out;
  393                 }
  394                 if ((error = fdalloc(p, newmin, &i)) != 0) {
  395                         if (error == ENOSPC) {
  396                                 fdexpand(p);
  397                                 FILE_UNUSE(fp, p);
  398                                 goto restart;
  399                         }
  400                         goto out;
  401                 }
  402 
  403                 /* finishdup() will unuse the descriptors for us */
  404                 return (finishdup(p, fd, i, retval));
  405 
  406         case F_GETFD:
  407                 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
  408                 break;
  409 
  410         case F_SETFD:
  411                 if ((long)SCARG(uap, arg) & 1)
  412                         fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
  413                 else
  414                         fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
  415                 break;
  416 
  417         case F_GETFL:
  418                 *retval = OFLAGS(fp->f_flag);
  419                 break;
  420 
  421         case F_SETFL:
  422                 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
  423                 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, p);
  424                 if (error)
  425                         break;
  426                 i = tmp ^ fp->f_flag;
  427                 if (i & FNONBLOCK) {
  428                         int fl = tmp & FNONBLOCK;
  429                         error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &fl, p);
  430                         if (error)
  431                                 goto reset_fcntl;
  432                 }
  433                 if (i & FASYNC) {
  434                         int fl = tmp & FASYNC;
  435                         error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &fl, p);
  436                         if (error) {
  437                                 if (i & FNONBLOCK) {
  438                                         tmp = fp->f_flag & FNONBLOCK;
  439                                         (void)(*fp->f_ops->fo_ioctl)(fp,
  440                                                 FIONBIO, &tmp, p);
  441                                 }
  442                                 goto reset_fcntl;
  443                         }
  444                 }
  445                 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
  446                 break;
  447             reset_fcntl:
  448                 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, p);
  449                 break;
  450 
  451         case F_GETOWN:
  452                 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, retval, p);
  453                 break;
  454 
  455         case F_SETOWN:
  456                 tmp = (int)(intptr_t) SCARG(uap, arg);
  457                 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, p);
  458                 break;
  459 
  460         case F_SETLKW:
  461                 flg |= F_WAIT;
  462                 /* Fall into F_SETLK */
  463 
  464         case F_SETLK:
  465                 if (fp->f_type != DTYPE_VNODE) {
  466                         error = EINVAL;
  467                         goto out;
  468                 }
  469                 vp = (struct vnode *)fp->f_data;
  470                 /* Copy in the lock structure */
  471                 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
  472                 if (error)
  473                         goto out;
  474                 if (fl.l_whence == SEEK_CUR)
  475                         fl.l_start += fp->f_offset;
  476                 switch (fl.l_type) {
  477                 case F_RDLCK:
  478                         if ((fp->f_flag & FREAD) == 0) {
  479                                 error = EBADF;
  480                                 goto out;
  481                         }
  482                         p->p_flag |= P_ADVLOCK;
  483                         error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg);
  484                         goto out;
  485 
  486                 case F_WRLCK:
  487                         if ((fp->f_flag & FWRITE) == 0) {
  488                                 error = EBADF;
  489                                 goto out;
  490                         }
  491                         p->p_flag |= P_ADVLOCK;
  492                         error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg);
  493                         goto out;
  494 
  495                 case F_UNLCK:
  496                         error = VOP_ADVLOCK(vp, p, F_UNLCK, &fl, F_POSIX);
  497                         goto out;
  498 
  499                 default:
  500                         error = EINVAL;
  501                         goto out;
  502                 }
  503 
  504         case F_GETLK:
  505                 if (fp->f_type != DTYPE_VNODE) {
  506                         error = EINVAL;
  507                         goto out;
  508                 }
  509                 vp = (struct vnode *)fp->f_data;
  510                 /* Copy in the lock structure */
  511                 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
  512                 if (error)
  513                         goto out;
  514                 if (fl.l_whence == SEEK_CUR)
  515                         fl.l_start += fp->f_offset;
  516                 if (fl.l_type != F_RDLCK &&
  517                     fl.l_type != F_WRLCK &&
  518                     fl.l_type != F_UNLCK) {
  519                         error = EINVAL;
  520                         goto out;
  521                 }
  522                 error = VOP_ADVLOCK(vp, p, F_GETLK, &fl, F_POSIX);
  523                 if (error)
  524                         goto out;
  525                 error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
  526                 break;
  527 
  528         default:
  529                 error = EINVAL;
  530         }
  531 
  532  out:
  533         FILE_UNUSE(fp, p);
  534         return (error);
  535 }
  536 
  537 /*
  538  * Common code for dup, dup2, and fcntl(F_DUPFD).
  539  */
  540 int
  541 finishdup(struct proc *p, int old, int new, register_t *retval)
  542 {
  543         struct filedesc *fdp;
  544         struct file     *fp, *delfp;
  545 
  546         fdp = p->p_fd;
  547 
  548         /*
  549          * If there is a file in the new slot, remember it so we
  550          * can close it after we've finished the dup.  We need
  551          * to do it after the dup is finished, since closing
  552          * the file may block.
  553          *
  554          * Note: `old' is already used for us.
  555          * Note: Caller already marked `new' slot "used".
  556          */
  557         delfp = fdp->fd_ofiles[new];
  558 
  559         fp = fdp->fd_ofiles[old];
  560         KDASSERT(fp != NULL);
  561         fdp->fd_ofiles[new] = fp;
  562         fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
  563         fp->f_count++;
  564         *retval = new;
  565         FILE_UNUSE(fp, p);
  566 
  567         if (delfp != NULL) {
  568                 simple_lock(&delfp->f_slock);
  569                 FILE_USE(delfp);
  570                 if (new < fdp->fd_knlistsize)
  571                         knote_fdclose(p, new);
  572                 (void) closef(delfp, p);
  573         }
  574         return (0);
  575 }
  576 
  577 void
  578 fdremove(struct filedesc *fdp, int fd)
  579 {
  580 
  581         fdp->fd_ofiles[fd] = NULL;
  582         fd_unused(fdp, fd);
  583 }
  584 
  585 int
  586 fdrelease(struct proc *p, int fd)
  587 {
  588         struct filedesc *fdp;
  589         struct file     **fpp, *fp;
  590 
  591         fdp = p->p_fd;
  592         fpp = &fdp->fd_ofiles[fd];
  593         fp = *fpp;
  594         if (fp == NULL)
  595                 return (EBADF);
  596 
  597         simple_lock(&fp->f_slock);
  598         if (!FILE_IS_USABLE(fp)) {
  599                 simple_unlock(&fp->f_slock);
  600                 return (EBADF);
  601         }
  602 
  603         FILE_USE(fp);
  604 
  605         *fpp = NULL;
  606         fdp->fd_ofileflags[fd] = 0;
  607         if (fd < fdp->fd_knlistsize)
  608                 knote_fdclose(p, fd);
  609         fd_unused(fdp, fd);
  610         return (closef(fp, p));
  611 }
  612 
  613 /*
  614  * Close a file descriptor.
  615  */
  616 /* ARGSUSED */
  617 int
  618 sys_close(struct lwp *l, void *v, register_t *retval)
  619 {
  620         struct sys_close_args /* {
  621                 syscallarg(int) fd;
  622         } */ *uap = v;
  623         int             fd;
  624         struct filedesc *fdp;
  625         struct proc *p;
  626 
  627         p = l->l_proc;
  628         fd = SCARG(uap, fd);
  629         fdp = p->p_fd;
  630 
  631         if ((u_int) fd >= fdp->fd_nfiles)
  632                 return (EBADF);
  633 #if 0
  634         if (fd_getfile(fdp, fd) == NULL)
  635                 return (EBADF);
  636 #endif
  637 
  638         return (fdrelease(p, fd));
  639 }
  640 
  641 /*
  642  * Return status information about a file descriptor.
  643  */
  644 /* ARGSUSED */
  645 int
  646 sys___fstat13(struct lwp *l, void *v, register_t *retval)
  647 {
  648         struct sys___fstat13_args /* {
  649                 syscallarg(int)                 fd;
  650                 syscallarg(struct stat *)       sb;
  651         } */ *uap = v;
  652         int             fd;
  653         struct filedesc *fdp;
  654         struct file     *fp;
  655         struct proc     *p;
  656         struct stat     ub;
  657         int             error;
  658 
  659         p = l->l_proc;
  660         fd = SCARG(uap, fd);
  661         fdp = p->p_fd;
  662 
  663         if ((fp = fd_getfile(fdp, fd)) == NULL)
  664                 return (EBADF);
  665 
  666         FILE_USE(fp);
  667         error = (*fp->f_ops->fo_stat)(fp, &ub, p);
  668         FILE_UNUSE(fp, p);
  669 
  670         if (error == 0)
  671                 error = copyout(&ub, SCARG(uap, sb), sizeof(ub));
  672 
  673         return (error);
  674 }
  675 
  676 /*
  677  * Return pathconf information about a file descriptor.
  678  */
  679 /* ARGSUSED */
  680 int
  681 sys_fpathconf(struct lwp *l, void *v, register_t *retval)
  682 {
  683         struct sys_fpathconf_args /* {
  684                 syscallarg(int) fd;
  685                 syscallarg(int) name;
  686         } */ *uap = v;
  687         int             fd;
  688         struct filedesc *fdp;
  689         struct file     *fp;
  690         struct proc     *p;
  691         struct vnode    *vp;
  692         int             error;
  693 
  694         p = l->l_proc;
  695         fd = SCARG(uap, fd);
  696         fdp = p->p_fd;
  697         error = 0;
  698 
  699         if ((fp = fd_getfile(fdp, fd)) == NULL)
  700                 return (EBADF);
  701 
  702         FILE_USE(fp);
  703 
  704         switch (fp->f_type) {
  705 
  706         case DTYPE_SOCKET:
  707         case DTYPE_PIPE:
  708                 if (SCARG(uap, name) != _PC_PIPE_BUF)
  709                         error = EINVAL;
  710                 else
  711                         *retval = PIPE_BUF;
  712                 break;
  713 
  714         case DTYPE_VNODE:
  715                 vp = (struct vnode *)fp->f_data;
  716                 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
  717                 break;
  718 
  719         case DTYPE_KQUEUE:
  720                 error = EINVAL;
  721                 break;
  722 
  723         default:
  724                 error = EOPNOTSUPP;
  725                 break;
  726         }
  727 
  728         FILE_UNUSE(fp, p);
  729         return (error);
  730 }
  731 
  732 /*
  733  * Allocate a file descriptor for the process.
  734  */
  735 int     fdexpanded;             /* XXX: what else uses this? */
  736 
  737 int
  738 fdalloc(struct proc *p, int want, int *result)
  739 {
  740         struct filedesc *fdp;
  741         int i, lim, last;
  742         u_int off, new;
  743 
  744         fdp = p->p_fd;
  745 
  746         /*
  747          * Search for a free descriptor starting at the higher
  748          * of want or fd_freefile.  If that fails, consider
  749          * expanding the ofile array.
  750          */
  751         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
  752         last = min(fdp->fd_nfiles, lim);
  753  again:
  754         if ((i = want) < fdp->fd_freefile)
  755                 i = fdp->fd_freefile;
  756         off = i >> NDENTRYSHIFT;
  757         new = find_next_zero(fdp->fd_himap, off,
  758             (last + NDENTRIES - 1) >> NDENTRYSHIFT);
  759         if (new != -1) {
  760                 i = find_next_zero(&fdp->fd_lomap[new], 
  761                     new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
  762                 if (i == -1) {
  763                         /* 
  764                          * free file descriptor in this block was
  765                          * below want, try again with higher want.
  766                          */
  767                         want = (new + 1) << NDENTRYSHIFT;
  768                         goto again;
  769                 }
  770                 i += (new << NDENTRYSHIFT);
  771                 if (i < last) {
  772                         if (fdp->fd_ofiles[i] == NULL) {
  773                                 fd_used(fdp, i);
  774                                 if (want <= fdp->fd_freefile)
  775                                         fdp->fd_freefile = i;
  776                                 *result = i;
  777                                 return (0);
  778                         }
  779                 }
  780         }
  781 
  782         /* No space in current array.  Expand? */
  783         if (fdp->fd_nfiles >= lim)
  784                 return (EMFILE);
  785 
  786         /* Let the caller do it. */
  787         return (ENOSPC);
  788 }
  789 
  790 void
  791 fdexpand(struct proc *p)
  792 {
  793         struct filedesc *fdp;
  794         int             i, nfiles;
  795         struct file     **newofile;
  796         char            *newofileflags;
  797         uint32_t        *newhimap, *newlomap;
  798 
  799         fdp = p->p_fd;
  800 
  801         if (fdp->fd_nfiles < NDEXTENT)
  802                 nfiles = NDEXTENT;
  803         else
  804                 nfiles = 2 * fdp->fd_nfiles;
  805         newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
  806         newofileflags = (char *) &newofile[nfiles];
  807         /*
  808          * Copy the existing ofile and ofileflags arrays
  809          * and zero the new portion of each array.
  810          */
  811         memcpy(newofile, fdp->fd_ofiles,
  812             (i = sizeof(struct file *) * fdp->fd_nfiles));
  813         memset((char *)newofile + i, 0,
  814             nfiles * sizeof(struct file *) - i);
  815         memcpy(newofileflags, fdp->fd_ofileflags,
  816             (i = sizeof(char) * fdp->fd_nfiles));
  817         memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
  818         if (fdp->fd_nfiles > NDFILE)
  819                 free(fdp->fd_ofiles, M_FILEDESC);
  820 
  821         if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
  822                 newhimap = malloc(NDHISLOTS(nfiles) * sizeof(uint32_t),
  823                     M_FILEDESC, M_WAITOK);
  824                 newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(uint32_t),
  825                     M_FILEDESC, M_WAITOK);
  826 
  827                 memcpy(newhimap, fdp->fd_himap,
  828                     (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(uint32_t)));
  829                 memset((char *)newhimap + i, 0,
  830                     NDHISLOTS(nfiles) * sizeof(uint32_t) - i);
  831 
  832                 memcpy(newlomap, fdp->fd_lomap,
  833                     (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(uint32_t)));
  834                 memset((char *)newlomap + i, 0,
  835                     NDLOSLOTS(nfiles) * sizeof(uint32_t) - i);
  836 
  837                 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
  838                         free(fdp->fd_himap, M_FILEDESC);
  839                         free(fdp->fd_lomap, M_FILEDESC);
  840                 }
  841                 fdp->fd_himap = newhimap;
  842                 fdp->fd_lomap = newlomap;
  843         }
  844 
  845         fdp->fd_ofiles = newofile;
  846         fdp->fd_ofileflags = newofileflags;
  847         fdp->fd_nfiles = nfiles;
  848         fdexpanded++;
  849 }
  850 
  851 /*
  852  * Check to see whether n user file descriptors
  853  * are available to the process p.
  854  */
  855 int
  856 fdavail(struct proc *p, int n)
  857 {
  858         struct filedesc *fdp;
  859         struct file     **fpp;
  860         int             i, lim;
  861 
  862         fdp = p->p_fd;
  863         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
  864         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
  865                 return (1);
  866         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
  867         for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
  868                 if (*fpp == NULL && --n <= 0)
  869                         return (1);
  870         return (0);
  871 }
  872 
  873 /*
  874  * Initialize the data structures necessary for managing files.
  875  */
  876 void
  877 finit(void)
  878 {
  879 
  880         pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
  881             &pool_allocator_nointr);
  882         pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl",
  883             &pool_allocator_nointr);
  884         pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
  885             &pool_allocator_nointr);
  886 }
  887 
  888 /*
  889  * Create a new open file structure and allocate
  890  * a file descriptor for the process that refers to it.
  891  */
  892 int
  893 falloc(struct proc *p, struct file **resultfp, int *resultfd)
  894 {
  895         struct file     *fp, *fq;
  896         int             error, i;
  897 
  898  restart:
  899         if ((error = fdalloc(p, 0, &i)) != 0) {
  900                 if (error == ENOSPC) {
  901                         fdexpand(p);
  902                         goto restart;
  903                 }
  904                 return (error);
  905         }
  906 
  907         fp = pool_get(&file_pool, PR_WAITOK);
  908         simple_lock(&filelist_slock);
  909         if (nfiles >= maxfiles) {
  910                 tablefull("file", "increase kern.maxfiles or MAXFILES");
  911                 simple_unlock(&filelist_slock);
  912                 fd_unused(p->p_fd, i);
  913                 pool_put(&file_pool, fp);
  914                 return (ENFILE);
  915         }
  916         /*
  917          * Allocate a new file descriptor.
  918          * If the process has file descriptor zero open, add to the list
  919          * of open files at that point, otherwise put it at the front of
  920          * the list of open files.
  921          */
  922         nfiles++;
  923         memset(fp, 0, sizeof(struct file));
  924         fp->f_iflags = FIF_LARVAL;
  925         if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
  926                 LIST_INSERT_AFTER(fq, fp, f_list);
  927         } else {
  928                 LIST_INSERT_HEAD(&filehead, fp, f_list);
  929         }
  930         simple_unlock(&filelist_slock);
  931         KDASSERT(p->p_fd->fd_ofiles[i] == NULL);
  932         p->p_fd->fd_ofiles[i] = fp;
  933         simple_lock_init(&fp->f_slock);
  934         fp->f_count = 1;
  935         fp->f_cred = p->p_ucred;
  936         crhold(fp->f_cred);
  937         if (resultfp) {
  938                 fp->f_usecount = 1;
  939                 *resultfp = fp;
  940         }
  941         if (resultfd)
  942                 *resultfd = i;
  943         return (0);
  944 }
  945 
  946 /*
  947  * Free a file descriptor.
  948  */
  949 void
  950 ffree(struct file *fp)
  951 {
  952 
  953 #ifdef DIAGNOSTIC
  954         if (fp->f_usecount)
  955                 panic("ffree");
  956 #endif
  957 
  958         simple_lock(&filelist_slock);
  959         LIST_REMOVE(fp, f_list);
  960         crfree(fp->f_cred);
  961 #ifdef DIAGNOSTIC
  962         fp->f_count = 0; /* What's the point? */
  963 #endif
  964         nfiles--;
  965         simple_unlock(&filelist_slock);
  966         pool_put(&file_pool, fp);
  967 }
  968 
  969 /*
  970  * Create an initial cwdinfo structure, using the same current and root
  971  * directories as p.
  972  */
  973 struct cwdinfo *
  974 cwdinit(struct proc *p)
  975 {
  976         struct cwdinfo *cwdi;
  977 
  978         cwdi = pool_get(&cwdi_pool, PR_WAITOK);
  979 
  980         cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
  981         if (cwdi->cwdi_cdir)
  982                 VREF(cwdi->cwdi_cdir);
  983         cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
  984         if (cwdi->cwdi_rdir)
  985                 VREF(cwdi->cwdi_rdir);
  986         cwdi->cwdi_cmask =  p->p_cwdi->cwdi_cmask;
  987         cwdi->cwdi_refcnt = 1;
  988 
  989         return (cwdi);
  990 }
  991 
  992 /*
  993  * Make p2 share p1's cwdinfo.
  994  */
  995 void
  996 cwdshare(struct proc *p1, struct proc *p2)
  997 {
  998 
  999         p2->p_cwdi = p1->p_cwdi;
 1000         p1->p_cwdi->cwdi_refcnt++;
 1001 }
 1002 
 1003 /*
 1004  * Make this process not share its cwdinfo structure, maintaining
 1005  * all cwdinfo state.
 1006  */
 1007 void
 1008 cwdunshare(struct proc *p)
 1009 {
 1010         struct cwdinfo *newcwdi;
 1011 
 1012         if (p->p_cwdi->cwdi_refcnt == 1)
 1013                 return;
 1014 
 1015         newcwdi = cwdinit(p);
 1016         cwdfree(p);
 1017         p->p_cwdi = newcwdi;
 1018 }
 1019 
 1020 /*
 1021  * Release a cwdinfo structure.
 1022  */
 1023 void
 1024 cwdfree(struct proc *p)
 1025 {
 1026         struct cwdinfo *cwdi;
 1027 
 1028         cwdi = p->p_cwdi;
 1029         if (--cwdi->cwdi_refcnt > 0)
 1030                 return;
 1031 
 1032         p->p_cwdi = NULL;
 1033 
 1034         vrele(cwdi->cwdi_cdir);
 1035         if (cwdi->cwdi_rdir)
 1036                 vrele(cwdi->cwdi_rdir);
 1037         pool_put(&cwdi_pool, cwdi);
 1038 }
 1039 
 1040 /*
 1041  * Create an initial filedesc structure, using the same current and root
 1042  * directories as p.
 1043  */
 1044 struct filedesc *
 1045 fdinit(struct proc *p)
 1046 {
 1047         struct filedesc0 *newfdp;
 1048 
 1049         newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
 1050         memset(newfdp, 0, sizeof(struct filedesc0));
 1051 
 1052         fdinit1(newfdp);
 1053 
 1054         return (&newfdp->fd_fd);
 1055 }
 1056 
 1057 /*
 1058  * Initialize a file descriptor table.
 1059  */
 1060 void
 1061 fdinit1(struct filedesc0 *newfdp)
 1062 {
 1063 
 1064         newfdp->fd_fd.fd_refcnt = 1;
 1065         newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
 1066         newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
 1067         newfdp->fd_fd.fd_nfiles = NDFILE;
 1068         newfdp->fd_fd.fd_knlistsize = -1;
 1069         newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
 1070         newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
 1071         newfdp->fd_fd.fd_lastfile = -1;
 1072 }
 1073 
 1074 /*
 1075  * Make p2 share p1's filedesc structure.
 1076  */
 1077 void
 1078 fdshare(struct proc *p1, struct proc *p2)
 1079 {
 1080 
 1081         p2->p_fd = p1->p_fd;
 1082         p1->p_fd->fd_refcnt++;
 1083 }
 1084 
 1085 /*
 1086  * Make this process not share its filedesc structure, maintaining
 1087  * all file descriptor state.
 1088  */
 1089 void
 1090 fdunshare(struct proc *p)
 1091 {
 1092         struct filedesc *newfd;
 1093 
 1094         if (p->p_fd->fd_refcnt == 1)
 1095                 return;
 1096 
 1097         newfd = fdcopy(p);
 1098         fdfree(p);
 1099         p->p_fd = newfd;
 1100 }
 1101 
 1102 /*
 1103  * Clear a process's fd table.
 1104  */
 1105 void
 1106 fdclear(struct proc *p)
 1107 {
 1108         struct filedesc *newfd;
 1109 
 1110         newfd = fdinit(p);
 1111         fdfree(p);
 1112         p->p_fd = newfd;
 1113 }
 1114 
 1115 /*
 1116  * Copy a filedesc structure.
 1117  */
 1118 struct filedesc *
 1119 fdcopy(struct proc *p)
 1120 {
 1121         struct filedesc *newfdp, *fdp;
 1122         struct file     **fpp;
 1123         int             i;
 1124 
 1125         fdp = p->p_fd;
 1126         newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
 1127         memcpy(newfdp, fdp, sizeof(struct filedesc));
 1128         newfdp->fd_refcnt = 1;
 1129 
 1130         /*
 1131          * If the number of open files fits in the internal arrays
 1132          * of the open file structure, use them, otherwise allocate
 1133          * additional memory for the number of descriptors currently
 1134          * in use.
 1135          */
 1136         if (newfdp->fd_lastfile < NDFILE) {
 1137                 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
 1138                 newfdp->fd_ofileflags =
 1139                     ((struct filedesc0 *) newfdp)->fd_dfileflags;
 1140                 i = NDFILE;
 1141         } else {
 1142                 /*
 1143                  * Compute the smallest multiple of NDEXTENT needed
 1144                  * for the file descriptors currently in use,
 1145                  * allowing the table to shrink.
 1146                  */
 1147                 i = newfdp->fd_nfiles;
 1148                 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
 1149                         i /= 2;
 1150                 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
 1151                 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
 1152         }
 1153         if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
 1154                 newfdp->fd_himap =
 1155                     ((struct filedesc0 *) newfdp)->fd_dhimap;
 1156                 newfdp->fd_lomap =
 1157                     ((struct filedesc0 *) newfdp)->fd_dlomap;
 1158         } else {
 1159                 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t),
 1160                     M_FILEDESC, M_WAITOK);
 1161                 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t),
 1162                     M_FILEDESC, M_WAITOK);
 1163         }
 1164 
 1165         newfdp->fd_nfiles = i;
 1166         memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
 1167         memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
 1168         if (i < NDENTRIES * NDENTRIES)
 1169                 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */
 1170         memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t));
 1171         memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t));
 1172         /*
 1173          * kq descriptors cannot be copied.
 1174          */
 1175         if (newfdp->fd_knlistsize != -1) {
 1176                 fpp = newfdp->fd_ofiles;
 1177                 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) {
 1178                         if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
 1179                                 fdremove(newfdp, i);
 1180                 }
 1181                 newfdp->fd_knlist = NULL;
 1182                 newfdp->fd_knlistsize = -1;
 1183                 newfdp->fd_knhash = NULL;
 1184                 newfdp->fd_knhashmask = 0;
 1185         }
 1186         fpp = newfdp->fd_ofiles;
 1187         for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++)
 1188                 if (*fpp != NULL)
 1189                         (*fpp)->f_count++;
 1190         return (newfdp);
 1191 }
 1192 
 1193 /*
 1194  * Release a filedesc structure.
 1195  */
 1196 void
 1197 fdfree(struct proc *p)
 1198 {
 1199         struct filedesc *fdp;
 1200         struct file     **fpp, *fp;
 1201         int             i;
 1202 
 1203         fdp = p->p_fd;
 1204         if (--fdp->fd_refcnt > 0)
 1205                 return;
 1206         fpp = fdp->fd_ofiles;
 1207         for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
 1208                 fp = *fpp;
 1209                 if (fp != NULL) {
 1210                         *fpp = NULL;
 1211                         simple_lock(&fp->f_slock);
 1212                         FILE_USE(fp);
 1213                         if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize)
 1214                                 knote_fdclose(p, fdp->fd_lastfile - i);
 1215                         (void) closef(fp, p);
 1216                 }
 1217         }
 1218         p->p_fd = NULL;
 1219         if (fdp->fd_nfiles > NDFILE)
 1220                 free(fdp->fd_ofiles, M_FILEDESC);
 1221         if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
 1222                 free(fdp->fd_himap, M_FILEDESC);
 1223                 free(fdp->fd_lomap, M_FILEDESC);
 1224         }
 1225         if (fdp->fd_knlist)
 1226                 free(fdp->fd_knlist, M_KEVENT);
 1227         if (fdp->fd_knhash)
 1228                 hashdone(fdp->fd_knhash, M_KEVENT);
 1229         pool_put(&filedesc0_pool, fdp);
 1230 }
 1231 
 1232 /*
 1233  * Internal form of close.
 1234  * Decrement reference count on file structure.
 1235  * Note: p may be NULL when closing a file
 1236  * that was being passed in a message.
 1237  *
 1238  * Note: we expect the caller is holding a usecount, and expects us
 1239  * to drop it (the caller thinks the file is going away forever).
 1240  */
 1241 int
 1242 closef(struct file *fp, struct proc *p)
 1243 {
 1244         struct vnode    *vp;
 1245         struct flock    lf;
 1246         int             error;
 1247 
 1248         if (fp == NULL)
 1249                 return (0);
 1250 
 1251         /*
 1252          * POSIX record locking dictates that any close releases ALL
 1253          * locks owned by this process.  This is handled by setting
 1254          * a flag in the unlock to free ONLY locks obeying POSIX
 1255          * semantics, and not to free BSD-style file locks.
 1256          * If the descriptor was in a message, POSIX-style locks
 1257          * aren't passed with the descriptor.
 1258          */
 1259         if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
 1260                 lf.l_whence = SEEK_SET;
 1261                 lf.l_start = 0;
 1262                 lf.l_len = 0;
 1263                 lf.l_type = F_UNLCK;
 1264                 vp = (struct vnode *)fp->f_data;
 1265                 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX);
 1266         }
 1267 
 1268         /*
 1269          * If WANTCLOSE is set, then the reference count on the file
 1270          * is 0, but there were multiple users of the file.  This can
 1271          * happen if a filedesc structure is shared by multiple
 1272          * processes.
 1273          */
 1274         simple_lock(&fp->f_slock);
 1275         if (fp->f_iflags & FIF_WANTCLOSE) {
 1276                 /*
 1277                  * Another user of the file is already closing, and is
 1278                  * simply waiting for other users of the file to drain.
 1279                  * Release our usecount, and wake up the closer if it
 1280                  * is the only remaining use.
 1281                  */
 1282 #ifdef DIAGNOSTIC
 1283                 if (fp->f_count != 0)
 1284                         panic("closef: wantclose and count != 0");
 1285                 if (fp->f_usecount < 2)
 1286                         panic("closef: wantclose and usecount < 2");
 1287 #endif
 1288                 if (--fp->f_usecount == 1)
 1289                         wakeup(&fp->f_usecount);
 1290                 simple_unlock(&fp->f_slock);
 1291                 return (0);
 1292         } else {
 1293                 /*
 1294                  * Decrement the reference count.  If we were not the
 1295                  * last reference, then release our use and just
 1296                  * return.
 1297                  */
 1298                 if (--fp->f_count > 0) {
 1299 #ifdef DIAGNOSTIC
 1300                         if (fp->f_usecount < 1)
 1301                                 panic("closef: no wantclose and usecount < 1");
 1302 #endif
 1303                         fp->f_usecount--;
 1304                         simple_unlock(&fp->f_slock);
 1305                         return (0);
 1306                 }
 1307         }
 1308 
 1309         /*
 1310          * The reference count is now 0.  However, there may be
 1311          * multiple potential users of this file.  This can happen
 1312          * if multiple processes shared a single filedesc structure.
 1313          *
 1314          * Notify these potential users that the file is closing.
 1315          * This will prevent them from adding additional uses to
 1316          * the file.
 1317          */
 1318         fp->f_iflags |= FIF_WANTCLOSE;
 1319 
 1320         /*
 1321          * We expect the caller to add a use to the file.  So, if we
 1322          * are the last user, usecount will be 1.  If it is not, we
 1323          * must wait for the usecount to drain.  When it drains back
 1324          * to 1, we will be awakened so that we may proceed with the
 1325          * close.
 1326          */
 1327 #ifdef DIAGNOSTIC
 1328         if (fp->f_usecount < 1)
 1329                 panic("closef: usecount < 1");
 1330 #endif
 1331         while (fp->f_usecount > 1)
 1332                 (void) ltsleep(&fp->f_usecount, PRIBIO, "closef", 0,
 1333                                 &fp->f_slock);
 1334 #ifdef DIAGNOSTIC
 1335         if (fp->f_usecount != 1)
 1336                 panic("closef: usecount != 1");
 1337 #endif
 1338 
 1339         simple_unlock(&fp->f_slock);
 1340         if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
 1341                 lf.l_whence = SEEK_SET;
 1342                 lf.l_start = 0;
 1343                 lf.l_len = 0;
 1344                 lf.l_type = F_UNLCK;
 1345                 vp = (struct vnode *)fp->f_data;
 1346                 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
 1347         }
 1348         if (fp->f_ops)
 1349                 error = (*fp->f_ops->fo_close)(fp, p);
 1350         else
 1351                 error = 0;
 1352 
 1353         /* Nothing references the file now, drop the final use (us). */
 1354         fp->f_usecount--;
 1355 
 1356         ffree(fp);
 1357         return (error);
 1358 }
 1359 
 1360 /*
 1361  * Apply an advisory lock on a file descriptor.
 1362  *
 1363  * Just attempt to get a record lock of the requested type on
 1364  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 1365  */
 1366 /* ARGSUSED */
 1367 int
 1368 sys_flock(struct lwp *l, void *v, register_t *retval)
 1369 {
 1370         struct sys_flock_args /* {
 1371                 syscallarg(int) fd;
 1372                 syscallarg(int) how;
 1373         } */ *uap = v;
 1374         int             fd, how, error;
 1375         struct proc     *p;
 1376         struct filedesc *fdp;
 1377         struct file     *fp;
 1378         struct vnode    *vp;
 1379         struct flock    lf;
 1380 
 1381         p = l->l_proc;
 1382         fd = SCARG(uap, fd);
 1383         how = SCARG(uap, how);
 1384         fdp = p->p_fd;
 1385         error = 0;
 1386 
 1387         if ((fp = fd_getfile(fdp, fd)) == NULL)
 1388                 return (EBADF);
 1389 
 1390         FILE_USE(fp);
 1391 
 1392         if (fp->f_type != DTYPE_VNODE) {
 1393                 error = EOPNOTSUPP;
 1394                 goto out;
 1395         }
 1396 
 1397         vp = (struct vnode *)fp->f_data;
 1398         lf.l_whence = SEEK_SET;
 1399         lf.l_start = 0;
 1400         lf.l_len = 0;
 1401         if (how & LOCK_UN) {
 1402                 lf.l_type = F_UNLCK;
 1403                 fp->f_flag &= ~FHASLOCK;
 1404                 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
 1405                 goto out;
 1406         }
 1407         if (how & LOCK_EX)
 1408                 lf.l_type = F_WRLCK;
 1409         else if (how & LOCK_SH)
 1410                 lf.l_type = F_RDLCK;
 1411         else {
 1412                 error = EINVAL;
 1413                 goto out;
 1414         }
 1415         fp->f_flag |= FHASLOCK;
 1416         if (how & LOCK_NB)
 1417                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
 1418         else
 1419                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf,
 1420                     F_FLOCK|F_WAIT);
 1421  out:
 1422         FILE_UNUSE(fp, p);
 1423         return (error);
 1424 }
 1425 
 1426 /*
 1427  * File Descriptor pseudo-device driver (/dev/fd/).
 1428  *
 1429  * Opening minor device N dup()s the file (if any) connected to file
 1430  * descriptor N belonging to the calling process.  Note that this driver
 1431  * consists of only the ``open()'' routine, because all subsequent
 1432  * references to this file will be direct to the other driver.
 1433  */
 1434 /* ARGSUSED */
 1435 int
 1436 filedescopen(dev_t dev, int mode, int type, struct proc *p)
 1437 {
 1438 
 1439         /*
 1440          * XXX Kludge: set dupfd to contain the value of the
 1441          * the file descriptor being sought for duplication. The error
 1442          * return ensures that the vnode for this device will be released
 1443          * by vn_open. Open will detect this special error and take the
 1444          * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
 1445          * will simply report the error.
 1446          */
 1447         curlwp->l_dupfd = minor(dev);   /* XXX */
 1448         return (ENODEV);
 1449 }
 1450 
 1451 /*
 1452  * Duplicate the specified descriptor to a free descriptor.
 1453  *
 1454  * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller.
 1455  */
 1456 int
 1457 dupfdopen(struct proc *p, int indx, int dfd, int mode, int error)
 1458 {
 1459         struct filedesc *fdp;
 1460         struct file     *wfp;
 1461 
 1462         fdp = p->p_fd;
 1463 
 1464         /* should be cleared by the caller */
 1465         KASSERT(fdp->fd_ofiles[indx] == NULL);
 1466 
 1467         /*
 1468          * If the to-be-dup'd fd number is greater than the allowed number
 1469          * of file descriptors, or the fd to be dup'd has already been
 1470          * closed, reject.
 1471          */
 1472 
 1473         /*
 1474          * Note, in the case of indx == dfd, fd_getfile below returns NULL.
 1475          */
 1476         if ((wfp = fd_getfile(fdp, dfd)) == NULL)
 1477                 return (EBADF);
 1478 
 1479         FILE_USE(wfp);
 1480 
 1481         /*
 1482          * There are two cases of interest here.
 1483          *
 1484          * For ENODEV simply dup (dfd) to file descriptor
 1485          * (indx) and return.
 1486          *
 1487          * For ENXIO steal away the file structure from (dfd) and
 1488          * store it in (indx).  (dfd) is effectively closed by
 1489          * this operation.
 1490          *
 1491          * Any other error code is just returned.
 1492          */
 1493         switch (error) {
 1494         case ENODEV:
 1495                 /*
 1496                  * Check that the mode the file is being opened for is a
 1497                  * subset of the mode of the existing descriptor.
 1498                  */
 1499                 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
 1500                         FILE_UNUSE(wfp, p);
 1501                         return (EACCES);
 1502                 }
 1503                 fdp->fd_ofiles[indx] = wfp;
 1504                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 1505                 wfp->f_count++;
 1506                 /* 'indx' has been fd_used'ed by caller */
 1507                 FILE_UNUSE(wfp, p);
 1508                 return (0);
 1509 
 1510         case ENXIO:
 1511                 /*
 1512                  * Steal away the file pointer from dfd, and stuff it into indx.
 1513                  */
 1514                 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
 1515                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 1516                 fdp->fd_ofiles[dfd] = NULL;
 1517                 fdp->fd_ofileflags[dfd] = 0;
 1518                 /*
 1519                  * Complete the clean up of the filedesc structure by
 1520                  * recomputing the various hints.
 1521                  */
 1522                 /* 'indx' has been fd_used'ed by caller */
 1523                 fd_unused(fdp, dfd);
 1524                 FILE_UNUSE(wfp, p);
 1525                 return (0);
 1526 
 1527         default:
 1528                 FILE_UNUSE(wfp, p);
 1529                 return (error);
 1530         }
 1531         /* NOTREACHED */
 1532 }
 1533 
 1534 /*
 1535  * fcntl call which is being passed to the file's fs.
 1536  */
 1537 int
 1538 fcntl_forfs(int fd, struct proc *p, int cmd, void *arg)
 1539 {
 1540         struct file     *fp;
 1541         struct filedesc *fdp;
 1542         int             error;
 1543         u_int           size;
 1544         void            *data, *memp;
 1545 #define STK_PARAMS      128
 1546         char            stkbuf[STK_PARAMS];
 1547 
 1548         /* fd's value was validated in sys_fcntl before calling this routine */
 1549         fdp = p->p_fd;
 1550         fp = fdp->fd_ofiles[fd];
 1551 
 1552         if ((fp->f_flag & (FREAD | FWRITE)) == 0)
 1553                 return (EBADF);
 1554 
 1555         /*
 1556          * Interpret high order word to find amount of data to be
 1557          * copied to/from the user's address space.
 1558          */
 1559         size = (size_t)F_PARAM_LEN(cmd);
 1560         if (size > F_PARAM_MAX)
 1561                 return (EINVAL);
 1562         memp = NULL;
 1563         if (size > sizeof(stkbuf)) {
 1564                 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
 1565                 data = memp;
 1566         } else
 1567                 data = stkbuf;
 1568         if (cmd & F_FSIN) {
 1569                 if (size) {
 1570                         error = copyin(arg, data, size);
 1571                         if (error) {
 1572                                 if (memp)
 1573                                         free(memp, M_IOCTLOPS);
 1574                                 return (error);
 1575                         }
 1576                 } else
 1577                         *(void **)data = arg;
 1578         } else if ((cmd & F_FSOUT) && size)
 1579                 /*
 1580                  * Zero the buffer so the user always
 1581                  * gets back something deterministic.
 1582                  */
 1583                 memset(data, 0, size);
 1584         else if (cmd & F_FSVOID)
 1585                 *(void **)data = arg;
 1586 
 1587 
 1588         error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p);
 1589 
 1590         /*
 1591          * Copy any data to user, size was
 1592          * already set and checked above.
 1593          */
 1594         if (error == 0 && (cmd & F_FSOUT) && size)
 1595                 error = copyout(data, arg, size);
 1596         if (memp)
 1597                 free(memp, M_IOCTLOPS);
 1598         return (error);
 1599 }
 1600 
 1601 /*
 1602  * Close any files on exec?
 1603  */
 1604 void
 1605 fdcloseexec(struct proc *p)
 1606 {
 1607         struct filedesc *fdp;
 1608         int             fd;
 1609 
 1610         fdunshare(p);
 1611         cwdunshare(p);
 1612 
 1613         fdp = p->p_fd;
 1614         for (fd = 0; fd <= fdp->fd_lastfile; fd++)
 1615                 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
 1616                         (void) fdrelease(p, fd);
 1617 }
 1618 
 1619 /*
 1620  * It is unsafe for set[ug]id processes to be started with file
 1621  * descriptors 0..2 closed, as these descriptors are given implicit
 1622  * significance in the Standard C library.  fdcheckstd() will create a
 1623  * descriptor referencing /dev/null for each of stdin, stdout, and
 1624  * stderr that is not already open.
 1625  */
 1626 #define CHECK_UPTO 3
 1627 int
 1628 fdcheckstd(p)
 1629         struct proc *p;
 1630 {
 1631         struct nameidata nd;
 1632         struct filedesc *fdp;
 1633         struct file *fp;
 1634         struct file *devnullfp = NULL;  /* Quell compiler warning */
 1635         struct proc *pp;
 1636         register_t retval;
 1637         int fd, i, error, flags = FREAD|FWRITE, devnull = -1;
 1638         char closed[CHECK_UPTO * 3 + 1], which[3 + 1];
 1639 
 1640         closed[0] = '\0';
 1641         if ((fdp = p->p_fd) == NULL)
 1642                 return (0);
 1643         for (i = 0; i < CHECK_UPTO; i++) {
 1644                 if (fdp->fd_ofiles[i] != NULL)
 1645                         continue;
 1646                 snprintf(which, sizeof(which), ",%d", i);
 1647                 strlcat(closed, which, sizeof(closed));
 1648                 if (devnull < 0) {
 1649                         if ((error = falloc(p, &fp, &fd)) != 0)
 1650                                 return (error);
 1651                         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
 1652                             p);
 1653                         if ((error = vn_open(&nd, flags, 0)) != 0) {
 1654                                 FILE_UNUSE(fp, p);
 1655                                 ffree(fp);
 1656                                 fdremove(p->p_fd, fd);
 1657                                 return (error);
 1658                         }
 1659                         fp->f_data = nd.ni_vp;
 1660                         fp->f_flag = flags;
 1661                         fp->f_ops = &vnops;
 1662                         fp->f_type = DTYPE_VNODE;
 1663                         VOP_UNLOCK(nd.ni_vp, 0);
 1664                         devnull = fd;
 1665                         devnullfp = fp;
 1666                         FILE_SET_MATURE(fp);
 1667                 } else {
 1668 restart:
 1669                         if ((error = fdalloc(p, 0, &fd)) != 0) {
 1670                                 if (error == ENOSPC) {
 1671                                         fdexpand(p);
 1672                                         goto restart;
 1673                                 }
 1674                                 return (error);
 1675                         }
 1676 
 1677                         simple_lock(&devnullfp->f_slock);
 1678                         FILE_USE(devnullfp);
 1679                         /* finishdup() will unuse the descriptors for us */
 1680                         if ((error = finishdup(p, devnull, fd, &retval)) != 0)
 1681                                 return (error);
 1682                 }
 1683         }
 1684         if (devnullfp)
 1685                 FILE_UNUSE(devnullfp, p);
 1686         if (closed[0] != '\0') {
 1687                 pp = p->p_pptr;
 1688                 log(LOG_WARNING, "set{u,g}id pid %d (%s) "
 1689                     "was invoked by uid %d ppid %d (%s) "
 1690                     "with fd %s closed\n",
 1691                     p->p_pid, p->p_comm, pp->p_ucred->cr_uid,
 1692                     pp->p_pid, pp->p_comm, &closed[1]);
 1693         }
 1694         return (0);
 1695 }
 1696 #undef CHECK_UPTO
 1697 
 1698 /*
 1699  * Sets descriptor owner. If the owner is a process, 'pgid'
 1700  * is set to positive value, process ID. If the owner is process group,
 1701  * 'pgid' is set to -pg_id.
 1702  */
 1703 int
 1704 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data)
 1705 {
 1706         int id = *(int *)data;
 1707         int error;
 1708 
 1709         switch (cmd) {
 1710         case TIOCSPGRP:
 1711                 if (id < 0)
 1712                         return (EINVAL);
 1713                 id = -id;
 1714                 break;
 1715         default:
 1716                 break;
 1717         }
 1718 
 1719         if (id > 0 && !pfind(id))
 1720                 return (ESRCH);
 1721         else if (id < 0 && (error = pgid_in_session(p, -id)))
 1722                 return (error);
 1723 
 1724         *pgid = id;
 1725         return (0);
 1726 }
 1727 
 1728 /*
 1729  * Return descriptor owner information. If the value is positive,
 1730  * it's process ID. If it's negative, it's process group ID and
 1731  * needs the sign removed before use.
 1732  */
 1733 int
 1734 fgetown(struct proc *p, pid_t pgid, int cmd, void *data)
 1735 {
 1736         switch (cmd) {
 1737         case TIOCGPGRP:
 1738                 *(int *)data = -pgid;
 1739                 break;
 1740         default:
 1741                 *(int *)data = pgid;
 1742                 break;
 1743         }
 1744         return (0);
 1745 }
 1746 
 1747 /*
 1748  * Send signal to descriptor owner, either process or process group.
 1749  */
 1750 void
 1751 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata)
 1752 {
 1753         struct proc *p1;
 1754         ksiginfo_t ksi; 
 1755 
 1756         memset(&ksi, 0, sizeof(ksi));
 1757         ksi.ksi_signo = signo;
 1758         ksi.ksi_code = code;
 1759         ksi.ksi_band = band;
 1760 
 1761         if (pgid > 0 && (p1 = pfind(pgid)))
 1762                 kpsignal(p1, &ksi, fdescdata);
 1763         else if (pgid < 0)
 1764                 kgsignal(-pgid, &ksi, fdescdata);
 1765 }

Cache object: 6464a5e4b14c3b5858744ea240b842a2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.