The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/kern/kern_descrip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
   29 /*
   30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  * (c) UNIX System Laboratories, Inc.
   33  * All or some portions of this file are derived from material licensed
   34  * to the University of California by American Telephone and Telegraph
   35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   36  * the permission of UNIX System Laboratories, Inc.
   37  *
   38  * Redistribution and use in source and binary forms, with or without
   39  * modification, are permitted provided that the following conditions
   40  * are met:
   41  * 1. Redistributions of source code must retain the above copyright
   42  *    notice, this list of conditions and the following disclaimer.
   43  * 2. Redistributions in binary form must reproduce the above copyright
   44  *    notice, this list of conditions and the following disclaimer in the
   45  *    documentation and/or other materials provided with the distribution.
   46  * 3. All advertising materials mentioning features or use of this software
   47  *    must display the following acknowledgement:
   48  *      This product includes software developed by the University of
   49  *      California, Berkeley and its contributors.
   50  * 4. Neither the name of the University nor the names of its contributors
   51  *    may be used to endorse or promote products derived from this software
   52  *    without specific prior written permission.
   53  *
   54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   64  * SUCH DAMAGE.
   65  *
   66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
   67  */
   68 /*
   69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
   70  * support for mandatory and extensible security protections.  This notice
   71  * is included in support of clause 2.2 (b) of the Apple Public License,
   72  * Version 2.0.
   73  */
   74 
   75 #include <sys/param.h>
   76 #include <sys/systm.h>
   77 #include <sys/filedesc.h>
   78 #include <sys/kernel.h>
   79 #include <sys/vnode_internal.h>
   80 #include <sys/proc_internal.h>
   81 #include <sys/kauth.h>
   82 #include <sys/file_internal.h>
   83 #include <sys/socket.h>
   84 #include <sys/socketvar.h>
   85 #include <sys/stat.h>
   86 #include <sys/ioctl.h>
   87 #include <sys/fcntl.h>
   88 #include <sys/malloc.h>
   89 #include <sys/mman.h>
   90 #include <sys/syslog.h>
   91 #include <sys/unistd.h>
   92 #include <sys/resourcevar.h>
   93 #include <sys/aio_kern.h>
   94 #include <sys/ev.h>
   95 #include <kern/lock.h>
   96 
   97 #include <bsm/audit_kernel.h>
   98 
   99 #include <sys/mount_internal.h>
  100 #include <sys/kdebug.h>
  101 #include <sys/sysproto.h>
  102 #include <sys/pipe.h>
  103 #include <kern/kern_types.h>
  104 #include <kern/kalloc.h>
  105 #include <libkern/OSAtomic.h>
  106 
  107 #include <sys/ubc.h>
  108 
  109 struct psemnode;
  110 struct pshmnode;
  111 /* finishdup() is the common tail of dup(), dup2() and fcntl(F_DUPFD) below */
  112 int fdopen(dev_t dev, int mode, int type, proc_t p);
  113 int finishdup(proc_t p, struct filedesc *fdp, int old, int new, register_t *retval);
  114 
  115 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
  116 int fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp);
  117 void fg_drop(struct fileproc * fp);
  118 void fg_free(struct fileglob *fg);
  119 void fg_ref(struct fileproc * fp);
  120 
  121 /* flags for close_internal_locked; dup2() passes FD_DUP2RESV when it closes the target fd */
  122 #define FD_DUP2RESV 1
  123 static int close_internal_locked(struct proc *p, int fd, struct fileproc *fp, int flags);
  124 
  125 static int closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx);
  126 
  127 /* We don't want these exported */
  128 __private_extern__
  129 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
  130 
  131 __private_extern__
  132 int unlink1(vfs_context_t, struct nameidata *, int);
  133 /* forward declaration; _fdrelse() is defined later in this file */
  134 static void _fdrelse(struct proc * p, int fd);
  135 
  136 /* file_lock_init() is defined below; the attribute places it in the "__TEXT, initcode" section */
  137 extern void file_lock_init(void) __attribute__((section("__TEXT, initcode")));
  138 extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat4, proc_t p);
  139 #if SOCKETS
  140 extern int soo_stat(struct socket *so, void *ub, int isstat64);
  141 #endif /* SOCKETS */
  142 
  143 extern kauth_scope_t    kauth_scope_fileop;
  144 /* shorthand: fp->f_xxx expands to fp->f_fglob->fg_xxx */
  145 #define f_flag f_fglob->fg_flag
  146 #define f_type f_fglob->fg_type
  147 #define f_msgcount f_fglob->fg_msgcount
  148 #define f_cred f_fglob->fg_cred
  149 #define f_ops f_fglob->fg_ops
  150 #define f_offset f_fglob->fg_offset
  151 #define f_data f_fglob->fg_data
  152 /*
  153  * Descriptor management.
  154  */
  155 struct filelist filehead;       /* head of list of open files */
  156 struct fmsglist fmsghead;       /* head of an fmsglist -- NOTE(review): purpose not evident here, confirm what this list holds */
  157 struct fmsglist fmsg_ithead;    /* head of an fmsglist -- NOTE(review): purpose not evident here, confirm what this list holds */
  158 int nfiles;                     /* actual number of open files */
  159 
  160 /* the lock group, attributes and mutexes below are all allocated by file_lock_init() */
  161 lck_grp_attr_t * file_lck_grp_attr;
  162 lck_grp_t * file_lck_grp;
  163 lck_attr_t * file_lck_attr;
  164 
  165 lck_mtx_t * uipc_lock;
  166 lck_mtx_t * file_flist_lock;
  167 
  168 
  169 /*
  170  * file_lock_init
  171  *
  172  * Description: Create the lock group, lock attributes and the two global
  173  *              mutexes (uipc_lock, file_flist_lock) used by the file code
  174  *
  175  * Parameters:  (void)
  176  *
  177  * Returns:     void
  178  *
  179  * Notes:       Called at system startup from bsd_init().  The results of
  180  *              the allocation calls are stored without being checked.
  181  */
  182 void
  183 file_lock_init(void)
  184 {
  185         /* the group attribute is an input to the group itself */
  186         file_lck_grp_attr = lck_grp_attr_alloc_init();
  187         file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
  188 
  189         /* a single lock attribute is shared by both mutexes */
  190         file_lck_attr = lck_attr_alloc_init();
  191         uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
  192         file_flist_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
  193 }
  194 
  195 
  196 /*
  197  * proc_fdlock, proc_fdlock_spin, proc_fdlock_assert
  198  *
  199  * Description: Thin wrappers over the per process mutex p_fdmlock, the
  200  *              lock that controls access to the per process struct
  201  *              fileproc and struct filedesc
  202  *
  203  * Parameters:  p                               Process whose lock is used
  204  *              assertflags                     Forwarded to lck_mtx_assert()
  205  *
  206  * Returns:     void
  207  *
  208  * Notes:       The lock is initialized in forkproc() and destroyed in
  209  *              reap_child_process().
  210  */
  211 void
  212 proc_fdlock(proc_t p)
  213 {
  214         lck_mtx_t *fdmtx = &p->p_fdmlock;
  215 
  216         lck_mtx_lock(fdmtx);
  217 }
  218 
  219 void
  220 proc_fdlock_spin(proc_t p)
  221 {
  222         lck_mtx_t *fdmtx = &p->p_fdmlock;
  223 
  224         lck_mtx_lock_spin(fdmtx);
  225 }
  226 
  227 void
  228 proc_fdlock_assert(proc_t p, int assertflags)
  229 {
  230         lck_mtx_t *fdmtx = &p->p_fdmlock;
  231 
  232         lck_mtx_assert(fdmtx, assertflags);
  233 }
  226 
  227 
  228 /*
  229  * proc_fdunlock
  230  *
  231  * Description: Release the per process p_fdmlock mutex taken earlier
  232  *              through proc_fdlock() or proc_fdlock_spin()
  233  *
  234  * Parameters:  p                               Process to drop the lock on
  235  *
  236  * Returns:     void
  237  */
  238 void
  239 proc_fdunlock(proc_t p)
  240 {
  241         lck_mtx_t *fdmtx = &p->p_fdmlock;
  242 
  243         lck_mtx_unlock(fdmtx);
  244 }
  242 
  243 
  244 /*
  245  * System calls on descriptors.
  246  */
  247 
  248 
  249 /*
  250  * getdtablesize
  251  *
  252  * Description: Report the per process maximum size of the descriptor
  253  *              table: min() of the process's current RLIMIT_NOFILE
  254  *              limit (rlim_cur, cast to int) and maxfiles
  255  *
  256  * Parameters:  p                               Process being queried
  257  *              uap                             Unused
  258  *              retval                          Pointer to the call return area
  259  *
  260  * Returns:     0                               Always
  261  *
  262  * Implicit returns:
  263  *              *retval (modified)              Size of dtable
  264  */
  265 int
  266 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, register_t *retval)
  267 {
  268         int cur_limit;
  269 
  270         /* the limit is read and stored while proc_fdlock is held */
  271         proc_fdlock_spin(p);
  272         cur_limit = (int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
  273         *retval = min(cur_limit, maxfiles);
  274         proc_fdunlock(p);
  275 
  276         return (0);
  277 }
  271 
  272 
  273 /* Empty descriptor slot fd of process p and flag the slot UF_RESERVED. */
  274 void
  275 procfdtbl_reservefd(struct proc * p, int fd)
  276 {
  277         struct filedesc *fdp = p->p_fd;
  278 
  279         fdp->fd_ofiles[fd] = NULL;
  280         fdp->fd_ofileflags[fd] |= UF_RESERVED;
  281 }
  279 
  280 /* Flag descriptor slot fd of process p as UF_RESERVED and UF_CLOSING. */
  281 void
  282 procfdtbl_markclosefd(struct proc * p, int fd)
  283 {
  284         char *slotflags = &p->p_fd->fd_ofileflags[fd];
  285 
  286         *slotflags |= (UF_RESERVED | UF_CLOSING);
  287 }
  285 
  286 /*
  287  * Clear UF_RESERVED on descriptor slot fd of process p, first storing fp
  288  * in the slot when fp is not NULL.  If UF_RESVWAIT is set it is cleared
  289  * as well and wakeup() is issued on &p->p_fd.
  290  */
  291 void
  292 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
  293 {
  294         struct filedesc *fdp = p->p_fd;
  295 
  296         if (fp != NULL)
  297                 fdp->fd_ofiles[fd] = fp;
  298         fdp->fd_ofileflags[fd] &= ~UF_RESERVED;
  299 
  300         /* no waiter flagged on this slot: nothing to wake */
  301         if ((fdp->fd_ofileflags[fd] & UF_RESVWAIT) != UF_RESVWAIT)
  302                 return;
  303 
  304         fdp->fd_ofileflags[fd] &= ~UF_RESVWAIT;
  305         wakeup(&p->p_fd);
  306 }
  297 
  298 /*
  299  * Set UF_RESVWAIT on descriptor slot fd of process p, then msleep() on
  300  * &p->p_fd, handing p_fdmlock to msleep().
  301  */
  302 void
  303 procfdtbl_waitfd(struct proc * p, int fd)
  304 {
  305         struct filedesc *fdp = p->p_fd;
  306 
  307         fdp->fd_ofileflags[fd] |= UF_RESVWAIT;
  308         msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
  309 }
  304 
  305 
  306 /*
  307  * Reset descriptor slot fd of process p: the fileproc pointer becomes NULL
  308  * and every flag bit is dropped.  If UF_RESVWAIT was set beforehand,
  309  * wakeup() is issued on &p->p_fd.
  310  */
  311 void
  312 procfdtbl_clearfd(struct proc * p, int fd)
  313 {
  314         struct filedesc *fdp = p->p_fd;
  315         int had_waiter;
  316 
  317         /* sample the flag before the slot is wiped */
  318         had_waiter = ((fdp->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT);
  319         fdp->fd_ofiles[fd] = NULL;
  320         fdp->fd_ofileflags[fd] = 0;
  321 
  322         if (had_waiter)
  323                 wakeup(&p->p_fd);
  324 }
  318 
  319 /*
  320  * _fdrelse
  321  *
  322  * Description: Utility function to free an fd in a process's filedesc
  323  *
  324  * Parameters:  p                               Process owning the fd
  325  *              fd                              fd to free
  326  *
  327  * Returns:     void
  328  *
  329  * Locks:       Assumes proc_fdlock for process p is held by the caller
  330  *
  331  * Notes:       Lowers fd_freefile to fd when fd is below it, clears the
  332  *              slot through procfdtbl_clearfd(), then walks fd_lastfile
  333  *              down past trailing slots that hold no fileproc and are
  334  *              not UF_RESERVED, stopping once fd_lastfile reaches 0.
  335  */
  336 static void
  337 _fdrelse(struct proc * p, int fd)
  338 {
  339         struct filedesc *fdp = p->p_fd;
  340         int last;
  341 
  342         if (fdp->fd_freefile > fd)
  343                 fdp->fd_freefile = fd;
  344 #if DIAGNOSTIC
  345         if (fd > fdp->fd_lastfile)
  346                 panic("fdrelse: fd_lastfile inconsistent");
  347 #endif
  348         procfdtbl_clearfd(p, fd);
  349 
  350         /* trim fd_lastfile back over empty, unreserved trailing slots */
  351         for (;;) {
  352                 last = fdp->fd_lastfile;
  353                 if (last <= 0)
  354                         break;
  355                 if (fdp->fd_ofiles[last] != NULL)
  356                         break;
  357                 if (fdp->fd_ofileflags[last] & UF_RESERVED)
  358                         break;
  359                 fdp->fd_lastfile--;
  360         }
  361 }
  352 
  353 
  354 /*
  355  * dup
  356  *
  357  * Description: Duplicate a file descriptor onto a newly allocated one.
  358  *
  359  * Parameters:  p                               Process performing the dup
  360  *              uap->fd                         The fd to dup
  361  *              retval                          Pointer to the call return area
  362  *
  363  * Returns:     0                               Success
  364  *              !0                              Errno from fp_lookup, fdalloc
  365  *                                              or finishdup
  366  *
  367  * Implicit returns:
  368  *              *retval (modified)              The new descriptor
  369  *
  370  * Notes:       Takes proc_fdlock on entry and releases it on every return
  371  *              path; the fileproc obtained by fp_lookup is released with
  372  *              fp_drop before returning.
  373  */
  374 int
  375 dup(proc_t p, struct dup_args *uap, register_t *retval)
  376 {
  377         struct filedesc *fdp = p->p_fd;
  378         struct fileproc *fp;
  379         int old = uap->fd;
  380         int new;
  381         int error;
  382 
  383         proc_fdlock(p);
  384 
  385         error = fp_lookup(p, old, &fp, 1);
  386         if (error != 0)
  387                 goto unlock;
  388 
  389         /* allocate the new fd, then hand both fds to finishdup */
  390         error = fdalloc(p, 0, &new);
  391         if (error == 0)
  392                 error = finishdup(p, fdp, old, new, retval);
  393 
  394         fp_drop(p, old, fp, 1);
  395 unlock:
  396         proc_fdunlock(p);
  397 
  398         return (error);
  399 }
  393 
  394 
  395 /*
  396  * dup2
  397  *
  398  * Description: Duplicate a file descriptor to a particular value.
  399  *
  400  * Parameters:  p                               Process performing the dup
  401  *              uap->from                       The fd to dup
  402  *              uap->to                         The fd to dup it to
  403  *              retval                          Pointer to the call return area
  404  *
  405  * Returns:     0                               Success
  406  *              EBADF                           uap->to is out of range
  407  *              !0                              Errno from fp_lookup, fdalloc or finishdup
  408  * Implicit returns:
  409  *              *retval (modified)              The new descriptor
  410  */
  411 int
  412 dup2(proc_t p, struct dup2_args *uap, register_t *retval)
  413 {
  414         struct filedesc *fdp = p->p_fd;
  415         int old = uap->from, new = uap->to;
  416         int i, error;
  417         struct fileproc *fp, *nfp;
  418         /* proc_fdlock is taken here and released before every return below */
  419         proc_fdlock(p);
  420         /* restart point: each "goto startover" drops fp first, so it is looked up again here */
  421 startover:
  422         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
  423                 proc_fdunlock(p);
  424                 return(error);
  425         }
  426         if (new < 0 || /* target must be non-negative, below rlim_cur and below maxfiles */
  427                 (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  428             new >= maxfiles) {
  429                 fp_drop(p, old, fp, 1);
  430                 proc_fdunlock(p);
  431                 return (EBADF);
  432         }
  433         if (old == new) { /* dup onto itself: report new and return without touching the table */
  434                 fp_drop(p, old, fp, 1);
  435                 *retval = new;
  436                 proc_fdunlock(p);
  437                 return (0);
  438         }
  439         if (new < 0 || new >= fdp->fd_nfiles) { /* new < 0 was already rejected above; this branch handles new >= fd_nfiles */
  440                 if ( (error = fdalloc(p, new, &i)) ) {
  441                         fp_drop(p, old, fp, 1);
  442                         proc_fdunlock(p);
  443                         return (error);
  444                 }
  445                 if (new != i) { /* fdalloc handed back a different fd: release it and take the slot path */
  446                         fdrelse(p, i);
  447                         goto closeit;
  448                 }
  449         } else {
  450 closeit: /* also entered by goto from the fdalloc branch above */
  451                 while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED)  { /* slot flagged reserved: drop fp, wait, retry */
  452                                 fp_drop(p, old, fp, 1);
  453                                 procfdtbl_waitfd(p, new);
  454 #if DIAGNOSTIC
  455                                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
  456 #endif
  457                                 goto startover;
  458                 }
  459                 /* target slot occupied: close it with FD_DUP2RESV, clear the slot, then retry from the top */
  460                 if ((fdp->fd_ofiles[new] != NULL)  && ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
  461                         fp_drop(p, old, fp, 1);
  462                         (void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
  463 #if DIAGNOSTIC
  464                         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
  465 #endif
  466                         procfdtbl_clearfd(p, new);
  467                         goto startover;
  468                 } else  { /* slot empty (or fp_lookup failed): reserve it before finishdup() */
  469 #if DIAGNOSTIC
  470                         if (fdp->fd_ofiles[new] != NULL)
  471                                 panic("dup2: unable to get ref on a fileproc %d\n", new);
  472 #endif
  473                         procfdtbl_reservefd(p, new);
  474                 }
  475 
  476 #if DIAGNOSTIC
  477                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
  478 #endif
  479 
  480         }
  481 #if DIAGNOSTIC
  482         if (fdp->fd_ofiles[new] != 0)
  483                 panic("dup2-1: overwriting fd_ofiles with new %d\n", new);
  484         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
  485                 panic("dup2-1: unreserved  fileflags  with new %d\n", new);
  486 #endif
  487         error = finishdup(p, fdp, old, new, retval);
  488         fp_drop(p, old, fp, 1);
  489         proc_fdunlock(p);
  490 
  491         return(error);
  492 }
  493 
  494 
  495 /*
  496  * fcntl
  497  *
  498  * Description: The file control system call; the cancellation-testing
  499  *              front end to fcntl_nocancel().
  500  *
  501  * Parameters:  p                               Process performing the fcntl
  502  *              uap->fd                         The fd to operate against
  503  *              uap->cmd                        The command to perform
  504  *              uap->arg                        Pointer to the command argument
  505  *              retval                          Pointer to the call return area
  506  *
  507  * Returns:     0                               Success
  508  *              !0                              Errno (see fcntl_nocancel)
  509  *
  510  * Implicit returns:
  511  *              *retval (modified)              fcntl return value (if any)
  512  *
  513  * Notes:       Calls __pthread_testcancel(1) first, so cancellation is
  514  *              tested prior to the potentially blocking operation; uap
  515  *              is passed on recast as struct fcntl_nocancel_args.
  516  */
  517 int
  518 fcntl(proc_t p, struct fcntl_args *uap, register_t *retval)
  519 {
  520         struct fcntl_nocancel_args *nc_uap;
  521 
  522         nc_uap = (struct fcntl_nocancel_args *)uap;
  523         __pthread_testcancel(1);
  524 
  525         return (fcntl_nocancel(p, nc_uap, retval));
  526 }
  522 
  523 
  524 /*
  525  * fcntl_nocancel
  526  *
  527  * Description: A non-cancel-testing file control system call.
  528  *
  529  * Parameters:  p                               Process performing the fcntl
  530  *              uap->fd                         The fd to operate against
  531  *              uap->cmd                        The command to perform
  532  *              uap->arg                        Pointer to the command argument
  533  *              retval                          Pointer to the call return area
  534  *
  535  * Returns:     0                               Success
  536  *              EINVAL
  537  *      fp_lookup:EBADF                         Bad file descriptor
  538  * [F_DUPFD]
  539  *      fdalloc:EMFILE
  540  *      fdalloc:ENOMEM
  541  *      finishdup:EBADF
  542  *      finishdup:ENOMEM
  543  * [F_SETOWN]
  544  *              ESRCH
  545  * [F_SETLK]
  546  *              EBADF
  547  *              EOVERFLOW
  548  *      copyin:EFAULT
  549  *      vnode_getwithref:???
  550  *      VNOP_ADVLOCK:???
  551  * [F_GETLK]
  552  *              EBADF
  553  *              EOVERFLOW
  554  *      copyin:EFAULT
  555  *      copyout:EFAULT
  556  *      vnode_getwithref:???
  557  *      VNOP_ADVLOCK:???
  558  * [F_PREALLOCATE]
  559  *              EBADF
  560  *              EINVAL
  561  *      copyin:EFAULT
  562  *      copyout:EFAULT
  563  *      vnode_getwithref:???
  564  *      VNOP_ALLOCATE:???
  565  * [F_SETSIZE,F_RDADVISE]
  566  *              EBADF
  567  *      copyin:EFAULT
  568  *      vnode_getwithref:???
  569  * [F_RDAHEAD,F_NOCACHE]
  570  *              EBADF
  571  *      vnode_getwithref:???
  572  * [???]
  573  *
  574  * Implicit returns:
  575  *              *retval (modified)              fcntl return value (if any)
  576  */
  577 int
  578 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
  579 {
  580         int fd = uap->fd;
  581         struct filedesc *fdp = p->p_fd;
  582         struct fileproc *fp;
  583         char *pop;
  584         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
  585         int i, tmp, error, error2, flg = F_POSIX;
  586         struct flock fl;
  587         struct vfs_context context;
  588         off_t offset;
  589         int newmin;
  590         daddr64_t lbn, bn;
  591         int devBlockSize = 0;
  592         unsigned int fflag;
  593         user_addr_t argp;
  594 
  595         AUDIT_ARG(fd, uap->fd);
  596         AUDIT_ARG(cmd, uap->cmd);
  597 
  598         proc_fdlock(p);
  599         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
  600                 proc_fdunlock(p);
  601                 return(error);
  602         }
  603         context.vc_thread = current_thread();
  604         context.vc_ucred = fp->f_cred;
  605         if (proc_is64bit(p)) {
  606                 argp = uap->arg;
  607         }
  608         else {
  609                 /*
  610                  * Since the arg parameter is defined as a long but may be
  611                  * either a long or a pointer we must take care to handle
  612                  * sign extension issues.  Our sys call munger will sign
  613                  * extend a long when we are called from a 32-bit process.
  614                  * Since we can never have an address greater than 32-bits
  615                  * from a 32-bit process we lop off the top 32-bits to avoid
  616                  * getting the wrong address
  617                  */
  618                 argp = CAST_USER_ADDR_T(uap->arg);
  619         }
  620 
  621         pop = &fdp->fd_ofileflags[fd];
  622 
  623 #if CONFIG_MACF
  624         error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
  625             uap->arg);
  626         if (error)
  627                 goto out;
  628 #endif
  629 
  630         switch (uap->cmd) {
  631 
  632         case F_DUPFD:
  633                 newmin = CAST_DOWN(int, uap->arg);
  634                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
  635                     newmin >= maxfiles) {
  636                         error = EINVAL;
  637                         goto out;
  638                 }
  639                 if ( (error = fdalloc(p, newmin, &i)) )
  640                         goto out;
  641                 error = finishdup(p, fdp, fd, i, retval);
  642                 goto out;
  643 
  644         case F_GETFD:
  645                 *retval = (*pop & UF_EXCLOSE)? 1 : 0;
  646                 error = 0;
  647                 goto out;
  648 
  649         case F_SETFD:
  650                 *pop = (*pop &~ UF_EXCLOSE) |
  651                         (uap->arg & 1)? UF_EXCLOSE : 0;
  652                 error = 0;
  653                 goto out;
  654 
  655         case F_GETFL:
  656                 *retval = OFLAGS(fp->f_flag);
  657                 error = 0;
  658                 goto out;
  659 
  660         case F_SETFL:
  661                 fp->f_flag &= ~FCNTLFLAGS;
  662                 tmp = CAST_DOWN(int, uap->arg);
  663                 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
  664                 tmp = fp->f_flag & FNONBLOCK;
  665                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
  666                 if (error)
  667                         goto out;
  668                 tmp = fp->f_flag & FASYNC;
  669                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
  670                 if (!error)
  671                         goto out;
  672                 fp->f_flag &= ~FNONBLOCK;
  673                 tmp = 0;
  674                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
  675                 goto out;
  676 
  677         case F_GETOWN:
  678                 if (fp->f_type == DTYPE_SOCKET) {
  679                         *retval = ((struct socket *)fp->f_data)->so_pgid;
  680                         error = 0;
  681                         goto out;
  682                 }
  683                 error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
  684                 *retval = -*retval;
  685                 goto out;
  686 
  687         case F_SETOWN:
  688                 tmp = CAST_DOWN(pid_t, uap->arg);
  689                 if (fp->f_type == DTYPE_SOCKET) {
  690                         ((struct socket *)fp->f_data)->so_pgid = tmp;
  691                         error =0;
  692                         goto out;
  693                 }
  694                 if (fp->f_type == DTYPE_PIPE) {
  695                         error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
  696                         goto out;
  697                 }
  698 
  699                 if (tmp <= 0) {
  700                         tmp = -tmp;
  701                 } else {
  702                         proc_t p1 = proc_find(tmp);
  703                         if (p1 == 0) {
  704                                 error = ESRCH;
  705                                 goto out;
  706                         }
  707                         tmp = (int)p1->p_pgrpid;
  708                         proc_rele(p1);
  709                 }
  710                 error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
  711                 goto out;
  712 
  713         case F_SETLKW:
  714                 flg |= F_WAIT;
  715                 /* Fall into F_SETLK */
  716 
  717         case F_SETLK:
  718                 if (fp->f_type != DTYPE_VNODE) {
  719                         error = EBADF;
  720                         goto out;
  721                 }
  722                 vp = (struct vnode *)fp->f_data;
  723 
  724                 fflag = fp->f_flag;
  725                 offset = fp->f_offset;
  726                 proc_fdunlock(p);
  727 
  728                 /* Copy in the lock structure */
  729                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
  730                 if (error) {
  731                         goto outdrop;
  732                 }
  733 
  734                 if ((fl.l_whence == SEEK_CUR) && (fl.l_start + offset < fl.l_start)) {
  735                     error = EOVERFLOW;
  736                     goto outdrop;
  737                 }
  738 
  739                 if ( (error = vnode_getwithref(vp)) ) {
  740                         goto outdrop;
  741                 }
  742                 if (fl.l_whence == SEEK_CUR)
  743                         fl.l_start += offset;
  744 
  745 #if CONFIG_MACF
  746                 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
  747                     F_SETLK, &fl);
  748                 if (error) {
  749                         (void)vnode_put(vp);
  750                         goto outdrop;
  751                 }
  752 #endif
  753                 switch (fl.l_type) {
  754 
  755                 case F_RDLCK:
  756                         if ((fflag & FREAD) == 0) {
  757                                 (void)vnode_put(vp);
  758                                 error = EBADF;
  759                                 goto outdrop;
  760                         }
  761                         // XXX UInt32 unsafe for LP64 kernel
  762                         OSBitOrAtomic(P_LADVLOCK, (UInt32 *)&p->p_ladvflag);
  763                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
  764                         (void)vnode_put(vp);
  765                         goto outdrop;
  766 
  767                 case F_WRLCK:
  768                         if ((fflag & FWRITE) == 0) {
  769                                 (void)vnode_put(vp);
  770                                 error = EBADF;
  771                                 goto outdrop;
  772                         }
  773                         // XXX UInt32 unsafe for LP64 kernel
  774                         OSBitOrAtomic(P_LADVLOCK, (UInt32 *)&p->p_ladvflag);
  775                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
  776                         (void)vnode_put(vp);
  777                         goto outdrop;
  778 
  779                 case F_UNLCK:
  780                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
  781                                 F_POSIX, &context);
  782                         (void)vnode_put(vp);
  783                         goto outdrop;
  784 
  785                 default:
  786                         (void)vnode_put(vp);
  787                         error = EINVAL;
  788                         goto outdrop;
  789                 }
  790 
  791         case F_GETLK:
  792                 if (fp->f_type != DTYPE_VNODE) {
  793                         error = EBADF;
  794                         goto out;
  795                 }
  796                 vp = (struct vnode *)fp->f_data;
  797 
  798                 offset = fp->f_offset;
  799                 proc_fdunlock(p);
  800 
  801                 /* Copy in the lock structure */
  802                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
  803                 if (error)
  804                         goto outdrop;
  805 
  806                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
  807                 /* and ending byte for EOVERFLOW in SEEK_SET */
  808                 if (((fl.l_whence == SEEK_CUR) && 
  809                      ((fl.l_start + offset < fl.l_start) ||
  810                       ((fl.l_len > 0) && (fl.l_start+offset + fl.l_len - 1 < fl.l_start+offset)))) ||
  811                     ((fl.l_whence == SEEK_SET) && (fl.l_len > 0) && (fl.l_start + fl.l_len - 1 < fl.l_start)))
  812                 {
  813                         /* lf_advlock doesn't check start/end for F_GETLK if file has no locks */
  814                         error = EOVERFLOW;
  815                         goto outdrop;
  816                 }
  817 
  818                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
  819                         error = EINVAL;
  820                         goto outdrop;
  821                 }
  822 
  823                 switch (fl.l_type) {
  824                 case F_RDLCK:
  825                 case F_UNLCK:
  826                 case F_WRLCK:
  827                         break;
  828                 default:
  829                         error = EINVAL;
  830                         goto outdrop;
  831                 }
  832 
  833                 switch (fl.l_whence) {
  834                 case SEEK_CUR:
  835                 case SEEK_SET:
  836                 case SEEK_END:
  837                         break;
  838                 default:
  839                         error = EINVAL;
  840                         goto outdrop;
  841                 }
  842 
  843                 if ( (error = vnode_getwithref(vp)) == 0 ) {
  844                         if (fl.l_whence == SEEK_CUR)
  845                                 fl.l_start += offset;
  846 
  847 #if CONFIG_MACF
  848                         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
  849                             F_GETLK, &fl);
  850                         if (error == 0)
  851 #endif
  852                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX, &context);
  853 
  854                         (void)vnode_put(vp);
  855 
  856                         if (error == 0)
  857                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
  858                 }
  859                 goto outdrop;
  860 
  861         case F_PREALLOCATE: {
  862                 fstore_t alloc_struct;    /* structure for allocate command */
  863                 u_int32_t alloc_flags = 0;
  864 
  865                 if (fp->f_type != DTYPE_VNODE) {
  866                         error = EBADF;
  867                         goto out;
  868                 }
  869 
  870                 vp = (struct vnode *)fp->f_data;
  871                 proc_fdunlock(p);
  872 
  873                 /* make sure that we have write permission */
  874                 if ((fp->f_flag & FWRITE) == 0) {
  875                         error = EBADF;
  876                         goto outdrop;
  877                 }
  878 
  879                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
  880                 if (error)
  881                         goto outdrop;
  882 
  883                 /* now set the space allocated to 0 */
  884                 alloc_struct.fst_bytesalloc = 0;
  885                 
  886                 /*
  887                  * Do some simple parameter checking
  888                  */
  889 
  890                 /* set up the flags */
  891 
  892                 alloc_flags |= PREALLOCATE;
  893                 
  894                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
  895                         alloc_flags |= ALLOCATECONTIG;
  896 
  897                 if (alloc_struct.fst_flags & F_ALLOCATEALL)
  898                         alloc_flags |= ALLOCATEALL;
  899 
  900                 /*
  901                  * Do any position mode specific stuff.  The only
  902                  * position mode  supported now is PEOFPOSMODE
  903                  */
  904 
  905                 switch (alloc_struct.fst_posmode) {
  906         
  907                 case F_PEOFPOSMODE:
  908                         if (alloc_struct.fst_offset != 0) {
  909                                 error = EINVAL;
  910                                 goto outdrop;
  911                         }
  912 
  913                         alloc_flags |= ALLOCATEFROMPEOF;
  914                         break;
  915 
  916                 case F_VOLPOSMODE:
  917                         if (alloc_struct.fst_offset <= 0) {
  918                                 error = EINVAL;
  919                                 goto outdrop;
  920                         }
  921 
  922                         alloc_flags |= ALLOCATEFROMVOL;
  923                         break;
  924 
  925                 default: {
  926                         error = EINVAL;
  927                         goto outdrop;
  928                         }
  929                 }
  930                 if ( (error = vnode_getwithref(vp)) == 0 ) {
  931                         /*
  932                          * call allocate to get the space
  933                          */
  934                         error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
  935                                               &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
  936                                               &context);
  937                         (void)vnode_put(vp);
  938 
  939                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
  940 
  941                         if (error == 0)
  942                                 error = error2;
  943                 }
  944                 goto outdrop;
  945                 
  946                 }
  947         case F_SETSIZE:
  948                 if (fp->f_type != DTYPE_VNODE) {
  949                         error = EBADF;
  950                         goto out;
  951                 }
  952                 vp = (struct vnode *)fp->f_data;
  953                 proc_fdunlock(p);
  954 
  955                 error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
  956                 if (error)
  957                         goto outdrop;
  958 
  959                 error = vnode_getwithref(vp);
  960                 if (error)
  961                         goto outdrop;
  962 
  963 #if CONFIG_MACF
  964                 error = mac_vnode_check_truncate(&context,
  965                     fp->f_fglob->fg_cred, vp);
  966                 if (error) {
  967                         (void)vnode_put(vp);
  968                         goto outdrop;
  969                 }
  970 #endif
  971                 /*
  972                  * Make sure that we are root.  Growing a file
  973                  * without zero filling the data is a security hole 
  974                  * root would have access anyway so we'll allow it
  975                  */
  976                 if (!is_suser()) {
  977                         error = EACCES;
  978                 } else {
  979                         /*
  980                          * set the file size
  981                          */
  982                         error = vnode_setsize(vp, offset, IO_NOZEROFILL,
  983                             &context);
  984                 }
  985 
  986                 (void)vnode_put(vp);
  987                 goto outdrop;
  988 
  989         case F_RDAHEAD:
  990                 if (fp->f_type != DTYPE_VNODE) {
  991                         error = EBADF;
  992                         goto out;
  993                 }
  994                 if (uap->arg)
  995                         fp->f_fglob->fg_flag &= ~FNORDAHEAD;
  996                 else
  997                         fp->f_fglob->fg_flag |= FNORDAHEAD;
  998 
  999                 goto out;
 1000 
 1001         case F_NOCACHE:
 1002                 if (fp->f_type != DTYPE_VNODE) {
 1003                         error = EBADF;
 1004                         goto out;
 1005                 }
 1006                 if (uap->arg)
 1007                         fp->f_fglob->fg_flag |= FNOCACHE;
 1008                 else
 1009                         fp->f_fglob->fg_flag &= ~FNOCACHE;
 1010 
 1011                 goto out;
 1012 
 1013         case F_GLOBAL_NOCACHE:
 1014                 if (fp->f_type != DTYPE_VNODE) {
 1015                         error = EBADF;
 1016                         goto out;
 1017                 }
 1018                 vp = (struct vnode *)fp->f_data;
 1019                 proc_fdunlock(p);
 1020 
 1021                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1022 
 1023                         *retval = vnode_isnocache(vp);
 1024 
 1025                         if (uap->arg)
 1026                                 vnode_setnocache(vp);
 1027                         else
 1028                                 vnode_clearnocache(vp);
 1029 
 1030                         (void)vnode_put(vp);
 1031                 }
 1032                 goto outdrop;
 1033 
 1034         case F_CHECK_OPENEVT:
 1035                 if (fp->f_type != DTYPE_VNODE) {
 1036                         error = EBADF;
 1037                         goto out;
 1038                 }
 1039                 vp = (struct vnode *)fp->f_data;
 1040                 proc_fdunlock(p);
 1041 
 1042                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1043 
 1044                         *retval = vnode_is_openevt(vp);
 1045 
 1046                         if (uap->arg)
 1047                                 vnode_set_openevt(vp);
 1048                         else
 1049                                 vnode_clear_openevt(vp);
 1050 
 1051                         (void)vnode_put(vp);
 1052                 }
 1053                 goto outdrop;
 1054 
 1055         case F_RDADVISE: {
 1056                 struct radvisory ra_struct;
 1057 
 1058                 if (fp->f_type != DTYPE_VNODE) {
 1059                         error = EBADF;
 1060                         goto out;
 1061                 }
 1062                 vp = (struct vnode *)fp->f_data;
 1063                 proc_fdunlock(p);
 1064 
 1065                 if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
 1066                         goto outdrop;
 1067                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1068                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
 1069 
 1070                         (void)vnode_put(vp);
 1071                 }
 1072                 goto outdrop;
 1073                 }
 1074 
 1075         case F_READBOOTSTRAP:
 1076         case F_WRITEBOOTSTRAP: {
 1077                 fbootstraptransfer_t fbt_struct;
 1078                 user_fbootstraptransfer_t user_fbt_struct;
 1079                 int     sizeof_struct;
 1080                 caddr_t boot_structp;
 1081 
 1082                 if (fp->f_type != DTYPE_VNODE) {
 1083                         error = EBADF;
 1084                         goto out;
 1085                 }
 1086                 vp = (struct vnode *)fp->f_data;
 1087                 proc_fdunlock(p);
 1088 
 1089                 if (IS_64BIT_PROCESS(p)) {
 1090                         sizeof_struct = sizeof(user_fbt_struct);
 1091                         boot_structp = (caddr_t) &user_fbt_struct;
 1092                 }
 1093                 else {
 1094                         sizeof_struct = sizeof(fbt_struct);
 1095                         boot_structp = (caddr_t) &fbt_struct;
 1096                 }
 1097                 error = copyin(argp, boot_structp, sizeof_struct);
 1098                 if (error)
 1099                         goto outdrop;
 1100                 if ( (error = vnode_getwithref(vp)) ) {
 1101                         goto outdrop;
 1102                 }
 1103                 if (uap->cmd == F_WRITEBOOTSTRAP) {
 1104                         /*
 1105                          * Make sure that we are root.  Updating the
 1106                          * bootstrap on a disk could be a security hole
 1107                          */
 1108                         if (!is_suser()) {
 1109                                 (void)vnode_put(vp);
 1110                                 error = EACCES;
 1111                                 goto outdrop;
 1112                         }
 1113                 }
 1114                 if (strncmp(vnode_mount(vp)->mnt_vfsstat.f_fstypename, "hfs",
 1115                         sizeof(vnode_mount(vp)->mnt_vfsstat.f_fstypename)) != 0) {
 1116                         error = EINVAL;
 1117                 } else {
 1118                         /*
 1119                          * call vnop_ioctl to handle the I/O
 1120                          */
 1121                         error = VNOP_IOCTL(vp, uap->cmd, boot_structp, 0, &context);
 1122                 }
 1123                 (void)vnode_put(vp);
 1124                 goto outdrop;
 1125         }
 1126         case F_LOG2PHYS: {
 1127                 struct log2phys l2p_struct;    /* structure for allocate command */
 1128 
 1129                 if (fp->f_type != DTYPE_VNODE) {
 1130                         error = EBADF;
 1131                         goto out;
 1132                 }
 1133                 vp = (struct vnode *)fp->f_data;
 1134                 proc_fdunlock(p);
 1135                 if ( (error = vnode_getwithref(vp)) ) {
 1136                         goto outdrop;
 1137                 }
 1138                 error = VNOP_OFFTOBLK(vp, fp->f_offset, &lbn);
 1139                 if (error) {
 1140                         (void)vnode_put(vp);
 1141                         goto outdrop;
 1142                 }
 1143                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
 1144                 if (error) {
 1145                         (void)vnode_put(vp);
 1146                         goto outdrop;
 1147                 }
 1148                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
 1149 
 1150                 error = VNOP_BLOCKMAP(vp, offset, devBlockSize, &bn, NULL, NULL, 0, &context);
 1151 
 1152                 (void)vnode_put(vp);
 1153 
 1154                 if (!error) {
 1155                         l2p_struct.l2p_flags = 0;       /* for now */
 1156                         l2p_struct.l2p_contigbytes = 0; /* for now */
 1157                         l2p_struct.l2p_devoffset = bn * devBlockSize;
 1158                         l2p_struct.l2p_devoffset += fp->f_offset - offset;
 1159                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
 1160                 }
 1161                 goto outdrop;
 1162                 }
 1163         case F_GETPATH: {
 1164                 char *pathbufp;
 1165                 int pathlen;
 1166 
 1167                 if (fp->f_type != DTYPE_VNODE) {
 1168                         error = EBADF;
 1169                         goto out;
 1170                 }
 1171                 vp = (struct vnode *)fp->f_data;
 1172                 proc_fdunlock(p);
 1173 
 1174                 pathlen = MAXPATHLEN;
 1175                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
 1176                 if (pathbufp == NULL) {
 1177                         error = ENOMEM;
 1178                         goto outdrop;
 1179                 }
 1180                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1181                         error = vn_getpath(vp, pathbufp, &pathlen);
 1182                         (void)vnode_put(vp);
 1183 
 1184                         if (error == 0)
 1185                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
 1186                 }
 1187                 FREE(pathbufp, M_TEMP);
 1188                 goto outdrop;
 1189         }
 1190 
 1191         case F_PATHPKG_CHECK: {
 1192                 char *pathbufp;
 1193                 size_t pathlen;
 1194 
 1195                 if (fp->f_type != DTYPE_VNODE) {
 1196                         error = EBADF;
 1197                         goto out;
 1198                 }
 1199                 vp = (struct vnode *)fp->f_data;
 1200                 proc_fdunlock(p);
 1201 
 1202                 pathlen = MAXPATHLEN;
 1203                 pathbufp = kalloc(MAXPATHLEN);
 1204 
 1205                 if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) {
 1206                         if ( (error = vnode_getwithref(vp)) == 0 ) {
 1207                                 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
 1208 
 1209                                 (void)vnode_put(vp);
 1210                         }
 1211                 }
 1212                 kfree(pathbufp, MAXPATHLEN);
 1213                 goto outdrop;
 1214         }
 1215 
 1216         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
 1217         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZECACHE
 1218         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
 1219         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
 1220                 if (fp->f_type != DTYPE_VNODE) {
 1221                         error = EBADF;
 1222                         goto out;
 1223                 }
 1224                 vp = (struct vnode *)fp->f_data;
 1225                 proc_fdunlock(p);
 1226 
 1227                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1228                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
 1229 
 1230                         (void)vnode_put(vp);
 1231                 }
 1232                 break;
 1233         }
 1234 
 1235         /*
 1236          * SPI (private) for opening a file starting from a dir fd
 1237          */
 1238         case F_OPENFROM: {
 1239                 struct user_fopenfrom fopen;
 1240                 struct vnode_attr va;
 1241                 struct nameidata nd;
 1242                 int cmode;
 1243 
 1244                 /* Check if this isn't a valid file descriptor */
 1245                 if ((fp->f_type != DTYPE_VNODE) ||
 1246                     (fp->f_flag & FREAD) == 0) {
 1247                         error = EBADF;
 1248                         goto out;
 1249                 }
 1250                 vp = (struct vnode *)fp->f_data;
 1251                 proc_fdunlock(p);
 1252 
 1253                 if (vnode_getwithref(vp)) {
 1254                         error = ENOENT;
 1255                         goto outdrop;
 1256                 }
 1257                 
 1258                 /* Only valid for directories */
 1259                 if (vp->v_type != VDIR) {
 1260                         vnode_put(vp);
 1261                         error = ENOTDIR;
 1262                         goto outdrop;
 1263                 }
 1264 
 1265                 /* Get flags, mode and pathname arguments. */
 1266                 if (IS_64BIT_PROCESS(p)) {
 1267                         error = copyin(argp, &fopen, sizeof(fopen));
 1268                 } else {
 1269                         struct fopenfrom fopen32;
 1270 
 1271                         error = copyin(argp, &fopen32, sizeof(fopen32));
 1272                         fopen.o_flags = fopen32.o_flags;
 1273                         fopen.o_mode = fopen32.o_mode;
 1274                         fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
 1275                 }
 1276                 if (error) {
 1277                         vnode_put(vp);
 1278                         goto outdrop;
 1279                 }
 1280                 VATTR_INIT(&va);
 1281                 /* Mask off all but regular access permissions */
 1282                 cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1283                 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
 1284 
 1285                 /* Start the lookup relative to the file descriptor's vnode. */
 1286                 NDINIT(&nd, LOOKUP, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
 1287                        fopen.o_pathname, &context);
 1288                 nd.ni_dvp = vp;
 1289 
 1290                 error = open1(&context, &nd, fopen.o_flags, &va, retval);
 1291 
 1292                 vnode_put(vp);
 1293                 break;
 1294         }
 1295         /*
 1296          * SPI (private) for unlinking a file starting from a dir fd
 1297          */
 1298         case F_UNLINKFROM: {
 1299                 struct nameidata nd;
 1300                 user_addr_t pathname;
 1301 
 1302                 /* Check if this isn't a valid file descriptor */
 1303                 if ((fp->f_type != DTYPE_VNODE) ||
 1304                     (fp->f_flag & FREAD) == 0) {
 1305                         error = EBADF;
 1306                         goto out;
 1307                 }
 1308                 vp = (struct vnode *)fp->f_data;
 1309                 proc_fdunlock(p);
 1310 
 1311                 if (vnode_getwithref(vp)) {
 1312                         error = ENOENT;
 1313                         goto outdrop;
 1314                 }
 1315                 
 1316                 /* Only valid for directories */
 1317                 if (vp->v_type != VDIR) {
 1318                         vnode_put(vp);
 1319                         error = ENOTDIR;
 1320                         goto outdrop;
 1321                 }
 1322 
 1323                 /* Get the pathname argument. */
 1324                 if (IS_64BIT_PROCESS(p)) {
 1325                         pathname = (user_addr_t)argp;
 1326                 } else {
 1327                         pathname = CAST_USER_ADDR_T(argp);
 1328                 }
 1329 
 1330                 /* Start the lookup relative to the file descriptor's vnode. */
 1331                 NDINIT(&nd, DELETE, USEDVP | AUDITVNPATH1, UIO_USERSPACE, pathname, &context);
 1332                 nd.ni_dvp = vp;
 1333 
 1334                 error = unlink1(&context, &nd, 0);
 1335                 
 1336                 vnode_put(vp);
 1337                 break;
 1338 
 1339         }
 1340 
 1341         case F_ADDSIGS: {
 1342                 struct user_fsignatures fs;
 1343                 kern_return_t kr;
 1344                 vm_address_t kernel_blob_addr;
 1345                 vm_size_t kernel_blob_size;
 1346 
 1347                 if (fp->f_type != DTYPE_VNODE) {
 1348                         error = EBADF;
 1349                         goto out;
 1350                 }
 1351                 vp = (struct vnode *)fp->f_data;
 1352                 proc_fdunlock(p);
 1353                 error = vnode_getwithref(vp);
 1354                 if (error)
 1355                         goto outdrop;
 1356 
 1357                 if (IS_64BIT_PROCESS(p)) {
 1358                         error = copyin(argp, &fs, sizeof (fs));
 1359                 } else {
 1360                         struct fsignatures fs32;
 1361 
 1362                         error = copyin(argp, &fs32, sizeof (fs32));
 1363                         fs.fs_file_start = fs32.fs_file_start;
 1364                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
 1365                         fs.fs_blob_size = fs32.fs_blob_size;
 1366                 }
 1367 
 1368                 if (error) {
 1369                         vnode_put(vp);
 1370                         goto outdrop;
 1371                 }
 1372 
 1373 #define CS_MAX_BLOB_SIZE (1ULL * 1024 * 1024) /* XXX ? */
 1374                 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
 1375                         error = E2BIG;
 1376                         vnode_put(vp);
 1377                         goto outdrop;
 1378                 }
 1379 
 1380                 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
 1381                 kr = kmem_alloc(kernel_map,
 1382                                 &kernel_blob_addr,
 1383                                 kernel_blob_size);
 1384                 if (kr != KERN_SUCCESS) {
 1385                         error = ENOMEM;
 1386                         vnode_put(vp);
 1387                         goto outdrop;
 1388                 }
 1389 
 1390                 error = copyin(fs.fs_blob_start,
 1391                                (void *) kernel_blob_addr,
 1392                                kernel_blob_size);
 1393                 if (error) {
 1394                         kmem_free(kernel_map,
 1395                                   kernel_blob_addr,
 1396                                   kernel_blob_size);
 1397                         vnode_put(vp);
 1398                         goto outdrop;
 1399                 }
 1400 
 1401                 error = ubc_cs_blob_add(
 1402                         vp,
 1403                         CPU_TYPE_ANY,   /* not for a specific architecture */
 1404                         fs.fs_file_start,
 1405                         kernel_blob_addr,
 1406                         kernel_blob_size);
 1407                 if (error) {
 1408                         kmem_free(kernel_map,
 1409                                   kernel_blob_addr,
 1410                                   kernel_blob_size);
 1411                 } else {
 1412                         /* ubc_cs_blob_add() has consumed "kernel_blob_addr" */
 1413                 }
 1414 
 1415                 (void) vnode_put(vp);
 1416                 break;
 1417         }
 1418 
 1419         case F_MARKDEPENDENCY: {
 1420                 struct vnode *root_vp;
 1421                 struct vnode_attr va;
 1422                 vfs_context_t ctx = vfs_context_current();
 1423                 kauth_cred_t cred;
 1424 
 1425                 if ((current_proc()->p_flag & P_DEPENDENCY_CAPABLE) == 0) {
 1426                     error = EPERM;
 1427                     goto out;
 1428                 }
 1429                 
 1430                 if (fp->f_type != DTYPE_VNODE) {
 1431                         error = EBADF;
 1432                         goto out;
 1433                 }
 1434 
 1435                 vp = (struct vnode *)fp->f_data;
 1436                 proc_fdunlock(p);
 1437 
 1438                 if (vnode_getwithref(vp)) {
 1439                         error = ENOENT;
 1440                         goto outdrop;
 1441                 }
 1442 
 1443                 // the passed in vnode must be the root dir of the file system
 1444                 if (VFS_ROOT(vp->v_mount, &root_vp, ctx) != 0 || vp != root_vp) {
 1445                     error = EINVAL;
 1446                     vnode_put(vp);
 1447                     goto outdrop;
 1448                 }
 1449                 vnode_put(root_vp);
 1450 
 1451                 // get the owner of the root dir
 1452                 VATTR_INIT(&va);
 1453                 VATTR_WANTED(&va, va_uid);
 1454                 if (vnode_getattr(vp, &va, ctx) != 0) {
 1455                     error = EINVAL;
 1456                     vnode_put(vp);
 1457                     goto outdrop;
 1458                 }
 1459 
 1460                 // and last, check that the caller is the super user or
 1461                 // the owner of the mount point
 1462                 cred = vfs_context_ucred(ctx);
 1463                 if (!is_suser() && va.va_uid != kauth_cred_getuid(cred)) {
 1464                         error = EACCES;
 1465                         vnode_put(vp);
 1466                         goto outdrop;
 1467                 }
 1468 
 1469                 // if all those checks pass then we can mark the dependency
 1470                 vfs_markdependency(vp->v_mount);
 1471                 error = 0;
 1472 
 1473                 vnode_put(vp);
 1474                 
 1475                 break;
 1476         }
 1477 
 1478         default:
 1479                 if (uap->cmd < FCNTL_FS_SPECIFIC_BASE) {
 1480                         error = EINVAL;
 1481                         goto out;
 1482                 }
 1483 
 1484                 // if it's a fs-specific fcntl() then just pass it through
 1485 
 1486                 if (fp->f_type != DTYPE_VNODE) {
 1487                         error = EBADF;
 1488                         goto out;
 1489                 }
 1490                 vp = (struct vnode *)fp->f_data;
 1491                 proc_fdunlock(p);
 1492 
 1493                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 1494                         error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, argp), 0, &context);
 1495 
 1496                         (void)vnode_put(vp);
 1497                 }
 1498                 break;
 1499         
 1500         }
 1501 
 1502 outdrop:
 1503         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
 1504         fp_drop(p, fd, fp, 0);
 1505         return(error);
 1506 out:
 1507         fp_drop(p, fd, fp, 1);
 1508         proc_fdunlock(p);
 1509         return(error);
 1510 }
 1511 
 1512 
 1513 /*
 1514  * finishdup
 1515  *
 1516  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
 1517  *
 1518  * Parameters:  p                               Process performing the dup
       *              fdp                             The filedesc whose fd_ofiles,
       *                                              fd_ofileflags and fd_lastfile
       *                                              are read/updated here
 1519  *              old                             The fd to dup
 1520  *              new                             The fd to dup it to; its slot
       *                                              must be empty and flagged
       *                                              UF_RESERVED (only checked
       *                                              under DIAGNOSTIC)
 1521  *              retval                          Pointer to the call return area
 1522  *
 1523  * Returns:     0                               Success
 1524  *              EBADF                           fd_ofiles[old] is NULL or old
       *                                              is flagged UF_RESERVED
 1525  *              ENOMEM                          fileproc allocation failed
       *      mac_file_check_dup:???                  CONFIG_MACF builds only
 1526  *
 1527  * Implicit returns:
 1528  *              *retval (modified)              The new descriptor (written
       *                                              on success only)
 1529  *
 1530  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
 1531  *              the caller
 1532  *
 1533  * Notes:       This function may drop and reacquire this lock; it is unsafe
 1534  *              for a caller to assume that other state protected by the lock
 1535  *              has not been subsequently changed out from under it.
       *
       *              Every failure return is preceded by fdrelse(p, new).
 1536  */
 1537 int
 1538 finishdup(proc_t p, struct filedesc *fdp, int old, int new, register_t *retval)
 1539 {
 1540         struct fileproc *nfp;
 1541         struct fileproc *ofp;
 1542 #if CONFIG_MACF
 1543         int error;
 1544 #endif
 1545 
 1546 #if DIAGNOSTIC
 1547         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 1548 #endif
 1549 
              /*
               * The source slot must hold a fileproc and must not be flagged
               * UF_RESERVED.  On failure fdrelse() is called for 'new'
               * (presumably giving back the slot the caller reserved --
               * TODO confirm against fdrelse()).
               */
 1550         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
 1551                         (fdp->fd_ofileflags[old] & UF_RESERVED)) {
 1552                 fdrelse(p, new);
 1553                 return (EBADF);
 1554         }
              /* Paired with fg_drop(ofp) on each failure path below. */
 1555         fg_ref(ofp);
 1556 
 1557 #if CONFIG_MACF
              /* MAC policy check on the dup; a non-zero result is returned as is. */
 1558         error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
 1559         if (error) {
 1560                 fg_drop(ofp);
 1561                 fdrelse(p, new);
 1562                 return (error);
 1563         }
 1564 #endif
 1565 
              /*
               * This is the drop/reacquire the header warns about: the fd lock
               * is released around the M_WAITOK allocation (presumably because
               * that allocation may block).
               */
 1566         proc_fdunlock(p);
 1567 
 1568         MALLOC_ZONE(nfp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
 1569         /* Failure check follows proc_fdlock() due to handling requirements */
 1570 
 1571         proc_fdlock(p);
 1572 
              /* Allocation failed: undo the fg_ref() and call fdrelse() for 'new'. */
 1573         if (nfp == NULL) {
 1574                 fg_drop(ofp);
 1575                 fdrelse(p, new);
 1576                 return (ENOMEM);
 1577         }
 1578 
 1579         bzero(nfp, sizeof(struct fileproc));
 1580 
              /*
               * The new fileproc points at the same f_fglob as the old one.
               * NOTE(review): the f_flags / f_iocount stores look redundant
               * after the bzero() above -- confirm against the fileproc
               * definition.
               */
 1581         nfp->f_flags = 0;
 1582         nfp->f_fglob = ofp->f_fglob;
 1583         nfp->f_iocount = 0;
 1584 
 1585 #if DIAGNOSTIC
              /* Sanity checks: slot 'new' must still be empty and still reserved. */
 1586         if (fdp->fd_ofiles[new] != 0)
 1587                 panic("finishdup: overwriting fd_ofiles with new %d\n", new);
 1588         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
 1589                 panic("finishdup: unreserved  fileflags  with new %d\n", new);
 1590 #endif
 1591 
              /* Raise fd_lastfile when 'new' lies beyond it. */
 1592         if (new > fdp->fd_lastfile)
 1593                 fdp->fd_lastfile = new;
              /*
               * presumably stores nfp in slot 'new' and clears the slot's
               * reservation -- TODO confirm against procfdtbl_releasefd()
               */
 1594         procfdtbl_releasefd(p, new, nfp);
 1595         *retval = new;
 1596         return (0);
 1597 }
 1598 
 1599 
 1600 /*
 1601  * close
 1602  *
 1603  * Description: close(2) system call: __pthread_testcancel(1), then close_nocancel()
 1604  *
 1605  * Parameters:  p                       Process whose file table holds the fd
 1606  *              uap->fd                 fd to be closed
 1607  *              retval                  Handed to close_nocancel() as is
 1608  *
 1609  * Returns:     0                       Success
 1610  *      fp_lookup:EBADF                 Bad file descriptor
 1611  *      close_internal_locked:EBADF
 1612  *      close_internal_locked:???       Anything a per-fileops close can return
 1613  */
 1614 int
 1615 close(proc_t p, struct close_args *uap, register_t *retval)
 1616 {
 1617         struct close_nocancel_args *nc_uap = (struct close_nocancel_args *)uap;
 1618 
 1619         __pthread_testcancel(1);
 1620         return (close_nocancel(p, nc_uap, retval));
 1621 }
 1622 
 1623 
 1624 int
 1625 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused register_t *retval)
 1626 {
              /*
               * Close uap->fd in process p without the __pthread_testcancel()
               * call that close() makes first.  The lookup and the close both
               * happen under proc_fdlock, which is released on every return
               * path.  Returns 0 on success, the fp_lookup() error if the fd
               * cannot be looked up, or whatever close_internal_locked()
               * returns.  retval is unused.
               */
 1627         struct fileproc *fp;
 1628         int fd = uap->fd;
 1629         int error =0;
 1630 
 1631         AUDIT_SYSCLOSE(p, fd);
 1632 
 1633         proc_fdlock(p);
 1634 
              /*
               * NOTE(review): the trailing 1 presumably tells fp_lookup() that
               * the fd lock is already held -- confirm against fp_lookup().
               */
 1635         if ( (error = fp_lookup(p,fd,&fp, 1)) ) {
 1636                 proc_fdunlock(p);
 1637                 return(error);
 1638         }
 1639 
              /* flags == 0: FD_DUP2RESV is not set, so the fd slot is released too */
 1640         error = close_internal_locked(p, fd, fp, 0);
 1641 
 1642         proc_fdunlock(p);
 1643 
 1644         return(error);
 1645 }
 1646 
 1647 
 1648 /*
 1649  * close_internal_locked
 1650  *
 1651  * Close a file descriptor.
 1652  *
 1653  * Parameters:  p                       Process in whose per process file table
 1654  *                                      the close is to occur
 1655  *              fd                      fd to be closed
 1656  *              fp                      fileproc associated with the fd
       *              flags                   If FD_DUP2RESV is set, the fd slot is
       *                                      not released with _fdrelse(); any
       *                                      other bits are ignored here
 1657  *
 1658  * Returns:     0                       Success
 1659  *              EBADF                   fd already in close wait state
 1660  *      closef_locked:???               Anything returnable by a per-fileops
 1661  *                                      close function
 1662  *
 1663  * Locks:       Assumes proc_fdlock for process is held by the caller and returns
 1664  *              with lock held
 1665  *
 1666  * Notes:       This function may drop and reacquire this lock; it is unsafe
 1667  *              for a caller to assume that other state protected by the lock
 1668  *              has not been subsequently changed out from under it, if the
 1669  *              caller made the call with the lock held.
       *
       *              The fileproc passed in as fp is freed with FREE_ZONE() before
       *              this function returns; the caller must not touch it again.
       *
       *              NOTE(review): no path in this body returns EBADF itself; an
       *              fd that already has FP_CLOSING set causes a panic instead.
       *              The value returned is exactly what closef_locked() returned.
 1670  */
 1671 static int
 1672 close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
 1673 {
 1674         struct filedesc *fdp = p->p_fd;
 1675         int error =0;
 1676         int resvfd = flags & FD_DUP2RESV;
 1677 
 1678 
 1679 #if DIAGNOSTIC
 1680         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 1681 #endif
 1682 
 1683         /* Keep people from using the filedesc while we are closing it */
 1684         procfdtbl_markclosefd(p, fd);
 1685 
 1686 
 1687         if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
 1688                 panic("close_internal_locked:  being called on already closing fd\n");
 1689         }
 1690 
 1691 
 1692 #if DIAGNOSTIC
 1693         if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
 1694                 panic("close_internal: unreserved  fileflags  with fd %d\n", fd);
 1695 #endif
 1696 
 1697         fp->f_flags |= FP_CLOSING;
 1698 
              /*
               * The kauth listener callout and the aio cancellation are both
               * made with the fd lock dropped; it is retaken before continuing.
               */
 1699         if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) {
 1700 
 1701                 proc_fdunlock(p);
 1702 
 1703                 if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
 1704                         /*
 1705                          * call out to allow 3rd party notification of close. 
 1706                          * Ignore result of kauth_authorize_fileop call.
 1707                          */
 1708                         if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
 1709                                 u_int   fileop_flags = 0;
 1710                                 if ((fp->f_flags & FP_WRITTEN) != 0)
 1711                                         fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
 1712                                 kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE, 
 1713                                                        (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
 1714                                 vnode_put((vnode_t)fp->f_data);
 1715                         }
 1716                 }
 1717                 if (fp->f_flags & FP_AIOISSUED)
 1718                         /*
 1719                          * cancel all async IO requests that can be cancelled.
 1720                          */
 1721                         _aio_close( p, fd );
 1722 
 1723                 proc_fdlock(p);
 1724         }
 1725 
 1726         if (fd < fdp->fd_knlistsize)
 1727                 knote_fdclose(p, fd);
 1728 
 1729         if (fp->f_flags & FP_WAITEVENT) 
 1730                 (void)waitevent_close(p, fp);
 1731 
 1732         if ((fp->f_flags & FP_INCHRREAD) == 0)
 1733                 fileproc_drain(p, fp);
 1734 
              /* Give the slot back unless the caller asked (FD_DUP2RESV) to keep it */
 1735         if (resvfd == 0)
 1736                 _fdrelse(p, fd);
 1737 
 1738         error = closef_locked(fp, fp->f_fglob, p);
              /* Wake anyone sleeping on &fp->f_flags, then clear both close flags */
 1739         if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
 1740                 wakeup(&fp->f_flags);
 1741         fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
 1742 
              /* The fileproc is freed with the fd lock dropped; relock before returning */
 1743         proc_fdunlock(p);
 1744 
 1745         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); 
 1746 
 1747         proc_fdlock(p);
 1748 
 1749 #if DIAGNOSTIC
              /* With FD_DUP2RESV the slot must still be marked UF_RESERVED here */
 1750         if (resvfd != 0) {
 1751                 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
 1752                         panic("close with reserved fd returns with freed fd:%d: proc: %x\n", fd, (unsigned int)p);
 1753         }
 1754 #endif
 1755 
 1756         return(error);
 1757 }
 1758 
 1759 
 1760 /*
 1761  * fstat1
 1762  *
 1763  * Description: Return status information about a file descriptor.
 1764  *
 1765  * Parameters:  p                               The process doing the fstat
 1766  *              fd                              The fd to stat
 1767  *              ub                              The user stat buffer
 1768  *              xsecurity                       The user extended security
 1769  *                                              buffer, or 0 if none
 1770  *              xsecurity_size                  The size of xsecurity, or 0
 1771  *                                              if no xsecurity
 1772  *              isstat64                        Flag to indicate 64 bit version
 1773  *                                              for inode size, etc.
 1774  *
 1775  * Returns:     0                               Success
 1776  *              EBADF
 1777  *              EFAULT
 1778  *      fp_lookup:EBADF                         Bad file descriptor
 1779  *      vnode_getwithref:???
 1780  *      copyout:EFAULT
 1781  *      vnode_getwithref:???
 1782  *      vn_stat:???
 1783  *      soo_stat:???
 1784  *      pipe_stat:???
 1785  *      pshm_stat:???
 1786  *      kqueue_stat:???
 1787  *
 1788  * Notes:       Internal implementation for all other fstat() related
 1789  *              functions
 1790  *
 1791  *              XXX switch on node type is bogus; need a stat in struct
 1792  *              XXX fileops instead.
       *
       *              A successful fp_lookup() is always paired with the
       *              fp_drop() at the out label, and a filesec handed back by
       *              vn_stat() is released there with kauth_filesec_free().
       *
       *              When xsecurity is supplied, the xsecurity handling runs
       *              even if the stat or its copyout failed.
       *
       *              NOTE(review): the final copyout() of the filesec assigns
       *              error unconditionally, so a failure recorded earlier
       *              (e.g. from copying out the stat buffer) is replaced by
       *              that copyout's result -- confirm this is intended.
 1793  */
 1794 static int
 1795 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
 1796 {
 1797         struct fileproc *fp;
 1798         struct stat sb;
 1799         struct stat64 sb64;
 1800         struct user_stat user_sb;
 1801         struct user_stat64 user_sb64;
 1802         int error, my_size;
 1803         int funnel_state;
 1804         file_type_t type;
 1805         caddr_t data;
 1806         kauth_filesec_t fsec;
 1807         user_size_t xsecurity_bufsize;
 1808         vfs_context_t ctx = vfs_context_current();
 1809         void * sbptr;
 1810 
 1811 
 1812         AUDIT_ARG(fd, fd);
 1813 
 1814         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
 1815                 return(error);
 1816         }
 1817         type = fp->f_type;
 1818         data = fp->f_data;
 1819         fsec = KAUTH_FILESEC_NONE;
 1820 
              /* Every per-type stat routine below fills the buffer chosen here */
 1821         sbptr = (isstat64 != 0) ? (void *)&sb64: (void *)&sb;
 1822 
 1823         switch (type) {
 1824 
 1825         case DTYPE_VNODE:
 1826                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
 1827                         /*
 1828                          * If the caller has the file open, and is not
 1829                          * requesting extended security information, we are
 1830                          * going to let them get the basic stat information.
 1831                          */
 1832                         if (xsecurity == USER_ADDR_NULL) {
 1833                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx);
 1834                         } else {
 1835                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
 1836                         }
 1837 
 1838                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
 1839                         (void)vnode_put((vnode_t)data);
 1840                 }
 1841                 break;
 1842 
 1843 #if SOCKETS
 1844         case DTYPE_SOCKET:
 1845                 error = soo_stat((struct socket *)data, sbptr, isstat64);
 1846                 break;
 1847 #endif /* SOCKETS */
 1848 
 1849         case DTYPE_PIPE:
 1850                 error = pipe_stat((void *)data, sbptr, isstat64);
 1851                 break;
 1852 
 1853         case DTYPE_PSXSHM:
 1854                 error = pshm_stat((void *)data, sbptr, isstat64);
 1855                 break;
 1856 
 1857         case DTYPE_KQUEUE:
                      /* kqueue_stat() is bracketed by thread_funnel_set() on kernel_flock */
 1858                 funnel_state = thread_funnel_set(kernel_flock, TRUE);
 1859                 error = kqueue_stat(fp, sbptr, isstat64, p);
 1860                 thread_funnel_set(kernel_flock, funnel_state);
 1861                 break;
 1862 
 1863         default:
                      /* Any other descriptor type skips both copy-outs */
 1864                 error = EBADF;
 1865                 goto out;
 1866         }
 1867         if (error == 0) {
 1868                 caddr_t sbp;
 1869 
                      /*
                       * Clear the spare fields, then pick what to copy out:
                       * 64-bit processes get the munged user_stat/user_stat64
                       * form, all others get the kernel structure unchanged.
                       */
 1870                 if (isstat64 != 0) {
 1871                         sb64.st_lspare = 0;
 1872                         sb64.st_qspare[0] = 0LL;
 1873                         sb64.st_qspare[1] = 0LL;
 1874                         if (IS_64BIT_PROCESS(current_proc())) {
 1875                                 munge_stat64(&sb64, &user_sb64); 
 1876                                 my_size = sizeof(user_sb64);
 1877                                 sbp = (caddr_t)&user_sb64;
 1878                         } else {
 1879                                 my_size = sizeof(sb64);
 1880                                 sbp = (caddr_t)&sb64;
 1881                         }
 1882                 } else {
 1883                         sb.st_lspare = 0;
 1884                         sb.st_qspare[0] = 0LL;
 1885                         sb.st_qspare[1] = 0LL;
 1886                         if (IS_64BIT_PROCESS(current_proc())) {
 1887                                 munge_stat(&sb, &user_sb); 
 1888                                 my_size = sizeof(user_sb);
 1889                                 sbp = (caddr_t)&user_sb;
 1890                         } else {
 1891                                 my_size = sizeof(sb);
 1892                                 sbp = (caddr_t)&sb;
 1893                         }
 1894                 }
 1895 
 1896                 error = copyout(sbp, ub, my_size);
 1897         }
 1898 
 1899         /* caller wants extended security information? */
 1900         if (xsecurity != USER_ADDR_NULL) {
 1901 
 1902                 /* did we get any? */
 1903                  if (fsec == KAUTH_FILESEC_NONE) {
                              /* nothing to return: report a size of 0 to the caller */
 1904                         if (susize(xsecurity_size, 0) != 0) {
 1905                                 error = EFAULT;
 1906                                 goto out;
 1907                         }
 1908                 } else {
 1909                         /* find the user buffer size */
 1910                         xsecurity_bufsize = fusize(xsecurity_size);
 1911 
 1912                         /* copy out the actual data size */
 1913                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
 1914                                 error = EFAULT;
 1915                                 goto out;
 1916                         }
 1917 
 1918                         /* if the caller supplied enough room, copy out to it */
 1919                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
 1920                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
 1921                 }
 1922         }
 1923 out:
 1924         fp_drop(p, fd, fp, 0);
 1925         if (fsec != NULL)
 1926                 kauth_filesec_free(fsec);
 1927         return (error);
 1928 }
 1929 
 1930 
 1931 /*
 1932  * fstat_extended
 1933  *
 1934  * Description: Extended version of fstat supporting returning extended
 1935  *              security information
 1936  *
 1937  * Parameters:  p                               The process doing the fstat
 1938  *              uap->fd                         The fd to stat
 1939  *              uap->ub                         The user stat buffer
 1940  *              uap->xsecurity                  The user extended security
 1941  *                                              buffer, or 0 if none
 1942  *              uap->xsecurity_size             The size of xsecurity, or 0
       *              retval                          <unused>
 1943  *
 1944  * Returns:     0                               Success
 1945  *              !0                              Errno (see fstat1)
       *
       * Notes:       Thin wrapper: forwards to fstat1() with isstat64 == 0.
 1946  */
 1947 int
 1948 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused register_t *retval)
 1949 {
 1950         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
 1951 }
 1952  
 1953 
 1954 /*
 1955  * fstat
 1956  *
 1957  * Description: Get file status for the file associated with fd
 1958  *
 1959  * Parameters:  p                               The process doing the fstat
 1960  *              uap->fd                         The fd to stat
 1961  *              uap->ub                         The user stat buffer
       *              retval                          <unused>
 1962  *
 1963  * Returns:     0                               Success
 1964  *              !0                              Errno (see fstat1)
       *
       * Notes:       Thin wrapper: forwards to fstat1() with no extended
       *              security buffer (xsecurity and xsecurity_size both 0)
       *              and isstat64 == 0.
 1965  */
 1966 int
 1967 fstat(proc_t p, register struct fstat_args *uap, __unused register_t *retval)
 1968 {
 1969         return(fstat1(p, uap->fd, uap->ub, 0, 0, 0));
 1970 }
 1971 
 1972 
 1973 /*
 1974  * fstat64_extended
 1975  *
 1976  * Description: Extended version of fstat64 supporting returning extended
 1977  *              security information
 1978  *
 1979  * Parameters:  p                               The process doing the fstat
 1980  *              uap->fd                         The fd to stat
 1981  *              uap->ub                         The user stat buffer
 1982  *              uap->xsecurity                  The user extended security
 1983  *                                              buffer, or 0 if none
 1984  *              uap->xsecurity_size             The size of xsecurity, or 0
       *              retval                          <unused>
 1985  *
 1986  * Returns:     0                               Success
 1987  *              !0                              Errno (see fstat1)
       *
       * Notes:       Thin wrapper: forwards to fstat1() with isstat64 == 1.
 1988  */
 1989 int
 1990 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused register_t *retval)
 1991 {
 1992         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
 1993 }
 1994  
 1995 
 1996 /*
 1997  * fstat64
 1998  *
 1999  * Description: Get 64 bit version of the file status for the file associated
 2000  *              with fd
 2001  *
 2002  * Parameters:  p                               The process doing the fstat
 2003  *              uap->fd                         The fd to stat
 2004  *              uap->ub                         The user stat buffer
       *              retval                          <unused>
 2005  *
 2006  * Returns:     0                               Success
 2007  *              !0                              Errno (see fstat1)
       *
       * Notes:       Thin wrapper: forwards to fstat1() with no extended
       *              security buffer (xsecurity and xsecurity_size both 0)
       *              and isstat64 == 1.
 2008  */
 2009 int
 2010 fstat64(proc_t p, register struct fstat64_args *uap, __unused register_t *retval)
 2011 {
 2012         return(fstat1(p, uap->fd, uap->ub, 0, 0, 1));
 2013 }
 2014 
 2015 
 2016 /*
 2017  * fpathconf
 2018  *
 2019  * Description: Return pathconf information about a file descriptor.
 2020  *
 2021  * Parameters:  p                               Process making the request
 2022  *              uap->fd                         fd to get information about
 2023  *              uap->name                       Name of information desired
 2024  *              retval                          Pointer to the call return area
 2025  *
 2026  * Returns:     0                               Success
 2027  *              EINVAL                          name is not supported for
       *                                              this type of descriptor, or
       *                                              the descriptor type is not
       *                                              handled here
 2028  *      fp_lookup:EBADF                         Bad file descriptor
 2029  *      vnode_getwithref:???
 2030  *      vn_pathconf:???
 2031  *
 2032  * Implicit returns:
 2033  *              *retval (modified)              Returned information (numeric)
       *
       * Notes:       Sockets and pipes answer only _PC_PIPE_BUF (with PIPE_BUF);
       *              vnodes defer to vn_pathconf(); every other descriptor type
       *              yields EINVAL.  *retval is written only on the paths that
       *              return 0 (or by vn_pathconf() itself).  A successful
       *              fp_lookup() is always paired with the fp_drop() at out.
 2034  */
 2035 int
 2036 fpathconf(proc_t p, struct fpathconf_args *uap, register_t *retval)
 2037 {
 2038         int fd = uap->fd;
 2039         struct fileproc *fp;
 2040         struct vnode *vp;
 2041         int error = 0;
 2042         file_type_t type;
 2043         caddr_t data;
 2044 
 2045 
 2046         AUDIT_ARG(fd, uap->fd);
 2047         if ( (error = fp_lookup(p, fd, &fp, 0)) )
 2048                 return(error);
 2049         type = fp->f_type;
 2050         data = fp->f_data;
 2051 
 2052         switch (type) {
 2053 
 2054         case DTYPE_SOCKET:
 2055                 if (uap->name != _PC_PIPE_BUF) {
 2056                         error = EINVAL;
 2057                         goto out;
 2058                 }
 2059                 *retval = PIPE_BUF;
 2060                 error = 0;
 2061                 goto out;
 2062 
 2063         case DTYPE_PIPE:
                      /*
                       * Same rule as for sockets: PIPE_BUF is the answer to
                       * _PC_PIPE_BUF only, not to every possible name.
                       */
                      if (uap->name != _PC_PIPE_BUF) {
                              error = EINVAL;
                              goto out;
                      }
 2064                 *retval = PIPE_BUF;
 2065                 error = 0;
 2066                 goto out;
 2067 
 2068         case DTYPE_VNODE:
 2069                 vp = (struct vnode *)data;
 2070 
 2071                 if ( (error = vnode_getwithref(vp)) == 0) {
 2072                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 2073 
 2074                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
 2075 
 2076                         (void)vnode_put(vp);
 2077                 }
 2078                 goto out;
 2079 
 2080         case DTYPE_PSXSHM:
 2081         case DTYPE_PSXSEM:
 2082         case DTYPE_KQUEUE:
 2083         case DTYPE_FSEVENTS:
              /*
               * An unlisted descriptor type must not fall out of the switch
               * with error == 0 and *retval never written.
               */
              default:
 2084                 error = EINVAL;
 2085                 goto out;
 2086 
 2087         }
 2088         /*NOTREACHED*/
 2089 out:
 2090         fp_drop(p, fd, fp, 0);
 2091         return(error);
 2092 }
 2093 
 2094 /*
 2095  * Statistics counter for the number of times a process calling fdalloc()
 2096  * has resulted in an expansion of the per process open file table.
 2097  *
 2098  * XXX This would likely be of more use if it were per process
       *
       * fdalloc() increments this once each time it installs a larger table.
       *
       * NOTE(review): that increment is made while holding only the calling
       * process's fd lock; since this counter is global, updates from different
       * processes are presumably not serialized against each other -- confirm.
 2099  */
 2100 int fdexpand;
 2101 
 2102 
 2103 /*
 2104  * fdalloc
 2105  *
 2106  * Description: Allocate a file descriptor for the process.
 2107  *
 2108  * Parameters:  p                               Process to allocate the fd in
 2109  *              want                            The fd we would prefer to get
 2110  *              result                          Pointer to fd we got
 2111  *
 2112  * Returns:     0                               Success
 2113  *              EMFILE                          No free slot and the table is
       *                                              already at the limit
 2114  *              ENOMEM                          A larger table could not be
       *                                              allocated
 2115  *
 2116  * Implicit returns:
 2117  *              *result (modified)              The fd which was allocated
       *
       * Locks:       The caller must hold proc_fdlock (asserted when DIAGNOSTIC
       *              is set).  The lock is dropped around the allocation of a
       *              larger table and retaken afterwards, so table state may
       *              have changed by the time this function returns.
       *
       * Notes:       The limit is the smaller of the process RLIMIT_NOFILE soft
       *              limit and maxfiles.  The slot handed back has been passed
       *              to procfdtbl_reservefd(); no fileproc is installed in it.
 2118  */
 2119 int
 2120 fdalloc(proc_t p, int want, int *result)
 2121 {
 2122         struct filedesc *fdp = p->p_fd;
 2123         int i;
 2124         int lim, last, numfiles, oldnfiles;
 2125         struct fileproc **newofiles, **ofiles;
 2126         char *newofileflags;
 2127 
 2128         /*
 2129          * Search for a free descriptor starting at the higher
 2130          * of want or fd_freefile.  If that fails, consider
 2131          * expanding the ofile array.
 2132          */
 2133 #if DIAGNOSTIC
 2134         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 2135 #endif
 2136 
 2137         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 2138         for (;;) {
 2139                 last = min(fdp->fd_nfiles, lim);
 2140                 if ((i = want) < fdp->fd_freefile)
 2141                         i = fdp->fd_freefile;
 2142                 for (; i < last; i++) {
                              /* free means: no fileproc installed and not reserved */
 2143                         if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
 2144                                 procfdtbl_reservefd(p, i);
 2145                                 if (i > fdp->fd_lastfile)
 2146                                         fdp->fd_lastfile = i;
                                      /* only advance the hint when the search began at or below it */
 2147                                 if (want <= fdp->fd_freefile)
 2148                                         fdp->fd_freefile = i;
 2149                                 *result = i;
 2150                                 return (0);
 2151                         }
 2152                 }
 2153 
 2154                 /*
 2155                  * No space in current array.  Expand?
 2156                  */
 2157                 if (fdp->fd_nfiles >= lim)
 2158                         return (EMFILE);
                      /* grow to NDEXTENT first, then double, never past lim */
 2159                 if (fdp->fd_nfiles < NDEXTENT)
 2160                         numfiles = NDEXTENT;
 2161                 else
 2162                         numfiles = 2 * fdp->fd_nfiles;
 2163                 /* Enforce lim */
 2164                 if (numfiles > lim)
 2165                         numfiles = lim;
                      /* the allocation is made with the fd lock dropped */
 2166                 proc_fdunlock(p);
 2167                 MALLOC_ZONE(newofiles, struct fileproc **,
 2168                                 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
 2169                 proc_fdlock(p);
 2170                 if (newofiles == NULL) {
 2171                         return (ENOMEM);
 2172                 }
                      /*
                       * The table may have been grown by someone else while the
                       * lock was dropped; if it is already at least this big,
                       * discard the new allocation and search again.
                       */
 2173                 if (fdp->fd_nfiles >= numfiles) {
 2174                         FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
 2175                         continue;
 2176                 }
                      /* the flag bytes live in the same allocation, right after the pointers */
 2177                 newofileflags = (char *) &newofiles[numfiles];
 2178                 /*
 2179                  * Copy the existing ofile and ofileflags arrays
 2180                  * and zero the new portion of each array.
 2181                  */
 2182                 oldnfiles = fdp->fd_nfiles;
 2183                 (void) memcpy(newofiles, fdp->fd_ofiles,
 2184                                 oldnfiles * sizeof(*fdp->fd_ofiles));
 2185                 (void) memset(&newofiles[oldnfiles], 0,
 2186                                 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
 2187 
 2188                 (void) memcpy(newofileflags, fdp->fd_ofileflags,
 2189                                 oldnfiles * sizeof(*fdp->fd_ofileflags));
 2190                 (void) memset(&newofileflags[oldnfiles], 0,
 2191                                 (numfiles - oldnfiles) *
 2192                                                 sizeof(*fdp->fd_ofileflags));
                      /* install the new table, free the old one, then loop to search again */
 2193                 ofiles = fdp->fd_ofiles;
 2194                 fdp->fd_ofiles = newofiles;
 2195                 fdp->fd_ofileflags = newofileflags;
 2196                 fdp->fd_nfiles = numfiles;
 2197                 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
 2198                 fdexpand++;
 2199         }
 2200 }
 2201 
 2202 
 2203 /*
 2204  * fdavail
 2205  *
 2206  * Description: Check to see whether n user file descriptors are available
 2207  *              to the process p.
 2208  *
 2209  * Parameters:  p                               Process to check in
 2210  *              n                               The number of fd's desired
 2211  *
 2212  * Returns:     0                               No
 2213  *              1                               Yes
 2214  *
 2215  * Locks:       Assumes proc_fdlock for process is held by the caller
 2216  *
 2217  * Notes:       The answer only remains valid so long as the proc_fdlock is
 2218  *              held by the caller.
       *
       *              Slots the table could still grow by (the limit minus
       *              fd_nfiles) are counted first; only if those do not cover n
       *              is the existing table scanned, starting at fd_freefile.
 2219  */
 2220 int
 2221 fdavail(proc_t p, int n)
 2222 {
 2223         struct filedesc *fdp = p->p_fd;
 2224         struct fileproc **fpp;
 2225         char *flags;
 2226         int i, lim;
 2227 
 2228         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
              /* room left to grow is subtracted from n; done if that alone covers it */
 2229         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
 2230                 return (1);
 2231         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
 2232         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
              /* count slots that are both empty and unreserved until n is used up */
 2233         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
 2234                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
 2235                         return (1);
 2236         return (0);
 2237 }
 2238 
 2239 
 2240 /*
 2241  * fdrelse
 2242  *
 2243  * Description: Legacy KPI wrapper function for _fdrelse
 2244  *
 2245  * Parameters:  p                               Process in which fd lives
 2246  *              fd                              fd to free
 2247  *
 2248  * Returns:     void
 2249  *
 2250  * Locks:       Assumes proc_fdlock for process is held by the caller
       *
       * Notes:       Does nothing beyond calling _fdrelse(p, fd).
 2251  */
 2252 void
 2253 fdrelse(proc_t p, int fd)
 2254 {
 2255         _fdrelse(p, fd);
 2256 }
 2257 
 2258 
 2259 /*
 2260  * fdgetf_noref
 2261  *
 2262  * Description: Get the fileproc pointer for the given fd from the per process
 2263  *              open file table without taking an explicit reference on it.
 2264  *
 2265  * Parameters:  p                               Process containing fd
 2266  *              fd                              fd to obtain fileproc for
 2267  *              resultfp                        Pointer to pointer return area,
       *                                              or NULL to only validate fd
 2268  *
 2269  * Returns:     0                               Success
 2270  *              EBADF                           fd is negative, beyond the
       *                                              table, empty, or reserved
 2271  *
 2272  * Implicit returns:
 2273  *              *resultfp (modified)            Pointer to fileproc pointer
 2274  *
 2275  * Locks:       Assumes proc_fdlock for process is held by the caller
 2276  *
 2277  * Notes:       Because there is no reference explicitly taken, the returned
 2278  *              fileproc pointer is only valid so long as the proc_fdlock
 2279  *              remains held by the caller.
 2280  */
 2281 int
 2282 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
 2283 {
 2284         struct filedesc *fdp = p->p_fd;
 2285         struct fileproc *fp;
 2286 
              /* the range check comes first so the table is never indexed out of bounds */
 2287         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2288                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2289                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2290                 return (EBADF);
 2291         }
 2292         if (resultfp)
 2293                 *resultfp = fp;
 2294         return (0);
 2295 }
 2296 
 2297 
 2298 /*
 2299  * fp_getfvp
 2300  *
 2301  * Description: Get fileproc and vnode pointer for a given fd from the per
 2302  *              process open file table of the specified process, and if
 2303  *              successful, increment the f_iocount
 2304  *
 2305  * Parameters:  p                               Process in which fd lives
 2306  *              fd                              fd to get information for
 2307  *              resultfp                        Pointer to result fileproc
 2308  *                                              pointer area, or 0 if none
 2309  *              resultvp                        Pointer to result vnode pointer
 2310  *                                              area, or 0 if none
 2311  *
 2312  * Returns:     0                               Success
 2313  *              EBADF                           Bad file descriptor
 2314  *              ENOTSUP                         fd does not refer to a vnode
 2315  *
 2316  * Implicit returns:
 2317  *              *resultfp (modified)            Fileproc pointer
 2318  *              *resultvp (modified)            vnode pointer
 2319  *
 2320  * Notes:       The resultfp and resultvp fields are optional, and may be
 2321  *              independently specified as NULL to skip returning information
       *
       *              f_iocount is incremented on success even when both result
       *              pointers are NULL; nothing here decrements it again.
       *              Presumably the caller releases it with fp_drop() -- confirm.
 2322  *
 2323  * Locks:       Internally takes and releases proc_fdlock
 2324  */
 2325 int
 2326 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
 2327 {
 2328         struct filedesc *fdp = p->p_fd;
 2329         struct fileproc *fp;
 2330 
              /* taken with proc_fdlock_spin(); released with proc_fdunlock() on every path */
 2331         proc_fdlock_spin(p);
 2332         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2333                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2334                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2335                 proc_fdunlock(p);
 2336                 return (EBADF);
 2337         }
 2338         if (fp->f_type != DTYPE_VNODE) {
 2339                 proc_fdunlock(p);
 2340                 return(ENOTSUP);
 2341         }
 2342         fp->f_iocount++;
 2343 
 2344         if (resultfp)
 2345                 *resultfp = fp;
 2346         if (resultvp)
 2347                 *resultvp = (struct vnode *)fp->f_data;
 2348         proc_fdunlock(p);
 2349 
 2350         return (0);
 2351 }
 2352 
 2353 
 2354 /*
 2355  * fp_getfvpandvid
 2356  *
 2357  * Description: Get fileproc, vnode pointer, and vid for a given fd from the
 2358  *              per process open file table of the specified process, and if
 2359  *              successful, increment the f_iocount
 2360  *
 2361  * Parameters:  p                               Process in which fd lives
 2362  *              fd                              fd to get information for
 2363  *              resultfp                        Pointer to result fileproc
 2364  *                                              pointer area, or 0 if none
 2365  *              resultvp                        Pointer to result vnode pointer
 2366  *                                              area, or 0 if none
 2367  *              vidp                            Pointer to result vid area,
       *                                              or 0 if none
 2368  *
 2369  * Returns:     0                               Success
 2370  *              EBADF                           Bad file descriptor
 2371  *              ENOTSUP                         fd does not refer to a vnode
 2372  *
 2373  * Implicit returns:
 2374  *              *resultfp (modified)            Fileproc pointer
 2375  *              *resultvp (modified)            vnode pointer
 2376  *              *vidp                           vid value
 2377  *
 2378  * Notes:       The resultfp, resultvp and vidp fields are optional, and may be
 2379  *              independently specified as NULL to skip returning information
       *
       *              The vid is read with vnode_vid() while the fd lock is still
       *              held.  f_iocount is incremented on success and is not
       *              decremented here; presumably the caller releases it with
       *              fp_drop() -- confirm.
 2380  *
 2381  * Locks:       Internally takes and releases proc_fdlock
 2382  */
 2383 int
 2384 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
 2385                 struct vnode **resultvp, uint32_t *vidp)
 2386 {
 2387         struct filedesc *fdp = p->p_fd;
 2388         struct fileproc *fp;
 2389 
              /* taken with proc_fdlock_spin(); released with proc_fdunlock() on every path */
 2390         proc_fdlock_spin(p);
 2391         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2392                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2393                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2394                 proc_fdunlock(p);
 2395                 return (EBADF);
 2396         }
 2397         if (fp->f_type != DTYPE_VNODE) {
 2398                 proc_fdunlock(p);
 2399                 return(ENOTSUP);
 2400         }
 2401         fp->f_iocount++;
 2402 
 2403         if (resultfp)
 2404                 *resultfp = fp;
 2405         if (resultvp)
 2406                 *resultvp = (struct vnode *)fp->f_data;
 2407         if (vidp)
 2408                 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
 2409         proc_fdunlock(p);
 2410 
 2411         return (0);
 2412 }
 2413 
 2414 
 2415 /*
 2416  * fp_getfsock
 2417  *
 2418  * Description: Get fileproc and socket pointer for a given fd from the
 2419  *              per process open file table of the specified process, and if
 2420  *              successful, increment the f_iocount
 2421  *
 2422  * Parameters:  p                               Process in which fd lives
 2423  *              fd                              fd to get information for
 2424  *              resultfp                        Pointer to result fileproc
 2425  *                                              pointer area, or 0 if none
 2426  *              results                         Pointer to result socket
 2427  *                                              pointer area, or 0 if none
 2428  *
 2429  * Returns:     EBADF                   The file descriptor is invalid
 2430  *              EOPNOTSUPP              The file descriptor is not a socket
 2431  *              0                       Success
 2432  *
 2433  * Implicit returns:
 2434  *              *resultfp (modified)            Fileproc pointer
 2435  *              *results (modified)             socket pointer
 2436  *
 2437  * Notes:       EOPNOTSUPP should probably be ENOTSOCK; this function is only
 2438  *              ever called from accept1().
 2439  */
 2440 int
 2441 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
 2442             struct socket **results)
 2443 {
 2444         struct filedesc *fdp = p->p_fd;
 2445         struct fileproc *fp;
 2446 
 2447         proc_fdlock_spin(p);
 2448         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2449                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2450                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2451                 proc_fdunlock(p);
 2452                 return (EBADF);
 2453         }
 2454         if (fp->f_type != DTYPE_SOCKET) {
 2455                 proc_fdunlock(p);
 2456                 return(EOPNOTSUPP);
 2457         }
 2458         fp->f_iocount++;
 2459 
 2460         if (resultfp)
 2461                 *resultfp = fp;
 2462         if (results)
 2463                 *results = (struct socket *)fp->f_data;
 2464         proc_fdunlock(p);
 2465 
 2466         return (0);
 2467 }
 2468 
 2469 
 2470 /*
 2471  * fp_getfkq
 2472  *
 2473  * Description: Get fileproc and kqueue pointer for a given fd from the
 2474  *              per process open file table of the specified process, and if
 2475  *              successful, increment the f_iocount
 2476  *
 2477  * Parameters:  p                               Process in which fd lives
 2478  *              fd                              fd to get information for
 2479  *              resultfp                        Pointer to result fileproc
 2480  *                                              pointer area, or 0 if none
 2481  *              resultkq                        Pointer to result kqueue
 2482  *                                              pointer area, or 0 if none
 2483  *
 2484  * Returns:     EBADF                   The file descriptor is invalid
 2485  *              EBADF                   The file descriptor is not a kqueue
 2486  *              0                       Success
 2487  *
 2488  * Implicit returns:
 2489  *              *resultfp (modified)            Fileproc pointer
 2490  *              *resultkq (modified)            kqueue pointer
 2491  *
 2492  * Notes:       The second EBADF should probably be something else to make
 2493  *              the error condition distinct.
 2494  */
 2495 int
 2496 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
 2497           struct kqueue **resultkq)
 2498 {
 2499         struct filedesc *fdp = p->p_fd;
 2500         struct fileproc *fp;
 2501 
 2502         proc_fdlock_spin(p);
 2503         if ( fd < 0 || fd >= fdp->fd_nfiles ||
 2504                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2505                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2506                 proc_fdunlock(p);
 2507                 return (EBADF);
 2508         }
 2509         if (fp->f_type != DTYPE_KQUEUE) {
 2510                 proc_fdunlock(p);
 2511                 return(EBADF);
 2512         }
 2513         fp->f_iocount++;
 2514 
 2515         if (resultfp)
 2516                 *resultfp = fp;
 2517         if (resultkq)
 2518                 *resultkq = (struct kqueue *)fp->f_data;
 2519         proc_fdunlock(p);
 2520 
 2521         return (0);
 2522 }
 2523 
 2524 
 2525 /*
 2526  * fp_getfpshm
 2527  *
 2528  * Description: Get fileproc and POSIX shared memory pointer for a given fd
 2529  *              from the per process open file table of the specified process
 2530  *              and if successful, increment the f_iocount
 2531  *
 2532  * Parameters:  p                               Process in which fd lives
 2533  *              fd                              fd to get information for
 2534  *              resultfp                        Pointer to result fileproc
 2535  *                                              pointer area, or 0 if none
 2536  *              resultpshm                      Pointer to result POSIX
 2537  *                                              shared memory pointer
 2538  *                                              pointer area, or 0 if none
 2539  *
 2540  * Returns:     EBADF                   The file descriptor is invalid
 2541  *              EBADF                   The file descriptor is not a POSIX
 2542  *                                      shared memory area
 2543  *              0                       Success
 2544  *
 2545  * Implicit returns:
 2546  *              *resultfp (modified)            Fileproc pointer
 2547  *              *resultpshm (modified)          POSIX shared memory pointer
 2548  *
 2549  * Notes:       The second EBADF should probably be something else to make
 2550  *              the error condition distinct.
 2551  */
 2552 int
 2553 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
 2554             struct pshmnode **resultpshm)
 2555 {
 2556         struct filedesc *fdp = p->p_fd;
 2557         struct fileproc *fp;
 2558 
 2559         proc_fdlock_spin(p);
 2560         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2561                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2562                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2563                 proc_fdunlock(p);
 2564                 return (EBADF);
 2565         }
 2566         if (fp->f_type != DTYPE_PSXSHM) {
 2567 
 2568                 proc_fdunlock(p);
 2569                 return(EBADF);
 2570         }
 2571         fp->f_iocount++;
 2572 
 2573         if (resultfp)
 2574                 *resultfp = fp;
 2575         if (resultpshm)
 2576                 *resultpshm = (struct pshmnode *)fp->f_data;
 2577         proc_fdunlock(p);
 2578 
 2579         return (0);
 2580 }
 2581 
 2582 
 2583 /*
 2584  * fp_getfpsem
 2585  *
 2586  * Description: Get fileproc and POSIX semaphore pointer for a given fd from
 2587  *              the per process open file table of the specified process
 2588  *              and if successful, increment the f_iocount
 2589  *
 2590  * Parameters:  p                               Process in which fd lives
 2591  *              fd                              fd to get information for
 2592  *              resultfp                        Pointer to result fileproc
 2593  *                                              pointer area, or 0 if none
 2594  *              resultpsem                      Pointer to result POSIX
 2595  *                                              semaphore pointer area, or
 2596  *                                              0 if none
 2597  *
 2598  * Returns:     EBADF                   The file descriptor is invalid
 2599  *              EBADF                   The file descriptor is not a POSIX
 2600  *                                      semaphore
 2601  *              0                       Success
 2602  *
 2603  * Implicit returns:
 2604  *              *resultfp (modified)            Fileproc pointer
 2605  *              *resultpsem (modified)          POSIX semaphore pointer
 2606  *
 2607  * Notes:       The second EBADF should probably be something else to make
 2608  *              the error condition distinct.
 2609  *
 2610  *              In order to support unnamed POSIX semaphores, the named
 2611  *              POSIX semaphores will have to move out of the per-process
 2612  *              open filetable, and into a global table that is shared with
 2613  *              unnamed POSIX semaphores, since unnamed POSIX semaphores
 2614  *              are typically used by declaring instances in shared memory,
 2615  *              and there's no other way to do this without changing the
 2616  *              underlying type, which would introduce binary compatibility
 2617  *              issues.
 2618  */
 2619 int
 2620 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
 2621             struct psemnode **resultpsem)
 2622 {
 2623         struct filedesc *fdp = p->p_fd;
 2624         struct fileproc *fp;
 2625 
 2626         proc_fdlock_spin(p);
 2627         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2628                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2629                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2630                 proc_fdunlock(p);
 2631                 return (EBADF);
 2632         }
 2633         if (fp->f_type != DTYPE_PSXSEM) {
 2634                 proc_fdunlock(p);
 2635                 return(EBADF);
 2636         }
 2637         fp->f_iocount++;
 2638 
 2639         if (resultfp)
 2640                 *resultfp = fp;
 2641         if (resultpsem)
 2642                 *resultpsem = (struct psemnode *)fp->f_data;
 2643         proc_fdunlock(p);
 2644 
 2645         return (0);
 2646 }
 2647 
 2648 
 2649 /*
 2650  * fp_getfpipe
 2651  *
 2652  * Description: Get fileproc and pipe pointer for a given fd from the
 2653  *              per process open file table of the specified process
 2654  *              and if successful, increment the f_iocount
 2655  *
 2656  * Parameters:  p                               Process in which fd lives
 2657  *              fd                              fd to get information for
 2658  *              resultfp                        Pointer to result fileproc
 2659  *                                              pointer area, or 0 if none
 2660  *              resultpipe                      Pointer to result pipe
 2661  *                                              pointer area, or 0 if none
 2662  *
 2663  * Returns:     EBADF                   The file descriptor is invalid
 2664  *              EBADF                   The file descriptor is not a pipe
 2665  *              0                       Success
 2666  *
 2667  * Implicit returns:
 2668  *              *resultfp (modified)            Fileproc pointer
 2669  *              *resultpipe (modified)          pipe pointer
 2670  *
 2671  * Notes:       The second EBADF should probably be something else to make
 2672  *              the error condition distinct.
 2673  */
 2674 int
 2675 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
 2676             struct pipe **resultpipe)
 2677 {
 2678         struct filedesc *fdp = p->p_fd;
 2679         struct fileproc *fp;
 2680 
 2681         proc_fdlock_spin(p);
 2682         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2683                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2684                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2685                 proc_fdunlock(p);
 2686                 return (EBADF);
 2687         }
 2688         if (fp->f_type != DTYPE_PIPE) {
 2689                 proc_fdunlock(p);
 2690                 return(EBADF);
 2691         }
 2692         fp->f_iocount++;
 2693 
 2694         if (resultfp)
 2695                 *resultfp = fp;
 2696         if (resultpipe)
 2697                 *resultpipe = (struct pipe *)fp->f_data;
 2698         proc_fdunlock(p);
 2699 
 2700         return (0);
 2701 }
 2702 
 2703 
 2704 #define DTYPE_ATALK -1          /* XXX This does not belong here */
 2705 
 2706 
 2707 /*
 2708  * fp_getfatalk
 2709  *
 2710  * Description: Get fileproc and atalk pointer for a given fd from the
 2711  *              per process open file table of the specified process
 2712  *              and if successful, increment the f_iocount
 2713  *
 2714  * Parameters:  p                               Process in which fd lives
 2715  *              fd                              fd to get information for
 2716  *              resultfp                        Pointer to result fileproc
 2717  *                                              pointer area, or 0 if none
 2718  *              resultatalk                     Pointer to result atalk
 2719  *                                              pointer area, or 0 if none
 2720  * Returns:     EBADF                   The file descriptor is invalid
 2721  *              EBADF                   The file descriptor does not have
 2721  *                                      the AppleTalk file type
 2722  *              0                       Success
 2723  *
 2724  * Implicit returns:
 2725  *              *resultfp (modified)            Fileproc pointer
 2726  *              *resultatalk (modified)         atalk pointer
 2727  *
 2728  * Notes:       The second EBADF should probably be something else to make
 2729  *              the error condition distinct.
 2730  *
 2731  *              XXX This code is specific to AppleTalk protocol support, and
 2732  *              XXX should be conditionally compiled
 2733  */
 2734 int
 2735 fp_getfatalk(proc_t p, int fd, struct fileproc **resultfp,
 2736              struct atalk **resultatalk)
 2737 {
 2738         struct filedesc *fdp = p->p_fd;
 2739         struct fileproc *fp;
 2740 
 2741         proc_fdlock_spin(p);
 2742         if (fd < 0 || fd >= fdp->fd_nfiles ||
 2743                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2744                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2745                 proc_fdunlock(p);
 2746                 return (EBADF);
 2747         }
 2748         if (fp->f_type != (DTYPE_ATALK+1)) {
 2749                 proc_fdunlock(p);
 2750                 return(EBADF);
 2751         }
 2752         fp->f_iocount++;
 2753 
 2754         if (resultfp)
 2755                 *resultfp = fp;
 2756         if (resultatalk)
 2757                 *resultatalk = (struct atalk *)fp->f_data;
 2758         proc_fdunlock(p);
 2759 
 2760         return (0);
 2761 }
 2762 
 2763 
 2764 /*
 2765  * fp_lookup
 2766  *
 2767  * Description: Get fileproc pointer for a given fd from the per process
 2768  *              open file table of the specified process and if successful,
 2769  *              increment the f_iocount
 2770  *
 2771  * Parameters:  p                               Process in which fd lives
 2772  *              fd                              fd to get information for
 2773  *              resultfp                        Pointer to result fileproc
 2774  *                                              pointer area, or 0 if none
 2775  *              locked                          !0 if the caller holds the
 2776  *                                              proc_fdlock, 0 otherwise
 2777  *
 2778  * Returns:     0                       Success
 2779  *              EBADF                   Bad file descriptor
 2780  *
 2781  * Implicit returns:
 2782  *              *resultfp (modified)            Fileproc pointer
 2783  *
 2784  * Locks:       If the argument 'locked' is non-zero, then the caller is
 2785  *              expected to have taken and held the proc_fdlock; if it is
 2786  *              zero, then this routine internally takes and drops this lock.
 2787  */
 2788 int
 2789 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
 2790 {
 2791         struct filedesc *fdp = p->p_fd;
 2792         struct fileproc *fp;
 2793 
 2794         if (!locked)
 2795                 proc_fdlock_spin(p);
 2796         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
 2797                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2798                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 2799                 if (!locked)
 2800                         proc_fdunlock(p);
 2801                 return (EBADF);
 2802         }
 2803         fp->f_iocount++;
 2804 
 2805         if (resultfp)
 2806                 *resultfp = fp;
 2807         if (!locked)
 2808                 proc_fdunlock(p);
 2809                 
 2810         return (0);
 2811 }
 2812 
 2813 
 2814 /*
 2815  * fp_drop_written
 2816  *
 2817  * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
 2818  *              reference previously taken by calling fp_lookup et al.
 2819  *
 2820  * Parameters:  p                               Process in which the fd lives
 2821  *              fd                              fd associated with the fileproc
 2822  *              fp                              fileproc on which to set the
 2823  *                                              flag and drop the reference
 2824  *
 2825  * Returns:     0                               Success
 2826  *      fp_drop:EBADF                           Bad file descriptor
 2827  *
 2828  * Locks:       This function internally takes and drops the proc_fdlock for
 2829  *              the supplied process
 2830  *
 2831  * Notes:       The fileproc must correspond to the fd in the supplied proc
 2832  */
 2833 int
 2834 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
 2835 {
 2836         int error;
 2837 
 2838         proc_fdlock_spin(p);
 2839 
 2840         fp->f_flags |= FP_WRITTEN;
 2841         
 2842         error = fp_drop(p, fd, fp, 1);
 2843 
 2844         proc_fdunlock(p);
 2845                 
 2846         return (error);
 2847 }
 2848 
 2849 
 2850 /*
 2851  * fp_drop_event
 2852  *
 2853  * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
 2854  *              reference previously taken by calling fp_lookup et al.
 2855  *
 2856  * Parameters:  p                               Process in which the fd lives
 2857  *              fd                              fd associated with the fileproc
 2858  *              fp                              fileproc on which to set the
 2859  *                                              flag and drop the reference
 2860  *
 2861  * Returns:     0                               Success
 2862  *      fp_drop:EBADF                           Bad file descriptor
 2863  *
 2864  * Locks:       This function internally takes and drops the proc_fdlock for
 2865  *              the supplied process
 2866  *
 2867  * Notes:       The fileproc must correspond to the fd in the supplied proc
 2868  */
 2869 int
 2870 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
 2871 {
 2872         int error;
 2873 
 2874         proc_fdlock_spin(p);
 2875 
 2876         fp->f_flags |= FP_WAITEVENT;
 2877         
 2878         error = fp_drop(p, fd, fp, 1);
 2879 
 2880         proc_fdunlock(p);
 2881                 
 2882         return (error);
 2883 }
 2884 
 2885 
 2886 /*
 2887  * fp_drop
 2888  *
 2889  * Description: Drop the I/O reference previously taken by calling fp_lookup
 2890  *              et al.
 2891  *
 2892  * Parameters:  p                               Process in which the fd lives
 2893  *              fd                              fd associated with the fileproc
 2894  *              fp                              fileproc to drop the reference
 2895  *                                              on; FILEPROC_NULL to use fd
 2896  *              locked                          flag to internally take and
 2897  *                                              drop proc_fdlock if it is not
 2898  *                                              already held by the caller
 2899  *
 2900  * Returns:     0                               Success
 2901  *              EBADF                           Bad file descriptor
 2902  *
 2903  * Locks:       This function internally takes and drops the proc_fdlock for
 2904  *              the supplied process if 'locked' is zero, and assumes that
 2905  *              the caller already holds this lock if 'locked' is non-zero.
 2906  *
 2907  * Notes:       The fileproc must correspond to the fd in the supplied proc
 2908  */
 2909 int
 2910 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
 2911 {
 2912         struct filedesc *fdp = p->p_fd;
 2913         int     needwakeup = 0;
 2914 
 2915         if (!locked)
 2916                 proc_fdlock_spin(p);
 2917          if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
 2918                         (fp = fdp->fd_ofiles[fd]) == NULL ||
 2919                         ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
 2920                          !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
 2921                 if (!locked)
 2922                         proc_fdunlock(p);
 2923                 return (EBADF);
 2924         }
 2925         fp->f_iocount--;
 2926 
 2927         if (p->p_fpdrainwait && fp->f_iocount == 0) {
 2928                 p->p_fpdrainwait = 0;
 2929                 needwakeup = 1;
 2930         }
 2931         if (!locked)
 2932                 proc_fdunlock(p);
 2933         if (needwakeup)
 2934                 wakeup(&p->p_fpdrainwait);
 2935                 
 2936         return (0);
 2937 }
 2938 
 2939 
 2940 /*
 2941  * file_vnode
 2942  *
 2943  * Description: Given an fd, look it up in the current process's per process
 2944  *              open file table, and return its internal vnode pointer.
 2945  *
 2946  * Parameters:  fd                              fd to obtain vnode from
 2947  *              vpp                             pointer to vnode return area
 2948  *
 2949  * Returns:     0                               Success
 2950  *              EINVAL                          The fd does not refer to a
 2951  *                                              vnode fileproc entry
 2952  *      fp_lookup:EBADF                         Bad file descriptor
 2953  *
 2954  * Implicit returns:
 2955  *              *vpp (modified)                 Returned vnode pointer
 2956  *
 2957  * Locks:       This function internally takes and drops the proc_fdlock for
 2958  *              the current process
 2959  *
 2960  * Notes:       If successful, this function increments the f_iocount on the
 2961  *              fd's corresponding fileproc.
 2962  *
 2963  *              The fileproc referenced is not returned; because of this, care
 2964  *              must be taken to not drop the last reference (e.g. by closing
 2965  *              the file).  This is inherently unsafe, since the reference may
 2966  *              not be recoverable from the vnode, if there is a subsequent
 2967  *              close that destroys the associated fileproc.  The caller should
 2968  *              therefore retain their own reference on the fileproc so that
 2969  *              the f_iocount can be dropped subsequently.  Failure to do this
 2970  *              can result in the returned pointer immediately becoming invalid
 2971  *              following the call.
 2972  *
 2973  *              Use of this function is discouraged.
 2974  */
 2975 int
 2976 file_vnode(int fd, struct vnode **vpp)
 2977 {
 2978         proc_t p = current_proc();
 2979         struct fileproc *fp;
 2980         int error;
 2981         
 2982         proc_fdlock_spin(p);
 2983         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 2984                 proc_fdunlock(p);
 2985                 return(error);
 2986         }
 2987         if (fp->f_type != DTYPE_VNODE) {
 2988                 fp_drop(p, fd, fp,1);
 2989                 proc_fdunlock(p);
 2990                 return(EINVAL);
 2991         }
 2992         *vpp = (struct vnode *)fp->f_data;
 2993         proc_fdunlock(p);
 2994 
 2995         return(0);
 2996 }
 2997 
 2998 
 2999 /*
 3000  * file_socket
 3001  *
 3002  * Description: Given an fd, look it up in the current process's per process
 3003  *              open file table, and return its internal socket pointer.
 3004  *
 3005  * Parameters:  fd                              fd to obtain socket from
 3006  *              sp                              pointer to socket return area
 3007  *
 3008  * Returns:     0                               Success
 3009  *              ENOTSOCK                        Not a socket
 3010  *              fp_lookup:EBADF                 Bad file descriptor
 3011  *
 3012  * Implicit returns:
 3013  *              *sp (modified)                  Returned socket pointer
 3014  *
 3015  * Locks:       This function internally takes and drops the proc_fdlock for
 3016  *              the current process
 3017  *
 3018  * Notes:       If successful, this function increments the f_iocount on the
 3019  *              fd's corresponding fileproc.
 3020  *
 3021  *              The fileproc referenced is not returned; because of this, care
 3022  *              must be taken to not drop the last reference (e.g. by closing
 3023  *              the file).  This is inherently unsafe, since the reference may
 3024  *              not be recoverable from the socket, if there is a subsequent
 3025  *              close that destroys the associated fileproc.  The caller should
 3026  *              therefore retain their own reference on the fileproc so that
 3027  *              the f_iocount can be dropped subsequently.  Failure to do this
 3028  *              can result in the returned pointer immediately becoming invalid
 3029  *              following the call.
 3030  *
 3031  *              Use of this function is discouraged.
 3032  */
 3033 int
 3034 file_socket(int fd, struct socket **sp)
 3035 {
 3036         proc_t p = current_proc();
 3037         struct fileproc *fp;
 3038         int error;
 3039         
 3040         proc_fdlock_spin(p);
 3041         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 3042                 proc_fdunlock(p);
 3043                 return(error);
 3044         }
 3045         if (fp->f_type != DTYPE_SOCKET) {
 3046                 fp_drop(p, fd, fp,1);
 3047                 proc_fdunlock(p);
 3048                 return(ENOTSOCK);
 3049         }
 3050         *sp = (struct socket *)fp->f_data;
 3051         proc_fdunlock(p);
 3052 
 3053         return(0);
 3054 }
 3055 
 3056 
 3057 /*
 3058  * file_flags
 3059  *
 3060  * Description: Given an fd, look it up in the current process's per process
 3061  *              open file table, and return its fileproc's flags field.
 3062  *
 3063  * Parameters:  fd                              fd whose flags are to be
 3064  *                                              retrieved
 3065  *              flags                           pointer to flags data area
 3066  *
 3067  * Returns:     0                               Success
 3069  *              fp_lookup:EBADF                 Bad file descriptor
 3070  *
 3071  * Implicit returns:
 3072  *              *flags (modified)               Returned flags field
 3073  *
 3074  * Locks:       This function internally takes and drops the proc_fdlock for
 3075  *              the current process
 3076  *
 3077  * Notes:       This function will internally increment and decrement the
 3078  *              f_iocount of the fileproc as part of its operation.
 3079  */
 3080 int
 3081 file_flags(int fd, int *flags)
 3082 {
 3083 
 3084         proc_t p = current_proc();
 3085         struct fileproc *fp;
 3086         int error;
 3087         
 3088         proc_fdlock_spin(p);
 3089         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 3090                 proc_fdunlock(p);
 3091                 return(error);
 3092         }
 3093         *flags = (int)fp->f_flag;
 3094         fp_drop(p, fd, fp,1);
 3095         proc_fdunlock(p);
 3096 
 3097         return(0);
 3098 }
 3099 
 3100 
 3101 /*
 3102  * file_drop
 3103  *
 3104  * Description: Drop an iocount reference on an fd, and wake up any waiters
 3105  *              for draining (i.e. blocked in fileproc_drain() called during
 3106  *              the last attempt to close a file).
 3107  *
 3108  * Parameters:  fd                              fd on which an ioreference is
 3109  *                                              to be dropped
 3110  *
 3111  * Returns:     0                               Success
 3112  *              EBADF                           Bad file descriptor
 3113  *
 3114  * Description: Given an fd, look it up in the current process's per process
 3115  *              open file table, and drop its fileproc's f_iocount by one
 3116  *
 3117  * Notes:       This is intended as a corresponding operation to the functions
 3118  *              file_vnode() and file_socket() operations.
 3119  *
 3120  *              Technically, the close reference is supposed to be protected
 3121  *              by a fileproc_drain(), however, a drain will only block if
 3122  *              the fd refers to a character device, and that device has had
 3123  *              preparefileread() called on it.  If it refers to something
 3124  *              other than a character device, then the drain will occur and
 3125  *              block each close attempt, rather than merely the last close.
 3126  *
 3127  *              Since it's possible for an fd that refers to a character
 3128  *              device to have an intermediate close followed by an open to
 3129  *              cause a different file to correspond to that descriptor,
 3130  *              unless there was a cautionary reference taken on the fileproc,
 3131  *              this is an inherently unsafe function.  This happens in the
 3132  *              case where multiple fd's in a process refer to the same
 3133  *              character device (e.g. stdin/out/err pointing to a tty, etc.).
 3134  *
 3135  *              Use of this function is discouraged.
 3136  */
 3137 int 
 3138 file_drop(int fd)
 3139 {
 3140         struct fileproc *fp;
 3141         proc_t p = current_proc();
 3142         int     needwakeup = 0;
 3143 
 3144         proc_fdlock_spin(p);
 3145         if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
 3146                         (fp = p->p_fd->fd_ofiles[fd]) == NULL ||
 3147                         ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
 3148                          !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
 3149                 proc_fdunlock(p);
 3150                 return (EBADF);
 3151         }
 3152         fp->f_iocount --;
 3153 
 3154         if (p->p_fpdrainwait && fp->f_iocount == 0) {
 3155                 p->p_fpdrainwait = 0;
 3156                 needwakeup = 1;
 3157         }
 3158         proc_fdunlock(p);
 3159 
 3160         if (needwakeup)
 3161                 wakeup(&p->p_fpdrainwait);
 3162         return(0);
 3163 }
 3164 
 3165 
 3166 /*
 3167  * falloc
 3168  *
 3169  * Description: Allocate an entry in the per process open file table and
 3170  *              return the corresponding fileproc and fd.
 3171  *
 3172  * Parameters:  p                               The process in whose open file
 3173  *                                              table the fd is to be allocated
 3174  *              resultfp                        Pointer to fileproc pointer
 3175  *                                              return area
 3176  *              resultfd                        Pointer to fd return area
 3177  *              ctx                             VFS context
 3178  *
 3179  * Returns:     0                               Success
 3180  *      falloc:ENFILE                           Too many open files in system
 3181  *      falloc:EMFILE                           Too many open files in process
 3182  *      falloc:ENOMEM                           M_FILEPROC or M_FILEGLOB zone
 3183  *                                              exhausted
 3184  *
 3185  * Implicit returns:
 3186  *              *resultfp (modified)            Returned fileproc pointer
 3187  *              *resultfd (modified)            Returned fd
 3188  *
 3189  * Locks:       This function takes and drops the proc_fdlock; if this lock
 3190  *              is already held, use falloc_locked() instead.
 3191  *
 3192  * Notes:       This function takes separate process and context arguments
 3193  *              solely to support kern_exec.c; otherwise, it would take
 3194  *              neither, and expect falloc_locked() to use the
 3195  *              vfs_context_current() routine internally.
 3196  */
 3197 int
 3198 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
 3199 {
 3200         int error;
 3201 
 3202         proc_fdlock(p);
 3203         error = falloc_locked(p, resultfp, resultfd, ctx, 1);
 3204         proc_fdunlock(p);
 3205 
 3206         return(error);
 3207 }
 3208 
 3209 
 3210 /*
 3211  * falloc_locked
 3212  *
 3213  * Create a new open file structure and allocate
 3214  * a file descriptor for the process that refers to it.
 3215  *
 3216  * Returns:     0                       Success
 3217  *
 3218  * Description: Allocate an entry in the per process open file table and
 3219  *              return the corresponding fileproc and fd.
 3220  *
 3221  * Parameters:  p                               The process in whose open file
 3222  *                                              table the fd is to be allocated
 3223  *              resultfp                        Pointer to fileproc pointer
 3224  *                                              return area
 3225  *              resultfd                        Pointer to fd return area
 3226  *              ctx                             VFS context
 3227  *              locked                          Flag to indicate whether the
 3228  *                                              caller holds proc_fdlock
 3229  *
 3230  * Returns:     0                               Success
 3231  *              ENFILE                          Too many open files in system
 3232  *              fdalloc:EMFILE                  Too many open files in process
 3233  *              ENOMEM                          M_FILEPROC or M_FILEGLOB zone
 3234  *                                              exhausted
 3235  *      fdalloc:ENOMEM
 3236  *
 3237  * Implicit returns:
 3238  *              *resultfp (modified)            Returned fileproc pointer
 3239  *              *resultfd (modified)            Returned fd
 3240  *
 3241  * Locks:       If the parameter 'locked' is zero, this function takes and
 3242  *              drops the proc_fdlock; if non-zero, the caller must hold the
 3243  *              lock.
 3244  *
 3245  * Notes:       If you intend to use a non-zero 'locked' parameter, use the
 3246  *              utility function falloc() instead.
 3247  *
 3248  *              This function takes separate process and context arguments
 3249  *              solely to support kern_exec.c; otherwise, it would take
 3250  *              neither, and use the vfs_context_current() routine internally.
 3251  */
 3252 int
 3253 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
 3254               vfs_context_t ctx, int locked)
 3255 {
 3256         struct fileproc *fp, *fq;
 3257         struct fileglob *fg;
 3258         int error, nfd;
 3259 
 3260         if (!locked)
 3261                 proc_fdlock(p);
 3262         if ( (error = fdalloc(p, 0, &nfd)) ) {
 3263                 if (!locked)
 3264                         proc_fdunlock(p);
 3265                 return (error);
 3266         }
 3267         if (nfiles >= maxfiles) {
 3268                 if (!locked)
 3269                         proc_fdunlock(p);
 3270                 tablefull("file");
 3271                 return (ENFILE);
 3272         }
 3273 #if CONFIG_MACF
 3274         error = mac_file_check_create(proc_ucred(p));
 3275         if (error) {
 3276                 if (!locked)
 3277                         proc_fdunlock(p);
 3278                 return (error);
 3279         }
 3280 #endif
 3281 
 3282         /*
 3283          * Allocate a new file descriptor.
 3284          * If the process has file descriptor zero open, add to the list
 3285          * of open files at that point, otherwise put it at the front of
 3286          * the list of open files.
 3287          */
 3288         proc_fdunlock(p);
 3289 
 3290         MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
 3291         if (fp == NULL) {
 3292                 if (locked)
 3293                         proc_fdlock(p);
 3294                 return (ENOMEM);
 3295         }
 3296         MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
 3297         if (fg == NULL) {
 3298                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
 3299                 if (locked)
 3300                         proc_fdlock(p);
 3301                 return (ENOMEM);
 3302         }
 3303         bzero(fp, sizeof(struct fileproc));
 3304         bzero(fg, sizeof(struct fileglob));
 3305         lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
 3306 
 3307         fp->f_iocount = 1;
 3308         fg->fg_count = 1;
 3309         fp->f_fglob = fg;
 3310 #if CONFIG_MACF
 3311         mac_file_label_init(fg);
 3312 #endif
 3313 
 3314         kauth_cred_ref(ctx->vc_ucred);
 3315 
 3316         proc_fdlock(p);
 3317 
 3318         fp->f_cred = ctx->vc_ucred;
 3319 
 3320 #if CONFIG_MACF
 3321         mac_file_label_associate(fp->f_cred, fg);
 3322 #endif
 3323 
 3324         lck_mtx_lock_spin(file_flist_lock);
 3325 
 3326         nfiles++;
 3327 
 3328         if ( (fq = p->p_fd->fd_ofiles[0]) ) {
 3329                 LIST_INSERT_AFTER(fq->f_fglob, fg, f_list);
 3330         } else {
 3331                 LIST_INSERT_HEAD(&filehead, fg, f_list);
 3332         }
 3333         lck_mtx_unlock(file_flist_lock);
 3334 
 3335         p->p_fd->fd_ofiles[nfd] = fp;
 3336 
 3337         if (!locked)
 3338                 proc_fdunlock(p);
 3339 
 3340         if (resultfp)
 3341                 *resultfp = fp;
 3342         if (resultfd)
 3343                 *resultfd = nfd;
 3344 
 3345         return (0);
 3346 }
 3347 
 3348 
 3349 /*
 3350  * fg_free
 3351  *
 3352  * Description: Free a file structure; drop the global open file count, and
 3353  *              drop the credential reference, if the fileglob has one, and
 3354  *              destroy the instance mutex before freeing
 3355  *
 3356  * Parameters:  fg                              Pointer to fileglob to be
 3357  *                                              freed
 3358  *
 3359  * Returns:     void
 3360  */
 3361 void
 3362 fg_free(struct fileglob *fg)
 3363 {
 3364         lck_mtx_lock_spin(file_flist_lock);
 3365         LIST_REMOVE(fg, f_list);
 3366         nfiles--;
 3367         lck_mtx_unlock(file_flist_lock);
 3368 
 3369         if (IS_VALID_CRED(fg->fg_cred)) {
 3370                 kauth_cred_unref(&fg->fg_cred);
 3371         }
 3372         lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
 3373 
 3374 #if CONFIG_MACF
 3375         mac_file_label_destroy(fg);
 3376 #endif
 3377         FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
 3378 }
 3379 
 3380 
 3381 /*
 3382  * fdexec
 3383  *
 3384  * Description: Perform close-on-exec processing for all files in a process
 3385  *              that are either marked as close-on-exec, or which were in the
 3386  *              process of being opened at the time of the execve
 3387  *
 3388  * Parameters:  p                               Pointer to process calling
 3389  *                                              execve
 3390  *
 3391  * Returns:     void
 3392  *
 3393  * Locks:       This function internally takes and drops proc_fdlock()
 3394  *
 3395  * Notes:       This function drops and retakes the kernel funnel; this is
 3396  *              inherently unsafe, since another thread may have the
 3397  *              proc_fdlock.
 3398  *
 3399  * XXX:         We should likely reverse the lock and funnel drop/acquire
 3400  *              order to avoid the small race window; it's also possible that
 3401  *              if the program doing the exec has an outstanding listen socket
 3402  *              and a network connection is completed asynchronously that we
 3403  *              will end up with a "ghost" socket reference in the new process.
 3404  *
 3405  *              This needs reworking to make it safe to remove the funnel from
 3406  *              the execve and posix_spawn system calls.
 3407  */
 3408 void
 3409 fdexec(proc_t p)
 3410 {
 3411         struct filedesc *fdp = p->p_fd;
 3412         int i = fdp->fd_lastfile;
 3413         struct fileproc *fp;
 3414 
 3415         proc_fdlock(p);
 3416         while (i >= 0) {
 3417 
 3418                 fp = fdp->fd_ofiles[i];
 3419                 if (
 3420                     ((fdp->fd_ofileflags[i] & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
 3421 #if CONFIG_MACF
 3422                     || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
 3423 #endif
 3424                 ) {
 3425                         if (i < fdp->fd_knlistsize)
 3426                                 knote_fdclose(p, i);
 3427                         procfdtbl_clearfd(p, i);
 3428                         if (i == fdp->fd_lastfile && i > 0)
 3429                                 fdp->fd_lastfile--;
 3430                         if (i < fdp->fd_freefile)
 3431                                 fdp->fd_freefile = i;
 3432                         closef_locked(fp, fp->f_fglob, p);
 3433                         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
 3434                 }
 3435                 i--;
 3436         }
 3437         proc_fdunlock(p);
 3438 }
 3439 
 3440 
 3441 /*
 3442  * fdcopy
 3443  *
 3444  * Description: Copy a filedesc structure.  This is normally used as part of
 3445  *              forkproc() when forking a new process, to copy the per process
 3446  *              open file table over to the new process.
 3447  *
 3448  * Parameters:  p                               Process whose open file table
 3449  *                                              is to be copied (parent)
 3450  *              uth_cdir                        Per thread current working
 3451  *                                              directory, or NULL
 3452  *
 3453  * Returns:     NULL                            Copy failed
 3454  *              !NULL                           Pointer to new struct filedesc
 3455  *
 3456  * Locks:       This function internally takes and drops proc_fdlock()
 3457  *
 3458  * Notes:       Files are copied directly, ignoring the new resource limits
 3459  *              for the process that's being copied into.  Since the descriptor
 3460  *              references are just additional references, this does not count
 3461  *              against the number of open files on the system.
 3462  *
 3463  *              The struct filedesc includes the current working directory,
 3464  *              and the current root directory, if the process is chroot'ed.
 3465  *
 3466  *              If the exec was called by a thread using a per thread current
 3467  *              working directory, we inherit the working directory from the
 3468  *              thread making the call, rather than from the process.
 3469  *
 3470  *              In the case of a failure to obtain a reference, for most cases,
 3471  *              the file entry will be silently dropped.  There's an exception
 3472  *              for the case of a chroot dir, since a failure to obtain a
 3473  *              reference there would constitute an "escape" from the chroot
 3474  *              environment, which must not be allowed.  In that case, we will
 3475  *              deny the execve() operation, rather than allowing the escape.
 3476  */
 3477 struct filedesc *
 3478 fdcopy(proc_t p, vnode_t uth_cdir)
 3479 {
 3480         struct filedesc *newfdp, *fdp = p->p_fd;
 3481         int i;
 3482         struct fileproc *ofp, *fp;
 3483         vnode_t v_dir;
 3484 
 3485         MALLOC_ZONE(newfdp, struct filedesc *,
 3486                         sizeof(*newfdp), M_FILEDESC, M_WAITOK);
 3487         if (newfdp == NULL)
 3488                 return(NULL);
 3489 
 3490         proc_fdlock(p);
 3491 
 3492         /*
 3493          * the FD_CHROOT flag will be inherited via this copy
 3494          */
 3495         (void) memcpy(newfdp, fdp, sizeof(*newfdp));
 3496 
 3497         /*
 3498          * If we are running with per-thread current working directories,
 3499          * inherit the new current working directory from the current thread
 3500          * instead, before we take our references.
 3501          */
 3502         if (uth_cdir != NULLVP)
 3503                 newfdp->fd_cdir = uth_cdir;
 3504 
 3505         /*
 3506          * For both fd_cdir and fd_rdir make sure we get
 3507          * a valid reference... if we can't, than set
 3508          * set the pointer(s) to NULL in the child... this
 3509          * will keep us from using a non-referenced vp
 3510          * and allows us to do the vnode_rele only on
 3511          * a properly referenced vp
 3512          */
 3513         if ( (v_dir = newfdp->fd_cdir) ) {
 3514                 if (vnode_getwithref(v_dir) == 0) {
 3515                         if ( (vnode_ref(v_dir)) )
 3516                                 newfdp->fd_cdir = NULL;
 3517                         vnode_put(v_dir);
 3518                 } else
 3519                         newfdp->fd_cdir = NULL;
 3520         }
 3521         if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
 3522                 /*
 3523                  * we couldn't get a new reference on
 3524                  * the current working directory being
 3525                  * inherited... we might as well drop
 3526                  * our reference from the parent also
 3527                  * since the vnode has gone DEAD making
 3528                  * it useless... by dropping it we'll
 3529                  * be that much closer to recyling it
 3530                  */
 3531                 vnode_rele(fdp->fd_cdir);
 3532                 fdp->fd_cdir = NULL;
 3533         }
 3534 
 3535         if ( (v_dir = newfdp->fd_rdir) ) {
 3536                 if (vnode_getwithref(v_dir) == 0) {
 3537                         if ( (vnode_ref(v_dir)) )
 3538                                 newfdp->fd_rdir = NULL;
 3539                         vnode_put(v_dir);
 3540                 } else {
 3541                         newfdp->fd_rdir = NULL;
 3542                 }
 3543         }
 3544         /* Coming from a chroot environment and unable to get a reference... */
 3545         if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
 3546                 /*
 3547                  * We couldn't get a new reference on
 3548                  * the chroot directory being
 3549                  * inherited... this is fatal, since
 3550                  * otherwise it would constitute an
 3551                  * escape from a chroot environment by
 3552                  * the new process.
 3553                  */
 3554                 if (newfdp->fd_cdir)
 3555                         vnode_rele(newfdp->fd_cdir);
 3556                 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
 3557                 return(NULL);
 3558         }
 3559         newfdp->fd_refcnt = 1;
 3560 
 3561         /*
 3562          * If the number of open files fits in the internal arrays
 3563          * of the open file structure, use them, otherwise allocate
 3564          * additional memory for the number of descriptors currently
 3565          * in use.
 3566          */
 3567         if (newfdp->fd_lastfile < NDFILE)
 3568                 i = NDFILE;
 3569         else {
 3570                 /*
 3571                  * Compute the smallest multiple of NDEXTENT needed
 3572                  * for the file descriptors currently in use,
 3573                  * allowing the table to shrink.
 3574                  */
 3575                 i = newfdp->fd_nfiles;
 3576                 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
 3577                         i /= 2;
 3578         }
 3579         proc_fdunlock(p);
 3580 
 3581         MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
 3582                                 i * OFILESIZE, M_OFILETABL, M_WAITOK);
 3583         if (newfdp->fd_ofiles == NULL) {
 3584                 if (newfdp->fd_cdir)
 3585                         vnode_rele(newfdp->fd_cdir);
 3586                 if (newfdp->fd_rdir)
 3587                         vnode_rele(newfdp->fd_rdir);
 3588 
 3589                 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
 3590                 return(NULL);
 3591         }
 3592         (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
 3593         proc_fdlock(p);
 3594 
 3595         newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
 3596         newfdp->fd_nfiles = i;
 3597 
 3598         if (fdp->fd_nfiles > 0) {
 3599                 struct fileproc **fpp;
 3600                 char *flags;
 3601 
 3602                 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
 3603                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
 3604                 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
 3605                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
 3606 
 3607                 /*
 3608                  * kq descriptors cannot be copied.
 3609                  */
 3610                 if (newfdp->fd_knlistsize != -1) {
 3611                         fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
 3612                         for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
 3613                                 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
 3614                                         *fpp = NULL;
 3615                                         if (i < newfdp->fd_freefile)
 3616                                                 newfdp->fd_freefile = i;
 3617                                 }
 3618                                 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
 3619                                         newfdp->fd_lastfile--;
 3620                         }
 3621                         newfdp->fd_knlist = NULL;
 3622                         newfdp->fd_knlistsize = -1;
 3623                         newfdp->fd_knhash = NULL;
 3624                         newfdp->fd_knhashmask = 0;
 3625                 }
 3626                 fpp = newfdp->fd_ofiles;
 3627                 flags = newfdp->fd_ofileflags;
 3628 
 3629                 for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++)
 3630                         if ((ofp = *fpp) != NULL && !(*flags & UF_RESERVED)) {
 3631                                 MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
 3632                                 if (fp == NULL) {
 3633                                         /*
 3634                                          * XXX no room to copy, unable to
 3635                                          * XXX safely unwind state at present
 3636                                          */
 3637                                         *fpp = NULL;
 3638                                 } else {
 3639                                         bzero(fp, sizeof(struct fileproc));
 3640                                         fp->f_flags = ofp->f_flags;
 3641                                         //fp->f_iocount = ofp->f_iocount;
 3642                                         fp->f_iocount = 0;
 3643                                         fp->f_fglob = ofp->f_fglob;
 3644                                         (void)fg_ref(fp);
 3645                                         *fpp = fp;
 3646                                 }
 3647                         } else {
 3648                                 if (i < newfdp->fd_freefile)
 3649                                         newfdp->fd_freefile = i;
 3650                                 *fpp = NULL;
 3651                                 *flags = 0;
 3652                         }
 3653         }
 3654 
 3655         proc_fdunlock(p);
 3656         return (newfdp);
 3657 }
 3658 
 3659 
 3660 /*
 3661  * fdfree
 3662  *
 3663  * Description: Release a filedesc (per process open file table) structure;
 3664  *              this is done on process exit(), or from forkproc_free() if
 3665  *              the fork fails for some reason subsequent to a successful
 3666  *              call to fdcopy()
 3667  *
 3668  * Parameters:  p                               Pointer to process going away
 3669  *
 3670  * Returns:     void
 3671  *
 3672  * Locks:       This function internally takes and drops proc_fdlock()
 3673  */
 3674 void
 3675 fdfree(proc_t p)
 3676 {
 3677         struct filedesc *fdp;
 3678         struct fileproc *fp;
 3679         int i;
 3680 
 3681         proc_fdlock(p);
 3682 
 3683         /* Certain daemons might not have file descriptors */
 3684         fdp = p->p_fd;
 3685 
 3686         if ((fdp == NULL) || (--fdp->fd_refcnt > 0)) {
 3687                 proc_fdunlock(p);
 3688                 return;
 3689         }
 3690         if (fdp->fd_refcnt == 0xffff)
 3691                 panic("fdfree: bad fd_refcnt");
 3692 
 3693         /* Last reference: the structure can't change out from under us */
 3694 
 3695         if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
 3696                 for (i = fdp->fd_lastfile; i >= 0; i--) {
 3697                         if ((fp = fdp->fd_ofiles[i]) != NULL) {
 3698                           
 3699                           if (fdp->fd_ofileflags[i] & UF_RESERVED)
 3700                                 panic("fdfree: found fp with UF_RESERVED\n");
 3701 
 3702                                 /* closef drops the iocount ... */
 3703                                 if ((fp->f_flags & FP_INCHRREAD) != 0) 
 3704                                         fp->f_iocount++;
 3705                                 procfdtbl_reservefd(p, i);
 3706 
 3707                                 if (i < fdp->fd_knlistsize)
 3708                                         knote_fdclose(p, i);
 3709                                 if (fp->f_flags & FP_WAITEVENT) 
 3710                                         (void)waitevent_close(p, fp);
 3711                                 (void) closef_locked(fp, fp->f_fglob, p);
 3712                                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
 3713                         }
 3714                 }
 3715                 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
 3716                 fdp->fd_ofiles = NULL;
 3717                 fdp->fd_nfiles = 0;
 3718         }        
 3719 
 3720         proc_fdunlock(p);
 3721         
 3722         if (fdp->fd_cdir)
 3723                 vnode_rele(fdp->fd_cdir);
 3724         if (fdp->fd_rdir)
 3725                 vnode_rele(fdp->fd_rdir);
 3726 
 3727         proc_fdlock_spin(p);
 3728         p->p_fd = NULL;
 3729         proc_fdunlock(p);
 3730 
 3731         if (fdp->fd_knlist)
 3732                 FREE(fdp->fd_knlist, M_KQUEUE);
 3733         if (fdp->fd_knhash)
 3734                 FREE(fdp->fd_knhash, M_KQUEUE);
 3735 
 3736         FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
 3737 }
 3738 
 3739 
 3740 /*
 3741  * closef_finish
 3742  *
 3743  * Description: Called on last open instance for a fileglob for a file being
 3744  *              closed.
 3745  *
 3746  * Parameters:  fp                      Pointer to fileproc for fd
 3747  *              fg                      Pointer to fileglob for fd
 3748  *              p                       Pointer to proc structure
 3749  *
 3750  * Returns:     0                       Success
 3751  *      <fo_close>:???                  Anything returnable by a per-fileops
 3752  *                                      close function
 3753  *
 3754  * Note:        fp can only be non-NULL if p is also non-NULL.  If p is NULL,
 3755  *              then fg must either be locked (FHASLOCK) or must not have a
 3756  *              type of DTYPE_VNODE.
 3757  *
 3758  *              On return, the fg is freed.
 3759  *
 3760  *              This function may block draining output to a character
 3761  *              device on last close of that device.
 3762  */
 3763 static int
 3764 closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx)
 3765 {
 3766         int error;
 3767 
 3768 
 3769         /* fg_ops completed initialization? */
 3770         if (fg->fg_ops)
 3771                 error = fo_close(fg, ctx);
 3772         else
 3773                 error = 0;
 3774 
 3775         /* if fp is non-NULL, drain it out */
 3776         if (((fp != (struct fileproc *)0) && ((fp->f_flags & FP_INCHRREAD) != 0))) {
 3777                 proc_fdlock_spin(p);
 3778                 if ( ((fp->f_flags & FP_INCHRREAD) != 0) ) {
 3779                         fileproc_drain(p, fp);
 3780                 }
 3781                 proc_fdunlock(p);
 3782         }
 3783         fg_free(fg);
 3784 
 3785         return (error);
 3786 }
 3787 
 3788 /*
 3789  * closef_locked
 3790  *
 3791  * Description: Internal form of closef; called with proc_fdlock held
 3792  *
 3793  * Parameters:  fp                      Pointer to fileproc for fd
 3794  *              fg                      Pointer to fileglob for fd
 3795  *              p                       Pointer to proc structure
 3796  *
 3797  * Returns:     0                       Success
 3798  *      closef_finish:???               Anything returnable by a per-fileops
 3799  *                                      close function
 3800  *
 3801  * Note:        Decrements reference count on file structure; if this was the
 3802  *              last reference, then closef_finish() is called
 3803  *
 3804  *              p and fp are allowed to  be NULL when closing a file that was
 3805  *              being passed in a message (but only if we are called when this
 3806  *              is NOT the last reference).
 3807  */
 3808 int
 3809 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
 3810 {
 3811         struct vnode *vp;
 3812         struct flock lf;
 3813         struct vfs_context context;
 3814         int error;
 3815 
 3816         if (fg == NULL) {
 3817                 return (0);
 3818         }
 3819 
 3820         /* Set up context with cred stashed in fg */
 3821         if (p == current_proc())
 3822                 context.vc_thread = current_thread();
 3823         else
 3824                 context.vc_thread = NULL;
 3825         context.vc_ucred = fg->fg_cred;
 3826 
 3827         /*
 3828          * POSIX record locking dictates that any close releases ALL
 3829          * locks owned by this process.  This is handled by setting
 3830          * a flag in the unlock to free ONLY locks obeying POSIX
 3831          * semantics, and not to free BSD-style file locks.
 3832          * If the descriptor was in a message, POSIX-style locks
 3833          * aren't passed with the descriptor.
 3834          */
 3835         if (p && (p->p_ladvflag & P_LADVLOCK) && fg->fg_type == DTYPE_VNODE) {
 3836                 proc_fdunlock(p);
 3837 
 3838                 lf.l_whence = SEEK_SET;
 3839                 lf.l_start = 0;
 3840                 lf.l_len = 0;
 3841                 lf.l_type = F_UNLCK;
 3842                 vp = (struct vnode *)fg->fg_data;
 3843 
 3844                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 3845                         (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context);
 3846                         (void)vnode_put(vp);
 3847                 }
 3848                 proc_fdlock(p);
 3849         }
 3850         lck_mtx_lock_spin(&fg->fg_lock);
 3851         fg->fg_count--;
 3852 
 3853         if (fg->fg_count > 0) {
 3854                 lck_mtx_unlock(&fg->fg_lock);
 3855                 return (0);
 3856         }
 3857 #if DIAGNOSTIC
 3858         if (fg->fg_count != 0)
 3859                 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
 3860 #endif
 3861 
 3862         if (fp && (fp->f_flags & FP_WRITTEN))
 3863                 fg->fg_flag |= FWASWRITTEN;
 3864 
 3865         fg->fg_lflags |= FG_TERM;
 3866         lck_mtx_unlock(&fg->fg_lock);
 3867 
 3868         proc_fdunlock(p);
 3869         error = closef_finish(fp, fg, p, &context);
 3870         proc_fdlock(p);
 3871 
 3872         return(error);
 3873 }
 3874 
 3875 
 3876 /* sleep address to permit wakeup of select by fileproc_drain() */
 3877 extern int selwait;
 3878 
 3879 
 3880 /*
 3881  * fileproc_drain
 3882  *
 3883  * Description: Drain out pending I/O operations
 3884  *
 3885  * Parameters:  p                               Process closing this file
 3886  *              fp                              fileproc struct for the open
 3887  *                                              instance on the file
 3888  *
 3889  * Returns:     void
 3890  *
 3891  * Locks:       Assumes the caller holds the proc_fdlock
 3892  *
 3893  * Notes:       For character devices, this occurs on the last close of the
 3894  *              device; for all other file descriptors, this occurs on each
 3895  *              close to prevent fd's from being closed out from under
 3896  *              operations currently in progress and blocked
 3897  *
 3898  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
 3899  *              regarding their use and interaction with this function.
 3900  */
 3901 void
 3902 fileproc_drain(proc_t p, struct fileproc * fp)
 3903 {
 3904         struct vfs_context context;
 3905 
 3906         context.vc_thread = proc_thread(p);     /* XXX */
 3907         context.vc_ucred = fp->f_fglob->fg_cred;
 3908 
 3909         fp->f_iocount-- ; /* (the one the close holds) */
 3910 
 3911         while (fp->f_iocount) {
 3912 
 3913                 lck_mtx_convert_spin(&p->p_fdmlock);
 3914 
 3915                 if (((fp->f_flags & FP_INSELECT)== FP_INSELECT)) {
 3916                         wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, &selwait, THREAD_INTERRUPTED);
 3917                 } else  {
 3918                         if (fp->f_fglob->fg_ops->fo_drain) {
 3919                                 (*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
 3920                         }
 3921                 }
 3922                 p->p_fpdrainwait = 1;
 3923 
 3924                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
 3925 
 3926         }
 3927 }
 3928 
 3929 
 3930 /*
 3931  * fp_free
 3932  *
 3933  * Description: Release the fd and free the fileproc associated with the fd
 3934  *              in the per process open file table of the specified process;
 3935  *              these values must correspond.
 3936  *
 3937  * Parameters:  p                               Process containing fd
 3938  *              fd                              fd to be released
 3939  *              fp                              fileproc to be freed
 3940  *
 3941  * Returns:     0                               Success
 3942  *
 3943  * Notes:       XXX function should be void - no one interprets the returns
 3944  *              XXX code
 3945  */
 3946 int
 3947 fp_free(proc_t p, int fd, struct fileproc * fp)
 3948 {
              /*
               * Only the descriptor-table update (fdrelse) is done under the
               * process fd lock, taken here in its spin variant.
               */
 3949         proc_fdlock_spin(p);
 3950         fdrelse(p, fd);
 3951         proc_fdunlock(p);
 3952 
              /*
               * With the lock dropped, release the fileglob referenced by fp
               * and then the fileproc itself.  fp must not be used afterwards.
               */
 3953         fg_free(fp->f_fglob);
 3954         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
              /* Always succeeds; see the XXX note in the header comment. */
 3955         return(0);
 3956 }
 3957 
 3958 
 3959 /*
 3960  * flock
 3961  *
 3962  * Description: Apply an advisory lock on a file descriptor.
 3963  *
 3964  * Parameters:  p                               Process making request
 3965  *              uap->fd                         fd on which the lock is to be
 3966  *                                              attempted
 3967  *              uap->how                        (Un)Lock bits, including type
 3968  *              retval                          Pointer to the call return area
       *                                              (unused by this call)
 3969  *              
 3970  * Returns:     0                               Success
       *              EBADF                           'how' has none of LOCK_UN,
       *                                              LOCK_EX or LOCK_SH set
 3971  *      fp_getfvp:EBADF                         Bad file descriptor
 3972  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
 3973  *      vnode_getwithref:???
       *      mac_file_check_lock:???                 (CONFIG_MACF builds only)
 3974  *      VNOP_ADVLOCK:???
 3975  *
 3976  * Implicit returns:
 3977  *              None; *retval is not written
 3978  *
 3979  * Notes:       Just attempt to get a record lock of the requested type on
 3980  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
       *
       *              LOCK_UN is tested first, so it takes precedence when it is
       *              combined with LOCK_EX or LOCK_SH.
 3981  */
 3982 int
 3983 flock(proc_t p, struct flock_args *uap, __unused register_t *retval)
 3984 {
 3985         int fd = uap->fd;
 3986         int how = uap->how;
 3987         struct fileproc *fp;
 3988         struct vnode *vp;
 3989         struct flock lf;
 3990         vfs_context_t ctx = vfs_context_current();
 3991         int error=0;
 3992 
 3993         AUDIT_ARG(fd, uap->fd);
              /*
               * Look up the fileproc and vnode for fd.  On success the
               * fileproc is released again by fp_drop() at out1.
               */
 3994         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
 3995                 return(error);
 3996         }
              /* Paired with vnode_put() at out; on failure skip that put. */
 3997         if ( (error = vnode_getwithref(vp)) ) {
 3998                 goto out1;
 3999         }
 4000         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 4001 
              /* Range covering the whole file. */
 4002         lf.l_whence = SEEK_SET;
 4003         lf.l_start = 0;
 4004         lf.l_len = 0;
 4005         if (how & LOCK_UN) {
                      /* Unlock request: clear FHASLOCK, then release the lock. */
 4006                 lf.l_type = F_UNLCK;
 4007                 fp->f_flag &= ~FHASLOCK;
 4008                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx);
 4009                 goto out;
 4010         }
              /* Exclusive maps to a write lock, shared to a read lock. */
 4011         if (how & LOCK_EX)
 4012                 lf.l_type = F_WRLCK;
 4013         else if (how & LOCK_SH)
 4014                 lf.l_type = F_RDLCK;
 4015         else {
                      /* No recognised operation bit was supplied. */
 4016                 error = EBADF;
 4017                 goto out;
 4018         }
 4019 #if CONFIG_MACF
 4020         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
 4021         if (error)
 4022                 goto out;
 4023 #endif
              /*
               * NOTE(review): FHASLOCK is set before the lock attempt and is
               * not cleared here if VNOP_ADVLOCK fails -- confirm that the
               * close path tolerates a set flag without a held lock.
               */
 4024         fp->f_flag |= FHASLOCK;
 4025         if (how & LOCK_NB) {
                      /* Non-blocking: F_WAIT is not passed. */
 4026                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx);
 4027                 goto out;       
 4028         }
              /* Blocking request: same call with F_WAIT added. */
 4029         error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx);
 4030 out:
 4031         (void)vnode_put(vp);
 4032 out1:
 4033         fp_drop(p, fd, fp, 0);
 4034         return(error);
 4035 
 4036 }
 4037 
 4038 /*
 4039  * dupfdopen
 4040  *
 4041  * Description: Duplicate the specified descriptor to a free descriptor;
 4042  *              this is the second half of fdopen(), above.
 4043  *
 4044  * Parameters:  fdp                             filedesc pointer to fill in
 4045  *              indx                            fd to dup to
 4046  *              dfd                             fd to dup from
 4047  *              mode                            mode to set on new fd
 4048  *              error                           command code
 4049  *
 4050  * Returns:     0                               Success
 4051  *              EBADF                           Source fd is bad
 4052  *              EACCES                          Requested mode not allowed
 4053  *              !0                              'error', if not ENODEV
 4054  *                                              (this includes ENXIO)
       *      mac_file_check_dup:???                  (CONFIG_MACF builds only)
 4055  *
 4056  * Notes:       XXX This is not thread safe; see fdopen() above
       *
       *              The fd lock taken is that of current_proc(), not one
       *              derived from fdp.
       *              NOTE(review): presumably fdp always belongs to the
       *              current process -- confirm against callers.
 4057  */
 4058 int
 4059 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error)
 4060 {
 4061         struct fileproc *wfp;
 4062         struct fileproc *fp;
 4063 #if CONFIG_MACF
 4064         int myerror;
 4065 #endif
 4066         proc_t p = current_proc();
 4067 
 4068         /*
 4069          * If the to-be-dup'd fd number is greater than the allowed number
 4070          * of file descriptors, or the fd to be dup'd has already been
 4071          * closed, reject.  Note, check for new == old is necessary as
 4072          * falloc could allocate an already closed to-be-dup'd descriptor
 4073          * as the new descriptor.
 4074          */
 4075         proc_fdlock(p);
 4076 
              /*
               * NOTE(review): indx is not range-checked and fp is not tested
               * for NULL before fp->f_fglob is read in the ENODEV case --
               * presumably the caller has already allocated slot indx; confirm.
               */
 4077         fp = fdp->fd_ofiles[indx];
 4078         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
 4079                         (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
 4080                         (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
 4081 
 4082                 proc_fdunlock(p);
 4083                 return (EBADF);
 4084         }
 4085 #if CONFIG_MACF
 4086         myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
 4087         if (myerror) {
 4088                 proc_fdunlock(p);
 4089                 return (myerror);
 4090         }
 4091 #endif
 4092         /*
 4093          * Only ENODEV is handled specially here.
 4094          *
 4095          * For ENODEV simply dup (dfd) to file descriptor
 4096          * (indx) and return.
 4097          *
 4098          * ENXIO (steal the file structure from (dfd) and store
 4099          * it in (indx), effectively closing (dfd)) is NOT
 4100          * implemented by the switch below; it falls to default.
 4101          *
 4102          * Any other error code is just returned.
 4103          */
 4104         switch (error) {
 4105         case ENODEV:
 4106                 /*
 4107                  * Check that the mode the file is being opened for is a
 4108                  * subset of the mode of the existing descriptor.
 4109                  */
 4110                 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
 4111                         proc_fdunlock(p);
 4112                         return (EACCES);
 4113                 }
 4114                 if (indx > fdp->fd_lastfile)
 4115                         fdp->fd_lastfile = indx;
                      /* Take the new reference on wfp's fileglob first... */
 4116                 (void)fg_ref(wfp);
 4117 
                      /*
                       * ...then release whatever fileglob fp had, and make fp
                       * share wfp's fileglob.
                       */
 4118                 if (fp->f_fglob)
 4119                         fg_free(fp->f_fglob);
 4120                 fp->f_fglob = wfp->f_fglob;
 4121 
                      /* The per-descriptor flags are copied from dfd as well. */
 4122                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 4123 
 4124                 proc_fdunlock(p);
 4125                 return (0);
 4126 
 4127         default:
                      /* Every other code, ENXIO included, is handed back as is. */
 4128                 proc_fdunlock(p);
 4129                 return (error);
 4130         }
 4131         /* NOTREACHED */
 4132 }
 4133 
 4134 
 4135 /*
 4136  * fg_ref
 4137  *
 4138  * Description: Add a reference to a fileglob by fileproc
 4139  *
 4140  * Parameters:  fp                              fileproc containing fileglob
 4141  *                                              pointer
 4142  *
 4143  * Returns:     void
 4144  *
       * Locks:       Takes and drops the fileglob's fg_lock (spin variant)
       *              around the count update.
       *
 4145  * Notes:       XXX Should use OSAddAtomic?
       *
       *              On DIAGNOSTIC builds, panics if fp carries flag bits
       *              outside FP_VALID_FLAGS, or if the fileglob's count is
       *              already zero when the reference is requested.
 4146  */
 4147 void
 4148 fg_ref(struct fileproc * fp)
 4149 {
 4150         struct fileglob *fg;
 4151 
 4152         fg = fp->f_fglob;
 4153 
 4154         lck_mtx_lock_spin(&fg->fg_lock);
 4155 
 4156 #if DIAGNOSTIC
              /*
               * Pointers are printed with %p; casting them to unsigned int
               * for %x would drop the upper half of the address on LP64.
               */
 4157         if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
 4158                 panic("fg_ref: invalid bits on fp %p\n", (void *)fp);
 4159 
 4160         if (fg->fg_count == 0)
 4161                 panic("fg_ref: adding fgcount to zeroed fg: fp %p, fg %p\n", (void *)fp, (void *)fg);
 4162 #endif
 4163         fg->fg_count++;
 4164         lck_mtx_unlock(&fg->fg_lock);
 4165 }
 4166 
 4167 
 4168 /*
 4169  * fg_drop
 4170  *
 4171  * Description: Remove a reference to a fileglob by fileproc
 4172  *
 4173  * Parameters:  fp                              fileproc containing fileglob
 4174  *                                              pointer
 4175  *
 4176  * Returns:     void
 4177  *
 4178  * Notes:       XXX Should use OSAddAtomic?
 4179  */
 4180 void
 4181 fg_drop(struct fileproc * fp)
 4182 {
 4183         struct fileglob *fg;
 4184 
 4185         fg = fp->f_fglob;
              /*
               * Decrement the fileglob's reference count under its fg_lock.
               * Nothing is freed and the count is not checked here, so any
               * action on reaching zero is left to other code.
               */
 4186         lck_mtx_lock_spin(&fg->fg_lock);
 4187         fg->fg_count--;
 4188         lck_mtx_unlock(&fg->fg_lock);
 4189 }
 4190 
 4191 
 4192 /*
 4193  * fg_insertuipc
 4194  *
 4195  * Description: Insert fileglob onto message queue
 4196  *
 4197  * Parameters:  fg                              Fileglob pointer to insert
 4198  *
 4199  * Returns:     void
 4200  *
 4201  * Locks:       Takes and drops fg_lock, potentially many times
 4202  */
 4203 void
 4204 fg_insertuipc(struct fileglob * fg)
 4205 {
              /* Set when this call is the one that must link fg onto the list. */
 4206         int insertque = 0;
 4207 
 4208         lck_mtx_lock_spin(&fg->fg_lock);
 4209 
              /*
               * While FG_RMMSGQ is set (fg_removeuipc() sets it during its
               * list removal), record that an inserter is waiting
               * (FG_WRMMSGQ) and sleep on fg_lflags until woken.
               */
 4210         while (fg->fg_lflags & FG_RMMSGQ) {
 4211                 lck_mtx_convert_spin(&fg->fg_lock);
 4212 
 4213                 fg->fg_lflags |= FG_WRMMSGQ;
 4214                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
 4215         }
 4216 
              /*
               * Account for the in-flight message: one more reference and one
               * more message.  Only the transition 0 -> 1 of fg_msgcount
               * requires linking fg onto the message list.
               */
 4217         fg->fg_count++;
 4218         fg->fg_msgcount++;
 4219         if (fg->fg_msgcount == 1) {
 4220                 fg->fg_lflags |= FG_INSMSGQ;
 4221                 insertque=1;
 4222         }
 4223         lck_mtx_unlock(&fg->fg_lock);
 4224 
 4225         if (insertque) {
                      /*
                       * The list itself is protected by uipc_lock, taken with
                       * fg_lock dropped.  unp_gc_wait() is called before
                       * touching the list.
                       * NOTE(review): presumably it waits out a running
                       * garbage collection pass -- confirm.
                       */
 4226                 lck_mtx_lock_spin(uipc_lock);
 4227                 unp_gc_wait();
 4228                 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
 4229                 lck_mtx_unlock(uipc_lock);
                      /*
                       * Insertion finished: clear FG_INSMSGQ and wake any
                       * remover that flagged itself with FG_WINSMSGQ.
                       */
 4230                 lck_mtx_lock(&fg->fg_lock);
 4231                 fg->fg_lflags &= ~FG_INSMSGQ;
 4232                 if (fg->fg_lflags & FG_WINSMSGQ) {
 4233                         fg->fg_lflags &= ~FG_WINSMSGQ;
 4234                         wakeup(&fg->fg_lflags);
 4235                 }
 4236                 lck_mtx_unlock(&fg->fg_lock);
 4237         }
 4238 
 4239 }
 4240 
 4241 
 4242 /*
 4243  * fg_removeuipc
 4244  *
 4245  * Description: Remove fileglob from message queue
 4246  *
 4247  * Parameters:  fg                              Fileglob pointer to remove
 4248  *
 4249  * Returns:     void
 4250  *
 4251  * Locks:       Takes and drops fg_lock, potentially many times
 4252  */
 4253 void
 4254 fg_removeuipc(struct fileglob * fg)
 4255 {
              /* Set when this call is the one that must unlink fg from the list. */
 4256         int removeque = 0;
 4257 
 4258         lck_mtx_lock_spin(&fg->fg_lock);
              /*
               * While FG_INSMSGQ is set (fg_insertuipc() sets it during its
               * list insertion), record that a remover is waiting
               * (FG_WINSMSGQ) and sleep on fg_lflags until woken.
               */
 4259         while (fg->fg_lflags & FG_INSMSGQ) {
 4260                 lck_mtx_convert_spin(&fg->fg_lock);
 4261 
 4262                 fg->fg_lflags |= FG_WINSMSGQ;
 4263                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
 4264         }
              /*
               * One fewer in-flight message.  Only the transition 1 -> 0 of
               * fg_msgcount requires unlinking fg from the message list.
               * Unlike fg_insertuipc(), fg_count is not modified here.
               */
 4265         fg->fg_msgcount--;
 4266         if (fg->fg_msgcount == 0) {
 4267                 fg->fg_lflags |= FG_RMMSGQ;
 4268                 removeque=1;
 4269         }
 4270         lck_mtx_unlock(&fg->fg_lock);
 4271 
 4272         if (removeque) {
                      /*
                       * The list itself is protected by uipc_lock, taken with
                       * fg_lock dropped.  unp_gc_wait() is called before
                       * touching the list.
                       * NOTE(review): presumably it waits out a running
                       * garbage collection pass -- confirm.
                       */
 4273                 lck_mtx_lock_spin(uipc_lock);
 4274                 unp_gc_wait();
 4275                 LIST_REMOVE(fg, f_msglist);
 4276                 lck_mtx_unlock(uipc_lock);
                      /*
                       * Removal finished: clear FG_RMMSGQ and wake any
                       * inserter that flagged itself with FG_WRMMSGQ.
                       */
 4277                 lck_mtx_lock(&fg->fg_lock);
 4278                 fg->fg_lflags &= ~FG_RMMSGQ;
 4279                 if (fg->fg_lflags & FG_WRMMSGQ) {
 4280                         fg->fg_lflags &= ~FG_WRMMSGQ;
 4281                         wakeup(&fg->fg_lflags);
 4282                 }
 4283                 lck_mtx_unlock(&fg->fg_lock);
 4284         }
 4285 }
 4286 
 4287 
 4288 /*
 4289  * fo_read
 4290  *
 4291  * Description: Generic fileops read indirected through the fileops pointer
 4292  *              in the fileproc structure
 4293  *
 4294  * Parameters:  fp                              fileproc structure pointer
 4295  *              uio                             user I/O structure pointer
 4296  *              flags                           FOF_ flags
 4297  *              ctx                             VFS context for operation
 4298  *
 4299  * Returns:     0                               Success
 4300  *              !0                              Errno from read
 4301  */
 4302 int
 4303 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 4304 {
              int error;

              /* Dispatch to the read handler in this file's fileops table. */
              error = fp->f_ops->fo_read(fp, uio, flags, ctx);
 4305         return (error);
 4306 }
 4307 
 4308 
 4309 /*
 4310  * fo_write
 4311  *
 4312  * Description: Generic fileops write indirected through the fileops pointer
 4313  *              in the fileproc structure
 4314  *
 4315  * Parameters:  fp                              fileproc structure pointer
 4316  *              uio                             user I/O structure pointer
 4317  *              flags                           FOF_ flags
 4318  *              ctx                             VFS context for operation
 4319  *
 4320  * Returns:     0                               Success
 4321  *              !0                              Errno from write
 4322  */
 4323 int
 4324 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 4325 {
              int error;

              /* Dispatch to the write handler in this file's fileops table. */
              error = fp->f_ops->fo_write(fp, uio, flags, ctx);
 4326         return (error);
 4327 }
 4328 
 4329 
 4330 /*
 4331  * fo_ioctl
 4332  *
 4333  * Description: Generic fileops ioctl indirected through the fileops pointer
 4334  *              in the fileproc structure
 4335  *
 4336  * Parameters:  fp                              fileproc structure pointer
 4337  *              com                             ioctl command
 4338  *              data                            pointer to internalized copy
 4339  *                                              of user space ioctl command
 4340  *                                              parameter data in kernel space
 4341  *              ctx                             VFS context for operation
 4342  *
 4343  * Returns:     0                               Success
 4344  *              !0                              Errno from ioctl
 4345  *
 4346  * Locks:       The caller is assumed to have held the proc_fdlock; this
 4347  *              function releases and reacquires this lock.  If the caller
 4348  *              accesses data protected by this lock prior to calling this
 4349  *              function, it will need to revalidate/reacquire any cached
 4350  *              protected data obtained prior to the call.
 4351  */
 4352 int 
 4353 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
 4354 {
 4355         int error;
 4356 
              /*
               * The fd lock of the process obtained from ctx is dropped for
               * the duration of the handler call and retaken before
               * returning, whatever the handler's result.
               */
 4357         proc_fdunlock(vfs_context_proc(ctx));
 4358         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
 4359         proc_fdlock(vfs_context_proc(ctx));
 4360         return(error);
 4361 }       
 4362 
 4363 
 4364 /*
 4365  * fo_select
 4366  *
 4367  * Description: Generic fileops select indirected through the fileops pointer
 4368  *              in the fileproc structure
 4369  *
 4370  * Parameters:  fp                              fileproc structure pointer
 4371  *              which                           select which
 4372  *              wql                             pointer to wait queue list
 4373  *              ctx                             VFS context for operation
 4374  *
 4375  * Returns:     0                               Success
 4376  *              !0                              Errno from select
 4377  */
 4378 int
 4379 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
 4380 {
              int error;

              /* Dispatch to the select handler in this file's fileops table. */
              error = fp->f_ops->fo_select(fp, which, wql, ctx);
 4381         return (error);
 4382 }
 4383 
 4384 
 4385 /*
 4386  * fo_close
 4387  *
 4388  * Description: Generic fileops close indirected through the fileops pointer
 4389  *              in the fileglob structure
 4390  *
 4391  * Parameters:  fg                              fileglob structure pointer for
 4392  *                                              file to close
 4393  *              ctx                             VFS context for operation
 4394  *
 4395  * Returns:     0                               Success
 4396  *              !0                              Errno from close
 4397  */
 4398 int
 4399 fo_close(struct fileglob *fg, vfs_context_t ctx)
 4400 {
              int error;

              /* Dispatch to the close handler in this fileglob's fileops table. */
              error = fg->fg_ops->fo_close(fg, ctx);
 4401         return (error);
 4402 }
 4403 
 4404 
 4405 /*
 4406  * fo_kqfilter
 4407  *
 4408  * Description: Generic fileops kqueue filter indirected through the fileops
 4409  *              pointer in the fileproc structure
 4410  *
 4411  * Parameters:  fp                              fileproc structure pointer
 4412  *              kn                              pointer to knote to filter on
 4413  *              ctx                             VFS context for operation
 4414  *
 4415  * Returns:     0                               Success
 4416  *              !0                              Errno from kqueue filter
 4417  */
 4418 int
 4419 fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
 4420 {
              int error;

              /* Dispatch to the kqueue filter handler in this file's fileops table. */
              error = fp->f_ops->fo_kqfilter(fp, kn, ctx);
 4421         return (error);
 4422 }

Cache object: 790cf39e968504fd18c0033038947018


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.