kern_descrip.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  *
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  *
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  *
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  *
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
   29 /*
   30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  * (c) UNIX System Laboratories, Inc.
   33  * All or some portions of this file are derived from material licensed
   34  * to the University of California by American Telephone and Telegraph
   35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   36  * the permission of UNIX System Laboratories, Inc.
   37  *
   38  * Redistribution and use in source and binary forms, with or without
   39  * modification, are permitted provided that the following conditions
   40  * are met:
   41  * 1. Redistributions of source code must retain the above copyright
   42  *    notice, this list of conditions and the following disclaimer.
   43  * 2. Redistributions in binary form must reproduce the above copyright
   44  *    notice, this list of conditions and the following disclaimer in the
   45  *    documentation and/or other materials provided with the distribution.
   46  * 3. All advertising materials mentioning features or use of this software
   47  *    must display the following acknowledgement:
   48  *      This product includes software developed by the University of
   49  *      California, Berkeley and its contributors.
   50  * 4. Neither the name of the University nor the names of its contributors
   51  *    may be used to endorse or promote products derived from this software
   52  *    without specific prior written permission.
   53  *
   54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   64  * SUCH DAMAGE.
   65  *
   66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
   67  */
   68 /*
   69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
   70  * support for mandatory and extensible security protections.  This notice
   71  * is included in support of clause 2.2 (b) of the Apple Public License,
   72  * Version 2.0.
   73  */
   74 
   75 #include <sys/param.h>
   76 #include <sys/systm.h>
   77 #include <sys/filedesc.h>
   78 #include <sys/kernel.h>
   79 #include <sys/vnode_internal.h>
   80 #include <sys/proc_internal.h>
   81 #include <sys/kauth.h>
   82 #include <sys/file_internal.h>
   83 #include <sys/guarded.h>
   84 #include <sys/priv.h>
   85 #include <sys/socket.h>
   86 #include <sys/socketvar.h>
   87 #include <sys/stat.h>
   88 #include <sys/ioctl.h>
   89 #include <sys/fcntl.h>
   90 #include <sys/fsctl.h>
   91 #include <sys/malloc.h>
   92 #include <sys/mman.h>
   93 #include <sys/mount.h>
   94 #include <sys/syslog.h>
   95 #include <sys/unistd.h>
   96 #include <sys/resourcevar.h>
   97 #include <sys/aio_kern.h>
   98 #include <sys/ev.h>
   99 #include <kern/locks.h>
  100 #include <sys/uio_internal.h>
  101 #include <sys/codesign.h>
  102 #include <sys/codedir_internal.h>
  103 #include <sys/mount_internal.h>
  104 #include <sys/kdebug.h>
  105 #include <sys/sysproto.h>
  106 #include <sys/pipe.h>
  107 #include <sys/spawn.h>
  108 #include <sys/cprotect.h>
  109 #include <sys/ubc_internal.h>
  110 
  111 #include <kern/kern_types.h>
  112 #include <kern/kalloc.h>
  113 #include <kern/waitq.h>
  114 #include <kern/ipc_misc.h>
  115 #include <kern/ast.h>
  116 
  117 #include <vm/vm_protos.h>
  118 #include <mach/mach_port.h>
  119 
  120 #include <security/audit/audit.h>
  121 #if CONFIG_MACF
  122 #include <security/mac_framework.h>
  123 #endif
  124 
  125 #include <stdbool.h>
  126 #include <os/atomic_private.h>
  127 #include <os/overflow.h>
  128 #include <IOKit/IOBSD.h>
  129 
/*
 * Local declarations of Mach IPC and fileport routines.
 * NOTE(review): presumably declared here because no header included by
 * this file exports them -- confirm before moving or changing them.
 */
#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);

void fileport_releasefg(struct fileglob *fg);

/* flags for fp_close_and_unlock */
#define FD_DUP2RESV 1

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;
  147 
/*
 * Shorthand for fileglob fields reached through an fp_glob member, e.g.
 * `fp->f_flag` expands to `fp->fp_glob->fg_flag`.
 *
 * These are plain object-like macros, so any identifier spelled f_flag,
 * f_type, f_cred, f_ops or f_offset after this point is rewritten too;
 * do not use those names for locals or parameters.
 */
#define f_flag fp_glob->fg_flag
#define f_type fp_glob->fg_ops->fo_type
#define f_cred fp_glob->fg_cred
#define f_ops fp_glob->fg_ops
#define f_offset fp_glob->fg_offset
  153 
/*
 * Zones for struct fileglob and struct fileproc; both are declared with
 * ZC_ZFREE_CLEARMEM.
 */
ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);

/*
 * If you need accounting for KM_OFILETABL consider using
 * KALLOC_HEAP_DEFINE to define a view.
 */
#define KM_OFILETABL KHEAP_DEFAULT
  162 
  163 /*
  164  * rdar://88960128
  165  */
  166 #define fd_alloc_files(n_files, flags)                       \
  167         __typed_allocators_ignore_push                        \
  168         kheap_alloc(KM_OFILETABL, n_files * OFILESIZE, flags) \
  169         __typed_allocators_ignore_pop
  170 
  171 #define fd_free_files(files, n_files)                        \
  172         __typed_allocators_ignore_push                        \
  173         kheap_free(KM_OFILETABL, ofiles, n_files * OFILESIZE) \
  174         __typed_allocators_ignore_pop
  175 
  176 /*
  177  * Descriptor management.
  178  */
int nfiles;                     /* actual number of open files */
/*
 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
 */
static const struct fileops uninitops;

/* Reference group passed to the os_ref_*_raw() calls made on fg_count. */
os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
/* Lock group named when a fileglob's fg_lock is destroyed in fg_free(). */
static LCK_GRP_DECLARE(file_lck_grp, "file");
  187 
  188 
  189 #pragma mark fileglobs
  190 
  191 /*!
  192  * @function fg_free
  193  *
  194  * @brief
  195  * Free a file structure.
  196  */
  197 static void
  198 fg_free(struct fileglob *fg)
  199 {
  200         os_atomic_dec(&nfiles, relaxed);
  201 
  202         if (fg->fg_vn_data) {
  203                 fg_vn_data_free(fg->fg_vn_data);
  204                 fg->fg_vn_data = NULL;
  205         }
  206 
  207         kauth_cred_t cred = fg->fg_cred;
  208         if (IS_VALID_CRED(cred)) {
  209                 kauth_cred_unref(&cred);
  210                 fg->fg_cred = NOCRED;
  211         }
  212         lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
  213 
  214 #if CONFIG_MACF && CONFIG_VNGUARD
  215         vng_file_label_destroy(fg);
  216 #endif
  217         zfree(fg_zone, fg);
  218 }
  219 
  220 OS_ALWAYS_INLINE
  221 void
  222 fg_ref(proc_t p, struct fileglob *fg)
  223 {
  224 #if DEBUG || DEVELOPMENT
  225         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
  226 #else
  227         (void)p;
  228 #endif
  229         os_ref_retain_raw(&fg->fg_count, &f_refgrp);
  230 }
  231 
  232 void
  233 fg_drop_live(struct fileglob *fg)
  234 {
  235         os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
  236 }
  237 
  238 int
  239 fg_drop(proc_t p, struct fileglob *fg)
  240 {
  241         struct vnode *vp;
  242         struct vfs_context context;
  243         int error = 0;
  244 
  245         if (fg == NULL) {
  246                 return 0;
  247         }
  248 
  249         /* Set up context with cred stashed in fg */
  250         if (p == current_proc()) {
  251                 context.vc_thread = current_thread();
  252         } else {
  253                 context.vc_thread = NULL;
  254         }
  255         context.vc_ucred = fg->fg_cred;
  256 
  257         /*
  258          * POSIX record locking dictates that any close releases ALL
  259          * locks owned by this process.  This is handled by setting
  260          * a flag in the unlock to free ONLY locks obeying POSIX
  261          * semantics, and not to free BSD-style file locks.
  262          * If the descriptor was in a message, POSIX-style locks
  263          * aren't passed with the descriptor.
  264          */
  265         if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
  266             (p->p_ladvflag & P_LADVLOCK)) {
  267                 struct flock lf = {
  268                         .l_whence = SEEK_SET,
  269                         .l_type = F_UNLCK,
  270                 };
  271 
  272                 vp = (struct vnode *)fg_get_data(fg);
  273                 if ((error = vnode_getwithref(vp)) == 0) {
  274                         (void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
  275                         (void)vnode_put(vp);
  276                 }
  277         }
  278 
  279         if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
  280                 /*
  281                  * Since we ensure that fg->fg_ops is always initialized,
  282                  * it is safe to invoke fo_close on the fg
  283                  */
  284                 error = fo_close(fg, &context);
  285 
  286                 fg_free(fg);
  287         }
  288 
  289         return error;
  290 }
  291 
  292 inline
  293 void
  294 fg_set_data(
  295         struct fileglob *fg,
  296         void *fg_data)
  297 {
  298         uintptr_t *store = &fg->fg_data;
  299 
  300 #if __has_feature(ptrauth_calls)
  301         int type = FILEGLOB_DTYPE(fg);
  302 
  303         if (fg_data) {
  304                 type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
  305                 fg_data = ptrauth_sign_unauthenticated(fg_data,
  306                     ptrauth_key_process_independent_data,
  307                     ptrauth_blend_discriminator(store, type));
  308         }
  309 #endif // __has_feature(ptrauth_calls)
  310 
  311         *store = (uintptr_t)fg_data;
  312 }
  313 
  314 inline
  315 void *
  316 fg_get_data_volatile(struct fileglob *fg)
  317 {
  318         uintptr_t *store = &fg->fg_data;
  319         void *fg_data = (void *)*store;
  320 
  321 #if __has_feature(ptrauth_calls)
  322         int type = FILEGLOB_DTYPE(fg);
  323 
  324         if (fg_data) {
  325                 type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
  326                 fg_data = ptrauth_auth_data(fg_data,
  327                     ptrauth_key_process_independent_data,
  328                     ptrauth_blend_discriminator(store, type));
  329         }
  330 #endif // __has_feature(ptrauth_calls)
  331 
  332         return fg_data;
  333 }
  334 
  335 static void
  336 fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
  337 {
  338         struct vnode *vp;
  339         struct vfs_context context;
  340         struct proc *old_proc = current_proc();
  341 
  342         assert(fg != NULL);
  343 
  344         assert(p != old_proc);
  345         context.vc_thread = thread;
  346         context.vc_ucred = fg->fg_cred;
  347 
  348         /* Transfer all POSIX Style locks to new proc */
  349         if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
  350             (p->p_ladvflag & P_LADVLOCK)) {
  351                 struct flock lf = {
  352                         .l_whence = SEEK_SET,
  353                         .l_start = 0,
  354                         .l_len = 0,
  355                         .l_type = F_TRANSFER,
  356                 };
  357 
  358                 vp = (struct vnode *)fg_get_data(fg);
  359                 if (vnode_getwithref(vp) == 0) {
  360                         (void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
  361                         (void)vnode_put(vp);
  362                 }
  363         }
  364 
  365         /* Transfer all OFD Style locks to new proc */
  366         if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
  367             (fg->fg_lflags & FG_HAS_OFDLOCK)) {
  368                 struct flock lf = {
  369                         .l_whence = SEEK_SET,
  370                         .l_start = 0,
  371                         .l_len = 0,
  372                         .l_type = F_TRANSFER,
  373                 };
  374 
  375                 vp = (struct vnode *)fg_get_data(fg);
  376                 if (vnode_getwithref(vp) == 0) {
  377                         (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
  378                         (void)vnode_put(vp);
  379                 }
  380         }
  381         return;
  382 }
  383 
  384 bool
  385 fg_sendable(struct fileglob *fg)
  386 {
  387         switch (FILEGLOB_DTYPE(fg)) {
  388         case DTYPE_VNODE:
  389         case DTYPE_SOCKET:
  390         case DTYPE_PIPE:
  391         case DTYPE_PSXSHM:
  392         case DTYPE_NETPOLICY:
  393                 return (fg->fg_lflags & FG_CONFINED) == 0;
  394 
  395         default:
  396                 return false;
  397         }
  398 }
  399 
  400 #pragma mark file descriptor table (static helpers)
  401 
  402 static void
  403 procfdtbl_reservefd(struct proc * p, int fd)
  404 {
  405         p->p_fd.fd_ofiles[fd] = NULL;
  406         p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
  407 }
  408 
  409 void
  410 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
  411 {
  412         if (fp != NULL) {
  413                 p->p_fd.fd_ofiles[fd] = fp;
  414         }
  415         p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
  416         if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
  417                 p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
  418                 wakeup(&p->p_fd);
  419         }
  420 }
  421 
  422 static void
  423 procfdtbl_waitfd(struct proc * p, int fd)
  424 {
  425         p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
  426         msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
  427 }
  428 
  429 static void
  430 procfdtbl_clearfd(struct proc * p, int fd)
  431 {
  432         int waiting;
  433 
  434         waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
  435         p->p_fd.fd_ofiles[fd] = NULL;
  436         p->p_fd.fd_ofileflags[fd] = 0;
  437         if (waiting == UF_RESVWAIT) {
  438                 wakeup(&p->p_fd);
  439         }
  440 }
  441 
  442 /*
  443  * fdrelse
  444  *
  445  * Description: Inline utility function to free an fd in a filedesc
  446  *
  447  * Parameters:  fdp                             Pointer to filedesc fd lies in
  448  *              fd                              fd to free
  449  *              reserv                          fd should be reserved
  450  *
  451  * Returns:     void
  452  *
  453  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
  454  *              the caller
  455  */
  456 void
  457 fdrelse(struct proc * p, int fd)
  458 {
  459         struct filedesc *fdp = &p->p_fd;
  460         int nfd = 0;
  461 
  462         if (fd < fdp->fd_freefile) {
  463                 fdp->fd_freefile = fd;
  464         }
  465 #if DIAGNOSTIC
  466         if (fd >= fdp->fd_afterlast) {
  467                 panic("fdrelse: fd_afterlast inconsistent");
  468         }
  469 #endif
  470         procfdtbl_clearfd(p, fd);
  471 
  472         nfd = fdp->fd_afterlast;
  473         while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
  474             !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
  475                 nfd--;
  476         }
  477         fdp->fd_afterlast = nfd;
  478 
  479 #if CONFIG_PROC_RESOURCE_LIMITS
  480         fdp->fd_nfiles_open--;
  481 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
  482 }
  483 
  484 
  485 /*
  486  * finishdup
  487  *
  488  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
  489  *
  490  * Parameters:  p                               Process performing the dup
  491  *              old                             The fd to dup
  492  *              new                             The fd to dup it to
  493  *              fp_flags                        Flags to augment the new fp
  494  *              retval                          Pointer to the call return area
  495  *
  496  * Returns:     0                               Success
  497  *              EBADF
  498  *              ENOMEM
  499  *
  500  * Implicit returns:
  501  *              *retval (modified)              The new descriptor
  502  *
  503  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
  504  *              the caller
  505  *
  506  * Notes:       This function may drop and reacquire this lock; it is unsafe
  507  *              for a caller to assume that other state protected by the lock
  508  *              has not been subsequently changed out from under it.
  509  */
  510 static int
  511 finishdup(proc_t p, struct filedesc *fdp, int old, int new,
  512     fileproc_flags_t fp_flags, int32_t *retval)
  513 {
  514         struct fileproc *nfp;
  515         struct fileproc *ofp;
  516 #if CONFIG_MACF
  517         int error;
  518         kauth_cred_t cred;
  519 #endif
  520 
  521 #if DIAGNOSTIC
  522         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
  523 #endif
  524         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
  525             (fdp->fd_ofileflags[old] & UF_RESERVED)) {
  526                 fdrelse(p, new);
  527                 return EBADF;
  528         }
  529 
  530 #if CONFIG_MACF
  531         cred = kauth_cred_proc_ref(p);
  532         error = mac_file_check_dup(cred, ofp->fp_glob, new);
  533         kauth_cred_unref(&cred);
  534 
  535         if (error) {
  536                 fdrelse(p, new);
  537                 return error;
  538         }
  539 #endif
  540 
  541         fg_ref(p, ofp->fp_glob);
  542 
  543         proc_fdunlock(p);
  544 
  545         nfp = fileproc_alloc_init();
  546 
  547         if (fp_flags) {
  548                 nfp->fp_flags |= fp_flags;
  549         }
  550         nfp->fp_glob = ofp->fp_glob;
  551 
  552         proc_fdlock(p);
  553 
  554 #if DIAGNOSTIC
  555         if (fdp->fd_ofiles[new] != 0) {
  556                 panic("finishdup: overwriting fd_ofiles with new %d", new);
  557         }
  558         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
  559                 panic("finishdup: unreserved fileflags with new %d", new);
  560         }
  561 #endif
  562 
  563         if (new >= fdp->fd_afterlast) {
  564                 fdp->fd_afterlast = new + 1;
  565         }
  566         procfdtbl_releasefd(p, new, nfp);
  567         *retval = new;
  568         return 0;
  569 }
  570 
  571 
  572 #pragma mark file descriptor table (exported functions)
  573 
  574 void
  575 proc_dirs_lock_shared(proc_t p)
  576 {
  577         lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
  578 }
  579 
  580 void
  581 proc_dirs_unlock_shared(proc_t p)
  582 {
  583         lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
  584 }
  585 
  586 void
  587 proc_dirs_lock_exclusive(proc_t p)
  588 {
  589         lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
  590 }
  591 
  592 void
  593 proc_dirs_unlock_exclusive(proc_t p)
  594 {
  595         lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
  596 }
  597 
  598 /*
  599  * proc_fdlock, proc_fdlock_spin
  600  *
  601  * Description: Lock to control access to the per process struct fileproc
  602  *              and struct filedesc
  603  *
  604  * Parameters:  p                               Process to take the lock on
  605  *
  606  * Returns:     void
  607  *
  608  * Notes:       The lock is initialized in forkproc() and destroyed in
  609  *              reap_child_process().
  610  */
  611 void
  612 proc_fdlock(proc_t p)
  613 {
  614         lck_mtx_lock(&p->p_fd.fd_lock);
  615 }
  616 
  617 void
  618 proc_fdlock_spin(proc_t p)
  619 {
  620         lck_mtx_lock_spin(&p->p_fd.fd_lock);
  621 }
  622 
  623 void
  624 proc_fdlock_assert(proc_t p, int assertflags)
  625 {
  626         lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
  627 }
  628 
  629 
  630 /*
  631  * proc_fdunlock
  632  *
  633  * Description: Unlock the lock previously locked by a call to proc_fdlock()
  634  *
  635  * Parameters:  p                               Process to drop the lock on
  636  *
  637  * Returns:     void
  638  */
  639 void
  640 proc_fdunlock(proc_t p)
  641 {
  642         lck_mtx_unlock(&p->p_fd.fd_lock);
  643 }
  644 
  645 bool
  646 fdt_available_locked(proc_t p, int n)
  647 {
  648         struct filedesc *fdp = &p->p_fd;
  649         struct fileproc **fpp;
  650         char *flags;
  651         int i;
  652         int lim = proc_limitgetcur_nofile(p);
  653 
  654         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
  655                 return true;
  656         }
  657         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
  658         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
  659         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
  660                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
  661                         return true;
  662                 }
  663         }
  664         return false;
  665 }
  666 
  667 
  668 struct fdt_iterator
  669 fdt_next(proc_t p, int fd, bool only_settled)
  670 {
  671         struct fdt_iterator it;
  672         struct filedesc *fdp = &p->p_fd;
  673         struct fileproc *fp;
  674         int nfds = fdp->fd_afterlast;
  675 
  676         while (++fd < nfds) {
  677                 fp = fdp->fd_ofiles[fd];
  678                 if (fp == NULL || fp->fp_glob == NULL) {
  679                         continue;
  680                 }
  681                 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
  682                         continue;
  683                 }
  684                 it.fdti_fd = fd;
  685                 it.fdti_fp = fp;
  686                 return it;
  687         }
  688 
  689         it.fdti_fd = nfds;
  690         it.fdti_fp = NULL;
  691         return it;
  692 }
  693 
  694 struct fdt_iterator
  695 fdt_prev(proc_t p, int fd, bool only_settled)
  696 {
  697         struct fdt_iterator it;
  698         struct filedesc *fdp = &p->p_fd;
  699         struct fileproc *fp;
  700 
  701         while (--fd >= 0) {
  702                 fp = fdp->fd_ofiles[fd];
  703                 if (fp == NULL || fp->fp_glob == NULL) {
  704                         continue;
  705                 }
  706                 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
  707                         continue;
  708                 }
  709                 it.fdti_fd = fd;
  710                 it.fdti_fp = fp;
  711                 return it;
  712         }
  713 
  714         it.fdti_fd = -1;
  715         it.fdti_fp = NULL;
  716         return it;
  717 }
  718 
  719 void
  720 fdt_init(proc_t p)
  721 {
  722         struct filedesc *fdp = &p->p_fd;
  723 
  724         lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
  725         lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
  726         lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
  727         lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
  728 }
  729 
  730 void
  731 fdt_destroy(proc_t p)
  732 {
  733         struct filedesc *fdp = &p->p_fd;
  734 
  735         lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
  736         lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
  737         lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
  738         lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
  739 }
  740 
  741 void
  742 fdt_exec(proc_t p, short posix_spawn_flags, thread_t thread, bool in_exec)
  743 {
  744         struct filedesc *fdp = &p->p_fd;
  745         thread_t self = current_thread();
  746         struct uthread *ut = get_bsdthread_info(self);
  747         struct kqworkq *dealloc_kqwq = NULL;
  748 
  749         /*
  750          * If the current thread is bound as a workq/workloop
  751          * servicing thread, we need to unbind it first.
  752          */
  753         if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
  754                 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
  755         }
  756 
  757         /*
  758          * Deallocate the knotes for this process
  759          * and mark the tables non-existent so
  760          * subsequent kqueue closes go faster.
  761          */
  762         knotes_dealloc(p);
  763         assert(fdp->fd_knlistsize == 0);
  764         assert(fdp->fd_knhashmask == 0);
  765 
  766         proc_fdlock(p);
  767 
  768         /* Set the P_LADVLOCK flag if the flag set on old proc */
  769         if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
  770                 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
  771         }
  772 
  773         for (int i = fdp->fd_afterlast; i-- > 0;) {
  774                 struct fileproc *fp = fdp->fd_ofiles[i];
  775                 char *flagp = &fdp->fd_ofileflags[i];
  776                 bool inherit_file = true;
  777 
  778                 if (fp == FILEPROC_NULL) {
  779                         continue;
  780                 }
  781 
  782                 /*
  783                  * no file descriptor should be in flux when in exec,
  784                  * because we stopped all other threads
  785                  */
  786                 if (*flagp & ~UF_INHERIT) {
  787                         panic("file %d/%p in flux during exec of %p", i, fp, p);
  788                 }
  789 
  790                 if (fp->fp_flags & FP_CLOEXEC) {
  791                         inherit_file = false;
  792                 } else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
  793                     !(*flagp & UF_INHERIT)) {
  794                         /*
  795                          * Reverse the usual semantics of file descriptor
  796                          * inheritance - all of them should be closed
  797                          * except files marked explicitly as "inherit" and
  798                          * not marked close-on-exec.
  799                          */
  800                         inherit_file = false;
  801 #if CONFIG_MACF
  802                 } else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
  803                         inherit_file = false;
  804 #endif
  805                 }
  806 
  807                 *flagp = 0; /* clear UF_INHERIT */
  808 
  809                 if (!inherit_file) {
  810                         fp_close_and_unlock(p, i, fp, 0);
  811                         proc_fdlock(p);
  812                 } else if (in_exec) {
  813                         /* Transfer F_POSIX style lock to new proc */
  814                         proc_fdunlock(p);
  815                         fg_transfer_filelocks(p, fp->fp_glob, thread);
  816                         proc_fdlock(p);
  817                 }
  818         }
  819 
  820         /* release the per-process workq kq */
  821         if (fdp->fd_wqkqueue) {
  822                 dealloc_kqwq = fdp->fd_wqkqueue;
  823                 fdp->fd_wqkqueue = NULL;
  824         }
  825 
  826         proc_fdunlock(p);
  827 
  828         /* Anything to free? */
  829         if (dealloc_kqwq) {
  830                 kqworkq_dealloc(dealloc_kqwq);
  831         }
  832 }
  833 
  834 
/*
 * fdt_fork
 *
 * Description: Populate a new file descriptor table from the table of an
 *              existing process.
 *
 * Parameters:  newfdp                          Table to fill in
 *              p                               Process whose table is copied
 *              uth_cdir                        If non-NULL, used as the new
 *                                              current directory instead of
 *                                              p's fd_cdir
 *              in_exec                         Selects the inheritance rules:
 *                                              when true, FP_CLOEXEC entries
 *                                              are skipped and guard
 *                                              attributes are copied; when
 *                                              false, FP_CLOFORK entries are
 *                                              skipped
 *
 * Returns:     0                               Success
 *              EPERM                           No reference could be taken on
 *                                              the inherited root directory
 *              ENOMEM                          The descriptor arrays could
 *                                              not be allocated
 *
 * Locks:       Takes and drops the proc_fdlock of p internally; the lock is
 *              not held while the new arrays are allocated.
 *
 * Notes:       newfdp->fd_rdir and newfdp->fd_cdir are only ever assigned on
 *              success and are then tested against NULL, so this presumably
 *              expects the caller to hand in a zeroed newfdp -- TODO confirm
 *              against the callers.
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
{
        struct filedesc *fdp = &p->p_fd;
        struct fileproc **ofiles;
        char *ofileflags;
        int n_files, afterlast, freefile;
        vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
        int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
        proc_fdlock(p);

        /* Scalar state is copied directly; only the masked flags carry over. */
        newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
        newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
        newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
        newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

        /*
         * For both fd_cdir and fd_rdir make sure we get
         * a valid reference... if we can't, then leave
         * the pointer(s) NULL in the child... this
         * will keep us from using a non-referenced vp
         * and allows us to do the vnode_rele only on
         * a properly referenced vp
         */
        if ((v_dir = fdp->fd_rdir)) {
                if (vnode_getwithref(v_dir) == 0) {
                        if (vnode_ref(v_dir) == 0) {
                                newfdp->fd_rdir = v_dir;
                        }
                        vnode_put(v_dir);
                }
                if (newfdp->fd_rdir == NULL) {
                        /*
                         * We couldn't get a new reference on
                         * the chroot directory being
                         * inherited... this is fatal, since
                         * otherwise it would constitute an
                         * escape from a chroot environment by
                         * the new process.
                         */
                        proc_fdunlock(p);
                        return EPERM;
                }
        }

        /*
         * If we are running with per-thread current working directories,
         * inherit the new current working directory from the current thread.
         */
        if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
                if (vnode_getwithref(v_dir) == 0) {
                        if (vnode_ref(v_dir) == 0) {
                                newfdp->fd_cdir = v_dir;
                        }
                        vnode_put(v_dir);
                }
                /* Only the parent's own fd_cdir is dropped, never uth_cdir. */
                if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
                        /*
                         * we couldn't get a new reference on
                         * the current working directory being
                         * inherited... we might as well drop
                         * our reference from the parent also
                         * since the vnode has gone DEAD making
                         * it useless... by dropping it we'll
                         * be that much closer to recycling it
                         */
                        vnode_rele(fdp->fd_cdir);
                        fdp->fd_cdir = NULL;
                }
        }

        /*
         * If the number of open files fits in the internal arrays
         * of the open file structure, use them, otherwise allocate
         * additional memory for the number of descriptors currently
         * in use.
         */
        afterlast = fdp->fd_afterlast;
        freefile = fdp->fd_freefile;
        if (afterlast <= NDFILE) {
                n_files = NDFILE;
        } else {
                n_files = roundup(afterlast, NDEXTENT);
        }

        /*
         * The allocation is done without the lock; afterlast and freefile
         * remain the values sampled above and are not re-read afterwards.
         */
        proc_fdunlock(p);

        ofiles = fd_alloc_files(n_files, Z_WAITOK | Z_ZERO);
        if (ofiles == NULL) {
                /* Undo the directory references taken above. */
                if (newfdp->fd_cdir) {
                        vnode_rele(newfdp->fd_cdir);
                        newfdp->fd_cdir = NULL;
                }
                if (newfdp->fd_rdir) {
                        vnode_rele(newfdp->fd_rdir);
                        newfdp->fd_rdir = NULL;
                }
                return ENOMEM;
        }
        /* The per-descriptor flag bytes live right after the pointer array. */
        ofileflags = (char *)&ofiles[n_files];

        proc_fdlock(p);

        /*
         * Walk from the highest sampled descriptor down to 0 so that a run
         * of skipped slots at the top can shrink afterlast as we go.
         */
        for (int i = afterlast; i-- > 0;) {
                struct fileproc *ofp, *nfp;
                char flags;

                ofp = fdp->fd_ofiles[i];
                flags = fdp->fd_ofileflags[i];

                /*
                 * Slots that are empty, confined, reserved, or flagged
                 * close-on-fork/close-on-exec (matching in_exec) are not
                 * inherited; they only update the afterlast/freefile hints.
                 */
                if (ofp == NULL ||
                    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
                    ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
                    ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
                    (flags & UF_RESERVED)) {
                        if (i + 1 == afterlast) {
                                afterlast = i;
                        }
                        if (i < freefile) {
                                freefile = i;
                        }

                        continue;
                }

                /* New fileproc sharing the parent's fileglob. */
                nfp = fileproc_alloc_init();
                nfp->fp_glob = ofp->fp_glob;
                if (in_exec) {
                        nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
                        if (ofp->fp_guard_attrs) {
                                guarded_fileproc_copy_guard(ofp, nfp);
                        }
                } else {
                        assert(ofp->fp_guard_attrs == 0);
                        nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
                }
                fg_ref(p, nfp->fp_glob);

                ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
                fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
        }

        proc_fdunlock(p);

        /* Publish the finished table in the new filedesc. */
        newfdp->fd_ofiles = ofiles;
        newfdp->fd_ofileflags = ofileflags;
        newfdp->fd_nfiles = n_files;
        newfdp->fd_afterlast = afterlast;
        newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
        newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

        return 0;
}
  997 
/*
 * fdt_invalidate
 *
 * Description: Tear down the file descriptor table of a process: release
 *              its knotes and workloops, close every open descriptor, and
 *              free the descriptor arrays, the workq kqueue, the current
 *              and root directory references and the kqueue hash.
 *
 * Parameters:  p                               Process whose table is torn
 *                                              down
 *
 * Returns:     void
 *
 * Locks:       Takes and drops the proc_fdlock and fd_knhashlock
 *              internally.  Pointers are detached from the filedesc while
 *              the matching lock is held and released only after both
 *              locks have been dropped.
 */
void
fdt_invalidate(proc_t p)
{
        struct filedesc *fdp = &p->p_fd;
        struct fileproc *fp, **ofiles;
        struct kqworkq *kqwq = NULL;
        vnode_t vn1 = NULL, vn2 = NULL;
        struct kqwllist *kqhash = NULL;
        u_long kqhashmask = 0;
        int n_files = 0;

        /*
         * deallocate all the knotes up front and claim empty
         * tables to make any subsequent kqueue closes faster.
         */
        knotes_dealloc(p);
        assert(fdp->fd_knlistsize == 0);
        assert(fdp->fd_knhashmask == 0);

        /*
         * dealloc all workloops that have outstanding retains
         * when created with scheduling parameters.
         */
        kqworkloops_dealloc(p);

        proc_fdlock(p);

        /* close file descriptors */
        if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
                for (int i = fdp->fd_afterlast; i-- > 0;) {
                        if ((fp = fdp->fd_ofiles[i]) != NULL) {
                                if (fdp->fd_ofileflags[i] & UF_RESERVED) {
                                        panic("fdfree: found fp with UF_RESERVED");
                                }
                                /*
                                 * As its name says, the close returns with
                                 * the proc_fdlock dropped, so retake it
                                 * before touching the table again.
                                 */
                                fp_close_and_unlock(p, i, fp, 0);
                                proc_fdlock(p);
                        }
                }
        }

        /* Snapshot everything that has to be released ... */
        n_files = fdp->fd_nfiles;
        ofiles = fdp->fd_ofiles;
        kqwq = fdp->fd_wqkqueue;
        vn1 = fdp->fd_cdir;
        vn2 = fdp->fd_rdir;

        /* ... and detach it from the filedesc while still locked. */
        fdp->fd_ofileflags = NULL;
        fdp->fd_ofiles = NULL;
        fdp->fd_nfiles = 0;
        fdp->fd_wqkqueue = NULL;
        fdp->fd_cdir = NULL;
        fdp->fd_rdir = NULL;

        proc_fdunlock(p);

        /* The kqueue hash is detached under its own lock. */
        lck_mtx_lock(&fdp->fd_knhashlock);

        kqhash = fdp->fd_kqhash;
        kqhashmask = fdp->fd_kqhashmask;

        fdp->fd_kqhash = 0;
        fdp->fd_kqhashmask = 0;

        lck_mtx_unlock(&fdp->fd_knhashlock);

        /* Everything below runs with no lock held. */
        fd_free_files(ofiles, n_files);

        if (kqwq) {
                kqworkq_dealloc(kqwq);
        }
        if (vn1) {
                vnode_rele(vn1);
        }
        if (vn2) {
                vnode_rele(vn2);
        }
        if (kqhash) {
                /* Every bucket is expected to be empty by now. */
                for (uint32_t i = 0; i <= kqhashmask; i++) {
                        assert(LIST_EMPTY(&kqhash[i]));
                }
                hashdestroy(kqhash, M_KQUEUE, kqhashmask);
        }
}
 1081 
 1082 
 1083 struct fileproc *
 1084 fileproc_alloc_init(void)
 1085 {
 1086         struct fileproc *fp;
 1087 
 1088         fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
 1089         os_ref_init(&fp->fp_iocount, &f_refgrp);
 1090         return fp;
 1091 }
 1092 
 1093 
 1094 void
 1095 fileproc_free(struct fileproc *fp)
 1096 {
 1097         os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
 1098 #if DEVELOPMENT || DEBUG
 1099         if (0 != refc) {
 1100                 panic("%s: pid %d refc: %u != 0",
 1101                     __func__, proc_pid(current_proc()), refc);
 1102         }
 1103 #endif
 1104         if (fp->fp_guard_attrs) {
 1105                 guarded_fileproc_unguard(fp);
 1106         }
 1107         assert(fp->fp_wset == NULL);
 1108         zfree_id(ZONE_ID_FILEPROC, fp);
 1109 }
 1110 
 1111 
/*
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.
 * This is one total for the whole system: fdalloc() increments it each
 * time it installs a larger table, whichever process it is working on.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
 1119 
 1120 #if CONFIG_PROC_RESOURCE_LIMITS
 1121 /*
 1122  * Should be called only with the proc_fdlock held.
 1123  */
 1124 void
 1125 fd_check_limit_exceeded(struct filedesc *fdp)
 1126 {
 1127 #if DIAGNOSTIC
 1128         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 1129 #endif
 1130         if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
 1131             (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
 1132                 fd_above_soft_limit_send_notification(fdp);
 1133                 act_set_astproc_resource(current_thread());
 1134         } else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
 1135             (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
 1136                 fd_above_hard_limit_send_notification(fdp);
 1137                 act_set_astproc_resource(current_thread());
 1138         }
 1139 }
 1140 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
 1141 
/*
 * fdalloc
 *
 * Description: Allocate a file descriptor for the process.
 *
 * Parameters:  p                               Process to allocate the fd in
 *              want                            The fd we would prefer to get
 *              result                          Pointer to fd we got
 *
 * Returns:     0                               Success
 *              EMFILE                          The table already has as many
 *                                              entries as the process's
 *                                              open file limit allows
 *              ENOMEM                          A larger table could not be
 *                                              allocated
 *
 * Implicit returns:
 *              *result (modified)              The fd which was allocated
 *
 * Locks:       Called with the proc_fdlock held (asserted on DIAGNOSTIC
 *              kernels).  The lock is dropped and retaken around the
 *              allocation of a larger table, so the table may have changed
 *              by the time this returns.
 *
 * Notes:       The chosen slot is marked through procfdtbl_reservefd(); its
 *              fd_ofiles entry is left NULL for the caller to fill in.
 */
int
fdalloc(proc_t p, int want, int *result)
{
        struct filedesc *fdp = &p->p_fd;
        int i;
        int last, numfiles, oldnfiles;
        struct fileproc **newofiles, **ofiles;
        char *newofileflags;
        int lim = proc_limitgetcur_nofile(p);

        /*
         * Search for a free descriptor starting at the higher
         * of want or fd_freefile.  If that fails, consider
         * expanding the ofile array.
         */
#if DIAGNOSTIC
        proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

        for (;;) {
                /* Never scan past the table or past the per-process limit. */
                last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
                if ((i = want) < fdp->fd_freefile) {
                        i = fdp->fd_freefile;
                }
                for (; i < last; i++) {
                        /* A slot is free when it has no fileproc and is not reserved. */
                        if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
                                procfdtbl_reservefd(p, i);
                                if (i >= fdp->fd_afterlast) {
                                        fdp->fd_afterlast = i + 1;
                                }
                                /*
                                 * The free hint only moves when the search
                                 * was not asked to start above it.
                                 */
                                if (want <= fdp->fd_freefile) {
                                        fdp->fd_freefile = i;
                                }
                                *result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
                                fdp->fd_nfiles_open++;
                                fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
                                return 0;
                        }
                }

                /*
                 * No space in current array.  Expand?
                 */
                if ((rlim_t)fdp->fd_nfiles >= lim) {
                        return EMFILE;
                }
                /* Grow to NDEXTENT at first, then double. */
                if (fdp->fd_nfiles < NDEXTENT) {
                        numfiles = NDEXTENT;
                } else {
                        numfiles = 2 * fdp->fd_nfiles;
                }
                /* Enforce lim */
                if ((rlim_t)numfiles > lim) {
                        numfiles = (int)lim;
                }
                /* The allocation is made with the lock dropped. */
                proc_fdunlock(p);
                newofiles = fd_alloc_files(numfiles, Z_WAITOK);
                proc_fdlock(p);
                if (newofiles == NULL) {
                        return ENOMEM;
                }
                /*
                 * The table is already at least this large (it can have
                 * been replaced while the lock was dropped): discard the
                 * new allocation and search again.
                 */
                if (fdp->fd_nfiles >= numfiles) {
                        fd_free_files(newofiles, numfiles);
                        continue;
                }
                /* The flag bytes live right after the pointer array. */
                newofileflags = (char *) &newofiles[numfiles];
                /*
                 * Copy the existing ofile and ofileflags arrays
                 * and zero the new portion of each array.
                 */
                oldnfiles = fdp->fd_nfiles;
                (void) memcpy(newofiles, fdp->fd_ofiles,
                    oldnfiles * sizeof(*fdp->fd_ofiles));
                (void) memset(&newofiles[oldnfiles], 0,
                    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

                (void) memcpy(newofileflags, fdp->fd_ofileflags,
                    oldnfiles * sizeof(*fdp->fd_ofileflags));
                (void) memset(&newofileflags[oldnfiles], 0,
                    (numfiles - oldnfiles) *
                    sizeof(*fdp->fd_ofileflags));
                /* Install the new arrays, then free the old allocation. */
                ofiles = fdp->fd_ofiles;
                fdp->fd_ofiles = newofiles;
                fdp->fd_ofileflags = newofileflags;
                fdp->fd_nfiles = numfiles;
                fd_free_files(ofiles, oldnfiles);
                fdexpand++;
        }
}
 1249 
 1250 
 1251 #pragma mark fileprocs
 1252 
 1253 void
 1254 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
 1255 {
 1256         if (clearflags) {
 1257                 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
 1258         } else {
 1259                 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
 1260         }
 1261 }
 1262 
 1263 fileproc_vflags_t
 1264 fileproc_get_vflags(struct fileproc *fp)
 1265 {
 1266         return os_atomic_load(&fp->fp_vflags, relaxed);
 1267 }
 1268 
 1269 /*
 1270  * falloc_withinit
 1271  *
 1272  * Create a new open file structure and allocate
 1273  * a file descriptor for the process that refers to it.
 1274  *
 1275  * Returns:     0                       Success
 1276  *
 1277  * Description: Allocate an entry in the per process open file table and
 1278  *              return the corresponding fileproc and fd.
 1279  *
 1280  * Parameters:  p                               The process in whose open file
 1281  *                                              table the fd is to be allocated
 1282  *              resultfp                        Pointer to fileproc pointer
 1283  *                                              return area
 1284  *              resultfd                        Pointer to fd return area
 1285  *              ctx                             VFS context
 1286  *              fp_zalloc                       fileproc allocator to use
 1287  *              crarg                           allocator args
 1288  *
 1289  * Returns:     0                               Success
 1290  *              ENFILE                          Too many open files in system
 1291  *              fdalloc:EMFILE                  Too many open files in process
 1292  *              fdalloc:ENOMEM                  M_OFILETABL zone exhausted
 1293  *              ENOMEM                          fp_zone or fg_zone zone
 1294  *                                              exhausted
 1295  *
 1296  * Implicit returns:
 1297  *              *resultfd (modified)            Returned fileproc pointer
 1298  *              *resultfd (modified)            Returned fd
 1299  *
 1300  * Notes:       This function takes separate process and context arguments
 1301  *              solely to support kern_exec.c; otherwise, it would take
 1302  *              neither, and use the vfs_context_current() routine internally.
 1303  */
 1304 int
 1305 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
 1306     vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
 1307 {
 1308         struct fileproc *fp;
 1309         struct fileglob *fg;
 1310         int error, nfd;
 1311 #if CONFIG_MACF
 1312         kauth_cred_t cred;
 1313 #endif
 1314 
 1315         /* Make sure we don't go beyond the system-wide limit */
 1316         if (nfiles >= maxfiles) {
 1317                 tablefull("file");
 1318                 return ENFILE;
 1319         }
 1320 
 1321         proc_fdlock(p);
 1322 
 1323         /* fdalloc will make sure the process stays below per-process limit */
 1324         if ((error = fdalloc(p, 0, &nfd))) {
 1325                 proc_fdunlock(p);
 1326                 return error;
 1327         }
 1328 
 1329 #if CONFIG_MACF
 1330         cred = kauth_cred_proc_ref(p);
 1331         error = mac_file_check_create(cred);
 1332         kauth_cred_unref(&cred);
 1333         if (error) {
 1334                 proc_fdunlock(p);
 1335                 return error;
 1336         }
 1337 #endif
 1338 
 1339         /*
 1340          * Allocate a new file descriptor.
 1341          * If the process has file descriptor zero open, add to the list
 1342          * of open files at that point, otherwise put it at the front of
 1343          * the list of open files.
 1344          */
 1345         proc_fdunlock(p);
 1346 
 1347         fp = fileproc_alloc_init();
 1348         if (fp_init) {
 1349                 fp_init(fp, initarg);
 1350         }
 1351 
 1352         fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
 1353         lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
 1354 
 1355         os_ref_retain_locked(&fp->fp_iocount);
 1356         os_ref_init_raw(&fg->fg_count, &f_refgrp);
 1357         fg->fg_ops = &uninitops;
 1358         fp->fp_glob = fg;
 1359 
 1360         kauth_cred_ref(ctx->vc_ucred);
 1361 
 1362         fp->f_cred = ctx->vc_ucred;
 1363 
 1364         os_atomic_inc(&nfiles, relaxed);
 1365 
 1366         proc_fdlock(p);
 1367 
 1368         p->p_fd.fd_ofiles[nfd] = fp;
 1369 
 1370         proc_fdunlock(p);
 1371 
 1372         if (resultfp) {
 1373                 *resultfp = fp;
 1374         }
 1375         if (resultfd) {
 1376                 *resultfd = nfd;
 1377         }
 1378 
 1379         return 0;
 1380 }
 1381 
 1382 int
 1383 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
 1384 {
 1385         return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
 1386 }
 1387 
 1388 
 1389 /*
 1390  * fp_free
 1391  *
 1392  * Description: Release the fd and free the fileproc associated with the fd
 1393  *              in the per process open file table of the specified process;
 1394  *              these values must correspond.
 1395  *
 1396  * Parameters:  p                               Process containing fd
 1397  *              fd                              fd to be released
 1398  *              fp                              fileproc to be freed
 1399  */
 1400 void
 1401 fp_free(proc_t p, int fd, struct fileproc * fp)
 1402 {
 1403         proc_fdlock_spin(p);
 1404         fdrelse(p, fd);
 1405         proc_fdunlock(p);
 1406 
 1407         fg_free(fp->fp_glob);
 1408         os_ref_release_live(&fp->fp_iocount);
 1409         fileproc_free(fp);
 1410 }
 1411 
 1412 
 1413 struct fileproc *
 1414 fp_get_noref_locked(proc_t p, int fd)
 1415 {
 1416         struct filedesc *fdp = &p->p_fd;
 1417         struct fileproc *fp;
 1418 
 1419         if (fd < 0 || fd >= fdp->fd_nfiles ||
 1420             (fp = fdp->fd_ofiles[fd]) == NULL ||
 1421             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 1422                 return NULL;
 1423         }
 1424 
 1425         zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
 1426         return fp;
 1427 }
 1428 
 1429 struct fileproc *
 1430 fp_get_noref_locked_with_iocount(proc_t p, int fd)
 1431 {
 1432         struct filedesc *fdp = &p->p_fd;
 1433         struct fileproc *fp = NULL;
 1434 
 1435         if (fd < 0 || fd >= fdp->fd_nfiles ||
 1436             (fp = fdp->fd_ofiles[fd]) == NULL ||
 1437             os_ref_get_count(&fp->fp_iocount) <= 1 ||
 1438             ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
 1439             !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
 1440                 panic("%s: caller without an ioccount on fileproc (%d/:%p)",
 1441                     __func__, fd, fp);
 1442         }
 1443 
 1444         zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
 1445         return fp;
 1446 }
 1447 
 1448 
 1449 /*
 1450  * fp_lookup
 1451  *
 1452  * Description: Get fileproc pointer for a given fd from the per process
 1453  *              open file table of the specified process and if successful,
 1454  *              increment the fp_iocount
 1455  *
 1456  * Parameters:  p                               Process in which fd lives
 1457  *              fd                              fd to get information for
 1458  *              resultfp                        Pointer to result fileproc
 1459  *                                              pointer area, or 0 if none
 1460  *              locked                          !0 if the caller holds the
 1461  *                                              proc_fdlock, 0 otherwise
 1462  *
 1463  * Returns:     0                       Success
 1464  *              EBADF                   Bad file descriptor
 1465  *
 1466  * Implicit returns:
 1467  *              *resultfp (modified)            Fileproc pointer
 1468  *
 1469  * Locks:       If the argument 'locked' is non-zero, then the caller is
 1470  *              expected to have taken and held the proc_fdlock; if it is
 1471  *              zero, than this routine internally takes and drops this lock.
 1472  */
 1473 int
 1474 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
 1475 {
 1476         struct filedesc *fdp = &p->p_fd;
 1477         struct fileproc *fp;
 1478 
 1479         if (!locked) {
 1480                 proc_fdlock_spin(p);
 1481         }
 1482         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
 1483             (fp = fdp->fd_ofiles[fd]) == NULL ||
 1484             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 1485                 if (!locked) {
 1486                         proc_fdunlock(p);
 1487                 }
 1488                 return EBADF;
 1489         }
 1490 
 1491         zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
 1492         os_ref_retain_locked(&fp->fp_iocount);
 1493 
 1494         if (resultfp) {
 1495                 *resultfp = fp;
 1496         }
 1497         if (!locked) {
 1498                 proc_fdunlock(p);
 1499         }
 1500 
 1501         return 0;
 1502 }
 1503 
 1504 
 1505 int
 1506 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
 1507 {
 1508         struct filedesc *fdp = &p->p_fd;
 1509         struct fileproc *fp;
 1510 
 1511         proc_fdlock_spin(p);
 1512         if (fd < 0 || fd >= fdp->fd_nfiles ||
 1513             (fp = fdp->fd_ofiles[fd]) == NULL ||
 1514             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 1515                 proc_fdunlock(p);
 1516                 return EBADF;
 1517         }
 1518 
 1519         if (fp->f_type != ftype) {
 1520                 proc_fdunlock(p);
 1521                 return err;
 1522         }
 1523 
 1524         zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
 1525         os_ref_retain_locked(&fp->fp_iocount);
 1526         proc_fdunlock(p);
 1527 
 1528         *fpp = fp;
 1529         return 0;
 1530 }
 1531 
 1532 
 1533 /*
 1534  * fp_drop
 1535  *
 1536  * Description: Drop the I/O reference previously taken by calling fp_lookup
 1537  *              et. al.
 1538  *
 1539  * Parameters:  p                               Process in which the fd lives
 1540  *              fd                              fd associated with the fileproc
 1541  *              fp                              fileproc on which to set the
 1542  *                                              flag and drop the reference
 1543  *              locked                          flag to internally take and
 1544  *                                              drop proc_fdlock if it is not
 1545  *                                              already held by the caller
 1546  *
 1547  * Returns:     0                               Success
 1548  *              EBADF                           Bad file descriptor
 1549  *
 1550  * Locks:       This function internally takes and drops the proc_fdlock for
 1551  *              the supplied process if 'locked' is non-zero, and assumes that
 1552  *              the caller already holds this lock if 'locked' is non-zero.
 1553  *
 1554  * Notes:       The fileproc must correspond to the fd in the supplied proc
 1555  */
 1556 int
 1557 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
 1558 {
 1559         struct filedesc *fdp = &p->p_fd;
 1560         int     needwakeup = 0;
 1561 
 1562         if (!locked) {
 1563                 proc_fdlock_spin(p);
 1564         }
 1565         if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
 1566             (fp = fdp->fd_ofiles[fd]) == NULL ||
 1567             ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
 1568             !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
 1569                 if (!locked) {
 1570                         proc_fdunlock(p);
 1571                 }
 1572                 return EBADF;
 1573         }
 1574 
 1575         if (1 == os_ref_release_locked(&fp->fp_iocount)) {
 1576                 if (fp->fp_flags & FP_SELCONFLICT) {
 1577                         fp->fp_flags &= ~FP_SELCONFLICT;
 1578                 }
 1579 
 1580                 if (fdp->fd_fpdrainwait) {
 1581                         fdp->fd_fpdrainwait = 0;
 1582                         needwakeup = 1;
 1583                 }
 1584         }
 1585         if (!locked) {
 1586                 proc_fdunlock(p);
 1587         }
 1588         if (needwakeup) {
 1589                 wakeup(&fdp->fd_fpdrainwait);
 1590         }
 1591 
 1592         return 0;
 1593 }
 1594 
 1595 
/*
 * fileproc_drain
 *
 * Description: Drain out pending I/O operations
 *
 * Parameters:  p                               Process closing this file
 *              fp                              fileproc struct for the open
 *                                              instance on the file
 *
 * Returns:     void
 *
 * Locks:       Assumes the caller holds the proc_fdlock
 *
 * Notes:       For character devices, this occurs on the last close of the
 *              device; for all other file descriptors, this occurs on each
 *              close to prevent fd's from being closed out from under
 *              operations currently in progress and blocked
 *
 *              The function loops until the fp_iocount of fp is no longer
 *              above 1.  On every pass it calls fo_drain(), wakes select
 *              waiters recorded on the fileproc, and then sleeps on
 *              fd_fpdrainwait, which fp_drop() wakes when it releases a
 *              reference and the count it gets back is 1.
 *
 * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
 *              regarding their use and interaction with this function.
 */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
        struct filedesc *fdp = &p->p_fd;
        struct vfs_context context;
        thread_t thread;
        bool is_current_proc;

        is_current_proc = (p == current_proc());

        /*
         * Pick the thread for the vfs_context.  For a foreign process a
         * reference is taken on its thread under the proc lock; it is
         * dropped again at the end of this function.
         */
        if (!is_current_proc) {
                proc_lock(p);
                thread = proc_thread(p); /* XXX */
                thread_reference(thread);
                proc_unlock(p);
        } else {
                thread = current_thread();
        }

        /* fo_drain() runs with the credential stored in the fileglob. */
        context.vc_thread = thread;
        context.vc_ucred = fp->fp_glob->fg_cred;

        /* Set the vflag for drain */
        fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

        while (os_ref_get_count(&fp->fp_iocount) > 1) {
                /*
                 * NOTE(review): presumably needed because callers may hold
                 * fd_lock in spin mode (proc_fdlock_spin) and the work
                 * below can block -- confirm.
                 */
                lck_mtx_convert_spin(&fdp->fd_lock);

                fo_drain(fp, &context);
                if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
                        struct select_set *selset;

                        /* Guarded fileprocs keep the wait set in the guard. */
                        if (fp->fp_guard_attrs) {
                                selset = fp->fp_guard->fpg_wset;
                        } else {
                                selset = fp->fp_wset;
                        }
                        if (waitq_wakeup64_all(selset, NO_EVENT64,
                            THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
                                panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
                                    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
                        }
                }
                if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
                        if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
                            THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
                                panic("bad select_conflict_queue");
                        }
                }
                /*
                 * Announce that a drainer is waiting, then sleep on
                 * fd_fpdrainwait with fd_lock passed as the mutex.
                 */
                fdp->fd_fpdrainwait = 1;
                msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
        }
#if DIAGNOSTIC
        if ((fp->fp_flags & FP_INSELECT) != 0) {
                panic("FP_INSELECT set on drained fp");
        }
#endif
        if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
                fp->fp_flags &= ~FP_SELCONFLICT;
        }

        /* Balance the thread_reference() taken for a foreign process. */
        if (!is_current_proc) {
                thread_deallocate(thread);
        }
}
 1682 
 1683 
/*
 * fp_close_and_unlock
 *
 * Description: Close descriptor (fd) of process (p): mark the slot as
 *              closing, deliver close notifications, cancel outstanding
 *              AIO, detach knotes, drain the fileproc, release (or keep
 *              reserved) the descriptor slot, free the fileproc and drop
 *              the fileglob.
 *
 * Parameters:  p                               Process owning the fd table
 *              fd                              Descriptor being closed
 *              fp                              Fileproc currently at (fd)
 *              flags                           FD_DUP2RESV to empty the slot
 *                                              but keep it reserved for the
 *                                              caller; otherwise the slot is
 *                                              released with fdrelse()
 *
 * Returns:     fg_drop:???                     Whatever fg_drop() returns
 *                                              for the fileglob
 *
 * Locks:       Entered with the proc_fdlock held (asserted when DIAGNOSTIC
 *              is set); returns with the proc_fdlock dropped.  The lock
 *              is also dropped and retaken around the notification and
 *              AIO cancellation step.
 *
 * Notes:       Panics if the slot already has UF_RESERVED or UF_CLOSING
 *              set on entry.
 */
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
        struct filedesc *fdp = &p->p_fd;
        struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
        kauth_cred_t cred;
#endif

#if DIAGNOSTIC
        proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

        /*
         * Keep most people from finding the filedesc while we are closing it.
         *
         * Callers are:
         *
         * - dup2() which always waits for UF_RESERVED to clear
         *
         * - close/guarded_close/... who will fail the fileproc lookup if
         *   UF_RESERVED is set,
         *
         * - fdexec()/fdfree() who only run once all threads in the proc
         *   are properly canceled, hence no fileproc in this proc should
         *   be in flux.
         *
         * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
         *
         * Callers of fp_get_noref_locked_with_iocount() can still find
         * this entry so that they can drop their I/O reference despite
         * not having remembered the fileproc pointer (namely select() and
         * file_drop()).
         */
        if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
                panic("%s: called with fileproc in flux (%d/:%p)",
                    __func__, fd, fp);
        }
        p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

        /*
         * The notification and AIO-cancel work below runs with the fd lock
         * dropped, so only take this path when there is something to do:
         * AIO was issued on the fileproc, or (with MACF) the file is a
         * vnode, or (without MACF) fileop listeners are registered.
         */
        if ((fp->fp_flags & FP_AIOISSUED) ||
#if CONFIG_MACF
            (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
#else
            kauth_authorize_fileop_has_listeners()
#endif
            ) {
                proc_fdunlock(p);

                if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
                        /*
                         * call out to allow 3rd party notification of close.
                         * Ignore result of kauth_authorize_fileop call.
                         */
#if CONFIG_MACF
                        cred = kauth_cred_proc_ref(p);
                        mac_file_notify_close(cred, fp->fp_glob);
                        kauth_cred_unref(&cred);
#endif

                        /*
                         * The listener callout only happens if
                         * vnode_getwithref() succeeds; it is paired with
                         * the vnode_put() below.
                         */
                        if (kauth_authorize_fileop_has_listeners() &&
                            vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
                                u_int   fileop_flags = 0;
                                if (fg->fg_flag & FWASWRITTEN) {
                                        fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
                                }
                                kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
                                    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);

                                vnode_put((vnode_t)fg_get_data(fg));
                        }
                }

                if (fp->fp_flags & FP_AIOISSUED) {
                        /*
                         * cancel all async IO requests that can be cancelled.
                         */
                        _aio_close( p, fd );
                }

                proc_fdlock(p);
        }

        /* Only descriptors below fd_knlistsize are handed to knote_fdclose(). */
        if (fd < fdp->fd_knlistsize) {
                knote_fdclose(p, fd);
        }

        /* NOTE(review): presumably waits for outstanding I/O references -- confirm */
        fileproc_drain(p, fp);

        if (flags & FD_DUP2RESV) {
                /*
                 * Empty the slot but clear only UF_CLOSING: UF_RESERVED
                 * stays set so the slot remains reserved for the caller.
                 */
                fdp->fd_ofiles[fd] = NULL;
                fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
        } else {
                fdrelse(p, fd);
        }

        proc_fdunlock(p);

        if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
                KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
                    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
        }

        /* fg was captured on entry, so it is still usable after fp is freed. */
        fileproc_free(fp);

        return fg_drop(p, fg);
}
 1791 
 1792 /*
 1793  * dupfdopen
 1794  *
 1795  * Description: Duplicate the specified descriptor to a free descriptor;
 1796  *              this is the second half of fdopen(), above.
 1797  *
 1798  * Parameters:  p                               current process pointer
 1799  *              indx                            fd to dup to
 1800  *              dfd                             fd to dup from
 1801  *              mode                            mode to set on new fd
 1802  *              error                           command code
 1803  *
 1804  * Returns:     0                               Success
 1805  *              EBADF                           Source fd is bad
 1806  *              EACCES                          Requested mode not allowed
 1807  *              !0                              'error', if not ENODEV or
 1808  *                                              ENXIO
 1809  *
 1810  * Notes:       XXX This is not thread safe; see fdopen() above
 1811  */
 1812 int
 1813 dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
 1814 {
 1815         struct filedesc *fdp = &p->p_fd;
 1816         struct fileproc *wfp;
 1817         struct fileproc *fp;
 1818 #if CONFIG_MACF
 1819         int myerror;
 1820 #endif
 1821 
 1822         /*
 1823          * If the to-be-dup'd fd number is greater than the allowed number
 1824          * of file descriptors, or the fd to be dup'd has already been
 1825          * closed, reject.  Note, check for new == old is necessary as
 1826          * falloc could allocate an already closed to-be-dup'd descriptor
 1827          * as the new descriptor.
 1828          */
 1829         proc_fdlock(p);
 1830 
 1831         fp = fdp->fd_ofiles[indx];
 1832         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
 1833             (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
 1834             (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
 1835                 proc_fdunlock(p);
 1836                 return EBADF;
 1837         }
 1838 #if CONFIG_MACF
 1839         myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
 1840         if (myerror) {
 1841                 proc_fdunlock(p);
 1842                 return myerror;
 1843         }
 1844 #endif
 1845         /*
 1846          * There are two cases of interest here.
 1847          *
 1848          * For ENODEV simply dup (dfd) to file descriptor
 1849          * (indx) and return.
 1850          *
 1851          * For ENXIO steal away the file structure from (dfd) and
 1852          * store it in (indx).  (dfd) is effectively closed by
 1853          * this operation.
 1854          *
 1855          * Any other error code is just returned.
 1856          */
 1857         switch (error) {
 1858         case ENODEV:
 1859                 if (fp_isguarded(wfp, GUARD_DUP)) {
 1860                         proc_fdunlock(p);
 1861                         return EPERM;
 1862                 }
 1863 
 1864                 /*
 1865                  * Check that the mode the file is being opened for is a
 1866                  * subset of the mode of the existing descriptor.
 1867                  */
 1868                 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
 1869                         proc_fdunlock(p);
 1870                         return EACCES;
 1871                 }
 1872                 if (indx >= fdp->fd_afterlast) {
 1873                         fdp->fd_afterlast = indx + 1;
 1874                 }
 1875 
 1876                 if (fp->fp_glob) {
 1877                         fg_free(fp->fp_glob);
 1878                 }
 1879                 fg_ref(p, wfp->fp_glob);
 1880                 fp->fp_glob = wfp->fp_glob;
 1881                 /*
 1882                  * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
 1883                  * unlike dup(), dup2() or fcntl(F_DUPFD).
 1884                  *
 1885                  * open1() already handled O_CLO{EXEC,FORK}
 1886                  */
 1887                 fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));
 1888 
 1889                 procfdtbl_releasefd(p, indx, NULL);
 1890                 fp_drop(p, indx, fp, 1);
 1891                 proc_fdunlock(p);
 1892                 return 0;
 1893 
 1894         default:
 1895                 proc_fdunlock(p);
 1896                 return error;
 1897         }
 1898         /* NOTREACHED */
 1899 }
 1900 
 1901 
 1902 #pragma mark KPIS (sys/file.h)
 1903 
 1904 /*
 1905  * fg_get_vnode
 1906  *
 1907  * Description: Return vnode associated with the file structure, if
 1908  *              any.  The lifetime of the returned vnode is bound to
 1909  *              the lifetime of the file structure.
 1910  *
 1911  * Parameters:  fg                              Pointer to fileglob to
 1912  *                                              inspect
 1913  *
 1914  * Returns:     vnode_t
 1915  */
 1916 vnode_t
 1917 fg_get_vnode(struct fileglob *fg)
 1918 {
 1919         if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
 1920                 return (vnode_t)fg_get_data(fg);
 1921         } else {
 1922                 return NULL;
 1923         }
 1924 }
 1925 
 1926 
 1927 /*
 1928  * fp_getfvp
 1929  *
 1930  * Description: Get fileproc and vnode pointer for a given fd from the per
 1931  *              process open file table of the specified process, and if
 1932  *              successful, increment the fp_iocount
 1933  *
 1934  * Parameters:  p                               Process in which fd lives
 1935  *              fd                              fd to get information for
 1936  *              resultfp                        Pointer to result fileproc
 1937  *                                              pointer area, or 0 if none
 1938  *              resultvp                        Pointer to result vnode pointer
 1939  *                                              area, or 0 if none
 1940  *
 1941  * Returns:     0                               Success
 1942  *              EBADF                           Bad file descriptor
 1943  *              ENOTSUP                         fd does not refer to a vnode
 1944  *
 1945  * Implicit returns:
 1946  *              *resultfp (modified)            Fileproc pointer
 1947  *              *resultvp (modified)            vnode pointer
 1948  *
 1949  * Notes:       The resultfp and resultvp fields are optional, and may be
 1950  *              independently specified as NULL to skip returning information
 1951  *
 1952  * Locks:       Internally takes and releases proc_fdlock
 1953  */
 1954 int
 1955 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
 1956 {
 1957         struct fileproc *fp;
 1958         int error;
 1959 
 1960         error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
 1961         if (error == 0) {
 1962                 if (resultfp) {
 1963                         *resultfp = fp;
 1964                 }
 1965                 if (resultvp) {
 1966                         *resultvp = (struct vnode *)fp_get_data(fp);
 1967                 }
 1968         }
 1969 
 1970         return error;
 1971 }
 1972 
 1973 
 1974 /*
 1975  * fp_get_pipe_id
 1976  *
 1977  * Description: Get pipe id for a given fd from the per process open file table
 1978  *              of the specified process.
 1979  *
 1980  * Parameters:  p                               Process in which fd lives
 1981  *              fd                              fd to get information for
 1982  *              result_pipe_id                  Pointer to result pipe id
 1983  *
 1984  * Returns:     0                               Success
 1985  *              EIVAL                           NULL pointer arguments passed
 1986  *              fp_lookup:EBADF                 Bad file descriptor
 1987  *              ENOTSUP                         fd does not refer to a pipe
 1988  *
 1989  * Implicit returns:
 1990  *              *result_pipe_id (modified)      pipe id
 1991  *
 1992  * Locks:       Internally takes and releases proc_fdlock
 1993  */
 1994 int
 1995 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
 1996 {
 1997         struct fileproc *fp = FILEPROC_NULL;
 1998         struct fileglob *fg = NULL;
 1999         int error = 0;
 2000 
 2001         if (p == NULL || result_pipe_id == NULL) {
 2002                 return EINVAL;
 2003         }
 2004 
 2005         proc_fdlock(p);
 2006         if ((error = fp_lookup(p, fd, &fp, 1))) {
 2007                 proc_fdunlock(p);
 2008                 return error;
 2009         }
 2010         fg = fp->fp_glob;
 2011 
 2012         if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
 2013                 *result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
 2014         } else {
 2015                 error = ENOTSUP;
 2016         }
 2017 
 2018         fp_drop(p, fd, fp, 1);
 2019         proc_fdunlock(p);
 2020         return error;
 2021 }
 2022 
 2023 
 2024 /*
 2025  * file_vnode
 2026  *
 2027  * Description: Given an fd, look it up in the current process's per process
 2028  *              open file table, and return its internal vnode pointer.
 2029  *
 2030  * Parameters:  fd                              fd to obtain vnode from
 2031  *              vpp                             pointer to vnode return area
 2032  *
 2033  * Returns:     0                               Success
 2034  *              EINVAL                          The fd does not refer to a
 2035  *                                              vnode fileproc entry
 2036  *      fp_lookup:EBADF                         Bad file descriptor
 2037  *
 2038  * Implicit returns:
 2039  *              *vpp (modified)                 Returned vnode pointer
 2040  *
 2041  * Locks:       This function internally takes and drops the proc_fdlock for
 2042  *              the current process
 2043  *
 2044  * Notes:       If successful, this function increments the fp_iocount on the
 2045  *              fd's corresponding fileproc.
 2046  *
 2047  *              The fileproc referenced is not returned; because of this, care
 2048  *              must be taken to not drop the last reference (e.g. by closing
 2049  *              the file).  This is inherently unsafe, since the reference may
 2050  *              not be recoverable from the vnode, if there is a subsequent
 2051  *              close that destroys the associate fileproc.  The caller should
 2052  *              therefore retain their own reference on the fileproc so that
 2053  *              the fp_iocount can be dropped subsequently.  Failure to do this
 2054  *              can result in the returned pointer immediately becoming invalid
 2055  *              following the call.
 2056  *
 2057  *              Use of this function is discouraged.
 2058  */
 2059 int
 2060 file_vnode(int fd, struct vnode **vpp)
 2061 {
 2062         return file_vnode_withvid(fd, vpp, NULL);
 2063 }
 2064 
 2065 
 2066 /*
 2067  * file_vnode_withvid
 2068  *
 2069  * Description: Given an fd, look it up in the current process's per process
 2070  *              open file table, and return its internal vnode pointer.
 2071  *
 2072  * Parameters:  fd                              fd to obtain vnode from
 2073  *              vpp                             pointer to vnode return area
 2074  *              vidp                            pointer to vid of the returned vnode
 2075  *
 2076  * Returns:     0                               Success
 2077  *              EINVAL                          The fd does not refer to a
 2078  *                                              vnode fileproc entry
 2079  *      fp_lookup:EBADF                         Bad file descriptor
 2080  *
 2081  * Implicit returns:
 2082  *              *vpp (modified)                 Returned vnode pointer
 2083  *
 2084  * Locks:       This function internally takes and drops the proc_fdlock for
 2085  *              the current process
 2086  *
 2087  * Notes:       If successful, this function increments the fp_iocount on the
 2088  *              fd's corresponding fileproc.
 2089  *
 2090  *              The fileproc referenced is not returned; because of this, care
 2091  *              must be taken to not drop the last reference (e.g. by closing
 2092  *              the file).  This is inherently unsafe, since the reference may
 2093  *              not be recoverable from the vnode, if there is a subsequent
 2094  *              close that destroys the associate fileproc.  The caller should
 2095  *              therefore retain their own reference on the fileproc so that
 2096  *              the fp_iocount can be dropped subsequently.  Failure to do this
 2097  *              can result in the returned pointer immediately becoming invalid
 2098  *              following the call.
 2099  *
 2100  *              Use of this function is discouraged.
 2101  */
 2102 int
 2103 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
 2104 {
 2105         struct fileproc *fp;
 2106         int error;
 2107 
 2108         error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
 2109         if (error == 0) {
 2110                 if (vpp) {
 2111                         *vpp = (struct vnode *)fp_get_data(fp);
 2112                 }
 2113                 if (vidp) {
 2114                         *vidp = vnode_vid((struct vnode *)fp_get_data(fp));
 2115                 }
 2116         }
 2117         return error;
 2118 }
 2119 
 2120 /*
 2121  * file_socket
 2122  *
 2123  * Description: Given an fd, look it up in the current process's per process
 2124  *              open file table, and return its internal socket pointer.
 2125  *
 2126  * Parameters:  fd                              fd to obtain vnode from
 2127  *              sp                              pointer to socket return area
 2128  *
 2129  * Returns:     0                               Success
 2130  *              ENOTSOCK                        Not a socket
 2131  *              fp_lookup:EBADF                 Bad file descriptor
 2132  *
 2133  * Implicit returns:
 2134  *              *sp (modified)                  Returned socket pointer
 2135  *
 2136  * Locks:       This function internally takes and drops the proc_fdlock for
 2137  *              the current process
 2138  *
 2139  * Notes:       If successful, this function increments the fp_iocount on the
 2140  *              fd's corresponding fileproc.
 2141  *
 2142  *              The fileproc referenced is not returned; because of this, care
 2143  *              must be taken to not drop the last reference (e.g. by closing
 2144  *              the file).  This is inherently unsafe, since the reference may
 2145  *              not be recoverable from the socket, if there is a subsequent
 2146  *              close that destroys the associate fileproc.  The caller should
 2147  *              therefore retain their own reference on the fileproc so that
 2148  *              the fp_iocount can be dropped subsequently.  Failure to do this
 2149  *              can result in the returned pointer immediately becoming invalid
 2150  *              following the call.
 2151  *
 2152  *              Use of this function is discouraged.
 2153  */
 2154 int
 2155 file_socket(int fd, struct socket **sp)
 2156 {
 2157         struct fileproc *fp;
 2158         int error;
 2159 
 2160         error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
 2161         if (error == 0) {
 2162                 if (sp) {
 2163                         *sp = (struct socket *)fp_get_data(fp);
 2164                 }
 2165         }
 2166         return error;
 2167 }
 2168 
 2169 
 2170 /*
 2171  * file_flags
 2172  *
 2173  * Description: Given an fd, look it up in the current process's per process
 2174  *              open file table, and return its fileproc's flags field.
 2175  *
 2176  * Parameters:  fd                              fd whose flags are to be
 2177  *                                              retrieved
 2178  *              flags                           pointer to flags data area
 2179  *
 2180  * Returns:     0                               Success
 2181  *              ENOTSOCK                        Not a socket
 2182  *              fp_lookup:EBADF                 Bad file descriptor
 2183  *
 2184  * Implicit returns:
 2185  *              *flags (modified)               Returned flags field
 2186  *
 2187  * Locks:       This function internally takes and drops the proc_fdlock for
 2188  *              the current process
 2189  */
 2190 int
 2191 file_flags(int fd, int *flags)
 2192 {
 2193         proc_t p = current_proc();
 2194         struct fileproc *fp;
 2195         int error = EBADF;
 2196 
 2197         proc_fdlock_spin(p);
 2198         fp = fp_get_noref_locked(p, fd);
 2199         if (fp) {
 2200                 *flags = (int)fp->f_flag;
 2201                 error = 0;
 2202         }
 2203         proc_fdunlock(p);
 2204 
 2205         return error;
 2206 }
 2207 
 2208 
 2209 /*
 2210  * file_drop
 2211  *
 2212  * Description: Drop an iocount reference on an fd, and wake up any waiters
 2213  *              for draining (i.e. blocked in fileproc_drain() called during
 2214  *              the last attempt to close a file).
 2215  *
 2216  * Parameters:  fd                              fd on which an ioreference is
 2217  *                                              to be dropped
 2218  *
 2219  * Returns:     0                               Success
 2220  *
 2221  * Description: Given an fd, look it up in the current process's per process
 2222  *              open file table, and drop it's fileproc's fp_iocount by one
 2223  *
 2224  * Notes:       This is intended as a corresponding operation to the functions
 2225  *              file_vnode() and file_socket() operations.
 2226  *
 2227  *              If the caller can't possibly hold an I/O reference,
 2228  *              this function will panic the kernel rather than allowing
 2229  *              for memory corruption. Callers should always call this
 2230  *              because they acquired an I/O reference on this file before.
 2231  *
 2232  *              Use of this function is discouraged.
 2233  */
 2234 int
 2235 file_drop(int fd)
 2236 {
 2237         struct fileproc *fp;
 2238         proc_t p = current_proc();
 2239         struct filedesc *fdp = &p->p_fd;
 2240         int     needwakeup = 0;
 2241 
 2242         proc_fdlock_spin(p);
 2243         fp = fp_get_noref_locked_with_iocount(p, fd);
 2244 
 2245         if (1 == os_ref_release_locked(&fp->fp_iocount)) {
 2246                 if (fp->fp_flags & FP_SELCONFLICT) {
 2247                         fp->fp_flags &= ~FP_SELCONFLICT;
 2248                 }
 2249 
 2250                 if (fdp->fd_fpdrainwait) {
 2251                         fdp->fd_fpdrainwait = 0;
 2252                         needwakeup = 1;
 2253                 }
 2254         }
 2255         proc_fdunlock(p);
 2256 
 2257         if (needwakeup) {
 2258                 wakeup(&fdp->fd_fpdrainwait);
 2259         }
 2260         return 0;
 2261 }
 2262 
 2263 
 2264 #pragma mark syscalls
 2265 
/*
 * Fallback definitions: each constant below is defined here only when no
 * previously included header has already provided it.
 */

/* NOTE(review): presumably an HFS-specific fcntl selector -- confirm */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

/* NOTE(review): presumably the setter counterpart of the above -- confirm */
#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

/* Write-direction ioctl code in group 'J', number 1, with a u_int64_t argument. */
#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif
 2277 
 2278 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
 2279                 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
 2280                 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
 2281                 ? 1 : 0)
 2282 
 2283 /*
 2284  * sys_getdtablesize
 2285  *
 2286  * Description: Returns the per process maximum size of the descriptor table
 2287  *
 2288  * Parameters:  p                               Process being queried
 2289  *              retval                          Pointer to the call return area
 2290  *
 2291  * Returns:     0                               Success
 2292  *
 2293  * Implicit returns:
 2294  *              *retval (modified)              Size of dtable
 2295  */
 2296 int
 2297 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
 2298 {
 2299         *retval = proc_limitgetcur_nofile(p);
 2300         return 0;
 2301 }
 2302 
 2303 
 2304 /*
 2305  * check_file_seek_range
 2306  *
 2307  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
 2308  *
 2309  * Parameters:  fl              Flock structure.
 2310  *              cur_file_offset Current offset in the file.
 2311  *
 2312  * Returns:     0               on Success.
 2313  *              EOVERFLOW       on overflow.
 2314  *              EINVAL          on offset less than zero.
 2315  */
 2316 
 2317 static int
 2318 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
 2319 {
 2320         if (fl->l_whence == SEEK_CUR) {
 2321                 /* Check if the start marker is beyond LLONG_MAX. */
 2322                 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
 2323                         /* Check if start marker is negative */
 2324                         if (fl->l_start < 0) {
 2325                                 return EINVAL;
 2326                         }
 2327                         return EOVERFLOW;
 2328                 }
 2329                 /* Check if the start marker is negative. */
 2330                 if (fl->l_start + cur_file_offset < 0) {
 2331                         return EINVAL;
 2332                 }
 2333                 /* Check if end marker is beyond LLONG_MAX. */
 2334                 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
 2335                     cur_file_offset, fl->l_len - 1))) {
 2336                         return EOVERFLOW;
 2337                 }
 2338                 /* Check if the end marker is negative. */
 2339                 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
 2340                     fl->l_len < 0)) {
 2341                         return EINVAL;
 2342                 }
 2343         } else if (fl->l_whence == SEEK_SET) {
 2344                 /* Check if the start marker is negative. */
 2345                 if (fl->l_start < 0) {
 2346                         return EINVAL;
 2347                 }
 2348                 /* Check if the end marker is beyond LLONG_MAX. */
 2349                 if ((fl->l_len > 0) &&
 2350                     CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
 2351                         return EOVERFLOW;
 2352                 }
 2353                 /* Check if the end marker is negative. */
 2354                 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
 2355                         return EINVAL;
 2356                 }
 2357         }
 2358         return 0;
 2359 }
 2360 
 2361 
 2362 /*
 2363  * sys_dup
 2364  *
 2365  * Description: Duplicate a file descriptor.
 2366  *
 2367  * Parameters:  p                               Process performing the dup
 2368  *              uap->fd                         The fd to dup
 2369  *              retval                          Pointer to the call return area
 2370  *
 2371  * Returns:     0                               Success
 2372  *              !0                              Errno
 2373  *
 2374  * Implicit returns:
 2375  *              *retval (modified)              The new descriptor
 2376  */
 2377 int
 2378 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
 2379 {
 2380         struct filedesc *fdp = &p->p_fd;
 2381         int old = uap->fd;
 2382         int new, error;
 2383         struct fileproc *fp;
 2384 
 2385         proc_fdlock(p);
 2386         if ((error = fp_lookup(p, old, &fp, 1))) {
 2387                 proc_fdunlock(p);
 2388                 return error;
 2389         }
 2390         if (fp_isguarded(fp, GUARD_DUP)) {
 2391                 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
 2392                 (void) fp_drop(p, old, fp, 1);
 2393                 proc_fdunlock(p);
 2394                 return error;
 2395         }
 2396         if ((error = fdalloc(p, 0, &new))) {
 2397                 fp_drop(p, old, fp, 1);
 2398                 proc_fdunlock(p);
 2399                 return error;
 2400         }
 2401         error = finishdup(p, fdp, old, new, 0, retval);
 2402 
 2403         if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
 2404                 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
 2405                     new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
 2406         }
 2407 
 2408         fp_drop(p, old, fp, 1);
 2409         proc_fdunlock(p);
 2410 
 2411         return error;
 2412 }
 2413 
 2414 /*
 2415  * sys_dup2
 2416  *
 2417  * Description: Duplicate a file descriptor to a particular value.
 2418  *
 2419  * Parameters:  p                               Process performing the dup
 2420  *              uap->from                       The fd to dup
 2421  *              uap->to                         The fd to dup it to
 2422  *              retval                          Pointer to the call return area
 2423  *
 2424  * Returns:     0                               Success
 2425  *              !0                              Errno
 2426  *
 2427  * Implicit returns:
 2428  *              *retval (modified)              The new descriptor
 2429  */
 2430 int
 2431 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
 2432 {
 2433         return dup2(p, uap->from, uap->to, retval);
 2434 }
 2435 
/*
 * dup2
 *
 * Description: Duplicate descriptor (old) of process (p) onto the specific
 *              descriptor number (new), closing whatever (new) currently
 *              refers to.
 *
 * Parameters:  p                               Process performing the dup
 *              old                             The fd to dup
 *              new                             The fd to dup it to
 *              retval                          Pointer to the call return area
 *
 * Returns:     0                               Success
 *              EBADF                           (new) is negative or not below
 *                                              proc_limitgetcur_nofile(p)
 *              !0                              Errno from fp_lookup(),
 *                                              fp_guard_exception(),
 *                                              fdalloc() or finishdup()
 *
 * Implicit returns:
 *              *retval (modified)              The new descriptor
 *
 * Locks:       Takes the proc_fdlock on entry and drops it before
 *              returning.  The lock is released while the previous
 *              occupant of (new) is closed (fp_close_and_unlock()) and is
 *              then retaken.
 *
 * Notes:       Every path past the successful fp_lookup() of (old) pairs
 *              it with an fp_drop() before returning or restarting.
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
        struct filedesc *fdp = &p->p_fd;
        struct fileproc *fp, *nfp;
        int i, error;

        proc_fdlock(p);

        /*
         * Restart point: taken again after waiting for a reserved target
         * slot, since (old) was dropped before the wait and must be looked
         * up and validated afresh.
         */
startover:
        if ((error = fp_lookup(p, old, &fp, 1))) {
                proc_fdunlock(p);
                return error;
        }
        if (fp_isguarded(fp, GUARD_DUP)) {
                error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
                (void) fp_drop(p, old, fp, 1);
                proc_fdunlock(p);
                return error;
        }
        if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
                fp_drop(p, old, fp, 1);
                proc_fdunlock(p);
                return EBADF;
        }
        /* Duplicating a descriptor onto itself succeeds without any work. */
        if (old == new) {
                fp_drop(p, old, fp, 1);
                *retval = new;
                proc_fdunlock(p);
                return 0;
        }
        /*
         * new < 0 was already rejected above, so only the upper bound test
         * can be true here: (new) lies beyond the current table.
         * NOTE(review): fdalloc() presumably grows the table and reserves
         * the slot it returns in (i) -- confirm.
         */
        if (new < 0 || new >= fdp->fd_nfiles) {
                if ((error = fdalloc(p, new, &i))) {
                        fp_drop(p, old, fp, 1);
                        proc_fdunlock(p);
                        return error;
                }
                /*
                 * A different slot than requested was handed out: give it
                 * back and handle (new) like an in-table descriptor.
                 */
                if (new != i) {
                        fdrelse(p, i);
                        goto closeit;
                }
        } else {
closeit:
                /*
                 * Someone else has (new) reserved: drop our reference on
                 * (old), wait for the slot, and start over.
                 */
                if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
                        fp_drop(p, old, fp, 1);
                        procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
                        proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
                        goto startover;
                }

                if ((nfp = fdp->fd_ofiles[new]) != NULL) {
                        if (fp_isguarded(nfp, GUARD_CLOSE)) {
                                fp_drop(p, old, fp, 1);
                                error = fp_guard_exception(p,
                                    new, nfp, kGUARD_EXC_CLOSE);
                                proc_fdunlock(p);
                                return error;
                        }
                        /*
                         * Close the current occupant.  FD_DUP2RESV asks for
                         * the emptied slot to stay reserved (asserted
                         * below); the call returns with the fd lock
                         * dropped, so retake it.
                         */
                        (void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
                        proc_fdlock(p);
                        assert(fdp->fd_ofileflags[new] & UF_RESERVED);
                } else {
#if DIAGNOSTIC
                        if (fdp->fd_ofiles[new] != NULL) {
                                panic("dup2: no ref on fileproc %d", new);
                        }
#endif
                        /* Slot is empty and unreserved: reserve it for us. */
                        procfdtbl_reservefd(p, new);
                }
        }
        /* At this point (new) is expected to be empty and reserved. */
#if DIAGNOSTIC
        if (fdp->fd_ofiles[new] != 0) {
                panic("dup2: overwriting fd_ofiles with new %d", new);
        }
        if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
                panic("dup2: unreserved fileflags with new %d", new);
        }
#endif
        error = finishdup(p, fdp, old, new, 0, retval);
        fp_drop(p, old, fp, 1);
        proc_fdunlock(p);

        return error;
}
 2522 
 2523 
 2524 /*
 2525  * fcntl
 2526  *
 2527  * Description: The file control system call.
 2528  *
 2529  * Parameters:  p                               Process performing the fcntl
 2530  *              uap->fd                         The fd to operate against
 2531  *              uap->cmd                        The command to perform
 2532  *              uap->arg                        Pointer to the command argument
 2533  *              retval                          Pointer to the call return area
 2534  *
 2535  * Returns:     0                               Success
 2536  *              !0                              Errno (see fcntl_nocancel)
 2537  *
 2538  * Implicit returns:
 2539  *              *retval (modified)              fcntl return value (if any)
 2540  *
 2541  * Notes:       This system call differs from fcntl_nocancel() in that it
 2542  *              tests for cancellation prior to performing a potentially
 2543  *              blocking operation.
 2544  */
 2545 int
 2546 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
 2547 {
 2548         __pthread_testcancel(1);
 2549         return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
 2550 }
 2551 
 2552 #define ACCOUNT_OPENFROM_ENTITLEMENT \
 2553         "com.apple.private.vfs.role-account-openfrom"
 2554 
 2555 /*
 2556  * sys_fcntl_nocancel
 2557  *
 2558  * Description: A non-cancel-testing file control system call.
 2559  *
 2560  * Parameters:  p                               Process performing the fcntl
 2561  *              uap->fd                         The fd to operate against
 2562  *              uap->cmd                        The command to perform
 2563  *              uap->arg                        Pointer to the command argument
 2564  *              retval                          Pointer to the call return area
 2565  *
 2566  * Returns:     0                               Success
 2567  *              EINVAL
 2568  *      fp_lookup:EBADF                         Bad file descriptor
 2569  * [F_DUPFD]
 2570  *      fdalloc:EMFILE
 2571  *      fdalloc:ENOMEM
 2572  *      finishdup:EBADF
 2573  *      finishdup:ENOMEM
 2574  * [F_SETOWN]
 2575  *              ESRCH
 2576  * [F_SETLK]
 2577  *              EBADF
 2578  *              EOVERFLOW
 2579  *      copyin:EFAULT
 2580  *      vnode_getwithref:???
 2581  *      VNOP_ADVLOCK:???
 2582  *      msleep:ETIMEDOUT
 2583  * [F_GETLK]
 2584  *              EBADF
 2585  *              EOVERFLOW
 2586  *      copyin:EFAULT
 2587  *      copyout:EFAULT
 2588  *      vnode_getwithref:???
 2589  *      VNOP_ADVLOCK:???
 2590  * [F_PREALLOCATE]
 2591  *              EBADF
 2592  *              EFBIG
 2593  *              EINVAL
 2594  *              ENOSPC
 2595  *      copyin:EFAULT
 2596  *      copyout:EFAULT
 2597  *      vnode_getwithref:???
 2598  *      VNOP_ALLOCATE:???
 2599  * [F_SETSIZE,F_RDADVISE]
 2600  *              EBADF
 2601  *              EINVAL
 2602  *      copyin:EFAULT
 2603  *      vnode_getwithref:???
 2604  * [F_RDAHEAD,F_NOCACHE]
 2605  *              EBADF
 2606  *      vnode_getwithref:???
 2607  * [???]
 2608  *
 2609  * Implicit returns:
 2610  *              *retval (modified)              fcntl return value (if any)
 2611  */
 2612 #define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
 2613         struct vfs_context context = { \
 2614             .vc_thread = current_thread(), \
 2615             .vc_ucred = fp->f_cred, \
 2616         }
 2617 
 2618 static user_addr_t
 2619 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
 2620 {
 2621         /*
 2622          * Since the arg parameter is defined as a long but may be
 2623          * either a long or a pointer we must take care to handle
 2624          * sign extension issues.  Our sys call munger will sign
 2625          * extend a long when we are called from a 32-bit process.
 2626          * Since we can never have an address greater than 32-bits
 2627          * from a 32-bit process we lop off the top 32-bits to avoid
 2628          * getting the wrong address
 2629          */
 2630         return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
 2631 }
 2632 
 2633 /* cleanup code common to fnctl functions, for when the fdlock is still held */
 2634 static int
 2635 sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
 2636 {
 2637         fp_drop(p, fd, fp, 1);
 2638         proc_fdunlock(p);
 2639         return error;
 2640 }
 2641 
 2642 /* cleanup code common to fnctl acting on vnodes, once they unlocked the fdlock */
 2643 static int
 2644 sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
 2645 {
 2646 #pragma unused(vp)
 2647 
 2648         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
 2649         fp_drop(p, fd, fp, 0);
 2650         return error;
 2651 }
 2652 
 2653 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
 2654     struct fileproc *fp, int32_t *retval);
 2655 
 2656 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
 2657     user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
 2658 
 2659 /*
 2660  * SPI (private) for opening a file starting from a dir fd
 2661  *
 2662  * Note: do not inline to keep stack usage under control.
 2663  */
 2664 __attribute__((noinline))
 2665 static int
 2666 sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
 2667     struct fileproc *fp, struct vnode *vp, int32_t *retval)
 2668 {
 2669 #pragma unused(cmd)
 2670 
 2671         user_addr_t argp = sys_fnctl_parse_arg(p, arg);
 2672         struct user_fopenfrom fopen;
 2673         struct vnode_attr *va;
 2674         struct nameidata *nd;
 2675         int error, cmode;
 2676         bool has_entitlement;
 2677 
 2678         /* Check if this isn't a valid file descriptor */
 2679         if ((fp->f_flag & FREAD) == 0) {
 2680                 return sys_fcntl_out(p, fd, fp, EBADF);
 2681         }
 2682         proc_fdunlock(p);
 2683 
 2684         if (vnode_getwithref(vp)) {
 2685                 error = ENOENT;
 2686                 goto outdrop;
 2687         }
 2688 
 2689         /* Only valid for directories */
 2690         if (vp->v_type != VDIR) {
 2691                 vnode_put(vp);
 2692                 error = ENOTDIR;
 2693                 goto outdrop;
 2694         }
 2695 
 2696         /*
 2697          * Only entitled apps may use the credentials of the thread
 2698          * that opened the file descriptor.
 2699          * Non-entitled threads will use their own context.
 2700          */
 2701         has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);
 2702 
 2703         /* Get flags, mode and pathname arguments. */
 2704         if (IS_64BIT_PROCESS(p)) {
 2705                 error = copyin(argp, &fopen, sizeof(fopen));
 2706         } else {
 2707                 struct user32_fopenfrom fopen32;
 2708 
 2709                 error = copyin(argp, &fopen32, sizeof(fopen32));
 2710                 fopen.o_flags = fopen32.o_flags;
 2711                 fopen.o_mode = fopen32.o_mode;
 2712                 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
 2713         }
 2714         if (error) {
 2715                 vnode_put(vp);
 2716                 goto outdrop;
 2717         }
 2718 
 2719         /* open1() can have really deep stacks, so allocate those */
 2720         va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
 2721         nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);
 2722 
 2723         AUDIT_ARG(fflags, fopen.o_flags);
 2724         AUDIT_ARG(mode, fopen.o_mode);
 2725         VATTR_INIT(va);
 2726         /* Mask off all but regular access permissions */
 2727         cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
 2728         VATTR_SET(va, va_mode, cmode & ACCESSPERMS);
 2729 
 2730         SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
 2731 
 2732         /* Start the lookup relative to the file descriptor's vnode. */
 2733         NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
 2734             fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
 2735         nd->ni_dvp = vp;
 2736 
 2737         error = open1(has_entitlement ? &context : vfs_context_current(),
 2738             nd, fopen.o_flags, va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);
 2739 
 2740         kfree_type(struct vnode_attr, va);
 2741         kfree_type(struct nameidata, nd);
 2742 
 2743         vnode_put(vp);
 2744 
 2745 outdrop:
 2746         return sys_fcntl_outdrop(p, fd, fp, vp, error);
 2747 }
 2748 
 2749 int
 2750 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 2751 {
 2752         int fd = uap->fd;
 2753         int cmd = uap->cmd;
 2754         struct filedesc *fdp = &p->p_fd;
 2755         struct fileproc *fp;
 2756         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
 2757         unsigned int oflags, nflags;
 2758         int i, tmp, error, error2, flg = 0;
 2759         struct flock fl = {};
 2760         struct flocktimeout fltimeout;
 2761         struct timespec *timeout = NULL;
 2762         off_t offset;
 2763         int newmin;
 2764         daddr64_t lbn, bn;
 2765         unsigned int fflag;
 2766         user_addr_t argp;
 2767         boolean_t is64bit;
 2768         int has_entitlement = 0;
 2769 
 2770         AUDIT_ARG(fd, uap->fd);
 2771         AUDIT_ARG(cmd, uap->cmd);
 2772 
 2773         proc_fdlock(p);
 2774         if ((error = fp_lookup(p, fd, &fp, 1))) {
 2775                 proc_fdunlock(p);
 2776                 return error;
 2777         }
 2778 
 2779         SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
 2780 
 2781         is64bit = proc_is64bit(p);
 2782         if (is64bit) {
 2783                 argp = uap->arg;
 2784         } else {
 2785                 /*
 2786                  * Since the arg parameter is defined as a long but may be
 2787                  * either a long or a pointer we must take care to handle
 2788                  * sign extension issues.  Our sys call munger will sign
 2789                  * extend a long when we are called from a 32-bit process.
 2790                  * Since we can never have an address greater than 32-bits
 2791                  * from a 32-bit process we lop off the top 32-bits to avoid
 2792                  * getting the wrong address
 2793                  */
 2794                 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
 2795         }
 2796 
 2797 #if CONFIG_MACF
 2798         error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
 2799         if (error) {
 2800                 goto out;
 2801         }
 2802 #endif
 2803 
 2804         switch (cmd) {
 2805         case F_DUPFD:
 2806         case F_DUPFD_CLOEXEC:
 2807                 if (fp_isguarded(fp, GUARD_DUP)) {
 2808                         error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
 2809                         goto out;
 2810                 }
 2811                 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 2812                 AUDIT_ARG(value32, newmin);
 2813                 if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
 2814                         error = EINVAL;
 2815                         goto out;
 2816                 }
 2817                 if ((error = fdalloc(p, newmin, &i))) {
 2818                         goto out;
 2819                 }
 2820                 error = finishdup(p, fdp, fd, i,
 2821                     cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
 2822                 goto out;
 2823 
 2824         case F_GETFD:
 2825                 *retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
 2826                 error = 0;
 2827                 goto out;
 2828 
 2829         case F_SETFD:
 2830                 AUDIT_ARG(value32, (uint32_t)uap->arg);
 2831                 if (uap->arg & FD_CLOEXEC) {
 2832                         fp->fp_flags |= FP_CLOEXEC;
 2833                         error = 0;
 2834                 } else if (!fp->fp_guard_attrs) {
 2835                         fp->fp_flags &= ~FP_CLOEXEC;
 2836                         error = 0;
 2837                 } else {
 2838                         error = fp_guard_exception(p,
 2839                             fd, fp, kGUARD_EXC_NOCLOEXEC);
 2840                 }
 2841                 goto out;
 2842 
 2843         case F_GETFL:
 2844                 fflag = fp->f_flag;
 2845                 if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
 2846                         /*
 2847                          * We insert back F_READ so that conversion back to open flags with
 2848                          * OFLAGS() will come out right. We only need to set 'FREAD' as the
 2849                          * 'O_RDONLY' is always implied.
 2850                          */
 2851                         fflag |= FREAD;
 2852                 }
 2853                 *retval = OFLAGS(fflag);
 2854                 error = 0;
 2855                 goto out;
 2856 
 2857         case F_SETFL:
 2858                 // FIXME (rdar://54898652)
 2859                 //
 2860                 // this code is broken if fnctl(F_SETFL), ioctl() are
 2861                 // called concurrently for the same fileglob.
 2862 
 2863                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 2864                 AUDIT_ARG(value32, tmp);
 2865 
 2866                 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
 2867                         nflags  = oflags & ~FCNTLFLAGS;
 2868                         nflags |= FFLAGS(tmp) & FCNTLFLAGS;
 2869                 });
 2870                 tmp = nflags & FNONBLOCK;
 2871                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 2872                 if (error) {
 2873                         goto out;
 2874                 }
 2875                 tmp = nflags & FASYNC;
 2876                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
 2877                 if (!error) {
 2878                         goto out;
 2879                 }
 2880                 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
 2881                 tmp = 0;
 2882                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 2883                 goto out;
 2884 
 2885         case F_GETOWN:
 2886                 if (fp->f_type == DTYPE_SOCKET) {
 2887                         *retval = ((struct socket *)fp_get_data(fp))->so_pgid;
 2888                         error = 0;
 2889                         goto out;
 2890                 }
 2891                 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
 2892                 *retval = -*retval;
 2893                 goto out;
 2894 
 2895         case F_SETOWN:
 2896                 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
 2897                 AUDIT_ARG(value32, tmp);
 2898                 if (fp->f_type == DTYPE_SOCKET) {
 2899                         ((struct socket *)fp_get_data(fp))->so_pgid = tmp;
 2900                         error = 0;
 2901                         goto out;
 2902                 }
 2903                 if (fp->f_type == DTYPE_PIPE) {
 2904                         error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 2905                         goto out;
 2906                 }
 2907 
 2908                 if (tmp <= 0) {
 2909                         tmp = -tmp;
 2910                 } else {
 2911                         proc_t p1 = proc_find(tmp);
 2912                         if (p1 == 0) {
 2913                                 error = ESRCH;
 2914                                 goto out;
 2915                         }
 2916                         tmp = (int)p1->p_pgrpid;
 2917                         proc_rele(p1);
 2918                 }
 2919                 error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 2920                 goto out;
 2921 
 2922         case F_SETNOSIGPIPE:
 2923                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
 2924                 if (fp->f_type == DTYPE_SOCKET) {
 2925 #if SOCKETS
 2926                         error = sock_setsockopt((struct socket *)fp_get_data(fp),
 2927                             SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
 2928 #else
 2929                         error = EINVAL;
 2930 #endif
 2931                 } else {
 2932                         struct fileglob *fg = fp->fp_glob;
 2933 
 2934                         lck_mtx_lock_spin(&fg->fg_lock);
 2935                         if (tmp) {
 2936                                 fg->fg_lflags |= FG_NOSIGPIPE;
 2937                         } else {
 2938                                 fg->fg_lflags &= ~FG_NOSIGPIPE;
 2939                         }
 2940                         lck_mtx_unlock(&fg->fg_lock);
 2941                         error = 0;
 2942                 }
 2943                 goto out;
 2944 
 2945         case F_GETNOSIGPIPE:
 2946                 if (fp->f_type == DTYPE_SOCKET) {
 2947 #if SOCKETS
 2948                         int retsize = sizeof(*retval);
 2949                         error = sock_getsockopt((struct socket *)fp_get_data(fp),
 2950                             SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
 2951 #else
 2952                         error = EINVAL;
 2953 #endif
 2954                 } else {
 2955                         *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
 2956                             1 : 0;
 2957                         error = 0;
 2958                 }
 2959                 goto out;
 2960 
 2961         case F_SETCONFINED:
 2962                 /*
 2963                  * If this is the only reference to this fglob in the process
 2964                  * and it's already marked as close-on-fork then mark it as
 2965                  * (immutably) "confined" i.e. any fd that points to it will
 2966                  * forever be close-on-fork, and attempts to use an IPC
 2967                  * mechanism to move the descriptor elsewhere will fail.
 2968                  */
 2969                 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
 2970                         struct fileglob *fg = fp->fp_glob;
 2971 
 2972                         lck_mtx_lock_spin(&fg->fg_lock);
 2973                         if (fg->fg_lflags & FG_CONFINED) {
 2974                                 error = 0;
 2975                         } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
 2976                                 error = EAGAIN; /* go close the dup .. */
 2977                         } else if (fp->fp_flags & FP_CLOFORK) {
 2978                                 fg->fg_lflags |= FG_CONFINED;
 2979                                 error = 0;
 2980                         } else {
 2981                                 error = EBADF;  /* open without O_CLOFORK? */
 2982                         }
 2983                         lck_mtx_unlock(&fg->fg_lock);
 2984                 } else {
 2985                         /*
 2986                          * Other subsystems may have built on the immutability
 2987                          * of FG_CONFINED; clearing it may be tricky.
 2988                          */
 2989                         error = EPERM;          /* immutable */
 2990                 }
 2991                 goto out;
 2992 
 2993         case F_GETCONFINED:
 2994                 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
 2995                 error = 0;
 2996                 goto out;
 2997 
 2998         case F_SETLKWTIMEOUT:
 2999         case F_SETLKW:
 3000         case F_OFD_SETLKWTIMEOUT:
 3001         case F_OFD_SETLKW:
 3002                 flg |= F_WAIT;
 3003                 OS_FALLTHROUGH;
 3004 
 3005         case F_SETLK:
 3006         case F_OFD_SETLK:
 3007                 if (fp->f_type != DTYPE_VNODE) {
 3008                         error = EBADF;
 3009                         goto out;
 3010                 }
 3011                 vp = (struct vnode *)fp_get_data(fp);
 3012 
 3013                 fflag = fp->f_flag;
 3014                 offset = fp->f_offset;
 3015                 proc_fdunlock(p);
 3016 
 3017                 /* Copy in the lock structure */
 3018                 if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
 3019                         error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
 3020                         if (error) {
 3021                                 goto outdrop;
 3022                         }
 3023                         fl = fltimeout.fl;
 3024                         timeout = &fltimeout.timeout;
 3025                 } else {
 3026                         error = copyin(argp, (caddr_t)&fl, sizeof(fl));
 3027                         if (error) {
 3028                                 goto outdrop;
 3029                         }
 3030                 }
 3031 
 3032                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
 3033                 /* and ending byte for EOVERFLOW in SEEK_SET */
 3034                 error = check_file_seek_range(&fl, offset);
 3035                 if (error) {
 3036                         goto outdrop;
 3037                 }
 3038 
 3039                 if ((error = vnode_getwithref(vp))) {
 3040                         goto outdrop;
 3041                 }
 3042                 if (fl.l_whence == SEEK_CUR) {
 3043                         fl.l_start += offset;
 3044                 }
 3045 
 3046 #if CONFIG_MACF
 3047                 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
 3048                     F_SETLK, &fl);
 3049                 if (error) {
 3050                         (void)vnode_put(vp);
 3051                         goto outdrop;
 3052                 }
 3053 #endif
 3054 
 3055 #if CONFIG_FILE_LEASES
 3056                 (void)vnode_breaklease(vp, O_WRONLY, vfs_context_current());
 3057 #endif
 3058 
 3059                 switch (cmd) {
 3060                 case F_OFD_SETLK:
 3061                 case F_OFD_SETLKW:
 3062                 case F_OFD_SETLKWTIMEOUT:
 3063                         flg |= F_OFD_LOCK;
 3064                         switch (fl.l_type) {
 3065                         case F_RDLCK:
 3066                                 if ((fflag & FREAD) == 0) {
 3067                                         error = EBADF;
 3068                                         break;
 3069                                 }
 3070                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
 3071                                     F_SETLK, &fl, flg, &context, timeout);
 3072                                 break;
 3073                         case F_WRLCK:
 3074                                 if ((fflag & FWRITE) == 0) {
 3075                                         error = EBADF;
 3076                                         break;
 3077                                 }
 3078                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
 3079                                     F_SETLK, &fl, flg, &context, timeout);
 3080                                 break;
 3081                         case F_UNLCK:
 3082                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
 3083                                     F_UNLCK, &fl, F_OFD_LOCK, &context,
 3084                                     timeout);
 3085                                 break;
 3086                         default:
 3087                                 error = EINVAL;
 3088                                 break;
 3089                         }
 3090                         if (0 == error &&
 3091                             (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
 3092                                 struct fileglob *fg = fp->fp_glob;
 3093 
 3094                                 /*
 3095                                  * arrange F_UNLCK on last close (once
 3096                                  * set, FG_HAS_OFDLOCK is immutable)
 3097                                  */
 3098                                 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
 3099                                         lck_mtx_lock_spin(&fg->fg_lock);
 3100                                         fg->fg_lflags |= FG_HAS_OFDLOCK;
 3101                                         lck_mtx_unlock(&fg->fg_lock);
 3102                                 }
 3103                         }
 3104                         break;
 3105                 default:
 3106                         flg |= F_POSIX;
 3107                         switch (fl.l_type) {
 3108                         case F_RDLCK:
 3109                                 if ((fflag & FREAD) == 0) {
 3110                                         error = EBADF;
 3111                                         break;
 3112                                 }
 3113                                 // XXX UInt32 unsafe for LP64 kernel
 3114                                 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
 3115                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
 3116                                     F_SETLK, &fl, flg, &context, timeout);
 3117                                 break;
 3118                         case F_WRLCK:
 3119                                 if ((fflag & FWRITE) == 0) {
 3120                                         error = EBADF;
 3121                                         break;
 3122                                 }
 3123                                 // XXX UInt32 unsafe for LP64 kernel
 3124                                 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
 3125                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
 3126                                     F_SETLK, &fl, flg, &context, timeout);
 3127                                 break;
 3128                         case F_UNLCK:
 3129                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
 3130                                     F_UNLCK, &fl, F_POSIX, &context, timeout);
 3131                                 break;
 3132                         default:
 3133                                 error = EINVAL;
 3134                                 break;
 3135                         }
 3136                         break;
 3137                 }
 3138                 (void) vnode_put(vp);
 3139                 goto outdrop;
 3140 
 3141         case F_GETLK:
 3142         case F_OFD_GETLK:
 3143         case F_GETLKPID:
 3144         case F_OFD_GETLKPID:
 3145                 if (fp->f_type != DTYPE_VNODE) {
 3146                         error = EBADF;
 3147                         goto out;
 3148                 }
 3149                 vp = (struct vnode *)fp_get_data(fp);
 3150 
 3151                 offset = fp->f_offset;
 3152                 proc_fdunlock(p);
 3153 
 3154                 /* Copy in the lock structure */
 3155                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
 3156                 if (error) {
 3157                         goto outdrop;
 3158                 }
 3159 
 3160                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
 3161                 /* and ending byte for EOVERFLOW in SEEK_SET */
 3162                 error = check_file_seek_range(&fl, offset);
 3163                 if (error) {
 3164                         goto outdrop;
 3165                 }
 3166 
 3167                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
 3168                         error = EINVAL;
 3169                         goto outdrop;
 3170                 }
 3171 
 3172                 switch (fl.l_type) {
 3173                 case F_RDLCK:
 3174                 case F_UNLCK:
 3175                 case F_WRLCK:
 3176                         break;
 3177                 default:
 3178                         error = EINVAL;
 3179                         goto outdrop;
 3180                 }
 3181 
 3182                 switch (fl.l_whence) {
 3183                 case SEEK_CUR:
 3184                 case SEEK_SET:
 3185                 case SEEK_END:
 3186                         break;
 3187                 default:
 3188                         error = EINVAL;
 3189                         goto outdrop;
 3190                 }
 3191 
 3192                 if ((error = vnode_getwithref(vp)) == 0) {
 3193                         if (fl.l_whence == SEEK_CUR) {
 3194                                 fl.l_start += offset;
 3195                         }
 3196 
 3197 #if CONFIG_MACF
 3198                         error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
 3199                             cmd, &fl);
 3200                         if (error == 0)
 3201 #endif
 3202                         switch (cmd) {
 3203                         case F_OFD_GETLK:
 3204                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
 3205                                     F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
 3206                                 break;
 3207                         case F_OFD_GETLKPID:
 3208                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
 3209                                     F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
 3210                                 break;
 3211                         default:
 3212                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
 3213                                     cmd, &fl, F_POSIX, &context, NULL);
 3214                                 break;
 3215                         }
 3216 
 3217                         (void)vnode_put(vp);
 3218 
 3219                         if (error == 0) {
 3220                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
 3221                         }
 3222                 }
 3223                 goto outdrop;
 3224 
 3225         case F_PREALLOCATE: {
 3226                 fstore_t alloc_struct;    /* structure for allocate command */
 3227                 u_int32_t alloc_flags = 0;
 3228 
 3229                 if (fp->f_type != DTYPE_VNODE) {
 3230                         error = EBADF;
 3231                         goto out;
 3232                 }
 3233 
 3234                 vp = (struct vnode *)fp_get_data(fp);
 3235                 proc_fdunlock(p);
 3236 
 3237                 /* make sure that we have write permission */
 3238                 if ((fp->f_flag & FWRITE) == 0) {
 3239                         error = EBADF;
 3240                         goto outdrop;
 3241                 }
 3242 
 3243                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
 3244                 if (error) {
 3245                         goto outdrop;
 3246                 }
 3247 
 3248                 /* now set the space allocated to 0 */
 3249                 alloc_struct.fst_bytesalloc = 0;
 3250 
 3251                 /*
 3252                  * Do some simple parameter checking
 3253                  */
 3254 
 3255                 /* set up the flags */
 3256 
 3257                 alloc_flags |= PREALLOCATE;
 3258 
 3259                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
 3260                         alloc_flags |= ALLOCATECONTIG;
 3261                 }
 3262 
 3263                 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
 3264                         alloc_flags |= ALLOCATEALL;
 3265                 }
 3266 
 3267                 if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
 3268                         alloc_flags |= ALLOCATEPERSIST;
 3269                 }
 3270 
 3271                 /*
 3272                  * Do any position mode specific stuff.  The supported
 3273                  * position modes are F_PEOFPOSMODE and F_VOLPOSMODE.
 3274                  */
 3275 
 3276                 switch (alloc_struct.fst_posmode) {
 3277                 case F_PEOFPOSMODE:
 3278                         if (alloc_struct.fst_offset != 0) {
 3279                                 error = EINVAL;
 3280                                 goto outdrop;
 3281                         }
 3282 
 3283                         alloc_flags |= ALLOCATEFROMPEOF;
 3284                         break;
 3285 
 3286                 case F_VOLPOSMODE:
 3287                         if (alloc_struct.fst_offset <= 0) {
 3288                                 error = EINVAL;
 3289                                 goto outdrop;
 3290                         }
 3291 
 3292                         alloc_flags |= ALLOCATEFROMVOL;
 3293                         break;
 3294 
 3295                 default: {
 3296                         error = EINVAL;
 3297                         goto outdrop;
 3298                 }
 3299                 }
 3300                 if ((error = vnode_getwithref(vp)) == 0) {
 3301                         /*
 3302                          * call allocate to get the space
 3303                          */
 3304                         error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
 3305                             &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
 3306                             &context);
 3307                         (void)vnode_put(vp);
 3308 
 3309                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
 3310 
 3311                         if (error == 0) {
 3312                                 error = error2;
 3313                         }
 3314                 }
 3315                 goto outdrop;
 3316         }
 3317         case F_PUNCHHOLE: {
 3318                 fpunchhole_t args;
 3319 
 3320                 if (fp->f_type != DTYPE_VNODE) {
 3321                         error = EBADF;
 3322                         goto out;
 3323                 }
 3324 
 3325                 vp = (struct vnode *)fp_get_data(fp);
 3326                 proc_fdunlock(p);
 3327 
 3328                 /* need write permissions */
 3329                 if ((fp->f_flag & FWRITE) == 0) {
 3330                         error = EPERM;
 3331                         goto outdrop;
 3332                 }
 3333 
 3334                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
 3335                         goto outdrop;
 3336                 }
 3337 
 3338                 if ((error = vnode_getwithref(vp))) {
 3339                         goto outdrop;
 3340                 }
 3341 
 3342 #if CONFIG_MACF
 3343                 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
 3344                         (void)vnode_put(vp);
 3345                         goto outdrop;
 3346                 }
 3347 #endif
 3348 
 3349                 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
 3350                 (void)vnode_put(vp);
 3351 
 3352                 goto outdrop;
 3353         }
 3354         case F_TRIM_ACTIVE_FILE: {
 3355                 ftrimactivefile_t args;
 3356 
 3357                 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
 3358                         error = EACCES;
 3359                         goto out;
 3360                 }
 3361 
 3362                 if (fp->f_type != DTYPE_VNODE) {
 3363                         error = EBADF;
 3364                         goto out;
 3365                 }
 3366 
 3367                 vp = (struct vnode *)fp_get_data(fp);
 3368                 proc_fdunlock(p);
 3369 
 3370                 /* need write permissions */
 3371                 if ((fp->f_flag & FWRITE) == 0) {
 3372                         error = EPERM;
 3373                         goto outdrop;
 3374                 }
 3375 
 3376                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
 3377                         goto outdrop;
 3378                 }
 3379 
 3380                 if ((error = vnode_getwithref(vp))) {
 3381                         goto outdrop;
 3382                 }
 3383 
 3384                 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
 3385                 (void)vnode_put(vp);
 3386 
 3387                 goto outdrop;
 3388         }
 3389         case F_SPECULATIVE_READ: {
 3390                 fspecread_t args;
 3391                 off_t temp_length = 0;
 3392 
 3393                 if (fp->f_type != DTYPE_VNODE) {
 3394                         error = EBADF;
 3395                         goto out;
 3396                 }
 3397 
 3398                 vp = (struct vnode *)fp_get_data(fp);
 3399                 proc_fdunlock(p);
 3400 
 3401                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
 3402                         goto outdrop;
 3403                 }
 3404 
 3405                 /* Discard invalid offsets or lengths */
 3406                 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
 3407                         error = EINVAL;
 3408                         goto outdrop;
 3409                 }
 3410 
 3411                 /*
 3412                  * Round the file offset down to a page-size boundary (or to 0).
 3413                  * The filesystem will need to round the length up to the end of the page boundary
 3414                  * or to the EOF of the file.
 3415                  */
 3416                 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
 3417                 uint64_t foff_delta = args.fsr_offset - foff;
 3418                 args.fsr_offset = (off_t) foff;
 3419 
 3420                 /*
 3421                  * Now add in the delta to the supplied length. Since we may have adjusted the
 3422                  * offset, increase it by the amount that we adjusted.
 3423                  */
 3424                 if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
 3425                         error = EOVERFLOW;
 3426                         goto outdrop;
 3427                 }
 3428 
 3429                 /*
 3430                  * Make sure (fsr_offset + fsr_length) does not overflow.
 3431                  */
 3432                 if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
 3433                         error = EOVERFLOW;
 3434                         goto outdrop;
 3435                 }
 3436 
 3437                 if ((error = vnode_getwithref(vp))) {
 3438                         goto outdrop;
 3439                 }
 3440                 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
 3441                 (void)vnode_put(vp);
 3442 
 3443                 goto outdrop;
 3444         }
 3445         case F_SETSIZE:
 3446                 if (fp->f_type != DTYPE_VNODE) {
 3447                         error = EBADF;
 3448                         goto out;
 3449                 }
 3450                 vp = (struct vnode *)fp_get_data(fp);
 3451                 proc_fdunlock(p);
 3452 
 3453                 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
 3454                 if (error) {
 3455                         goto outdrop;
 3456                 }
 3457                 AUDIT_ARG(value64, offset);
 3458 
 3459                 error = vnode_getwithref(vp);
 3460                 if (error) {
 3461                         goto outdrop;
 3462                 }
 3463 
 3464 #if CONFIG_MACF
 3465                 error = mac_vnode_check_truncate(&context,
 3466                     fp->fp_glob->fg_cred, vp);
 3467                 if (error) {
 3468                         (void)vnode_put(vp);
 3469                         goto outdrop;
 3470                 }
 3471 #endif
 3472                 /*
 3473                  * Make sure that we are root.  Growing a file
 3474                  * without zero filling the data is a security hole.
 3475                  */
 3476                 if (!kauth_cred_issuser(kauth_cred_get())) {
 3477                         error = EACCES;
 3478                 } else {
 3479                         /*
 3480                          * Require privilege to change file size without zerofill,
 3481                          * else will change the file size and zerofill it.
 3482                          */
 3483                         error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
 3484                         if (error == 0) {
 3485                                 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
 3486                         } else {
 3487                                 error = vnode_setsize(vp, offset, 0, &context);
 3488                         }
 3489 
 3490 #if CONFIG_MACF
 3491                         if (error == 0) {
 3492                                 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
 3493                         }
 3494 #endif
 3495                 }
 3496 
 3497                 (void)vnode_put(vp);
 3498                 goto outdrop;
 3499 
 3500         case F_RDAHEAD:
 3501                 if (fp->f_type != DTYPE_VNODE) {
 3502                         error = EBADF;
 3503                         goto out;
 3504                 }
 3505                 if (uap->arg) {
 3506                         os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
 3507                 } else {
 3508                         os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
 3509                 }
 3510                 goto out;
 3511 
 3512         case F_NOCACHE:
 3513                 if (fp->f_type != DTYPE_VNODE) {
 3514                         error = EBADF;
 3515                         goto out;
 3516                 }
 3517                 if (uap->arg) {
 3518                         os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
 3519                 } else {
 3520                         os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
 3521                 }
 3522                 goto out;
 3523 
 3524         case F_NODIRECT:
 3525                 if (fp->f_type != DTYPE_VNODE) {
 3526                         error = EBADF;
 3527                         goto out;
 3528                 }
 3529                 if (uap->arg) {
 3530                         os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
 3531                 } else {
 3532                         os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
 3533                 }
 3534                 goto out;
 3535 
 3536         case F_SINGLE_WRITER:
 3537                 if (fp->f_type != DTYPE_VNODE) {
 3538                         error = EBADF;
 3539                         goto out;
 3540                 }
 3541                 if (uap->arg) {
 3542                         os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
 3543                 } else {
 3544                         os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
 3545                 }
 3546                 goto out;
 3547 
 3548         case F_GLOBAL_NOCACHE:
 3549                 if (fp->f_type != DTYPE_VNODE) {
 3550                         error = EBADF;
 3551                         goto out;
 3552                 }
 3553                 vp = (struct vnode *)fp_get_data(fp);
 3554                 proc_fdunlock(p);
 3555 
 3556                 if ((error = vnode_getwithref(vp)) == 0) {
 3557                         *retval = vnode_isnocache(vp);
 3558 
 3559                         if (uap->arg) {
 3560                                 vnode_setnocache(vp);
 3561                         } else {
 3562                                 vnode_clearnocache(vp);
 3563                         }
 3564 
 3565                         (void)vnode_put(vp);
 3566                 }
 3567                 goto outdrop;
 3568 
 3569         case F_CHECK_OPENEVT:
 3570                 if (fp->f_type != DTYPE_VNODE) {
 3571                         error = EBADF;
 3572                         goto out;
 3573                 }
 3574                 vp = (struct vnode *)fp_get_data(fp);
 3575                 proc_fdunlock(p);
 3576 
 3577                 if ((error = vnode_getwithref(vp)) == 0) {
 3578                         *retval = vnode_is_openevt(vp);
 3579 
 3580                         if (uap->arg) {
 3581                                 vnode_set_openevt(vp);
 3582                         } else {
 3583                                 vnode_clear_openevt(vp);
 3584                         }
 3585 
 3586                         (void)vnode_put(vp);
 3587                 }
 3588                 goto outdrop;
 3589 
 3590         case F_RDADVISE: {
 3591                 struct radvisory ra_struct;
 3592 
 3593                 if (fp->f_type != DTYPE_VNODE) {
 3594                         error = EBADF;
 3595                         goto out;
 3596                 }
 3597                 vp = (struct vnode *)fp_get_data(fp);
 3598                 proc_fdunlock(p);
 3599 
 3600                 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
 3601                         goto outdrop;
 3602                 }
 3603                 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
 3604                         error = EINVAL;
 3605                         goto outdrop;
 3606                 }
 3607                 if ((error = vnode_getwithref(vp)) == 0) {
 3608                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
 3609 
 3610                         (void)vnode_put(vp);
 3611                 }
 3612                 goto outdrop;
 3613         }
 3614 
 3615         case F_FLUSH_DATA:
 3616 
 3617                 if (fp->f_type != DTYPE_VNODE) {
 3618                         error = EBADF;
 3619                         goto out;
 3620                 }
 3621                 vp = (struct vnode *)fp_get_data(fp);
 3622                 proc_fdunlock(p);
 3623 
 3624                 if ((error = vnode_getwithref(vp)) == 0) {
 3625                         error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
 3626 
 3627                         (void)vnode_put(vp);
 3628                 }
 3629                 goto outdrop;
 3630 
 3631         case F_LOG2PHYS:
 3632         case F_LOG2PHYS_EXT: {
 3633                 struct log2phys l2p_struct = {};    /* structure for allocate command */
 3634                 int devBlockSize;
 3635 
 3636                 off_t file_offset = 0;
 3637                 size_t a_size = 0;
 3638                 size_t run = 0;
 3639 
 3640                 if (cmd == F_LOG2PHYS_EXT) {
 3641                         error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
 3642                         if (error) {
 3643                                 goto out;
 3644                         }
 3645                         file_offset = l2p_struct.l2p_devoffset;
 3646                 } else {
 3647                         file_offset = fp->f_offset;
 3648                 }
 3649                 if (fp->f_type != DTYPE_VNODE) {
 3650                         error = EBADF;
 3651                         goto out;
 3652                 }
 3653                 vp = (struct vnode *)fp_get_data(fp);
 3654                 proc_fdunlock(p);
 3655                 if ((error = vnode_getwithref(vp))) {
 3656                         goto outdrop;
 3657                 }
 3658                 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
 3659                 if (error) {
 3660                         (void)vnode_put(vp);
 3661                         goto outdrop;
 3662                 }
 3663                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
 3664                 if (error) {
 3665                         (void)vnode_put(vp);
 3666                         goto outdrop;
 3667                 }
 3668                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
 3669                 if (cmd == F_LOG2PHYS_EXT) {
 3670                         if (l2p_struct.l2p_contigbytes < 0) {
 3671                                 vnode_put(vp);
 3672                                 error = EINVAL;
 3673                                 goto outdrop;
 3674                         }
 3675 
 3676                         a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
 3677                 } else {
 3678                         a_size = devBlockSize;
 3679                 }
 3680 
 3681                 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
 3682 
 3683                 (void)vnode_put(vp);
 3684 
 3685                 if (!error) {
 3686                         l2p_struct.l2p_flags = 0;       /* for now */
 3687                         if (cmd == F_LOG2PHYS_EXT) {
 3688                                 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
 3689                         } else {
 3690                                 l2p_struct.l2p_contigbytes = 0; /* for now */
 3691                         }
 3692 
 3693                         /*
 3694                          * The block number being -1 suggests that the file offset is not backed
 3695                          * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
 3696                          */
 3697                         if (bn == -1) {
 3698                                 /* Don't multiply it by the block size */
 3699                                 l2p_struct.l2p_devoffset = bn;
 3700                         } else {
 3701                                 l2p_struct.l2p_devoffset = bn * devBlockSize;
 3702                                 l2p_struct.l2p_devoffset += file_offset - offset;
 3703                         }
 3704                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
 3705                 }
 3706                 goto outdrop;
 3707         }
 3708         case F_GETPATH:
 3709         case F_GETPATH_NOFIRMLINK: {
 3710                 char *pathbufp;
 3711                 size_t pathlen;
 3712 
 3713                 if (fp->f_type != DTYPE_VNODE) {
 3714                         error = EBADF;
 3715                         goto out;
 3716                 }
 3717                 vp = (struct vnode *)fp_get_data(fp);
 3718                 proc_fdunlock(p);
 3719 
 3720                 pathlen = MAXPATHLEN;
 3721                 pathbufp = zalloc(ZV_NAMEI);
 3722 
 3723                 if ((error = vnode_getwithref(vp)) == 0) {
 3724                         error = vn_getpath_ext(vp, NULL, pathbufp,
 3725                             &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
 3726                             VN_GETPATH_NO_FIRMLINK : 0);
 3727                         (void)vnode_put(vp);
 3728 
 3729                         if (error == 0) {
 3730                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
 3731                         }
 3732                 }
 3733                 zfree(ZV_NAMEI, pathbufp);
 3734                 goto outdrop;
 3735         }
 3736 
 3737         case F_PATHPKG_CHECK: {
 3738                 char *pathbufp;
 3739                 size_t pathlen;
 3740 
 3741                 if (fp->f_type != DTYPE_VNODE) {
 3742                         error = EBADF;
 3743                         goto out;
 3744                 }
 3745                 vp = (struct vnode *)fp_get_data(fp);
 3746                 proc_fdunlock(p);
 3747 
 3748                 pathlen = MAXPATHLEN;
 3749                 pathbufp = zalloc(ZV_NAMEI);
 3750 
 3751                 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
 3752                         if ((error = vnode_getwithref(vp)) == 0) {
 3753                                 AUDIT_ARG(text, pathbufp);
 3754                                 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
 3755 
 3756                                 (void)vnode_put(vp);
 3757                         }
 3758                 }
 3759                 zfree(ZV_NAMEI, pathbufp);
 3760                 goto outdrop;
 3761         }
 3762 
 3763         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
 3764         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
 3765         case F_BARRIERFSYNC:  // fsync + barrier
 3766         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
 3767         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
 3768                 if (fp->f_type != DTYPE_VNODE) {
 3769                         error = EBADF;
 3770                         goto out;
 3771                 }
 3772                 vp = (struct vnode *)fp_get_data(fp);
 3773                 proc_fdunlock(p);
 3774 
 3775                 if ((error = vnode_getwithref(vp)) == 0) {
 3776                         error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
 3777 
 3778                         (void)vnode_put(vp);
 3779                 }
 3780                 break;
 3781         }
 3782 
 3783         /*
 3784          * SPI (private) for opening a file starting from a dir fd
 3785          */
 3786         case F_OPENFROM: {
 3787                 /* Check if this isn't a valid file descriptor */
 3788                 if (fp->f_type != DTYPE_VNODE) {
 3789                         error = EBADF;
 3790                         goto out;
 3791                 }
 3792                 vp = (struct vnode *)fp_get_data(fp);
 3793 
 3794                 return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
 3795         }
 3796 
 3797         /*
 3798          * SPI (private) for unlinking a file starting from a dir fd
 3799          */
 3800         case F_UNLINKFROM: {
 3801                 user_addr_t pathname;
 3802 
 3803                 /* Check if this isn't a valid file descriptor */
 3804                 if ((fp->f_type != DTYPE_VNODE) ||
 3805                     (fp->f_flag & FREAD) == 0) {
 3806                         error = EBADF;
 3807                         goto out;
 3808                 }
 3809                 vp = (struct vnode *)fp_get_data(fp);
 3810                 proc_fdunlock(p);
 3811 
 3812                 if (vnode_getwithref(vp)) {
 3813                         error = ENOENT;
 3814                         goto outdrop;
 3815                 }
 3816 
 3817                 /* Only valid for directories */
 3818                 if (vp->v_type != VDIR) {
 3819                         vnode_put(vp);
 3820                         error = ENOTDIR;
 3821                         goto outdrop;
 3822                 }
 3823 
 3824                 /*
 3825                  * Only entitled apps may use the credentials of the thread
 3826                  * that opened the file descriptor.
 3827                  * Non-entitled threads will use their own context.
 3828                  */
 3829                 if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
 3830                         has_entitlement = 1;
 3831                 }
 3832 
 3833                 /* Get the pathname argument. */
 3834                 if (IS_64BIT_PROCESS(p)) {
 3835                         pathname = (user_addr_t)argp;
 3836                 } else {
 3837                         pathname = CAST_USER_ADDR_T(argp);
 3838                 }
 3839 
 3840                 /* Start the lookup relative to the file descriptor's vnode. */
 3841                 error = unlink1(has_entitlement ? &context : vfs_context_current(),
 3842                     vp, pathname, UIO_USERSPACE, 0);
 3843 
 3844                 vnode_put(vp);
 3845                 break;
 3846         }
 3847 
 3848         case F_ADDSIGS:
 3849         case F_ADDFILESIGS:
 3850         case F_ADDFILESIGS_FOR_DYLD_SIM:
 3851         case F_ADDFILESIGS_RETURN:
 3852         case F_ADDFILESIGS_INFO:
 3853         {
 3854                 struct cs_blob *blob = NULL;
 3855                 struct user_fsignatures fs;
 3856                 kern_return_t kr;
 3857                 vm_offset_t kernel_blob_addr;
 3858                 vm_size_t kernel_blob_size;
 3859                 int blob_add_flags = 0;
 3860                 const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
 3861                     offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
 3862                     offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
 3863 
 3864                 if (fp->f_type != DTYPE_VNODE) {
 3865                         error = EBADF;
 3866                         goto out;
 3867                 }
 3868                 vp = (struct vnode *)fp_get_data(fp);
 3869                 proc_fdunlock(p);
 3870 
 3871                 if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
 3872                         blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
 3873                         if ((proc_getcsflags(p) & CS_KILL) == 0) {
 3874                                 proc_lock(p);
 3875                                 proc_csflags_set(p, CS_KILL);
 3876                                 proc_unlock(p);
 3877                         }
 3878                 }
 3879 
 3880                 error = vnode_getwithref(vp);
 3881                 if (error) {
 3882                         goto outdrop;
 3883                 }
 3884 
 3885                 if (IS_64BIT_PROCESS(p)) {
 3886                         error = copyin(argp, &fs, sizeof_fs);
 3887                 } else {
 3888                         if (cmd == F_ADDFILESIGS_INFO) {
 3889                                 error = EINVAL;
 3890                                 vnode_put(vp);
 3891                                 goto outdrop;
 3892                         }
 3893 
 3894                         struct user32_fsignatures fs32;
 3895 
 3896                         error = copyin(argp, &fs32, sizeof(fs32));
 3897                         fs.fs_file_start = fs32.fs_file_start;
 3898                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
 3899                         fs.fs_blob_size = fs32.fs_blob_size;
 3900                 }
 3901 
 3902                 if (error) {
 3903                         vnode_put(vp);
 3904                         goto outdrop;
 3905                 }
 3906 
 3907                 /*
 3908                  * First check if we have something loaded at this offset
 3909                  */
 3910                 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
 3911                 if (blob != NULL) {
 3912                         /* If this is for dyld_sim revalidate the blob */
 3913                         if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
 3914                                 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
 3915                                 if (error) {
 3916                                         blob = NULL;
 3917                                         if (error != EAGAIN) {
 3918                                                 vnode_put(vp);
 3919                                                 goto outdrop;
 3920                                         }
 3921                                 }
 3922                         }
 3923                 }
 3924 
 3925                 if (blob == NULL) {
 3926                         /*
 3927                          * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
 3928                          * our use cases for the immediate future, but note that at the time of this commit, some
 3929                          * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
 3930                          *
 3931                          * We should consider how we can manage this more effectively; the above means that some
 3932                          * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
 3933                          * threshold considered ridiculous at the time of this change.
 3934                          */
 3935 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
 3936                         if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
 3937                                 error = E2BIG;
 3938                                 vnode_put(vp);
 3939                                 goto outdrop;
 3940                         }
 3941 
 3942                         kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
 3943                         kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
 3944                         if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
 3945                                 error = ENOMEM;
 3946                                 vnode_put(vp);
 3947                                 goto outdrop;
 3948                         }
 3949 
 3950                         if (cmd == F_ADDSIGS) {
 3951                                 error = copyin(fs.fs_blob_start,
 3952                                     (void *) kernel_blob_addr,
 3953                                     fs.fs_blob_size);
 3954                         } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
 3955                                 int resid;
 3956 
 3957                                 error = vn_rdwr(UIO_READ,
 3958                                     vp,
 3959                                     (caddr_t) kernel_blob_addr,
 3960                                     (int)kernel_blob_size,
 3961                                     fs.fs_file_start + fs.fs_blob_start,
 3962                                     UIO_SYSSPACE,
 3963                                     0,
 3964                                     kauth_cred_get(),
 3965                                     &resid,
 3966                                     p);
 3967                                 if ((error == 0) && resid) {
 3968                                         /* kernel_blob_size rounded to a page size, but signature may be at end of file */
 3969                                         memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
 3970                                 }
 3971                         }
 3972 
 3973                         if (error) {
 3974                                 ubc_cs_blob_deallocate(kernel_blob_addr,
 3975                                     kernel_blob_size);
 3976                                 vnode_put(vp);
 3977                                 goto outdrop;
 3978                         }
 3979 
 3980                         blob = NULL;
 3981                         error = ubc_cs_blob_add(vp,
 3982                             proc_platform(p),
 3983                             CPU_TYPE_ANY,                       /* not for a specific architecture */
 3984                             CPU_SUBTYPE_ANY,
 3985                             fs.fs_file_start,
 3986                             &kernel_blob_addr,
 3987                             kernel_blob_size,
 3988                             NULL,
 3989                             blob_add_flags,
 3990                             &blob);
 3991 
 3992                         /* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
 3993                         if (error) {
 3994                                 if (kernel_blob_addr) {
 3995                                         ubc_cs_blob_deallocate(kernel_blob_addr,
 3996                                             kernel_blob_size);
 3997                                 }
 3998                                 vnode_put(vp);
 3999                                 goto outdrop;
 4000                         } else {
 4001 #if CHECK_CS_VALIDATION_BITMAP
 4002                                 ubc_cs_validation_bitmap_allocate( vp );
 4003 #endif
 4004                         }
 4005                 }
 4006 
 4007                 if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
 4008                     cmd == F_ADDFILESIGS_INFO) {
 4009                         /*
 4010                          * The first element of the structure is an
 4011                          * off_t that happens to have the same size on
 4012                          * all archs. Let's overwrite that.
 4013                          */
 4014                         off_t end_offset = 0;
 4015                         if (blob) {
 4016                                 end_offset = blob->csb_end_offset;
 4017                         }
 4018                         error = copyout(&end_offset, argp, sizeof(end_offset));
 4019 
 4020                         if (error) {
 4021                                 vnode_put(vp);
 4022                                 goto outdrop;
 4023                         }
 4024                 }
 4025 
 4026                 if (cmd == F_ADDFILESIGS_INFO) {
 4027                         /* Return information. What we copy out depends on the size of the
 4028                          * passed in structure, to keep binary compatibility. */
 4029 
 4030                         if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
 4031                                 // enough room for fs_cdhash[20]+fs_hash_type
 4032 
 4033                                 if (blob != NULL) {
 4034                                         error = copyout(blob->csb_cdhash,
 4035                                             (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
 4036                                             USER_FSIGNATURES_CDHASH_LEN);
 4037                                         if (error) {
 4038                                                 vnode_put(vp);
 4039                                                 goto outdrop;
 4040                                         }
 4041                                         int hashtype = cs_hash_type(blob->csb_hashtype);
 4042                                         error = copyout(&hashtype,
 4043                                             (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
 4044                                             sizeof(int));
 4045                                         if (error) {
 4046                                                 vnode_put(vp);
 4047                                                 goto outdrop;
 4048                                         }
 4049                                 }
 4050                         }
 4051                 }
 4052 
 4053                 (void) vnode_put(vp);
 4054                 break;
 4055         }
 4056 #if CONFIG_SUPPLEMENTAL_SIGNATURES
 4057         case F_ADDFILESUPPL:
 4058         {
 4059                 struct vnode *ivp;
 4060                 struct cs_blob *blob = NULL;
 4061                 struct user_fsupplement fs;
 4062                 int orig_fd;
 4063                 struct fileproc* orig_fp = NULL;
 4064                 kern_return_t kr;
 4065                 vm_offset_t kernel_blob_addr;
 4066                 vm_size_t kernel_blob_size;
 4067 
 4068                 if (!IS_64BIT_PROCESS(p)) {
 4069                         error = EINVAL;
 4070                         goto out; // drop fp and unlock fds
 4071                 }
 4072 
 4073                 if (fp->f_type != DTYPE_VNODE) {
 4074                         error = EBADF;
 4075                         goto out;
 4076                 }
 4077 
 4078                 error = copyin(argp, &fs, sizeof(fs));
 4079                 if (error) {
 4080                         goto out;
 4081                 }
 4082 
 4083                 orig_fd = fs.fs_orig_fd;
 4084                 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
 4085                         printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
 4086                         goto out;
 4087                 }
 4088 
 4089                 if (orig_fp->f_type != DTYPE_VNODE) {
 4090                         error = EBADF;
 4091                         fp_drop(p, orig_fd, orig_fp, 1);
 4092                         goto out;
 4093                 }
 4094 
 4095                 ivp = (struct vnode *)fp_get_data(orig_fp);
 4096 
 4097                 vp = (struct vnode *)fp_get_data(fp);
 4098 
 4099                 proc_fdunlock(p);
 4100 
 4101                 error = vnode_getwithref(ivp);
 4102                 if (error) {
 4103                         fp_drop(p, orig_fd, orig_fp, 0);
 4104                         goto outdrop; //drop fp
 4105                 }
 4106 
 4107                 error = vnode_getwithref(vp);
 4108                 if (error) {
 4109                         vnode_put(ivp);
 4110                         fp_drop(p, orig_fd, orig_fp, 0);
 4111                         goto outdrop;
 4112                 }
 4113 
 4114                 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
 4115                         error = E2BIG;
 4116                         goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
 4117                 }
 4118 
 4119                 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
 4120                 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
 4121                 if (kr != KERN_SUCCESS) {
 4122                         error = ENOMEM;
 4123                         goto dropboth;
 4124                 }
 4125 
 4126                 int resid;
 4127                 error = vn_rdwr(UIO_READ, vp,
 4128                     (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
 4129                     fs.fs_file_start + fs.fs_blob_start,
 4130                     UIO_SYSSPACE, 0,
 4131                     kauth_cred_get(), &resid, p);
 4132                 if ((error == 0) && resid) {
 4133                         /* kernel_blob_size rounded to a page size, but signature may be at end of file */
 4134                         memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
 4135                 }
 4136 
 4137                 if (error) {
 4138                         ubc_cs_blob_deallocate(kernel_blob_addr,
 4139                             kernel_blob_size);
 4140                         goto dropboth;
 4141                 }
 4142 
 4143                 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
 4144                     &kernel_blob_addr, kernel_blob_size, &blob);
 4145 
 4146                 /* ubc_cs_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
 4147                 if (error) {
 4148                         if (kernel_blob_addr) {
 4149                                 ubc_cs_blob_deallocate(kernel_blob_addr,
 4150                                     kernel_blob_size);
 4151                         }
 4152                         goto dropboth;
 4153                 }
 4154                 vnode_put(ivp);
 4155                 vnode_put(vp);
 4156                 fp_drop(p, orig_fd, orig_fp, 0);
 4157                 break;
 4158 
 4159 dropboth:
 4160                 vnode_put(ivp);
 4161                 vnode_put(vp);
 4162                 fp_drop(p, orig_fd, orig_fp, 0);
 4163                 goto outdrop;
 4164         }
 4165 #endif
 4166         case F_GETCODEDIR:
 4167         case F_FINDSIGS: {
 4168                 error = ENOTSUP;
 4169                 goto out;
 4170         }
 4171         case F_CHECK_LV: {
 4172                 struct fileglob *fg;
 4173                 fchecklv_t lv = {};
 4174 
 4175                 if (fp->f_type != DTYPE_VNODE) {
 4176                         error = EBADF;
 4177                         goto out;
 4178                 }
 4179                 fg = fp->fp_glob;
 4180                 proc_fdunlock(p);
 4181 
 4182                 if (IS_64BIT_PROCESS(p)) {
 4183                         error = copyin(argp, &lv, sizeof(lv));
 4184                 } else {
 4185                         struct user32_fchecklv lv32 = {};
 4186 
 4187                         error = copyin(argp, &lv32, sizeof(lv32));
 4188                         lv.lv_file_start = lv32.lv_file_start;
 4189                         lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
 4190                         lv.lv_error_message_size = lv32.lv_error_message_size;
 4191                 }
 4192                 if (error) {
 4193                         goto outdrop;
 4194                 }
 4195 
 4196 #if CONFIG_MACF
 4197                 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
 4198                     (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
 4199 #endif
 4200 
 4201                 break;
 4202         }
 4203         case F_GETSIGSINFO: {
 4204                 struct cs_blob *blob = NULL;
 4205                 fgetsigsinfo_t sigsinfo = {};
 4206 
 4207                 if (fp->f_type != DTYPE_VNODE) {
 4208                         error = EBADF;
 4209                         goto out;
 4210                 }
 4211                 vp = (struct vnode *)fp_get_data(fp);
 4212                 proc_fdunlock(p);
 4213 
 4214                 error = vnode_getwithref(vp);
 4215                 if (error) {
 4216                         goto outdrop;
 4217                 }
 4218 
 4219                 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
 4220                 if (error) {
 4221                         vnode_put(vp);
 4222                         goto outdrop;
 4223                 }
 4224 
 4225                 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
 4226                 if (blob == NULL) {
 4227                         error = ENOENT;
 4228                         vnode_put(vp);
 4229                         goto outdrop;
 4230                 }
 4231                 switch (sigsinfo.fg_info_request) {
 4232                 case GETSIGSINFO_PLATFORM_BINARY:
 4233                         sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
 4234                         error = copyout(&sigsinfo.fg_sig_is_platform,
 4235                             (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
 4236                             sizeof(sigsinfo.fg_sig_is_platform));
 4237                         if (error) {
 4238                                 vnode_put(vp);
 4239                                 goto outdrop;
 4240                         }
 4241                         break;
 4242                 default:
 4243                         error = EINVAL;
 4244                         vnode_put(vp);
 4245                         goto outdrop;
 4246                 }
 4247                 vnode_put(vp);
 4248                 break;
 4249         }
 4250 #if CONFIG_PROTECT
 4251         case F_GETPROTECTIONCLASS: {
 4252                 if (fp->f_type != DTYPE_VNODE) {
 4253                         error = EBADF;
 4254                         goto out;
 4255                 }
 4256                 vp = (struct vnode *)fp_get_data(fp);
 4257 
 4258                 proc_fdunlock(p);
 4259 
 4260                 if (vnode_getwithref(vp)) {
 4261                         error = ENOENT;
 4262                         goto outdrop;
 4263                 }
 4264 
 4265                 struct vnode_attr va;
 4266 
 4267                 VATTR_INIT(&va);
 4268                 VATTR_WANTED(&va, va_dataprotect_class);
 4269                 error = VNOP_GETATTR(vp, &va, &context);
 4270                 if (!error) {
 4271                         if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
 4272                                 *retval = va.va_dataprotect_class;
 4273                         } else {
 4274                                 error = ENOTSUP;
 4275                         }
 4276                 }
 4277 
 4278                 vnode_put(vp);
 4279                 break;
 4280         }
 4281 
 4282         case F_SETPROTECTIONCLASS: {
 4283                 /* tmp must be a valid PROTECTION_CLASS_* */
 4284                 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
 4285 
 4286                 if (fp->f_type != DTYPE_VNODE) {
 4287                         error = EBADF;
 4288                         goto out;
 4289                 }
 4290                 vp = (struct vnode *)fp_get_data(fp);
 4291 
 4292                 proc_fdunlock(p);
 4293 
 4294                 if (vnode_getwithref(vp)) {
 4295                         error = ENOENT;
 4296                         goto outdrop;
 4297                 }
 4298 
 4299                 /* Only go forward if you have write access */
 4300                 vfs_context_t ctx = vfs_context_current();
 4301                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
 4302                         vnode_put(vp);
 4303                         error = EBADF;
 4304                         goto outdrop;
 4305                 }
 4306 
 4307                 struct vnode_attr va;
 4308 
 4309                 VATTR_INIT(&va);
 4310                 VATTR_SET(&va, va_dataprotect_class, tmp);
 4311 
 4312                 error = VNOP_SETATTR(vp, &va, ctx);
 4313 
 4314                 vnode_put(vp);
 4315                 break;
 4316         }
 4317 
 4318         case F_TRANSCODEKEY: {
 4319                 if (fp->f_type != DTYPE_VNODE) {
 4320                         error = EBADF;
 4321                         goto out;
 4322                 }
 4323 
 4324                 vp = (struct vnode *)fp_get_data(fp);
 4325                 proc_fdunlock(p);
 4326 
 4327                 if (vnode_getwithref(vp)) {
 4328                         error = ENOENT;
 4329                         goto outdrop;
 4330                 }
 4331 
 4332                 cp_key_t k = {
 4333                         .len = CP_MAX_WRAPPEDKEYSIZE,
 4334                 };
 4335 
 4336                 k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
 4337                 if (k.key == NULL) {
 4338                         error = ENOMEM;
 4339                 } else {
 4340                         error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
 4341                 }
 4342 
 4343                 vnode_put(vp);
 4344 
 4345                 if (error == 0) {
 4346                         error = copyout(k.key, argp, k.len);
 4347                         *retval = k.len;
 4348                 }
 4349                 kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
 4350 
 4351                 break;
 4352         }
 4353 
 4354         case F_GETPROTECTIONLEVEL:  {
 4355                 if (fp->f_type != DTYPE_VNODE) {
 4356                         error = EBADF;
 4357                         goto out;
 4358                 }
 4359 
 4360                 vp = (struct vnode*)fp_get_data(fp);
 4361                 proc_fdunlock(p);
 4362 
 4363                 if (vnode_getwithref(vp)) {
 4364                         error = ENOENT;
 4365                         goto outdrop;
 4366                 }
 4367 
 4368                 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
 4369 
 4370                 vnode_put(vp);
 4371                 break;
 4372         }
 4373 
 4374         case F_GETDEFAULTPROTLEVEL:  {
 4375                 if (fp->f_type != DTYPE_VNODE) {
 4376                         error = EBADF;
 4377                         goto out;
 4378                 }
 4379 
 4380                 vp = (struct vnode*)fp_get_data(fp);
 4381                 proc_fdunlock(p);
 4382 
 4383                 if (vnode_getwithref(vp)) {
 4384                         error = ENOENT;
 4385                         goto outdrop;
 4386                 }
 4387 
 4388                 /*
 4389                  * if cp_get_major_vers fails, error will be set to proper errno
 4390                  * and cp_version will still be 0.
 4391                  */
 4392 
 4393                 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
 4394 
 4395                 vnode_put(vp);
 4396                 break;
 4397         }
 4398 
 4399 #endif /* CONFIG_PROTECT */
 4400 
 4401         case F_MOVEDATAEXTENTS: {
 4402                 struct fileproc *fp2 = NULL;
 4403                 struct vnode *src_vp = NULLVP;
 4404                 struct vnode *dst_vp = NULLVP;
 4405                 /* We need to grab the 2nd FD out of the arguments before moving on. */
 4406                 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
 4407 
 4408                 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
 4409                 if (error) {
 4410                         goto out;
 4411                 }
 4412 
 4413                 if (fp->f_type != DTYPE_VNODE) {
 4414                         error = EBADF;
 4415                         goto out;
 4416                 }
 4417 
 4418                 /*
 4419                  * For now, special case HFS+ and APFS only, since this
 4420                  * is SPI.
 4421                  */
 4422                 src_vp = (struct vnode *)fp_get_data(fp);
 4423                 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
 4424                         error = ENOTSUP;
 4425                         goto out;
 4426                 }
 4427 
 4428                 /*
 4429                  * Get the references before we start acquiring iocounts on the vnodes,
 4430                  * while we still hold the proc fd lock
 4431                  */
 4432                 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
 4433                         error = EBADF;
 4434                         goto out;
 4435                 }
 4436                 if (fp2->f_type != DTYPE_VNODE) {
 4437                         fp_drop(p, fd2, fp2, 1);
 4438                         error = EBADF;
 4439                         goto out;
 4440                 }
 4441                 dst_vp = (struct vnode *)fp_get_data(fp2);
 4442                 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
 4443                         fp_drop(p, fd2, fp2, 1);
 4444                         error = ENOTSUP;
 4445                         goto out;
 4446                 }
 4447 
 4448 #if CONFIG_MACF
 4449                 /* Re-do MAC checks against the new FD, pass in a fake argument */
 4450                 error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
 4451                 if (error) {
 4452                         fp_drop(p, fd2, fp2, 1);
 4453                         goto out;
 4454                 }
 4455 #endif
 4456                 /* Audit the 2nd FD */
 4457                 AUDIT_ARG(fd, fd2);
 4458 
 4459                 proc_fdunlock(p);
 4460 
 4461                 if (vnode_getwithref(src_vp)) {
 4462                         fp_drop(p, fd2, fp2, 0);
 4463                         error = ENOENT;
 4464                         goto outdrop;
 4465                 }
 4466                 if (vnode_getwithref(dst_vp)) {
 4467                         vnode_put(src_vp);
 4468                         fp_drop(p, fd2, fp2, 0);
 4469                         error = ENOENT;
 4470                         goto outdrop;
 4471                 }
 4472 
 4473                 /*
 4474                  * Basic asserts; validate they are not the same and that
 4475                  * both live on the same filesystem.
 4476                  */
 4477                 if (dst_vp == src_vp) {
 4478                         vnode_put(src_vp);
 4479                         vnode_put(dst_vp);
 4480                         fp_drop(p, fd2, fp2, 0);
 4481                         error = EINVAL;
 4482                         goto outdrop;
 4483                 }
 4484 
 4485                 if (dst_vp->v_mount != src_vp->v_mount) {
 4486                         vnode_put(src_vp);
 4487                         vnode_put(dst_vp);
 4488                         fp_drop(p, fd2, fp2, 0);
 4489                         error = EXDEV;
 4490                         goto outdrop;
 4491                 }
 4492 
 4493                 /* Now we have a legit pair of FDs.  Go to work */
 4494 
 4495                 /* Now check for write access to the target files */
 4496                 if (vnode_authorize(src_vp, NULLVP,
 4497                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
 4498                         vnode_put(src_vp);
 4499                         vnode_put(dst_vp);
 4500                         fp_drop(p, fd2, fp2, 0);
 4501                         error = EBADF;
 4502                         goto outdrop;
 4503                 }
 4504 
 4505                 if (vnode_authorize(dst_vp, NULLVP,
 4506                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
 4507                         vnode_put(src_vp);
 4508                         vnode_put(dst_vp);
 4509                         fp_drop(p, fd2, fp2, 0);
 4510                         error = EBADF;
 4511                         goto outdrop;
 4512                 }
 4513 
 4514                 /* Verify that both vps point to files and not directories */
 4515                 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
 4516                         error = EINVAL;
 4517                         vnode_put(src_vp);
 4518                         vnode_put(dst_vp);
 4519                         fp_drop(p, fd2, fp2, 0);
 4520                         goto outdrop;
 4521                 }
 4522 
 4523                 /*
 4524                  * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
 4525                  * We'll pass in our special bit indicating that the new behavior is expected
 4526                  */
 4527 
 4528                 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
 4529 
 4530                 vnode_put(src_vp);
 4531                 vnode_put(dst_vp);
 4532                 fp_drop(p, fd2, fp2, 0);
 4533                 break;
 4534         }
 4535 
 4536         case F_TRANSFEREXTENTS: {
 4537                 struct fileproc *fp2 = NULL;
 4538                 struct vnode *src_vp = NULLVP;
 4539                 struct vnode *dst_vp = NULLVP;
 4540 
 4541                 /* Get 2nd FD out of the arguments. */
 4542                 int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
 4543                 if (fd2 < 0) {
 4544                         error = EINVAL;
 4545                         goto out;
 4546                 }
 4547 
 4548                 if (fp->f_type != DTYPE_VNODE) {
 4549                         error = EBADF;
 4550                         goto out;
 4551                 }
 4552 
 4553                 /*
 4554                  * Only allow this for APFS
 4555                  */
 4556                 src_vp = (struct vnode *)fp_get_data(fp);
 4557                 if (src_vp->v_tag != VT_APFS) {
 4558                         error = ENOTSUP;
 4559                         goto out;
 4560                 }
 4561 
 4562                 /*
 4563                  * Get the references before we start acquiring iocounts on the vnodes,
 4564                  * while we still hold the proc fd lock
 4565                  */
 4566                 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
 4567                         error = EBADF;
 4568                         goto out;
 4569                 }
 4570                 if (fp2->f_type != DTYPE_VNODE) {
 4571                         fp_drop(p, fd2, fp2, 1);
 4572                         error = EBADF;
 4573                         goto out;
 4574                 }
 4575                 dst_vp = (struct vnode *)fp_get_data(fp2);
 4576                 if (dst_vp->v_tag != VT_APFS) {
 4577                         fp_drop(p, fd2, fp2, 1);
 4578                         error = ENOTSUP;
 4579                         goto out;
 4580                 }
 4581 
 4582 #if CONFIG_MACF
 4583                 /* Re-do MAC checks against the new FD, pass in a fake argument */
 4584                 error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
 4585                 if (error) {
 4586                         fp_drop(p, fd2, fp2, 1);
 4587                         goto out;
 4588                 }
 4589 #endif
 4590                 /* Audit the 2nd FD */
 4591                 AUDIT_ARG(fd, fd2);
 4592 
 4593                 proc_fdunlock(p);
 4594 
 4595                 if (vnode_getwithref(src_vp)) {
 4596                         fp_drop(p, fd2, fp2, 0);
 4597                         error = ENOENT;
 4598                         goto outdrop;
 4599                 }
 4600                 if (vnode_getwithref(dst_vp)) {
 4601                         vnode_put(src_vp);
 4602                         fp_drop(p, fd2, fp2, 0);
 4603                         error = ENOENT;
 4604                         goto outdrop;
 4605                 }
 4606 
 4607                 /*
 4608                  * Validate they are not the same and that
 4609                  * both live on the same filesystem.
 4610                  */
 4611                 if (dst_vp == src_vp) {
 4612                         vnode_put(src_vp);
 4613                         vnode_put(dst_vp);
 4614                         fp_drop(p, fd2, fp2, 0);
 4615                         error = EINVAL;
 4616                         goto outdrop;
 4617                 }
 4618                 if (dst_vp->v_mount != src_vp->v_mount) {
 4619                         vnode_put(src_vp);
 4620                         vnode_put(dst_vp);
 4621                         fp_drop(p, fd2, fp2, 0);
 4622                         error = EXDEV;
 4623                         goto outdrop;
 4624                 }
 4625 
 4626                 /* Verify that both vps point to files and not directories */
 4627                 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
 4628                         error = EINVAL;
 4629                         vnode_put(src_vp);
 4630                         vnode_put(dst_vp);
 4631                         fp_drop(p, fd2, fp2, 0);
 4632                         goto outdrop;
 4633                 }
 4634 
 4635 
 4636                 /*
 4637                  * Okay, vps are legit. Check  access.  We'll require write access
 4638                  * to both files.
 4639                  */
 4640                 if (vnode_authorize(src_vp, NULLVP,
 4641                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
 4642                         vnode_put(src_vp);
 4643                         vnode_put(dst_vp);
 4644                         fp_drop(p, fd2, fp2, 0);
 4645                         error = EBADF;
 4646                         goto outdrop;
 4647                 }
 4648                 if (vnode_authorize(dst_vp, NULLVP,
 4649                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
 4650                         vnode_put(src_vp);
 4651                         vnode_put(dst_vp);
 4652                         fp_drop(p, fd2, fp2, 0);
 4653                         error = EBADF;
 4654                         goto outdrop;
 4655                 }
 4656 
 4657                 /* Pass it on through to the fs */
 4658                 error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);
 4659 
 4660                 vnode_put(src_vp);
 4661                 vnode_put(dst_vp);
 4662                 fp_drop(p, fd2, fp2, 0);
 4663                 break;
 4664         }
 4665 
 4666         /*
 4667          * SPI for making a file compressed.
 4668          */
 4669         case F_MAKECOMPRESSED: {
 4670                 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
 4671 
 4672                 if (fp->f_type != DTYPE_VNODE) {
 4673                         error = EBADF;
 4674                         goto out;
 4675                 }
 4676 
 4677                 vp = (struct vnode*)fp_get_data(fp);
 4678                 proc_fdunlock(p);
 4679 
 4680                 /* get the vnode */
 4681                 if (vnode_getwithref(vp)) {
 4682                         error = ENOENT;
 4683                         goto outdrop;
 4684                 }
 4685 
 4686                 /* Is it a file? */
 4687                 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
 4688                         vnode_put(vp);
 4689                         error = EBADF;
 4690                         goto outdrop;
 4691                 }
 4692 
 4693                 /* invoke ioctl to pass off to FS */
 4694                 /* Only go forward if you have write access */
 4695                 vfs_context_t ctx = vfs_context_current();
 4696                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
 4697                         vnode_put(vp);
 4698                         error = EBADF;
 4699                         goto outdrop;
 4700                 }
 4701 
 4702                 error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
 4703 
 4704                 vnode_put(vp);
 4705                 break;
 4706         }
 4707 
 4708         /*
 4709          * SPI (private) for indicating to a filesystem that subsequent writes to
 4710          * the open FD will be written to the Fastflow.
 4711          */
 4712         case F_SET_GREEDY_MODE:
 4713         /* Intentionally fall through to the same handler as F_SETSTATICCONTENT.
 4714          * Both fcntls should pass the argument and their selector into VNOP_IOCTL.
 4715          */
 4716 
 4717         /*
 4718          * SPI (private) for indicating to a filesystem that subsequent writes to
 4719          * the open FD will represent static content.
 4720          */
 4721         case F_SETSTATICCONTENT: {
 4722                 caddr_t ioctl_arg = NULL;
 4723 
 4724                 if (uap->arg) {
 4725                         ioctl_arg = (caddr_t) 1;
 4726                 }
 4727 
 4728                 if (fp->f_type != DTYPE_VNODE) {
 4729                         error = EBADF;
 4730                         goto out;
 4731                 }
 4732                 vp = (struct vnode *)fp_get_data(fp);
 4733                 proc_fdunlock(p);
 4734 
 4735                 error = vnode_getwithref(vp);
 4736                 if (error) {
 4737                         error = ENOENT;
 4738                         goto outdrop;
 4739                 }
 4740 
 4741                 /* Only go forward if you have write access */
 4742                 vfs_context_t ctx = vfs_context_current();
 4743                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
 4744                         vnode_put(vp);
 4745                         error = EBADF;
 4746                         goto outdrop;
 4747                 }
 4748 
 4749                 error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
 4750                 (void)vnode_put(vp);
 4751 
 4752                 break;
 4753         }
 4754 
 4755         /*
 4756          * SPI (private) for indicating to the lower level storage driver that the
 4757          * subsequent writes should be of a particular IO type (burst, greedy, static),
 4758          * or other flavors that may be necessary.
 4759          */
 4760         case F_SETIOTYPE: {
 4761                 caddr_t param_ptr;
 4762                 uint32_t param;
 4763 
 4764                 if (uap->arg) {
 4765                         /* extract 32 bits of flags from userland */
 4766                         param_ptr = (caddr_t) uap->arg;
 4767                         param = (uint32_t) param_ptr;
 4768                 } else {
 4769                         /* If no argument is specified, error out */
 4770                         error = EINVAL;
 4771                         goto out;
 4772                 }
 4773 
 4774                 /*
 4775                  * Validate the different types of flags that can be specified:
 4776                  * all of them are mutually exclusive for now.
 4777                  */
 4778                 switch (param) {
 4779                 case F_IOTYPE_ISOCHRONOUS:
 4780                         break;
 4781 
 4782                 default:
 4783                         error = EINVAL;
 4784                         goto out;
 4785                 }
 4786 
 4787 
 4788                 if (fp->f_type != DTYPE_VNODE) {
 4789                         error = EBADF;
 4790                         goto out;
 4791                 }
 4792                 vp = (struct vnode *)fp_get_data(fp);
 4793                 proc_fdunlock(p);
 4794 
 4795                 error = vnode_getwithref(vp);
 4796                 if (error) {
 4797                         error = ENOENT;
 4798                         goto outdrop;
 4799                 }
 4800 
 4801                 /* Only go forward if you have write access */
 4802                 vfs_context_t ctx = vfs_context_current();
 4803                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
 4804                         vnode_put(vp);
 4805                         error = EBADF;
 4806                         goto outdrop;
 4807                 }
 4808 
 4809                 error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
 4810                 (void)vnode_put(vp);
 4811 
 4812                 break;
 4813         }
 4814 
 4815         /*
 4816          * Set the vnode pointed to by 'fd'
 4817          * and tag it as the (potentially future) backing store
 4818          * for another filesystem
 4819          */
 4820         case F_SETBACKINGSTORE: {
 4821                 if (fp->f_type != DTYPE_VNODE) {
 4822                         error = EBADF;
 4823                         goto out;
 4824                 }
 4825 
 4826                 vp = (struct vnode *)fp_get_data(fp);
 4827 
 4828                 if (vp->v_tag != VT_HFS) {
 4829                         error = EINVAL;
 4830                         goto out;
 4831                 }
 4832                 proc_fdunlock(p);
 4833 
 4834                 if (vnode_getwithref(vp)) {
 4835                         error = ENOENT;
 4836                         goto outdrop;
 4837                 }
 4838 
 4839                 /* only proceed if you have write access */
 4840                 vfs_context_t ctx = vfs_context_current();
 4841                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
 4842                         vnode_put(vp);
 4843                         error = EBADF;
 4844                         goto outdrop;
 4845                 }
 4846 
 4847 
 4848                 /* If arg != 0, set, otherwise unset */
 4849                 if (uap->arg) {
 4850                         error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
 4851                 } else {
 4852                         error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
 4853                 }
 4854 
 4855                 vnode_put(vp);
 4856                 break;
 4857         }
 4858 
 4859         /*
 4860          * like F_GETPATH, but special semantics for
 4861          * the mobile time machine handler.
 4862          */
 4863         case F_GETPATH_MTMINFO: {
 4864                 char *pathbufp;
 4865                 int pathlen;
 4866 
 4867                 if (fp->f_type != DTYPE_VNODE) {
 4868                         error = EBADF;
 4869                         goto out;
 4870                 }
 4871                 vp = (struct vnode *)fp_get_data(fp);
 4872                 proc_fdunlock(p);
 4873 
 4874                 pathlen = MAXPATHLEN;
 4875                 pathbufp = zalloc(ZV_NAMEI);
 4876 
 4877                 if ((error = vnode_getwithref(vp)) == 0) {
 4878                         int backingstore = 0;
 4879 
 4880                         /* Check for error from vn_getpath before moving on */
 4881                         if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
 4882                                 if (vp->v_tag == VT_HFS) {
 4883                                         error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
 4884                                 }
 4885                                 (void)vnode_put(vp);
 4886 
 4887                                 if (error == 0) {
 4888                                         error = copyout((caddr_t)pathbufp, argp, pathlen);
 4889                                 }
 4890                                 if (error == 0) {
 4891                                         /*
 4892                                          * If the copyout was successful, now check to ensure
 4893                                          * that this vnode is not a BACKINGSTORE vnode.  mtmd
 4894                                          * wants the path regardless.
 4895                                          */
 4896                                         if (backingstore) {
 4897                                                 error = EBUSY;
 4898                                         }
 4899                                 }
 4900                         } else {
 4901                                 (void)vnode_put(vp);
 4902                         }
 4903                 }
 4904 
 4905                 zfree(ZV_NAMEI, pathbufp);
 4906                 goto outdrop;
 4907         }
 4908 
 4909         case F_RECYCLE: {
 4910 #if !DEBUG && !DEVELOPMENT
 4911                 bool allowed = false;
 4912 
 4913                 //
 4914                 // non-debug and non-development kernels have restrictions
 4915                 // on who can all this fcntl.  the process has to be marked
 4916                 // with the dataless-manipulator entitlement and either the
 4917                 // process or thread have to be marked rapid-aging.
 4918                 //
 4919                 if (!vfs_context_is_dataless_manipulator(&context)) {
 4920                         error = EPERM;
 4921                         goto out;
 4922                 }
 4923 
 4924                 proc_t proc = vfs_context_proc(&context);
 4925                 if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
 4926                         allowed = true;
 4927                 } else {
 4928                         thread_t thr = vfs_context_thread(&context);
 4929                         if (thr) {
 4930                                 struct uthread *ut = get_bsdthread_info(thr);
 4931 
 4932                                 if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
 4933                                         allowed = true;
 4934                                 }
 4935                         }
 4936                 }
 4937                 if (!allowed) {
 4938                         error = EPERM;
 4939                         goto out;
 4940                 }
 4941 #endif
 4942 
 4943                 if (fp->f_type != DTYPE_VNODE) {
 4944                         error = EBADF;
 4945                         goto out;
 4946                 }
 4947                 vp = (struct vnode *)fp_get_data(fp);
 4948                 proc_fdunlock(p);
 4949 
 4950                 vnode_recycle(vp);
 4951                 break;
 4952         }
 4953 
 4954 #if CONFIG_FILE_LEASES
 4955         case F_SETLEASE: {
 4956                 struct fileglob *fg;
 4957                 int fl_type;
 4958                 int expcounts;
 4959 
 4960                 if (fp->f_type != DTYPE_VNODE) {
 4961                         error = EBADF;
 4962                         goto out;
 4963                 }
 4964                 vp = (struct vnode *)fp_get_data(fp);
 4965                 fg = fp->fp_glob;;
 4966                 proc_fdunlock(p);
 4967 
 4968                 /*
 4969                  * In order to allow a process to avoid breaking
 4970                  * its own leases, the expected open count needs
 4971                  * to be provided to F_SETLEASE when placing write lease.
 4972                  * Similarly, in order to allow a process to place a read lease
 4973                  * after opening the file multiple times in RW mode, the expected
 4974                  * write count needs to be provided to F_SETLEASE when placing a
 4975                  * read lease.
 4976                  *
 4977                  * We use the upper 30 bits of the integer argument (way more than
 4978                  * enough) as the expected open/write count.
 4979                  *
 4980                  * If the caller passed 0 for the expected open count,
 4981                  * assume 1.
 4982                  */
 4983                 fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
 4984                 expcounts = (unsigned int)fl_type >> 2;
 4985                 fl_type &= 3;
 4986 
 4987                 if (fl_type == F_WRLCK && expcounts == 0) {
 4988                         expcounts = 1;
 4989                 }
 4990 
 4991                 AUDIT_ARG(value32, fl_type);
 4992 
 4993                 if ((error = vnode_getwithref(vp))) {
 4994                         goto outdrop;
 4995                 }
 4996 
 4997                 /*
 4998                  * Only support for regular file/dir mounted on local-based filesystem.
 4999                  */
 5000                 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
 5001                     !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
 5002                         error = EBADF;
 5003                         vnode_put(vp);
 5004                         goto outdrop;
 5005                 }
 5006 
 5007                 /* For directory, we only support read lease. */
 5008                 if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
 5009                         error = ENOTSUP;
 5010                         vnode_put(vp);
 5011                         goto outdrop;
 5012                 }
 5013 
 5014                 switch (fl_type) {
 5015                 case F_RDLCK:
 5016                 case F_WRLCK:
 5017                 case F_UNLCK:
 5018                         error = vnode_setlease(vp, fg, fl_type, expcounts,
 5019                             vfs_context_current());
 5020                         break;
 5021                 default:
 5022                         error = EINVAL;
 5023                         break;
 5024                 }
 5025 
 5026                 vnode_put(vp);
 5027                 goto outdrop;
 5028         }
 5029 
 5030         case F_GETLEASE: {
 5031                 if (fp->f_type != DTYPE_VNODE) {
 5032                         error = EBADF;
 5033                         goto out;
 5034                 }
 5035                 vp = (struct vnode *)fp_get_data(fp);
 5036                 proc_fdunlock(p);
 5037 
 5038                 if ((error = vnode_getwithref(vp))) {
 5039                         goto outdrop;
 5040                 }
 5041 
 5042                 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
 5043                     !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
 5044                         error = EBADF;
 5045                         vnode_put(vp);
 5046                         goto outdrop;
 5047                 }
 5048 
 5049                 error = 0;
 5050                 *retval = vnode_getlease(vp);
 5051                 vnode_put(vp);
 5052                 goto outdrop;
 5053         }
 5054 #endif /* CONFIG_FILE_LEASES */
 5055 
 5056         /* SPI (private) for asserting background access to a file */
 5057         case F_ASSERT_BG_ACCESS:
 5058         /* SPI (private) for releasing background access to a file */
 5059         case F_RELEASE_BG_ACCESS: {
 5060                 /*
 5061                  * Check if the process is platform code, which means
 5062                  * that it is considered part of the Operating System.
 5063                  */
 5064                 if (!csproc_get_platform_binary(p)) {
 5065                         error = EPERM;
 5066                         goto out;
 5067                 }
 5068 
 5069                 if (fp->f_type != DTYPE_VNODE) {
 5070                         error = EBADF;
 5071                         goto out;
 5072                 }
 5073 
 5074                 vp = (struct vnode *)fp_get_data(fp);
 5075                 proc_fdunlock(p);
 5076 
 5077                 if (vnode_getwithref(vp)) {
 5078                         error = ENOENT;
 5079                         goto outdrop;
 5080                 }
 5081 
 5082                 /* Verify that vp points to a file and not a directory */
 5083                 if (!vnode_isreg(vp)) {
 5084                         vnode_put(vp);
 5085                         error = EINVAL;
 5086                         goto outdrop;
 5087                 }
 5088 
 5089                 /* Only proceed if you have write access */
 5090                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
 5091                         vnode_put(vp);
 5092                         error = EBADF;
 5093                         goto outdrop;
 5094                 }
 5095 
 5096                 if (cmd == F_ASSERT_BG_ACCESS) {
 5097                         fassertbgaccess_t args;
 5098 
 5099                         if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
 5100                                 vnode_put(vp);
 5101                                 goto outdrop;
 5102                         }
 5103 
 5104                         error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, (caddr_t)&args, 0, &context);
 5105                 } else {
 5106                         // cmd == F_RELEASE_BG_ACCESS
 5107                         error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, (caddr_t)NULL, 0, &context);
 5108                 }
 5109 
 5110                 vnode_put(vp);
 5111 
 5112                 goto outdrop;
 5113         }
 5114 
 5115         default:
 5116                 /*
 5117                  * This is an fcntl() that we d not recognize at this level;
 5118                  * if this is a vnode, we send it down into the VNOP_IOCTL
 5119                  * for this vnode; this can include special devices, and will
 5120                  * effectively overload fcntl() to send ioctl()'s.
 5121                  */
 5122                 if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
 5123                         error = EINVAL;
 5124                         goto out;
 5125                 }
 5126 
 5127                 /* Catch any now-invalid fcntl() selectors */
 5128                 switch (cmd) {
 5129                 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
 5130                 case (int)FSIOC_FIOSEEKHOLE:
 5131                 case (int)FSIOC_FIOSEEKDATA:
 5132                 case (int)FSIOC_CAS_BSDFLAGS:
 5133                 case (int)FSIOC_AUTH_FS:
 5134                 case HFS_GET_BOOT_INFO:
 5135                 case HFS_SET_BOOT_INFO:
 5136                 case FIOPINSWAP:
 5137                 case F_MARKDEPENDENCY:
 5138                 case TIOCREVOKE:
 5139                 case TIOCREVOKECLEAR:
 5140                         error = EINVAL;
 5141                         goto out;
 5142                 default:
 5143                         break;
 5144                 }
 5145 
 5146                 if (fp->f_type != DTYPE_VNODE) {
 5147                         error = EBADF;
 5148                         goto out;
 5149                 }
 5150                 vp = (struct vnode *)fp_get_data(fp);
 5151                 proc_fdunlock(p);
 5152 
 5153                 if ((error = vnode_getwithref(vp)) == 0) {
 5154 #define STK_PARAMS 128
 5155                         char stkbuf[STK_PARAMS] = {0};
 5156                         unsigned int size;
 5157                         caddr_t data, memp;
 5158                         /*
 5159                          * For this to work properly, we have to copy in the
 5160                          * ioctl() cmd argument if there is one; we must also
 5161                          * check that a command parameter, if present, does
 5162                          * not exceed the maximum command length dictated by
 5163                          * the number of bits we have available in the command
 5164                          * to represent a structure length.  Finally, we have
 5165                          * to copy the results back out, if it is that type of
 5166                          * ioctl().
 5167                          */
 5168                         size = IOCPARM_LEN(cmd);
 5169                         if (size > IOCPARM_MAX) {
 5170                                 (void)vnode_put(vp);
 5171                                 error = EINVAL;
 5172                                 break;
 5173                         }
 5174 
 5175                         memp = NULL;
 5176                         if (size > sizeof(stkbuf)) {
 5177                                 memp = (caddr_t)kalloc_data(size, Z_WAITOK);
 5178                                 if (memp == 0) {
 5179                                         (void)vnode_put(vp);
 5180                                         error = ENOMEM;
 5181                                         goto outdrop;
 5182                                 }
 5183                                 data = memp;
 5184                         } else {
 5185                                 data = &stkbuf[0];
 5186                         }
 5187 
 5188                         if (cmd & IOC_IN) {
 5189                                 if (size) {
 5190                                         /* structure */
 5191                                         error = copyin(argp, data, size);
 5192                                         if (error) {
 5193                                                 (void)vnode_put(vp);
 5194                                                 if (memp) {
 5195                                                         kfree_data(memp, size);
 5196                                                 }
 5197                                                 goto outdrop;
 5198                                         }
 5199 
 5200                                         /* Bzero the section beyond that which was needed */
 5201                                         if (size <= sizeof(stkbuf)) {
 5202                                                 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
 5203                                         }
 5204                                 } else {
 5205                                         /* int */
 5206                                         if (is64bit) {
 5207                                                 *(user_addr_t *)data = argp;
 5208                                         } else {
 5209                                                 *(uint32_t *)data = (uint32_t)argp;
 5210                                         }
 5211                                 };
 5212                         } else if ((cmd & IOC_OUT) && size) {
 5213                                 /*
 5214                                  * Zero the buffer so the user always
 5215                                  * gets back something deterministic.
 5216                                  */
 5217                                 bzero(data, size);
 5218                         } else if (cmd & IOC_VOID) {
 5219                                 if (is64bit) {
 5220                                         *(user_addr_t *)data = argp;
 5221                                 } else {
 5222                                         *(uint32_t *)data = (uint32_t)argp;
 5223                                 }
 5224                         }
 5225 
 5226                         error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
 5227 
 5228                         (void)vnode_put(vp);
 5229 
 5230                         /* Copy any output data to user */
 5231                         if (error == 0 && (cmd & IOC_OUT) && size) {
 5232                                 error = copyout(data, argp, size);
 5233                         }
 5234                         if (memp) {
 5235                                 kfree_data(memp, size);
 5236                         }
 5237                 }
 5238                 break;
 5239         }
 5240 
 5241 outdrop:
 5242         return sys_fcntl_outdrop(p, fd, fp, vp, error);
 5243 
 5244 out:
 5245         return sys_fcntl_out(p, fd, fp, error);
 5246 }
 5247 
 5248 
 5249 /*
 5250  * sys_close
 5251  *
 5252  * Description: The implementation of the close(2) system call
 5253  *
 5254  * Parameters:  p                       Process in whose per process file table
 5255  *                                      the close is to occur
 5256  *              uap->fd                 fd to be closed
 5257  *              retval                  <unused>
 5258  *
 5259  * Returns:     0                       Success
 5260  *      fp_lookup:EBADF                 Bad file descriptor
 5261  *      fp_guard_exception:???          Guarded file descriptor
 5262  *      close_internal:EBADF
 5263  *      close_internal:???              Anything returnable by a per-fileops
 5264  *                                      close function
 5265  */
 5266 int
 5267 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
 5268 {
 5269         __pthread_testcancel(1);
 5270         return close_nocancel(p, uap->fd);
 5271 }
 5272 
 5273 int
 5274 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
 5275 {
 5276         return close_nocancel(p, uap->fd);
 5277 }
 5278 
 5279 int
 5280 close_nocancel(proc_t p, int fd)
 5281 {
 5282         struct fileproc *fp;
 5283 
 5284         AUDIT_SYSCLOSE(p, fd);
 5285 
 5286         proc_fdlock(p);
 5287         if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
 5288                 proc_fdunlock(p);
 5289                 return EBADF;
 5290         }
 5291 
 5292         if (fp_isguarded(fp, GUARD_CLOSE)) {
 5293                 int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
 5294                 proc_fdunlock(p);
 5295                 return error;
 5296         }
 5297 
 5298         return fp_close_and_unlock(p, fd, fp, 0);
 5299 }
 5300 
 5301 
 5302 /*
 5303  * fstat
 5304  *
 5305  * Description: Return status information about a file descriptor.
 5306  *
 5307  * Parameters:  p                               The process doing the fstat
 5308  *              fd                              The fd to stat
 5309  *              ub                              The user stat buffer
 5310  *              xsecurity                       The user extended security
 5311  *                                              buffer, or 0 if none
 5312  *              xsecurity_size                  The size of xsecurity, or 0
 5313  *                                              if no xsecurity
 5314  *              isstat64                        Flag to indicate 64 bit version
 5315  *                                              for inode size, etc.
 5316  *
 5317  * Returns:     0                               Success
 5318  *              EBADF
 5319  *              EFAULT
 5320  *      fp_lookup:EBADF                         Bad file descriptor
 5321  *      vnode_getwithref:???
 5322  *      copyout:EFAULT
 5323  *      vnode_getwithref:???
 5324  *      vn_stat:???
 5325  *      soo_stat:???
 5326  *      pipe_stat:???
 5327  *      pshm_stat:???
 5328  *      kqueue_stat:???
 5329  *
 5330  * Notes:       Internal implementation for all other fstat() related
 5331  *              functions
 5332  *
 5333  *              XXX switch on node type is bogus; need a stat in struct
 5334  *              XXX fileops instead.
 5335  */
 5336 static int
 5337 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
 5338     user_addr_t xsecurity_size, int isstat64)
 5339 {
 5340         struct fileproc *fp;
 5341         union {
 5342                 struct stat sb;
 5343                 struct stat64 sb64;
 5344         } source;
 5345         union {
 5346                 struct user64_stat user64_sb;
 5347                 struct user32_stat user32_sb;
 5348                 struct user64_stat64 user64_sb64;
 5349                 struct user32_stat64 user32_sb64;
 5350         } dest;
 5351         int error, my_size;
 5352         file_type_t type;
 5353         caddr_t data;
 5354         kauth_filesec_t fsec;
 5355         user_size_t xsecurity_bufsize;
 5356         vfs_context_t ctx = vfs_context_current();
 5357         void * sbptr;
 5358 
 5359 
 5360         AUDIT_ARG(fd, fd);
 5361 
 5362         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
 5363                 return error;
 5364         }
 5365         type = fp->f_type;
 5366         data = (caddr_t)fp_get_data(fp);
 5367         fsec = KAUTH_FILESEC_NONE;
 5368 
 5369         sbptr = (void *)&source;
 5370 
 5371         switch (type) {
 5372         case DTYPE_VNODE:
 5373                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
 5374                         /*
 5375                          * If the caller has the file open, and is not
 5376                          * requesting extended security information, we are
 5377                          * going to let them get the basic stat information.
 5378                          */
 5379                         if (xsecurity == USER_ADDR_NULL) {
 5380                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
 5381                                     fp->fp_glob->fg_cred);
 5382                         } else {
 5383                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
 5384                         }
 5385 
 5386                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
 5387                         (void)vnode_put((vnode_t)data);
 5388                 }
 5389                 break;
 5390 
 5391 #if SOCKETS
 5392         case DTYPE_SOCKET:
 5393                 error = soo_stat((struct socket *)data, sbptr, isstat64);
 5394                 break;
 5395 #endif /* SOCKETS */
 5396 
 5397         case DTYPE_PIPE:
 5398                 error = pipe_stat((void *)data, sbptr, isstat64);
 5399                 break;
 5400 
 5401         case DTYPE_PSXSHM:
 5402                 error = pshm_stat((void *)data, sbptr, isstat64);
 5403                 break;
 5404 
 5405         case DTYPE_KQUEUE:
 5406                 error = kqueue_stat((void *)data, sbptr, isstat64, p);
 5407                 break;
 5408 
 5409         default:
 5410                 error = EBADF;
 5411                 goto out;
 5412         }
 5413         if (error == 0) {
 5414                 caddr_t sbp;
 5415 
 5416                 if (isstat64 != 0) {
 5417                         source.sb64.st_lspare = 0;
 5418                         source.sb64.st_qspare[0] = 0LL;
 5419                         source.sb64.st_qspare[1] = 0LL;
 5420 
 5421                         if (IS_64BIT_PROCESS(p)) {
 5422                                 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
 5423                                 my_size = sizeof(dest.user64_sb64);
 5424                                 sbp = (caddr_t)&dest.user64_sb64;
 5425                         } else {
 5426                                 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
 5427                                 my_size = sizeof(dest.user32_sb64);
 5428                                 sbp = (caddr_t)&dest.user32_sb64;
 5429                         }
 5430                 } else {
 5431                         source.sb.st_lspare = 0;
 5432                         source.sb.st_qspare[0] = 0LL;
 5433                         source.sb.st_qspare[1] = 0LL;
 5434                         if (IS_64BIT_PROCESS(p)) {
 5435                                 munge_user64_stat(&source.sb, &dest.user64_sb);
 5436                                 my_size = sizeof(dest.user64_sb);
 5437                                 sbp = (caddr_t)&dest.user64_sb;
 5438                         } else {
 5439                                 munge_user32_stat(&source.sb, &dest.user32_sb);
 5440                                 my_size = sizeof(dest.user32_sb);
 5441                                 sbp = (caddr_t)&dest.user32_sb;
 5442                         }
 5443                 }
 5444 
 5445                 error = copyout(sbp, ub, my_size);
 5446         }
 5447 
 5448         /* caller wants extended security information? */
 5449         if (xsecurity != USER_ADDR_NULL) {
 5450                 /* did we get any? */
 5451                 if (fsec == KAUTH_FILESEC_NONE) {
 5452                         if (susize(xsecurity_size, 0) != 0) {
 5453                                 error = EFAULT;
 5454                                 goto out;
 5455                         }
 5456                 } else {
 5457                         /* find the user buffer size */
 5458                         xsecurity_bufsize = fusize(xsecurity_size);
 5459 
 5460                         /* copy out the actual data size */
 5461                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
 5462                                 error = EFAULT;
 5463                                 goto out;
 5464                         }
 5465 
 5466                         /* if the caller supplied enough room, copy out to it */
 5467                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
 5468                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
 5469                         }
 5470                 }
 5471         }
 5472 out:
 5473         fp_drop(p, fd, fp, 0);
 5474         if (fsec != NULL) {
 5475                 kauth_filesec_free(fsec);
 5476         }
 5477         return error;
 5478 }
 5479 
 5480 
 5481 /*
 5482  * sys_fstat_extended
 5483  *
 5484  * Description: Extended version of fstat supporting returning extended
 5485  *              security information
 5486  *
 5487  * Parameters:  p                               The process doing the fstat
 5488  *              uap->fd                         The fd to stat
 5489  *              uap->ub                         The user stat buffer
 5490  *              uap->xsecurity                  The user extended security
 5491  *                                              buffer, or 0 if none
 5492  *              uap->xsecurity_size             The size of xsecurity, or 0
 5493  *
 5494  * Returns:     0                               Success
 5495  *              !0                              Errno (see fstat)
 5496  */
 5497 int
 5498 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
 5499 {
 5500         return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
 5501 }
 5502 
 5503 
 5504 /*
 5505  * sys_fstat
 5506  *
 5507  * Description: Get file status for the file associated with fd
 5508  *
 5509  * Parameters:  p                               The process doing the fstat
 5510  *              uap->fd                         The fd to stat
 5511  *              uap->ub                         The user stat buffer
 5512  *
 5513  * Returns:     0                               Success
 5514  *              !0                              Errno (see fstat)
 5515  */
 5516 int
 5517 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
 5518 {
 5519         return fstat(p, uap->fd, uap->ub, 0, 0, 0);
 5520 }
 5521 
 5522 
 5523 /*
 5524  * sys_fstat64_extended
 5525  *
 5526  * Description: Extended version of fstat64 supporting returning extended
 5527  *              security information
 5528  *
 5529  * Parameters:  p                               The process doing the fstat
 5530  *              uap->fd                         The fd to stat
 5531  *              uap->ub                         The user stat buffer
 5532  *              uap->xsecurity                  The user extended security
 5533  *                                              buffer, or 0 if none
 5534  *              uap->xsecurity_size             The size of xsecurity, or 0
 5535  *
 5536  * Returns:     0                               Success
 5537  *              !0                              Errno (see fstat)
 5538  */
 5539 int
 5540 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
 5541 {
 5542         return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
 5543 }
 5544 
 5545 
 5546 /*
 5547  * sys_fstat64
 5548  *
 5549  * Description: Get 64 bit version of the file status for the file associated
 5550  *              with fd
 5551  *
 5552  * Parameters:  p                               The process doing the fstat
 5553  *              uap->fd                         The fd to stat
 5554  *              uap->ub                         The user stat buffer
 5555  *
 5556  * Returns:     0                               Success
 5557  *              !0                              Errno (see fstat)
 5558  */
 5559 int
 5560 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
 5561 {
 5562         return fstat(p, uap->fd, uap->ub, 0, 0, 1);
 5563 }
 5564 
 5565 
 5566 /*
 5567  * sys_fpathconf
 5568  *
 5569  * Description: Return pathconf information about a file descriptor.
 5570  *
 5571  * Parameters:  p                               Process making the request
 5572  *              uap->fd                         fd to get information about
 5573  *              uap->name                       Name of information desired
 5574  *              retval                          Pointer to the call return area
 5575  *
 5576  * Returns:     0                               Success
 5577  *              EINVAL
 5578  *      fp_lookup:EBADF                         Bad file descriptor
 5579  *      vnode_getwithref:???
 5580  *      vn_pathconf:???
 5581  *
 5582  * Implicit returns:
 5583  *              *retval (modified)              Returned information (numeric)
 5584  */
 5585 int
 5586 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
 5587 {
 5588         int fd = uap->fd;
 5589         struct fileproc *fp;
 5590         struct vnode *vp;
 5591         int error = 0;
 5592         file_type_t type;
 5593 
 5594 
 5595         AUDIT_ARG(fd, uap->fd);
 5596         if ((error = fp_lookup(p, fd, &fp, 0))) {
 5597                 return error;
 5598         }
 5599         type = fp->f_type;
 5600 
 5601         switch (type) {
 5602         case DTYPE_SOCKET:
 5603                 if (uap->name != _PC_PIPE_BUF) {
 5604                         error = EINVAL;
 5605                         goto out;
 5606                 }
 5607                 *retval = PIPE_BUF;
 5608                 error = 0;
 5609                 goto out;
 5610 
 5611         case DTYPE_PIPE:
 5612                 if (uap->name != _PC_PIPE_BUF) {
 5613                         error = EINVAL;
 5614                         goto out;
 5615                 }
 5616                 *retval = PIPE_BUF;
 5617                 error = 0;
 5618                 goto out;
 5619 
 5620         case DTYPE_VNODE:
 5621                 vp = (struct vnode *)fp_get_data(fp);
 5622 
 5623                 if ((error = vnode_getwithref(vp)) == 0) {
 5624                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 5625 
 5626                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
 5627 
 5628                         (void)vnode_put(vp);
 5629                 }
 5630                 goto out;
 5631 
 5632         default:
 5633                 error = EINVAL;
 5634                 goto out;
 5635         }
 5636         /*NOTREACHED*/
 5637 out:
 5638         fp_drop(p, fd, fp, 0);
 5639         return error;
 5640 }
 5641 
 5642 /*
 5643  * sys_flock
 5644  *
 5645  * Description: Apply an advisory lock on a file descriptor.
 5646  *
 5647  * Parameters:  p                               Process making request
 5648  *              uap->fd                         fd on which the lock is to be
 5649  *                                              attempted
 5650  *              uap->how                        (Un)Lock bits, including type
 5651  *              retval                          Pointer to the call return area
 5652  *
 5653  * Returns:     0                               Success
 5654  *      fp_getfvp:EBADF                         Bad file descriptor
 5655  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
 5656  *      vnode_getwithref:???
 5657  *      VNOP_ADVLOCK:???
 5658  *
 5659  * Implicit returns:
 5660  *              *retval (modified)              Size of dtable
 5661  *
 5662  * Notes:       Just attempt to get a record lock of the requested type on
 5663  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 5664  */
 5665 int
 5666 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
 5667 {
 5668         int fd = uap->fd;
 5669         int how = uap->how;
 5670         struct fileproc *fp;
 5671         struct vnode *vp;
 5672         struct flock lf;
 5673         vfs_context_t ctx = vfs_context_current();
 5674         int error = 0;
 5675 
 5676         AUDIT_ARG(fd, uap->fd);
 5677         if ((error = fp_getfvp(p, fd, &fp, &vp))) {
 5678                 return error;
 5679         }
 5680         if ((error = vnode_getwithref(vp))) {
 5681                 goto out1;
 5682         }
 5683         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 5684 
 5685         lf.l_whence = SEEK_SET;
 5686         lf.l_start = 0;
 5687         lf.l_len = 0;
 5688         if (how & LOCK_UN) {
 5689                 lf.l_type = F_UNLCK;
 5690                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
 5691                 goto out;
 5692         }
 5693         if (how & LOCK_EX) {
 5694                 lf.l_type = F_WRLCK;
 5695         } else if (how & LOCK_SH) {
 5696                 lf.l_type = F_RDLCK;
 5697         } else {
 5698                 error = EBADF;
 5699                 goto out;
 5700         }
 5701 #if CONFIG_MACF
 5702         error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
 5703         if (error) {
 5704                 goto out;
 5705         }
 5706 #endif
 5707         error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
 5708             (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
 5709             ctx, NULL);
 5710         if (!error) {
 5711                 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
 5712         }
 5713 out:
 5714         (void)vnode_put(vp);
 5715 out1:
 5716         fp_drop(p, fd, fp, 0);
 5717         return error;
 5718 }
 5719 
 5720 /*
 5721  * sys_fileport_makeport
 5722  *
 5723  * Description: Obtain a Mach send right for a given file descriptor.
 5724  *
 5725  * Parameters:  p               Process calling fileport
 5726  *              uap->fd         The fd to reference
 5727  *              uap->portnamep  User address at which to place port name.
 5728  *
 5729  * Returns:     0               Success.
 5730  *              EBADF           Bad file descriptor.
 5731  *              EINVAL          File descriptor had type that cannot be sent, misc. other errors.
 5732  *              EFAULT          Address at which to store port name is not valid.
 5733  *              EAGAIN          Resource shortage.
 5734  *
 5735  * Implicit returns:
 5736  *              On success, name of send right is stored at user-specified address.
 5737  */
 5738 int
 5739 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
 5740     __unused int *retval)
 5741 {
 5742         int err;
 5743         int fd = uap->fd;
 5744         user_addr_t user_portaddr = uap->portnamep;
 5745         struct fileproc *fp = FILEPROC_NULL;
 5746         struct fileglob *fg = NULL;
 5747         ipc_port_t fileport;
 5748         mach_port_name_t name = MACH_PORT_NULL;
 5749 
 5750         proc_fdlock(p);
 5751         err = fp_lookup(p, fd, &fp, 1);
 5752         if (err != 0) {
 5753                 goto out_unlock;
 5754         }
 5755 
 5756         fg = fp->fp_glob;
 5757         if (!fg_sendable(fg)) {
 5758                 err = EINVAL;
 5759                 goto out_unlock;
 5760         }
 5761 
 5762         if (fp_isguarded(fp, GUARD_FILEPORT)) {
 5763                 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
 5764                 goto out_unlock;
 5765         }
 5766 
 5767         /* Dropped when port is deallocated */
 5768         fg_ref(p, fg);
 5769 
 5770         proc_fdunlock(p);
 5771 
 5772         /* Allocate and initialize a port */
 5773         fileport = fileport_alloc(fg);
 5774         if (fileport == IPC_PORT_NULL) {
 5775                 fg_drop_live(fg);
 5776                 err = EAGAIN;
 5777                 goto out;
 5778         }
 5779 
 5780         /* Add an entry.  Deallocates port on failure. */
 5781         name = ipc_port_copyout_send(fileport, get_task_ipcspace(proc_task(p)));
 5782         if (!MACH_PORT_VALID(name)) {
 5783                 err = EINVAL;
 5784                 goto out;
 5785         }
 5786 
 5787         err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
 5788         if (err != 0) {
 5789                 goto out;
 5790         }
 5791 
 5792         /* Tag the fileglob for debugging purposes */
 5793         lck_mtx_lock_spin(&fg->fg_lock);
 5794         fg->fg_lflags |= FG_PORTMADE;
 5795         lck_mtx_unlock(&fg->fg_lock);
 5796 
 5797         fp_drop(p, fd, fp, 0);
 5798 
 5799         return 0;
 5800 
 5801 out_unlock:
 5802         proc_fdunlock(p);
 5803 out:
 5804         if (MACH_PORT_VALID(name)) {
 5805                 /* Don't care if another thread races us to deallocate the entry */
 5806                 (void) mach_port_deallocate(get_task_ipcspace(proc_task(p)), name);
 5807         }
 5808 
 5809         if (fp != FILEPROC_NULL) {
 5810                 fp_drop(p, fd, fp, 0);
 5811         }
 5812 
 5813         return err;
 5814 }
 5815 
 5816 void
 5817 fileport_releasefg(struct fileglob *fg)
 5818 {
 5819         (void)fg_drop(PROC_NULL, fg);
 5820 }
 5821 
 5822 /*
 5823  * fileport_makefd
 5824  *
 5825  * Description: Obtain the file descriptor for a given Mach send right.
 5826  *
 5827  * Returns:     0               Success
 5828  *              EINVAL          Invalid Mach port name, or port is not for a file.
 5829  *      fdalloc:EMFILE
 5830  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
 5831  *
 5832  * Implicit returns:
 5833  *              *retval (modified)              The new descriptor
 5834  */
 5835 int
 5836 fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
 5837 {
 5838         struct fileglob *fg;
 5839         struct fileproc *fp = FILEPROC_NULL;
 5840         int fd;
 5841         int err;
 5842 
 5843         fg = fileport_port_to_fileglob(port);
 5844         if (fg == NULL) {
 5845                 err = EINVAL;
 5846                 goto out;
 5847         }
 5848 
 5849         fp = fileproc_alloc_init();
 5850 
 5851         proc_fdlock(p);
 5852         err = fdalloc(p, 0, &fd);
 5853         if (err != 0) {
 5854                 proc_fdunlock(p);
 5855                 goto out;
 5856         }
 5857         if (fp_flags) {
 5858                 fp->fp_flags |= fp_flags;
 5859         }
 5860 
 5861         fp->fp_glob = fg;
 5862         fg_ref(p, fg);
 5863 
 5864         procfdtbl_releasefd(p, fd, fp);
 5865         proc_fdunlock(p);
 5866 
 5867         *retval = fd;
 5868         err = 0;
 5869 out:
 5870         if ((fp != NULL) && (0 != err)) {
 5871                 fileproc_free(fp);
 5872         }
 5873 
 5874         return err;
 5875 }
 5876 
 5877 /*
 5878  * sys_fileport_makefd
 5879  *
 5880  * Description: Obtain the file descriptor for a given Mach send right.
 5881  *
 5882  * Parameters:  p               Process calling fileport
 5883  *              uap->port       Name of send right to file port.
 5884  *
 5885  * Returns:     0               Success
 5886  *              EINVAL          Invalid Mach port name, or port is not for a file.
 5887  *      fdalloc:EMFILE
 5888  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
 5889  *
 5890  * Implicit returns:
 5891  *              *retval (modified)              The new descriptor
 5892  */
 5893 int
 5894 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
 5895 {
 5896         ipc_port_t port = IPC_PORT_NULL;
 5897         mach_port_name_t send = uap->port;
 5898         kern_return_t res;
 5899         int err;
 5900 
 5901         res = ipc_object_copyin(get_task_ipcspace(proc_task(p)),
 5902             send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
 5903 
 5904         if (res == KERN_SUCCESS) {
 5905                 err = fileport_makefd(p, port, FP_CLOEXEC, retval);
 5906         } else {
 5907                 err = EINVAL;
 5908         }
 5909 
 5910         if (IPC_PORT_NULL != port) {
 5911                 ipc_port_release_send(port);
 5912         }
 5913 
 5914         return err;
 5915 }
 5916 
 5917 
 5918 #pragma mark fileops wrappers
 5919 
 5920 /*
 5921  * fo_read
 5922  *
 5923  * Description: Generic fileops read indirected through the fileops pointer
 5924  *              in the fileproc structure
 5925  *
 5926  * Parameters:  fp                              fileproc structure pointer
 5927  *              uio                             user I/O structure pointer
 5928  *              flags                           FOF_ flags
 5929  *              ctx                             VFS context for operation
 5930  *
 5931  * Returns:     0                               Success
 5932  *              !0                              Errno from read
 5933  */
 5934 int
 5935 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 5936 {
 5937         return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
 5938 }
 5939 
 5940 int
 5941 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 5942 {
 5943 #pragma unused(fp, uio, flags, ctx)
 5944         return ENXIO;
 5945 }
 5946 
 5947 
 5948 /*
 5949  * fo_write
 5950  *
 5951  * Description: Generic fileops write indirected through the fileops pointer
 5952  *              in the fileproc structure
 5953  *
 5954  * Parameters:  fp                              fileproc structure pointer
 5955  *              uio                             user I/O structure pointer
 5956  *              flags                           FOF_ flags
 5957  *              ctx                             VFS context for operation
 5958  *
 5959  * Returns:     0                               Success
 5960  *              !0                              Errno from write
 5961  */
 5962 int
 5963 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 5964 {
 5965         return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
 5966 }
 5967 
 5968 int
 5969 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 5970 {
 5971 #pragma unused(fp, uio, flags, ctx)
 5972         return ENXIO;
 5973 }
 5974 
 5975 
 5976 /*
 5977  * fo_ioctl
 5978  *
 5979  * Description: Generic fileops ioctl indirected through the fileops pointer
 5980  *              in the fileproc structure
 5981  *
 5982  * Parameters:  fp                              fileproc structure pointer
 5983  *              com                             ioctl command
 5984  *              data                            pointer to internalized copy
 5985  *                                              of user space ioctl command
 5986  *                                              parameter data in kernel space
 5987  *              ctx                             VFS context for operation
 5988  *
 5989  * Returns:     0                               Success
 5990  *              !0                              Errno from ioctl
 5991  *
 5992  * Locks:       The caller is assumed to have held the proc_fdlock; this
 5993  *              function releases and reacquires this lock.  If the caller
 5994  *              accesses data protected by this lock prior to calling this
 5995  *              function, it will need to revalidate/reacquire any cached
 5996  *              protected data obtained prior to the call.
 5997  */
 5998 int
 5999 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
 6000 {
 6001         int error;
 6002 
 6003         proc_fdunlock(vfs_context_proc(ctx));
 6004         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
 6005         proc_fdlock(vfs_context_proc(ctx));
 6006         return error;
 6007 }
 6008 
 6009 int
 6010 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
 6011 {
 6012 #pragma unused(fp, com, data, ctx)
 6013         return ENOTTY;
 6014 }
 6015 
 6016 
 6017 /*
 6018  * fo_select
 6019  *
 6020  * Description: Generic fileops select indirected through the fileops pointer
 6021  *              in the fileproc structure
 6022  *
 6023  * Parameters:  fp                              fileproc structure pointer
 6024  *              which                           select which
 6025  *              wql                             pointer to wait queue list
 6026  *              ctx                             VFS context for operation
 6027  *
 6028  * Returns:     0                               Success
 6029  *              !0                              Errno from select
 6030  */
 6031 int
 6032 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
 6033 {
 6034         return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
 6035 }
 6036 
 6037 int
 6038 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
 6039 {
 6040 #pragma unused(fp, which, wql, ctx)
 6041         return ENOTSUP;
 6042 }
 6043 
 6044 
 6045 /*
 6046  * fo_close
 6047  *
 6048  * Description: Generic fileops close indirected through the fileops pointer
 6049  *              in the fileproc structure
 6050  *
 6051  * Parameters:  fp                              fileproc structure pointer for
 6052  *                                              file to close
 6053  *              ctx                             VFS context for operation
 6054  *
 6055  * Returns:     0                               Success
 6056  *              !0                              Errno from close
 6057  */
 6058 int
 6059 fo_close(struct fileglob *fg, vfs_context_t ctx)
 6060 {
 6061         return (*fg->fg_ops->fo_close)(fg, ctx);
 6062 }
 6063 
 6064 
 6065 /*
 6066  * fo_drain
 6067  *
 6068  * Description: Generic fileops kqueue filter indirected through the fileops
 6069  *              pointer in the fileproc structure
 6070  *
 6071  * Parameters:  fp                              fileproc structure pointer
 6072  *              ctx                             VFS context for operation
 6073  *
 6074  * Returns:     0                               Success
 6075  *              !0                              errno from drain
 6076  */
 6077 int
 6078 fo_drain(struct fileproc *fp, vfs_context_t ctx)
 6079 {
 6080         return (*fp->f_ops->fo_drain)(fp, ctx);
 6081 }
 6082 
 6083 int
 6084 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
 6085 {
 6086 #pragma unused(fp, ctx)
 6087         return ENOTSUP;
 6088 }
 6089 
 6090 
 6091 /*
 6092  * fo_kqfilter
 6093  *
 6094  * Description: Generic fileops kqueue filter indirected through the fileops
 6095  *              pointer in the fileproc structure
 6096  *
 6097  * Parameters:  fp                              fileproc structure pointer
 6098  *              kn                              pointer to knote to filter on
 6099  *
 6100  * Returns:     (kn->kn_flags & EV_ERROR)       error in kn->kn_data
 6101  *              0                               Filter is not active
 6102  *              !0                              Filter is active
 6103  */
 6104 int
 6105 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
 6106 {
 6107         return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
 6108 }
 6109 
 6110 int
 6111 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
 6112 {
 6113 #pragma unused(fp, kev)
 6114         knote_set_error(kn, ENOTSUP);
 6115         return 0;
 6116 }
Cache object: 79e6c158e3ed6d392573e87d900d7247
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/bsd/kern/kern_descrip.c

FreeBSD/Linux Kernel Cross Reference
sys/bsd/kern/kern_descrip.c