vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: vfs_syscalls.c,v 1.279.2.6 2011/03/20 20:36:56 bouyer Exp $    */
    2 
    3 /*
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.42 (Berkeley) 7/31/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.279.2.6 2011/03/20 20:36:56 bouyer Exp $");
   41 
   42 #include "opt_compat_netbsd.h"
   43 #include "opt_compat_43.h"
   44 #include "opt_fileassoc.h"
   45 #include "opt_ktrace.h"
   46 #include "fss.h"
   47 #include "veriexec.h"
   48 
   49 #include <sys/param.h>
   50 #include <sys/systm.h>
   51 #include <sys/namei.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/kernel.h>
   54 #include <sys/file.h>
   55 #include <sys/stat.h>
   56 #include <sys/vnode.h>
   57 #include <sys/mount.h>
   58 #include <sys/proc.h>
   59 #include <sys/uio.h>
   60 #include <sys/malloc.h>
   61 #include <sys/kmem.h>
   62 #include <sys/dirent.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/sa.h>
   65 #include <sys/syscallargs.h>
   66 #ifdef KTRACE
   67 #include <sys/ktrace.h>
   68 #endif
   69 #ifdef FILEASSOC
   70 #include <sys/fileassoc.h>
   71 #endif /* FILEASSOC */
   72 #if NVERIEXEC > 0
   73 #include <sys/verified_exec.h>
   74 #include <sys/syslog.h>
   75 #endif /* NVERIEXEC > 0 */
   76 #include <sys/kauth.h>
   77 
   78 #include <miscfs/genfs/genfs.h>
   79 #include <miscfs/syncfs/syncfs.h>
   80 
   81 #ifdef COMPAT_30
   82 #include "opt_nfsserver.h"
   83 #include <nfs/rpcv2.h>
   84 #endif
   85 #include <nfs/nfsproto.h>
   86 #ifdef COMPAT_30
   87 #include <nfs/nfs.h>
   88 #include <nfs/nfs_var.h>
   89 #endif
   90 
   91 #if NFSS > 0
   92 #include <dev/fssvar.h>
   93 #endif
   94 
   95 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
   96 
   97 static int change_dir(struct nameidata *, struct lwp *);
   98 static int change_flags(struct vnode *, u_long, struct lwp *);
   99 static int change_mode(struct vnode *, int, struct lwp *l);
  100 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
  101 static int change_utimes(struct vnode *vp, const struct timeval *,
  102                struct lwp *l);
  103 static int rename_files(const char *, const char *, struct lwp *, int);
  104 
  105 void checkdirs(struct vnode *);
  106 
  107 int dovfsusermount = 0;
  108 
  109 /*
  110  * Virtual File System System Calls
  111  */
  112 
  113 /*
  114  * Mount a file system.
  115  */
  116 
  117 #if defined(COMPAT_09) || defined(COMPAT_43)
  118 /*
  119  * This table is used to maintain compatibility with 4.3BSD
  120  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
  121  *
  122  * Do not modify this table. It should only contain filesystems
  123  * supported by NetBSD 0.9 and 4.3BSD.
  124  */
  125 const char * const mountcompatnames[] = {
  126         NULL,           /* 0 = MOUNT_NONE */
  127         MOUNT_FFS,      /* 1 = MOUNT_UFS */
  128         MOUNT_NFS,      /* 2 */
  129         MOUNT_MFS,      /* 3 */
  130         MOUNT_MSDOS,    /* 4 */
  131         MOUNT_CD9660,   /* 5 = MOUNT_ISOFS */
  132         MOUNT_FDESC,    /* 6 */
  133         MOUNT_KERNFS,   /* 7 */
  134         NULL,           /* 8 = MOUNT_DEVFS */
  135         MOUNT_AFS,      /* 9 */
  136 };
  137 const int nmountcompatnames = sizeof(mountcompatnames) /
  138     sizeof(mountcompatnames[0]);
  139 #endif /* COMPAT_09 || COMPAT_43 */
  140 
  141 /* ARGSUSED */
  142 int
  143 sys_mount(struct lwp *l, void *v, register_t *retval)
  144 {
  145         struct sys_mount_args /* {
  146                 syscallarg(const char *) type;
  147                 syscallarg(const char *) path;
  148                 syscallarg(int) flags;
  149                 syscallarg(void *) data;
  150         } */ *uap = v;
  151         struct vnode *vp;
  152         struct mount *mp;
  153         int error, flag = 0;
  154         char fstypename[MFSNAMELEN];
  155         struct vattr va;
  156         struct nameidata nd;
  157         struct vfsops *vfs;
  158 
  159         /*
  160          * if MNT_GETARGS is specified, it should be only flag.
  161          */
  162 
  163         if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
  164             (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
  165                 return EINVAL;
  166         }
  167 
  168         if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
  169             (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
  170             &l->l_acflag)))
  171                 return (error);
  172         /*
  173          * Get vnode to be covered
  174          */
  175         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
  176             SCARG(uap, path), l);
  177         if ((error = namei(&nd)) != 0)
  178                 return (error);
  179         vp = nd.ni_vp;
  180         /*
  181          * A lookup in VFS_MOUNT might result in an attempt to
  182          * lock this vnode again, so make the lock recursive.
  183          */
  184         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
  185         if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
  186                 if ((vp->v_flag & VROOT) == 0) {
  187                         vput(vp);
  188                         return (EINVAL);
  189                 }
  190                 mp = vp->v_mount;
  191                 flag = mp->mnt_flag;
  192                 vfs = mp->mnt_op;
  193                 /*
  194                  * We only allow the filesystem to be reloaded if it
  195                  * is currently mounted read-only.
  196                  */
  197                 if ((SCARG(uap, flags) & MNT_RELOAD) &&
  198                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  199                         vput(vp);
  200                         return (EOPNOTSUPP);    /* Needs translation */
  201                 }
  202                 /*
  203                  * In "highly secure" mode, don't let the caller do anything
  204                  * but downgrade a filesystem from read-write to read-only.
  205                  * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
  206                  */
  207                 if (securelevel >= 2 &&
  208                     SCARG(uap, flags) != MNT_GETARGS &&
  209                     SCARG(uap, flags) !=
  210                     (mp->mnt_flag | MNT_RDONLY |
  211                      MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
  212                         vput(vp);
  213                         return (EPERM);
  214                 }
  215                 mp->mnt_flag |= SCARG(uap, flags) &
  216                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  217                 /*
  218                  * Only root, or the user that did the original mount is
  219                  * permitted to update it.
  220                  */
  221                 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
  222                     mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred) &&
  223                     (error = kauth_authorize_generic(l->l_cred,
  224                     KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
  225                         vput(vp);
  226                         return (error);
  227                 }
  228                 /*
  229                  * Do not allow NFS export by non-root users. For non-root
  230                  * users, silently enforce MNT_NOSUID and MNT_NODEV, and
  231                  * MNT_NOEXEC if mount point is already MNT_NOEXEC.
  232                  */
  233                 if (kauth_cred_geteuid(l->l_cred) != 0) {
  234                         if (SCARG(uap, flags) & MNT_EXPORTED) {
  235                                 vput(vp);
  236                                 return (EPERM);
  237                         }
  238                         SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  239                         if (flag & MNT_NOEXEC)
  240                                 SCARG(uap, flags) |= MNT_NOEXEC;
  241                 }
  242                 if (vfs_busy(mp, LK_NOWAIT, 0)) {
  243                         vput(vp);
  244                         return (EPERM);
  245                 }
  246                 goto update;
  247         } else {
  248                 if (securelevel >= 2) {
  249                         vput(vp);
  250                         return (EPERM);
  251                 }
  252         }
  253         /*
  254          * If the user is not root, ensure that they own the directory
  255          * onto which we are attempting to mount.
  256          */
  257         if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
  258             (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
  259                 (error = kauth_authorize_generic(l->l_cred,
  260                 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)) {
  261                 vput(vp);
  262                 return (error);
  263         }
  264         /*
  265          * Do not allow NFS export by non-root users. For non-root users,
  266          * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
  267          * mount point is already MNT_NOEXEC.
  268          */
  269         if (kauth_cred_geteuid(l->l_cred) != 0) {
  270                 if (SCARG(uap, flags) & MNT_EXPORTED) {
  271                         vput(vp);
  272                         return (EPERM);
  273                 }
  274                 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  275                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
  276                         SCARG(uap, flags) |= MNT_NOEXEC;
  277         }
  278         if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
  279                 vput(vp);
  280                 return (error);
  281         }
  282         if (vp->v_type != VDIR) {
  283                 vput(vp);
  284                 return (ENOTDIR);
  285         }
  286         error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
  287         if (error) {
  288 #if defined(COMPAT_09) || defined(COMPAT_43)
  289                 /*
  290                  * Historically, filesystem types were identified by numbers.
  291                  * If we get an integer for the filesystem type instead of a
  292                  * string, we check to see if it matches one of the historic
  293                  * filesystem types.
  294                  */
  295                 u_long fsindex = (u_long)SCARG(uap, type);
  296                 if (fsindex >= nmountcompatnames ||
  297                     mountcompatnames[fsindex] == NULL) {
  298                         vput(vp);
  299                         return (ENODEV);
  300                 }
  301                 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
  302 #else
  303                 vput(vp);
  304                 return (error);
  305 #endif
  306         }
  307 #ifdef  COMPAT_10
  308         /* Accept `ufs' as an alias for `ffs'. */
  309         if (!strncmp(fstypename, "ufs", MFSNAMELEN))
  310                 strncpy(fstypename, "ffs", MFSNAMELEN);
  311 #endif
  312         if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
  313                 vput(vp);
  314                 return (ENODEV);
  315         }
  316         if (vp->v_mountedhere != NULL) {
  317                 vput(vp);
  318                 return (EBUSY);
  319         }
  320 
  321         /*
  322          * Allocate and initialize the file system.
  323          */
  324         mp = (struct mount *)malloc((u_long)sizeof(struct mount),
  325                 M_MOUNT, M_WAITOK);
  326         memset((char *)mp, 0, (u_long)sizeof(struct mount));
  327         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  328         simple_lock_init(&mp->mnt_slock);
  329         (void)vfs_busy(mp, LK_NOWAIT, 0);
  330         mp->mnt_op = vfs;
  331         vfs->vfs_refcount++;
  332         mp->mnt_vnodecovered = vp;
  333         mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
  334         mp->mnt_unmounter = NULL;
  335         mp->mnt_leaf = mp;
  336         mount_initspecific(mp);
  337 
  338         /*
  339          * The underlying file system may refuse the mount for
  340          * various reasons.  Allow the user to force it to happen.
  341          */
  342         mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
  343  update:
  344         if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
  345                 /*
  346                  * Set the mount level flags.
  347                  */
  348                 if (SCARG(uap, flags) & MNT_RDONLY)
  349                         mp->mnt_flag |= MNT_RDONLY;
  350                 else if (mp->mnt_flag & MNT_RDONLY)
  351                         mp->mnt_iflag |= IMNT_WANTRDWR;
  352                 mp->mnt_flag &=
  353                   ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  354                     MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  355                     MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
  356                 mp->mnt_flag |= SCARG(uap, flags) &
  357                    (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  358                     MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  359                     MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  360                     MNT_IGNORE);
  361         }
  362         /*
  363          * Mount the filesystem.
  364          */
  365         error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
  366         if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
  367                 VOP_UNLOCK(vp, 0);
  368 #if defined(COMPAT_30) && defined(NFSSERVER)
  369                 if (mp->mnt_flag & MNT_UPDATE && error != 0) {
  370                         int error2;
  371 
  372                         /* Update failed; let's try and see if it was an
  373                          * export request. */
  374                         error2 = nfs_update_exports_30(mp, SCARG(uap, path),
  375                             SCARG(uap, data), l);
  376 
  377                         /* Only update error code if the export request was
  378                          * understood but some problem occurred while
  379                          * processing it. */
  380                         if (error2 != EJUSTRETURN)
  381                                 error = error2;
  382                 }
  383 #endif
  384                 if (mp->mnt_iflag & IMNT_WANTRDWR)
  385                         mp->mnt_flag &= ~MNT_RDONLY;
  386                 if (error)
  387                         mp->mnt_flag = flag;
  388                 mp->mnt_flag &=~
  389                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  390                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  391                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
  392                         if (mp->mnt_syncer == NULL)
  393                                 error = vfs_allocate_syncvnode(mp);
  394                 } else {
  395                         if (mp->mnt_syncer != NULL)
  396                                 vfs_deallocate_syncvnode(mp);
  397                 }
  398                 vfs_unbusy(mp);
  399                 vrele(vp);
  400                 return (error);
  401         }
  402         /*
  403          * Put the new filesystem on the mount list after root.
  404          */
  405         cache_purge(vp);
  406         if (!error) {
  407                 mp->mnt_flag &=~
  408                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  409                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  410                 vp->v_mountedhere = mp;
  411                 simple_lock(&mountlist_slock);
  412                 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  413                 simple_unlock(&mountlist_slock);
  414                 VOP_UNLOCK(vp, 0);
  415                 checkdirs(vp);
  416                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  417                         error = vfs_allocate_syncvnode(mp);
  418                 vfs_unbusy(mp);
  419                 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
  420                 if ((error = VFS_START(mp, 0, l)))
  421                         vrele(vp);
  422         } else {
  423                 vp->v_mountedhere = (struct mount *)0;
  424                 vfs->vfs_refcount--;
  425                 vfs_unbusy(mp);
  426                 free(mp, M_MOUNT);
  427                 vput(vp);
  428         }
  429         return (error);
  430 }
  431 
  432 /*
  433  * Scan all active processes to see if any of them have a current
  434  * or root directory onto which the new filesystem has just been
  435  * mounted. If so, replace them with the new mount point.
  436  */
  437 void
  438 checkdirs(struct vnode *olddp)
  439 {
  440         struct cwdinfo *cwdi;
  441         struct vnode *newdp;
  442         struct proc *p;
  443 
  444         if (olddp->v_usecount == 1)
  445                 return;
  446         if (VFS_ROOT(olddp->v_mountedhere, &newdp))
  447                 panic("mount: lost mount");
  448         proclist_lock_read();
  449         PROCLIST_FOREACH(p, &allproc) {
  450                 cwdi = p->p_cwdi;
  451                 if (!cwdi)
  452                         continue;
  453                 if (cwdi->cwdi_cdir == olddp) {
  454                         vrele(cwdi->cwdi_cdir);
  455                         VREF(newdp);
  456                         cwdi->cwdi_cdir = newdp;
  457                 }
  458                 if (cwdi->cwdi_rdir == olddp) {
  459                         vrele(cwdi->cwdi_rdir);
  460                         VREF(newdp);
  461                         cwdi->cwdi_rdir = newdp;
  462                 }
  463         }
  464         proclist_unlock_read();
  465         if (rootvnode == olddp) {
  466                 vrele(rootvnode);
  467                 VREF(newdp);
  468                 rootvnode = newdp;
  469         }
  470         vput(newdp);
  471 }
  472 
  473 /*
  474  * Unmount a file system.
  475  *
  476  * Note: unmount takes a path to the vnode mounted on as argument,
  477  * not special file (as before).
  478  */
  479 /* ARGSUSED */
  480 int
  481 sys_unmount(struct lwp *l, void *v, register_t *retval)
  482 {
  483         struct sys_unmount_args /* {
  484                 syscallarg(const char *) path;
  485                 syscallarg(int) flags;
  486         } */ *uap = v;
  487         struct vnode *vp;
  488         struct mount *mp;
  489         int error;
  490         struct nameidata nd;
  491 
  492         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
  493             SCARG(uap, path), l);
  494         if ((error = namei(&nd)) != 0)
  495                 return (error);
  496         vp = nd.ni_vp;
  497         mp = vp->v_mount;
  498 
  499         /*
  500          * Only root, or the user that did the original mount is
  501          * permitted to unmount this filesystem.
  502          */
  503         if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred)) &&
  504             (error = kauth_authorize_generic(l->l_cred,
  505                 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
  506                 vput(vp);
  507                 return (error);
  508         }
  509 
  510         /*
  511          * Don't allow unmounting the root file system.
  512          */
  513         if (mp->mnt_flag & MNT_ROOTFS) {
  514                 vput(vp);
  515                 return (EINVAL);
  516         }
  517 
  518         /*
  519          * Must be the root of the filesystem
  520          */
  521         if ((vp->v_flag & VROOT) == 0) {
  522                 vput(vp);
  523                 return (EINVAL);
  524         }
  525         vput(vp);
  526 
  527         /*
  528          * XXX Freeze syncer.  Must do this before locking the
  529          * mount point.  See dounmount() for details.
  530          */
  531         lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
  532 
  533         if (vfs_busy(mp, 0, 0)) {
  534                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  535                 return (EBUSY);
  536         }
  537 
  538         return (dounmount(mp, SCARG(uap, flags), l));
  539 }
  540 
  541 /*
  542  * Do the actual file system unmount. File system is assumed to have been
  543  * marked busy by the caller.
  544  */
  545 int
  546 dounmount(struct mount *mp, int flags, struct lwp *l)
  547 {
  548         struct vnode *coveredvp;
  549         int error;
  550         int async;
  551         int used_syncer;
  552 
  553 #if NVERIEXEC > 0
  554         error = veriexec_unmountchk(mp);
  555         if (error)
  556                 return (error);
  557 #endif /* NVERIEXEC > 0 */
  558 
  559 #ifdef FILEASSOC
  560         (void)fileassoc_table_delete(mp);
  561 #endif /* FILEASSOC */
  562 
  563         simple_lock(&mountlist_slock);
  564         vfs_unbusy(mp);
  565         used_syncer = (mp->mnt_syncer != NULL);
  566 
  567         /*
  568          * XXX Syncer must be frozen when we get here.  This should really
  569          * be done on a per-mountpoint basis, but especially the softdep
  570          * code possibly called from the syncer doesn't exactly work on a
  571          * per-mountpoint basis, so the softdep code would become a maze
  572          * of vfs_busy() calls.
  573          *
  574          * The caller of dounmount() must acquire syncer_lock because
  575          * the syncer itself acquires locks in syncer_lock -> vfs_busy
  576          * order, and we must preserve that order to avoid deadlock.
  577          *
  578          * So, if the file system did not use the syncer, now is
  579          * the time to release the syncer_lock.
  580          */
  581         if (used_syncer == 0)
  582                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  583 
  584         mp->mnt_iflag |= IMNT_UNMOUNT;
  585         mp->mnt_unmounter = l;
  586         lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
  587         vn_start_write(NULL, &mp, V_WAIT);
  588 
  589         async = mp->mnt_flag & MNT_ASYNC;
  590         mp->mnt_flag &= ~MNT_ASYNC;
  591         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  592         if (mp->mnt_syncer != NULL)
  593                 vfs_deallocate_syncvnode(mp);
  594         error = 0;
  595         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  596 #if NFSS > 0
  597                 error = fss_umount_hook(mp, (flags & MNT_FORCE));
  598 #endif
  599                 if (error == 0)
  600                         error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
  601         }
  602         if (error == 0 || (flags & MNT_FORCE))
  603                 error = VFS_UNMOUNT(mp, flags, l);
  604         vn_finished_write(mp, 0);
  605         simple_lock(&mountlist_slock);
  606         if (error) {
  607                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  608                         (void) vfs_allocate_syncvnode(mp);
  609                 mp->mnt_iflag &= ~IMNT_UNMOUNT;
  610                 mp->mnt_unmounter = NULL;
  611                 mp->mnt_flag |= async;
  612                 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
  613                     &mountlist_slock);
  614                 if (used_syncer)
  615                         lockmgr(&syncer_lock, LK_RELEASE, NULL);
  616                 simple_lock(&mp->mnt_slock);
  617                 while (mp->mnt_wcnt > 0) {
  618                         wakeup(mp);
  619                         ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
  620                                 0, &mp->mnt_slock);
  621                 }
  622                 simple_unlock(&mp->mnt_slock);
  623                 return (error);
  624         }
  625         CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
  626         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
  627                 coveredvp->v_mountedhere = NULL;
  628                 vrele(coveredvp);
  629         }
  630         mp->mnt_op->vfs_refcount--;
  631         if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
  632                 panic("unmount: dangling vnode");
  633         mp->mnt_iflag |= IMNT_GONE;
  634         lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
  635         mount_finispecific(mp);
  636         if (used_syncer)
  637                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  638         simple_lock(&mp->mnt_slock);
  639         while (mp->mnt_wcnt > 0) {
  640                 wakeup(mp);
  641                 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
  642         }
  643         simple_unlock(&mp->mnt_slock);
  644         vfs_hooks_unmount(mp);
  645         free(mp, M_MOUNT);
  646         return (0);
  647 }
  648 
  649 /*
  650  * Sync each mounted filesystem.
  651  */
  652 #ifdef DEBUG
  653 int syncprt = 0;
  654 struct ctldebug debug0 = { "syncprt", &syncprt };
  655 #endif
  656 
  657 /* ARGSUSED */
  658 int
  659 sys_sync(struct lwp *l, void *v, register_t *retval)
  660 {
  661         struct mount *mp, *nmp;
  662         int asyncflag;
  663 
  664         if (l == NULL)
  665                 l = &lwp0;
  666 
  667         simple_lock(&mountlist_slock);
  668         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
  669                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  670                         nmp = mp->mnt_list.cqe_prev;
  671                         continue;
  672                 }
  673                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  674                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  675                         asyncflag = mp->mnt_flag & MNT_ASYNC;
  676                         mp->mnt_flag &= ~MNT_ASYNC;
  677                         VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
  678                         if (asyncflag)
  679                                  mp->mnt_flag |= MNT_ASYNC;
  680                         vn_finished_write(mp, 0);
  681                 }
  682                 simple_lock(&mountlist_slock);
  683                 nmp = mp->mnt_list.cqe_prev;
  684                 vfs_unbusy(mp);
  685 
  686         }
  687         simple_unlock(&mountlist_slock);
  688 #ifdef DEBUG
  689         if (syncprt)
  690                 vfs_bufstats();
  691 #endif /* DEBUG */
  692         return (0);
  693 }
  694 
  695 /*
  696  * Change filesystem quotas.
  697  */
  698 /* ARGSUSED */
  699 int
  700 sys_quotactl(struct lwp *l, void *v, register_t *retval)
  701 {
  702         struct sys_quotactl_args /* {
  703                 syscallarg(const char *) path;
  704                 syscallarg(int) cmd;
  705                 syscallarg(int) uid;
  706                 syscallarg(void *) arg;
  707         } */ *uap = v;
  708         struct mount *mp;
  709         int error;
  710         struct nameidata nd;
  711 
  712         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
  713         if ((error = namei(&nd)) != 0)
  714                 return (error);
  715         error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
  716         vrele(nd.ni_vp);
  717         if (error)
  718                 return (error);
  719         error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
  720             SCARG(uap, arg), l);
  721         vn_finished_write(mp, 0);
  722         return (error);
  723 }
  724 
  725 int
  726 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
  727     int root)
  728 {
  729         struct cwdinfo *cwdi = l->l_proc->p_cwdi;
  730         int error = 0;
  731 
  732         /*
  733          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  734          * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
  735          * overrides MNT_NOWAIT.
  736          */
  737         if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
  738             (flags != MNT_WAIT && flags != 0)) {
  739                 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
  740                 goto done;
  741         }
  742 
  743         /* Get the filesystem stats now */
  744         memset(sp, 0, sizeof(*sp));
  745         if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
  746                 return error;
  747         }
  748 
  749         if (cwdi->cwdi_rdir == NULL)
  750                 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
  751 done:
  752         if (cwdi->cwdi_rdir != NULL) {
  753                 size_t len;
  754                 char *bp;
  755                 char *path = PNBUF_GET();
  756                 if (!path)
  757                         return ENOMEM;
  758 
  759                 bp = path + MAXPATHLEN;
  760                 *--bp = '\0';
  761                 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
  762                     MAXPATHLEN / 2, 0, l);
  763                 if (error) {
  764                         PNBUF_PUT(path);
  765                         return error;
  766                 }
  767                 len = strlen(bp);
  768                 /*
  769                  * for mount points that are below our root, we can see
  770                  * them, so we fix up the pathname and return them. The
  771                  * rest we cannot see, so we don't allow viewing the
  772                  * data.
  773                  */
  774                 if (strncmp(bp, sp->f_mntonname, len) == 0) {
  775                         strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
  776                             sizeof(sp->f_mntonname));
  777                         if (sp->f_mntonname[0] == '\0')
  778                                 (void)strlcpy(sp->f_mntonname, "/",
  779                                     sizeof(sp->f_mntonname));
  780                 } else {
  781                         if (root)
  782                                 (void)strlcpy(sp->f_mntonname, "/",
  783                                     sizeof(sp->f_mntonname));
  784                         else
  785                                 error = EPERM;
  786                 }
  787                 PNBUF_PUT(path);
  788         }
  789         sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
  790         return error;
  791 }
  792 
  793 /*
  794  * Get filesystem statistics.
  795  */
  796 /* ARGSUSED */
  797 int
  798 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
  799 {
  800         struct sys_statvfs1_args /* {
  801                 syscallarg(const char *) path;
  802                 syscallarg(struct statvfs *) buf;
  803                 syscallarg(int) flags;
  804         } */ *uap = v;
  805         struct mount *mp;
  806         struct statvfs *sb;
  807         int error;
  808         struct nameidata nd;
  809 
  810         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
  811         if ((error = namei(&nd)) != 0)
  812                 return error;
  813         mp = nd.ni_vp->v_mount;
  814         vrele(nd.ni_vp);
  815         sb = STATVFSBUF_GET();
  816         error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
  817         if (error == 0) {
  818                 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
  819         }
  820         STATVFSBUF_PUT(sb);
  821         return error;
  822 }
  823 
  824 /*
  825  * Get filesystem statistics.
  826  */
  827 /* ARGSUSED */
  828 int
  829 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
  830 {
  831         struct sys_fstatvfs1_args /* {
  832                 syscallarg(int) fd;
  833                 syscallarg(struct statvfs *) buf;
  834                 syscallarg(int) flags;
  835         } */ *uap = v;
  836         struct proc *p = l->l_proc;
  837         struct file *fp;
  838         struct mount *mp;
  839         struct statvfs *sb;
  840         int error;
  841 
  842         /* getvnode() will use the descriptor for us */
  843         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
  844                 return (error);
  845         mp = ((struct vnode *)fp->f_data)->v_mount;
  846         sb = STATVFSBUF_GET();
  847         if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
  848                 goto out;
  849         error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
  850  out:
  851         FILE_UNUSE(fp, l);
  852         STATVFSBUF_PUT(sb);
  853         return error;
  854 }
  855 
  856 
  857 /*
  858  * Get statistics on all filesystems.
  859  */
  860 int
  861 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
  862 {
  863         struct sys_getvfsstat_args /* {
  864                 syscallarg(struct statvfs *) buf;
  865                 syscallarg(size_t) bufsize;
  866                 syscallarg(int) flags;
  867         } */ *uap = v;
  868         int root = 0;
  869         struct proc *p = l->l_proc;
  870         struct mount *mp, *nmp;
  871         struct statvfs *sb;
  872         struct statvfs *sfsp;
  873         size_t count, maxcount;
  874         int error = 0;
  875 
  876         sb = STATVFSBUF_GET();
  877         maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
  878         sfsp = SCARG(uap, buf);
  879         simple_lock(&mountlist_slock);
  880         count = 0;
  881         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
  882              mp = nmp) {
  883                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  884                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
  885                         continue;
  886                 }
  887                 if (sfsp && count < maxcount) {
  888                         error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
  889                         if (error) {
  890                                 simple_lock(&mountlist_slock);
  891                                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  892                                 vfs_unbusy(mp);
  893                                 continue;
  894                         }
  895                         error = copyout(sb, sfsp, sizeof(*sfsp));
  896                         if (error) {
  897                                 vfs_unbusy(mp);
  898                                 goto out;
  899                         }
  900                         sfsp++;
  901                         root |= strcmp(sb->f_mntonname, "/") == 0;
  902                 }
  903                 count++;
  904                 simple_lock(&mountlist_slock);
  905                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  906                 vfs_unbusy(mp);
  907         }
  908         simple_unlock(&mountlist_slock);
  909         if (root == 0 && p->p_cwdi->cwdi_rdir) {
  910                 /*
  911                  * fake a root entry
  912                  */
  913                 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
  914                     SCARG(uap, flags), 1)) != 0)
  915                         goto out;
  916                 if (sfsp)
  917                         error = copyout(sb, sfsp, sizeof(*sfsp));
  918                 count++;
  919         }
  920         if (sfsp && count > maxcount)
  921                 *retval = maxcount;
  922         else
  923                 *retval = count;
  924 out:
  925         STATVFSBUF_PUT(sb);
  926         return error;
  927 }
  928 
  929 /*
  930  * Change current working directory to a given file descriptor.
  931  */
  932 /* ARGSUSED */
  933 int
  934 sys_fchdir(struct lwp *l, void *v, register_t *retval)
  935 {
  936         struct sys_fchdir_args /* {
  937                 syscallarg(int) fd;
  938         } */ *uap = v;
  939         struct proc *p = l->l_proc;
  940         struct filedesc *fdp = p->p_fd;
  941         struct cwdinfo *cwdi = p->p_cwdi;
  942         struct vnode *vp, *tdp;
  943         struct mount *mp;
  944         struct file *fp;
  945         int error;
  946 
  947         /* getvnode() will use the descriptor for us */
  948         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
  949                 return (error);
  950         vp = (struct vnode *)fp->f_data;
  951 
  952         VREF(vp);
  953         vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
  954         if (vp->v_type != VDIR)
  955                 error = ENOTDIR;
  956         else
  957                 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
  958         if (error) {
  959                 vput(vp);
  960                 goto out;
  961         }
  962         while (!error && (mp = vp->v_mountedhere) != NULL) {
  963                 if (vfs_busy(mp, 0, 0))
  964                         continue;
  965 
  966                 vput(vp);
  967                 error = VFS_ROOT(mp, &tdp);
  968                 vfs_unbusy(mp);
  969                 if (error)
  970                         goto out;
  971                 vp = tdp;
  972         }
  973         VOP_UNLOCK(vp, 0);
  974 
  975         /*
  976          * Disallow changing to a directory not under the process's
  977          * current root directory (if there is one).
  978          */
  979         if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
  980                 vrele(vp);
  981                 error = EPERM;  /* operation not permitted */
  982                 goto out;
  983         }
  984 
  985         vrele(cwdi->cwdi_cdir);
  986         cwdi->cwdi_cdir = vp;
  987  out:
  988         FILE_UNUSE(fp, l);
  989         return (error);
  990 }
  991 
  992 /*
  993  * Change this process's notion of the root directory to a given file
  994  * descriptor.
  995  */
  996 int
  997 sys_fchroot(struct lwp *l, void *v, register_t *retval)
  998 {
  999         struct sys_fchroot_args *uap = v;
 1000         struct proc *p = l->l_proc;
 1001         struct filedesc *fdp = p->p_fd;
 1002         struct cwdinfo *cwdi = p->p_cwdi;
 1003         struct vnode    *vp;
 1004         struct file     *fp;
 1005         int              error;
 1006 
 1007         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
 1008             KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
 1009                 return error;
 1010         /* getvnode() will use the descriptor for us */
 1011         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
 1012                 return error;
 1013         vp = (struct vnode *) fp->f_data;
 1014         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1015         if (vp->v_type != VDIR)
 1016                 error = ENOTDIR;
 1017         else
 1018                 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
 1019         VOP_UNLOCK(vp, 0);
 1020         if (error)
 1021                 goto out;
 1022         VREF(vp);
 1023 
 1024         /*
 1025          * Prevent escaping from chroot by putting the root under
 1026          * the working directory.  Silently chdir to / if we aren't
 1027          * already there.
 1028          */
 1029         if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
 1030                 /*
 1031                  * XXX would be more failsafe to change directory to a
 1032                  * deadfs node here instead
 1033                  */
 1034                 vrele(cwdi->cwdi_cdir);
 1035                 VREF(vp);
 1036                 cwdi->cwdi_cdir = vp;
 1037         }
 1038 
 1039         if (cwdi->cwdi_rdir != NULL)
 1040                 vrele(cwdi->cwdi_rdir);
 1041         cwdi->cwdi_rdir = vp;
 1042  out:
 1043         FILE_UNUSE(fp, l);
 1044         return (error);
 1045 }
 1046 
 1047 /*
 1048  * Change current working directory (``.'').
 1049  */
 1050 /* ARGSUSED */
 1051 int
 1052 sys_chdir(struct lwp *l, void *v, register_t *retval)
 1053 {
 1054         struct sys_chdir_args /* {
 1055                 syscallarg(const char *) path;
 1056         } */ *uap = v;
 1057         struct proc *p = l->l_proc;
 1058         struct cwdinfo *cwdi = p->p_cwdi;
 1059         int error;
 1060         struct nameidata nd;
 1061 
 1062         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1063             SCARG(uap, path), l);
 1064         if ((error = change_dir(&nd, l)) != 0)
 1065                 return (error);
 1066         vrele(cwdi->cwdi_cdir);
 1067         cwdi->cwdi_cdir = nd.ni_vp;
 1068         return (0);
 1069 }
 1070 
 1071 /*
 1072  * Change notion of root (``/'') directory.
 1073  */
 1074 /* ARGSUSED */
 1075 int
 1076 sys_chroot(struct lwp *l, void *v, register_t *retval)
 1077 {
 1078         struct sys_chroot_args /* {
 1079                 syscallarg(const char *) path;
 1080         } */ *uap = v;
 1081         struct proc *p = l->l_proc;
 1082         struct cwdinfo *cwdi = p->p_cwdi;
 1083         struct vnode *vp;
 1084         int error;
 1085         struct nameidata nd;
 1086 
 1087         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
 1088             KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
 1089                 return (error);
 1090         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1091             SCARG(uap, path), l);
 1092         if ((error = change_dir(&nd, l)) != 0)
 1093                 return (error);
 1094         if (cwdi->cwdi_rdir != NULL)
 1095                 vrele(cwdi->cwdi_rdir);
 1096         vp = nd.ni_vp;
 1097         cwdi->cwdi_rdir = vp;
 1098 
 1099         /*
 1100          * Prevent escaping from chroot by putting the root under
 1101          * the working directory.  Silently chdir to / if we aren't
 1102          * already there.
 1103          */
 1104         if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
 1105                 /*
 1106                  * XXX would be more failsafe to change directory to a
 1107                  * deadfs node here instead
 1108                  */
 1109                 vrele(cwdi->cwdi_cdir);
 1110                 VREF(vp);
 1111                 cwdi->cwdi_cdir = vp;
 1112         }
 1113 
 1114         return (0);
 1115 }
 1116 
 1117 /*
 1118  * Common routine for chroot and chdir.
 1119  */
 1120 static int
 1121 change_dir(struct nameidata *ndp, struct lwp *l)
 1122 {
 1123         struct vnode *vp;
 1124         int error;
 1125 
 1126         if ((error = namei(ndp)) != 0)
 1127                 return (error);
 1128         vp = ndp->ni_vp;
 1129         if (vp->v_type != VDIR)
 1130                 error = ENOTDIR;
 1131         else
 1132                 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
 1133 
 1134         if (error)
 1135                 vput(vp);
 1136         else
 1137                 VOP_UNLOCK(vp, 0);
 1138         return (error);
 1139 }
 1140 
 1141 /*
 1142  * Check permissions, allocate an open file structure,
 1143  * and call the device open routine if any.
 1144  */
 1145 int
 1146 sys_open(struct lwp *l, void *v, register_t *retval)
 1147 {
 1148         struct sys_open_args /* {
 1149                 syscallarg(const char *) path;
 1150                 syscallarg(int) flags;
 1151                 syscallarg(int) mode;
 1152         } */ *uap = v;
 1153         struct proc *p = l->l_proc;
 1154         struct cwdinfo *cwdi = p->p_cwdi;
 1155         struct filedesc *fdp = p->p_fd;
 1156         struct file *fp;
 1157         struct vnode *vp;
 1158         int flags, cmode;
 1159         int type, indx, error;
 1160         struct flock lf;
 1161         struct nameidata nd;
 1162 
 1163         flags = FFLAGS(SCARG(uap, flags));
 1164         if ((flags & (FREAD | FWRITE)) == 0)
 1165                 return (EINVAL);
 1166         /* falloc() will use the file descriptor for us */
 1167         if ((error = falloc(l, &fp, &indx)) != 0)
 1168                 return (error);
 1169         cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
 1170         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 1171         l->l_dupfd = -indx - 1;                 /* XXX check for fdopen */
 1172         if ((error = vn_open(&nd, flags, cmode)) != 0) {
 1173                 FILE_UNUSE(fp, l);
 1174                 fdp->fd_ofiles[indx] = NULL;
 1175                 ffree(fp);
 1176                 if ((error == EDUPFD || error == EMOVEFD) &&
 1177                     l->l_dupfd >= 0 &&                  /* XXX from fdopen */
 1178                     (error =
 1179                         dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
 1180                         *retval = indx;
 1181                         return (0);
 1182                 }
 1183                 if (error == ERESTART)
 1184                         error = EINTR;
 1185                 fdremove(fdp, indx);
 1186                 return (error);
 1187         }
 1188         l->l_dupfd = 0;
 1189         vp = nd.ni_vp;
 1190         fp->f_flag = flags & FMASK;
 1191         fp->f_type = DTYPE_VNODE;
 1192         fp->f_ops = &vnops;
 1193         fp->f_data = vp;
 1194         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1195                 lf.l_whence = SEEK_SET;
 1196                 lf.l_start = 0;
 1197                 lf.l_len = 0;
 1198                 if (flags & O_EXLOCK)
 1199                         lf.l_type = F_WRLCK;
 1200                 else
 1201                         lf.l_type = F_RDLCK;
 1202                 type = F_FLOCK;
 1203                 if ((flags & FNONBLOCK) == 0)
 1204                         type |= F_WAIT;
 1205                 VOP_UNLOCK(vp, 0);
 1206                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1207                 if (error) {
 1208                         (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
 1209                         FILE_UNUSE(fp, l);
 1210                         ffree(fp);
 1211                         fdremove(fdp, indx);
 1212                         return (error);
 1213                 }
 1214                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1215                 fp->f_flag |= FHASLOCK;
 1216         }
 1217         VOP_UNLOCK(vp, 0);
 1218         *retval = indx;
 1219         FILE_SET_MATURE(fp);
 1220         FILE_UNUSE(fp, l);
 1221         return (0);
 1222 }
 1223 
 1224 static void
 1225 vfs__fhfree(fhandle_t *fhp)
 1226 {
 1227         size_t fhsize;
 1228 
 1229         if (fhp == NULL) {
 1230                 return;
 1231         }
 1232         fhsize = FHANDLE_SIZE(fhp);
 1233         kmem_free(fhp, fhsize);
 1234 }
 1235 
 1236 /*
 1237  * vfs_composefh: compose a filehandle.
 1238  */
 1239 
 1240 int
 1241 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
 1242 {
 1243         struct mount *mp;
 1244         struct fid *fidp;
 1245         int error;
 1246         size_t needfhsize;
 1247         size_t fidsize;
 1248 
 1249         mp = vp->v_mount;
 1250         fidp = NULL;
 1251         if (*fh_size < FHANDLE_SIZE_MIN) {
 1252                 fidsize = 0;
 1253         } else {
 1254                 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
 1255                 if (fhp != NULL) {
 1256                         memset(fhp, 0, *fh_size);
 1257                         fhp->fh_fsid = mp->mnt_stat.f_fsidx;
 1258                         fidp = &fhp->fh_fid;
 1259                 }
 1260         }
 1261         error = VFS_VPTOFH(vp, fidp, &fidsize);
 1262         needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
 1263         if (error == 0 && *fh_size < needfhsize) {
 1264                 error = E2BIG;
 1265         }
 1266         *fh_size = needfhsize;
 1267         return error;
 1268 }
 1269 
 1270 int
 1271 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
 1272 {
 1273         struct mount *mp;
 1274         fhandle_t *fhp;
 1275         size_t fhsize;
 1276         size_t fidsize;
 1277         int error;
 1278 
 1279         *fhpp = NULL;
 1280         mp = vp->v_mount;
 1281         fidsize = 0;
 1282         error = VFS_VPTOFH(vp, NULL, &fidsize);
 1283         KASSERT(error != 0);
 1284         if (error != E2BIG) {
 1285                 goto out;
 1286         }
 1287         fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
 1288         fhp = kmem_zalloc(fhsize, KM_SLEEP);
 1289         if (fhp == NULL) {
 1290                 error = ENOMEM;
 1291                 goto out;
 1292         }
 1293         fhp->fh_fsid = mp->mnt_stat.f_fsidx;
 1294         error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
 1295         if (error == 0) {
 1296                 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
 1297                     FHANDLE_FILEID(fhp)->fid_len == fidsize));
 1298                 *fhpp = fhp;
 1299         } else {
 1300                 kmem_free(fhp, fhsize);
 1301         }
 1302 out:
 1303         return error;
 1304 }
 1305 
 1306 void
 1307 vfs_composefh_free(fhandle_t *fhp)
 1308 {
 1309 
 1310         vfs__fhfree(fhp);
 1311 }
 1312 
 1313 /*
 1314  * vfs_fhtovp: lookup a vnode by a filehandle.
 1315  */
 1316 
 1317 int
 1318 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
 1319 {
 1320         struct mount *mp;
 1321         int error;
 1322 
 1323         *vpp = NULL;
 1324         mp = vfs_getvfs(FHANDLE_FSID(fhp));
 1325         if (mp == NULL) {
 1326                 error = ESTALE;
 1327                 goto out;
 1328         }
 1329         if (mp->mnt_op->vfs_fhtovp == NULL) {
 1330                 error = EOPNOTSUPP;
 1331                 goto out;
 1332         }
 1333         error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
 1334 out:
 1335         return error;
 1336 }
 1337 
 1338 /*
 1339  * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
 1340  * the needed size.
 1341  */
 1342 
 1343 int
 1344 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
 1345 {
 1346         fhandle_t *fhp;
 1347         int error;
 1348 
 1349         *fhpp = NULL;
 1350         if (fhsize > FHANDLE_SIZE_MAX) {
 1351                 return EINVAL;
 1352         }
 1353         if (fhsize < FHANDLE_SIZE_MIN) {
 1354                 return EINVAL;
 1355         }
 1356 again:
 1357         fhp = kmem_alloc(fhsize, KM_SLEEP);
 1358         if (fhp == NULL) {
 1359                 return ENOMEM;
 1360         }
 1361         error = copyin(ufhp, fhp, fhsize);
 1362         if (error == 0) {
 1363                 /* XXX this check shouldn't be here */
 1364                 if (FHANDLE_SIZE(fhp) == fhsize) {
 1365                         *fhpp = fhp;
 1366                         return 0;
 1367                 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
 1368                         /*
 1369                          * a kludge for nfsv2 padded handles.
 1370                          */
 1371                         size_t sz;
 1372 
 1373                         sz = FHANDLE_SIZE(fhp);
 1374                         kmem_free(fhp, fhsize);
 1375                         fhsize = sz;
 1376                         goto again;
 1377                 } else {
 1378                         /*
 1379                          * userland told us wrong size.
 1380                          */
 1381                         error = EINVAL;
 1382                 }
 1383         }
 1384         kmem_free(fhp, fhsize);
 1385         return error;
 1386 }
 1387 
 1388 void
 1389 vfs_copyinfh_free(fhandle_t *fhp)
 1390 {
 1391 
 1392         vfs__fhfree(fhp);
 1393 }
 1394 
 1395 /*
 1396  * Get file handle system call
 1397  */
 1398 int
 1399 sys___getfh30(struct lwp *l, void *v, register_t *retval)
 1400 {
 1401         struct sys___getfh30_args /* {
 1402                 syscallarg(char *) fname;
 1403                 syscallarg(fhandle_t *) fhp;
 1404                 syscallarg(size_t *) fh_size;
 1405         } */ *uap = v;
 1406         struct vnode *vp;
 1407         fhandle_t *fh;
 1408         int error;
 1409         struct nameidata nd;
 1410         size_t sz;
 1411         size_t usz;
 1412 
 1413         /*
 1414          * Must be super user
 1415          */
 1416         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1417             0, NULL, NULL, NULL);
 1418         if (error)
 1419                 return (error);
 1420         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1421             SCARG(uap, fname), l);
 1422         error = namei(&nd);
 1423         if (error)
 1424                 return (error);
 1425         vp = nd.ni_vp;
 1426         error = vfs_composefh_alloc(vp, &fh);
 1427         vput(vp);
 1428         if (error != 0) {
 1429                 goto out;
 1430         }
 1431         error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
 1432         if (error != 0) {
 1433                 goto out;
 1434         }
 1435         sz = FHANDLE_SIZE(fh);
 1436         error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
 1437         if (error != 0) {
 1438                 goto out;
 1439         }
 1440         if (usz >= sz) {
 1441                 error = copyout(fh, SCARG(uap, fhp), sz);
 1442         } else {
 1443                 error = E2BIG;
 1444         }
 1445 out:
 1446         vfs_composefh_free(fh);
 1447         return (error);
 1448 }
 1449 
 1450 /*
 1451  * Open a file given a file handle.
 1452  *
 1453  * Check permissions, allocate an open file structure,
 1454  * and call the device open routine if any.
 1455  */
 1456 
 1457 int
 1458 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
 1459     register_t *retval)
 1460 {
 1461         struct filedesc *fdp = l->l_proc->p_fd;
 1462         struct file *fp;
 1463         struct vnode *vp = NULL;
 1464         struct mount *mp;
 1465         kauth_cred_t cred = l->l_cred;
 1466         struct file *nfp;
 1467         int type, indx, error=0;
 1468         struct flock lf;
 1469         struct vattr va;
 1470         fhandle_t *fh;
 1471         int flags;
 1472 
 1473         /*
 1474          * Must be super user
 1475          */
 1476         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1477             0, NULL, NULL, NULL)))
 1478                 return (error);
 1479 
 1480         flags = FFLAGS(oflags);
 1481         if ((flags & (FREAD | FWRITE)) == 0)
 1482                 return (EINVAL);
 1483         if ((flags & O_CREAT))
 1484                 return (EINVAL);
 1485         /* falloc() will use the file descriptor for us */
 1486         if ((error = falloc(l, &nfp, &indx)) != 0)
 1487                 return (error);
 1488         fp = nfp;
 1489         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1490         if (error != 0) {
 1491                 goto bad;
 1492         }
 1493         error = vfs_fhtovp(fh, &vp);
 1494         if (error != 0) {
 1495                 goto bad;
 1496         }
 1497 
 1498         /* Now do an effective vn_open */
 1499 
 1500         if (vp->v_type == VSOCK) {
 1501                 error = EOPNOTSUPP;
 1502                 goto bad;
 1503         }
 1504         if (flags & FREAD) {
 1505                 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
 1506                         goto bad;
 1507         }
 1508         if (flags & (FWRITE | O_TRUNC)) {
 1509                 if (vp->v_type == VDIR) {
 1510                         error = EISDIR;
 1511                         goto bad;
 1512                 }
 1513                 if ((error = vn_writechk(vp)) != 0 ||
 1514                     (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
 1515                         goto bad;
 1516         }
 1517         if (flags & O_TRUNC) {
 1518                 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 1519                         goto bad;
 1520                 VOP_UNLOCK(vp, 0);                      /* XXX */
 1521                 VOP_LEASE(vp, l, cred, LEASE_WRITE);
 1522                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 1523                 VATTR_NULL(&va);
 1524                 va.va_size = 0;
 1525                 error = VOP_SETATTR(vp, &va, cred, l);
 1526                 vn_finished_write(mp, 0);
 1527                 if (error)
 1528                         goto bad;
 1529         }
 1530         if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
 1531                 goto bad;
 1532         if (vp->v_type == VREG &&
 1533             uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
 1534                 error = EIO;
 1535                 goto bad;
 1536         }
 1537         if (flags & FWRITE)
 1538                 vp->v_writecount++;
 1539 
 1540         /* done with modified vn_open, now finish what sys_open does. */
 1541 
 1542         fp->f_flag = flags & FMASK;
 1543         fp->f_type = DTYPE_VNODE;
 1544         fp->f_ops = &vnops;
 1545         fp->f_data = vp;
 1546         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1547                 lf.l_whence = SEEK_SET;
 1548                 lf.l_start = 0;
 1549                 lf.l_len = 0;
 1550                 if (flags & O_EXLOCK)
 1551                         lf.l_type = F_WRLCK;
 1552                 else
 1553                         lf.l_type = F_RDLCK;
 1554                 type = F_FLOCK;
 1555                 if ((flags & FNONBLOCK) == 0)
 1556                         type |= F_WAIT;
 1557                 VOP_UNLOCK(vp, 0);
 1558                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1559                 if (error) {
 1560                         (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
 1561                         FILE_UNUSE(fp, l);
 1562                         ffree(fp);
 1563                         fdremove(fdp, indx);
 1564                         return (error);
 1565                 }
 1566                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1567                 fp->f_flag |= FHASLOCK;
 1568         }
 1569         VOP_UNLOCK(vp, 0);
 1570         *retval = indx;
 1571         FILE_SET_MATURE(fp);
 1572         FILE_UNUSE(fp, l);
 1573         vfs_copyinfh_free(fh);
 1574         return (0);
 1575 
 1576 bad:
 1577         FILE_UNUSE(fp, l);
 1578         ffree(fp);
 1579         fdremove(fdp, indx);
 1580         if (vp != NULL)
 1581                 vput(vp);
 1582         vfs_copyinfh_free(fh);
 1583         return (error);
 1584 }
 1585 
 1586 int
 1587 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
 1588 {
 1589         struct sys___fhopen40_args /* {
 1590                 syscallarg(const void *) fhp;
 1591                 syscallarg(size_t) fh_size;
 1592                 syscallarg(int) flags;
 1593         } */ *uap = v;
 1594 
 1595         return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
 1596             SCARG(uap, flags), retval);
 1597 }
 1598 
 1599 int
 1600 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
 1601     register_t *retval)
 1602 {
 1603         struct stat sb;
 1604         int error;
 1605         fhandle_t *fh;
 1606         struct vnode *vp;
 1607 
 1608         /*
 1609          * Must be super user
 1610          */
 1611         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1612             0, NULL, NULL, NULL)))
 1613                 return (error);
 1614 
 1615         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1616         if (error != 0) {
 1617                 goto bad;
 1618         }
 1619         error = vfs_fhtovp(fh, &vp);
 1620         if (error != 0) {
 1621                 goto bad;
 1622         }
 1623         error = vn_stat(vp, &sb, l);
 1624         vput(vp);
 1625         if (error) {
 1626                 goto bad;
 1627         }
 1628         error = copyout(&sb, sbp, sizeof(sb));
 1629 bad:
 1630         vfs_copyinfh_free(fh);
 1631         return error;
 1632 }
 1633 
 1634 
 1635 /* ARGSUSED */
 1636 int
 1637 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
 1638 {
 1639         struct sys___fhstat40_args /* {
 1640                 syscallarg(const void *) fhp;
 1641                 syscallarg(size_t) fh_size;
 1642                 syscallarg(struct stat *) sb;
 1643         } */ *uap = v;
 1644 
 1645         return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb),
 1646             retval);
 1647 }
 1648 
 1649 int
 1650 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
 1651     int flags, register_t *retval)
 1652 {
 1653         struct statvfs *sb = NULL;
 1654         fhandle_t *fh;
 1655         struct mount *mp;
 1656         struct vnode *vp;
 1657         int error;
 1658 
 1659         /*
 1660          * Must be super user
 1661          */
 1662         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1663             0, NULL, NULL, NULL)))
 1664                 return error;
 1665 
 1666         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1667         if (error != 0) {
 1668                 goto out;
 1669         }
 1670         error = vfs_fhtovp(fh, &vp);
 1671         if (error != 0) {
 1672                 goto out;
 1673         }
 1674         mp = vp->v_mount;
 1675         sb = STATVFSBUF_GET();
 1676         if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) {
 1677                 vput(vp);
 1678                 goto out;
 1679         }
 1680         vput(vp);
 1681         error = copyout(sb, buf, sizeof(*sb));
 1682 out:
 1683         if (sb != NULL) {
 1684                 STATVFSBUF_PUT(sb);
 1685         }
 1686         vfs_copyinfh_free(fh);
 1687         return error;
 1688 }
 1689 
 1690 /* ARGSUSED */
 1691 int
 1692 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
 1693 {
 1694         struct sys___fhstatvfs140_args /* {
 1695                 syscallarg(const void *) fhp;
 1696                 syscallarg(size_t) fh_size;
 1697                 syscallarg(struct statvfs *) buf;
 1698                 syscallarg(int) flags;
 1699         } */ *uap = v;
 1700 
 1701         return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size),
 1702             SCARG(uap, buf), SCARG(uap, flags), retval);
 1703 }
 1704 
 1705 /*
 1706  * Create a special file.
 1707  */
 1708 /* ARGSUSED */
 1709 int
 1710 sys_mknod(struct lwp *l, void *v, register_t *retval)
 1711 {
 1712         struct sys_mknod_args /* {
 1713                 syscallarg(const char *) path;
 1714                 syscallarg(int) mode;
 1715                 syscallarg(int) dev;
 1716         } */ *uap = v;
 1717         struct proc *p = l->l_proc;
 1718         struct vnode *vp;
 1719         struct mount *mp;
 1720         struct vattr vattr;
 1721         int error;
 1722         int whiteout = 0;
 1723         struct nameidata nd;
 1724 
 1725         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
 1726             0, NULL, NULL, NULL)) != 0)
 1727                 return (error);
 1728 restart:
 1729         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
 1730         if ((error = namei(&nd)) != 0)
 1731                 return (error);
 1732         vp = nd.ni_vp;
 1733         if (vp != NULL)
 1734                 error = EEXIST;
 1735         else {
 1736                 VATTR_NULL(&vattr);
 1737                 vattr.va_mode =
 1738                     (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1739                 vattr.va_rdev = SCARG(uap, dev);
 1740                 whiteout = 0;
 1741 
 1742                 switch (SCARG(uap, mode) & S_IFMT) {
 1743                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1744                         vattr.va_type = VBAD;
 1745                         break;
 1746                 case S_IFCHR:
 1747                         vattr.va_type = VCHR;
 1748                         break;
 1749                 case S_IFBLK:
 1750                         vattr.va_type = VBLK;
 1751                         break;
 1752                 case S_IFWHT:
 1753                         whiteout = 1;
 1754                         break;
 1755                 default:
 1756                         error = EINVAL;
 1757                         break;
 1758                 }
 1759         }
 1760         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1761                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1762                 if (nd.ni_dvp == vp)
 1763                         vrele(nd.ni_dvp);
 1764                 else
 1765                         vput(nd.ni_dvp);
 1766                 if (vp)
 1767                         vrele(vp);
 1768                 if ((error = vn_start_write(NULL, &mp,
 1769                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1770                         return (error);
 1771                 goto restart;
 1772         }
 1773         if (!error) {
 1774                 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 1775                 if (whiteout) {
 1776                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1777                         if (error)
 1778                                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1779                         vput(nd.ni_dvp);
 1780                 } else {
 1781                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1782                                                 &nd.ni_cnd, &vattr);
 1783                         if (error == 0)
 1784                                 vput(nd.ni_vp);
 1785                 }
 1786         } else {
 1787                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1788                 if (nd.ni_dvp == vp)
 1789                         vrele(nd.ni_dvp);
 1790                 else
 1791                         vput(nd.ni_dvp);
 1792                 if (vp)
 1793                         vrele(vp);
 1794         }
 1795         vn_finished_write(mp, 0);
 1796         return (error);
 1797 }
 1798 
 1799 /*
 1800  * Create a named pipe.
 1801  */
 1802 /* ARGSUSED */
 1803 int
 1804 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
 1805 {
 1806         struct sys_mkfifo_args /* {
 1807                 syscallarg(const char *) path;
 1808                 syscallarg(int) mode;
 1809         } */ *uap = v;
 1810         struct proc *p = l->l_proc;
 1811         struct mount *mp;
 1812         struct vattr vattr;
 1813         int error;
 1814         struct nameidata nd;
 1815 
 1816 restart:
 1817         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
 1818         if ((error = namei(&nd)) != 0)
 1819                 return (error);
 1820         if (nd.ni_vp != NULL) {
 1821                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1822                 if (nd.ni_dvp == nd.ni_vp)
 1823                         vrele(nd.ni_dvp);
 1824                 else
 1825                         vput(nd.ni_dvp);
 1826                 vrele(nd.ni_vp);
 1827                 return (EEXIST);
 1828         }
 1829         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1830                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1831                 if (nd.ni_dvp == nd.ni_vp)
 1832                         vrele(nd.ni_dvp);
 1833                 else
 1834                         vput(nd.ni_dvp);
 1835                 if (nd.ni_vp)
 1836                         vrele(nd.ni_vp);
 1837                 if ((error = vn_start_write(NULL, &mp,
 1838                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1839                         return (error);
 1840                 goto restart;
 1841         }
 1842         VATTR_NULL(&vattr);
 1843         vattr.va_type = VFIFO;
 1844         vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1845         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 1846         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1847         if (error == 0)
 1848                 vput(nd.ni_vp);
 1849         vn_finished_write(mp, 0);
 1850         return (error);
 1851 }
 1852 
 1853 /*
 1854  * Make a hard file link.
 1855  */
 1856 /* ARGSUSED */
 1857 int
 1858 sys_link(struct lwp *l, void *v, register_t *retval)
 1859 {
 1860         struct sys_link_args /* {
 1861                 syscallarg(const char *) path;
 1862                 syscallarg(const char *) link;
 1863         } */ *uap = v;
 1864         struct vnode *vp;
 1865         struct mount *mp;
 1866         struct nameidata nd;
 1867         int error;
 1868 
 1869         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 1870         if ((error = namei(&nd)) != 0)
 1871                 return (error);
 1872         vp = nd.ni_vp;
 1873         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 1874                 vrele(vp);
 1875                 return (error);
 1876         }
 1877         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
 1878         if ((error = namei(&nd)) != 0)
 1879                 goto out;
 1880         if (nd.ni_vp) {
 1881                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1882                 if (nd.ni_dvp == nd.ni_vp)
 1883                         vrele(nd.ni_dvp);
 1884                 else
 1885                         vput(nd.ni_dvp);
 1886                 vrele(nd.ni_vp);
 1887                 error = EEXIST;
 1888                 goto out;
 1889         }
 1890         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 1891         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 1892         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1893 out:
 1894         vrele(vp);
 1895         vn_finished_write(mp, 0);
 1896         return (error);
 1897 }
 1898 
 1899 /*
 1900  * Make a symbolic link.
 1901  */
 1902 /* ARGSUSED */
 1903 int
 1904 sys_symlink(struct lwp *l, void *v, register_t *retval)
 1905 {
 1906         struct sys_symlink_args /* {
 1907                 syscallarg(const char *) path;
 1908                 syscallarg(const char *) link;
 1909         } */ *uap = v;
 1910         struct proc *p = l->l_proc;
 1911         struct mount *mp;
 1912         struct vattr vattr;
 1913         char *path;
 1914         int error;
 1915         struct nameidata nd;
 1916 
 1917         path = PNBUF_GET();
 1918         error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
 1919         if (error)
 1920                 goto out;
 1921 restart:
 1922         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
 1923         if ((error = namei(&nd)) != 0)
 1924                 goto out;
 1925         if (nd.ni_vp) {
 1926                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1927                 if (nd.ni_dvp == nd.ni_vp)
 1928                         vrele(nd.ni_dvp);
 1929                 else
 1930                         vput(nd.ni_dvp);
 1931                 vrele(nd.ni_vp);
 1932                 error = EEXIST;
 1933                 goto out;
 1934         }
 1935         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1936                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1937                 if (nd.ni_dvp == nd.ni_vp)
 1938                         vrele(nd.ni_dvp);
 1939                 else
 1940                         vput(nd.ni_dvp);
 1941                 if ((error = vn_start_write(NULL, &mp,
 1942                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1943                         return (error);
 1944                 goto restart;
 1945         }
 1946         VATTR_NULL(&vattr);
 1947         vattr.va_type = VLNK;
 1948         vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
 1949         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 1950         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
 1951         if (error == 0)
 1952                 vput(nd.ni_vp);
 1953         vn_finished_write(mp, 0);
 1954 out:
 1955         PNBUF_PUT(path);
 1956         return (error);
 1957 }
 1958 
 1959 /*
 1960  * Delete a whiteout from the filesystem.
 1961  */
 1962 /* ARGSUSED */
 1963 int
 1964 sys_undelete(struct lwp *l, void *v, register_t *retval)
 1965 {
 1966         struct sys_undelete_args /* {
 1967                 syscallarg(const char *) path;
 1968         } */ *uap = v;
 1969         int error;
 1970         struct mount *mp;
 1971         struct nameidata nd;
 1972 
 1973 restart:
 1974         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 1975             SCARG(uap, path), l);
 1976         error = namei(&nd);
 1977         if (error)
 1978                 return (error);
 1979 
 1980         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1981                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1982                 if (nd.ni_dvp == nd.ni_vp)
 1983                         vrele(nd.ni_dvp);
 1984                 else
 1985                         vput(nd.ni_dvp);
 1986                 if (nd.ni_vp)
 1987                         vrele(nd.ni_vp);
 1988                 return (EEXIST);
 1989         }
 1990         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1991                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1992                 if (nd.ni_dvp == nd.ni_vp)
 1993                         vrele(nd.ni_dvp);
 1994                 else
 1995                         vput(nd.ni_dvp);
 1996                 if ((error = vn_start_write(NULL, &mp,
 1997                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1998                         return (error);
 1999                 goto restart;
 2000         }
 2001         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 2002         if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
 2003                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2004         vput(nd.ni_dvp);
 2005         vn_finished_write(mp, 0);
 2006         return (error);
 2007 }
 2008 
 2009 /*
 2010  * Delete a name from the filesystem.
 2011  */
 2012 /* ARGSUSED */
 2013 int
 2014 sys_unlink(struct lwp *l, void *v, register_t *retval)
 2015 {
 2016         struct sys_unlink_args /* {
 2017                 syscallarg(const char *) path;
 2018         } */ *uap = v;
 2019         struct mount *mp;
 2020         struct vnode *vp;
 2021         int error;
 2022         struct nameidata nd;
 2023 #if NVERIEXEC > 0
 2024         pathname_t pathbuf = NULL;
 2025 #endif /* NVERIEXEC > 0 */
 2026 
 2027 restart:
 2028         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 2029             SCARG(uap, path), l);
 2030         if ((error = namei(&nd)) != 0)
 2031                 return (error);
 2032         vp = nd.ni_vp;
 2033 
 2034         /*
 2035          * The root of a mounted filesystem cannot be deleted.
 2036          */
 2037         if (vp->v_flag & VROOT) {
 2038                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2039                 if (nd.ni_dvp == vp)
 2040                         vrele(nd.ni_dvp);
 2041                 else
 2042                         vput(nd.ni_dvp);
 2043                 vput(vp);
 2044                 error = EBUSY;
 2045                 goto out;
 2046         }
 2047 
 2048 #if NVERIEXEC > 0
 2049         error = pathname_get(nd.ni_dirp, nd.ni_segflg, &pathbuf);
 2050 
 2051         /* Handle remove requests for veriexec entries. */
 2052         if (!error) {
 2053                 error = veriexec_removechk(vp, pathname_path(pathbuf), l);
 2054                 pathname_put(pathbuf);
 2055         }
 2056 
 2057         if (error) {
 2058                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2059                 if (nd.ni_dvp == vp)
 2060                         vrele(nd.ni_dvp);
 2061                 else
 2062                         vput(nd.ni_dvp);
 2063                 vput(vp);
 2064                 goto out;
 2065         }
 2066 #endif /* NVERIEXEC > 0 */
 2067         
 2068         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 2069                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2070                 if (nd.ni_dvp == vp)
 2071                         vrele(nd.ni_dvp);
 2072                 else
 2073                         vput(nd.ni_dvp);
 2074                 vput(vp);
 2075                 if ((error = vn_start_write(NULL, &mp,
 2076                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 2077                         return (error);
 2078                 goto restart;
 2079         }
 2080         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 2081         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 2082 #ifdef FILEASSOC
 2083         (void)fileassoc_file_delete(vp);
 2084 #endif /* FILEASSOC */
 2085         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2086         vn_finished_write(mp, 0);
 2087 out:
 2088         return (error);
 2089 }
 2090 
 2091 /*
 2092  * Reposition read/write file offset.
 2093  */
 2094 int
 2095 sys_lseek(struct lwp *l, void *v, register_t *retval)
 2096 {
 2097         struct sys_lseek_args /* {
 2098                 syscallarg(int) fd;
 2099                 syscallarg(int) pad;
 2100                 syscallarg(off_t) offset;
 2101                 syscallarg(int) whence;
 2102         } */ *uap = v;
 2103         struct proc *p = l->l_proc;
 2104         kauth_cred_t cred = l->l_cred;
 2105         struct filedesc *fdp = p->p_fd;
 2106         struct file *fp;
 2107         struct vnode *vp;
 2108         struct vattr vattr;
 2109         off_t newoff;
 2110         int error;
 2111 
 2112         if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
 2113                 return (EBADF);
 2114 
 2115         FILE_USE(fp);
 2116 
 2117         vp = (struct vnode *)fp->f_data;
 2118         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2119                 error = ESPIPE;
 2120                 goto out;
 2121         }
 2122 
 2123         switch (SCARG(uap, whence)) {
 2124         case SEEK_CUR:
 2125                 newoff = fp->f_offset + SCARG(uap, offset);
 2126                 break;
 2127         case SEEK_END:
 2128                 error = VOP_GETATTR(vp, &vattr, cred, l);
 2129                 if (error)
 2130                         goto out;
 2131                 newoff = SCARG(uap, offset) + vattr.va_size;
 2132                 break;
 2133         case SEEK_SET:
 2134                 newoff = SCARG(uap, offset);
 2135                 break;
 2136         default:
 2137                 error = EINVAL;
 2138                 goto out;
 2139         }
 2140         if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
 2141                 goto out;
 2142 
 2143         *(off_t *)retval = fp->f_offset = newoff;
 2144  out:
 2145         FILE_UNUSE(fp, l);
 2146         return (error);
 2147 }
 2148 
 2149 /*
 2150  * Positional read system call.
 2151  */
 2152 int
 2153 sys_pread(struct lwp *l, void *v, register_t *retval)
 2154 {
 2155         struct sys_pread_args /* {
 2156                 syscallarg(int) fd;
 2157                 syscallarg(void *) buf;
 2158                 syscallarg(size_t) nbyte;
 2159                 syscallarg(off_t) offset;
 2160         } */ *uap = v;
 2161         struct proc *p = l->l_proc;
 2162         struct filedesc *fdp = p->p_fd;
 2163         struct file *fp;
 2164         struct vnode *vp;
 2165         off_t offset;
 2166         int error, fd = SCARG(uap, fd);
 2167 
 2168         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2169                 return (EBADF);
 2170 
 2171         if ((fp->f_flag & FREAD) == 0) {
 2172                 simple_unlock(&fp->f_slock);
 2173                 return (EBADF);
 2174         }
 2175 
 2176         FILE_USE(fp);
 2177 
 2178         vp = (struct vnode *)fp->f_data;
 2179         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2180                 error = ESPIPE;
 2181                 goto out;
 2182         }
 2183 
 2184         offset = SCARG(uap, offset);
 2185 
 2186         /*
 2187          * XXX This works because no file systems actually
 2188          * XXX take any action on the seek operation.
 2189          */
 2190         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2191                 goto out;
 2192 
 2193         /* dofileread() will unuse the descriptor for us */
 2194         return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2195             &offset, 0, retval));
 2196 
 2197  out:
 2198         FILE_UNUSE(fp, l);
 2199         return (error);
 2200 }
 2201 
 2202 /*
 2203  * Positional scatter read system call.
 2204  */
 2205 int
 2206 sys_preadv(struct lwp *l, void *v, register_t *retval)
 2207 {
 2208         struct sys_preadv_args /* {
 2209                 syscallarg(int) fd;
 2210                 syscallarg(const struct iovec *) iovp;
 2211                 syscallarg(int) iovcnt;
 2212                 syscallarg(off_t) offset;
 2213         } */ *uap = v;
 2214         struct proc *p = l->l_proc;
 2215         struct filedesc *fdp = p->p_fd;
 2216         struct file *fp;
 2217         struct vnode *vp;
 2218         off_t offset;
 2219         int error, fd = SCARG(uap, fd);
 2220 
 2221         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2222                 return (EBADF);
 2223 
 2224         if ((fp->f_flag & FREAD) == 0) {
 2225                 simple_unlock(&fp->f_slock);
 2226                 return (EBADF);
 2227         }
 2228 
 2229         FILE_USE(fp);
 2230 
 2231         vp = (struct vnode *)fp->f_data;
 2232         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2233                 error = ESPIPE;
 2234                 goto out;
 2235         }
 2236 
 2237         offset = SCARG(uap, offset);
 2238 
 2239         /*
 2240          * XXX This works because no file systems actually
 2241          * XXX take any action on the seek operation.
 2242          */
 2243         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2244                 goto out;
 2245 
 2246         /* dofilereadv() will unuse the descriptor for us */
 2247         return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 2248             &offset, 0, retval));
 2249 
 2250  out:
 2251         FILE_UNUSE(fp, l);
 2252         return (error);
 2253 }
 2254 
 2255 /*
 2256  * Positional write system call.
 2257  */
 2258 int
 2259 sys_pwrite(struct lwp *l, void *v, register_t *retval)
 2260 {
 2261         struct sys_pwrite_args /* {
 2262                 syscallarg(int) fd;
 2263                 syscallarg(const void *) buf;
 2264                 syscallarg(size_t) nbyte;
 2265                 syscallarg(off_t) offset;
 2266         } */ *uap = v;
 2267         struct proc *p = l->l_proc;
 2268         struct filedesc *fdp = p->p_fd;
 2269         struct file *fp;
 2270         struct vnode *vp;
 2271         off_t offset;
 2272         int error, fd = SCARG(uap, fd);
 2273 
 2274         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2275                 return (EBADF);
 2276 
 2277         if ((fp->f_flag & FWRITE) == 0) {
 2278                 simple_unlock(&fp->f_slock);
 2279                 return (EBADF);
 2280         }
 2281 
 2282         FILE_USE(fp);
 2283 
 2284         vp = (struct vnode *)fp->f_data;
 2285         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2286                 error = ESPIPE;
 2287                 goto out;
 2288         }
 2289 
 2290         offset = SCARG(uap, offset);
 2291 
 2292         /*
 2293          * XXX This works because no file systems actually
 2294          * XXX take any action on the seek operation.
 2295          */
 2296         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2297                 goto out;
 2298 
 2299         /* dofilewrite() will unuse the descriptor for us */
 2300         return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2301             &offset, 0, retval));
 2302 
 2303  out:
 2304         FILE_UNUSE(fp, l);
 2305         return (error);
 2306 }
 2307 
 2308 /*
 2309  * Positional gather write system call.
 2310  */
 2311 int
 2312 sys_pwritev(struct lwp *l, void *v, register_t *retval)
 2313 {
 2314         struct sys_pwritev_args /* {
 2315                 syscallarg(int) fd;
 2316                 syscallarg(const struct iovec *) iovp;
 2317                 syscallarg(int) iovcnt;
 2318                 syscallarg(off_t) offset;
 2319         } */ *uap = v;
 2320         struct proc *p = l->l_proc;
 2321         struct filedesc *fdp = p->p_fd;
 2322         struct file *fp;
 2323         struct vnode *vp;
 2324         off_t offset;
 2325         int error, fd = SCARG(uap, fd);
 2326 
 2327         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2328                 return (EBADF);
 2329 
 2330         if ((fp->f_flag & FWRITE) == 0) {
 2331                 simple_unlock(&fp->f_slock);
 2332                 return (EBADF);
 2333         }
 2334 
 2335         FILE_USE(fp);
 2336 
 2337         vp = (struct vnode *)fp->f_data;
 2338         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2339                 error = ESPIPE;
 2340                 goto out;
 2341         }
 2342 
 2343         offset = SCARG(uap, offset);
 2344 
 2345         /*
 2346          * XXX This works because no file systems actually
 2347          * XXX take any action on the seek operation.
 2348          */
 2349         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2350                 goto out;
 2351 
 2352         /* dofilewritev() will unuse the descriptor for us */
 2353         return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 2354             &offset, 0, retval));
 2355 
 2356  out:
 2357         FILE_UNUSE(fp, l);
 2358         return (error);
 2359 }
 2360 
 2361 /*
 2362  * Check access permissions.
 2363  */
 2364 int
 2365 sys_access(struct lwp *l, void *v, register_t *retval)
 2366 {
 2367         struct sys_access_args /* {
 2368                 syscallarg(const char *) path;
 2369                 syscallarg(int) flags;
 2370         } */ *uap = v;
 2371         kauth_cred_t cred;
 2372         struct vnode *vp;
 2373         int error, flags;
 2374         struct nameidata nd;
 2375 
 2376         if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) {
 2377                 /* nonsense flags */
 2378                 return EINVAL;
 2379         }
 2380 
 2381         cred = kauth_cred_dup(l->l_cred);
 2382         kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
 2383         kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
 2384         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2385             SCARG(uap, path), l);
 2386         /* Override default credentials */
 2387         nd.ni_cnd.cn_cred = cred;
 2388         if ((error = namei(&nd)) != 0)
 2389                 goto out;
 2390         vp = nd.ni_vp;
 2391 
 2392         /* Flags == 0 means only check for existence. */
 2393         if (SCARG(uap, flags)) {
 2394                 flags = 0;
 2395                 if (SCARG(uap, flags) & R_OK)
 2396                         flags |= VREAD;
 2397                 if (SCARG(uap, flags) & W_OK)
 2398                         flags |= VWRITE;
 2399                 if (SCARG(uap, flags) & X_OK)
 2400                         flags |= VEXEC;
 2401 
 2402                 error = VOP_ACCESS(vp, flags, cred, l);
 2403                 if (!error && (flags & VWRITE))
 2404                         error = vn_writechk(vp);
 2405         }
 2406         vput(vp);
 2407 out:
 2408         kauth_cred_free(cred);
 2409         return (error);
 2410 }
 2411 
 2412 /*
 2413  * Get file status; this version follows links.
 2414  */
 2415 /* ARGSUSED */
 2416 int
 2417 sys___stat30(struct lwp *l, void *v, register_t *retval)
 2418 {
 2419         struct sys___stat30_args /* {
 2420                 syscallarg(const char *) path;
 2421                 syscallarg(struct stat *) ub;
 2422         } */ *uap = v;
 2423         struct stat sb;
 2424         int error;
 2425         struct nameidata nd;
 2426 
 2427         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2428             SCARG(uap, path), l);
 2429         if ((error = namei(&nd)) != 0)
 2430                 return (error);
 2431         error = vn_stat(nd.ni_vp, &sb, l);
 2432         vput(nd.ni_vp);
 2433         if (error)
 2434                 return (error);
 2435         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2436         return (error);
 2437 }
 2438 
 2439 /*
 2440  * Get file status; this version does not follow links.
 2441  */
 2442 /* ARGSUSED */
 2443 int
 2444 sys___lstat30(struct lwp *l, void *v, register_t *retval)
 2445 {
 2446         struct sys___lstat30_args /* {
 2447                 syscallarg(const char *) path;
 2448                 syscallarg(struct stat *) ub;
 2449         } */ *uap = v;
 2450         struct stat sb;
 2451         int error;
 2452         struct nameidata nd;
 2453 
 2454         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2455             SCARG(uap, path), l);
 2456         if ((error = namei(&nd)) != 0)
 2457                 return (error);
 2458         error = vn_stat(nd.ni_vp, &sb, l);
 2459         vput(nd.ni_vp);
 2460         if (error)
 2461                 return (error);
 2462         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2463         return (error);
 2464 }
 2465 
 2466 /*
 2467  * Get configurable pathname variables.
 2468  */
 2469 /* ARGSUSED */
 2470 int
 2471 sys_pathconf(struct lwp *l, void *v, register_t *retval)
 2472 {
 2473         struct sys_pathconf_args /* {
 2474                 syscallarg(const char *) path;
 2475                 syscallarg(int) name;
 2476         } */ *uap = v;
 2477         int error;
 2478         struct nameidata nd;
 2479 
 2480         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2481             SCARG(uap, path), l);
 2482         if ((error = namei(&nd)) != 0)
 2483                 return (error);
 2484         error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
 2485         vput(nd.ni_vp);
 2486         return (error);
 2487 }
 2488 
 2489 /*
 2490  * Return target name of a symbolic link.
 2491  */
 2492 /* ARGSUSED */
 2493 int
 2494 sys_readlink(struct lwp *l, void *v, register_t *retval)
 2495 {
 2496         struct sys_readlink_args /* {
 2497                 syscallarg(const char *) path;
 2498                 syscallarg(char *) buf;
 2499                 syscallarg(size_t) count;
 2500         } */ *uap = v;
 2501         struct vnode *vp;
 2502         struct iovec aiov;
 2503         struct uio auio;
 2504         int error;
 2505         struct nameidata nd;
 2506 
 2507         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2508             SCARG(uap, path), l);
 2509         if ((error = namei(&nd)) != 0)
 2510                 return (error);
 2511         vp = nd.ni_vp;
 2512         if (vp->v_type != VLNK)
 2513                 error = EINVAL;
 2514         else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
 2515             (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
 2516                 aiov.iov_base = SCARG(uap, buf);
 2517                 aiov.iov_len = SCARG(uap, count);
 2518                 auio.uio_iov = &aiov;
 2519                 auio.uio_iovcnt = 1;
 2520                 auio.uio_offset = 0;
 2521                 auio.uio_rw = UIO_READ;
 2522                 KASSERT(l == curlwp);
 2523                 auio.uio_vmspace = l->l_proc->p_vmspace;
 2524                 auio.uio_resid = SCARG(uap, count);
 2525                 error = VOP_READLINK(vp, &auio, l->l_cred);
 2526         }
 2527         vput(vp);
 2528         *retval = SCARG(uap, count) - auio.uio_resid;
 2529         return (error);
 2530 }
 2531 
 2532 /*
 2533  * Change flags of a file given a path name.
 2534  */
 2535 /* ARGSUSED */
 2536 int
 2537 sys_chflags(struct lwp *l, void *v, register_t *retval)
 2538 {
 2539         struct sys_chflags_args /* {
 2540                 syscallarg(const char *) path;
 2541                 syscallarg(u_long) flags;
 2542         } */ *uap = v;
 2543         struct vnode *vp;
 2544         int error;
 2545         struct nameidata nd;
 2546 
 2547         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2548         if ((error = namei(&nd)) != 0)
 2549                 return (error);
 2550         vp = nd.ni_vp;
 2551         error = change_flags(vp, SCARG(uap, flags), l);
 2552         vput(vp);
 2553         return (error);
 2554 }
 2555 
 2556 /*
 2557  * Change flags of a file given a file descriptor.
 2558  */
 2559 /* ARGSUSED */
 2560 int
 2561 sys_fchflags(struct lwp *l, void *v, register_t *retval)
 2562 {
 2563         struct sys_fchflags_args /* {
 2564                 syscallarg(int) fd;
 2565                 syscallarg(u_long) flags;
 2566         } */ *uap = v;
 2567         struct proc *p = l->l_proc;
 2568         struct vnode *vp;
 2569         struct file *fp;
 2570         int error;
 2571 
 2572         /* getvnode() will use the descriptor for us */
 2573         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2574                 return (error);
 2575         vp = (struct vnode *)fp->f_data;
 2576         error = change_flags(vp, SCARG(uap, flags), l);
 2577         VOP_UNLOCK(vp, 0);
 2578         FILE_UNUSE(fp, l);
 2579         return (error);
 2580 }
 2581 
 2582 /*
 2583  * Change flags of a file given a path name; this version does
 2584  * not follow links.
 2585  */
 2586 int
 2587 sys_lchflags(struct lwp *l, void *v, register_t *retval)
 2588 {
 2589         struct sys_lchflags_args /* {
 2590                 syscallarg(const char *) path;
 2591                 syscallarg(u_long) flags;
 2592         } */ *uap = v;
 2593         struct vnode *vp;
 2594         int error;
 2595         struct nameidata nd;
 2596 
 2597         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2598         if ((error = namei(&nd)) != 0)
 2599                 return (error);
 2600         vp = nd.ni_vp;
 2601         error = change_flags(vp, SCARG(uap, flags), l);
 2602         vput(vp);
 2603         return (error);
 2604 }
 2605 
 2606 /*
 2607  * Common routine to change flags of a file.
 2608  */
 2609 int
 2610 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
 2611 {
 2612         struct mount *mp;
 2613         struct vattr vattr;
 2614         int error;
 2615 
 2616         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2617                 return (error);
 2618         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 2619         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2620         /*
 2621          * Non-superusers cannot change the flags on devices, even if they
 2622          * own them.
 2623          */
 2624         if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 2625             &l->l_acflag) != 0) {
 2626                 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
 2627                         goto out;
 2628                 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
 2629                         error = EINVAL;
 2630                         goto out;
 2631                 }
 2632         }
 2633         VATTR_NULL(&vattr);
 2634         vattr.va_flags = flags;
 2635         error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
 2636 out:
 2637         vn_finished_write(mp, 0);
 2638         return (error);
 2639 }
 2640 
 2641 /*
 2642  * Change mode of a file given path name; this version follows links.
 2643  */
 2644 /* ARGSUSED */
 2645 int
 2646 sys_chmod(struct lwp *l, void *v, register_t *retval)
 2647 {
 2648         struct sys_chmod_args /* {
 2649                 syscallarg(const char *) path;
 2650                 syscallarg(int) mode;
 2651         } */ *uap = v;
 2652         int error;
 2653         struct nameidata nd;
 2654 
 2655         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2656         if ((error = namei(&nd)) != 0)
 2657                 return (error);
 2658 
 2659         error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
 2660 
 2661         vrele(nd.ni_vp);
 2662         return (error);
 2663 }
 2664 
 2665 /*
 2666  * Change mode of a file given a file descriptor.
 2667  */
 2668 /* ARGSUSED */
 2669 int
 2670 sys_fchmod(struct lwp *l, void *v, register_t *retval)
 2671 {
 2672         struct sys_fchmod_args /* {
 2673                 syscallarg(int) fd;
 2674                 syscallarg(int) mode;
 2675         } */ *uap = v;
 2676         struct proc *p = l->l_proc;
 2677         struct file *fp;
 2678         int error;
 2679 
 2680         /* getvnode() will use the descriptor for us */
 2681         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2682                 return (error);
 2683 
 2684         error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
 2685         FILE_UNUSE(fp, l);
 2686         return (error);
 2687 }
 2688 
 2689 /*
 2690  * Change mode of a file given path name; this version does not follow links.
 2691  */
 2692 /* ARGSUSED */
 2693 int
 2694 sys_lchmod(struct lwp *l, void *v, register_t *retval)
 2695 {
 2696         struct sys_lchmod_args /* {
 2697                 syscallarg(const char *) path;
 2698                 syscallarg(int) mode;
 2699         } */ *uap = v;
 2700         int error;
 2701         struct nameidata nd;
 2702 
 2703         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2704         if ((error = namei(&nd)) != 0)
 2705                 return (error);
 2706 
 2707         error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
 2708 
 2709         vrele(nd.ni_vp);
 2710         return (error);
 2711 }
 2712 
 2713 /*
 2714  * Common routine to set mode given a vnode.
 2715  */
 2716 static int
 2717 change_mode(struct vnode *vp, int mode, struct lwp *l)
 2718 {
 2719         struct mount *mp;
 2720         struct vattr vattr;
 2721         int error;
 2722 
 2723         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2724                 return (error);
 2725         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 2726         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2727         VATTR_NULL(&vattr);
 2728         vattr.va_mode = mode & ALLPERMS;
 2729         error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
 2730         VOP_UNLOCK(vp, 0);
 2731         vn_finished_write(mp, 0);
 2732         return (error);
 2733 }
 2734 
 2735 /*
 2736  * Set ownership given a path name; this version follows links.
 2737  */
 2738 /* ARGSUSED */
 2739 int
 2740 sys_chown(struct lwp *l, void *v, register_t *retval)
 2741 {
 2742         struct sys_chown_args /* {
 2743                 syscallarg(const char *) path;
 2744                 syscallarg(uid_t) uid;
 2745                 syscallarg(gid_t) gid;
 2746         } */ *uap = v;
 2747         int error;
 2748         struct nameidata nd;
 2749 
 2750         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2751         if ((error = namei(&nd)) != 0)
 2752                 return (error);
 2753 
 2754         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
 2755 
 2756         vrele(nd.ni_vp);
 2757         return (error);
 2758 }
 2759 
 2760 /*
 2761  * Set ownership given a path name; this version follows links.
 2762  * Provides POSIX semantics.
 2763  */
 2764 /* ARGSUSED */
 2765 int
 2766 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
 2767 {
 2768         struct sys_chown_args /* {
 2769                 syscallarg(const char *) path;
 2770                 syscallarg(uid_t) uid;
 2771                 syscallarg(gid_t) gid;
 2772         } */ *uap = v;
 2773         int error;
 2774         struct nameidata nd;
 2775 
 2776         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2777         if ((error = namei(&nd)) != 0)
 2778                 return (error);
 2779 
 2780         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
 2781 
 2782         vrele(nd.ni_vp);
 2783         return (error);
 2784 }
 2785 
 2786 /*
 2787  * Set ownership given a file descriptor.
 2788  */
 2789 /* ARGSUSED */
 2790 int
 2791 sys_fchown(struct lwp *l, void *v, register_t *retval)
 2792 {
 2793         struct sys_fchown_args /* {
 2794                 syscallarg(int) fd;
 2795                 syscallarg(uid_t) uid;
 2796                 syscallarg(gid_t) gid;
 2797         } */ *uap = v;
 2798         struct proc *p = l->l_proc;
 2799         int error;
 2800         struct file *fp;
 2801 
 2802         /* getvnode() will use the descriptor for us */
 2803         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2804                 return (error);
 2805 
 2806         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2807             SCARG(uap, gid), l, 0);
 2808         FILE_UNUSE(fp, l);
 2809         return (error);
 2810 }
 2811 
 2812 /*
 2813  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
 2814  */
 2815 /* ARGSUSED */
 2816 int
 2817 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
 2818 {
 2819         struct sys_fchown_args /* {
 2820                 syscallarg(int) fd;
 2821                 syscallarg(uid_t) uid;
 2822                 syscallarg(gid_t) gid;
 2823         } */ *uap = v;
 2824         struct proc *p = l->l_proc;
 2825         int error;
 2826         struct file *fp;
 2827 
 2828         /* getvnode() will use the descriptor for us */
 2829         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2830                 return (error);
 2831 
 2832         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2833             SCARG(uap, gid), l, 1);
 2834         FILE_UNUSE(fp, l);
 2835         return (error);
 2836 }
 2837 
 2838 /*
 2839  * Set ownership given a path name; this version does not follow links.
 2840  */
 2841 /* ARGSUSED */
 2842 int
 2843 sys_lchown(struct lwp *l, void *v, register_t *retval)
 2844 {
 2845         struct sys_lchown_args /* {
 2846                 syscallarg(const char *) path;
 2847                 syscallarg(uid_t) uid;
 2848                 syscallarg(gid_t) gid;
 2849         } */ *uap = v;
 2850         int error;
 2851         struct nameidata nd;
 2852 
 2853         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2854         if ((error = namei(&nd)) != 0)
 2855                 return (error);
 2856 
 2857         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
 2858 
 2859         vrele(nd.ni_vp);
 2860         return (error);
 2861 }
 2862 
 2863 /*
 2864  * Set ownership given a path name; this version does not follow links.
 2865  * Provides POSIX/XPG semantics.
 2866  */
 2867 /* ARGSUSED */
 2868 int
 2869 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
 2870 {
 2871         struct sys_lchown_args /* {
 2872                 syscallarg(const char *) path;
 2873                 syscallarg(uid_t) uid;
 2874                 syscallarg(gid_t) gid;
 2875         } */ *uap = v;
 2876         int error;
 2877         struct nameidata nd;
 2878 
 2879         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2880         if ((error = namei(&nd)) != 0)
 2881                 return (error);
 2882 
 2883         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
 2884 
 2885         vrele(nd.ni_vp);
 2886         return (error);
 2887 }
 2888 
 2889 /*
 2890  * Common routine to set ownership given a vnode.
 2891  */
 2892 static int
 2893 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
 2894     int posix_semantics)
 2895 {
 2896         struct mount *mp;
 2897         struct vattr vattr;
 2898         mode_t newmode;
 2899         int error;
 2900 
 2901         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2902                 return (error);
 2903         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 2904         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2905         if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
 2906                 goto out;
 2907 
 2908 #define CHANGED(x) ((int)(x) != -1)
 2909         newmode = vattr.va_mode;
 2910         if (posix_semantics) {
 2911                 /*
 2912                  * POSIX/XPG semantics: if the caller is not the super-user,
 2913                  * clear set-user-id and set-group-id bits.  Both POSIX and
 2914                  * the XPG consider the behaviour for calls by the super-user
 2915                  * implementation-defined; we leave the set-user-id and set-
 2916                  * group-id settings intact in that case.
 2917                  */
 2918                 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 2919                                       NULL) != 0)
 2920                         newmode &= ~(S_ISUID | S_ISGID);
 2921         } else {
 2922                 /*
 2923                  * NetBSD semantics: when changing owner and/or group,
 2924                  * clear the respective bit(s).
 2925                  */
 2926                 if (CHANGED(uid))
 2927                         newmode &= ~S_ISUID;
 2928                 if (CHANGED(gid))
 2929                         newmode &= ~S_ISGID;
 2930         }
 2931         /* Update va_mode iff altered. */
 2932         if (vattr.va_mode == newmode)
 2933                 newmode = VNOVAL;
 2934 
 2935         VATTR_NULL(&vattr);
 2936         vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
 2937         vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
 2938         vattr.va_mode = newmode;
 2939         error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
 2940 #undef CHANGED
 2941 
 2942 out:
 2943         VOP_UNLOCK(vp, 0);
 2944         vn_finished_write(mp, 0);
 2945         return (error);
 2946 }
 2947 
 2948 /*
 2949  * Set the access and modification times given a path name; this
 2950  * version follows links.
 2951  */
 2952 /* ARGSUSED */
 2953 int
 2954 sys_utimes(struct lwp *l, void *v, register_t *retval)
 2955 {
 2956         struct sys_utimes_args /* {
 2957                 syscallarg(const char *) path;
 2958                 syscallarg(const struct timeval *) tptr;
 2959         } */ *uap = v;
 2960         int error;
 2961         struct nameidata nd;
 2962 
 2963         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 2964         if ((error = namei(&nd)) != 0)
 2965                 return (error);
 2966 
 2967         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
 2968 
 2969         vrele(nd.ni_vp);
 2970         return (error);
 2971 }
 2972 
 2973 /*
 2974  * Set the access and modification times given a file descriptor.
 2975  */
 2976 /* ARGSUSED */
 2977 int
 2978 sys_futimes(struct lwp *l, void *v, register_t *retval)
 2979 {
 2980         struct sys_futimes_args /* {
 2981                 syscallarg(int) fd;
 2982                 syscallarg(const struct timeval *) tptr;
 2983         } */ *uap = v;
 2984         struct proc *p = l->l_proc;
 2985         int error;
 2986         struct file *fp;
 2987 
 2988         /* getvnode() will use the descriptor for us */
 2989         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2990                 return (error);
 2991 
 2992         error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
 2993         FILE_UNUSE(fp, l);
 2994         return (error);
 2995 }
 2996 
 2997 /*
 2998  * Set the access and modification times given a path name; this
 2999  * version does not follow links.
 3000  */
 3001 /* ARGSUSED */
 3002 int
 3003 sys_lutimes(struct lwp *l, void *v, register_t *retval)
 3004 {
 3005         struct sys_lutimes_args /* {
 3006                 syscallarg(const char *) path;
 3007                 syscallarg(const struct timeval *) tptr;
 3008         } */ *uap = v;
 3009         int error;
 3010         struct nameidata nd;
 3011 
 3012         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 3013         if ((error = namei(&nd)) != 0)
 3014                 return (error);
 3015 
 3016         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
 3017 
 3018         vrele(nd.ni_vp);
 3019         return (error);
 3020 }
 3021 
 3022 /*
 3023  * Common routine to set access and modification times given a vnode.
 3024  */
 3025 static int
 3026 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
 3027 {
 3028         struct mount *mp;
 3029         struct vattr vattr;
 3030         int error;
 3031 
 3032         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 3033                 return (error);
 3034         VATTR_NULL(&vattr);
 3035         if (tptr == NULL) {
 3036                 nanotime(&vattr.va_atime);
 3037                 vattr.va_mtime = vattr.va_atime;
 3038                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3039         } else {
 3040                 struct timeval tv[2];
 3041 
 3042                 error = copyin(tptr, tv, sizeof(tv));
 3043                 if (error)
 3044                         goto out;
 3045                 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
 3046                 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
 3047         }
 3048         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 3049         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3050         error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
 3051         VOP_UNLOCK(vp, 0);
 3052 out:
 3053         vn_finished_write(mp, 0);
 3054         return (error);
 3055 }
 3056 
 3057 /*
 3058  * Truncate a file given its path name.
 3059  */
 3060 /* ARGSUSED */
 3061 int
 3062 sys_truncate(struct lwp *l, void *v, register_t *retval)
 3063 {
 3064         struct sys_truncate_args /* {
 3065                 syscallarg(const char *) path;
 3066                 syscallarg(int) pad;
 3067                 syscallarg(off_t) length;
 3068         } */ *uap = v;
 3069         struct vnode *vp;
 3070         struct mount *mp;
 3071         struct vattr vattr;
 3072         int error;
 3073         struct nameidata nd;
 3074 
 3075         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 3076         if ((error = namei(&nd)) != 0)
 3077                 return (error);
 3078         vp = nd.ni_vp;
 3079         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3080                 vrele(vp);
 3081                 return (error);
 3082         }
 3083         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 3084         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3085         if (vp->v_type == VDIR)
 3086                 error = EISDIR;
 3087         else if ((error = vn_writechk(vp)) == 0 &&
 3088             (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
 3089                 VATTR_NULL(&vattr);
 3090                 vattr.va_size = SCARG(uap, length);
 3091                 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
 3092         }
 3093         vput(vp);
 3094         vn_finished_write(mp, 0);
 3095         return (error);
 3096 }
 3097 
 3098 /*
 3099  * Truncate a file given a file descriptor.
 3100  */
 3101 /* ARGSUSED */
 3102 int
 3103 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
 3104 {
 3105         struct sys_ftruncate_args /* {
 3106                 syscallarg(int) fd;
 3107                 syscallarg(int) pad;
 3108                 syscallarg(off_t) length;
 3109         } */ *uap = v;
 3110         struct proc *p = l->l_proc;
 3111         struct mount *mp;
 3112         struct vattr vattr;
 3113         struct vnode *vp;
 3114         struct file *fp;
 3115         int error;
 3116 
 3117         /* getvnode() will use the descriptor for us */
 3118         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3119                 return (error);
 3120         if ((fp->f_flag & FWRITE) == 0) {
 3121                 error = EINVAL;
 3122                 goto out;
 3123         }
 3124         vp = (struct vnode *)fp->f_data;
 3125         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3126                 FILE_UNUSE(fp, l);
 3127                 return (error);
 3128         }
 3129         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 3130         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3131         if (vp->v_type == VDIR)
 3132                 error = EISDIR;
 3133         else if ((error = vn_writechk(vp)) == 0) {
 3134                 VATTR_NULL(&vattr);
 3135                 vattr.va_size = SCARG(uap, length);
 3136                 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
 3137         }
 3138         VOP_UNLOCK(vp, 0);
 3139         vn_finished_write(mp, 0);
 3140  out:
 3141         FILE_UNUSE(fp, l);
 3142         return (error);
 3143 }
 3144 
 3145 /*
 3146  * Sync an open file.
 3147  */
 3148 /* ARGSUSED */
 3149 int
 3150 sys_fsync(struct lwp *l, void *v, register_t *retval)
 3151 {
 3152         struct sys_fsync_args /* {
 3153                 syscallarg(int) fd;
 3154         } */ *uap = v;
 3155         struct proc *p = l->l_proc;
 3156         struct vnode *vp;
 3157         struct mount *mp;
 3158         struct file *fp;
 3159         int error;
 3160 
 3161         /* getvnode() will use the descriptor for us */
 3162         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3163                 return (error);
 3164         vp = (struct vnode *)fp->f_data;
 3165         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3166                 FILE_UNUSE(fp, l);
 3167                 return (error);
 3168         }
 3169         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3170         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
 3171         if (error == 0 && bioops.io_fsync != NULL &&
 3172             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3173                 (*bioops.io_fsync)(vp, 0);
 3174         VOP_UNLOCK(vp, 0);
 3175         vn_finished_write(mp, 0);
 3176         FILE_UNUSE(fp, l);
 3177         return (error);
 3178 }
 3179 
 3180 /*
 3181  * Sync a range of file data.  API modeled after that found in AIX.
 3182  *
 3183  * FDATASYNC indicates that we need only save enough metadata to be able
 3184  * to re-read the written data.  Note we duplicate AIX's requirement that
 3185  * the file be open for writing.
 3186  */
 3187 /* ARGSUSED */
 3188 int
 3189 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
 3190 {
 3191         struct sys_fsync_range_args /* {
 3192                 syscallarg(int) fd;
 3193                 syscallarg(int) flags;
 3194                 syscallarg(off_t) start;
 3195                 syscallarg(off_t) length;
 3196         } */ *uap = v;
 3197         struct proc *p = l->l_proc;
 3198         struct vnode *vp;
 3199         struct file *fp;
 3200         int flags, nflags;
 3201         off_t s, e, len;
 3202         int error;
 3203 
 3204         /* getvnode() will use the descriptor for us */
 3205         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3206                 return (error);
 3207 
 3208         if ((fp->f_flag & FWRITE) == 0) {
 3209                 error = EBADF;
 3210                 goto out;
 3211         }
 3212 
 3213         flags = SCARG(uap, flags);
 3214         if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
 3215             ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
 3216                 error = EINVAL;
 3217                 goto out;
 3218         }
 3219         /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
 3220         if (flags & FDATASYNC)
 3221                 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
 3222         else
 3223                 nflags = FSYNC_WAIT;
 3224         if (flags & FDISKSYNC)
 3225                 nflags |= FSYNC_CACHE;
 3226 
 3227         len = SCARG(uap, length);
 3228         /* If length == 0, we do the whole file, and s = l = 0 will do that */
 3229         if (len) {
 3230                 s = SCARG(uap, start);
 3231                 e = s + len;
 3232                 if (e < s) {
 3233                         error = EINVAL;
 3234                         goto out;
 3235                 }
 3236         } else {
 3237                 e = 0;
 3238                 s = 0;
 3239         }
 3240 
 3241         vp = (struct vnode *)fp->f_data;
 3242         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3243         error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
 3244 
 3245         if (error == 0 && bioops.io_fsync != NULL &&
 3246             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3247                 (*bioops.io_fsync)(vp, nflags);
 3248 
 3249         VOP_UNLOCK(vp, 0);
 3250 out:
 3251         FILE_UNUSE(fp, l);
 3252         return (error);
 3253 }
 3254 
 3255 /*
 3256  * Sync the data of an open file.
 3257  */
 3258 /* ARGSUSED */
 3259 int
 3260 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
 3261 {
 3262         struct sys_fdatasync_args /* {
 3263                 syscallarg(int) fd;
 3264         } */ *uap = v;
 3265         struct proc *p = l->l_proc;
 3266         struct vnode *vp;
 3267         struct file *fp;
 3268         int error;
 3269 
 3270         /* getvnode() will use the descriptor for us */
 3271         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3272                 return (error);
 3273         if ((fp->f_flag & FWRITE) == 0) {
 3274                 FILE_UNUSE(fp, l);
 3275                 return (EBADF);
 3276         }
 3277         vp = (struct vnode *)fp->f_data;
 3278         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3279         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
 3280         VOP_UNLOCK(vp, 0);
 3281         FILE_UNUSE(fp, l);
 3282         return (error);
 3283 }
 3284 
 3285 /*
 3286  * Rename files, (standard) BSD semantics frontend.
 3287  */
 3288 /* ARGSUSED */
 3289 int
 3290 sys_rename(struct lwp *l, void *v, register_t *retval)
 3291 {
 3292         struct sys_rename_args /* {
 3293                 syscallarg(const char *) from;
 3294                 syscallarg(const char *) to;
 3295         } */ *uap = v;
 3296 
 3297         return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
 3298 }
 3299 
 3300 /*
 3301  * Rename files, POSIX semantics frontend.
 3302  */
 3303 /* ARGSUSED */
 3304 int
 3305 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
 3306 {
 3307         struct sys___posix_rename_args /* {
 3308                 syscallarg(const char *) from;
 3309                 syscallarg(const char *) to;
 3310         } */ *uap = v;
 3311 
 3312         return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
 3313 }
 3314 
 3315 /*
 3316  * Rename files.  Source and destination must either both be directories,
 3317  * or both not be directories.  If target is a directory, it must be empty.
 3318  * If `from' and `to' refer to the same object, the value of the `retain'
 3319  * argument is used to determine whether `from' will be
 3320  *
 3321  * (retain == 0)        deleted unless `from' and `to' refer to the same
 3322  *                      object in the file system's name space (BSD).
 3323  * (retain == 1)        always retained (POSIX).
 3324  */
 3325 static int
 3326 rename_files(const char *from, const char *to, struct lwp *l, int retain)
 3327 {
 3328         struct mount *mp = NULL;
 3329         struct vnode *tvp, *fvp, *tdvp;
 3330         struct nameidata fromnd, tond;
 3331         struct proc *p;
 3332         int error;
 3333 
 3334         NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART, UIO_USERSPACE,
 3335             from, l);
 3336         if ((error = namei(&fromnd)) != 0)
 3337                 return (error);
 3338         if (fromnd.ni_dvp != fromnd.ni_vp)
 3339                 VOP_UNLOCK(fromnd.ni_dvp, 0);
 3340         fvp = fromnd.ni_vp;
 3341         error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
 3342         if (error != 0) {
 3343                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3344                 vrele(fromnd.ni_dvp);
 3345                 vrele(fvp);
 3346                 if (fromnd.ni_startdir)
 3347                         vrele(fromnd.ni_startdir);
 3348                 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3349                 return (error);
 3350         }
 3351         NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
 3352             (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
 3353         if ((error = namei(&tond)) != 0) {
 3354                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3355                 vrele(fromnd.ni_dvp);
 3356                 vrele(fvp);
 3357                 goto out1;
 3358         }
 3359         tdvp = tond.ni_dvp;
 3360         tvp = tond.ni_vp;
 3361 
 3362         if (tvp != NULL) {
 3363                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3364                         error = ENOTDIR;
 3365                         goto out;
 3366                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3367                         error = EISDIR;
 3368                         goto out;
 3369                 }
 3370         }
 3371 
 3372         if (fvp == tdvp)
 3373                 error = EINVAL;
 3374 
 3375         /*
 3376          * Source and destination refer to the same object.
 3377          */
 3378         if (fvp == tvp) {
 3379                 if (retain)
 3380                         error = -1;
 3381                 else if (fromnd.ni_dvp == tdvp &&
 3382                     fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 3383                     !memcmp(fromnd.ni_cnd.cn_nameptr,
 3384                           tond.ni_cnd.cn_nameptr,
 3385                           fromnd.ni_cnd.cn_namelen))
 3386                 error = -1;
 3387         }
 3388 
 3389 #if NVERIEXEC > 0
 3390         if (!error) {
 3391                 pathname_t frompath = NULL, topath = NULL;
 3392 
 3393                 error = pathname_get(fromnd.ni_dirp, fromnd.ni_segflg,
 3394                     &frompath);
 3395                 if (!error)
 3396                         error = pathname_get(tond.ni_dirp, tond.ni_segflg,
 3397                             &topath);
 3398                 if (!error)
 3399                         error = veriexec_renamechk(fvp, pathname_path(frompath),
 3400                             tvp, pathname_path(topath), l);
 3401 
 3402                 pathname_put(frompath);
 3403                 pathname_put(topath);
 3404         }
 3405 #endif /* NVERIEXEC > 0 */
 3406 
 3407 out:
 3408         p = l->l_proc;
 3409         if (!error) {
 3410                 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
 3411                 if (fromnd.ni_dvp != tdvp)
 3412                         VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 3413                 if (tvp) {
 3414                         VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
 3415                 }
 3416                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3417                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3418         } else {
 3419                 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
 3420                 if (tdvp == tvp)
 3421                         vrele(tdvp);
 3422                 else
 3423                         vput(tdvp);
 3424                 if (tvp)
 3425                         vput(tvp);
 3426                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3427                 vrele(fromnd.ni_dvp);
 3428                 vrele(fvp);
 3429         }
 3430         vrele(tond.ni_startdir);
 3431         PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
 3432 out1:
 3433         vn_finished_write(mp, 0);
 3434         if (fromnd.ni_startdir)
 3435                 vrele(fromnd.ni_startdir);
 3436         PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3437         return (error == -1 ? 0 : error);
 3438 }
 3439 
 3440 /*
 3441  * Make a directory file.
 3442  */
 3443 /* ARGSUSED */
 3444 int
 3445 sys_mkdir(struct lwp *l, void *v, register_t *retval)
 3446 {
 3447         struct sys_mkdir_args /* {
 3448                 syscallarg(const char *) path;
 3449                 syscallarg(int) mode;
 3450         } */ *uap = v;
 3451         struct proc *p = l->l_proc;
 3452         struct mount *mp;
 3453         struct vnode *vp;
 3454         struct vattr vattr;
 3455         int error;
 3456         struct nameidata nd;
 3457 
 3458 restart:
 3459         NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
 3460             SCARG(uap, path), l);
 3461         if ((error = namei(&nd)) != 0)
 3462                 return (error);
 3463         vp = nd.ni_vp;
 3464         if (vp != NULL) {
 3465                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3466                 if (nd.ni_dvp == vp)
 3467                         vrele(nd.ni_dvp);
 3468                 else
 3469                         vput(nd.ni_dvp);
 3470                 vrele(vp);
 3471                 return (EEXIST);
 3472         }
 3473         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3474                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3475                 if (nd.ni_dvp == vp)
 3476                         vrele(nd.ni_dvp);
 3477                 else
 3478                         vput(nd.ni_dvp);
 3479                 if ((error = vn_start_write(NULL, &mp,
 3480                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3481                         return (error);
 3482                 goto restart;
 3483         }
 3484         VATTR_NULL(&vattr);
 3485         vattr.va_type = VDIR;
 3486         vattr.va_mode =
 3487             (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
 3488         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 3489         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3490         if (!error)
 3491                 vput(nd.ni_vp);
 3492         vn_finished_write(mp, 0);
 3493         return (error);
 3494 }
 3495 
 3496 /*
 3497  * Remove a directory file.
 3498  */
 3499 /* ARGSUSED */
 3500 int
 3501 sys_rmdir(struct lwp *l, void *v, register_t *retval)
 3502 {
 3503         struct sys_rmdir_args /* {
 3504                 syscallarg(const char *) path;
 3505         } */ *uap = v;
 3506         struct mount *mp;
 3507         struct vnode *vp;
 3508         int error;
 3509         struct nameidata nd;
 3510 
 3511 restart:
 3512         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 3513             SCARG(uap, path), l);
 3514         if ((error = namei(&nd)) != 0)
 3515                 return (error);
 3516         vp = nd.ni_vp;
 3517         if (vp->v_type != VDIR) {
 3518                 error = ENOTDIR;
 3519                 goto out;
 3520         }
 3521         /*
 3522          * No rmdir "." please.
 3523          */
 3524         if (nd.ni_dvp == vp) {
 3525                 error = EINVAL;
 3526                 goto out;
 3527         }
 3528         /*
 3529          * The root of a mounted filesystem cannot be deleted.
 3530          */
 3531         if (vp->v_flag & VROOT) {
 3532                 error = EBUSY;
 3533                 goto out;
 3534         }
 3535         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3536                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3537                 if (nd.ni_dvp == vp)
 3538                         vrele(nd.ni_dvp);
 3539                 else
 3540                         vput(nd.ni_dvp);
 3541                 vput(vp);
 3542                 if ((error = vn_start_write(NULL, &mp,
 3543                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3544                         return (error);
 3545                 goto restart;
 3546         }
 3547         VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
 3548         VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
 3549         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3550         vn_finished_write(mp, 0);
 3551         return (error);
 3552 
 3553 out:
 3554         VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3555         if (nd.ni_dvp == vp)
 3556                 vrele(nd.ni_dvp);
 3557         else
 3558                 vput(nd.ni_dvp);
 3559         vput(vp);
 3560         return (error);
 3561 }
 3562 
 3563 /*
 3564  * Read a block of directory entries in a file system independent format.
 3565  */
 3566 int
 3567 sys___getdents30(struct lwp *l, void *v, register_t *retval)
 3568 {
 3569         struct sys___getdents30_args /* {
 3570                 syscallarg(int) fd;
 3571                 syscallarg(char *) buf;
 3572                 syscallarg(size_t) count;
 3573         } */ *uap = v;
 3574         struct proc *p = l->l_proc;
 3575         struct file *fp;
 3576         int error, done;
 3577 
 3578         /* getvnode() will use the descriptor for us */
 3579         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3580                 return (error);
 3581         if ((fp->f_flag & FREAD) == 0) {
 3582                 error = EBADF;
 3583                 goto out;
 3584         }
 3585         error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
 3586                         SCARG(uap, count), &done, l, 0, 0);
 3587 #ifdef KTRACE
 3588         if (!error && KTRPOINT(p, KTR_GENIO)) {
 3589                 struct iovec iov;
 3590                 iov.iov_base = SCARG(uap, buf);
 3591                 iov.iov_len = done;
 3592                 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
 3593         }
 3594 #endif
 3595         *retval = done;
 3596  out:
 3597         FILE_UNUSE(fp, l);
 3598         return (error);
 3599 }
 3600 
 3601 /*
 3602  * Set the mode mask for creation of filesystem nodes.
 3603  */
 3604 int
 3605 sys_umask(struct lwp *l, void *v, register_t *retval)
 3606 {
 3607         struct sys_umask_args /* {
 3608                 syscallarg(mode_t) newmask;
 3609         } */ *uap = v;
 3610         struct proc *p = l->l_proc;
 3611         struct cwdinfo *cwdi;
 3612 
 3613         cwdi = p->p_cwdi;
 3614         *retval = cwdi->cwdi_cmask;
 3615         cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
 3616         return (0);
 3617 }
 3618 
 3619 /*
 3620  * Void all references to file by ripping underlying filesystem
 3621  * away from vnode.
 3622  */
 3623 /* ARGSUSED */
 3624 int
 3625 sys_revoke(struct lwp *l, void *v, register_t *retval)
 3626 {
 3627         struct sys_revoke_args /* {
 3628                 syscallarg(const char *) path;
 3629         } */ *uap = v;
 3630         struct mount *mp;
 3631         struct vnode *vp;
 3632         struct vattr vattr;
 3633         int error;
 3634         struct nameidata nd;
 3635 
 3636         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
 3637         if ((error = namei(&nd)) != 0)
 3638                 return (error);
 3639         vp = nd.ni_vp;
 3640         if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
 3641                 goto out;
 3642         if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
 3643             (error = kauth_authorize_generic(l->l_cred,
 3644             KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
 3645                 goto out;
 3646         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 3647                 goto out;
 3648         if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
 3649                 VOP_REVOKE(vp, REVOKEALL);
 3650         vn_finished_write(mp, 0);
 3651 out:
 3652         vrele(vp);
 3653         return (error);
 3654 }
 3655 
 3656 /*
 3657  * Convert a user file descriptor to a kernel file entry.
 3658  */
 3659 int
 3660 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
 3661 {
 3662         struct vnode *vp;
 3663         struct file *fp;
 3664 
 3665         if ((fp = fd_getfile(fdp, fd)) == NULL)
 3666                 return (EBADF);
 3667 
 3668         FILE_USE(fp);
 3669 
 3670         if (fp->f_type != DTYPE_VNODE) {
 3671                 FILE_UNUSE(fp, NULL);
 3672                 return (EINVAL);
 3673         }
 3674 
 3675         vp = (struct vnode *)fp->f_data;
 3676         if (vp->v_type == VBAD) {
 3677                 FILE_UNUSE(fp, NULL);
 3678                 return (EBADF);
 3679         }
 3680 
 3681         *fpp = fp;
 3682         return (0);
 3683 }
Cache object: 11e6e13df8ce68605c982505a2f824be
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c