vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: vfs_syscalls.c,v 1.217.2.13 2007/06/26 17:00:47 ghen Exp $     */
    2 
    3 /*
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.42 (Berkeley) 7/31/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.217.2.13 2007/06/26 17:00:47 ghen Exp $");
   41 
   42 #include "opt_compat_netbsd.h"
   43 #include "opt_compat_43.h"
   44 #include "opt_ktrace.h"
   45 #include "opt_verified_exec.h"
   46 #include "fss.h"
   47 
   48 #include <sys/param.h>
   49 #include <sys/systm.h>
   50 #include <sys/namei.h>
   51 #include <sys/filedesc.h>
   52 #include <sys/kernel.h>
   53 #include <sys/file.h>
   54 #include <sys/stat.h>
   55 #include <sys/vnode.h>
   56 #include <sys/mount.h>
   57 #include <sys/proc.h>
   58 #include <sys/uio.h>
   59 #include <sys/malloc.h>
   60 #include <sys/dirent.h>
   61 #include <sys/extattr.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/sa.h>
   64 #include <sys/syscallargs.h>
   65 #ifdef KTRACE
   66 #include <sys/ktrace.h>
   67 #endif
   68 #ifdef VERIFIED_EXEC
   69 #include <sys/verified_exec.h>
   70 #endif /* VERIFIED_EXEC */
   71 
   72 #include <miscfs/genfs/genfs.h>
   73 #include <miscfs/syncfs/syncfs.h>
   74 
   75 #if NFSS > 0
   76 #include <dev/fssvar.h>
   77 #endif
   78 
   79 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");	/* malloc type used for the struct mount allocated in sys_mount() */
   80 
      /* Prototypes for helpers with internal linkage. */
   81 static int change_dir(struct nameidata *, struct proc *);
   82 static int change_flags(struct vnode *, u_long, struct proc *);
   83 static int change_mode(struct vnode *, int, struct proc *p);
   84 static int change_owner(struct vnode *, uid_t, gid_t, struct proc *, int);
   85 static int change_utimes(struct vnode *vp, const struct timeval *,
   86                struct proc *p);
   87 static int rename_files(const char *, const char *, struct proc *, int);
   88 
      /* Called by sys_mount() once a new file system is on the mount list. */
   89 void checkdirs(struct vnode *);
   90 
      /*
       * When zero, sys_mount() requires superuser credentials for every
       * request other than MNT_GETARGS.
       */
   91 int dovfsusermount = 0;
   92 
   93 /*
   94  * Virtual File System System Calls
   95  */
   96 
   97 /*
   98  * Mount a file system.
   99  */
  100 
  101 #if defined(COMPAT_09) || defined(COMPAT_43)
  102 /*
  103  * This table is used to maintain compatibility with 4.3BSD
  104  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
  105  *
  106  * Do not modify this table. It should only contain filesystems
  107  * supported by NetBSD 0.9 and 4.3BSD.
       *
       * sys_mount() falls back to this table when copying in the type
       * string fails: the type argument is then used as an index here.
       * An out-of-range index or a NULL slot yields ENODEV.
  108  */
  109 const char * const mountcompatnames[] = {
  110         NULL,           /* 0 = MOUNT_NONE */
  111         MOUNT_FFS,      /* 1 = MOUNT_UFS */
  112         MOUNT_NFS,      /* 2 */
  113         MOUNT_MFS,      /* 3 */
  114         MOUNT_MSDOS,    /* 4 */
  115         MOUNT_CD9660,   /* 5 = MOUNT_ISOFS */
  116         MOUNT_FDESC,    /* 6 */
  117         MOUNT_KERNFS,   /* 7 */
  118         NULL,           /* 8 = MOUNT_DEVFS */
  119         MOUNT_AFS,      /* 9 */
  120 };
      /* Number of slots in mountcompatnames[], NULL placeholders included. */
  121 const int nmountcompatnames = sizeof(mountcompatnames) /
  122     sizeof(mountcompatnames[0]);
  123 #endif /* COMPAT_09 || COMPAT_43 */
  124 
  125 /* ARGSUSED */
  126 int
  127 sys_mount(l, v, retval)
  128         struct lwp *l;
  129         void *v;
  130         register_t *retval;
  131 {
  132         struct sys_mount_args /* {
  133                 syscallarg(const char *) type;
  134                 syscallarg(const char *) path;
  135                 syscallarg(int) flags;
  136                 syscallarg(void *) data;
  137         } */ *uap = v;
  138         struct proc *p = l->l_proc;
  139         struct vnode *vp;
  140         struct mount *mp;
  141         int error, flag = 0;
  142         char fstypename[MFSNAMELEN];
  143         struct vattr va;
  144         struct nameidata nd;
  145         struct vfsops *vfs;
  146 
  147         /*
  148          * if MNT_GETARGS is specified, it should be only flag.
  149          */
  150 
  151         if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
  152             (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
  153                 return EINVAL;
  154         }
  155 
  156         if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
  157             (error = suser(p->p_ucred, &p->p_acflag)))
  158                 return (error);
  159         /*
  160          * Get vnode to be covered
  161          */
  162         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
  163             SCARG(uap, path), p);
  164         if ((error = namei(&nd)) != 0)
  165                 return (error);
  166         vp = nd.ni_vp;
  167         /*
  168          * A lookup in VFS_MOUNT might result in an attempt to
  169          * lock this vnode again, so make the lock recursive.
  170          */
  171         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
  172         if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
  173                 if ((vp->v_flag & VROOT) == 0) {
  174                         vput(vp);
  175                         return (EINVAL);
  176                 }
  177                 mp = vp->v_mount;
  178                 flag = mp->mnt_flag;
  179                 vfs = mp->mnt_op;
  180                 /*
  181                  * We only allow the filesystem to be reloaded if it
  182                  * is currently mounted read-only.
  183                  */
  184                 if ((SCARG(uap, flags) & MNT_RELOAD) &&
  185                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  186                         vput(vp);
  187                         return (EOPNOTSUPP);    /* Needs translation */
  188                 }
  189                 /*
  190                  * In "highly secure" mode, don't let the caller do anything
  191                  * but downgrade a filesystem from read-write to read-only.
  192                  * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
  193                  */
  194                 if (securelevel >= 2 &&
  195                     SCARG(uap, flags) != MNT_GETARGS &&
  196                     SCARG(uap, flags) !=
  197                     (mp->mnt_flag | MNT_RDONLY |
  198                      MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
  199                         vput(vp);
  200                         return (EPERM);
  201                 }
  202                 mp->mnt_flag |= SCARG(uap, flags) &
  203                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  204                 /*
  205                  * Only root, or the user that did the original mount is
  206                  * permitted to update it.
  207                  */
  208                 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
  209                     mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
  210                     (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
  211                         vput(vp);
  212                         return (error);
  213                 }
  214                 /*
  215                  * Do not allow NFS export by non-root users. For non-root
  216                  * users, silently enforce MNT_NOSUID and MNT_NODEV, and
  217                  * MNT_NOEXEC if mount point is already MNT_NOEXEC.
  218                  */
  219                 if (p->p_ucred->cr_uid != 0) {
  220                         if (SCARG(uap, flags) & MNT_EXPORTED) {
  221                                 vput(vp);
  222                                 return (EPERM);
  223                         }
  224                         SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  225                         if (flag & MNT_NOEXEC)
  226                                 SCARG(uap, flags) |= MNT_NOEXEC;
  227                 }
  228                 if (vfs_busy(mp, LK_NOWAIT, 0)) {
  229                         vput(vp);
  230                         return (EPERM);
  231                 }
  232                 goto update;
  233         } else {
  234                 if (securelevel >= 2) {
  235                         vput(vp);
  236                         return (EPERM);
  237                 }
  238         }
  239         /*
  240          * If the user is not root, ensure that they own the directory
  241          * onto which we are attempting to mount.
  242          */
  243         if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0 ||
  244             (va.va_uid != p->p_ucred->cr_uid &&
  245                 (error = suser(p->p_ucred, &p->p_acflag)) != 0)) {
  246                 vput(vp);
  247                 return (error);
  248         }
  249         /*
  250          * Do not allow NFS export by non-root users. For non-root users,
  251          * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
  252          * mount point is already MNT_NOEXEC.
  253          */
  254         if (p->p_ucred->cr_uid != 0) {
  255                 if (SCARG(uap, flags) & MNT_EXPORTED) {
  256                         vput(vp);
  257                         return (EPERM);
  258                 }
  259                 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  260                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
  261                         SCARG(uap, flags) |= MNT_NOEXEC;
  262         }
  263         if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
  264                 vput(vp);
  265                 return (error);
  266         }
  267         if (vp->v_type != VDIR) {
  268                 vput(vp);
  269                 return (ENOTDIR);
  270         }
  271         error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
  272         if (error) {
  273 #if defined(COMPAT_09) || defined(COMPAT_43)
  274                 /*
  275                  * Historically filesystem types were identified by number.
  276                  * If we get an integer for the filesystem type instead of a
  277                  * string, we check to see if it matches one of the historic
  278                  * filesystem types.
  279                  */
  280                 u_long fsindex = (u_long)SCARG(uap, type);
  281                 if (fsindex >= nmountcompatnames ||
  282                     mountcompatnames[fsindex] == NULL) {
  283                         vput(vp);
  284                         return (ENODEV);
  285                 }
  286                 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
  287 #else
  288                 vput(vp);
  289                 return (error);
  290 #endif
  291         }
  292 #ifdef  COMPAT_10
  293         /* Accept `ufs' as an alias for `ffs'. */
  294         if (!strncmp(fstypename, "ufs", MFSNAMELEN))
  295                 strncpy(fstypename, "ffs", MFSNAMELEN);
  296 #endif
  297         if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
  298                 vput(vp);
  299                 return (ENODEV);
  300         }
  301         if (vp->v_mountedhere != NULL) {
  302                 vput(vp);
  303                 return (EBUSY);
  304         }
  305 
  306         /*
  307          * Allocate and initialize the file system.
  308          */
  309         mp = (struct mount *)malloc((u_long)sizeof(struct mount),
  310                 M_MOUNT, M_WAITOK);
  311         memset((char *)mp, 0, (u_long)sizeof(struct mount));
  312         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  313         simple_lock_init(&mp->mnt_slock);
  314         (void)vfs_busy(mp, LK_NOWAIT, 0);
  315         mp->mnt_op = vfs;
  316         vfs->vfs_refcount++;
  317         mp->mnt_vnodecovered = vp;
  318         mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
  319         mp->mnt_unmounter = NULL;
  320         mp->mnt_leaf = mp;
  321 
  322         /*
  323          * The underlying file system may refuse the mount for
  324          * various reasons.  Allow the user to force it to happen.
  325          */
  326         mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
  327  update:
  328         if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
  329                 /*
  330                  * Set the mount level flags.
  331                  */
  332                 if (SCARG(uap, flags) & MNT_RDONLY)
  333                         mp->mnt_flag |= MNT_RDONLY;
  334                 else if (mp->mnt_flag & MNT_RDONLY)
  335                         mp->mnt_iflag |= IMNT_WANTRDWR;
  336                 mp->mnt_flag &=
  337                   ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  338                     MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  339                     MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
  340                 mp->mnt_flag |= SCARG(uap, flags) &
  341                    (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  342                     MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  343                     MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  344                     MNT_IGNORE);
  345         }
  346         /*
  347          * Mount the filesystem.
  348          */
  349         error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
  350         if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
  351                 if (mp->mnt_iflag & IMNT_WANTRDWR)
  352                         mp->mnt_flag &= ~MNT_RDONLY;
  353                 if (error)
  354                         mp->mnt_flag = flag;
  355                 mp->mnt_flag &=~
  356                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  357                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  358                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
  359                         if (mp->mnt_syncer == NULL)
  360                                 error = vfs_allocate_syncvnode(mp);
  361                 } else {
  362                         if (mp->mnt_syncer != NULL)
  363                                 vfs_deallocate_syncvnode(mp);
  364                 }
  365                 vfs_unbusy(mp);
  366                 VOP_UNLOCK(vp, 0);
  367                 vrele(vp);
  368                 return (error);
  369         }
  370         /*
  371          * Put the new filesystem on the mount list after root.
  372          */
  373         cache_purge(vp);
  374         if (!error) {
  375                 mp->mnt_flag &=~
  376                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  377                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  378                 vp->v_mountedhere = mp;
  379                 simple_lock(&mountlist_slock);
  380                 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  381                 simple_unlock(&mountlist_slock);
  382                 checkdirs(vp);
  383                 VOP_UNLOCK(vp, 0);
  384                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  385                         error = vfs_allocate_syncvnode(mp);
  386                 vfs_unbusy(mp);
  387                 (void) VFS_STATVFS(mp, &mp->mnt_stat, p);
  388                 if ((error = VFS_START(mp, 0, p)))
  389                         vrele(vp);
  390         } else {
  391                 vp->v_mountedhere = (struct mount *)0;
  392                 vfs->vfs_refcount--;
  393                 vfs_unbusy(mp);
  394                 free(mp, M_MOUNT);
  395                 vput(vp);
  396         }
  397         return (error);
  398 }
  399 
  400 /*
  401  * Scan all active processes to see if any of them have a current
  402  * or root directory onto which the new filesystem has just been
  403  * mounted. If so, replace them with the new mount point.
  404  */
  405 void
  406 checkdirs(olddp)
  407         struct vnode *olddp;
  408 {
  409         struct cwdinfo *cwdi;
  410         struct vnode *newdp;
  411         struct proc *p;
  412 
  413         if (olddp->v_usecount == 1)
  414                 return;
  415         if (VFS_ROOT(olddp->v_mountedhere, &newdp))
  416                 panic("mount: lost mount");
  417         proclist_lock_read();
  418         PROCLIST_FOREACH(p, &allproc) {
  419                 cwdi = p->p_cwdi;
  420                 if (!cwdi)
  421                         continue;
  422                 if (cwdi->cwdi_cdir == olddp) {
  423                         vrele(cwdi->cwdi_cdir);
  424                         VREF(newdp);
  425                         cwdi->cwdi_cdir = newdp;
  426                 }
  427                 if (cwdi->cwdi_rdir == olddp) {
  428                         vrele(cwdi->cwdi_rdir);
  429                         VREF(newdp);
  430                         cwdi->cwdi_rdir = newdp;
  431                 }
  432         }
  433         proclist_unlock_read();
  434         if (rootvnode == olddp) {
  435                 vrele(rootvnode);
  436                 VREF(newdp);
  437                 rootvnode = newdp;
  438         }
  439         vput(newdp);
  440 }
  441 
  442 /*
  443  * Unmount a file system.
  444  *
  445  * Note: unmount takes a path to the vnode mounted on as argument,
  446  * not special file (as before).
  447  */
  448 /* ARGSUSED */
  449 int
  450 sys_unmount(l, v, retval)
  451         struct lwp *l;
  452         void *v;
  453         register_t *retval;
  454 {
  455         struct sys_unmount_args /* {
  456                 syscallarg(const char *) path;
  457                 syscallarg(int) flags;
  458         } */ *uap = v;
  459         struct proc *p = l->l_proc;
  460         struct vnode *vp;
  461         struct mount *mp;
  462         int error;
  463         struct nameidata nd;
  464 
  465         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
  466             SCARG(uap, path), p);
  467         if ((error = namei(&nd)) != 0)
  468                 return (error);
  469         vp = nd.ni_vp;
  470         mp = vp->v_mount;
  471 
  472         /*
  473          * Only root, or the user that did the original mount is
  474          * permitted to unmount this filesystem.
  475          */
  476         if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
  477             (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
  478                 vput(vp);
  479                 return (error);
  480         }
  481 
  482         /*
  483          * Don't allow unmounting the root file system.
  484          */
  485         if (mp->mnt_flag & MNT_ROOTFS) {
  486                 vput(vp);
  487                 return (EINVAL);
  488         }
  489 
  490         /*
  491          * Must be the root of the filesystem
  492          */
  493         if ((vp->v_flag & VROOT) == 0) {
  494                 vput(vp);
  495                 return (EINVAL);
  496         }
  497         vput(vp);
  498 
  499         /*
  500          * XXX Freeze syncer.  Must do this before locking the
  501          * mount point.  See dounmount() for details.
  502          */
  503         lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
  504 
  505         if (vfs_busy(mp, 0, 0)) {
  506                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  507                 return (EBUSY);
  508         }
  509 
  510         return (dounmount(mp, SCARG(uap, flags), p));
  511 }
  512 
  513 /*
  514  * Do the actual file system unmount. File system is assumed to have been
  515  * marked busy by the caller.
       *
       *   mp     mount to tear down; freed here on success.
       *   flags  unmount flags; MNT_FORCE lets VFS_UNMOUNT() run even when
       *          the preceding fss hook or VFS_SYNC() failed.
       *   p      process performing the unmount; recorded in mnt_unmounter
       *          and its credentials are passed to VFS_SYNC().
       *
       * The caller must hold syncer_lock (see the comment in the body); it
       * is released here on every path.
       *
       * Returns 0 on success, otherwise the error from fss_umount_hook(),
       * VFS_SYNC() or VFS_UNMOUNT(), in which case the mount stays in place.
  516  */
  517 int
  518 dounmount(mp, flags, p)
  519         struct mount *mp;
  520         int flags;
  521         struct proc *p;
  522 {
  523         struct vnode *coveredvp;
  524         int error;
  525         int async;
  526         int used_syncer;
  527 
  528         simple_lock(&mountlist_slock);
              /* Drop the busy reference taken by the caller. */
  529         vfs_unbusy(mp);
  530         used_syncer = (mp->mnt_syncer != NULL);
  531 
  532         /*
  533          * XXX Syncer must be frozen when we get here.  This should really
  534          * be done on a per-mountpoint basis, but especially the softdep
  535          * code possibly called from the syncer doesn't exactly work on a
  536          * per-mountpoint basis, so the softdep code would become a maze
  537          * of vfs_busy() calls.
  538          *
  539          * The caller of dounmount() must acquire syncer_lock because
  540          * the syncer itself acquires locks in syncer_lock -> vfs_busy
  541          * order, and we must preserve that order to avoid deadlock.
  542          *
  543          * So, if the file system did not use the syncer, now is
  544          * the time to release the syncer_lock.
  545          */
  546         if (used_syncer == 0)
  547                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  548 
              /* Flag the mount as being unmounted and record who is doing it. */
  549         mp->mnt_iflag |= IMNT_UNMOUNT;
  550         mp->mnt_unmounter = p;
              /*
               * NOTE(review): LK_INTERLOCK passes mountlist_slock to lockmgr();
               * presumably lockmgr() releases it -- confirm against lockmgr(9).
               */
  551         lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
  552         vn_start_write(NULL, &mp, V_WAIT);
  553 
  554         if (mp->mnt_flag & MNT_EXPUBLIC)
  555                 vfs_setpublicfs(NULL, NULL, NULL);
              /* Remember MNT_ASYNC so it can be put back if the unmount fails. */
  556         async = mp->mnt_flag & MNT_ASYNC;
  557         mp->mnt_flag &= ~MNT_ASYNC;
  558         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  559         if (mp->mnt_syncer != NULL)
  560                 vfs_deallocate_syncvnode(mp);
  561         error = 0;
              /* Writable mounts are synced (after the fss hook) before unmounting. */
  562         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  563 #if NFSS > 0
  564                 error = fss_umount_hook(mp, (flags & MNT_FORCE));
  565 #endif
  566                 if (error == 0)
  567                         error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
  568         }
              /* With MNT_FORCE an earlier error is replaced by VFS_UNMOUNT()'s result. */
  569         if (error == 0 || (flags & MNT_FORCE))
  570                 error = VFS_UNMOUNT(mp, flags, p);
  571         vn_finished_write(mp, 0);
  572         simple_lock(&mountlist_slock);
  573         if (error) {
                      /*
                       * Unmount failed: recreate the syncer vnode if the flags
                       * call for one, clear the unmount markers, restore
                       * MNT_ASYNC, release the locks and return the error.
                       */
  574                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  575                         (void) vfs_allocate_syncvnode(mp);
  576                 mp->mnt_iflag &= ~IMNT_UNMOUNT;
  577                 mp->mnt_unmounter = NULL;
  578                 mp->mnt_flag |= async;
  579                 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
  580                     &mountlist_slock);
  581                 if (used_syncer)
  582                         lockmgr(&syncer_lock, LK_RELEASE, NULL);
  583                 simple_lock(&mp->mnt_slock);
                      /* Wake sleepers on mp and wait until mnt_wcnt reaches zero. */
  584                 while (mp->mnt_wcnt > 0) {
  585                         wakeup(mp);
  586                         ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
  587                                 0, &mp->mnt_slock);
  588                 }
  589                 simple_unlock(&mp->mnt_slock);
  590                 return (error);
  591         }
              /* Success: unlink from the mount list and uncover the mount point. */
  592         CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
  593         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
  594                 coveredvp->v_mountedhere = NULL;
  595                 vrele(coveredvp);
  596         }
  597         mp->mnt_op->vfs_refcount--;
  598         if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
  599                 panic("unmount: dangling vnode");
  600         mp->mnt_iflag |= IMNT_GONE;
  601         lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
  602         if (used_syncer)
  603                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  604         simple_lock(&mp->mnt_slock);
              /* Same wake-and-wait on mnt_wcnt as in the failure path, before freeing mp. */
  605         while (mp->mnt_wcnt > 0) {
  606                 wakeup(mp);
  607                 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
  608         }
  609         simple_unlock(&mp->mnt_slock);
  610         free(mp, M_MOUNT);
  611         return (0);
  612 }
  613 
  614 /*
  615  * Sync each mounted filesystem.
  616  */
  617 #ifdef DEBUG
  618 int syncprt = 0;
  619 struct ctldebug debug0 = { "syncprt", &syncprt };
  620 #endif
  621 
  622 /* ARGSUSED */
  623 int
  624 sys_sync(l, v, retval)
  625         struct lwp *l;
  626         void *v;
  627         register_t *retval;
  628 {
  629         struct mount *mp, *nmp;
  630         int asyncflag;
  631         struct proc *p = l == NULL ? &proc0 : l->l_proc;
  632 
  633         simple_lock(&mountlist_slock);
  634         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
  635                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  636                         nmp = mp->mnt_list.cqe_prev;
  637                         continue;
  638                 }
  639                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  640                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  641                         asyncflag = mp->mnt_flag & MNT_ASYNC;
  642                         mp->mnt_flag &= ~MNT_ASYNC;
  643                         VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
  644                         if (asyncflag)
  645                                  mp->mnt_flag |= MNT_ASYNC;
  646                         vn_finished_write(mp, 0);
  647                 }
  648                 simple_lock(&mountlist_slock);
  649                 nmp = mp->mnt_list.cqe_prev;
  650                 vfs_unbusy(mp);
  651 
  652         }
  653         simple_unlock(&mountlist_slock);
  654 #ifdef DEBUG
  655         if (syncprt)
  656                 vfs_bufstats();
  657 #endif /* DEBUG */
  658         return (0);
  659 }
  660 
  661 /*
  662  * Change filesystem quotas.
  663  */
  664 /* ARGSUSED */
  665 int
  666 sys_quotactl(l, v, retval)
  667         struct lwp *l;
  668         void *v;
  669         register_t *retval;
  670 {
  671         struct sys_quotactl_args /* {
  672                 syscallarg(const char *) path;
  673                 syscallarg(int) cmd;
  674                 syscallarg(int) uid;
  675                 syscallarg(caddr_t) arg;
  676         } */ *uap = v;
  677         struct proc *p = l->l_proc;
  678         struct mount *mp;
  679         int error;
  680         struct nameidata nd;
  681 
  682         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
  683         if ((error = namei(&nd)) != 0)
  684                 return (error);
  685         error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
  686         vrele(nd.ni_vp);
  687         if (error)
  688                 return (error);
  689         error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
  690             SCARG(uap, arg), p);
  691         vn_finished_write(mp, 0);
  692         return (error);
  693 }
  694 
  695 int
  696 dostatvfs(struct mount *mp, struct statvfs *sp, struct proc *p, int flags,
  697     int root)
  698 {
  699         struct cwdinfo *cwdi = p->p_cwdi;
  700         int error = 0;
  701 
  702         /*
  703          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  704          * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
  705          * overrides MNT_NOWAIT.
  706          */
  707         if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
  708             (flags != MNT_WAIT && flags != 0)) {
  709                 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
  710                 goto done;
  711         }
  712 
  713         /* Get the filesystem stats now */
  714         memset(sp, 0, sizeof(*sp));
  715         if ((error = VFS_STATVFS(mp, sp, p)) != 0) {
  716                 return error;
  717         }
  718 
  719         if (cwdi->cwdi_rdir == NULL)
  720                 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
  721 done:
  722         if (cwdi->cwdi_rdir != NULL) {
  723                 size_t len;
  724                 char *bp;
  725                 char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  726                 if (!path)
  727                         return ENOMEM;
  728 
  729                 bp = path + MAXPATHLEN;
  730                 *--bp = '\0';
  731                 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
  732                     MAXPATHLEN / 2, 0, p);
  733                 if (error) {
  734                         free(path, M_TEMP);
  735                         return error;
  736                 }
  737                 len = strlen(bp);
  738                 /*
  739                  * for mount points that are below our root, we can see
  740                  * them, so we fix up the pathname and return them. The
  741                  * rest we cannot see, so we don't allow viewing the
  742                  * data.
  743                  */
  744                 if (strncmp(bp, sp->f_mntonname, len) == 0) {
  745                         strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
  746                             sizeof(sp->f_mntonname));
  747                         if (sp->f_mntonname[0] == '\0')
  748                                 (void)strlcpy(sp->f_mntonname, "/",
  749                                     sizeof(sp->f_mntonname));
  750                 } else {
  751                         if (root)
  752                                 (void)strlcpy(sp->f_mntonname, "/",
  753                                     sizeof(sp->f_mntonname));
  754                         else
  755                                 error = EPERM;
  756                 }
  757                 free(path, M_TEMP);
  758         }
  759         sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
  760         return error;
  761 }
  762 
  763 /*
  764  * Get filesystem statistics.
  765  */
  766 /* ARGSUSED */
  767 int
  768 sys_statvfs1(l, v, retval)
  769         struct lwp *l;
  770         void *v;
  771         register_t *retval;
  772 {
  773         struct sys_statvfs1_args /* {
  774                 syscallarg(const char *) path;
  775                 syscallarg(struct statvfs *) buf;
  776                 syscallarg(int) flags;
  777         } */ *uap = v;
  778         struct proc *p = l->l_proc;
  779         struct mount *mp;
  780         struct statvfs sbuf;
  781         int error;
  782         struct nameidata nd;
  783 
  784         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
  785         if ((error = namei(&nd)) != 0)
  786                 return error;
  787         mp = nd.ni_vp->v_mount;
  788         vrele(nd.ni_vp);
  789         if ((error = dostatvfs(mp, &sbuf, p, SCARG(uap, flags), 1)) != 0)
  790                 return error;
  791         return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
  792 }
  793 
  794 /*
  795  * Get filesystem statistics.
  796  */
  797 /* ARGSUSED */
  798 int
  799 sys_fstatvfs1(l, v, retval)
  800         struct lwp *l;
  801         void *v;
  802         register_t *retval;
  803 {
  804         struct sys_fstatvfs1_args /* {
  805                 syscallarg(int) fd;
  806                 syscallarg(struct statvfs *) buf;
  807                 syscallarg(int) flags;
  808         } */ *uap = v;
  809         struct proc *p = l->l_proc;
  810         struct file *fp;
  811         struct mount *mp;
  812         struct statvfs sbuf;
  813         int error;
  814 
  815         /* getvnode() will use the descriptor for us */
  816         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
  817                 return (error);
  818         mp = ((struct vnode *)fp->f_data)->v_mount;
  819         if ((error = dostatvfs(mp, &sbuf, p, SCARG(uap, flags), 1)) != 0)
  820                 goto out;
  821         error = copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
  822  out:
  823         FILE_UNUSE(fp, p);
  824         return error;
  825 }
  826 
  827 
  828 /*
  829  * Get statistics on all filesystems.
  830  */
  831 int
  832 sys_getvfsstat(l, v, retval)
  833         struct lwp *l;
  834         void *v;
  835         register_t *retval;
  836 {
  837         struct sys_getvfsstat_args /* {
  838                 syscallarg(struct statvfs *) buf;
  839                 syscallarg(size_t) bufsize;
  840                 syscallarg(int) flags;
  841         } */ *uap = v;
  842         int root = 0;
  843         struct proc *p = l->l_proc;
  844         struct mount *mp, *nmp;
  845         struct statvfs sbuf;
  846         struct statvfs *sfsp;
  847         size_t count, maxcount;
  848         int error = 0;
  849 
  850         maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
  851         sfsp = SCARG(uap, buf);
  852         simple_lock(&mountlist_slock);
  853         count = 0;
  854         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
  855              mp = nmp) {
  856                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  857                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
  858                         continue;
  859                 }
  860                 if (sfsp && count < maxcount) {
  861                         error = dostatvfs(mp, &sbuf, p, SCARG(uap, flags), 0);
  862                         if (error) {
  863                                 simple_lock(&mountlist_slock);
  864                                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  865                                 vfs_unbusy(mp);
  866                                 continue;
  867                         }
  868                         error = copyout(&sbuf, sfsp, sizeof(*sfsp));
  869                         if (error) {
  870                                 vfs_unbusy(mp);
  871                                 return (error);
  872                         }
  873                         sfsp++;
  874                         root |= strcmp(sbuf.f_mntonname, "/") == 0;
  875                 }
  876                 count++;
  877                 simple_lock(&mountlist_slock);
  878                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  879                 vfs_unbusy(mp);
  880         }
  881         simple_unlock(&mountlist_slock);
  882         if (root == 0 && p->p_cwdi->cwdi_rdir) {
  883                 /*
  884                  * fake a root entry
  885                  */
  886                 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, &sbuf, p,
  887                     SCARG(uap, flags), 1)) != 0)
  888                         return error;
  889                 if (sfsp)
  890                         error = copyout(&sbuf, sfsp, sizeof(*sfsp));
  891                 count++;
  892         }
  893         if (sfsp && count > maxcount)
  894                 *retval = maxcount;
  895         else
  896                 *retval = count;
  897         return error;
  898 }
  899 
  900 /*
  901  * Change current working directory to a given file descriptor.
  902  */
  903 /* ARGSUSED */
  904 int
  905 sys_fchdir(l, v, retval)
  906         struct lwp *l;
  907         void *v;
  908         register_t *retval;
  909 {
  910         struct sys_fchdir_args /* {
  911                 syscallarg(int) fd;
  912         } */ *uap = v;
  913         struct proc *p = l->l_proc;
  914         struct filedesc *fdp = p->p_fd;
  915         struct cwdinfo *cwdi = p->p_cwdi;
  916         struct vnode *vp, *tdp;
  917         struct mount *mp;
  918         struct file *fp;
  919         int error;
  920 
  921         /* getvnode() will use the descriptor for us */
  922         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
  923                 return (error);
  924         vp = (struct vnode *)fp->f_data;
  925 
  926         VREF(vp);
  927         vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
  928         if (vp->v_type != VDIR)
  929                 error = ENOTDIR;
  930         else
  931                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
  932         while (!error && (mp = vp->v_mountedhere) != NULL) {
  933                 if (vfs_busy(mp, 0, 0))
  934                         continue;
  935                 error = VFS_ROOT(mp, &tdp);
  936                 vfs_unbusy(mp);
  937                 if (error)
  938                         break;
  939                 vput(vp);
  940                 vp = tdp;
  941         }
  942         if (error) {
  943                 vput(vp);
  944                 goto out;
  945         }
  946         VOP_UNLOCK(vp, 0);
  947 
  948         /*
  949          * Disallow changing to a directory not under the process's
  950          * current root directory (if there is one).
  951          */
  952         if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, p)) {
  953                 vrele(vp);
  954                 error = EPERM;  /* operation not permitted */
  955                 goto out;
  956         }
  957 
  958         vrele(cwdi->cwdi_cdir);
  959         cwdi->cwdi_cdir = vp;
  960  out:
  961         FILE_UNUSE(fp, p);
  962         return (error);
  963 }
  964 
  965 /*
  966  * Change this process's notion of the root directory to a given file descriptor.
  967  */
  968 
  969 int
  970 sys_fchroot(l, v, retval)
  971         struct lwp *l;
  972         void *v;
  973         register_t *retval;
  974 {
  975         struct sys_fchroot_args *uap = v;
  976         struct proc *p = l->l_proc;
  977         struct filedesc *fdp = p->p_fd;
  978         struct cwdinfo *cwdi = p->p_cwdi;
  979         struct vnode    *vp;
  980         struct file     *fp;
  981         int              error;
  982 
  983         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  984                 return error;
  985         /* getvnode() will use the descriptor for us */
  986         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
  987                 return error;
  988         vp = (struct vnode *) fp->f_data;
  989         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  990         if (vp->v_type != VDIR)
  991                 error = ENOTDIR;
  992         else
  993                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
  994         VOP_UNLOCK(vp, 0);
  995         if (error)
  996                 goto out;
  997         VREF(vp);
  998 
  999         /*
 1000          * Prevent escaping from chroot by putting the root under
 1001          * the working directory.  Silently chdir to / if we aren't
 1002          * already there.
 1003          */
 1004         if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
 1005                 /*
 1006                  * XXX would be more failsafe to change directory to a
 1007                  * deadfs node here instead
 1008                  */
 1009                 vrele(cwdi->cwdi_cdir);
 1010                 VREF(vp);
 1011                 cwdi->cwdi_cdir = vp;
 1012         }
 1013 
 1014         if (cwdi->cwdi_rdir != NULL)
 1015                 vrele(cwdi->cwdi_rdir);
 1016         cwdi->cwdi_rdir = vp;
 1017  out:
 1018         FILE_UNUSE(fp, p);
 1019         return (error);
 1020 }
 1021 
 1022 
 1023 
 1024 /*
 1025  * Change current working directory (``.'').
 1026  */
 1027 /* ARGSUSED */
 1028 int
 1029 sys_chdir(l, v, retval)
 1030         struct lwp *l;
 1031         void *v;
 1032         register_t *retval;
 1033 {
 1034         struct sys_chdir_args /* {
 1035                 syscallarg(const char *) path;
 1036         } */ *uap = v;
 1037         struct proc *p = l->l_proc;
 1038         struct cwdinfo *cwdi = p->p_cwdi;
 1039         int error;
 1040         struct nameidata nd;
 1041 
 1042         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1043             SCARG(uap, path), p);
 1044         if ((error = change_dir(&nd, p)) != 0)
 1045                 return (error);
 1046         vrele(cwdi->cwdi_cdir);
 1047         cwdi->cwdi_cdir = nd.ni_vp;
 1048         return (0);
 1049 }
 1050 
 1051 /*
 1052  * Change notion of root (``/'') directory.
 1053  */
 1054 /* ARGSUSED */
 1055 int
 1056 sys_chroot(l, v, retval)
 1057         struct lwp *l;
 1058         void *v;
 1059         register_t *retval;
 1060 {
 1061         struct sys_chroot_args /* {
 1062                 syscallarg(const char *) path;
 1063         } */ *uap = v;
 1064         struct proc *p = l->l_proc;
 1065         struct cwdinfo *cwdi = p->p_cwdi;
 1066         struct vnode *vp;
 1067         int error;
 1068         struct nameidata nd;
 1069 
 1070         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1071                 return (error);
 1072         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1073             SCARG(uap, path), p);
 1074         if ((error = change_dir(&nd, p)) != 0)
 1075                 return (error);
 1076         if (cwdi->cwdi_rdir != NULL)
 1077                 vrele(cwdi->cwdi_rdir);
 1078         vp = nd.ni_vp;
 1079         cwdi->cwdi_rdir = vp;
 1080 
 1081         /*
 1082          * Prevent escaping from chroot by putting the root under
 1083          * the working directory.  Silently chdir to / if we aren't
 1084          * already there.
 1085          */
 1086         if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
 1087                 /*
 1088                  * XXX would be more failsafe to change directory to a
 1089                  * deadfs node here instead
 1090                  */
 1091                 vrele(cwdi->cwdi_cdir);
 1092                 VREF(vp);
 1093                 cwdi->cwdi_cdir = vp;
 1094         }
 1095 
 1096         return (0);
 1097 }
 1098 
 1099 /*
 1100  * Common routine for chroot and chdir.
 1101  */
 1102 static int
 1103 change_dir(ndp, p)
 1104         struct nameidata *ndp;
 1105         struct proc *p;
 1106 {
 1107         struct vnode *vp;
 1108         int error;
 1109 
 1110         if ((error = namei(ndp)) != 0)
 1111                 return (error);
 1112         vp = ndp->ni_vp;
 1113         if (vp->v_type != VDIR)
 1114                 error = ENOTDIR;
 1115         else
 1116                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
 1117 
 1118         if (error)
 1119                 vput(vp);
 1120         else
 1121                 VOP_UNLOCK(vp, 0);
 1122         return (error);
 1123 }
 1124 
 1125 /*
 1126  * Check permissions, allocate an open file structure,
 1127  * and call the device open routine if any.
 1128  */
 1129 int
 1130 sys_open(l, v, retval)
 1131         struct lwp *l;
 1132         void *v;
 1133         register_t *retval;
 1134 {
 1135         struct sys_open_args /* {
 1136                 syscallarg(const char *) path;
 1137                 syscallarg(int) flags;
 1138                 syscallarg(int) mode;
 1139         } */ *uap = v;
 1140         struct proc *p = l->l_proc;
 1141         struct cwdinfo *cwdi = p->p_cwdi;
 1142         struct filedesc *fdp = p->p_fd;
 1143         struct file *fp;
 1144         struct vnode *vp;
 1145         int flags, cmode;
 1146         int type, indx, error;
 1147         struct flock lf;
 1148         struct nameidata nd;
 1149 
 1150         flags = FFLAGS(SCARG(uap, flags));
 1151         if ((flags & (FREAD | FWRITE)) == 0)
 1152                 return (EINVAL);
 1153         /* falloc() will use the file descriptor for us */
 1154         if ((error = falloc(p, &fp, &indx)) != 0)
 1155                 return (error);
 1156         cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
 1157         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 1158         l->l_dupfd = -indx - 1;                 /* XXX check for fdopen */
 1159         if ((error = vn_open(&nd, flags, cmode)) != 0) {
 1160                 FILE_UNUSE(fp, p);
 1161                 fdp->fd_ofiles[indx] = NULL;
 1162                 ffree(fp);
 1163                 if ((error == EDUPFD || error == EMOVEFD) &&
 1164                     l->l_dupfd >= 0 &&                  /* XXX from fdopen */
 1165                     (error =
 1166                         dupfdopen(p, indx, l->l_dupfd, flags, error)) == 0) {
 1167                         *retval = indx;
 1168                         return (0);
 1169                 }
 1170                 if (error == ERESTART)
 1171                         error = EINTR;
 1172                 fdremove(fdp, indx);
 1173                 return (error);
 1174         }
 1175         l->l_dupfd = 0;
 1176         vp = nd.ni_vp;
 1177         fp->f_flag = flags & FMASK;
 1178         fp->f_type = DTYPE_VNODE;
 1179         fp->f_ops = &vnops;
 1180         fp->f_data = vp;
 1181         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1182                 lf.l_whence = SEEK_SET;
 1183                 lf.l_start = 0;
 1184                 lf.l_len = 0;
 1185                 if (flags & O_EXLOCK)
 1186                         lf.l_type = F_WRLCK;
 1187                 else
 1188                         lf.l_type = F_RDLCK;
 1189                 type = F_FLOCK;
 1190                 if ((flags & FNONBLOCK) == 0)
 1191                         type |= F_WAIT;
 1192                 VOP_UNLOCK(vp, 0);
 1193                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1194                 if (error) {
 1195                         (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
 1196                         FILE_UNUSE(fp, p);
 1197                         ffree(fp);
 1198                         fdremove(fdp, indx);
 1199                         return (error);
 1200                 }
 1201                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1202                 fp->f_flag |= FHASLOCK;
 1203         }
 1204         VOP_UNLOCK(vp, 0);
 1205         *retval = indx;
 1206         FILE_SET_MATURE(fp);
 1207         FILE_UNUSE(fp, p);
 1208         return (0);
 1209 }
 1210 
 1211 /*
 1212  * Get file handle system call
 1213  */
 1214 int
 1215 sys_getfh(l, v, retval)
 1216         struct lwp *l;
 1217         void *v;
 1218         register_t *retval;
 1219 {
 1220         struct sys_getfh_args /* {
 1221                 syscallarg(char *) fname;
 1222                 syscallarg(fhandle_t *) fhp;
 1223         } */ *uap = v;
 1224         struct proc *p = l->l_proc;
 1225         struct vnode *vp;
 1226         fhandle_t fh;
 1227         int error;
 1228         struct nameidata nd;
 1229 
 1230         /*
 1231          * Must be super user
 1232          */
 1233         error = suser(p->p_ucred, &p->p_acflag);
 1234         if (error)
 1235                 return (error);
 1236         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1237             SCARG(uap, fname), p);
 1238         error = namei(&nd);
 1239         if (error)
 1240                 return (error);
 1241         vp = nd.ni_vp;
 1242         memset(&fh, 0, sizeof(fh));
 1243         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsidx;
 1244         error = VFS_VPTOFH(vp, &fh.fh_fid);
 1245         vput(vp);
 1246         if (error)
 1247                 return (error);
 1248         error = copyout(&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh));
 1249         return (error);
 1250 }
 1251 
 1252 /*
 1253  * Open a file given a file handle.
 1254  *
 1255  * Check permissions, allocate an open file structure,
 1256  * and call the device open routine if any.
 1257  */
 1258 int
 1259 sys_fhopen(l, v, retval)
 1260         struct lwp *l;
 1261         void *v;
 1262         register_t *retval;
 1263 {
 1264         struct sys_fhopen_args /* {
 1265                 syscallarg(const fhandle_t *) fhp;
 1266                 syscallarg(int) flags;
 1267         } */ *uap = v;
 1268         struct proc *p = l->l_proc;
 1269         struct filedesc *fdp = p->p_fd;
 1270         struct file *fp;
 1271         struct vnode *vp = NULL;
 1272         struct mount *mp;
 1273         struct ucred *cred = p->p_ucred;
 1274         int flags;
 1275         struct file *nfp;
 1276         int type, indx, error=0;
 1277         struct flock lf;
 1278         struct vattr va;
 1279         fhandle_t fh;
 1280 
 1281         /*
 1282          * Must be super user
 1283          */
 1284         if ((error = suser(p->p_ucred, &p->p_acflag)))
 1285                 return (error);
 1286 
 1287         flags = FFLAGS(SCARG(uap, flags));
 1288         if ((flags & (FREAD | FWRITE)) == 0)
 1289                 return (EINVAL);
 1290         if ((flags & O_CREAT))
 1291                 return (EINVAL);
 1292         /* falloc() will use the file descriptor for us */
 1293         if ((error = falloc(p, &nfp, &indx)) != 0)
 1294                 return (error);
 1295         fp = nfp;
 1296         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1297                 goto bad;
 1298 
 1299         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
 1300                 error = ESTALE;
 1301                 goto bad;
 1302         }
 1303 
 1304         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
 1305                 vp = NULL;      /* most likely unnecessary sanity for bad: */
 1306                 goto bad;
 1307         }
 1308 
 1309         /* Now do an effective vn_open */
 1310 
 1311         if (vp->v_type == VSOCK) {
 1312                 error = EOPNOTSUPP;
 1313                 goto bad;
 1314         }
 1315         if (flags & FREAD) {
 1316                 if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
 1317                         goto bad;
 1318         }
 1319         if (flags & (FWRITE | O_TRUNC)) {
 1320                 if (vp->v_type == VDIR) {
 1321                         error = EISDIR;
 1322                         goto bad;
 1323                 }
 1324                 if ((error = vn_writechk(vp)) != 0 ||
 1325                     (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
 1326                         goto bad;
 1327         }
 1328         if (flags & O_TRUNC) {
 1329                 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 1330                         goto bad;
 1331                 VOP_UNLOCK(vp, 0);                      /* XXX */
 1332                 VOP_LEASE(vp, p, cred, LEASE_WRITE);
 1333                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 1334                 VATTR_NULL(&va);
 1335                 va.va_size = 0;
 1336                 error = VOP_SETATTR(vp, &va, cred, p);
 1337                 vn_finished_write(mp, 0);
 1338                 if (error)
 1339                         goto bad;
 1340         }
 1341         if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
 1342                 goto bad;
 1343         if (vp->v_type == VREG &&
 1344             uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
 1345                 error = EIO;
 1346                 goto bad;
 1347         }
 1348         if (flags & FWRITE)
 1349                 vp->v_writecount++;
 1350 
 1351         /* done with modified vn_open, now finish what sys_open does. */
 1352 
 1353         fp->f_flag = flags & FMASK;
 1354         fp->f_type = DTYPE_VNODE;
 1355         fp->f_ops = &vnops;
 1356         fp->f_data = vp;
 1357         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1358                 lf.l_whence = SEEK_SET;
 1359                 lf.l_start = 0;
 1360                 lf.l_len = 0;
 1361                 if (flags & O_EXLOCK)
 1362                         lf.l_type = F_WRLCK;
 1363                 else
 1364                         lf.l_type = F_RDLCK;
 1365                 type = F_FLOCK;
 1366                 if ((flags & FNONBLOCK) == 0)
 1367                         type |= F_WAIT;
 1368                 VOP_UNLOCK(vp, 0);
 1369                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1370                 if (error) {
 1371                         (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
 1372                         FILE_UNUSE(fp, p);
 1373                         ffree(fp);
 1374                         fdremove(fdp, indx);
 1375                         return (error);
 1376                 }
 1377                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1378                 fp->f_flag |= FHASLOCK;
 1379         }
 1380         VOP_UNLOCK(vp, 0);
 1381         *retval = indx;
 1382         FILE_SET_MATURE(fp);
 1383         FILE_UNUSE(fp, p);
 1384         return (0);
 1385 
 1386 bad:
 1387         FILE_UNUSE(fp, p);
 1388         ffree(fp);
 1389         fdremove(fdp, indx);
 1390         if (vp != NULL)
 1391                 vput(vp);
 1392         return (error);
 1393 }
 1394 
 1395 /* ARGSUSED */
 1396 int
 1397 sys_fhstat(l, v, retval)
 1398         struct lwp *l;
 1399         void *v;
 1400         register_t *retval;
 1401 {
 1402         struct sys_fhstat_args /* {
 1403                 syscallarg(const fhandle_t *) fhp;
 1404                 syscallarg(struct stat *) sb;
 1405         } */ *uap = v;
 1406         struct proc *p = l->l_proc;
 1407         struct stat sb;
 1408         int error;
 1409         fhandle_t fh;
 1410         struct mount *mp;
 1411         struct vnode *vp;
 1412 
 1413         /*
 1414          * Must be super user
 1415          */
 1416         if ((error = suser(p->p_ucred, &p->p_acflag)))
 1417                 return (error);
 1418 
 1419         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1420                 return (error);
 1421 
 1422         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 1423                 return (ESTALE);
 1424         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
 1425                 return (error);
 1426         error = vn_stat(vp, &sb, p);
 1427         vput(vp);
 1428         if (error)
 1429                 return (error);
 1430         error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
 1431         return (error);
 1432 }
 1433 
 1434 /* ARGSUSED */
 1435 int
 1436 sys_fhstatvfs1(l, v, retval)
 1437         struct lwp *l;
 1438         void *v;
 1439         register_t *retval;
 1440 {
 1441         struct sys_fhstatvfs1_args /*
 1442                 syscallarg(const fhandle_t *) fhp;
 1443                 syscallarg(struct statvfs *) buf;
 1444                 syscallarg(int) flags;
 1445         } */ *uap = v;
 1446         struct proc *p = l->l_proc;
 1447         struct statvfs sbuf;
 1448         fhandle_t fh;
 1449         struct mount *mp;
 1450         struct vnode *vp;
 1451         int error;
 1452 
 1453         /*
 1454          * Must be super user
 1455          */
 1456         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1457                 return error;
 1458 
 1459         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1460                 return error;
 1461 
 1462         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 1463                 return ESTALE;
 1464         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
 1465                 return error;
 1466 
 1467         mp = vp->v_mount;
 1468         if ((error = dostatvfs(mp, &sbuf, p, SCARG(uap, flags), 1)) != 0) {
 1469                 vput(vp);
 1470                 return error;
 1471         }
 1472         vput(vp);
 1473         return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
 1474 }
 1475 
 1476 /*
 1477  * Create a special file.
 1478  */
 1479 /* ARGSUSED */
 1480 int
 1481 sys_mknod(l, v, retval)
 1482         struct lwp *l;
 1483         void *v;
 1484         register_t *retval;
 1485 {
 1486         struct sys_mknod_args /* {
 1487                 syscallarg(const char *) path;
 1488                 syscallarg(int) mode;
 1489                 syscallarg(int) dev;
 1490         } */ *uap = v;
 1491         struct proc *p = l->l_proc;
 1492         struct vnode *vp;
 1493         struct mount *mp;
 1494         struct vattr vattr;
 1495         int error;
 1496         int whiteout = 0;
 1497         struct nameidata nd;
 1498 
 1499         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1500                 return (error);
 1501 restart:
 1502         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 1503         if ((error = namei(&nd)) != 0)
 1504                 return (error);
 1505         vp = nd.ni_vp;
 1506         if (vp != NULL)
 1507                 error = EEXIST;
 1508         else {
 1509                 VATTR_NULL(&vattr);
 1510                 vattr.va_mode =
 1511                     (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1512                 vattr.va_rdev = SCARG(uap, dev);
 1513                 whiteout = 0;
 1514 
 1515                 switch (SCARG(uap, mode) & S_IFMT) {
 1516                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1517                         vattr.va_type = VBAD;
 1518                         break;
 1519                 case S_IFCHR:
 1520                         vattr.va_type = VCHR;
 1521                         break;
 1522                 case S_IFBLK:
 1523                         vattr.va_type = VBLK;
 1524                         break;
 1525                 case S_IFWHT:
 1526                         whiteout = 1;
 1527                         break;
 1528                 default:
 1529                         error = EINVAL;
 1530                         break;
 1531                 }
 1532         }
 1533         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1534                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1535                 if (nd.ni_dvp == vp)
 1536                         vrele(nd.ni_dvp);
 1537                 else
 1538                         vput(nd.ni_dvp);
 1539                 if (vp)
 1540                         vrele(vp);
 1541                 if ((error = vn_start_write(NULL, &mp,
 1542                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1543                         return (error);
 1544                 goto restart;
 1545         }
 1546         if (!error) {
 1547                 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1548                 if (whiteout) {
 1549                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1550                         if (error)
 1551                                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1552                         vput(nd.ni_dvp);
 1553                 } else {
 1554                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1555                                                 &nd.ni_cnd, &vattr);
 1556                         if (error == 0)
 1557                                 vput(nd.ni_vp);
 1558                 }
 1559         } else {
 1560                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1561                 if (nd.ni_dvp == vp)
 1562                         vrele(nd.ni_dvp);
 1563                 else
 1564                         vput(nd.ni_dvp);
 1565                 if (vp)
 1566                         vrele(vp);
 1567         }
 1568         vn_finished_write(mp, 0);
 1569         return (error);
 1570 }
 1571 
 1572 /*
 1573  * Create a named pipe.
 1574  */
 1575 /* ARGSUSED */
 1576 int
 1577 sys_mkfifo(l, v, retval)
 1578         struct lwp *l;
 1579         void *v;
 1580         register_t *retval;
 1581 {
 1582         struct sys_mkfifo_args /* {
 1583                 syscallarg(const char *) path;
 1584                 syscallarg(int) mode;
 1585         } */ *uap = v;
 1586         struct proc *p = l->l_proc;
 1587         struct mount *mp;
 1588         struct vattr vattr;
 1589         int error;
 1590         struct nameidata nd;
 1591 
 1592 restart:
 1593         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 1594         if ((error = namei(&nd)) != 0)
 1595                 return (error);
 1596         if (nd.ni_vp != NULL) {
 1597                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1598                 if (nd.ni_dvp == nd.ni_vp)
 1599                         vrele(nd.ni_dvp);
 1600                 else
 1601                         vput(nd.ni_dvp);
 1602                 vrele(nd.ni_vp);
 1603                 return (EEXIST);
 1604         }
 1605         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1606                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1607                 if (nd.ni_dvp == nd.ni_vp)
 1608                         vrele(nd.ni_dvp);
 1609                 else
 1610                         vput(nd.ni_dvp);
 1611                 if (nd.ni_vp)
 1612                         vrele(nd.ni_vp);
 1613                 if ((error = vn_start_write(NULL, &mp,
 1614                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1615                         return (error);
 1616                 goto restart;
 1617         }
 1618         VATTR_NULL(&vattr);
 1619         vattr.va_type = VFIFO;
 1620         vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1621         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1622         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1623         if (error == 0)
 1624                 vput(nd.ni_vp);
 1625         vn_finished_write(mp, 0);
 1626         return (error);
 1627 }
 1628 
 1629 /*
 1630  * Make a hard file link.
 1631  */
 1632 /* ARGSUSED */
 1633 int
 1634 sys_link(l, v, retval)
 1635         struct lwp *l;
 1636         void *v;
 1637         register_t *retval;
 1638 {
 1639         struct sys_link_args /* {
 1640                 syscallarg(const char *) path;
 1641                 syscallarg(const char *) link;
 1642         } */ *uap = v;
 1643         struct proc *p = l->l_proc;
 1644         struct vnode *vp;
 1645         struct mount *mp;
 1646         struct nameidata nd;
 1647         int error;
 1648 
 1649         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 1650         if ((error = namei(&nd)) != 0)
 1651                 return (error);
 1652         vp = nd.ni_vp;
 1653         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 1654                 vrele(vp);
 1655                 return (error);
 1656         }
 1657         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
 1658         if ((error = namei(&nd)) != 0)
 1659                 goto out;
 1660         if (nd.ni_vp) {
 1661                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1662                 if (nd.ni_dvp == nd.ni_vp)
 1663                         vrele(nd.ni_dvp);
 1664                 else
 1665                         vput(nd.ni_dvp);
 1666                 vrele(nd.ni_vp);
 1667                 error = EEXIST;
 1668                 goto out;
 1669         }
 1670         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1671         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 1672         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1673 out:
 1674         vrele(vp);
 1675         vn_finished_write(mp, 0);
 1676         return (error);
 1677 }
 1678 
 1679 /*
 1680  * Make a symbolic link.
 1681  */
 1682 /* ARGSUSED */
 1683 int
 1684 sys_symlink(l, v, retval)
 1685         struct lwp *l;
 1686         void *v;
 1687         register_t *retval;
 1688 {
 1689         struct sys_symlink_args /* {
 1690                 syscallarg(const char *) path;
 1691                 syscallarg(const char *) link;
 1692         } */ *uap = v;
 1693         struct proc *p = l->l_proc;
 1694         struct mount *mp;
 1695         struct vattr vattr;
 1696         char *path;
 1697         int error;
 1698         struct nameidata nd;
 1699 
 1700         path = PNBUF_GET();
 1701         error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
 1702         if (error)
 1703                 goto out;
 1704 restart:
 1705         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
 1706         if ((error = namei(&nd)) != 0)
 1707                 goto out;
 1708         if (nd.ni_vp) {
 1709                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1710                 if (nd.ni_dvp == nd.ni_vp)
 1711                         vrele(nd.ni_dvp);
 1712                 else
 1713                         vput(nd.ni_dvp);
 1714                 vrele(nd.ni_vp);
 1715                 error = EEXIST;
 1716                 goto out;
 1717         }
 1718         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1719                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1720                 if (nd.ni_dvp == nd.ni_vp)
 1721                         vrele(nd.ni_dvp);
 1722                 else
 1723                         vput(nd.ni_dvp);
 1724                 if ((error = vn_start_write(NULL, &mp,
 1725                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1726                         return (error);
 1727                 goto restart;
 1728         }
 1729         VATTR_NULL(&vattr);
 1730         vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
 1731         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1732         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
 1733         if (error == 0)
 1734                 vput(nd.ni_vp);
 1735         vn_finished_write(mp, 0);
 1736 out:
 1737         PNBUF_PUT(path);
 1738         return (error);
 1739 }
 1740 
 1741 /*
 1742  * Delete a whiteout from the filesystem.
 1743  */
 1744 /* ARGSUSED */
 1745 int
 1746 sys_undelete(l, v, retval)
 1747         struct lwp *l;
 1748         void *v;
 1749         register_t *retval;
 1750 {
 1751         struct sys_undelete_args /* {
 1752                 syscallarg(const char *) path;
 1753         } */ *uap = v;
 1754         struct proc *p = l->l_proc;
 1755         int error;
 1756         struct mount *mp;
 1757         struct nameidata nd;
 1758 
 1759 restart:
 1760         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 1761             SCARG(uap, path), p);
 1762         error = namei(&nd);
 1763         if (error)
 1764                 return (error);
 1765 
 1766         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1767                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1768                 if (nd.ni_dvp == nd.ni_vp)
 1769                         vrele(nd.ni_dvp);
 1770                 else
 1771                         vput(nd.ni_dvp);
 1772                 if (nd.ni_vp)
 1773                         vrele(nd.ni_vp);
 1774                 return (EEXIST);
 1775         }
 1776         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1777                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1778                 if (nd.ni_dvp == nd.ni_vp)
 1779                         vrele(nd.ni_dvp);
 1780                 else
 1781                         vput(nd.ni_dvp);
 1782                 if ((error = vn_start_write(NULL, &mp,
 1783                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1784                         return (error);
 1785                 goto restart;
 1786         }
 1787         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1788         if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
 1789                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1790         vput(nd.ni_dvp);
 1791         vn_finished_write(mp, 0);
 1792         return (error);
 1793 }
 1794 
 1795 /*
 1796  * Delete a name from the filesystem.
 1797  */
 1798 /* ARGSUSED */
 1799 int
 1800 sys_unlink(l, v, retval)
 1801         struct lwp *l;
 1802         void *v;
 1803         register_t *retval;
 1804 {
 1805         struct sys_unlink_args /* {
 1806                 syscallarg(const char *) path;
 1807         } */ *uap = v;
 1808         struct proc *p = l->l_proc;
 1809         struct mount *mp;
 1810         struct vnode *vp;
 1811         int error;
 1812         struct nameidata nd;
 1813 
 1814 restart:
 1815         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 1816             SCARG(uap, path), p);
 1817         if ((error = namei(&nd)) != 0)
 1818                 return (error);
 1819         vp = nd.ni_vp;
 1820 
 1821         /*
 1822          * The root of a mounted filesystem cannot be deleted.
 1823          */
 1824         if (vp->v_flag & VROOT) {
 1825                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1826                 if (nd.ni_dvp == vp)
 1827                         vrele(nd.ni_dvp);
 1828                 else
 1829                         vput(nd.ni_dvp);
 1830                 vput(vp);
 1831                 error = EBUSY;
 1832                 goto out;
 1833         }
 1834 
 1835 #ifdef VERIFIED_EXEC
 1836         /* Handle remove requests for veriexec entries. */
 1837         if ((error = veriexec_removechk(p, vp, nd.ni_dirp)) != 0) {
 1838                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1839                 if (nd.ni_dvp == vp)
 1840                         vrele(nd.ni_dvp);
 1841                 else
 1842                         vput(nd.ni_dvp);
 1843                 vput(vp);
 1844                 goto out;
 1845         }
 1846 #endif /* VERIFIED_EXEC */
 1847 
 1848         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1849                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1850                 if (nd.ni_dvp == vp)
 1851                         vrele(nd.ni_dvp);
 1852                 else
 1853                         vput(nd.ni_dvp);
 1854                 vput(vp);
 1855                 if ((error = vn_start_write(NULL, &mp,
 1856                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1857                         return (error);
 1858                 goto restart;
 1859         }
 1860         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1861         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 1862         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 1863         vn_finished_write(mp, 0);
 1864 out:
 1865         return (error);
 1866 }
 1867 
 1868 /*
 1869  * Reposition read/write file offset.
 1870  */
 1871 int
 1872 sys_lseek(l, v, retval)
 1873         struct lwp *l;
 1874         void *v;
 1875         register_t *retval;
 1876 {
 1877         struct sys_lseek_args /* {
 1878                 syscallarg(int) fd;
 1879                 syscallarg(int) pad;
 1880                 syscallarg(off_t) offset;
 1881                 syscallarg(int) whence;
 1882         } */ *uap = v;
 1883         struct proc *p = l->l_proc;
 1884         struct ucred *cred = p->p_ucred;
 1885         struct filedesc *fdp = p->p_fd;
 1886         struct file *fp;
 1887         struct vnode *vp;
 1888         struct vattr vattr;
 1889         off_t newoff;
 1890         int error;
 1891 
 1892         if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
 1893                 return (EBADF);
 1894 
 1895         FILE_USE(fp);
 1896 
 1897         vp = (struct vnode *)fp->f_data;
 1898         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 1899                 error = ESPIPE;
 1900                 goto out;
 1901         }
 1902 
 1903         switch (SCARG(uap, whence)) {
 1904         case SEEK_CUR:
 1905                 newoff = fp->f_offset + SCARG(uap, offset);
 1906                 break;
 1907         case SEEK_END:
 1908                 error = VOP_GETATTR(vp, &vattr, cred, p);
 1909                 if (error)
 1910                         goto out;
 1911                 newoff = SCARG(uap, offset) + vattr.va_size;
 1912                 break;
 1913         case SEEK_SET:
 1914                 newoff = SCARG(uap, offset);
 1915                 break;
 1916         default:
 1917                 error = EINVAL;
 1918                 goto out;
 1919         }
 1920         if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
 1921                 goto out;
 1922 
 1923         *(off_t *)retval = fp->f_offset = newoff;
 1924  out:
 1925         FILE_UNUSE(fp, p);
 1926         return (error);
 1927 }
 1928 
 1929 /*
 1930  * Positional read system call.
 1931  */
 1932 int
 1933 sys_pread(l, v, retval)
 1934         struct lwp *l;
 1935         void *v;
 1936         register_t *retval;
 1937 {
 1938         struct sys_pread_args /* {
 1939                 syscallarg(int) fd;
 1940                 syscallarg(void *) buf;
 1941                 syscallarg(size_t) nbyte;
 1942                 syscallarg(off_t) offset;
 1943         } */ *uap = v;
 1944         struct proc *p = l->l_proc;
 1945         struct filedesc *fdp = p->p_fd;
 1946         struct file *fp;
 1947         struct vnode *vp;
 1948         off_t offset;
 1949         int error, fd = SCARG(uap, fd);
 1950 
 1951         if ((fp = fd_getfile(fdp, fd)) == NULL)
 1952                 return (EBADF);
 1953 
 1954         if ((fp->f_flag & FREAD) == 0) {
 1955                 simple_unlock(&fp->f_slock);
 1956                 return (EBADF);
 1957         }
 1958 
 1959         FILE_USE(fp);
 1960 
 1961         vp = (struct vnode *)fp->f_data;
 1962         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 1963                 error = ESPIPE;
 1964                 goto out;
 1965         }
 1966 
 1967         offset = SCARG(uap, offset);
 1968 
 1969         /*
 1970          * XXX This works because no file systems actually
 1971          * XXX take any action on the seek operation.
 1972          */
 1973         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 1974                 goto out;
 1975 
 1976         /* dofileread() will unuse the descriptor for us */
 1977         return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 1978             &offset, 0, retval));
 1979 
 1980  out:
 1981         FILE_UNUSE(fp, p);
 1982         return (error);
 1983 }
 1984 
 1985 /*
 1986  * Positional scatter read system call.
 1987  */
 1988 int
 1989 sys_preadv(l, v, retval)
 1990         struct lwp *l;
 1991         void *v;
 1992         register_t *retval;
 1993 {
 1994         struct sys_preadv_args /* {
 1995                 syscallarg(int) fd;
 1996                 syscallarg(const struct iovec *) iovp;
 1997                 syscallarg(int) iovcnt;
 1998                 syscallarg(off_t) offset;
 1999         } */ *uap = v;
 2000         struct proc *p = l->l_proc;
 2001         struct filedesc *fdp = p->p_fd;
 2002         struct file *fp;
 2003         struct vnode *vp;
 2004         off_t offset;
 2005         int error, fd = SCARG(uap, fd);
 2006 
 2007         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2008                 return (EBADF);
 2009 
 2010         if ((fp->f_flag & FREAD) == 0) {
 2011                 simple_unlock(&fp->f_slock);
 2012                 return (EBADF);
 2013         }
 2014 
 2015         FILE_USE(fp);
 2016 
 2017         vp = (struct vnode *)fp->f_data;
 2018         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2019                 error = ESPIPE;
 2020                 goto out;
 2021         }
 2022 
 2023         offset = SCARG(uap, offset);
 2024 
 2025         /*
 2026          * XXX This works because no file systems actually
 2027          * XXX take any action on the seek operation.
 2028          */
 2029         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2030                 goto out;
 2031 
 2032         /* dofilereadv() will unuse the descriptor for us */
 2033         return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 2034             &offset, 0, retval));
 2035 
 2036  out:
 2037         FILE_UNUSE(fp, p);
 2038         return (error);
 2039 }
 2040 
 2041 /*
 2042  * Positional write system call.
 2043  */
 2044 int
 2045 sys_pwrite(l, v, retval)
 2046         struct lwp *l;
 2047         void *v;
 2048         register_t *retval;
 2049 {
 2050         struct sys_pwrite_args /* {
 2051                 syscallarg(int) fd;
 2052                 syscallarg(const void *) buf;
 2053                 syscallarg(size_t) nbyte;
 2054                 syscallarg(off_t) offset;
 2055         } */ *uap = v;
 2056         struct proc *p = l->l_proc;
 2057         struct filedesc *fdp = p->p_fd;
 2058         struct file *fp;
 2059         struct vnode *vp;
 2060         off_t offset;
 2061         int error, fd = SCARG(uap, fd);
 2062 
 2063         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2064                 return (EBADF);
 2065 
 2066         if ((fp->f_flag & FWRITE) == 0) {
 2067                 simple_unlock(&fp->f_slock);
 2068                 return (EBADF);
 2069         }
 2070 
 2071         FILE_USE(fp);
 2072 
 2073         vp = (struct vnode *)fp->f_data;
 2074         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2075                 error = ESPIPE;
 2076                 goto out;
 2077         }
 2078 
 2079         offset = SCARG(uap, offset);
 2080 
 2081         /*
 2082          * XXX This works because no file systems actually
 2083          * XXX take any action on the seek operation.
 2084          */
 2085         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2086                 goto out;
 2087 
 2088         /* dofilewrite() will unuse the descriptor for us */
 2089         return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2090             &offset, 0, retval));
 2091 
 2092  out:
 2093         FILE_UNUSE(fp, p);
 2094         return (error);
 2095 }
 2096 
 2097 /*
 2098  * Positional gather write system call.
 2099  */
 2100 int
 2101 sys_pwritev(l, v, retval)
 2102         struct lwp *l;
 2103         void *v;
 2104         register_t *retval;
 2105 {
 2106         struct sys_pwritev_args /* {
 2107                 syscallarg(int) fd;
 2108                 syscallarg(const struct iovec *) iovp;
 2109                 syscallarg(int) iovcnt;
 2110                 syscallarg(off_t) offset;
 2111         } */ *uap = v;
 2112         struct proc *p = l->l_proc;
 2113         struct filedesc *fdp = p->p_fd;
 2114         struct file *fp;
 2115         struct vnode *vp;
 2116         off_t offset;
 2117         int error, fd = SCARG(uap, fd);
 2118 
 2119         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2120                 return (EBADF);
 2121 
 2122         if ((fp->f_flag & FWRITE) == 0) {
 2123                 simple_unlock(&fp->f_slock);
 2124                 return (EBADF);
 2125         }
 2126 
 2127         FILE_USE(fp);
 2128 
 2129         vp = (struct vnode *)fp->f_data;
 2130         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2131                 error = ESPIPE;
 2132                 goto out;
 2133         }
 2134 
 2135         offset = SCARG(uap, offset);
 2136 
 2137         /*
 2138          * XXX This works because no file systems actually
 2139          * XXX take any action on the seek operation.
 2140          */
 2141         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2142                 goto out;
 2143 
 2144         /* dofilewritev() will unuse the descriptor for us */
 2145         return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 2146             &offset, 0, retval));
 2147 
 2148  out:
 2149         FILE_UNUSE(fp, p);
 2150         return (error);
 2151 }
 2152 
 2153 /*
 2154  * Check access permissions.
 2155  */
 2156 int
 2157 sys_access(l, v, retval)
 2158         struct lwp *l;
 2159         void *v;
 2160         register_t *retval;
 2161 {
 2162         struct sys_access_args /* {
 2163                 syscallarg(const char *) path;
 2164                 syscallarg(int) flags;
 2165         } */ *uap = v;
 2166         struct proc *p = l->l_proc;
 2167         struct ucred *cred;
 2168         struct vnode *vp;
 2169         int error, flags;
 2170         struct nameidata nd;
 2171 
 2172         cred = crdup(p->p_ucred);
 2173         cred->cr_uid = p->p_cred->p_ruid;
 2174         cred->cr_gid = p->p_cred->p_rgid;
 2175         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2176             SCARG(uap, path), p);
 2177         /* Override default credentials */
 2178         nd.ni_cnd.cn_cred = cred;
 2179         if ((error = namei(&nd)) != 0)
 2180                 goto out;
 2181         vp = nd.ni_vp;
 2182 
 2183         /* Flags == 0 means only check for existence. */
 2184         if (SCARG(uap, flags)) {
 2185                 flags = 0;
 2186                 if (SCARG(uap, flags) & R_OK)
 2187                         flags |= VREAD;
 2188                 if (SCARG(uap, flags) & W_OK)
 2189                         flags |= VWRITE;
 2190                 if (SCARG(uap, flags) & X_OK)
 2191                         flags |= VEXEC;
 2192 
 2193                 error = VOP_ACCESS(vp, flags, cred, p);
 2194                 if (!error && (flags & VWRITE))
 2195                         error = vn_writechk(vp);
 2196         }
 2197         vput(vp);
 2198 out:
 2199         crfree(cred);
 2200         return (error);
 2201 }
 2202 
 2203 /*
 2204  * Get file status; this version follows links.
 2205  */
 2206 /* ARGSUSED */
 2207 int
 2208 sys___stat13(l, v, retval)
 2209         struct lwp *l;
 2210         void *v;
 2211         register_t *retval;
 2212 {
 2213         struct sys___stat13_args /* {
 2214                 syscallarg(const char *) path;
 2215                 syscallarg(struct stat *) ub;
 2216         } */ *uap = v;
 2217         struct proc *p = l->l_proc;
 2218         struct stat sb;
 2219         int error;
 2220         struct nameidata nd;
 2221 
 2222         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2223             SCARG(uap, path), p);
 2224         if ((error = namei(&nd)) != 0)
 2225                 return (error);
 2226         error = vn_stat(nd.ni_vp, &sb, p);
 2227         vput(nd.ni_vp);
 2228         if (error)
 2229                 return (error);
 2230         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2231         return (error);
 2232 }
 2233 
 2234 /*
 2235  * Get file status; this version does not follow links.
 2236  */
 2237 /* ARGSUSED */
 2238 int
 2239 sys___lstat13(l, v, retval)
 2240         struct lwp *l;
 2241         void *v;
 2242         register_t *retval;
 2243 {
 2244         struct sys___lstat13_args /* {
 2245                 syscallarg(const char *) path;
 2246                 syscallarg(struct stat *) ub;
 2247         } */ *uap = v;
 2248         struct proc *p = l->l_proc;
 2249         struct stat sb;
 2250         int error;
 2251         struct nameidata nd;
 2252 
 2253         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2254             SCARG(uap, path), p);
 2255         if ((error = namei(&nd)) != 0)
 2256                 return (error);
 2257         error = vn_stat(nd.ni_vp, &sb, p);
 2258         vput(nd.ni_vp);
 2259         if (error)
 2260                 return (error);
 2261         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2262         return (error);
 2263 }
 2264 
 2265 /*
 2266  * Get configurable pathname variables.
 2267  */
 2268 /* ARGSUSED */
 2269 int
 2270 sys_pathconf(l, v, retval)
 2271         struct lwp *l;
 2272         void *v;
 2273         register_t *retval;
 2274 {
 2275         struct sys_pathconf_args /* {
 2276                 syscallarg(const char *) path;
 2277                 syscallarg(int) name;
 2278         } */ *uap = v;
 2279         struct proc *p = l->l_proc;
 2280         int error;
 2281         struct nameidata nd;
 2282 
 2283         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2284             SCARG(uap, path), p);
 2285         if ((error = namei(&nd)) != 0)
 2286                 return (error);
 2287         error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
 2288         vput(nd.ni_vp);
 2289         return (error);
 2290 }
 2291 
 2292 /*
 2293  * Return target name of a symbolic link.
 2294  */
 2295 /* ARGSUSED */
 2296 int
 2297 sys_readlink(l, v, retval)
 2298         struct lwp *l;
 2299         void *v;
 2300         register_t *retval;
 2301 {
 2302         struct sys_readlink_args /* {
 2303                 syscallarg(const char *) path;
 2304                 syscallarg(char *) buf;
 2305                 syscallarg(size_t) count;
 2306         } */ *uap = v;
 2307         struct proc *p = l->l_proc;
 2308         struct vnode *vp;
 2309         struct iovec aiov;
 2310         struct uio auio;
 2311         int error;
 2312         struct nameidata nd;
 2313 
 2314         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2315             SCARG(uap, path), p);
 2316         if ((error = namei(&nd)) != 0)
 2317                 return (error);
 2318         vp = nd.ni_vp;
 2319         if (vp->v_type != VLNK)
 2320                 error = EINVAL;
 2321         else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
 2322             (error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) == 0) {
 2323                 aiov.iov_base = SCARG(uap, buf);
 2324                 aiov.iov_len = SCARG(uap, count);
 2325                 auio.uio_iov = &aiov;
 2326                 auio.uio_iovcnt = 1;
 2327                 auio.uio_offset = 0;
 2328                 auio.uio_rw = UIO_READ;
 2329                 auio.uio_segflg = UIO_USERSPACE;
 2330                 auio.uio_procp = p;
 2331                 auio.uio_resid = SCARG(uap, count);
 2332                 error = VOP_READLINK(vp, &auio, p->p_ucred);
 2333         }
 2334         vput(vp);
 2335         *retval = SCARG(uap, count) - auio.uio_resid;
 2336         return (error);
 2337 }
 2338 
 2339 /*
 2340  * Change flags of a file given a path name.
 2341  */
 2342 /* ARGSUSED */
 2343 int
 2344 sys_chflags(l, v, retval)
 2345         struct lwp *l;
 2346         void *v;
 2347         register_t *retval;
 2348 {
 2349         struct sys_chflags_args /* {
 2350                 syscallarg(const char *) path;
 2351                 syscallarg(u_long) flags;
 2352         } */ *uap = v;
 2353         struct proc *p = l->l_proc;
 2354         struct vnode *vp;
 2355         int error;
 2356         struct nameidata nd;
 2357 
 2358         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2359         if ((error = namei(&nd)) != 0)
 2360                 return (error);
 2361         vp = nd.ni_vp;
 2362         error = change_flags(vp, SCARG(uap, flags), p);
 2363         vput(vp);
 2364         return (error);
 2365 }
 2366 
 2367 /*
 2368  * Change flags of a file given a file descriptor.
 2369  */
 2370 /* ARGSUSED */
 2371 int
 2372 sys_fchflags(l, v, retval)
 2373         struct lwp *l;
 2374         void *v;
 2375         register_t *retval;
 2376 {
 2377         struct sys_fchflags_args /* {
 2378                 syscallarg(int) fd;
 2379                 syscallarg(u_long) flags;
 2380         } */ *uap = v;
 2381         struct proc *p = l->l_proc;
 2382         struct vnode *vp;
 2383         struct file *fp;
 2384         int error;
 2385 
 2386         /* getvnode() will use the descriptor for us */
 2387         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2388                 return (error);
 2389         vp = (struct vnode *)fp->f_data;
 2390         error = change_flags(vp, SCARG(uap, flags), p);
 2391         VOP_UNLOCK(vp, 0);
 2392         FILE_UNUSE(fp, p);
 2393         return (error);
 2394 }
 2395 
 2396 /*
 2397  * Change flags of a file given a path name; this version does
 2398  * not follow links.
 2399  */
 2400 int
 2401 sys_lchflags(l, v, retval)
 2402         struct lwp *l;
 2403         void *v;
 2404         register_t *retval;
 2405 {
 2406         struct sys_lchflags_args /* {
 2407                 syscallarg(const char *) path;
 2408                 syscallarg(u_long) flags;
 2409         } */ *uap = v;
 2410         struct proc *p = l->l_proc;
 2411         struct vnode *vp;
 2412         int error;
 2413         struct nameidata nd;
 2414 
 2415         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2416         if ((error = namei(&nd)) != 0)
 2417                 return (error);
 2418         vp = nd.ni_vp;
 2419         error = change_flags(vp, SCARG(uap, flags), p);
 2420         vput(vp);
 2421         return (error);
 2422 }
 2423 
 2424 /*
 2425  * Common routine to change flags of a file.
 2426  */
 2427 int
 2428 change_flags(vp, flags, p)
 2429         struct vnode *vp;
 2430         u_long flags;
 2431         struct proc *p;
 2432 {
 2433         struct mount *mp;
 2434         struct vattr vattr;
 2435         int error;
 2436 
 2437         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2438                 return (error);
 2439         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2440         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2441         /*
 2442          * Non-superusers cannot change the flags on devices, even if they
 2443          * own them.
 2444          */
 2445         if (suser(p->p_ucred, &p->p_acflag) != 0) {
 2446                 if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 2447                         goto out;
 2448                 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
 2449                         error = EINVAL;
 2450                         goto out;
 2451                 }
 2452         }
 2453         VATTR_NULL(&vattr);
 2454         vattr.va_flags = flags;
 2455         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2456 out:
 2457         vn_finished_write(mp, 0);
 2458         return (error);
 2459 }
 2460 
 2461 /*
 2462  * Change mode of a file given path name; this version follows links.
 2463  */
 2464 /* ARGSUSED */
 2465 int
 2466 sys_chmod(l, v, retval)
 2467         struct lwp *l;
 2468         void *v;
 2469         register_t *retval;
 2470 {
 2471         struct sys_chmod_args /* {
 2472                 syscallarg(const char *) path;
 2473                 syscallarg(int) mode;
 2474         } */ *uap = v;
 2475         struct proc *p = l->l_proc;
 2476         int error;
 2477         struct nameidata nd;
 2478 
 2479         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2480         if ((error = namei(&nd)) != 0)
 2481                 return (error);
 2482 
 2483         error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
 2484 
 2485         vrele(nd.ni_vp);
 2486         return (error);
 2487 }
 2488 
 2489 /*
 2490  * Change mode of a file given a file descriptor.
 2491  */
 2492 /* ARGSUSED */
 2493 int
 2494 sys_fchmod(l, v, retval)
 2495         struct lwp *l;
 2496         void *v;
 2497         register_t *retval;
 2498 {
 2499         struct sys_fchmod_args /* {
 2500                 syscallarg(int) fd;
 2501                 syscallarg(int) mode;
 2502         } */ *uap = v;
 2503         struct proc *p = l->l_proc;
 2504         struct file *fp;
 2505         int error;
 2506 
 2507         /* getvnode() will use the descriptor for us */
 2508         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2509                 return (error);
 2510 
 2511         error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), p);
 2512         FILE_UNUSE(fp, p);
 2513         return (error);
 2514 }
 2515 
 2516 /*
 2517  * Change mode of a file given path name; this version does not follow links.
 2518  */
 2519 /* ARGSUSED */
 2520 int
 2521 sys_lchmod(l, v, retval)
 2522         struct lwp *l;
 2523         void *v;
 2524         register_t *retval;
 2525 {
 2526         struct sys_lchmod_args /* {
 2527                 syscallarg(const char *) path;
 2528                 syscallarg(int) mode;
 2529         } */ *uap = v;
 2530         struct proc *p = l->l_proc;
 2531         int error;
 2532         struct nameidata nd;
 2533 
 2534         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2535         if ((error = namei(&nd)) != 0)
 2536                 return (error);
 2537 
 2538         error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
 2539 
 2540         vrele(nd.ni_vp);
 2541         return (error);
 2542 }
 2543 
 2544 /*
 2545  * Common routine to set mode given a vnode.
 2546  */
 2547 static int
 2548 change_mode(vp, mode, p)
 2549         struct vnode *vp;
 2550         int mode;
 2551         struct proc *p;
 2552 {
 2553         struct mount *mp;
 2554         struct vattr vattr;
 2555         int error;
 2556 
 2557         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2558                 return (error);
 2559         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2560         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2561         VATTR_NULL(&vattr);
 2562         vattr.va_mode = mode & ALLPERMS;
 2563         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2564         VOP_UNLOCK(vp, 0);
 2565         vn_finished_write(mp, 0);
 2566         return (error);
 2567 }
 2568 
 2569 /*
 2570  * Set ownership given a path name; this version follows links.
 2571  */
 2572 /* ARGSUSED */
 2573 int
 2574 sys_chown(l, v, retval)
 2575         struct lwp *l;
 2576         void *v;
 2577         register_t *retval;
 2578 {
 2579         struct sys_chown_args /* {
 2580                 syscallarg(const char *) path;
 2581                 syscallarg(uid_t) uid;
 2582                 syscallarg(gid_t) gid;
 2583         } */ *uap = v;
 2584         struct proc *p = l->l_proc;
 2585         int error;
 2586         struct nameidata nd;
 2587 
 2588         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2589         if ((error = namei(&nd)) != 0)
 2590                 return (error);
 2591 
 2592         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
 2593 
 2594         vrele(nd.ni_vp);
 2595         return (error);
 2596 }
 2597 
 2598 /*
 2599  * Set ownership given a path name; this version follows links.
 2600  * Provides POSIX semantics.
 2601  */
 2602 /* ARGSUSED */
 2603 int
 2604 sys___posix_chown(l, v, retval)
 2605         struct lwp *l;
 2606         void *v;
 2607         register_t *retval;
 2608 {
 2609         struct sys_chown_args /* {
 2610                 syscallarg(const char *) path;
 2611                 syscallarg(uid_t) uid;
 2612                 syscallarg(gid_t) gid;
 2613         } */ *uap = v;
 2614         struct proc *p = l->l_proc;
 2615         int error;
 2616         struct nameidata nd;
 2617 
 2618         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2619         if ((error = namei(&nd)) != 0)
 2620                 return (error);
 2621 
 2622         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
 2623 
 2624         vrele(nd.ni_vp);
 2625         return (error);
 2626 }
 2627 
 2628 /*
 2629  * Set ownership given a file descriptor.
 2630  */
 2631 /* ARGSUSED */
 2632 int
 2633 sys_fchown(l, v, retval)
 2634         struct lwp *l;
 2635         void *v;
 2636         register_t *retval;
 2637 {
 2638         struct sys_fchown_args /* {
 2639                 syscallarg(int) fd;
 2640                 syscallarg(uid_t) uid;
 2641                 syscallarg(gid_t) gid;
 2642         } */ *uap = v;
 2643         struct proc *p = l->l_proc;
 2644         int error;
 2645         struct file *fp;
 2646 
 2647         /* getvnode() will use the descriptor for us */
 2648         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2649                 return (error);
 2650 
 2651         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2652             SCARG(uap, gid), p, 0);
 2653         FILE_UNUSE(fp, p);
 2654         return (error);
 2655 }
 2656 
 2657 /*
 2658  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
 2659  */
 2660 /* ARGSUSED */
 2661 int
 2662 sys___posix_fchown(l, v, retval)
 2663         struct lwp *l;
 2664         void *v;
 2665         register_t *retval;
 2666 {
 2667         struct sys_fchown_args /* {
 2668                 syscallarg(int) fd;
 2669                 syscallarg(uid_t) uid;
 2670                 syscallarg(gid_t) gid;
 2671         } */ *uap = v;
 2672         struct proc *p = l->l_proc;
 2673         int error;
 2674         struct file *fp;
 2675 
 2676         /* getvnode() will use the descriptor for us */
 2677         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2678                 return (error);
 2679 
 2680         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2681             SCARG(uap, gid), p, 1);
 2682         FILE_UNUSE(fp, p);
 2683         return (error);
 2684 }
 2685 
 2686 /*
 2687  * Set ownership given a path name; this version does not follow links.
 2688  */
 2689 /* ARGSUSED */
 2690 int
 2691 sys_lchown(l, v, retval)
 2692         struct lwp *l;
 2693         void *v;
 2694         register_t *retval;
 2695 {
 2696         struct sys_lchown_args /* {
 2697                 syscallarg(const char *) path;
 2698                 syscallarg(uid_t) uid;
 2699                 syscallarg(gid_t) gid;
 2700         } */ *uap = v;
 2701         struct proc *p = l->l_proc;
 2702         int error;
 2703         struct nameidata nd;
 2704 
 2705         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2706         if ((error = namei(&nd)) != 0)
 2707                 return (error);
 2708 
 2709         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
 2710 
 2711         vrele(nd.ni_vp);
 2712         return (error);
 2713 }
 2714 
 2715 /*
 2716  * Set ownership given a path name; this version does not follow links.
 2717  * Provides POSIX/XPG semantics.
 2718  */
 2719 /* ARGSUSED */
 2720 int
 2721 sys___posix_lchown(l, v, retval)
 2722         struct lwp *l;
 2723         void *v;
 2724         register_t *retval;
 2725 {
 2726         struct sys_lchown_args /* {
 2727                 syscallarg(const char *) path;
 2728                 syscallarg(uid_t) uid;
 2729                 syscallarg(gid_t) gid;
 2730         } */ *uap = v;
 2731         struct proc *p = l->l_proc;
 2732         int error;
 2733         struct nameidata nd;
 2734 
 2735         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2736         if ((error = namei(&nd)) != 0)
 2737                 return (error);
 2738 
 2739         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
 2740 
 2741         vrele(nd.ni_vp);
 2742         return (error);
 2743 }
 2744 
 2745 /*
 2746  * Common routine to set ownership given a vnode.
 2747  */
 2748 static int
 2749 change_owner(vp, uid, gid, p, posix_semantics)
 2750         struct vnode *vp;
 2751         uid_t uid;
 2752         gid_t gid;
 2753         struct proc *p;
 2754         int posix_semantics;
 2755 {
 2756         struct mount *mp;
 2757         struct vattr vattr;
 2758         mode_t newmode;
 2759         int error;
 2760 
 2761         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2762                 return (error);
 2763         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2764         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2765         if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 2766                 goto out;
 2767 
 2768 #define CHANGED(x) ((int)(x) != -1)
 2769         newmode = vattr.va_mode;
 2770         if (posix_semantics) {
 2771                 /*
 2772                  * POSIX/XPG semantics: if the caller is not the super-user,
 2773                  * clear set-user-id and set-group-id bits.  Both POSIX and
 2774                  * the XPG consider the behaviour for calls by the super-user
 2775                  * implementation-defined; we leave the set-user-id and set-
 2776                  * group-id settings intact in that case.
 2777                  */
 2778                 if (suser(p->p_ucred, NULL) != 0)
 2779                         newmode &= ~(S_ISUID | S_ISGID);
 2780         } else {
 2781                 /*
 2782                  * NetBSD semantics: when changing owner and/or group,
 2783                  * clear the respective bit(s).
 2784                  */
 2785                 if (CHANGED(uid))
 2786                         newmode &= ~S_ISUID;
 2787                 if (CHANGED(gid))
 2788                         newmode &= ~S_ISGID;
 2789         }
 2790         /* Update va_mode iff altered. */
 2791         if (vattr.va_mode == newmode)
 2792                 newmode = VNOVAL;
 2793 
 2794         VATTR_NULL(&vattr);
 2795         vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
 2796         vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
 2797         vattr.va_mode = newmode;
 2798         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2799 #undef CHANGED
 2800 
 2801 out:
 2802         VOP_UNLOCK(vp, 0);
 2803         vn_finished_write(mp, 0);
 2804         return (error);
 2805 }
 2806 
 2807 /*
 2808  * Set the access and modification times given a path name; this
 2809  * version follows links.
 2810  */
 2811 /* ARGSUSED */
 2812 int
 2813 sys_utimes(l, v, retval)
 2814         struct lwp *l;
 2815         void *v;
 2816         register_t *retval;
 2817 {
 2818         struct sys_utimes_args /* {
 2819                 syscallarg(const char *) path;
 2820                 syscallarg(const struct timeval *) tptr;
 2821         } */ *uap = v;
 2822         struct proc *p = l->l_proc;
 2823         int error;
 2824         struct nameidata nd;
 2825 
 2826         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2827         if ((error = namei(&nd)) != 0)
 2828                 return (error);
 2829 
 2830         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
 2831 
 2832         vrele(nd.ni_vp);
 2833         return (error);
 2834 }
 2835 
 2836 /*
 2837  * Set the access and modification times given a file descriptor.
 2838  */
 2839 /* ARGSUSED */
 2840 int
 2841 sys_futimes(l, v, retval)
 2842         struct lwp *l;
 2843         void *v;
 2844         register_t *retval;
 2845 {
 2846         struct sys_futimes_args /* {
 2847                 syscallarg(int) fd;
 2848                 syscallarg(const struct timeval *) tptr;
 2849         } */ *uap = v;
 2850         struct proc *p = l->l_proc;
 2851         int error;
 2852         struct file *fp;
 2853 
 2854         /* getvnode() will use the descriptor for us */
 2855         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2856                 return (error);
 2857 
 2858         error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), p);
 2859         FILE_UNUSE(fp, p);
 2860         return (error);
 2861 }
 2862 
 2863 /*
 2864  * Set the access and modification times given a path name; this
 2865  * version does not follow links.
 2866  */
 2867 /* ARGSUSED */
 2868 int
 2869 sys_lutimes(l, v, retval)
 2870         struct lwp *l;
 2871         void *v;
 2872         register_t *retval;
 2873 {
 2874         struct sys_lutimes_args /* {
 2875                 syscallarg(const char *) path;
 2876                 syscallarg(const struct timeval *) tptr;
 2877         } */ *uap = v;
 2878         struct proc *p = l->l_proc;
 2879         int error;
 2880         struct nameidata nd;
 2881 
 2882         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2883         if ((error = namei(&nd)) != 0)
 2884                 return (error);
 2885 
 2886         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
 2887 
 2888         vrele(nd.ni_vp);
 2889         return (error);
 2890 }
 2891 
 2892 /*
 2893  * Common routine to set access and modification times given a vnode.
 2894  */
 2895 static int
 2896 change_utimes(vp, tptr, p)
 2897         struct vnode *vp;
 2898         const struct timeval *tptr;
 2899         struct proc *p;
 2900 {
 2901         struct timeval tv[2];
 2902         struct mount *mp;
 2903         struct vattr vattr;
 2904         int error;
 2905 
 2906         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2907                 return (error);
 2908         VATTR_NULL(&vattr);
 2909         if (tptr == NULL) {
 2910                 microtime(&tv[0]);
 2911                 tv[1] = tv[0];
 2912                 vattr.va_vaflags |= VA_UTIMES_NULL;
 2913         } else {
 2914                 error = copyin(tptr, tv, sizeof(tv));
 2915                 if (error)
 2916                         goto out;
 2917         }
 2918         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2919         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2920         vattr.va_atime.tv_sec = tv[0].tv_sec;
 2921         vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
 2922         vattr.va_mtime.tv_sec = tv[1].tv_sec;
 2923         vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
 2924         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2925         VOP_UNLOCK(vp, 0);
 2926 out:
 2927         vn_finished_write(mp, 0);
 2928         return (error);
 2929 }
 2930 
 2931 /*
 2932  * Truncate a file given its path name.
 2933  */
 2934 /* ARGSUSED */
 2935 int
 2936 sys_truncate(l, v, retval)
 2937         struct lwp *l;
 2938         void *v;
 2939         register_t *retval;
 2940 {
 2941         struct sys_truncate_args /* {
 2942                 syscallarg(const char *) path;
 2943                 syscallarg(int) pad;
 2944                 syscallarg(off_t) length;
 2945         } */ *uap = v;
 2946         struct proc *p = l->l_proc;
 2947         struct vnode *vp;
 2948         struct mount *mp;
 2949         struct vattr vattr;
 2950         int error;
 2951         struct nameidata nd;
 2952 
 2953         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2954         if ((error = namei(&nd)) != 0)
 2955                 return (error);
 2956         vp = nd.ni_vp;
 2957         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 2958                 vrele(vp);
 2959                 return (error);
 2960         }
 2961         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2962         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2963         if (vp->v_type == VDIR)
 2964                 error = EISDIR;
 2965         else if ((error = vn_writechk(vp)) == 0 &&
 2966             (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
 2967                 VATTR_NULL(&vattr);
 2968                 vattr.va_size = SCARG(uap, length);
 2969                 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2970         }
 2971         vput(vp);
 2972         vn_finished_write(mp, 0);
 2973         return (error);
 2974 }
 2975 
 2976 /*
 2977  * Truncate a file given a file descriptor.
 2978  */
 2979 /* ARGSUSED */
 2980 int
 2981 sys_ftruncate(l, v, retval)
 2982         struct lwp *l;
 2983         void *v;
 2984         register_t *retval;
 2985 {
 2986         struct sys_ftruncate_args /* {
 2987                 syscallarg(int) fd;
 2988                 syscallarg(int) pad;
 2989                 syscallarg(off_t) length;
 2990         } */ *uap = v;
 2991         struct proc *p = l->l_proc;
 2992         struct mount *mp;
 2993         struct vattr vattr;
 2994         struct vnode *vp;
 2995         struct file *fp;
 2996         int error;
 2997 
 2998         /* getvnode() will use the descriptor for us */
 2999         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3000                 return (error);
 3001         if ((fp->f_flag & FWRITE) == 0) {
 3002                 error = EINVAL;
 3003                 goto out;
 3004         }
 3005         vp = (struct vnode *)fp->f_data;
 3006         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3007                 FILE_UNUSE(fp, p);
 3008                 return (error);
 3009         }
 3010         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 3011         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3012         if (vp->v_type == VDIR)
 3013                 error = EISDIR;
 3014         else if ((error = vn_writechk(vp)) == 0) {
 3015                 VATTR_NULL(&vattr);
 3016                 vattr.va_size = SCARG(uap, length);
 3017                 error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 3018         }
 3019         VOP_UNLOCK(vp, 0);
 3020         vn_finished_write(mp, 0);
 3021  out:
 3022         FILE_UNUSE(fp, p);
 3023         return (error);
 3024 }
 3025 
 3026 /*
 3027  * Sync an open file.
 3028  */
 3029 /* ARGSUSED */
 3030 int
 3031 sys_fsync(l, v, retval)
 3032         struct lwp *l;
 3033         void *v;
 3034         register_t *retval;
 3035 {
 3036         struct sys_fsync_args /* {
 3037                 syscallarg(int) fd;
 3038         } */ *uap = v;
 3039         struct proc *p = l->l_proc;
 3040         struct vnode *vp;
 3041         struct mount *mp;
 3042         struct file *fp;
 3043         int error;
 3044 
 3045         /* getvnode() will use the descriptor for us */
 3046         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3047                 return (error);
 3048         vp = (struct vnode *)fp->f_data;
 3049         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3050                 FILE_UNUSE(fp, p);
 3051                 return (error);
 3052         }
 3053         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3054         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, p);
 3055         if (error == 0 && bioops.io_fsync != NULL &&
 3056             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3057                 (*bioops.io_fsync)(vp, 0);
 3058         VOP_UNLOCK(vp, 0);
 3059         vn_finished_write(mp, 0);
 3060         FILE_UNUSE(fp, p);
 3061         return (error);
 3062 }
 3063 
 3064 /*
 3065  * Sync a range of file data.  API modeled after that found in AIX.
 3066  *
 3067  * FDATASYNC indicates that we need only save enough metadata to be able
 3068  * to re-read the written data.  Note we duplicate AIX's requirement that
 3069  * the file be open for writing.
 3070  */
 3071 /* ARGSUSED */
 3072 int
 3073 sys_fsync_range(l, v, retval)
 3074         struct lwp *l;
 3075         void *v;
 3076         register_t *retval;
 3077 {
 3078         struct sys_fsync_range_args /* {
 3079                 syscallarg(int) fd;
 3080                 syscallarg(int) flags;
 3081                 syscallarg(off_t) start;
 3082                 syscallarg(int) length;
 3083         } */ *uap = v;
 3084         struct proc *p = l->l_proc;
 3085         struct vnode *vp;
 3086         struct file *fp;
 3087         int flags, nflags;
 3088         off_t s, e, len;
 3089         int error;
 3090 
 3091         /* getvnode() will use the descriptor for us */
 3092         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3093                 return (error);
 3094 
 3095         if ((fp->f_flag & FWRITE) == 0) {
 3096                 error = EBADF;
 3097                 goto out;
 3098         }
 3099 
 3100         flags = SCARG(uap, flags);
 3101         if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
 3102             ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
 3103                 error = EINVAL;
 3104                 goto out;
 3105         }
 3106         /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
 3107         if (flags & FDATASYNC)
 3108                 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
 3109         else
 3110                 nflags = FSYNC_WAIT;
 3111         if (flags & FDISKSYNC)
 3112                 nflags |= FSYNC_CACHE;
 3113 
 3114         len = SCARG(uap, length);
 3115         /* If length == 0, we do the whole file, and s = l = 0 will do that */
 3116         if (len) {
 3117                 s = SCARG(uap, start);
 3118                 e = s + len;
 3119                 if (e < s) {
 3120                         FILE_UNUSE(fp, p);
 3121                         error = EINVAL;
 3122                         goto out;
 3123                 }
 3124         } else {
 3125                 e = 0;
 3126                 s = 0;
 3127         }
 3128 
 3129         vp = (struct vnode *)fp->f_data;
 3130         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3131         error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, p);
 3132 
 3133         if (error == 0 && bioops.io_fsync != NULL &&
 3134             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3135                 (*bioops.io_fsync)(vp, nflags);
 3136 
 3137         VOP_UNLOCK(vp, 0);
 3138 out:
 3139         FILE_UNUSE(fp, p);
 3140         return (error);
 3141 }
 3142 
 3143 /*
 3144  * Sync the data of an open file.
 3145  */
 3146 /* ARGSUSED */
 3147 int
 3148 sys_fdatasync(l, v, retval)
 3149         struct lwp *l;
 3150         void *v;
 3151         register_t *retval;
 3152 {
 3153         struct sys_fdatasync_args /* {
 3154                 syscallarg(int) fd;
 3155         } */ *uap = v;
 3156         struct proc *p = l->l_proc;
 3157         struct vnode *vp;
 3158         struct file *fp;
 3159         int error;
 3160 
 3161         /* getvnode() will use the descriptor for us */
 3162         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3163                 return (error);
 3164         if ((fp->f_flag & FWRITE) == 0) {
 3165                 FILE_UNUSE(fp, p);
 3166                 return (EBADF);
 3167         }
 3168         vp = (struct vnode *)fp->f_data;
 3169         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3170         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, p);
 3171         VOP_UNLOCK(vp, 0);
 3172         FILE_UNUSE(fp, p);
 3173         return (error);
 3174 }
 3175 
 3176 /*
 3177  * Rename files, (standard) BSD semantics frontend.
 3178  */
 3179 /* ARGSUSED */
 3180 int
 3181 sys_rename(l, v, retval)
 3182         struct lwp *l;
 3183         void *v;
 3184         register_t *retval;
 3185 {
 3186         struct sys_rename_args /* {
 3187                 syscallarg(const char *) from;
 3188                 syscallarg(const char *) to;
 3189         } */ *uap = v;
 3190         struct proc *p = l->l_proc;
 3191 
 3192         return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 0));
 3193 }
 3194 
 3195 /*
 3196  * Rename files, POSIX semantics frontend.
 3197  */
 3198 /* ARGSUSED */
 3199 int
 3200 sys___posix_rename(l, v, retval)
 3201         struct lwp *l;
 3202         void *v;
 3203         register_t *retval;
 3204 {
 3205         struct sys___posix_rename_args /* {
 3206                 syscallarg(const char *) from;
 3207                 syscallarg(const char *) to;
 3208         } */ *uap = v;
 3209         struct proc *p = l->l_proc;
 3210 
 3211         return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 1));
 3212 }
 3213 
 3214 /*
 3215  * Rename files.  Source and destination must either both be directories,
 3216  * or both not be directories.  If target is a directory, it must be empty.
 3217  * If `from' and `to' refer to the same object, the value of the `retain'
 3218  * argument is used to determine whether `from' will be
 3219  *
 3220  * (retain == 0)        deleted unless `from' and `to' refer to the same
 3221  *                      object in the file system's name space (BSD).
 3222  * (retain == 1)        always retained (POSIX).
 3223  */
 3224 static int
 3225 rename_files(from, to, p, retain)
 3226         const char *from, *to;
 3227         struct proc *p;
 3228         int retain;
 3229 {
 3230         struct mount *mp = NULL;
 3231         struct vnode *tvp, *fvp, *tdvp;
 3232         struct nameidata fromnd, tond;
 3233         int error;
 3234 
 3235         NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
 3236             from, p);
 3237         if ((error = namei(&fromnd)) != 0)
 3238                 return (error);
 3239         fvp = fromnd.ni_vp;
 3240         error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
 3241         if (error != 0) {
 3242                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3243                 vrele(fromnd.ni_dvp);
 3244                 vrele(fvp);
 3245                 if (fromnd.ni_startdir)
 3246                         vrele(fromnd.ni_startdir);
 3247                 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3248                 return (error);
 3249         }
 3250         NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
 3251             (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, p);
 3252         if ((error = namei(&tond)) != 0) {
 3253                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3254                 vrele(fromnd.ni_dvp);
 3255                 vrele(fvp);
 3256                 goto out1;
 3257         }
 3258         tdvp = tond.ni_dvp;
 3259         tvp = tond.ni_vp;
 3260 
 3261         if (tvp != NULL) {
 3262                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3263                         error = ENOTDIR;
 3264                         goto out;
 3265                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3266                         error = EISDIR;
 3267                         goto out;
 3268                 }
 3269         }
 3270 
 3271         if (fvp == tdvp)
 3272                 error = EINVAL;
 3273 
 3274         /*
 3275          * Source and destination refer to the same object.
 3276          */
 3277         if (fvp == tvp) {
 3278                 if (retain)
 3279                         error = -1;
 3280                 else if (fromnd.ni_dvp == tdvp &&
 3281                     fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 3282                     !memcmp(fromnd.ni_cnd.cn_nameptr,
 3283                           tond.ni_cnd.cn_nameptr,
 3284                           fromnd.ni_cnd.cn_namelen))
 3285                 error = -1;
 3286         }
 3287 
 3288 #ifdef VERIFIED_EXEC
 3289         if (!error)
 3290                 error = veriexec_renamechk(fvp, tvp, fromnd.ni_dirp,
 3291                                            tond.ni_dirp);
 3292 #endif /* VERIFIED_EXEC */
 3293 
 3294 out:
 3295         if (!error) {
 3296                 VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
 3297                 if (fromnd.ni_dvp != tdvp)
 3298                         VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3299                 if (tvp) {
 3300                         VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
 3301                 }
 3302                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3303                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3304         } else {
 3305                 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
 3306                 if (tdvp == tvp)
 3307                         vrele(tdvp);
 3308                 else
 3309                         vput(tdvp);
 3310                 if (tvp)
 3311                         vput(tvp);
 3312                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3313                 vrele(fromnd.ni_dvp);
 3314                 vrele(fvp);
 3315         }
 3316         vrele(tond.ni_startdir);
 3317         PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
 3318 out1:
 3319         vn_finished_write(mp, 0);
 3320         if (fromnd.ni_startdir)
 3321                 vrele(fromnd.ni_startdir);
 3322         PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3323         return (error == -1 ? 0 : error);
 3324 }
 3325 
 3326 /*
 3327  * Make a directory file.
 3328  */
 3329 /* ARGSUSED */
 3330 int
 3331 sys_mkdir(l, v, retval)
 3332         struct lwp *l;
 3333         void *v;
 3334         register_t *retval;
 3335 {
 3336         struct sys_mkdir_args /* {
 3337                 syscallarg(const char *) path;
 3338                 syscallarg(int) mode;
 3339         } */ *uap = v;
 3340         struct proc *p = l->l_proc;
 3341         struct mount *mp;
 3342         struct vnode *vp;
 3343         struct vattr vattr;
 3344         int error;
 3345         struct nameidata nd;
 3346 
 3347 restart:
 3348         NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
 3349             SCARG(uap, path), p);
 3350         if ((error = namei(&nd)) != 0)
 3351                 return (error);
 3352         vp = nd.ni_vp;
 3353         if (vp != NULL) {
 3354                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3355                 if (nd.ni_dvp == vp)
 3356                         vrele(nd.ni_dvp);
 3357                 else
 3358                         vput(nd.ni_dvp);
 3359                 vrele(vp);
 3360                 return (EEXIST);
 3361         }
 3362         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3363                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3364                 if (nd.ni_dvp == vp)
 3365                         vrele(nd.ni_dvp);
 3366                 else
 3367                         vput(nd.ni_dvp);
 3368                 if ((error = vn_start_write(NULL, &mp,
 3369                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3370                         return (error);
 3371                 goto restart;
 3372         }
 3373         VATTR_NULL(&vattr);
 3374         vattr.va_type = VDIR;
 3375         vattr.va_mode =
 3376             (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
 3377         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3378         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3379         if (!error)
 3380                 vput(nd.ni_vp);
 3381         vn_finished_write(mp, 0);
 3382         return (error);
 3383 }
 3384 
 3385 /*
 3386  * Remove a directory file.
 3387  */
 3388 /* ARGSUSED */
 3389 int
 3390 sys_rmdir(l, v, retval)
 3391         struct lwp *l;
 3392         void *v;
 3393         register_t *retval;
 3394 {
 3395         struct sys_rmdir_args /* {
 3396                 syscallarg(const char *) path;
 3397         } */ *uap = v;
 3398         struct proc *p = l->l_proc;
 3399         struct mount *mp;
 3400         struct vnode *vp;
 3401         int error;
 3402         struct nameidata nd;
 3403 
 3404 restart:
 3405         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 3406             SCARG(uap, path), p);
 3407         if ((error = namei(&nd)) != 0)
 3408                 return (error);
 3409         vp = nd.ni_vp;
 3410         if (vp->v_type != VDIR) {
 3411                 error = ENOTDIR;
 3412                 goto out;
 3413         }
 3414         /*
 3415          * No rmdir "." please.
 3416          */
 3417         if (nd.ni_dvp == vp) {
 3418                 error = EINVAL;
 3419                 goto out;
 3420         }
 3421         /*
 3422          * The root of a mounted filesystem cannot be deleted.
 3423          */
 3424         if (vp->v_flag & VROOT) {
 3425                 error = EBUSY;
 3426                 goto out;
 3427         }
 3428         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3429                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3430                 if (nd.ni_dvp == vp)
 3431                         vrele(nd.ni_dvp);
 3432                 else
 3433                         vput(nd.ni_dvp);
 3434                 vput(vp);
 3435                 if ((error = vn_start_write(NULL, &mp,
 3436                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3437                         return (error);
 3438                 goto restart;
 3439         }
 3440         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3441         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 3442         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3443         vn_finished_write(mp, 0);
 3444         return (error);
 3445 
 3446 out:
 3447         VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3448         if (nd.ni_dvp == vp)
 3449                 vrele(nd.ni_dvp);
 3450         else
 3451                 vput(nd.ni_dvp);
 3452         vput(vp);
 3453         return (error);
 3454 }
 3455 
 3456 /*
 3457  * Read a block of directory entries in a file system independent format.
 3458  */
 3459 int
 3460 sys_getdents(l, v, retval)
 3461         struct lwp *l;
 3462         void *v;
 3463         register_t *retval;
 3464 {
 3465         struct sys_getdents_args /* {
 3466                 syscallarg(int) fd;
 3467                 syscallarg(char *) buf;
 3468                 syscallarg(size_t) count;
 3469         } */ *uap = v;
 3470         struct proc *p = l->l_proc;
 3471         struct file *fp;
 3472         int error, done;
 3473 
 3474         /* getvnode() will use the descriptor for us */
 3475         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3476                 return (error);
 3477         if ((fp->f_flag & FREAD) == 0) {
 3478                 error = EBADF;
 3479                 goto out;
 3480         }
 3481         error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
 3482                         SCARG(uap, count), &done, p, 0, 0);
 3483 #ifdef KTRACE
 3484         if (!error && KTRPOINT(p, KTR_GENIO)) {
 3485                 struct iovec iov;
 3486                 iov.iov_base = SCARG(uap, buf);
 3487                 iov.iov_len = done;
 3488                 ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov, done, 0);
 3489         }
 3490 #endif
 3491         *retval = done;
 3492  out:
 3493         FILE_UNUSE(fp, p);
 3494         return (error);
 3495 }
 3496 
 3497 /*
 3498  * Set the mode mask for creation of filesystem nodes.
 3499  */
 3500 int
 3501 sys_umask(l, v, retval)
 3502         struct lwp *l;
 3503         void *v;
 3504         register_t *retval;
 3505 {
 3506         struct sys_umask_args /* {
 3507                 syscallarg(mode_t) newmask;
 3508         } */ *uap = v;
 3509         struct proc *p = l->l_proc;
 3510         struct cwdinfo *cwdi;
 3511 
 3512         cwdi = p->p_cwdi;
 3513         *retval = cwdi->cwdi_cmask;
 3514         cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
 3515         return (0);
 3516 }
 3517 
 3518 /*
 3519  * Void all references to file by ripping underlying filesystem
 3520  * away from vnode.
 3521  */
 3522 /* ARGSUSED */
 3523 int
 3524 sys_revoke(l, v, retval)
 3525         struct lwp *l;
 3526         void *v;
 3527         register_t *retval;
 3528 {
 3529         struct sys_revoke_args /* {
 3530                 syscallarg(const char *) path;
 3531         } */ *uap = v;
 3532         struct proc *p = l->l_proc;
 3533         struct mount *mp;
 3534         struct vnode *vp;
 3535         struct vattr vattr;
 3536         int error;
 3537         struct nameidata nd;
 3538 
 3539         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3540         if ((error = namei(&nd)) != 0)
 3541                 return (error);
 3542         vp = nd.ni_vp;
 3543         if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 3544                 goto out;
 3545         if (p->p_ucred->cr_uid != vattr.va_uid &&
 3546             (error = suser(p->p_ucred, &p->p_acflag)) != 0)
 3547                 goto out;
 3548         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 3549                 goto out;
 3550         if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
 3551                 VOP_REVOKE(vp, REVOKEALL);
 3552         vn_finished_write(mp, 0);
 3553 out:
 3554         vrele(vp);
 3555         return (error);
 3556 }
 3557 
 3558 /*
 3559  * Convert a user file descriptor to a kernel file entry.
 3560  */
 3561 int
 3562 getvnode(fdp, fd, fpp)
 3563         struct filedesc *fdp;
 3564         int fd;
 3565         struct file **fpp;
 3566 {
 3567         struct vnode *vp;
 3568         struct file *fp;
 3569 
 3570         if ((fp = fd_getfile(fdp, fd)) == NULL)
 3571                 return (EBADF);
 3572 
 3573         FILE_USE(fp);
 3574 
 3575         if (fp->f_type != DTYPE_VNODE) {
 3576                 FILE_UNUSE(fp, NULL);
 3577                 return (EINVAL);
 3578         }
 3579 
 3580         vp = (struct vnode *)fp->f_data;
 3581         if (vp->v_type == VBAD) {
 3582                 FILE_UNUSE(fp, NULL);
 3583                 return (EBADF);
 3584         }
 3585 
 3586         *fpp = fp;
 3587         return (0);
 3588 }
 3589 
 3590 /*
 3591  * Push extended attribute configuration information into the VFS.
 3592  *
 3593  * NOTE: Not all file systems that support extended attributes will
 3594  * require the use of this system call.
 3595  */
 3596 int
 3597 sys_extattrctl(struct lwp *l, void *v, register_t *retval)
 3598 {
 3599         struct sys_extattrctl_args /* {
 3600                 syscallarg(const char *) path;
 3601                 syscallarg(int) cmd;
 3602                 syscallarg(const char *) filename;
 3603                 syscallarg(int) attrnamespace;
 3604                 syscallarg(const char *) attrname;
 3605         } */ *uap = v;
 3606         struct proc *p = l->l_proc;
 3607         struct vnode *vp;
 3608         struct nameidata nd;
 3609         struct mount *mp;
 3610         char attrname[EXTATTR_MAXNAMELEN];
 3611         int error;
 3612 
 3613         if (SCARG(uap, attrname) != NULL) {
 3614                 error = copyinstr(SCARG(uap, attrname), attrname,
 3615                     sizeof(attrname), NULL);
 3616                 if (error)
 3617                         return (error);
 3618         }
 3619 
 3620         vp = NULL;
 3621         if (SCARG(uap, filename) != NULL) {
 3622                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 3623                     SCARG(uap, filename), p);
 3624                 error = namei(&nd);
 3625                 if (error)
 3626                         return (error);
 3627                 vp = nd.ni_vp;
 3628         }
 3629 
 3630         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3631         error = namei(&nd);
 3632         if (error) {
 3633                 if (vp != NULL)
 3634                         vput(vp);
 3635                 return (error);
 3636         }
 3637 
 3638         error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
 3639         if (error) {
 3640                 if (vp != NULL)
 3641                         vput(vp);
 3642                 return (error);
 3643         }
 3644 
 3645         error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), vp,
 3646             SCARG(uap, attrnamespace),
 3647             SCARG(uap, attrname) != NULL ? attrname : NULL, p);
 3648 
 3649         vn_finished_write(mp, 0);
 3650 
 3651         if (vp != NULL)
 3652                 vrele(vp);
 3653 
 3654         return (error);
 3655 }
 3656 
 3657 /*
 3658  * Set a named extended attribute on a file or directory.
 3659  */
 3660 static int
 3661 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
 3662     const void *data, size_t nbytes, struct proc *p, register_t *retval)
 3663 {
 3664         struct mount *mp;
 3665         struct uio auio;
 3666         struct iovec aiov;
 3667         ssize_t cnt;
 3668         int error;
 3669 
 3670         error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
 3671         if (error)
 3672                 return (error);
 3673         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 3674         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3675 
 3676         aiov.iov_base = (caddr_t) data;         /* XXX kills const */
 3677         aiov.iov_len = nbytes;
 3678         auio.uio_iov = &aiov;
 3679         auio.uio_iovcnt = 1;
 3680         auio.uio_offset = 0;
 3681         if (nbytes > INT_MAX) {
 3682                 error = EINVAL;
 3683                 goto done;
 3684         }
 3685         auio.uio_resid = nbytes;
 3686         auio.uio_rw = UIO_WRITE;
 3687         auio.uio_segflg = UIO_USERSPACE;
 3688         auio.uio_procp = p;
 3689         cnt = nbytes;
 3690 
 3691         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
 3692             p->p_ucred, p);
 3693         cnt -= auio.uio_resid;
 3694         retval[0] = cnt;
 3695 
 3696  done:
 3697         VOP_UNLOCK(vp, 0);
 3698         vn_finished_write(mp, 0);
 3699         return (error);
 3700 }
 3701 
 3702 int
 3703 sys_extattr_set_fd(struct lwp *l, void *v, register_t *retval)
 3704 {
 3705         struct sys_extattr_set_fd_args /* {
 3706                 syscallarg(int) fd;
 3707                 syscallarg(int) attrnamespace;
 3708                 syscallarg(const char *) attrname;
 3709                 syscallarg(const void *) data;
 3710                 syscallarg(size_t) nbytes;
 3711         } */ *uap = v;
 3712         struct proc *p = l->l_proc;
 3713         struct file *fp;
 3714         struct vnode *vp;
 3715         char attrname[EXTATTR_MAXNAMELEN];
 3716         int error;
 3717 
 3718         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3719             NULL);
 3720         if (error)
 3721                 return (error);
 3722 
 3723         error = getvnode(p->p_fd, SCARG(uap, fd), &fp);
 3724         if (error)
 3725                 return (error);
 3726         vp = (struct vnode *) fp->f_data;
 3727 
 3728         error = extattr_set_vp(vp, SCARG(uap, attrnamespace), attrname,
 3729             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3730 
 3731         FILE_UNUSE(fp, p);
 3732         return (error);
 3733 }
 3734 
 3735 int
 3736 sys_extattr_set_file(struct lwp *l, void *v, register_t *retval)
 3737 {
 3738         struct sys_extattr_set_file_args /* {
 3739                 syscallarg(const char *) path;
 3740                 syscallarg(int) attrnamespace;
 3741                 syscallarg(const char *) attrname;
 3742                 syscallarg(const void *) data;
 3743                 syscallarg(size_t) nbytes;
 3744         } */ *uap = v;
 3745         struct proc *p = l->l_proc;
 3746         struct nameidata nd;
 3747         char attrname[EXTATTR_MAXNAMELEN];
 3748         int error;
 3749 
 3750         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3751             NULL);
 3752         if (error)
 3753                 return (error);
 3754 
 3755         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3756         error = namei(&nd);
 3757         if (error)
 3758                 return (error);
 3759 
 3760         error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 3761             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3762 
 3763         vrele(nd.ni_vp);
 3764         return (error);
 3765 }
 3766 
 3767 int
 3768 sys_extattr_set_link(struct lwp *l, void *v, register_t *retval)
 3769 {
 3770         struct sys_extattr_set_link_args /* {
 3771                 syscallarg(const char *) path;
 3772                 syscallarg(int) attrnamespace;
 3773                 syscallarg(const char *) attrname;
 3774                 syscallarg(const void *) data;
 3775                 syscallarg(size_t) nbytes;
 3776         } */ *uap = v;
 3777         struct proc *p = l->l_proc;
 3778         struct nameidata nd;
 3779         char attrname[EXTATTR_MAXNAMELEN];
 3780         int error;
 3781 
 3782         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3783             NULL);
 3784         if (error)
 3785                 return (error);
 3786 
 3787         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3788         error = namei(&nd);
 3789         if (error)
 3790                 return (error);
 3791 
 3792         error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 3793             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3794 
 3795         vrele(nd.ni_vp);
 3796         return (error);
 3797 }
 3798 
 3799 /*
 3800  * Get a named extended attribute on a file or directory.
 3801  */
 3802 static int
 3803 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
 3804     void *data, size_t nbytes, struct proc *p, register_t *retval)
 3805 {
 3806         struct uio auio, *auiop;
 3807         struct iovec aiov;
 3808         ssize_t cnt;
 3809         size_t size, *sizep;
 3810         int error;
 3811 
 3812         VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
 3813         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3814 
 3815         /*
 3816          * Slightly unusual semantics: if the user provides a NULL data
 3817          * pointer, they don't want to receive the data, just the maximum
 3818          * read length.
 3819          */
 3820         auiop = NULL;
 3821         sizep = NULL;
 3822         cnt = 0;
 3823         if (data != NULL) {
 3824                 aiov.iov_base = data;
 3825                 aiov.iov_len = nbytes;
 3826                 auio.uio_iov = &aiov;
 3827                 auio.uio_offset = 0;
 3828                 if (nbytes > INT_MAX) {
 3829                         error = EINVAL;
 3830                         goto done;
 3831                 }
 3832                 auio.uio_resid = nbytes;
 3833                 auio.uio_rw = UIO_READ;
 3834                 auio.uio_segflg = UIO_USERSPACE;
 3835                 auio.uio_procp = p;
 3836                 auiop = &auio;
 3837                 cnt = nbytes;
 3838         } else
 3839                 sizep = &size;
 3840 
 3841         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
 3842             p->p_ucred, p);
 3843 
 3844         if (auiop != NULL) {
 3845                 cnt -= auio.uio_resid;
 3846                 retval[0] = cnt;
 3847         } else
 3848                 retval[0] = size;
 3849 
 3850  done:
 3851         VOP_UNLOCK(vp, 0);
 3852         return (error);
 3853 }
 3854 
 3855 int
 3856 sys_extattr_get_fd(struct lwp *l, void *v, register_t *retval)
 3857 {
 3858         struct sys_extattr_get_fd_args /* {
 3859                 syscallarg(int) fd;
 3860                 syscallarg(int) attrnamespace;
 3861                 syscallarg(const char *) attrname;
 3862                 syscallarg(void *) data;
 3863                 syscallarg(size_t) nbytes;
 3864         } */ *uap = v;
 3865         struct proc *p = l->l_proc;
 3866         struct file *fp;
 3867         struct vnode *vp;
 3868         char attrname[EXTATTR_MAXNAMELEN];
 3869         int error;
 3870 
 3871         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3872             NULL);
 3873         if (error)
 3874                 return (error);
 3875 
 3876         error = getvnode(p->p_fd, SCARG(uap, fd), &fp);
 3877         if (error)
 3878                 return (error);
 3879         vp = (struct vnode *) fp->f_data;
 3880 
 3881         error = extattr_get_vp(vp, SCARG(uap, attrnamespace), attrname,
 3882             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3883 
 3884         FILE_UNUSE(fp, p);
 3885         return (error);
 3886 }
 3887 
 3888 int
 3889 sys_extattr_get_file(struct lwp *l, void *v, register_t *retval)
 3890 {
 3891         struct sys_extattr_get_file_args /* {
 3892                 syscallarg(const char *) path;
 3893                 syscallarg(int) attrnamespace;
 3894                 syscallarg(const char *) attrname;
 3895                 syscallarg(void *) data;
 3896                 syscallarg(size_t) nbytes;
 3897         } */ *uap = v;
 3898         struct proc *p = l->l_proc;
 3899         struct nameidata nd;
 3900         char attrname[EXTATTR_MAXNAMELEN];
 3901         int error;
 3902 
 3903         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3904             NULL);
 3905         if (error)
 3906                 return (error);
 3907 
 3908         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3909         error = namei(&nd);
 3910         if (error)
 3911                 return (error);
 3912 
 3913         error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 3914             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3915 
 3916         vrele(nd.ni_vp);
 3917         return (error);
 3918 }
 3919 
 3920 int
 3921 sys_extattr_get_link(struct lwp *l, void *v, register_t *retval)
 3922 {
 3923         struct sys_extattr_get_link_args /* {
 3924                 syscallarg(const char *) path;
 3925                 syscallarg(int) attrnamespace;
 3926                 syscallarg(const char *) attrname;
 3927                 syscallarg(void *) data;
 3928                 syscallarg(size_t) nbytes;
 3929         } */ *uap = v;
 3930         struct proc *p = l->l_proc;
 3931         struct nameidata nd;
 3932         char attrname[EXTATTR_MAXNAMELEN];
 3933         int error;
 3934 
 3935         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3936             NULL);
 3937         if (error)
 3938                 return (error);
 3939 
 3940         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3941         error = namei(&nd);
 3942         if (error)
 3943                 return (error);
 3944 
 3945         error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 3946             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 3947 
 3948         vrele(nd.ni_vp);
 3949         return (error);
 3950 }
 3951 
 3952 /*
 3953  * Delete a named extended attribute on a file or directory.
 3954  */
 3955 static int
 3956 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
 3957     struct proc *p)
 3958 {
 3959         struct mount *mp;
 3960         int error;
 3961 
 3962         error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
 3963         if (error)
 3964                 return (error);
 3965         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 3966         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3967 
 3968         error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, p->p_ucred, p);
 3969         if (error == EOPNOTSUPP)
 3970                 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 3971                     p->p_ucred, p);
 3972 
 3973         VOP_UNLOCK(vp, 0);
 3974         vn_finished_write(mp, 0);
 3975         return (error);
 3976 }
 3977 
 3978 int
 3979 sys_extattr_delete_fd(struct lwp *l, void *v, register_t *retval)
 3980 {
 3981         struct sys_extattr_delete_fd_args /* {
 3982                 syscallarg(int) fd;
 3983                 syscallarg(int) attrnamespace;
 3984                 syscallarg(const char *) attrname;
 3985         } */ *uap = v;
 3986         struct proc *p = l->l_proc;
 3987         struct file *fp;
 3988         struct vnode *vp;
 3989         char attrname[EXTATTR_MAXNAMELEN];
 3990         int error;
 3991 
 3992         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 3993             NULL);
 3994         if (error)
 3995                 return (error);
 3996 
 3997         error = getvnode(p->p_fd, SCARG(uap, fd), &fp);
 3998         if (error)
 3999                 return (error);
 4000         vp = (struct vnode *) fp->f_data;
 4001 
 4002         error = extattr_delete_vp(vp, SCARG(uap, attrnamespace), attrname, p);
 4003 
 4004         FILE_UNUSE(fp, p);
 4005         return (error);
 4006 }
 4007 
 4008 int
 4009 sys_extattr_delete_file(struct lwp *l, void *v, register_t *retval)
 4010 {
 4011         struct sys_extattr_delete_file_args /* {
 4012                 syscallarg(const char *) path;
 4013                 syscallarg(int) attrnamespace;
 4014                 syscallarg(const char *) attrname;
 4015         } */ *uap = v;
 4016         struct proc *p = l->l_proc;
 4017         struct nameidata nd;
 4018         char attrname[EXTATTR_MAXNAMELEN];
 4019         int error;
 4020 
 4021         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 4022             NULL);
 4023         if (error)
 4024                 return (error);
 4025 
 4026         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 4027         error = namei(&nd);
 4028         if (error)
 4029                 return (error);
 4030 
 4031         error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 4032             p);
 4033 
 4034         vrele(nd.ni_vp);
 4035         return (error);
 4036 }
 4037 
 4038 int
 4039 sys_extattr_delete_link(struct lwp *l, void *v, register_t *retval)
 4040 {
 4041         struct sys_extattr_delete_link_args /* {
 4042                 syscallarg(const char *) path;
 4043                 syscallarg(int) attrnamespace;
 4044                 syscallarg(const char *) attrname;
 4045         } */ *uap = v;
 4046         struct proc *p = l->l_proc;
 4047         struct nameidata nd;
 4048         char attrname[EXTATTR_MAXNAMELEN];
 4049         int error;
 4050 
 4051         error = copyinstr(SCARG(uap, attrname), attrname, sizeof(attrname),
 4052             NULL);
 4053         if (error)
 4054                 return (error);
 4055 
 4056         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 4057         error = namei(&nd);
 4058         if (error)
 4059                 return (error);
 4060 
 4061         error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
 4062             p);
 4063 
 4064         vrele(nd.ni_vp);
 4065         return (error);
 4066 }
 4067 
 4068 /*
 4069  * Retrieve a list of extended attributes on a file or directory.
 4070  */
 4071 static int
 4072 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes,
 4073     struct proc *p, register_t *retval)
 4074 {
 4075         struct uio auio, *auiop;
 4076         size_t size, *sizep;
 4077         struct iovec aiov;
 4078         ssize_t cnt;
 4079         int error;
 4080 
 4081         VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
 4082         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4083 
 4084         auiop = NULL;
 4085         sizep = NULL;
 4086         cnt = 0;
 4087         if (data != NULL) {
 4088                 aiov.iov_base = data;
 4089                 aiov.iov_len = nbytes;
 4090                 auio.uio_iov = &aiov;
 4091                 auio.uio_offset = 0;
 4092                 if (nbytes > INT_MAX) {
 4093                         error = EINVAL;
 4094                         goto done;
 4095                 }
 4096                 auio.uio_resid = nbytes;
 4097                 auio.uio_rw = UIO_READ;
 4098                 auio.uio_segflg = UIO_USERSPACE;
 4099                 auio.uio_procp = p;
 4100                 auiop = &auio;
 4101                 cnt = nbytes;
 4102         } else
 4103                 sizep = &size;
 4104 
 4105         error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
 4106             p->p_ucred, p);
 4107 
 4108         if (auiop != NULL) {
 4109                 cnt -= auio.uio_resid;
 4110                 retval[0] = cnt;
 4111         } else
 4112                 retval[0] = size;
 4113 
 4114  done:
 4115         VOP_UNLOCK(vp, 0);
 4116         return (error);
 4117 }
 4118 
 4119 int
 4120 sys_extattr_list_fd(struct lwp *l, void *v, register_t *retval)
 4121 {
 4122         struct sys_extattr_list_fd_args /* {
 4123                 syscallarg(int) fd;
 4124                 syscallarg(int) attrnamespace;
 4125                 syscallarg(void *) data;
 4126                 syscallarg(size_t) nbytes;
 4127         } */ *uap = v;
 4128         struct proc *p = l->l_proc;
 4129         struct file *fp;
 4130         struct vnode *vp;
 4131         int error;
 4132 
 4133         error = getvnode(p->p_fd, SCARG(uap, fd), &fp);
 4134         if (error)
 4135                 return (error);
 4136         vp = (struct vnode *) fp->f_data;
 4137 
 4138         error = extattr_list_vp(vp, SCARG(uap, attrnamespace),
 4139             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 4140 
 4141         FILE_UNUSE(fp, p);
 4142         return (error);
 4143 }
 4144 
 4145 int
 4146 sys_extattr_list_file(struct lwp *l, void *v, register_t *retval)
 4147 {
 4148         struct sys_extattr_list_file_args /* {
 4149                 syscallarg(const char *) path;
 4150                 syscallarg(int) attrnamespace;
 4151                 syscallarg(void *) data;
 4152                 syscallarg(size_t) nbytes;
 4153         } */ *uap = v;
 4154         struct proc *p = l->l_proc;
 4155         struct nameidata nd;
 4156         int error;
 4157 
 4158         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 4159         error = namei(&nd);
 4160         if (error)
 4161                 return (error);
 4162 
 4163         error = extattr_list_vp(nd.ni_vp, SCARG(uap, attrnamespace),
 4164             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 4165 
 4166         vrele(nd.ni_vp);
 4167         return (error);
 4168 }
 4169 
 4170 int
 4171 sys_extattr_list_link(struct lwp *l, void *v, register_t *retval)
 4172 {
 4173         struct sys_extattr_list_link_args /* {
 4174                 syscallarg(const char *) path;
 4175                 syscallarg(int) attrnamespace;
 4176                 syscallarg(void *) data;
 4177                 syscallarg(size_t) nbytes;
 4178         } */ *uap = v;
 4179         struct proc *p = l->l_proc;
 4180         struct nameidata nd;
 4181         int error;
 4182 
 4183         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 4184         error = namei(&nd);
 4185         if (error)
 4186                 return (error);
 4187 
 4188         error = extattr_list_vp(nd.ni_vp, SCARG(uap, attrnamespace),
 4189             SCARG(uap, data), SCARG(uap, nbytes), p, retval);
 4190 
 4191         vrele(nd.ni_vp);
 4192         return (error);
 4193 }
Cache object: e54e90d7f9abaf6e648da8020a4f4b58
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c