The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: vfs_syscalls.c,v 1.205.2.1 2007/02/06 21:16:37 bouyer Exp $    */
    2 
    3 /*
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.42 (Berkeley) 7/31/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.205.2.1 2007/02/06 21:16:37 bouyer Exp $");
   41 
   42 #include "opt_compat_netbsd.h"
   43 #include "opt_compat_43.h"
   44 #include "opt_ktrace.h"
   45 #include "fss.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/namei.h>
   50 #include <sys/filedesc.h>
   51 #include <sys/kernel.h>
   52 #include <sys/file.h>
   53 #include <sys/stat.h>
   54 #include <sys/vnode.h>
   55 #include <sys/mount.h>
   56 #include <sys/proc.h>
   57 #include <sys/uio.h>
   58 #include <sys/malloc.h>
   59 #include <sys/dirent.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/sa.h>
   62 #include <sys/syscallargs.h>
   63 #ifdef KTRACE
   64 #include <sys/ktrace.h>
   65 #endif
   66 
   67 #include <miscfs/genfs/genfs.h>
   68 #include <miscfs/syncfs/syncfs.h>
   69 
   70 #if NFSS > 0
   71 #include <dev/fssvar.h>
   72 #endif
   73 
   74 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
   75 
   76 static int change_dir(struct nameidata *, struct proc *);
   77 static int change_flags(struct vnode *, u_long, struct proc *);
   78 static int change_mode(struct vnode *, int, struct proc *p);
   79 static int change_owner(struct vnode *, uid_t, gid_t, struct proc *, int);
   80 static int change_utimes(struct vnode *vp, const struct timeval *,
   81                struct proc *p);
   82 static int rename_files(const char *, const char *, struct proc *, int);
   83 static int dostatfs(struct mount *, struct statfs *, struct proc *, int, int);
   84 
   85 void checkdirs(struct vnode *);
   86 
   87 int dovfsusermount = 0;
   88 
   89 /*
   90  * Virtual File System System Calls
   91  */
   92 
   93 /*
   94  * Mount a file system.
   95  */
   96 
   97 #if defined(COMPAT_09) || defined(COMPAT_43)
   98 /*
   99  * This table is used to maintain compatibility with 4.3BSD
  100  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
  101  *
  102  * Do not modify this table. It should only contain filesystems
  103  * supported by NetBSD 0.9 and 4.3BSD.
  104  */
  105 const char * const mountcompatnames[] = {
  106         NULL,           /* 0 = MOUNT_NONE */
  107         MOUNT_FFS,      /* 1 = MOUNT_UFS */
  108         MOUNT_NFS,      /* 2 */
  109         MOUNT_MFS,      /* 3 */
  110         MOUNT_MSDOS,    /* 4 */
  111         MOUNT_CD9660,   /* 5 = MOUNT_ISOFS */
  112         MOUNT_FDESC,    /* 6 */
  113         MOUNT_KERNFS,   /* 7 */
  114         NULL,           /* 8 = MOUNT_DEVFS */
  115         MOUNT_AFS,      /* 9 */
  116 };
  117 const int nmountcompatnames = sizeof(mountcompatnames) /
  118     sizeof(mountcompatnames[0]);
  119 #endif /* COMPAT_09 || COMPAT_43 */
  120 
  121 /* ARGSUSED */
  122 int
  123 sys_mount(l, v, retval)
  124         struct lwp *l;
  125         void *v;
  126         register_t *retval;
  127 {
  128         struct sys_mount_args /* {
  129                 syscallarg(const char *) type;
  130                 syscallarg(const char *) path;
  131                 syscallarg(int) flags;
  132                 syscallarg(void *) data;
  133         } */ *uap = v;
  134         struct proc *p = l->l_proc;
  135         struct vnode *vp;
  136         struct mount *mp;
  137         int error, flag = 0;
  138         char fstypename[MFSNAMELEN];
  139         struct vattr va;
  140         struct nameidata nd;
  141         struct vfsops *vfs;
  142 
  143         if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
  144             (error = suser(p->p_ucred, &p->p_acflag)))
  145                 return (error);
  146         /*
  147          * Get vnode to be covered
  148          */
  149         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
  150             SCARG(uap, path), p);
  151         if ((error = namei(&nd)) != 0)
  152                 return (error);
  153         vp = nd.ni_vp;
  154         /*
  155          * A lookup in VFS_MOUNT might result in an attempt to
  156          * lock this vnode again, so make the lock recursive.
  157          */
  158         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
  159         if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
  160                 if ((vp->v_flag & VROOT) == 0) {
  161                         vput(vp);
  162                         return (EINVAL);
  163                 }
  164                 mp = vp->v_mount;
  165                 flag = mp->mnt_flag;
  166                 vfs = mp->mnt_op;
  167                 /*
  168                  * We only allow the filesystem to be reloaded if it
  169                  * is currently mounted read-only.
  170                  */
  171                 if ((SCARG(uap, flags) & MNT_RELOAD) &&
  172                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  173                         vput(vp);
  174                         return (EOPNOTSUPP);    /* Needs translation */
  175                 }
  176                 /*
  177                  * In "highly secure" mode, don't let the caller do anything
  178                  * but downgrade a filesystem from read-write to read-only.
  179                  * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
  180                  */
  181                 if (securelevel >= 2 &&
  182                     SCARG(uap, flags) != MNT_GETARGS &&
  183                     SCARG(uap, flags) !=
  184                     (mp->mnt_flag | MNT_RDONLY |
  185                      MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
  186                         vput(vp);
  187                         return (EPERM);
  188                 }
  189                 mp->mnt_flag |= SCARG(uap, flags) &
  190                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  191                 /*
  192                  * Only root, or the user that did the original mount is
  193                  * permitted to update it.
  194                  */
  195                 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
  196                     mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
  197                     (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
  198                         vput(vp);
  199                         return (error);
  200                 }
  201                 /*
  202                  * Do not allow NFS export by non-root users. For non-root
  203                  * users, silently enforce MNT_NOSUID and MNT_NODEV, and
  204                  * MNT_NOEXEC if mount point is already MNT_NOEXEC.
  205                  */
  206                 if (p->p_ucred->cr_uid != 0) {
  207                         if (SCARG(uap, flags) & MNT_EXPORTED) {
  208                                 vput(vp);
  209                                 return (EPERM);
  210                         }
  211                         SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  212                         if (flag & MNT_NOEXEC)
  213                                 SCARG(uap, flags) |= MNT_NOEXEC;
  214                 }
  215                 if (vfs_busy(mp, LK_NOWAIT, 0)) {
  216                         vput(vp);
  217                         return (EPERM);
  218                 }
  219                 goto update;
  220         } else {
  221                 if (securelevel >= 2) {
  222                         vput(vp);
  223                         return (EPERM);
  224                 }
  225         }
  226         /*
  227          * If the user is not root, ensure that they own the directory
  228          * onto which we are attempting to mount.
  229          */
  230         if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0 ||
  231             (va.va_uid != p->p_ucred->cr_uid &&
  232                 (error = suser(p->p_ucred, &p->p_acflag)) != 0)) {
  233                 vput(vp);
  234                 return (error);
  235         }
  236         /*
  237          * Do not allow NFS export by non-root users. For non-root users,
  238          * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
  239          * mount point is already MNT_NOEXEC.
  240          */
  241         if (p->p_ucred->cr_uid != 0) {
  242                 if (SCARG(uap, flags) & MNT_EXPORTED) {
  243                         vput(vp);
  244                         return (EPERM);
  245                 }
  246                 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
  247                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
  248                         SCARG(uap, flags) |= MNT_NOEXEC;
  249         }
  250         if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
  251                 vput(vp);
  252                 return (error);
  253         }
  254         if (vp->v_type != VDIR) {
  255                 vput(vp);
  256                 return (ENOTDIR);
  257         }
  258         error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
  259         if (error) {
  260 #if defined(COMPAT_09) || defined(COMPAT_43)
  261                 /*
  262                  * Historically filesystem types were identified by number.
  263                  * If we get an integer for the filesystem type instead of a
  264                  * string, we check to see if it matches one of the historic
  265                  * filesystem types.
  266                  */
  267                 u_long fsindex = (u_long)SCARG(uap, type);
  268                 if (fsindex >= nmountcompatnames ||
  269                     mountcompatnames[fsindex] == NULL) {
  270                         vput(vp);
  271                         return (ENODEV);
  272                 }
  273                 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
  274 #else
  275                 vput(vp);
  276                 return (error);
  277 #endif
  278         }
  279 #ifdef  COMPAT_10
  280         /* Accept `ufs' as an alias for `ffs'. */
  281         if (!strncmp(fstypename, "ufs", MFSNAMELEN))
  282                 strncpy(fstypename, "ffs", MFSNAMELEN);
  283 #endif
  284         if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
  285                 vput(vp);
  286                 return (ENODEV);
  287         }
  288         if (vp->v_mountedhere != NULL) {
  289                 vput(vp);
  290                 return (EBUSY);
  291         }
  292 
  293         /*
  294          * Allocate and initialize the file system.
  295          */
  296         mp = (struct mount *)malloc((u_long)sizeof(struct mount),
  297                 M_MOUNT, M_WAITOK);
  298         memset((char *)mp, 0, (u_long)sizeof(struct mount));
  299         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  300         (void)vfs_busy(mp, LK_NOWAIT, 0);
  301         mp->mnt_op = vfs;
  302         vfs->vfs_refcount++;
  303         mp->mnt_vnodecovered = vp;
  304         mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
  305         mp->mnt_unmounter = NULL;
  306 
  307         /*
  308          * The underlying file system may refuse the mount for
  309          * various reasons.  Allow the user to force it to happen.
  310          */
  311         mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
  312  update:
  313         /*
  314          * Set the mount level flags.
  315          */
  316         if (SCARG(uap, flags) & MNT_RDONLY)
  317                 mp->mnt_flag |= MNT_RDONLY;
  318         else if (mp->mnt_flag & MNT_RDONLY)
  319                 mp->mnt_iflag |= IMNT_WANTRDWR;
  320         mp->mnt_flag &=
  321           ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  322             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  323             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
  324         mp->mnt_flag |= SCARG(uap, flags) &
  325            (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  326             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  327             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  328             MNT_IGNORE);
  329         /*
  330          * Mount the filesystem.
  331          */
  332         error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
  333         if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
  334                 if (mp->mnt_iflag & IMNT_WANTRDWR)
  335                         mp->mnt_flag &= ~MNT_RDONLY;
  336                 if (error || (mp->mnt_flag & MNT_GETARGS))
  337                         mp->mnt_flag = flag;
  338                 mp->mnt_flag &=~
  339                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  340                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  341                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
  342                         if (mp->mnt_syncer == NULL)
  343                                 error = vfs_allocate_syncvnode(mp);
  344                 } else {
  345                         if (mp->mnt_syncer != NULL)
  346                                 vfs_deallocate_syncvnode(mp);
  347                 }
  348                 vfs_unbusy(mp);
  349                 VOP_UNLOCK(vp, 0);
  350                 vrele(vp);
  351                 return (error);
  352         }
  353         /*
  354          * Put the new filesystem on the mount list after root.
  355          */
  356         cache_purge(vp);
  357         if (!error) {
  358                 mp->mnt_flag &=~
  359                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
  360                 mp->mnt_iflag &=~ IMNT_WANTRDWR;
  361                 vp->v_mountedhere = mp;
  362                 simple_lock(&mountlist_slock);
  363                 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  364                 simple_unlock(&mountlist_slock);
  365                 checkdirs(vp);
  366                 VOP_UNLOCK(vp, 0);
  367                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  368                         error = vfs_allocate_syncvnode(mp);
  369                 vfs_unbusy(mp);
  370                 (void) VFS_STATFS(mp, &mp->mnt_stat, p);
  371                 if ((error = VFS_START(mp, 0, p)))
  372                         vrele(vp);
  373         } else {
  374                 vp->v_mountedhere = (struct mount *)0;
  375                 vfs->vfs_refcount--;
  376                 vfs_unbusy(mp);
  377                 free(mp, M_MOUNT);
  378                 vput(vp);
  379         }
  380         return (error);
  381 }
  382 
  383 /*
  384  * Scan all active processes to see if any of them have a current
  385  * or root directory onto which the new filesystem has just been
  386  * mounted. If so, replace them with the new mount point.
  387  */
  388 void
  389 checkdirs(olddp)
  390         struct vnode *olddp;
  391 {
  392         struct cwdinfo *cwdi;
  393         struct vnode *newdp;
  394         struct proc *p;
  395 
  396         if (olddp->v_usecount == 1)
  397                 return;
  398         if (VFS_ROOT(olddp->v_mountedhere, &newdp))
  399                 panic("mount: lost mount");
  400         proclist_lock_read();
  401         LIST_FOREACH(p, &allproc, p_list) {
  402                 cwdi = p->p_cwdi;
  403                 if (cwdi->cwdi_cdir == olddp) {
  404                         vrele(cwdi->cwdi_cdir);
  405                         VREF(newdp);
  406                         cwdi->cwdi_cdir = newdp;
  407                 }
  408                 if (cwdi->cwdi_rdir == olddp) {
  409                         vrele(cwdi->cwdi_rdir);
  410                         VREF(newdp);
  411                         cwdi->cwdi_rdir = newdp;
  412                 }
  413         }
  414         proclist_unlock_read();
  415         if (rootvnode == olddp) {
  416                 vrele(rootvnode);
  417                 VREF(newdp);
  418                 rootvnode = newdp;
  419         }
  420         vput(newdp);
  421 }
  422 
  423 /*
  424  * Unmount a file system.
  425  *
  426  * Note: unmount takes a path to the vnode mounted on as argument,
  427  * not special file (as before).
  428  */
  429 /* ARGSUSED */
  430 int
  431 sys_unmount(l, v, retval)
  432         struct lwp *l;
  433         void *v;
  434         register_t *retval;
  435 {
  436         struct sys_unmount_args /* {
  437                 syscallarg(const char *) path;
  438                 syscallarg(int) flags;
  439         } */ *uap = v;
  440         struct proc *p = l->l_proc;
  441         struct vnode *vp;
  442         struct mount *mp;
  443         int error;
  444         struct nameidata nd;
  445 
  446         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
  447             SCARG(uap, path), p);
  448         if ((error = namei(&nd)) != 0)
  449                 return (error);
  450         vp = nd.ni_vp;
  451         mp = vp->v_mount;
  452 
  453         /*
  454          * Only root, or the user that did the original mount is
  455          * permitted to unmount this filesystem.
  456          */
  457         if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
  458             (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
  459                 vput(vp);
  460                 return (error);
  461         }
  462 
  463         /*
  464          * Don't allow unmounting the root file system.
  465          */
  466         if (mp->mnt_flag & MNT_ROOTFS) {
  467                 vput(vp);
  468                 return (EINVAL);
  469         }
  470 
  471         /*
  472          * Must be the root of the filesystem
  473          */
  474         if ((vp->v_flag & VROOT) == 0) {
  475                 vput(vp);
  476                 return (EINVAL);
  477         }
  478         vput(vp);
  479 
  480         /*
  481          * XXX Freeze syncer.  Must do this before locking the
  482          * mount point.  See dounmount() for details.
  483          */
  484         lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
  485 
  486         if (vfs_busy(mp, 0, 0)) {
  487                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  488                 return (EBUSY);
  489         }
  490 
  491         return (dounmount(mp, SCARG(uap, flags), p));
  492 }
  493 
  494 /*
  495  * Do the actual file system unmount. File system is assumed to have been
  496  * marked busy by the caller.
  497  */
  498 int
  499 dounmount(mp, flags, p)
  500         struct mount *mp;
  501         int flags;
  502         struct proc *p;
  503 {
  504         struct vnode *coveredvp;
  505         int error;
  506         int async;
  507         int used_syncer;
  508 
  509         simple_lock(&mountlist_slock);
  510         vfs_unbusy(mp);
  511         used_syncer = (mp->mnt_syncer != NULL);
  512 
  513         /*
  514          * XXX Syncer must be frozen when we get here.  This should really
  515          * be done on a per-mountpoint basis, but especially the softdep
  516          * code possibly called from the syncer doens't exactly work on a
  517          * per-mountpoint basis, so the softdep code would become a maze
  518          * of vfs_busy() calls.
  519          *
  520          * The caller of dounmount() must acquire syncer_lock because
  521          * the syncer itself acquires locks in syncer_lock -> vfs_busy
  522          * order, and we must preserve that order to avoid deadlock.
  523          *
  524          * So, if the file system did not use the syncer, now is
  525          * the time to release the syncer_lock.
  526          */
  527         if (used_syncer == 0)
  528                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  529 
  530         mp->mnt_iflag |= IMNT_UNMOUNT;
  531         mp->mnt_unmounter = p;
  532         lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
  533         vn_start_write(NULL, &mp, V_WAIT);
  534 
  535         if (mp->mnt_flag & MNT_EXPUBLIC)
  536                 vfs_setpublicfs(NULL, NULL, NULL);
  537         async = mp->mnt_flag & MNT_ASYNC;
  538         mp->mnt_flag &= ~MNT_ASYNC;
  539         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  540         if (mp->mnt_syncer != NULL)
  541                 vfs_deallocate_syncvnode(mp);
  542         if (((mp->mnt_flag & MNT_RDONLY) ||
  543 #if NFSS > 0
  544             (error = fss_umount_hook(mp, (flags & MNT_FORCE))) == 0 ||
  545 #endif
  546             (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
  547             (flags & MNT_FORCE))
  548                 error = VFS_UNMOUNT(mp, flags, p);
  549         vn_finished_write(mp, 0);
  550         simple_lock(&mountlist_slock);
  551         if (error) {
  552                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  553                         (void) vfs_allocate_syncvnode(mp);
  554                 mp->mnt_iflag &= ~IMNT_UNMOUNT;
  555                 mp->mnt_unmounter = NULL;
  556                 mp->mnt_flag |= async;
  557                 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
  558                     &mountlist_slock);
  559                 if (used_syncer)
  560                         lockmgr(&syncer_lock, LK_RELEASE, NULL);
  561                 while (mp->mnt_wcnt > 0) {
  562                         wakeup(mp);
  563                         tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 0);
  564                 }
  565                 return (error);
  566         }
  567         CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
  568         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
  569                 coveredvp->v_mountedhere = NULL;
  570                 vrele(coveredvp);
  571         }
  572         mp->mnt_op->vfs_refcount--;
  573         if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
  574                 panic("unmount: dangling vnode");
  575         mp->mnt_iflag |= IMNT_GONE;
  576         lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
  577         if (used_syncer)
  578                 lockmgr(&syncer_lock, LK_RELEASE, NULL);
  579         while(mp->mnt_wcnt > 0) {
  580                 wakeup(mp);
  581                 tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0);
  582         }
  583         free(mp, M_MOUNT);
  584         return (0);
  585 }
  586 
  587 /*
  588  * Sync each mounted filesystem.
  589  */
  590 #ifdef DEBUG
  591 int syncprt = 0;
  592 struct ctldebug debug0 = { "syncprt", &syncprt };
  593 #endif
  594 
  595 /* ARGSUSED */
  596 int
  597 sys_sync(l, v, retval)
  598         struct lwp *l;
  599         void *v;
  600         register_t *retval;
  601 {
  602         struct mount *mp, *nmp;
  603         int asyncflag;
  604         struct proc *p = l == NULL ? &proc0 : l->l_proc;
  605 
  606         simple_lock(&mountlist_slock);
  607         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
  608                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  609                         nmp = mp->mnt_list.cqe_prev;
  610                         continue;
  611                 }
  612                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  613                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  614                         asyncflag = mp->mnt_flag & MNT_ASYNC;
  615                         mp->mnt_flag &= ~MNT_ASYNC;
  616                         VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
  617                         if (asyncflag)
  618                                  mp->mnt_flag |= MNT_ASYNC;
  619                         vn_finished_write(mp, 0);
  620                 }
  621                 simple_lock(&mountlist_slock);
  622                 nmp = mp->mnt_list.cqe_prev;
  623                 vfs_unbusy(mp);
  624 
  625         }
  626         simple_unlock(&mountlist_slock);
  627 #ifdef DEBUG
  628         if (syncprt)
  629                 vfs_bufstats();
  630 #endif /* DEBUG */
  631         return (0);
  632 }
  633 
  634 /*
  635  * Change filesystem quotas.
  636  */
  637 /* ARGSUSED */
  638 int
  639 sys_quotactl(l, v, retval)
  640         struct lwp *l;
  641         void *v;
  642         register_t *retval;
  643 {
  644         struct sys_quotactl_args /* {
  645                 syscallarg(const char *) path;
  646                 syscallarg(int) cmd;
  647                 syscallarg(int) uid;
  648                 syscallarg(caddr_t) arg;
  649         } */ *uap = v;
  650         struct proc *p = l->l_proc;
  651         struct mount *mp;
  652         int error;
  653         struct nameidata nd;
  654 
  655         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
  656         if ((error = namei(&nd)) != 0)
  657                 return (error);
  658         error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
  659         vrele(nd.ni_vp);
  660         if (error)
  661                 return (error);
  662         error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
  663             SCARG(uap, arg), p);
  664         vn_finished_write(mp, 0);
  665         return (error);
  666 }
  667 
  668 static int
  669 dostatfs(struct mount *mp, struct statfs *sp, struct proc *p, int flags,
  670     int root)
  671 {
  672         struct cwdinfo *cwdi = p->p_cwdi;
  673         int error = 0;
  674 
  675         /*
  676          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  677          * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
  678          * overrides MNT_NOWAIT.
  679          */
  680         if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
  681             (flags != MNT_WAIT && flags != 0)) {
  682                 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
  683                 goto done;
  684         }
  685 
  686         if ((error = VFS_STATFS(mp, sp, p)) != 0) {
  687                 return error;
  688         }
  689 
  690         if (cwdi->cwdi_rdir == NULL)
  691                 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
  692 done:
  693         if (cwdi->cwdi_rdir != NULL) {
  694                 size_t len;
  695                 char *bp;
  696                 char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  697                 if (!path)
  698                         return ENOMEM;
  699 
  700                 bp = path + MAXPATHLEN;
  701                 *--bp = '\0';
  702                 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
  703                     MAXPATHLEN / 2, 0, p);
  704                 if (error) {
  705                         free(path, M_TEMP);
  706                         return error;
  707                 }
  708                 len = strlen(bp);
  709                 /*
  710                  * for mount points that are below our root, we can see
  711                  * them, so we fix up the pathname and return them. The
  712                  * rest we cannot see, so we don't allow viewing the
  713                  * data.
  714                  */
  715                 if (strncmp(bp, sp->f_mntonname, len) == 0) {
  716                         strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
  717                             sizeof(sp->f_mntonname));
  718                         if (sp->f_mntonname[0] == '\0')
  719                                 (void)strlcpy(sp->f_mntonname, "/",
  720                                     sizeof(sp->f_mntonname));
  721                 } else {
  722                         if (root)
  723                                 (void)strlcpy(sp->f_mntonname, "/",
  724                                     sizeof(sp->f_mntonname));
  725                         else
  726                                 error = EPERM;
  727                 }
  728                 free(path, M_TEMP);
  729         }
  730         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  731         sp->f_oflags = sp->f_flags & 0xffff;
  732         return error;
  733 }
  734 
  735 /*
  736  * Get filesystem statistics.
  737  */
  738 /* ARGSUSED */
  739 int
  740 sys_statfs(l, v, retval)
  741         struct lwp *l;
  742         void *v;
  743         register_t *retval;
  744 {
  745         struct sys_statfs_args /* {
  746                 syscallarg(const char *) path;
  747                 syscallarg(struct statfs *) buf;
  748         } */ *uap = v;
  749         struct proc *p = l->l_proc;
  750         struct mount *mp;
  751         struct statfs sbuf;
  752         int error;
  753         struct nameidata nd;
  754 
  755         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
  756         if ((error = namei(&nd)) != 0)
  757                 return error;
  758         mp = nd.ni_vp->v_mount;
  759         vrele(nd.ni_vp);
  760         if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
  761                 return error;
  762         return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
  763 }
  764 
  765 /*
  766  * Get filesystem statistics.
  767  */
  768 /* ARGSUSED */
  769 int
  770 sys_fstatfs(l, v, retval)
  771         struct lwp *l;
  772         void *v;
  773         register_t *retval;
  774 {
  775         struct sys_fstatfs_args /* {
  776                 syscallarg(int) fd;
  777                 syscallarg(struct statfs *) buf;
  778         } */ *uap = v;
  779         struct proc *p = l->l_proc;
  780         struct file *fp;
  781         struct mount *mp;
  782         struct statfs sbuf;
  783         int error;
  784 
  785         /* getvnode() will use the descriptor for us */
  786         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
  787                 return (error);
  788         mp = ((struct vnode *)fp->f_data)->v_mount;
  789         if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
  790                 goto out;
  791         error = copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
  792  out:
  793         FILE_UNUSE(fp, p);
  794         return error;
  795 }
  796 
  797 
  798 /*
  799  * Get statistics on all filesystems.
  800  */
  801 int
  802 sys_getfsstat(l, v, retval)
  803         struct lwp *l;
  804         void *v;
  805         register_t *retval;
  806 {
  807         struct sys_getfsstat_args /* {
  808                 syscallarg(struct statfs *) buf;
  809                 syscallarg(long) bufsize;
  810                 syscallarg(int) flags;
  811         } */ *uap = v;
  812         int root = 0;
  813         struct proc *p = l->l_proc;
  814         struct mount *mp, *nmp;
  815         struct statfs sbuf;
  816         caddr_t sfsp;
  817         long count, maxcount, error = 0;
  818 
  819         maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
  820         sfsp = (caddr_t)SCARG(uap, buf);
  821         simple_lock(&mountlist_slock);
  822         count = 0;
  823         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
  824              mp = nmp) {
  825                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
  826                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
  827                         continue;
  828                 }
  829                 if (sfsp && count < maxcount) {
  830                         error = dostatfs(mp, &sbuf, p, SCARG(uap, flags), 0);
  831                         if (error) {
  832                                 simple_lock(&mountlist_slock);
  833                                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  834                                 vfs_unbusy(mp);
  835                                 continue;
  836                         }
  837                         error = copyout(&sbuf, sfsp, sizeof(sbuf));
  838                         if (error) {
  839                                 vfs_unbusy(mp);
  840                                 return (error);
  841                         }
  842                         sfsp += sizeof(sbuf);
  843                         root |= strcmp(sbuf.f_mntonname, "/") == 0;
  844                 }
  845                 count++;
  846                 simple_lock(&mountlist_slock);
  847                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
  848                 vfs_unbusy(mp);
  849         }
  850         simple_unlock(&mountlist_slock);
  851         if (root == 0 && p->p_cwdi->cwdi_rdir) {
  852                 /*
  853                  * fake a root entry
  854                  */
  855                 if ((error = dostatfs(p->p_cwdi->cwdi_rdir->v_mount, &sbuf, p,
  856                     SCARG(uap, flags), 1)) != 0)
  857                         return error;
  858                 if (sfsp)
  859                         error = copyout(&sbuf, sfsp, sizeof(sbuf));
  860                 count++;
  861         }
  862         if (sfsp && count > maxcount)
  863                 *retval = maxcount;
  864         else
  865                 *retval = count;
  866         return error;
  867 }
  868 
  869 /*
  870  * Change current working directory to a given file descriptor.
  871  */
  872 /* ARGSUSED */
  873 int
  874 sys_fchdir(l, v, retval)
  875         struct lwp *l;
  876         void *v;
  877         register_t *retval;
  878 {
  879         struct sys_fchdir_args /* {
  880                 syscallarg(int) fd;
  881         } */ *uap = v;
  882         struct proc *p = l->l_proc;
  883         struct filedesc *fdp = p->p_fd;
  884         struct cwdinfo *cwdi = p->p_cwdi;
  885         struct vnode *vp, *tdp;
  886         struct mount *mp;
  887         struct file *fp;
  888         int error;
  889 
  890         /* getvnode() will use the descriptor for us */
  891         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
  892                 return (error);
  893         vp = (struct vnode *)fp->f_data;
  894 
  895         VREF(vp);
  896         vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
  897         if (vp->v_type != VDIR)
  898                 error = ENOTDIR;
  899         else
  900                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
  901         while (!error && (mp = vp->v_mountedhere) != NULL) {
  902                 if (vfs_busy(mp, 0, 0))
  903                         continue;
  904                 error = VFS_ROOT(mp, &tdp);
  905                 vfs_unbusy(mp);
  906                 if (error)
  907                         break;
  908                 vput(vp);
  909                 vp = tdp;
  910         }
  911         if (error) {
  912                 vput(vp);
  913                 goto out;
  914         }
  915         VOP_UNLOCK(vp, 0);
  916 
  917         /*
  918          * Disallow changing to a directory not under the process's
  919          * current root directory (if there is one).
  920          */
  921         if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, p)) {
  922                 vrele(vp);
  923                 error = EPERM;  /* operation not permitted */
  924                 goto out;
  925         }
  926 
  927         vrele(cwdi->cwdi_cdir);
  928         cwdi->cwdi_cdir = vp;
  929  out:
  930         FILE_UNUSE(fp, p);
  931         return (error);
  932 }
  933 
  934 /*
  935  * Change this process's notion of the root directory to a given file descriptor.
  936  */
  937 
  938 int
  939 sys_fchroot(l, v, retval)
  940         struct lwp *l;
  941         void *v;
  942         register_t *retval;
  943 {
  944         struct sys_fchroot_args *uap = v;
  945         struct proc *p = l->l_proc;
  946         struct filedesc *fdp = p->p_fd;
  947         struct cwdinfo *cwdi = p->p_cwdi;
  948         struct vnode    *vp;
  949         struct file     *fp;
  950         int              error;
  951 
  952         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  953                 return error;
  954         /* getvnode() will use the descriptor for us */
  955         if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
  956                 return error;
  957         vp = (struct vnode *) fp->f_data;
  958         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  959         if (vp->v_type != VDIR)
  960                 error = ENOTDIR;
  961         else
  962                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
  963         VOP_UNLOCK(vp, 0);
  964         if (error)
  965                 goto out;
  966         VREF(vp);
  967 
  968         /*
  969          * Prevent escaping from chroot by putting the root under
  970          * the working directory.  Silently chdir to / if we aren't
  971          * already there.
  972          */
  973         if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
  974                 /*
  975                  * XXX would be more failsafe to change directory to a
  976                  * deadfs node here instead
  977                  */
  978                 vrele(cwdi->cwdi_cdir);
  979                 VREF(vp);
  980                 cwdi->cwdi_cdir = vp;
  981         }
  982 
  983         if (cwdi->cwdi_rdir != NULL)
  984                 vrele(cwdi->cwdi_rdir);
  985         cwdi->cwdi_rdir = vp;
  986  out:
  987         FILE_UNUSE(fp, p);
  988         return (error);
  989 }
  990 
  991 
  992 
  993 /*
  994  * Change current working directory (``.'').
  995  */
  996 /* ARGSUSED */
  997 int
  998 sys_chdir(l, v, retval)
  999         struct lwp *l;
 1000         void *v;
 1001         register_t *retval;
 1002 {
 1003         struct sys_chdir_args /* {
 1004                 syscallarg(const char *) path;
 1005         } */ *uap = v;
 1006         struct proc *p = l->l_proc;
 1007         struct cwdinfo *cwdi = p->p_cwdi;
 1008         int error;
 1009         struct nameidata nd;
 1010 
 1011         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1012             SCARG(uap, path), p);
 1013         if ((error = change_dir(&nd, p)) != 0)
 1014                 return (error);
 1015         vrele(cwdi->cwdi_cdir);
 1016         cwdi->cwdi_cdir = nd.ni_vp;
 1017         return (0);
 1018 }
 1019 
 1020 /*
 1021  * Change notion of root (``/'') directory.
 1022  */
 1023 /* ARGSUSED */
 1024 int
 1025 sys_chroot(l, v, retval)
 1026         struct lwp *l;
 1027         void *v;
 1028         register_t *retval;
 1029 {
 1030         struct sys_chroot_args /* {
 1031                 syscallarg(const char *) path;
 1032         } */ *uap = v;
 1033         struct proc *p = l->l_proc;
 1034         struct cwdinfo *cwdi = p->p_cwdi;
 1035         struct vnode *vp;
 1036         int error;
 1037         struct nameidata nd;
 1038 
 1039         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1040                 return (error);
 1041         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1042             SCARG(uap, path), p);
 1043         if ((error = change_dir(&nd, p)) != 0)
 1044                 return (error);
 1045         if (cwdi->cwdi_rdir != NULL)
 1046                 vrele(cwdi->cwdi_rdir);
 1047         vp = nd.ni_vp;
 1048         cwdi->cwdi_rdir = vp;
 1049 
 1050         /*
 1051          * Prevent escaping from chroot by putting the root under
 1052          * the working directory.  Silently chdir to / if we aren't
 1053          * already there.
 1054          */
 1055         if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
 1056                 /*
 1057                  * XXX would be more failsafe to change directory to a
 1058                  * deadfs node here instead
 1059                  */
 1060                 vrele(cwdi->cwdi_cdir);
 1061                 VREF(vp);
 1062                 cwdi->cwdi_cdir = vp;
 1063         }
 1064 
 1065         return (0);
 1066 }
 1067 
 1068 /*
 1069  * Common routine for chroot and chdir.
 1070  */
 1071 static int
 1072 change_dir(ndp, p)
 1073         struct nameidata *ndp;
 1074         struct proc *p;
 1075 {
 1076         struct vnode *vp;
 1077         int error;
 1078 
 1079         if ((error = namei(ndp)) != 0)
 1080                 return (error);
 1081         vp = ndp->ni_vp;
 1082         if (vp->v_type != VDIR)
 1083                 error = ENOTDIR;
 1084         else
 1085                 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
 1086 
 1087         if (error)
 1088                 vput(vp);
 1089         else
 1090                 VOP_UNLOCK(vp, 0);
 1091         return (error);
 1092 }
 1093 
 1094 /*
 1095  * Check permissions, allocate an open file structure,
 1096  * and call the device open routine if any.
 1097  */
 1098 int
 1099 sys_open(l, v, retval)
 1100         struct lwp *l;
 1101         void *v;
 1102         register_t *retval;
 1103 {
 1104         struct sys_open_args /* {
 1105                 syscallarg(const char *) path;
 1106                 syscallarg(int) flags;
 1107                 syscallarg(int) mode;
 1108         } */ *uap = v;
 1109         struct proc *p = l->l_proc;
 1110         struct cwdinfo *cwdi = p->p_cwdi;
 1111         struct filedesc *fdp = p->p_fd;
 1112         struct file *fp;
 1113         struct vnode *vp;
 1114         int flags, cmode;
 1115         int type, indx, error;
 1116         struct flock lf;
 1117         struct nameidata nd;
 1118 
 1119         flags = FFLAGS(SCARG(uap, flags));
 1120         if ((flags & (FREAD | FWRITE)) == 0)
 1121                 return (EINVAL);
 1122         /* falloc() will use the file descriptor for us */
 1123         if ((error = falloc(p, &fp, &indx)) != 0)
 1124                 return (error);
 1125         cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
 1126         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 1127         l->l_dupfd = -indx - 1;                 /* XXX check for fdopen */
 1128         if ((error = vn_open(&nd, flags, cmode)) != 0) {
 1129                 FILE_UNUSE(fp, p);
 1130                 fdp->fd_ofiles[indx] = NULL;
 1131                 ffree(fp);
 1132                 if ((error == ENODEV || error == ENXIO) &&
 1133                     l->l_dupfd >= 0 &&                  /* XXX from fdopen */
 1134                     (error =
 1135                         dupfdopen(p, indx, l->l_dupfd, flags, error)) == 0) {
 1136                         *retval = indx;
 1137                         return (0);
 1138                 }
 1139                 if (error == ERESTART)
 1140                         error = EINTR;
 1141                 fdremove(fdp, indx);
 1142                 return (error);
 1143         }
 1144         l->l_dupfd = 0;
 1145         vp = nd.ni_vp;
 1146         fp->f_flag = flags & FMASK;
 1147         fp->f_type = DTYPE_VNODE;
 1148         fp->f_ops = &vnops;
 1149         fp->f_data = vp;
 1150         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1151                 lf.l_whence = SEEK_SET;
 1152                 lf.l_start = 0;
 1153                 lf.l_len = 0;
 1154                 if (flags & O_EXLOCK)
 1155                         lf.l_type = F_WRLCK;
 1156                 else
 1157                         lf.l_type = F_RDLCK;
 1158                 type = F_FLOCK;
 1159                 if ((flags & FNONBLOCK) == 0)
 1160                         type |= F_WAIT;
 1161                 VOP_UNLOCK(vp, 0);
 1162                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1163                 if (error) {
 1164                         (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
 1165                         FILE_UNUSE(fp, p);
 1166                         ffree(fp);
 1167                         fdremove(fdp, indx);
 1168                         return (error);
 1169                 }
 1170                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1171                 fp->f_flag |= FHASLOCK;
 1172         }
 1173         VOP_UNLOCK(vp, 0);
 1174         *retval = indx;
 1175         FILE_SET_MATURE(fp);
 1176         FILE_UNUSE(fp, p);
 1177         return (0);
 1178 }
 1179 
 1180 /*
 1181  * Get file handle system call
 1182  */
 1183 int
 1184 sys_getfh(l, v, retval)
 1185         struct lwp *l;
 1186         void *v;
 1187         register_t *retval;
 1188 {
 1189         struct sys_getfh_args /* {
 1190                 syscallarg(char *) fname;
 1191                 syscallarg(fhandle_t *) fhp;
 1192         } */ *uap = v;
 1193         struct proc *p = l->l_proc;
 1194         struct vnode *vp;
 1195         fhandle_t fh;
 1196         int error;
 1197         struct nameidata nd;
 1198 
 1199         /*
 1200          * Must be super user
 1201          */
 1202         error = suser(p->p_ucred, &p->p_acflag);
 1203         if (error)
 1204                 return (error);
 1205         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 1206             SCARG(uap, fname), p);
 1207         error = namei(&nd);
 1208         if (error)
 1209                 return (error);
 1210         vp = nd.ni_vp;
 1211         memset(&fh, 0, sizeof(fh));
 1212         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 1213         error = VFS_VPTOFH(vp, &fh.fh_fid);
 1214         vput(vp);
 1215         if (error)
 1216                 return (error);
 1217         error = copyout(&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh));
 1218         return (error);
 1219 }
 1220 
 1221 /*
 1222  * Open a file given a file handle.
 1223  *
 1224  * Check permissions, allocate an open file structure,
 1225  * and call the device open routine if any.
 1226  */
 1227 int
 1228 sys_fhopen(l, v, retval)
 1229         struct lwp *l;
 1230         void *v;
 1231         register_t *retval;
 1232 {
 1233         struct sys_fhopen_args /* {
 1234                 syscallarg(const fhandle_t *) fhp;
 1235                 syscallarg(int) flags;
 1236         } */ *uap = v;
 1237         struct proc *p = l->l_proc;
 1238         struct filedesc *fdp = p->p_fd;
 1239         struct file *fp;
 1240         struct vnode *vp = NULL;
 1241         struct mount *mp;
 1242         struct ucred *cred = p->p_ucred;
 1243         int flags;
 1244         struct file *nfp;
 1245         int type, indx, error=0;
 1246         struct flock lf;
 1247         struct vattr va;
 1248         fhandle_t fh;
 1249 
 1250         /*
 1251          * Must be super user
 1252          */
 1253         if ((error = suser(p->p_ucred, &p->p_acflag)))
 1254                 return (error);
 1255 
 1256         flags = FFLAGS(SCARG(uap, flags));
 1257         if ((flags & (FREAD | FWRITE)) == 0)
 1258                 return (EINVAL);
 1259         if ((flags & O_CREAT))
 1260                 return (EINVAL);
 1261         /* falloc() will use the file descriptor for us */
 1262         if ((error = falloc(p, &nfp, &indx)) != 0)
 1263                 return (error);
 1264         fp = nfp;
 1265         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1266                 goto bad;
 1267 
 1268         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
 1269                 error = ESTALE;
 1270                 goto bad;
 1271         }
 1272 
 1273         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
 1274                 vp = NULL;      /* most likely unnecessary sanity for bad: */
 1275                 goto bad;
 1276         }
 1277 
 1278         /* Now do an effective vn_open */
 1279 
 1280         if (vp->v_type == VSOCK) {
 1281                 error = EOPNOTSUPP;
 1282                 goto bad;
 1283         }
 1284         if (flags & FREAD) {
 1285                 if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
 1286                         goto bad;
 1287         }
 1288         if (flags & (FWRITE | O_TRUNC)) {
 1289                 if (vp->v_type == VDIR) {
 1290                         error = EISDIR;
 1291                         goto bad;
 1292                 }
 1293                 if ((error = vn_writechk(vp)) != 0 ||
 1294                     (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
 1295                         goto bad;
 1296         }
 1297         if (flags & O_TRUNC) {
 1298                 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 1299                         goto bad;
 1300                 VOP_UNLOCK(vp, 0);                      /* XXX */
 1301                 VOP_LEASE(vp, p, cred, LEASE_WRITE);
 1302                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 1303                 VATTR_NULL(&va);
 1304                 va.va_size = 0;
 1305                 error = VOP_SETATTR(vp, &va, cred, p);
 1306                 vn_finished_write(mp, 0);
 1307                 if (error)
 1308                         goto bad;
 1309         }
 1310         if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
 1311                 goto bad;
 1312         if (vp->v_type == VREG &&
 1313             uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
 1314                 error = EIO;
 1315                 goto bad;
 1316         }
 1317         if (flags & FWRITE)
 1318                 vp->v_writecount++;
 1319 
 1320         /* done with modified vn_open, now finish what sys_open does. */
 1321 
 1322         fp->f_flag = flags & FMASK;
 1323         fp->f_type = DTYPE_VNODE;
 1324         fp->f_ops = &vnops;
 1325         fp->f_data = vp;
 1326         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1327                 lf.l_whence = SEEK_SET;
 1328                 lf.l_start = 0;
 1329                 lf.l_len = 0;
 1330                 if (flags & O_EXLOCK)
 1331                         lf.l_type = F_WRLCK;
 1332                 else
 1333                         lf.l_type = F_RDLCK;
 1334                 type = F_FLOCK;
 1335                 if ((flags & FNONBLOCK) == 0)
 1336                         type |= F_WAIT;
 1337                 VOP_UNLOCK(vp, 0);
 1338                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1339                 if (error) {
 1340                         (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
 1341                         FILE_UNUSE(fp, p);
 1342                         ffree(fp);
 1343                         fdremove(fdp, indx);
 1344                         return (error);
 1345                 }
 1346                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1347                 fp->f_flag |= FHASLOCK;
 1348         }
 1349         VOP_UNLOCK(vp, 0);
 1350         *retval = indx;
 1351         FILE_SET_MATURE(fp);
 1352         FILE_UNUSE(fp, p);
 1353         return (0);
 1354 
 1355 bad:
 1356         FILE_UNUSE(fp, p);
 1357         ffree(fp);
 1358         fdremove(fdp, indx);
 1359         if (vp != NULL)
 1360                 vput(vp);
 1361         return (error);
 1362 }
 1363 
 1364 /* ARGSUSED */
 1365 int
 1366 sys_fhstat(l, v, retval)
 1367         struct lwp *l;
 1368         void *v;
 1369         register_t *retval;
 1370 {
 1371         struct sys_fhstat_args /* {
 1372                 syscallarg(const fhandle_t *) fhp;
 1373                 syscallarg(struct stat *) sb;
 1374         } */ *uap = v;
 1375         struct proc *p = l->l_proc;
 1376         struct stat sb;
 1377         int error;
 1378         fhandle_t fh;
 1379         struct mount *mp;
 1380         struct vnode *vp;
 1381 
 1382         /*
 1383          * Must be super user
 1384          */
 1385         if ((error = suser(p->p_ucred, &p->p_acflag)))
 1386                 return (error);
 1387 
 1388         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1389                 return (error);
 1390 
 1391         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 1392                 return (ESTALE);
 1393         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
 1394                 return (error);
 1395         error = vn_stat(vp, &sb, p);
 1396         vput(vp);
 1397         if (error)
 1398                 return (error);
 1399         error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
 1400         return (error);
 1401 }
 1402 
 1403 /* ARGSUSED */
 1404 int
 1405 sys_fhstatfs(l, v, retval)
 1406         struct lwp *l;
 1407         void *v;
 1408         register_t *retval;
 1409 {
 1410         struct sys_fhstatfs_args /*
 1411                 syscallarg(const fhandle_t *) fhp;
 1412                 syscallarg(struct statfs *) buf;
 1413         } */ *uap = v;
 1414         struct proc *p = l->l_proc;
 1415         struct statfs sbuf;
 1416         fhandle_t fh;
 1417         struct mount *mp;
 1418         struct vnode *vp;
 1419         int error;
 1420 
 1421         /*
 1422          * Must be super user
 1423          */
 1424         if ((error = suser(p->p_ucred, &p->p_acflag)))
 1425                 return (error);
 1426 
 1427         if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
 1428                 return (error);
 1429 
 1430         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 1431                 return (ESTALE);
 1432         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
 1433                 return (error);
 1434         mp = vp->v_mount;
 1435         vput(vp);
 1436         if ((error = VFS_STATFS(mp, &sbuf, p)) != 0)
 1437                 return (error);
 1438         return (copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf)));
 1439 }
 1440 
 1441 /*
 1442  * Create a special file.
 1443  */
 1444 /* ARGSUSED */
 1445 int
 1446 sys_mknod(l, v, retval)
 1447         struct lwp *l;
 1448         void *v;
 1449         register_t *retval;
 1450 {
 1451         struct sys_mknod_args /* {
 1452                 syscallarg(const char *) path;
 1453                 syscallarg(int) mode;
 1454                 syscallarg(int) dev;
 1455         } */ *uap = v;
 1456         struct proc *p = l->l_proc;
 1457         struct vnode *vp;
 1458         struct mount *mp;
 1459         struct vattr vattr;
 1460         int error;
 1461         int whiteout = 0;
 1462         struct nameidata nd;
 1463 
 1464         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1465                 return (error);
 1466 restart:
 1467         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 1468         if ((error = namei(&nd)) != 0)
 1469                 return (error);
 1470         vp = nd.ni_vp;
 1471         if (vp != NULL)
 1472                 error = EEXIST;
 1473         else {
 1474                 VATTR_NULL(&vattr);
 1475                 vattr.va_mode =
 1476                     (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1477                 vattr.va_rdev = SCARG(uap, dev);
 1478                 whiteout = 0;
 1479 
 1480                 switch (SCARG(uap, mode) & S_IFMT) {
 1481                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1482                         vattr.va_type = VBAD;
 1483                         break;
 1484                 case S_IFCHR:
 1485                         vattr.va_type = VCHR;
 1486                         break;
 1487                 case S_IFBLK:
 1488                         vattr.va_type = VBLK;
 1489                         break;
 1490                 case S_IFWHT:
 1491                         whiteout = 1;
 1492                         break;
 1493                 default:
 1494                         error = EINVAL;
 1495                         break;
 1496                 }
 1497         }
 1498         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1499                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1500                 if (nd.ni_dvp == vp)
 1501                         vrele(nd.ni_dvp);
 1502                 else
 1503                         vput(nd.ni_dvp);
 1504                 if (vp)
 1505                         vrele(vp);
 1506                 if ((error = vn_start_write(NULL, &mp,
 1507                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1508                         return (error);
 1509                 goto restart;
 1510         }
 1511         if (!error) {
 1512                 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1513                 if (whiteout) {
 1514                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1515                         if (error)
 1516                                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1517                         vput(nd.ni_dvp);
 1518                 } else {
 1519                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1520                                                 &nd.ni_cnd, &vattr);
 1521                         if (error == 0)
 1522                                 vput(nd.ni_vp);
 1523                 }
 1524         } else {
 1525                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1526                 if (nd.ni_dvp == vp)
 1527                         vrele(nd.ni_dvp);
 1528                 else
 1529                         vput(nd.ni_dvp);
 1530                 if (vp)
 1531                         vrele(vp);
 1532         }
 1533         vn_finished_write(mp, 0);
 1534         return (error);
 1535 }
 1536 
 1537 /*
 1538  * Create a named pipe.
 1539  */
 1540 /* ARGSUSED */
 1541 int
 1542 sys_mkfifo(l, v, retval)
 1543         struct lwp *l;
 1544         void *v;
 1545         register_t *retval;
 1546 {
 1547         struct sys_mkfifo_args /* {
 1548                 syscallarg(const char *) path;
 1549                 syscallarg(int) mode;
 1550         } */ *uap = v;
 1551         struct proc *p = l->l_proc;
 1552         struct mount *mp;
 1553         struct vattr vattr;
 1554         int error;
 1555         struct nameidata nd;
 1556 
 1557 restart:
 1558         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 1559         if ((error = namei(&nd)) != 0)
 1560                 return (error);
 1561         if (nd.ni_vp != NULL) {
 1562                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1563                 if (nd.ni_dvp == nd.ni_vp)
 1564                         vrele(nd.ni_dvp);
 1565                 else
 1566                         vput(nd.ni_dvp);
 1567                 vrele(nd.ni_vp);
 1568                 return (EEXIST);
 1569         }
 1570         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1571                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1572                 if (nd.ni_dvp == nd.ni_vp)
 1573                         vrele(nd.ni_dvp);
 1574                 else
 1575                         vput(nd.ni_dvp);
 1576                 if (nd.ni_vp)
 1577                         vrele(nd.ni_vp);
 1578                 if ((error = vn_start_write(NULL, &mp,
 1579                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1580                         return (error);
 1581                 goto restart;
 1582         }
 1583         VATTR_NULL(&vattr);
 1584         vattr.va_type = VFIFO;
 1585         vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1586         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1587         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1588         if (error == 0)
 1589                 vput(nd.ni_vp);
 1590         vn_finished_write(mp, 0);
 1591         return (error);
 1592 }
 1593 
 1594 /*
 1595  * Make a hard file link.
 1596  */
 1597 /* ARGSUSED */
 1598 int
 1599 sys_link(l, v, retval)
 1600         struct lwp *l;
 1601         void *v;
 1602         register_t *retval;
 1603 {
 1604         struct sys_link_args /* {
 1605                 syscallarg(const char *) path;
 1606                 syscallarg(const char *) link;
 1607         } */ *uap = v;
 1608         struct proc *p = l->l_proc;
 1609         struct vnode *vp;
 1610         struct mount *mp;
 1611         struct nameidata nd;
 1612         int error;
 1613 
 1614         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 1615         if ((error = namei(&nd)) != 0)
 1616                 return (error);
 1617         vp = nd.ni_vp;
 1618         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 1619                 vrele(vp);
 1620                 return (error);
 1621         }
 1622         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
 1623         if ((error = namei(&nd)) != 0)
 1624                 goto out;
 1625         if (nd.ni_vp) {
 1626                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1627                 if (nd.ni_dvp == nd.ni_vp)
 1628                         vrele(nd.ni_dvp);
 1629                 else
 1630                         vput(nd.ni_dvp);
 1631                 vrele(nd.ni_vp);
 1632                 error = EEXIST;
 1633                 goto out;
 1634         }
 1635         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1636         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 1637         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1638 out:
 1639         vrele(vp);
 1640         vn_finished_write(mp, 0);
 1641         return (error);
 1642 }
 1643 
 1644 /*
 1645  * Make a symbolic link.
 1646  */
 1647 /* ARGSUSED */
 1648 int
 1649 sys_symlink(l, v, retval)
 1650         struct lwp *l;
 1651         void *v;
 1652         register_t *retval;
 1653 {
 1654         struct sys_symlink_args /* {
 1655                 syscallarg(const char *) path;
 1656                 syscallarg(const char *) link;
 1657         } */ *uap = v;
 1658         struct proc *p = l->l_proc;
 1659         struct mount *mp;
 1660         struct vattr vattr;
 1661         char *path;
 1662         int error;
 1663         struct nameidata nd;
 1664 
 1665         path = PNBUF_GET();
 1666         error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
 1667         if (error)
 1668                 goto out;
 1669 restart:
 1670         NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
 1671         if ((error = namei(&nd)) != 0)
 1672                 goto out;
 1673         if (nd.ni_vp) {
 1674                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1675                 if (nd.ni_dvp == nd.ni_vp)
 1676                         vrele(nd.ni_dvp);
 1677                 else
 1678                         vput(nd.ni_dvp);
 1679                 vrele(nd.ni_vp);
 1680                 error = EEXIST;
 1681                 goto out;
 1682         }
 1683         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1684                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1685                 if (nd.ni_dvp == nd.ni_vp)
 1686                         vrele(nd.ni_dvp);
 1687                 else
 1688                         vput(nd.ni_dvp);
 1689                 if ((error = vn_start_write(NULL, &mp,
 1690                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1691                         return (error);
 1692                 goto restart;
 1693         }
 1694         VATTR_NULL(&vattr);
 1695         vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
 1696         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1697         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
 1698         if (error == 0)
 1699                 vput(nd.ni_vp);
 1700         vn_finished_write(mp, 0);
 1701 out:
 1702         PNBUF_PUT(path);
 1703         return (error);
 1704 }
 1705 
 1706 /*
 1707  * Delete a whiteout from the filesystem.
 1708  */
 1709 /* ARGSUSED */
 1710 int
 1711 sys_undelete(l, v, retval)
 1712         struct lwp *l;
 1713         void *v;
 1714         register_t *retval;
 1715 {
 1716         struct sys_undelete_args /* {
 1717                 syscallarg(const char *) path;
 1718         } */ *uap = v;
 1719         struct proc *p = l->l_proc;
 1720         int error;
 1721         struct mount *mp;
 1722         struct nameidata nd;
 1723 
 1724 restart:
 1725         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 1726             SCARG(uap, path), p);
 1727         error = namei(&nd);
 1728         if (error)
 1729                 return (error);
 1730 
 1731         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1732                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1733                 if (nd.ni_dvp == nd.ni_vp)
 1734                         vrele(nd.ni_dvp);
 1735                 else
 1736                         vput(nd.ni_dvp);
 1737                 if (nd.ni_vp)
 1738                         vrele(nd.ni_vp);
 1739                 return (EEXIST);
 1740         }
 1741         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1742                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1743                 if (nd.ni_dvp == nd.ni_vp)
 1744                         vrele(nd.ni_dvp);
 1745                 else
 1746                         vput(nd.ni_dvp);
 1747                 if ((error = vn_start_write(NULL, &mp,
 1748                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1749                         return (error);
 1750                 goto restart;
 1751         }
 1752         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1753         if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
 1754                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1755         vput(nd.ni_dvp);
 1756         vn_finished_write(mp, 0);
 1757         return (error);
 1758 }
 1759 
 1760 /*
 1761  * Delete a name from the filesystem.
 1762  */
 1763 /* ARGSUSED */
 1764 int
 1765 sys_unlink(l, v, retval)
 1766         struct lwp *l;
 1767         void *v;
 1768         register_t *retval;
 1769 {
 1770         struct sys_unlink_args /* {
 1771                 syscallarg(const char *) path;
 1772         } */ *uap = v;
 1773         struct proc *p = l->l_proc;
 1774         struct mount *mp;
 1775         struct vnode *vp;
 1776         int error;
 1777         struct nameidata nd;
 1778 
 1779 restart:
 1780         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 1781             SCARG(uap, path), p);
 1782         if ((error = namei(&nd)) != 0)
 1783                 return (error);
 1784         vp = nd.ni_vp;
 1785 
 1786         /*
 1787          * The root of a mounted filesystem cannot be deleted.
 1788          */
 1789         if (vp->v_flag & VROOT) {
 1790                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1791                 if (nd.ni_dvp == vp)
 1792                         vrele(nd.ni_dvp);
 1793                 else
 1794                         vput(nd.ni_dvp);
 1795                 vput(vp);
 1796                 error = EBUSY;
 1797                 goto out;
 1798         }
 1799         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1800                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1801                 if (nd.ni_dvp == vp)
 1802                         vrele(nd.ni_dvp);
 1803                 else
 1804                         vput(nd.ni_dvp);
 1805                 vput(vp);
 1806                 if ((error = vn_start_write(NULL, &mp,
 1807                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 1808                         return (error);
 1809                 goto restart;
 1810         }
 1811         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 1812         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 1813         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 1814         vn_finished_write(mp, 0);
 1815 out:
 1816         return (error);
 1817 }
 1818 
 1819 /*
 1820  * Reposition read/write file offset.
 1821  */
 1822 int
 1823 sys_lseek(l, v, retval)
 1824         struct lwp *l;
 1825         void *v;
 1826         register_t *retval;
 1827 {
 1828         struct sys_lseek_args /* {
 1829                 syscallarg(int) fd;
 1830                 syscallarg(int) pad;
 1831                 syscallarg(off_t) offset;
 1832                 syscallarg(int) whence;
 1833         } */ *uap = v;
 1834         struct proc *p = l->l_proc;
 1835         struct ucred *cred = p->p_ucred;
 1836         struct filedesc *fdp = p->p_fd;
 1837         struct file *fp;
 1838         struct vnode *vp;
 1839         struct vattr vattr;
 1840         off_t newoff;
 1841         int error;
 1842 
 1843         if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
 1844                 return (EBADF);
 1845 
 1846         FILE_USE(fp);
 1847 
 1848         vp = (struct vnode *)fp->f_data;
 1849         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 1850                 error = ESPIPE;
 1851                 goto out;
 1852         }
 1853 
 1854         switch (SCARG(uap, whence)) {
 1855         case SEEK_CUR:
 1856                 newoff = fp->f_offset + SCARG(uap, offset);
 1857                 break;
 1858         case SEEK_END:
 1859                 error = VOP_GETATTR(vp, &vattr, cred, p);
 1860                 if (error)
 1861                         goto out;
 1862                 newoff = SCARG(uap, offset) + vattr.va_size;
 1863                 break;
 1864         case SEEK_SET:
 1865                 newoff = SCARG(uap, offset);
 1866                 break;
 1867         default:
 1868                 error = EINVAL;
 1869                 goto out;
 1870         }
 1871         if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
 1872                 goto out;
 1873 
 1874         *(off_t *)retval = fp->f_offset = newoff;
 1875  out:
 1876         FILE_UNUSE(fp, p);
 1877         return (error);
 1878 }
 1879 
 1880 /*
 1881  * Positional read system call.
 1882  */
 1883 int
 1884 sys_pread(l, v, retval)
 1885         struct lwp *l;
 1886         void *v;
 1887         register_t *retval;
 1888 {
 1889         struct sys_pread_args /* {
 1890                 syscallarg(int) fd;
 1891                 syscallarg(void *) buf;
 1892                 syscallarg(size_t) nbyte;
 1893                 syscallarg(off_t) offset;
 1894         } */ *uap = v;
 1895         struct proc *p = l->l_proc;
 1896         struct filedesc *fdp = p->p_fd;
 1897         struct file *fp;
 1898         struct vnode *vp;
 1899         off_t offset;
 1900         int error, fd = SCARG(uap, fd);
 1901 
 1902         if ((fp = fd_getfile(fdp, fd)) == NULL)
 1903                 return (EBADF);
 1904 
 1905         if ((fp->f_flag & FREAD) == 0) {
 1906                 simple_unlock(&fp->f_slock);
 1907                 return (EBADF);
 1908         }
 1909 
 1910         FILE_USE(fp);
 1911 
 1912         vp = (struct vnode *)fp->f_data;
 1913         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 1914                 error = ESPIPE;
 1915                 goto out;
 1916         }
 1917 
 1918         offset = SCARG(uap, offset);
 1919 
 1920         /*
 1921          * XXX This works because no file systems actually
 1922          * XXX take any action on the seek operation.
 1923          */
 1924         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 1925                 goto out;
 1926 
 1927         /* dofileread() will unuse the descriptor for us */
 1928         return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 1929             &offset, 0, retval));
 1930 
 1931  out:
 1932         FILE_UNUSE(fp, p);
 1933         return (error);
 1934 }
 1935 
 1936 /*
 1937  * Positional scatter read system call.
 1938  */
 1939 int
 1940 sys_preadv(l, v, retval)
 1941         struct lwp *l;
 1942         void *v;
 1943         register_t *retval;
 1944 {
 1945         struct sys_preadv_args /* {
 1946                 syscallarg(int) fd;
 1947                 syscallarg(const struct iovec *) iovp;
 1948                 syscallarg(int) iovcnt;
 1949                 syscallarg(off_t) offset;
 1950         } */ *uap = v;
 1951         struct proc *p = l->l_proc;
 1952         struct filedesc *fdp = p->p_fd;
 1953         struct file *fp;
 1954         struct vnode *vp;
 1955         off_t offset;
 1956         int error, fd = SCARG(uap, fd);
 1957 
 1958         if ((fp = fd_getfile(fdp, fd)) == NULL)
 1959                 return (EBADF);
 1960 
 1961         if ((fp->f_flag & FREAD) == 0) {
 1962                 simple_unlock(&fp->f_slock);
 1963                 return (EBADF);
 1964         }
 1965 
 1966         FILE_USE(fp);
 1967 
 1968         vp = (struct vnode *)fp->f_data;
 1969         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 1970                 error = ESPIPE;
 1971                 goto out;
 1972         }
 1973 
 1974         offset = SCARG(uap, offset);
 1975 
 1976         /*
 1977          * XXX This works because no file systems actually
 1978          * XXX take any action on the seek operation.
 1979          */
 1980         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 1981                 goto out;
 1982 
 1983         /* dofilereadv() will unuse the descriptor for us */
 1984         return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 1985             &offset, 0, retval));
 1986 
 1987  out:
 1988         FILE_UNUSE(fp, p);
 1989         return (error);
 1990 }
 1991 
 1992 /*
 1993  * Positional write system call.
 1994  */
 1995 int
 1996 sys_pwrite(l, v, retval)
 1997         struct lwp *l;
 1998         void *v;
 1999         register_t *retval;
 2000 {
 2001         struct sys_pwrite_args /* {
 2002                 syscallarg(int) fd;
 2003                 syscallarg(const void *) buf;
 2004                 syscallarg(size_t) nbyte;
 2005                 syscallarg(off_t) offset;
 2006         } */ *uap = v;
 2007         struct proc *p = l->l_proc;
 2008         struct filedesc *fdp = p->p_fd;
 2009         struct file *fp;
 2010         struct vnode *vp;
 2011         off_t offset;
 2012         int error, fd = SCARG(uap, fd);
 2013 
 2014         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2015                 return (EBADF);
 2016 
 2017         if ((fp->f_flag & FWRITE) == 0) {
 2018                 simple_unlock(&fp->f_slock);
 2019                 return (EBADF);
 2020         }
 2021 
 2022         FILE_USE(fp);
 2023 
 2024         vp = (struct vnode *)fp->f_data;
 2025         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2026                 error = ESPIPE;
 2027                 goto out;
 2028         }
 2029 
 2030         offset = SCARG(uap, offset);
 2031 
 2032         /*
 2033          * XXX This works because no file systems actually
 2034          * XXX take any action on the seek operation.
 2035          */
 2036         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2037                 goto out;
 2038 
 2039         /* dofilewrite() will unuse the descriptor for us */
 2040         return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2041             &offset, 0, retval));
 2042 
 2043  out:
 2044         FILE_UNUSE(fp, p);
 2045         return (error);
 2046 }
 2047 
 2048 /*
 2049  * Positional gather write system call.
 2050  */
 2051 int
 2052 sys_pwritev(l, v, retval)
 2053         struct lwp *l;
 2054         void *v;
 2055         register_t *retval;
 2056 {
 2057         struct sys_pwritev_args /* {
 2058                 syscallarg(int) fd;
 2059                 syscallarg(const struct iovec *) iovp;
 2060                 syscallarg(int) iovcnt;
 2061                 syscallarg(off_t) offset;
 2062         } */ *uap = v;
 2063         struct proc *p = l->l_proc;
 2064         struct filedesc *fdp = p->p_fd;
 2065         struct file *fp;
 2066         struct vnode *vp;
 2067         off_t offset;
 2068         int error, fd = SCARG(uap, fd);
 2069 
 2070         if ((fp = fd_getfile(fdp, fd)) == NULL)
 2071                 return (EBADF);
 2072 
 2073         if ((fp->f_flag & FWRITE) == 0) {
 2074                 simple_unlock(&fp->f_slock);
 2075                 return (EBADF);
 2076         }
 2077 
 2078         FILE_USE(fp);
 2079 
 2080         vp = (struct vnode *)fp->f_data;
 2081         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2082                 error = ESPIPE;
 2083                 goto out;
 2084         }
 2085 
 2086         offset = SCARG(uap, offset);
 2087 
 2088         /*
 2089          * XXX This works because no file systems actually
 2090          * XXX take any action on the seek operation.
 2091          */
 2092         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2093                 goto out;
 2094 
 2095         /* dofilewritev() will unuse the descriptor for us */
 2096         return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
 2097             &offset, 0, retval));
 2098 
 2099  out:
 2100         FILE_UNUSE(fp, p);
 2101         return (error);
 2102 }
 2103 
 2104 /*
 2105  * Check access permissions.
 2106  */
 2107 int
 2108 sys_access(l, v, retval)
 2109         struct lwp *l;
 2110         void *v;
 2111         register_t *retval;
 2112 {
 2113         struct sys_access_args /* {
 2114                 syscallarg(const char *) path;
 2115                 syscallarg(int) flags;
 2116         } */ *uap = v;
 2117         struct proc *p = l->l_proc;
 2118         struct ucred *cred = crget();
 2119         struct vnode *vp;
 2120         int error, flags;
 2121         struct nameidata nd;
 2122 
 2123         (void)memcpy(cred, p->p_ucred, sizeof(*cred));
 2124         cred->cr_ref = 1;
 2125         cred->cr_uid = p->p_cred->p_ruid;
 2126         cred->cr_gid = p->p_cred->p_rgid;
 2127         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2128             SCARG(uap, path), p);
 2129         /* Override default credentials */
 2130         nd.ni_cnd.cn_cred = cred;
 2131         if ((error = namei(&nd)) != 0)
 2132                 goto out;
 2133         vp = nd.ni_vp;
 2134 
 2135         /* Flags == 0 means only check for existence. */
 2136         if (SCARG(uap, flags)) {
 2137                 flags = 0;
 2138                 if (SCARG(uap, flags) & R_OK)
 2139                         flags |= VREAD;
 2140                 if (SCARG(uap, flags) & W_OK)
 2141                         flags |= VWRITE;
 2142                 if (SCARG(uap, flags) & X_OK)
 2143                         flags |= VEXEC;
 2144 
 2145                 error = VOP_ACCESS(vp, flags, cred, p);
 2146                 if (!error && (flags & VWRITE))
 2147                         error = vn_writechk(vp);
 2148         }
 2149         vput(vp);
 2150 out:
 2151         crfree(cred);
 2152         return (error);
 2153 }
 2154 
 2155 /*
 2156  * Get file status; this version follows links.
 2157  */
 2158 /* ARGSUSED */
 2159 int
 2160 sys___stat13(l, v, retval)
 2161         struct lwp *l;
 2162         void *v;
 2163         register_t *retval;
 2164 {
 2165         struct sys___stat13_args /* {
 2166                 syscallarg(const char *) path;
 2167                 syscallarg(struct stat *) ub;
 2168         } */ *uap = v;
 2169         struct proc *p = l->l_proc;
 2170         struct stat sb;
 2171         int error;
 2172         struct nameidata nd;
 2173 
 2174         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2175             SCARG(uap, path), p);
 2176         if ((error = namei(&nd)) != 0)
 2177                 return (error);
 2178         error = vn_stat(nd.ni_vp, &sb, p);
 2179         vput(nd.ni_vp);
 2180         if (error)
 2181                 return (error);
 2182         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2183         return (error);
 2184 }
 2185 
 2186 /*
 2187  * Get file status; this version does not follow links.
 2188  */
 2189 /* ARGSUSED */
 2190 int
 2191 sys___lstat13(l, v, retval)
 2192         struct lwp *l;
 2193         void *v;
 2194         register_t *retval;
 2195 {
 2196         struct sys___lstat13_args /* {
 2197                 syscallarg(const char *) path;
 2198                 syscallarg(struct stat *) ub;
 2199         } */ *uap = v;
 2200         struct proc *p = l->l_proc;
 2201         struct stat sb;
 2202         int error;
 2203         struct nameidata nd;
 2204 
 2205         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2206             SCARG(uap, path), p);
 2207         if ((error = namei(&nd)) != 0)
 2208                 return (error);
 2209         error = vn_stat(nd.ni_vp, &sb, p);
 2210         vput(nd.ni_vp);
 2211         if (error)
 2212                 return (error);
 2213         error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2214         return (error);
 2215 }
 2216 
 2217 /*
 2218  * Get configurable pathname variables.
 2219  */
 2220 /* ARGSUSED */
 2221 int
 2222 sys_pathconf(l, v, retval)
 2223         struct lwp *l;
 2224         void *v;
 2225         register_t *retval;
 2226 {
 2227         struct sys_pathconf_args /* {
 2228                 syscallarg(const char *) path;
 2229                 syscallarg(int) name;
 2230         } */ *uap = v;
 2231         struct proc *p = l->l_proc;
 2232         int error;
 2233         struct nameidata nd;
 2234 
 2235         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
 2236             SCARG(uap, path), p);
 2237         if ((error = namei(&nd)) != 0)
 2238                 return (error);
 2239         error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
 2240         vput(nd.ni_vp);
 2241         return (error);
 2242 }
 2243 
 2244 /*
 2245  * Return target name of a symbolic link.
 2246  */
 2247 /* ARGSUSED */
 2248 int
 2249 sys_readlink(l, v, retval)
 2250         struct lwp *l;
 2251         void *v;
 2252         register_t *retval;
 2253 {
 2254         struct sys_readlink_args /* {
 2255                 syscallarg(const char *) path;
 2256                 syscallarg(char *) buf;
 2257                 syscallarg(size_t) count;
 2258         } */ *uap = v;
 2259         struct proc *p = l->l_proc;
 2260         struct vnode *vp;
 2261         struct iovec aiov;
 2262         struct uio auio;
 2263         int error;
 2264         struct nameidata nd;
 2265 
 2266         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
 2267             SCARG(uap, path), p);
 2268         if ((error = namei(&nd)) != 0)
 2269                 return (error);
 2270         vp = nd.ni_vp;
 2271         if (vp->v_type != VLNK)
 2272                 error = EINVAL;
 2273         else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
 2274             (error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) == 0) {
 2275                 aiov.iov_base = SCARG(uap, buf);
 2276                 aiov.iov_len = SCARG(uap, count);
 2277                 auio.uio_iov = &aiov;
 2278                 auio.uio_iovcnt = 1;
 2279                 auio.uio_offset = 0;
 2280                 auio.uio_rw = UIO_READ;
 2281                 auio.uio_segflg = UIO_USERSPACE;
 2282                 auio.uio_procp = p;
 2283                 auio.uio_resid = SCARG(uap, count);
 2284                 error = VOP_READLINK(vp, &auio, p->p_ucred);
 2285         }
 2286         vput(vp);
 2287         *retval = SCARG(uap, count) - auio.uio_resid;
 2288         return (error);
 2289 }
 2290 
 2291 /*
 2292  * Change flags of a file given a path name.
 2293  */
 2294 /* ARGSUSED */
 2295 int
 2296 sys_chflags(l, v, retval)
 2297         struct lwp *l;
 2298         void *v;
 2299         register_t *retval;
 2300 {
 2301         struct sys_chflags_args /* {
 2302                 syscallarg(const char *) path;
 2303                 syscallarg(u_long) flags;
 2304         } */ *uap = v;
 2305         struct proc *p = l->l_proc;
 2306         struct vnode *vp;
 2307         int error;
 2308         struct nameidata nd;
 2309 
 2310         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2311         if ((error = namei(&nd)) != 0)
 2312                 return (error);
 2313         vp = nd.ni_vp;
 2314         error = change_flags(vp, SCARG(uap, flags), p);
 2315         vput(vp);
 2316         return (error);
 2317 }
 2318 
 2319 /*
 2320  * Change flags of a file given a file descriptor.
 2321  */
 2322 /* ARGSUSED */
 2323 int
 2324 sys_fchflags(l, v, retval)
 2325         struct lwp *l;
 2326         void *v;
 2327         register_t *retval;
 2328 {
 2329         struct sys_fchflags_args /* {
 2330                 syscallarg(int) fd;
 2331                 syscallarg(u_long) flags;
 2332         } */ *uap = v;
 2333         struct proc *p = l->l_proc;
 2334         struct vnode *vp;
 2335         struct file *fp;
 2336         int error;
 2337 
 2338         /* getvnode() will use the descriptor for us */
 2339         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2340                 return (error);
 2341         vp = (struct vnode *)fp->f_data;
 2342         error = change_flags(vp, SCARG(uap, flags), p);
 2343         VOP_UNLOCK(vp, 0);
 2344         FILE_UNUSE(fp, p);
 2345         return (error);
 2346 }
 2347 
 2348 /*
 2349  * Change flags of a file given a path name; this version does
 2350  * not follow links.
 2351  */
 2352 int
 2353 sys_lchflags(l, v, retval)
 2354         struct lwp *l;
 2355         void *v;
 2356         register_t *retval;
 2357 {
 2358         struct sys_lchflags_args /* {
 2359                 syscallarg(const char *) path;
 2360                 syscallarg(u_long) flags;
 2361         } */ *uap = v;
 2362         struct proc *p = l->l_proc;
 2363         struct vnode *vp;
 2364         int error;
 2365         struct nameidata nd;
 2366 
 2367         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2368         if ((error = namei(&nd)) != 0)
 2369                 return (error);
 2370         vp = nd.ni_vp;
 2371         error = change_flags(vp, SCARG(uap, flags), p);
 2372         vput(vp);
 2373         return (error);
 2374 }
 2375 
 2376 /*
 2377  * Common routine to change flags of a file.
 2378  */
 2379 int
 2380 change_flags(vp, flags, p)
 2381         struct vnode *vp;
 2382         u_long flags;
 2383         struct proc *p;
 2384 {
 2385         struct mount *mp;
 2386         struct vattr vattr;
 2387         int error;
 2388 
 2389         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2390                 return (error);
 2391         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2392         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2393         /*
 2394          * Non-superusers cannot change the flags on devices, even if they
 2395          * own them.
 2396          */
 2397         if (suser(p->p_ucred, &p->p_acflag) != 0) {
 2398                 if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 2399                         goto out;
 2400                 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
 2401                         error = EINVAL;
 2402                         goto out;
 2403                 }
 2404         }
 2405         VATTR_NULL(&vattr);
 2406         vattr.va_flags = flags;
 2407         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2408 out:
 2409         vn_finished_write(mp, 0);
 2410         return (error);
 2411 }
 2412 
 2413 /*
 2414  * Change mode of a file given path name; this version follows links.
 2415  */
 2416 /* ARGSUSED */
 2417 int
 2418 sys_chmod(l, v, retval)
 2419         struct lwp *l;
 2420         void *v;
 2421         register_t *retval;
 2422 {
 2423         struct sys_chmod_args /* {
 2424                 syscallarg(const char *) path;
 2425                 syscallarg(int) mode;
 2426         } */ *uap = v;
 2427         struct proc *p = l->l_proc;
 2428         int error;
 2429         struct nameidata nd;
 2430 
 2431         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2432         if ((error = namei(&nd)) != 0)
 2433                 return (error);
 2434 
 2435         error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
 2436 
 2437         vrele(nd.ni_vp);
 2438         return (error);
 2439 }
 2440 
 2441 /*
 2442  * Change mode of a file given a file descriptor.
 2443  */
 2444 /* ARGSUSED */
 2445 int
 2446 sys_fchmod(l, v, retval)
 2447         struct lwp *l;
 2448         void *v;
 2449         register_t *retval;
 2450 {
 2451         struct sys_fchmod_args /* {
 2452                 syscallarg(int) fd;
 2453                 syscallarg(int) mode;
 2454         } */ *uap = v;
 2455         struct proc *p = l->l_proc;
 2456         struct file *fp;
 2457         int error;
 2458 
 2459         /* getvnode() will use the descriptor for us */
 2460         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2461                 return (error);
 2462 
 2463         error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), p);
 2464         FILE_UNUSE(fp, p);
 2465         return (error);
 2466 }
 2467 
 2468 /*
 2469  * Change mode of a file given path name; this version does not follow links.
 2470  */
 2471 /* ARGSUSED */
 2472 int
 2473 sys_lchmod(l, v, retval)
 2474         struct lwp *l;
 2475         void *v;
 2476         register_t *retval;
 2477 {
 2478         struct sys_lchmod_args /* {
 2479                 syscallarg(const char *) path;
 2480                 syscallarg(int) mode;
 2481         } */ *uap = v;
 2482         struct proc *p = l->l_proc;
 2483         int error;
 2484         struct nameidata nd;
 2485 
 2486         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2487         if ((error = namei(&nd)) != 0)
 2488                 return (error);
 2489 
 2490         error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
 2491 
 2492         vrele(nd.ni_vp);
 2493         return (error);
 2494 }
 2495 
 2496 /*
 2497  * Common routine to set mode given a vnode.
 2498  */
 2499 static int
 2500 change_mode(vp, mode, p)
 2501         struct vnode *vp;
 2502         int mode;
 2503         struct proc *p;
 2504 {
 2505         struct mount *mp;
 2506         struct vattr vattr;
 2507         int error;
 2508 
 2509         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2510                 return (error);
 2511         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2512         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2513         VATTR_NULL(&vattr);
 2514         vattr.va_mode = mode & ALLPERMS;
 2515         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2516         VOP_UNLOCK(vp, 0);
 2517         vn_finished_write(mp, 0);
 2518         return (error);
 2519 }
 2520 
 2521 /*
 2522  * Set ownership given a path name; this version follows links.
 2523  */
 2524 /* ARGSUSED */
 2525 int
 2526 sys_chown(l, v, retval)
 2527         struct lwp *l;
 2528         void *v;
 2529         register_t *retval;
 2530 {
 2531         struct sys_chown_args /* {
 2532                 syscallarg(const char *) path;
 2533                 syscallarg(uid_t) uid;
 2534                 syscallarg(gid_t) gid;
 2535         } */ *uap = v;
 2536         struct proc *p = l->l_proc;
 2537         int error;
 2538         struct nameidata nd;
 2539 
 2540         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2541         if ((error = namei(&nd)) != 0)
 2542                 return (error);
 2543 
 2544         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
 2545 
 2546         vrele(nd.ni_vp);
 2547         return (error);
 2548 }
 2549 
 2550 /*
 2551  * Set ownership given a path name; this version follows links.
 2552  * Provides POSIX semantics.
 2553  */
 2554 /* ARGSUSED */
 2555 int
 2556 sys___posix_chown(l, v, retval)
 2557         struct lwp *l;
 2558         void *v;
 2559         register_t *retval;
 2560 {
 2561         struct sys_chown_args /* {
 2562                 syscallarg(const char *) path;
 2563                 syscallarg(uid_t) uid;
 2564                 syscallarg(gid_t) gid;
 2565         } */ *uap = v;
 2566         struct proc *p = l->l_proc;
 2567         int error;
 2568         struct nameidata nd;
 2569 
 2570         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2571         if ((error = namei(&nd)) != 0)
 2572                 return (error);
 2573 
 2574         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
 2575 
 2576         vrele(nd.ni_vp);
 2577         return (error);
 2578 }
 2579 
 2580 /*
 2581  * Set ownership given a file descriptor.
 2582  */
 2583 /* ARGSUSED */
 2584 int
 2585 sys_fchown(l, v, retval)
 2586         struct lwp *l;
 2587         void *v;
 2588         register_t *retval;
 2589 {
 2590         struct sys_fchown_args /* {
 2591                 syscallarg(int) fd;
 2592                 syscallarg(uid_t) uid;
 2593                 syscallarg(gid_t) gid;
 2594         } */ *uap = v;
 2595         struct proc *p = l->l_proc;
 2596         int error;
 2597         struct file *fp;
 2598 
 2599         /* getvnode() will use the descriptor for us */
 2600         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2601                 return (error);
 2602 
 2603         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2604             SCARG(uap, gid), p, 0);
 2605         FILE_UNUSE(fp, p);
 2606         return (error);
 2607 }
 2608 
 2609 /*
 2610  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
 2611  */
 2612 /* ARGSUSED */
 2613 int
 2614 sys___posix_fchown(l, v, retval)
 2615         struct lwp *l;
 2616         void *v;
 2617         register_t *retval;
 2618 {
 2619         struct sys_fchown_args /* {
 2620                 syscallarg(int) fd;
 2621                 syscallarg(uid_t) uid;
 2622                 syscallarg(gid_t) gid;
 2623         } */ *uap = v;
 2624         struct proc *p = l->l_proc;
 2625         int error;
 2626         struct file *fp;
 2627 
 2628         /* getvnode() will use the descriptor for us */
 2629         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2630                 return (error);
 2631 
 2632         error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
 2633             SCARG(uap, gid), p, 1);
 2634         FILE_UNUSE(fp, p);
 2635         return (error);
 2636 }
 2637 
 2638 /*
 2639  * Set ownership given a path name; this version does not follow links.
 2640  */
 2641 /* ARGSUSED */
 2642 int
 2643 sys_lchown(l, v, retval)
 2644         struct lwp *l;
 2645         void *v;
 2646         register_t *retval;
 2647 {
 2648         struct sys_lchown_args /* {
 2649                 syscallarg(const char *) path;
 2650                 syscallarg(uid_t) uid;
 2651                 syscallarg(gid_t) gid;
 2652         } */ *uap = v;
 2653         struct proc *p = l->l_proc;
 2654         int error;
 2655         struct nameidata nd;
 2656 
 2657         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2658         if ((error = namei(&nd)) != 0)
 2659                 return (error);
 2660 
 2661         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
 2662 
 2663         vrele(nd.ni_vp);
 2664         return (error);
 2665 }
 2666 
 2667 /*
 2668  * Set ownership given a path name; this version does not follow links.
 2669  * Provides POSIX/XPG semantics.
 2670  */
 2671 /* ARGSUSED */
 2672 int
 2673 sys___posix_lchown(l, v, retval)
 2674         struct lwp *l;
 2675         void *v;
 2676         register_t *retval;
 2677 {
 2678         struct sys_lchown_args /* {
 2679                 syscallarg(const char *) path;
 2680                 syscallarg(uid_t) uid;
 2681                 syscallarg(gid_t) gid;
 2682         } */ *uap = v;
 2683         struct proc *p = l->l_proc;
 2684         int error;
 2685         struct nameidata nd;
 2686 
 2687         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2688         if ((error = namei(&nd)) != 0)
 2689                 return (error);
 2690 
 2691         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
 2692 
 2693         vrele(nd.ni_vp);
 2694         return (error);
 2695 }
 2696 
 2697 /*
 2698  * Common routine to set ownership given a vnode.
 2699  */
 2700 static int
 2701 change_owner(vp, uid, gid, p, posix_semantics)
 2702         struct vnode *vp;
 2703         uid_t uid;
 2704         gid_t gid;
 2705         struct proc *p;
 2706         int posix_semantics;
 2707 {
 2708         struct mount *mp;
 2709         struct vattr vattr;
 2710         mode_t newmode;
 2711         int error;
 2712 
 2713         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2714                 return (error);
 2715         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2716         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2717         if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 2718                 goto out;
 2719 
 2720 #define CHANGED(x) ((int)(x) != -1)
 2721         newmode = vattr.va_mode;
 2722         if (posix_semantics) {
 2723                 /*
 2724                  * POSIX/XPG semantics: if the caller is not the super-user,
 2725                  * clear set-user-id and set-group-id bits.  Both POSIX and
 2726                  * the XPG consider the behaviour for calls by the super-user
 2727                  * implementation-defined; we leave the set-user-id and set-
 2728                  * group-id settings intact in that case.
 2729                  */
 2730                 if (suser(p->p_ucred, NULL) != 0)
 2731                         newmode &= ~(S_ISUID | S_ISGID);
 2732         } else {
 2733                 /*
 2734                  * NetBSD semantics: when changing owner and/or group,
 2735                  * clear the respective bit(s).
 2736                  */
 2737                 if (CHANGED(uid))
 2738                         newmode &= ~S_ISUID;
 2739                 if (CHANGED(gid))
 2740                         newmode &= ~S_ISGID;
 2741         }
 2742         /* Update va_mode iff altered. */
 2743         if (vattr.va_mode == newmode)
 2744                 newmode = VNOVAL;
 2745 
 2746         VATTR_NULL(&vattr);
 2747         vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
 2748         vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
 2749         vattr.va_mode = newmode;
 2750         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2751 #undef CHANGED
 2752 
 2753 out:
 2754         VOP_UNLOCK(vp, 0);
 2755         vn_finished_write(mp, 0);
 2756         return (error);
 2757 }
 2758 
 2759 /*
 2760  * Set the access and modification times given a path name; this
 2761  * version follows links.
 2762  */
 2763 /* ARGSUSED */
 2764 int
 2765 sys_utimes(l, v, retval)
 2766         struct lwp *l;
 2767         void *v;
 2768         register_t *retval;
 2769 {
 2770         struct sys_utimes_args /* {
 2771                 syscallarg(const char *) path;
 2772                 syscallarg(const struct timeval *) tptr;
 2773         } */ *uap = v;
 2774         struct proc *p = l->l_proc;
 2775         int error;
 2776         struct nameidata nd;
 2777 
 2778         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2779         if ((error = namei(&nd)) != 0)
 2780                 return (error);
 2781 
 2782         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
 2783 
 2784         vrele(nd.ni_vp);
 2785         return (error);
 2786 }
 2787 
 2788 /*
 2789  * Set the access and modification times given a file descriptor.
 2790  */
 2791 /* ARGSUSED */
 2792 int
 2793 sys_futimes(l, v, retval)
 2794         struct lwp *l;
 2795         void *v;
 2796         register_t *retval;
 2797 {
 2798         struct sys_futimes_args /* {
 2799                 syscallarg(int) fd;
 2800                 syscallarg(const struct timeval *) tptr;
 2801         } */ *uap = v;
 2802         struct proc *p = l->l_proc;
 2803         int error;
 2804         struct file *fp;
 2805 
 2806         /* getvnode() will use the descriptor for us */
 2807         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2808                 return (error);
 2809 
 2810         error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), p);
 2811         FILE_UNUSE(fp, p);
 2812         return (error);
 2813 }
 2814 
 2815 /*
 2816  * Set the access and modification times given a path name; this
 2817  * version does not follow links.
 2818  */
 2819 /* ARGSUSED */
 2820 int
 2821 sys_lutimes(l, v, retval)
 2822         struct lwp *l;
 2823         void *v;
 2824         register_t *retval;
 2825 {
 2826         struct sys_lutimes_args /* {
 2827                 syscallarg(const char *) path;
 2828                 syscallarg(const struct timeval *) tptr;
 2829         } */ *uap = v;
 2830         struct proc *p = l->l_proc;
 2831         int error;
 2832         struct nameidata nd;
 2833 
 2834         NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2835         if ((error = namei(&nd)) != 0)
 2836                 return (error);
 2837 
 2838         error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
 2839 
 2840         vrele(nd.ni_vp);
 2841         return (error);
 2842 }
 2843 
 2844 /*
 2845  * Common routine to set access and modification times given a vnode.
 2846  */
 2847 static int
 2848 change_utimes(vp, tptr, p)
 2849         struct vnode *vp;
 2850         const struct timeval *tptr;
 2851         struct proc *p;
 2852 {
 2853         struct timeval tv[2];
 2854         struct mount *mp;
 2855         struct vattr vattr;
 2856         int error;
 2857 
 2858         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2859                 return (error);
 2860         VATTR_NULL(&vattr);
 2861         if (tptr == NULL) {
 2862                 microtime(&tv[0]);
 2863                 tv[1] = tv[0];
 2864                 vattr.va_vaflags |= VA_UTIMES_NULL;
 2865         } else {
 2866                 error = copyin(tptr, tv, sizeof(tv));
 2867                 if (error)
 2868                         goto out;
 2869         }
 2870         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2871         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2872         vattr.va_atime.tv_sec = tv[0].tv_sec;
 2873         vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
 2874         vattr.va_mtime.tv_sec = tv[1].tv_sec;
 2875         vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
 2876         error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2877         VOP_UNLOCK(vp, 0);
 2878 out:
 2879         vn_finished_write(mp, 0);
 2880         return (error);
 2881 }
 2882 
 2883 /*
 2884  * Truncate a file given its path name.
 2885  */
 2886 /* ARGSUSED */
 2887 int
 2888 sys_truncate(l, v, retval)
 2889         struct lwp *l;
 2890         void *v;
 2891         register_t *retval;
 2892 {
 2893         struct sys_truncate_args /* {
 2894                 syscallarg(const char *) path;
 2895                 syscallarg(int) pad;
 2896                 syscallarg(off_t) length;
 2897         } */ *uap = v;
 2898         struct proc *p = l->l_proc;
 2899         struct vnode *vp;
 2900         struct mount *mp;
 2901         struct vattr vattr;
 2902         int error;
 2903         struct nameidata nd;
 2904 
 2905         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 2906         if ((error = namei(&nd)) != 0)
 2907                 return (error);
 2908         vp = nd.ni_vp;
 2909         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 2910                 vrele(vp);
 2911                 return (error);
 2912         }
 2913         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2914         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2915         if (vp->v_type == VDIR)
 2916                 error = EISDIR;
 2917         else if ((error = vn_writechk(vp)) == 0 &&
 2918             (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
 2919                 VATTR_NULL(&vattr);
 2920                 vattr.va_size = SCARG(uap, length);
 2921                 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 2922         }
 2923         vput(vp);
 2924         vn_finished_write(mp, 0);
 2925         return (error);
 2926 }
 2927 
 2928 /*
 2929  * Truncate a file given a file descriptor.
 2930  */
 2931 /* ARGSUSED */
 2932 int
 2933 sys_ftruncate(l, v, retval)
 2934         struct lwp *l;
 2935         void *v;
 2936         register_t *retval;
 2937 {
 2938         struct sys_ftruncate_args /* {
 2939                 syscallarg(int) fd;
 2940                 syscallarg(int) pad;
 2941                 syscallarg(off_t) length;
 2942         } */ *uap = v;
 2943         struct proc *p = l->l_proc;
 2944         struct mount *mp;
 2945         struct vattr vattr;
 2946         struct vnode *vp;
 2947         struct file *fp;
 2948         int error;
 2949 
 2950         /* getvnode() will use the descriptor for us */
 2951         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2952                 return (error);
 2953         if ((fp->f_flag & FWRITE) == 0) {
 2954                 error = EINVAL;
 2955                 goto out;
 2956         }
 2957         vp = (struct vnode *)fp->f_data;
 2958         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 2959                 FILE_UNUSE(fp, p);
 2960                 return (error);
 2961         }
 2962         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 2963         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2964         if (vp->v_type == VDIR)
 2965                 error = EISDIR;
 2966         else if ((error = vn_writechk(vp)) == 0) {
 2967                 VATTR_NULL(&vattr);
 2968                 vattr.va_size = SCARG(uap, length);
 2969                 error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 2970         }
 2971         VOP_UNLOCK(vp, 0);
 2972         vn_finished_write(mp, 0);
 2973  out:
 2974         FILE_UNUSE(fp, p);
 2975         return (error);
 2976 }
 2977 
 2978 /*
 2979  * Sync an open file.
 2980  */
 2981 /* ARGSUSED */
 2982 int
 2983 sys_fsync(l, v, retval)
 2984         struct lwp *l;
 2985         void *v;
 2986         register_t *retval;
 2987 {
 2988         struct sys_fsync_args /* {
 2989                 syscallarg(int) fd;
 2990         } */ *uap = v;
 2991         struct proc *p = l->l_proc;
 2992         struct vnode *vp;
 2993         struct mount *mp;
 2994         struct file *fp;
 2995         int error;
 2996 
 2997         /* getvnode() will use the descriptor for us */
 2998         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 2999                 return (error);
 3000         vp = (struct vnode *)fp->f_data;
 3001         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3002                 FILE_UNUSE(fp, p);
 3003                 return (error);
 3004         }
 3005         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3006         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, p);
 3007         if (error == 0 && bioops.io_fsync != NULL &&
 3008             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3009                 (*bioops.io_fsync)(vp);
 3010         VOP_UNLOCK(vp, 0);
 3011         vn_finished_write(mp, 0);
 3012         FILE_UNUSE(fp, p);
 3013         return (error);
 3014 }
 3015 
 3016 /*
 3017  * Sync a range of file data.  API modeled after that found in AIX.
 3018  *
 3019  * FDATASYNC indicates that we need only save enough metadata to be able
 3020  * to re-read the written data.  Note we duplicate AIX's requirement that
 3021  * the file be open for writing.
 3022  */
 3023 /* ARGSUSED */
 3024 int
 3025 sys_fsync_range(l, v, retval)
 3026         struct lwp *l;
 3027         void *v;
 3028         register_t *retval;
 3029 {
 3030         struct sys_fsync_range_args /* {
 3031                 syscallarg(int) fd;
 3032                 syscallarg(int) flags;
 3033                 syscallarg(off_t) start;
 3034                 syscallarg(int) length;
 3035         } */ *uap = v;
 3036         struct proc *p = l->l_proc;
 3037         struct vnode *vp;
 3038         struct file *fp;
 3039         int flags, nflags;
 3040         off_t s, e, len;
 3041         int error;
 3042 
 3043         /* getvnode() will use the descriptor for us */
 3044         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3045                 return (error);
 3046 
 3047         if ((fp->f_flag & FWRITE) == 0) {
 3048                 error = EBADF;
 3049                 goto out;
 3050         }
 3051 
 3052         flags = SCARG(uap, flags);
 3053         if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
 3054             ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
 3055                 error = EINVAL;
 3056                 goto out;
 3057         }
 3058         /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
 3059         if (flags & FDATASYNC)
 3060                 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
 3061         else
 3062                 nflags = FSYNC_WAIT;
 3063 
 3064         len = SCARG(uap, length);
 3065         /* If length == 0, we do the whole file, and s = l = 0 will do that */
 3066         if (len) {
 3067                 s = SCARG(uap, start);
 3068                 e = s + len;
 3069                 if (e < s) {
 3070                         FILE_UNUSE(fp, p);
 3071                         error = EINVAL;
 3072                         goto out;
 3073                 }
 3074         } else {
 3075                 e = 0;
 3076                 s = 0;
 3077         }
 3078 
 3079         vp = (struct vnode *)fp->f_data;
 3080         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3081         error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, p);
 3082 
 3083         if (error == 0 && bioops.io_fsync != NULL &&
 3084             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3085                 (*bioops.io_fsync)(vp);
 3086 
 3087         VOP_UNLOCK(vp, 0);
 3088 out:
 3089         FILE_UNUSE(fp, p);
 3090         return (error);
 3091 }
 3092 
 3093 /*
 3094  * Sync the data of an open file.
 3095  */
 3096 /* ARGSUSED */
 3097 int
 3098 sys_fdatasync(l, v, retval)
 3099         struct lwp *l;
 3100         void *v;
 3101         register_t *retval;
 3102 {
 3103         struct sys_fdatasync_args /* {
 3104                 syscallarg(int) fd;
 3105         } */ *uap = v;
 3106         struct proc *p = l->l_proc;
 3107         struct vnode *vp;
 3108         struct file *fp;
 3109         int error;
 3110 
 3111         /* getvnode() will use the descriptor for us */
 3112         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3113                 return (error);
 3114         if ((fp->f_flag & FWRITE) == 0) {
 3115                 FILE_UNUSE(fp, p);
 3116                 return (EBADF);
 3117         }
 3118         vp = (struct vnode *)fp->f_data;
 3119         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3120         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, p);
 3121         VOP_UNLOCK(vp, 0);
 3122         FILE_UNUSE(fp, p);
 3123         return (error);
 3124 }
 3125 
 3126 /*
 3127  * Rename files, (standard) BSD semantics frontend.
 3128  */
 3129 /* ARGSUSED */
 3130 int
 3131 sys_rename(l, v, retval)
 3132         struct lwp *l;
 3133         void *v;
 3134         register_t *retval;
 3135 {
 3136         struct sys_rename_args /* {
 3137                 syscallarg(const char *) from;
 3138                 syscallarg(const char *) to;
 3139         } */ *uap = v;
 3140         struct proc *p = l->l_proc;
 3141 
 3142         return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 0));
 3143 }
 3144 
 3145 /*
 3146  * Rename files, POSIX semantics frontend.
 3147  */
 3148 /* ARGSUSED */
 3149 int
 3150 sys___posix_rename(l, v, retval)
 3151         struct lwp *l;
 3152         void *v;
 3153         register_t *retval;
 3154 {
 3155         struct sys___posix_rename_args /* {
 3156                 syscallarg(const char *) from;
 3157                 syscallarg(const char *) to;
 3158         } */ *uap = v;
 3159         struct proc *p = l->l_proc;
 3160 
 3161         return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 1));
 3162 }
 3163 
 3164 /*
 3165  * Rename files.  Source and destination must either both be directories,
 3166  * or both not be directories.  If target is a directory, it must be empty.
 3167  * If `from' and `to' refer to the same object, the value of the `retain'
 3168  * argument is used to determine whether `from' will be
 3169  *
 3170  * (retain == 0)        deleted unless `from' and `to' refer to the same
 3171  *                      object in the file system's name space (BSD).
 3172  * (retain == 1)        always retained (POSIX).
 3173  */
 3174 static int
 3175 rename_files(from, to, p, retain)
 3176         const char *from, *to;
 3177         struct proc *p;
 3178         int retain;
 3179 {
 3180         struct mount *mp = NULL;
 3181         struct vnode *tvp, *fvp, *tdvp;
 3182         struct nameidata fromnd, tond;
 3183         int error;
 3184 
 3185         NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
 3186             from, p);
 3187         if ((error = namei(&fromnd)) != 0)
 3188                 return (error);
 3189         fvp = fromnd.ni_vp;
 3190         error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
 3191         if (error != 0) {
 3192                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3193                 vrele(fromnd.ni_dvp);
 3194                 vrele(fvp);
 3195                 if (fromnd.ni_startdir)
 3196                         vrele(fromnd.ni_startdir);
 3197                 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3198                 return (error);
 3199         }
 3200         NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
 3201             (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, p);
 3202         if ((error = namei(&tond)) != 0) {
 3203                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3204                 vrele(fromnd.ni_dvp);
 3205                 vrele(fvp);
 3206                 goto out1;
 3207         }
 3208         tdvp = tond.ni_dvp;
 3209         tvp = tond.ni_vp;
 3210 
 3211         if (tvp != NULL) {
 3212                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3213                         error = ENOTDIR;
 3214                         goto out;
 3215                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3216                         error = EISDIR;
 3217                         goto out;
 3218                 }
 3219         }
 3220 
 3221         if (fvp == tdvp)
 3222                 error = EINVAL;
 3223 
 3224         /*
 3225          * Source and destination refer to the same object.
 3226          */
 3227         if (fvp == tvp) {
 3228                 if (retain)
 3229                         error = -1;
 3230                 else if (fromnd.ni_dvp == tdvp &&
 3231                     fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 3232                     !memcmp(fromnd.ni_cnd.cn_nameptr,
 3233                           tond.ni_cnd.cn_nameptr,
 3234                           fromnd.ni_cnd.cn_namelen))
 3235                 error = -1;
 3236         }
 3237 
 3238 out:
 3239         if (!error) {
 3240                 VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
 3241                 if (fromnd.ni_dvp != tdvp)
 3242                         VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3243                 if (tvp) {
 3244                         VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
 3245                 }
 3246                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3247                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3248         } else {
 3249                 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
 3250                 if (tdvp == tvp)
 3251                         vrele(tdvp);
 3252                 else
 3253                         vput(tdvp);
 3254                 if (tvp)
 3255                         vput(tvp);
 3256                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3257                 vrele(fromnd.ni_dvp);
 3258                 vrele(fvp);
 3259         }
 3260         vrele(tond.ni_startdir);
 3261         PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
 3262 out1:
 3263         vn_finished_write(mp, 0);
 3264         if (fromnd.ni_startdir)
 3265                 vrele(fromnd.ni_startdir);
 3266         PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3267         return (error == -1 ? 0 : error);
 3268 }
 3269 
 3270 /*
 3271  * Make a directory file.
 3272  */
 3273 /* ARGSUSED */
 3274 int
 3275 sys_mkdir(l, v, retval)
 3276         struct lwp *l;
 3277         void *v;
 3278         register_t *retval;
 3279 {
 3280         struct sys_mkdir_args /* {
 3281                 syscallarg(const char *) path;
 3282                 syscallarg(int) mode;
 3283         } */ *uap = v;
 3284         struct proc *p = l->l_proc;
 3285         struct mount *mp;
 3286         struct vnode *vp;
 3287         struct vattr vattr;
 3288         int error;
 3289         struct nameidata nd;
 3290 
 3291 restart:
 3292         NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
 3293             SCARG(uap, path), p);
 3294         if ((error = namei(&nd)) != 0)
 3295                 return (error);
 3296         vp = nd.ni_vp;
 3297         if (vp != NULL) {
 3298                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3299                 if (nd.ni_dvp == vp)
 3300                         vrele(nd.ni_dvp);
 3301                 else
 3302                         vput(nd.ni_dvp);
 3303                 vrele(vp);
 3304                 return (EEXIST);
 3305         }
 3306         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3307                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3308                 if (nd.ni_dvp == vp)
 3309                         vrele(nd.ni_dvp);
 3310                 else
 3311                         vput(nd.ni_dvp);
 3312                 if ((error = vn_start_write(NULL, &mp,
 3313                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3314                         return (error);
 3315                 goto restart;
 3316         }
 3317         VATTR_NULL(&vattr);
 3318         vattr.va_type = VDIR;
 3319         vattr.va_mode =
 3320             (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
 3321         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3322         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3323         if (!error)
 3324                 vput(nd.ni_vp);
 3325         vn_finished_write(mp, 0);
 3326         return (error);
 3327 }
 3328 
 3329 /*
 3330  * Remove a directory file.
 3331  */
 3332 /* ARGSUSED */
 3333 int
 3334 sys_rmdir(l, v, retval)
 3335         struct lwp *l;
 3336         void *v;
 3337         register_t *retval;
 3338 {
 3339         struct sys_rmdir_args /* {
 3340                 syscallarg(const char *) path;
 3341         } */ *uap = v;
 3342         struct proc *p = l->l_proc;
 3343         struct mount *mp;
 3344         struct vnode *vp;
 3345         int error;
 3346         struct nameidata nd;
 3347 
 3348 restart:
 3349         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 3350             SCARG(uap, path), p);
 3351         if ((error = namei(&nd)) != 0)
 3352                 return (error);
 3353         vp = nd.ni_vp;
 3354         if (vp->v_type != VDIR) {
 3355                 error = ENOTDIR;
 3356                 goto out;
 3357         }
 3358         /*
 3359          * No rmdir "." please.
 3360          */
 3361         if (nd.ni_dvp == vp) {
 3362                 error = EINVAL;
 3363                 goto out;
 3364         }
 3365         /*
 3366          * The root of a mounted filesystem cannot be deleted.
 3367          */
 3368         if (vp->v_flag & VROOT) {
 3369                 error = EBUSY;
 3370                 goto out;
 3371         }
 3372         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3373                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3374                 if (nd.ni_dvp == vp)
 3375                         vrele(nd.ni_dvp);
 3376                 else
 3377                         vput(nd.ni_dvp);
 3378                 vput(vp);
 3379                 if ((error = vn_start_write(NULL, &mp,
 3380                     V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
 3381                         return (error);
 3382                 goto restart;
 3383         }
 3384         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 3385         VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 3386         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3387         vn_finished_write(mp, 0);
 3388         return (error);
 3389 
 3390 out:
 3391         VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3392         if (nd.ni_dvp == vp)
 3393                 vrele(nd.ni_dvp);
 3394         else
 3395                 vput(nd.ni_dvp);
 3396         vput(vp);
 3397         return (error);
 3398 }
 3399 
 3400 /*
 3401  * Read a block of directory entries in a file system independent format.
 3402  */
 3403 int
 3404 sys_getdents(l, v, retval)
 3405         struct lwp *l;
 3406         void *v;
 3407         register_t *retval;
 3408 {
 3409         struct sys_getdents_args /* {
 3410                 syscallarg(int) fd;
 3411                 syscallarg(char *) buf;
 3412                 syscallarg(size_t) count;
 3413         } */ *uap = v;
 3414         struct proc *p = l->l_proc;
 3415         struct file *fp;
 3416         int error, done;
 3417 
 3418         /* getvnode() will use the descriptor for us */
 3419         if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 3420                 return (error);
 3421         if ((fp->f_flag & FREAD) == 0) {
 3422                 error = EBADF;
 3423                 goto out;
 3424         }
 3425         error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
 3426                         SCARG(uap, count), &done, p, 0, 0);
 3427 #ifdef KTRACE
 3428         if (!error && KTRPOINT(p, KTR_GENIO)) {
 3429                 struct iovec iov;
 3430                 iov.iov_base = SCARG(uap, buf);
 3431                 iov.iov_len = done;
 3432                 ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov, done, 0);
 3433         }
 3434 #endif
 3435         *retval = done;
 3436  out:
 3437         FILE_UNUSE(fp, p);
 3438         return (error);
 3439 }
 3440 
 3441 /*
 3442  * Set the mode mask for creation of filesystem nodes.
 3443  */
 3444 int
 3445 sys_umask(l, v, retval)
 3446         struct lwp *l;
 3447         void *v;
 3448         register_t *retval;
 3449 {
 3450         struct sys_umask_args /* {
 3451                 syscallarg(mode_t) newmask;
 3452         } */ *uap = v;
 3453         struct proc *p = l->l_proc;
 3454         struct cwdinfo *cwdi;
 3455 
 3456         cwdi = p->p_cwdi;
 3457         *retval = cwdi->cwdi_cmask;
 3458         cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
 3459         return (0);
 3460 }
 3461 
 3462 /*
 3463  * Void all references to file by ripping underlying filesystem
 3464  * away from vnode.
 3465  */
 3466 /* ARGSUSED */
 3467 int
 3468 sys_revoke(l, v, retval)
 3469         struct lwp *l;
 3470         void *v;
 3471         register_t *retval;
 3472 {
 3473         struct sys_revoke_args /* {
 3474                 syscallarg(const char *) path;
 3475         } */ *uap = v;
 3476         struct proc *p = l->l_proc;
 3477         struct mount *mp;
 3478         struct vnode *vp;
 3479         struct vattr vattr;
 3480         int error;
 3481         struct nameidata nd;
 3482 
 3483         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 3484         if ((error = namei(&nd)) != 0)
 3485                 return (error);
 3486         vp = nd.ni_vp;
 3487         if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 3488                 goto out;
 3489         if (p->p_ucred->cr_uid != vattr.va_uid &&
 3490             (error = suser(p->p_ucred, &p->p_acflag)) != 0)
 3491                 goto out;
 3492         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 3493                 goto out;
 3494         if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
 3495                 VOP_REVOKE(vp, REVOKEALL);
 3496         vn_finished_write(mp, 0);
 3497 out:
 3498         vrele(vp);
 3499         return (error);
 3500 }
 3501 
 3502 /*
 3503  * Convert a user file descriptor to a kernel file entry.
 3504  */
 3505 int
 3506 getvnode(fdp, fd, fpp)
 3507         struct filedesc *fdp;
 3508         int fd;
 3509         struct file **fpp;
 3510 {
 3511         struct vnode *vp;
 3512         struct file *fp;
 3513 
 3514         if ((fp = fd_getfile(fdp, fd)) == NULL)
 3515                 return (EBADF);
 3516 
 3517         FILE_USE(fp);
 3518 
 3519         if (fp->f_type != DTYPE_VNODE) {
 3520                 FILE_UNUSE(fp, NULL);
 3521                 return (EINVAL);
 3522         }
 3523 
 3524         vp = (struct vnode *)fp->f_data;
 3525         if (vp->v_type == VBAD) {
 3526                 FILE_UNUSE(fp, NULL);
 3527                 return (EBADF);
 3528         }
 3529 
 3530         *fpp = fp;
 3531         return (0);
 3532 }

Cache object: 058ac5ea99a6132d324fa01d56a021a6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.