vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: vfs_syscalls.c,v 1.376.4.6 2011/03/20 21:19:57 bouyer Exp $    */
    2 
    3 /*-
    4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26  * POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * Copyright (c) 1989, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  * (c) UNIX System Laboratories, Inc.
   33  * All or some portions of this file are derived from material licensed
   34  * to the University of California by American Telephone and Telegraph
   35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   36  * the permission of UNIX System Laboratories, Inc.
   37  *
   38  * Redistribution and use in source and binary forms, with or without
   39  * modification, are permitted provided that the following conditions
   40  * are met:
   41  * 1. Redistributions of source code must retain the above copyright
   42  *    notice, this list of conditions and the following disclaimer.
   43  * 2. Redistributions in binary form must reproduce the above copyright
   44  *    notice, this list of conditions and the following disclaimer in the
   45  *    documentation and/or other materials provided with the distribution.
   46  * 3. Neither the name of the University nor the names of its contributors
   47  *    may be used to endorse or promote products derived from this software
   48  *    without specific prior written permission.
   49  *
   50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   60  * SUCH DAMAGE.
   61  *
   62  *      @(#)vfs_syscalls.c      8.42 (Berkeley) 7/31/95
   63  */
   64 
   65 #include <sys/cdefs.h>
   66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.376.4.6 2011/03/20 21:19:57 bouyer Exp $");
   67 
   68 #include "opt_compat_netbsd.h"
   69 #include "opt_compat_43.h"
   70 #include "opt_fileassoc.h"
   71 #include "veriexec.h"
   72 
   73 #include <sys/param.h>
   74 #include <sys/systm.h>
   75 #include <sys/namei.h>
   76 #include <sys/filedesc.h>
   77 #include <sys/kernel.h>
   78 #include <sys/file.h>
   79 #include <sys/stat.h>
   80 #include <sys/vnode.h>
   81 #include <sys/mount.h>
   82 #include <sys/proc.h>
   83 #include <sys/uio.h>
   84 #include <sys/malloc.h>
   85 #include <sys/kmem.h>
   86 #include <sys/dirent.h>
   87 #include <sys/sysctl.h>
   88 #include <sys/syscallargs.h>
   89 #include <sys/vfs_syscalls.h>
   90 #include <sys/ktrace.h>
   91 #ifdef FILEASSOC
   92 #include <sys/fileassoc.h>
   93 #endif /* FILEASSOC */
   94 #include <sys/verified_exec.h>
   95 #include <sys/kauth.h>
   96 #include <sys/atomic.h>
   97 #include <sys/module.h>
   98 
   99 #include <miscfs/genfs/genfs.h>
  100 #include <miscfs/syncfs/syncfs.h>
  101 #include <miscfs/specfs/specdev.h>
  102 
  103 #ifdef COMPAT_30
  104 #include "opt_nfsserver.h"
  105 #include <nfs/rpcv2.h>
  106 #endif
  107 #include <nfs/nfsproto.h>
  108 #ifdef COMPAT_30
  109 #include <nfs/nfs.h>
  110 #include <nfs/nfs_var.h>
  111 #endif
  112 
  113 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
  114 
  115 static int change_dir(struct nameidata *, struct lwp *);
  116 static int change_flags(struct vnode *, u_long, struct lwp *);
  117 static int change_mode(struct vnode *, int, struct lwp *l);
  118 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
  119 
  120 void checkdirs(struct vnode *);
  121 
  122 int dovfsusermount = 0;
  123 
  124 /*
  125  * Virtual File System System Calls
  126  */
  127 
  128 /*
  129  * Mount a file system.
  130  */
  131 
  132 #if defined(COMPAT_09) || defined(COMPAT_43)
  133 /*
  134  * This table is used to maintain compatibility with 4.3BSD
  135  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
  136  *
  137  * Do not modify this table. It should only contain filesystems
  138  * supported by NetBSD 0.9 and 4.3BSD.
  139  */
  140 const char * const mountcompatnames[] = {
  141         NULL,           /* 0 = MOUNT_NONE */
  142         MOUNT_FFS,      /* 1 = MOUNT_UFS */
  143         MOUNT_NFS,      /* 2 */
  144         MOUNT_MFS,      /* 3 */
  145         MOUNT_MSDOS,    /* 4 */
  146         MOUNT_CD9660,   /* 5 = MOUNT_ISOFS */
  147         MOUNT_FDESC,    /* 6 */
  148         MOUNT_KERNFS,   /* 7 */
  149         NULL,           /* 8 = MOUNT_DEVFS */
  150         MOUNT_AFS,      /* 9 */
  151 };
  152 const int nmountcompatnames = sizeof(mountcompatnames) /
  153     sizeof(mountcompatnames[0]);
  154 #endif /* COMPAT_09 || COMPAT_43 */
  155 
  156 static int
  157 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
  158     void *data, size_t *data_len)
  159 {
  160         struct mount *mp;
  161         int error = 0, saved_flags;
  162 
  163         mp = vp->v_mount;
  164         saved_flags = mp->mnt_flag;
  165 
  166         /* We can operate only on VV_ROOT nodes. */
  167         if ((vp->v_vflag & VV_ROOT) == 0) {
  168                 error = EINVAL;
  169                 goto out;
  170         }
  171 
  172         /*
  173          * We only allow the filesystem to be reloaded if it
  174          * is currently mounted read-only.  Additionally, we
  175          * prevent read-write to read-only downgrades.
  176          */
  177         if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
  178             (mp->mnt_flag & MNT_RDONLY) == 0) {
  179                 error = EOPNOTSUPP;     /* Needs translation */
  180                 goto out;
  181         }
  182 
  183         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
  184             KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
  185         if (error)
  186                 goto out;
  187 
  188         if (vfs_busy(mp, NULL)) {
  189                 error = EPERM;
  190                 goto out;
  191         }
  192 
  193         mutex_enter(&mp->mnt_updating);
  194 
  195         mp->mnt_flag &= ~MNT_OP_FLAGS;
  196         mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
  197 
  198         /*
  199          * Set the mount level flags.
  200          */
  201         if (flags & MNT_RDONLY)
  202                 mp->mnt_flag |= MNT_RDONLY;
  203         else if (mp->mnt_flag & MNT_RDONLY)
  204                 mp->mnt_iflag |= IMNT_WANTRDWR;
  205         mp->mnt_flag &=
  206           ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  207             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  208             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  209             MNT_LOG);
  210         mp->mnt_flag |= flags &
  211            (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  212             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  213             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  214             MNT_LOG | MNT_IGNORE);
  215 
  216         error = VFS_MOUNT(mp, path, data, data_len);
  217 
  218 #if defined(COMPAT_30) && defined(NFSSERVER)
  219         if (error && data != NULL) {
  220                 int error2;
  221 
  222                 /* Update failed; let's try and see if it was an
  223                  * export request. */
  224                 error2 = nfs_update_exports_30(mp, path, data, l);
  225 
  226                 /* Only update error code if the export request was
  227                  * understood but some problem occurred while
  228                  * processing it. */
  229                 if (error2 != EJUSTRETURN)
  230                         error = error2;
  231         }
  232 #endif
  233         if (mp->mnt_iflag & IMNT_WANTRDWR)
  234                 mp->mnt_flag &= ~MNT_RDONLY;
  235         if (error)
  236                 mp->mnt_flag = saved_flags;
  237         mp->mnt_flag &= ~MNT_OP_FLAGS;
  238         mp->mnt_iflag &= ~IMNT_WANTRDWR;
  239         if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
  240                 if (mp->mnt_syncer == NULL)
  241                         error = vfs_allocate_syncvnode(mp);
  242         } else {
  243                 if (mp->mnt_syncer != NULL)
  244                         vfs_deallocate_syncvnode(mp);
  245         }
  246         mutex_exit(&mp->mnt_updating);
  247         vfs_unbusy(mp, false, NULL);
  248 
  249  out:
  250         return (error);
  251 }
  252 
  253 static int
  254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
  255 {
  256         char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
  257         int error;
  258 
  259         /* Copy file-system type from userspace.  */
  260         error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
  261         if (error) {
  262 #if defined(COMPAT_09) || defined(COMPAT_43)
  263                 /*
  264                  * Historically, filesystem types were identified by numbers.
  265                  * If we get an integer for the filesystem type instead of a
  266                  * string, we check to see if it matches one of the historic
  267                  * filesystem types.
  268                  */
  269                 u_long fsindex = (u_long)fstype;
  270                 if (fsindex >= nmountcompatnames ||
  271                     mountcompatnames[fsindex] == NULL)
  272                         return ENODEV;
  273                 strlcpy(fstypename, mountcompatnames[fsindex],
  274                     sizeof(fstypename));
  275 #else
  276                 return error;
  277 #endif
  278         }
  279 
  280 #ifdef  COMPAT_10
  281         /* Accept `ufs' as an alias for `ffs'. */
  282         if (strcmp(fstypename, "ufs") == 0)
  283                 fstypename[0] = 'f';
  284 #endif
  285 
  286         if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
  287                 return 0;
  288 
  289         /* If we can autoload a vfs module, try again */
  290         mutex_enter(&module_lock);
  291         (void)module_autoload(fstypename, MODULE_CLASS_VFS);
  292         mutex_exit(&module_lock);
  293 
  294         if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
  295                 return 0;
  296 
  297         return ENODEV;
  298 }
  299 
  300 static int
  301 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
  302     const char *path, int flags, void *data, size_t *data_len, u_int recurse)
  303 {
  304         struct mount *mp;
  305         struct vnode *vp = *vpp;
  306         struct vattr va;
  307         int error;
  308 
  309         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
  310             KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
  311         if (error)
  312                 return error;
  313 
  314         /* Can't make a non-dir a mount-point (from here anyway). */
  315         if (vp->v_type != VDIR)
  316                 return ENOTDIR;
  317 
  318         /*
  319          * If the user is not root, ensure that they own the directory
  320          * onto which we are attempting to mount.
  321          */
  322         if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
  323             (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
  324             (error = kauth_authorize_generic(l->l_cred,
  325             KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
  326                 return error;
  327         }
  328 
  329         if (flags & MNT_EXPORTED)
  330                 return EINVAL;
  331 
  332         if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
  333                 return error;
  334 
  335         /*
  336          * Check if a file-system is not already mounted on this vnode.
  337          */
  338         if (vp->v_mountedhere != NULL)
  339                 return EBUSY;
  340 
  341         mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
  342         if (mp == NULL)
  343                 return ENOMEM;
  344 
  345         mp->mnt_op = vfsops;
  346         mp->mnt_refcnt = 1;
  347 
  348         TAILQ_INIT(&mp->mnt_vnodelist);
  349         rw_init(&mp->mnt_unmounting);
  350         mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
  351         mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
  352         error = vfs_busy(mp, NULL);
  353         KASSERT(error == 0);
  354         mutex_enter(&mp->mnt_updating);
  355 
  356         mp->mnt_vnodecovered = vp;
  357         mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
  358         mount_initspecific(mp);
  359 
  360         /*
  361          * The underlying file system may refuse the mount for
  362          * various reasons.  Allow the user to force it to happen.
  363          *
  364          * Set the mount level flags.
  365          */
  366         mp->mnt_flag = flags &
  367            (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  368             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
  369             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
  370             MNT_LOG | MNT_IGNORE | MNT_RDONLY);
  371 
  372         error = VFS_MOUNT(mp, path, data, data_len);
  373         mp->mnt_flag &= ~MNT_OP_FLAGS;
  374 
  375         /*
  376          * Put the new filesystem on the mount list after root.
  377          */
  378         cache_purge(vp);
  379         if (error != 0) {
  380                 vp->v_mountedhere = NULL;
  381                 mutex_exit(&mp->mnt_updating);
  382                 vfs_unbusy(mp, false, NULL);
  383                 vfs_destroy(mp);
  384                 return error;
  385         }
  386 
  387         mp->mnt_iflag &= ~IMNT_WANTRDWR;
  388         mutex_enter(&mountlist_lock);
  389         vp->v_mountedhere = mp;
  390         CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  391         mutex_exit(&mountlist_lock);
  392         vn_restorerecurse(vp, recurse);
  393         VOP_UNLOCK(vp, 0);
  394         checkdirs(vp);
  395         if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  396                 error = vfs_allocate_syncvnode(mp);
  397         /* Hold an additional reference to the mount across VFS_START(). */
  398         mutex_exit(&mp->mnt_updating);
  399         vfs_unbusy(mp, true, NULL);
  400         (void) VFS_STATVFS(mp, &mp->mnt_stat);
  401         error = VFS_START(mp, 0);
  402         if (error)
  403                 vrele(vp);
  404         /* Drop reference held for VFS_START(). */
  405         vfs_destroy(mp);
  406         *vpp = NULL;
  407         return error;
  408 }
  409 
  410 static int
  411 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
  412     void *data, size_t *data_len)
  413 {
  414         struct mount *mp;
  415         int error;
  416 
  417         /* If MNT_GETARGS is specified, it should be the only flag. */
  418         if (flags & ~MNT_GETARGS)
  419                 return EINVAL;
  420 
  421         mp = vp->v_mount;
  422 
  423         /* XXX: probably some notion of "can see" here if we want isolation. */ 
  424         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
  425             KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
  426         if (error)
  427                 return error;
  428 
  429         if ((vp->v_vflag & VV_ROOT) == 0)
  430                 return EINVAL;
  431 
  432         if (vfs_busy(mp, NULL))
  433                 return EPERM;
  434 
  435         mutex_enter(&mp->mnt_updating);
  436         mp->mnt_flag &= ~MNT_OP_FLAGS;
  437         mp->mnt_flag |= MNT_GETARGS;
  438         error = VFS_MOUNT(mp, path, data, data_len);
  439         mp->mnt_flag &= ~MNT_OP_FLAGS;
  440         mutex_exit(&mp->mnt_updating);
  441 
  442         vfs_unbusy(mp, false, NULL);
  443         return (error);
  444 }
  445 
  446 #ifdef COMPAT_40
  447 /* ARGSUSED */
  448 int
  449 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval)
  450 {
  451         /* {
  452                 syscallarg(const char *) type;
  453                 syscallarg(const char *) path;
  454                 syscallarg(int) flags;
  455                 syscallarg(void *) data;
  456         } */
  457         register_t dummy;
  458 
  459         return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
  460             SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy);
  461 }
  462 #endif
  463 
  464 int
  465 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
  466 {
  467         /* {
  468                 syscallarg(const char *) type;
  469                 syscallarg(const char *) path;
  470                 syscallarg(int) flags;
  471                 syscallarg(void *) data;
  472                 syscallarg(size_t) data_len;
  473         } */
  474 
  475         return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
  476             SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
  477             SCARG(uap, data_len), retval);
  478 }
  479 
  480 int
  481 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
  482     const char *path, int flags, void *data, enum uio_seg data_seg,
  483     size_t data_len, register_t *retval)
  484 {
  485         struct vnode *vp;
  486         struct nameidata nd;
  487         void *data_buf = data;
  488         u_int recurse;
  489         int error;
  490 
  491         /*
  492          * Get vnode to be covered
  493          */
  494         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
  495         if ((error = namei(&nd)) != 0)
  496                 return (error);
  497         vp = nd.ni_vp;
  498 
  499         /*
  500          * A lookup in VFS_MOUNT might result in an attempt to
  501          * lock this vnode again, so make the lock recursive.
  502          */
  503         if (vfsops == NULL) {
  504                 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
  505                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  506                         recurse = vn_setrecurse(vp);
  507                         vfsops = vp->v_mount->mnt_op;
  508                 } else {
  509                         /* 'type' is userspace */
  510                         error = mount_get_vfsops(type, &vfsops);
  511                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  512                         recurse = vn_setrecurse(vp);
  513                         if (error != 0)
  514                                 goto done;
  515                 }
  516         } else {
  517                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  518                 recurse = vn_setrecurse(vp);
  519         }
  520 
  521         if (data != NULL && data_seg == UIO_USERSPACE) {
  522                 if (data_len == 0) {
  523                         /* No length supplied, use default for filesystem */
  524                         data_len = vfsops->vfs_min_mount_data;
  525                         if (data_len > VFS_MAX_MOUNT_DATA) {
  526                                 /* maybe a force loaded old LKM */
  527                                 error = EINVAL;
  528                                 goto done;
  529                         }
  530 #ifdef COMPAT_30
  531                         /* Hopefully a longer buffer won't make copyin() fail */
  532                         if (flags & MNT_UPDATE
  533                             && data_len < sizeof (struct mnt_export_args30))
  534                                 data_len = sizeof (struct mnt_export_args30);
  535 #endif
  536                 }
  537                 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
  538 
  539                 /* NFS needs the buffer even for mnt_getargs .... */
  540                 error = copyin(data, data_buf, data_len);
  541                 if (error != 0)
  542                         goto done;
  543         }
  544 
  545         if (flags & MNT_GETARGS) {
  546                 if (data_len == 0) {
  547                         error = EINVAL;
  548                         goto done;
  549                 }
  550                 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
  551                 if (error != 0)
  552                         goto done;
  553                 if (data_seg == UIO_USERSPACE)
  554                         error = copyout(data_buf, data, data_len);
  555                 *retval = data_len;
  556         } else if (flags & MNT_UPDATE) {
  557                 error = mount_update(l, vp, path, flags, data_buf, &data_len);
  558         } else {
  559                 /* Locking is handled internally in mount_domount(). */
  560                 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
  561                     &data_len, recurse);
  562         }
  563 
  564     done:
  565         if (vp != NULL) {
  566                 vn_restorerecurse(vp, recurse);
  567                 vput(vp);
  568         }
  569         if (data_buf != data)
  570                 free(data_buf, M_TEMP);
  571         return (error);
  572 }
  573 
  574 /*
  575  * Scan all active processes to see if any of them have a current
  576  * or root directory onto which the new filesystem has just been
  577  * mounted. If so, replace them with the new mount point.
  578  */
  579 void
  580 checkdirs(struct vnode *olddp)
  581 {
  582         struct cwdinfo *cwdi;
  583         struct vnode *newdp, *rele1, *rele2;
  584         struct proc *p;
  585         bool retry;
  586 
  587         if (olddp->v_usecount == 1)
  588                 return;
  589         if (VFS_ROOT(olddp->v_mountedhere, &newdp))
  590                 panic("mount: lost mount");
  591 
  592         do {
  593                 retry = false;
  594                 mutex_enter(proc_lock);
  595                 PROCLIST_FOREACH(p, &allproc) {
  596                         if ((p->p_flag & PK_MARKER) != 0)
  597                                 continue;
  598                         if ((cwdi = p->p_cwdi) == NULL)
  599                                 continue;
  600                         /*
  601                          * Can't change to the old directory any more,
  602                          * so even if we see a stale value it's not a
  603                          * problem.
  604                          */
  605                         if (cwdi->cwdi_cdir != olddp &&
  606                             cwdi->cwdi_rdir != olddp)
  607                                 continue;
  608                         retry = true;
  609                         rele1 = NULL;
  610                         rele2 = NULL;
  611                         atomic_inc_uint(&cwdi->cwdi_refcnt);
  612                         mutex_exit(proc_lock);
  613                         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
  614                         if (cwdi->cwdi_cdir == olddp) {
  615                                 rele1 = cwdi->cwdi_cdir;
  616                                 VREF(newdp);
  617                                 cwdi->cwdi_cdir = newdp;
  618                         }
  619                         if (cwdi->cwdi_rdir == olddp) {
  620                                 rele2 = cwdi->cwdi_rdir;
  621                                 VREF(newdp);
  622                                 cwdi->cwdi_rdir = newdp;
  623                         }
  624                         rw_exit(&cwdi->cwdi_lock);
  625                         cwdfree(cwdi);
  626                         if (rele1 != NULL)
  627                                 vrele(rele1);
  628                         if (rele2 != NULL)
  629                                 vrele(rele2);
  630                         mutex_enter(proc_lock);
  631                         break;
  632                 }
  633                 mutex_exit(proc_lock);
  634         } while (retry);
  635 
  636         if (rootvnode == olddp) {
  637                 vrele(rootvnode);
  638                 VREF(newdp);
  639                 rootvnode = newdp;
  640         }
  641         vput(newdp);
  642 }
  643 
  644 /*
  645  * Unmount a file system.
  646  *
  647  * Note: unmount takes a path to the vnode mounted on as argument,
  648  * not special file (as before).
  649  */
  650 /* ARGSUSED */
  651 int
  652 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
  653 {
  654         /* {
  655                 syscallarg(const char *) path;
  656                 syscallarg(int) flags;
  657         } */
  658         struct vnode *vp;
  659         struct mount *mp;
  660         int error;
  661         struct nameidata nd;
  662 
  663         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
  664             SCARG(uap, path));
  665         if ((error = namei(&nd)) != 0)
  666                 return (error);
  667         vp = nd.ni_vp;
  668         mp = vp->v_mount;
  669         atomic_inc_uint(&mp->mnt_refcnt);
  670         VOP_UNLOCK(vp, 0);
  671 
  672         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
  673             KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
  674         if (error) {
  675                 vrele(vp);
  676                 vfs_destroy(mp);
  677                 return (error);
  678         }
  679 
  680         /*
  681          * Don't allow unmounting the root file system.
  682          */
  683         if (mp->mnt_flag & MNT_ROOTFS) {
  684                 vrele(vp);
  685                 vfs_destroy(mp);
  686                 return (EINVAL);
  687         }
  688 
  689         /*
  690          * Must be the root of the filesystem
  691          */
  692         if ((vp->v_vflag & VV_ROOT) == 0) {
  693                 vrele(vp);
  694                 vfs_destroy(mp);
  695                 return (EINVAL);
  696         }
  697 
  698         vrele(vp);
  699         error = dounmount(mp, SCARG(uap, flags), l);
  700         vfs_destroy(mp);
  701         return error;
  702 }
  703 
  704 /*
  705  * Do the actual file system unmount.  File system is assumed to have
  706  * been locked by the caller.
  707  *
  708  * => Caller hold reference to the mount, explicitly for dounmount().
  709  */
  710 int
  711 dounmount(struct mount *mp, int flags, struct lwp *l)
  712 {
  713         struct vnode *coveredvp;
  714         int error;
  715         int async;
  716         int used_syncer;
  717 
  718 #if NVERIEXEC > 0
  719         error = veriexec_unmountchk(mp);
  720         if (error)
  721                 return (error);
  722 #endif /* NVERIEXEC > 0 */
  723 
  724         /*
  725          * XXX Freeze syncer.  Must do this before locking the
  726          * mount point.  See dounmount() for details.
  727          */
  728         mutex_enter(&syncer_mutex);
  729         rw_enter(&mp->mnt_unmounting, RW_WRITER);
  730         if ((mp->mnt_iflag & IMNT_GONE) != 0) {
  731                 rw_exit(&mp->mnt_unmounting);
  732                 mutex_exit(&syncer_mutex);
  733                 return ENOENT;
  734         }
  735 
  736         used_syncer = (mp->mnt_syncer != NULL);
  737 
  738         /*
  739          * XXX Syncer must be frozen when we get here.  This should really
  740          * be done on a per-mountpoint basis, but especially the softdep
  741          * code possibly called from the syncer doesn't exactly work on a
  742          * per-mountpoint basis, so the softdep code would become a maze
  743          * of vfs_busy() calls.
  744          *
  745          * The caller of dounmount() must acquire syncer_mutex because
  746          * the syncer itself acquires locks in syncer_mutex -> vfs_busy
  747          * order, and we must preserve that order to avoid deadlock.
  748          *
  749          * So, if the file system did not use the syncer, now is
  750          * the time to release the syncer_mutex.
  751          */
  752         if (used_syncer == 0)
  753                 mutex_exit(&syncer_mutex);
  754 
  755         mp->mnt_iflag |= IMNT_UNMOUNT;
  756         async = mp->mnt_flag & MNT_ASYNC;
  757         mp->mnt_flag &= ~MNT_ASYNC;
  758         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  759         if (mp->mnt_syncer != NULL)
  760                 vfs_deallocate_syncvnode(mp);
  761         error = 0;
  762         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  763                 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
  764         }
  765         vfs_scrubvnlist(mp);
  766         if (error == 0 || (flags & MNT_FORCE))
  767                 error = VFS_UNMOUNT(mp, flags);
  768         if (error) {
  769                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  770                         (void) vfs_allocate_syncvnode(mp);
  771                 mp->mnt_iflag &= ~IMNT_UNMOUNT;
  772                 mp->mnt_flag |= async;
  773                 rw_exit(&mp->mnt_unmounting);
  774                 if (used_syncer)
  775                         mutex_exit(&syncer_mutex);
  776                 return (error);
  777         }
  778         vfs_scrubvnlist(mp);
  779         mutex_enter(&mountlist_lock);
  780         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
  781                 coveredvp->v_mountedhere = NULL;
  782         CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
  783         mp->mnt_iflag |= IMNT_GONE;
  784         mutex_exit(&mountlist_lock);
  785         if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
  786                 panic("unmount: dangling vnode");
  787         if (used_syncer)
  788                 mutex_exit(&syncer_mutex);
  789         vfs_hooks_unmount(mp);
  790         rw_exit(&mp->mnt_unmounting);
  791         vfs_destroy(mp);        /* reference from mount() */
  792         if (coveredvp != NULLVP)
  793                 vrele(coveredvp);
  794         return (0);
  795 }
  796 
  797 /*
  798  * Sync each mounted filesystem.
  799  */
  800 #ifdef DEBUG
  801 int syncprt = 0;
  802 struct ctldebug debug0 = { "syncprt", &syncprt };
  803 #endif
  804 
  805 /* ARGSUSED */
  806 int
  807 sys_sync(struct lwp *l, const void *v, register_t *retval)
  808 {
  809         struct mount *mp, *nmp;
  810         int asyncflag;
  811 
  812         if (l == NULL)
  813                 l = &lwp0;
  814 
  815         mutex_enter(&mountlist_lock);
  816         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
  817              mp = nmp) {
  818                 if (vfs_busy(mp, &nmp)) {
  819                         continue;
  820                 }
  821                 mutex_enter(&mp->mnt_updating);
  822                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  823                         asyncflag = mp->mnt_flag & MNT_ASYNC;
  824                         mp->mnt_flag &= ~MNT_ASYNC;
  825                         VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
  826                         if (asyncflag)
  827                                  mp->mnt_flag |= MNT_ASYNC;
  828                 }
  829                 mutex_exit(&mp->mnt_updating);
  830                 vfs_unbusy(mp, false, &nmp);
  831         }
  832         mutex_exit(&mountlist_lock);
  833 #ifdef DEBUG
  834         if (syncprt)
  835                 vfs_bufstats();
  836 #endif /* DEBUG */
  837         return (0);
  838 }
  839 
  840 /*
  841  * Change filesystem quotas.
  842  */
  843 /* ARGSUSED */
  844 int
  845 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
  846 {
  847         /* {
  848                 syscallarg(const char *) path;
  849                 syscallarg(int) cmd;
  850                 syscallarg(int) uid;
  851                 syscallarg(void *) arg;
  852         } */
  853         struct mount *mp;
  854         int error;
  855         struct nameidata nd;
  856 
  857         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
  858             SCARG(uap, path));
  859         if ((error = namei(&nd)) != 0)
  860                 return (error);
  861         mp = nd.ni_vp->v_mount;
  862         error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
  863             SCARG(uap, arg));
  864         vrele(nd.ni_vp);
  865         return (error);
  866 }
  867 
  868 int
  869 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
  870     int root)
  871 {
  872         struct cwdinfo *cwdi = l->l_proc->p_cwdi;
  873         int error = 0;
  874 
  875         /*
  876          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  877          * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
  878          * overrides MNT_NOWAIT.
  879          */
  880         if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
  881             (flags != MNT_WAIT && flags != 0)) {
  882                 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
  883                 goto done;
  884         }
  885 
  886         /* Get the filesystem stats now */
  887         memset(sp, 0, sizeof(*sp));
  888         if ((error = VFS_STATVFS(mp, sp)) != 0) {
  889                 return error;
  890         }
  891 
  892         if (cwdi->cwdi_rdir == NULL)
  893                 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
  894 done:
  895         if (cwdi->cwdi_rdir != NULL) {
  896                 size_t len;
  897                 char *bp;
  898                 char c;
  899                 char *path = PNBUF_GET();
  900 
  901                 bp = path + MAXPATHLEN;
  902                 *--bp = '\0';
  903                 rw_enter(&cwdi->cwdi_lock, RW_READER);
  904                 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
  905                     MAXPATHLEN / 2, 0, l);
  906                 rw_exit(&cwdi->cwdi_lock);
  907                 if (error) {
  908                         PNBUF_PUT(path);
  909                         return error;
  910                 }
  911                 len = strlen(bp);
  912                 /*
  913                  * for mount points that are below our root, we can see
  914                  * them, so we fix up the pathname and return them. The
  915                  * rest we cannot see, so we don't allow viewing the
  916                  * data.
  917                  */
  918                 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
  919                     ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
  920                         (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
  921                             sizeof(sp->f_mntonname));
  922                         if (sp->f_mntonname[0] == '\0')
  923                                 (void)strlcpy(sp->f_mntonname, "/",
  924                                     sizeof(sp->f_mntonname));
  925                 } else {
  926                         if (root)
  927                                 (void)strlcpy(sp->f_mntonname, "/",
  928                                     sizeof(sp->f_mntonname));
  929                         else
  930                                 error = EPERM;
  931                 }
  932                 PNBUF_PUT(path);
  933         }
  934         sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
  935         return error;
  936 }
  937 
  938 /*
  939  * Get filesystem statistics by path.
  940  */
  941 int
  942 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
  943 {
  944         struct mount *mp;
  945         int error;
  946         struct nameidata nd;
  947 
  948         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
  949         if ((error = namei(&nd)) != 0)
  950                 return error;
  951         mp = nd.ni_vp->v_mount;
  952         error = dostatvfs(mp, sb, l, flags, 1);
  953         vrele(nd.ni_vp);
  954         return error;
  955 }
  956 
  957 /* ARGSUSED */
  958 int
  959 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
  960 {
  961         /* {
  962                 syscallarg(const char *) path;
  963                 syscallarg(struct statvfs *) buf;
  964                 syscallarg(int) flags;
  965         } */
  966         struct statvfs *sb;
  967         int error;
  968 
  969         sb = STATVFSBUF_GET();
  970         error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
  971         if (error == 0)
  972                 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
  973         STATVFSBUF_PUT(sb);
  974         return error;
  975 }
  976 
  977 /*
  978  * Get filesystem statistics by fd.
  979  */
  980 int
  981 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
  982 {
  983         file_t *fp;
  984         struct mount *mp;
  985         int error;
  986 
  987         /* fd_getvnode() will use the descriptor for us */
  988         if ((error = fd_getvnode(fd, &fp)) != 0)
  989                 return (error);
  990         mp = ((struct vnode *)fp->f_data)->v_mount;
  991         error = dostatvfs(mp, sb, curlwp, flags, 1);
  992         fd_putfile(fd);
  993         return error;
  994 }
  995 
  996 /* ARGSUSED */
  997 int
  998 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
  999 {
 1000         /* {
 1001                 syscallarg(int) fd;
 1002                 syscallarg(struct statvfs *) buf;
 1003                 syscallarg(int) flags;
 1004         } */
 1005         struct statvfs *sb;
 1006         int error;
 1007 
 1008         sb = STATVFSBUF_GET();
 1009         error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
 1010         if (error == 0)
 1011                 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
 1012         STATVFSBUF_PUT(sb);
 1013         return error;
 1014 }
 1015 
 1016 
 1017 /*
 1018  * Get statistics on all filesystems.
 1019  */
 1020 int
 1021 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
 1022     int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
 1023     register_t *retval)
 1024 {
 1025         int root = 0;
 1026         struct proc *p = l->l_proc;
 1027         struct mount *mp, *nmp;
 1028         struct statvfs *sb;
 1029         size_t count, maxcount;
 1030         int error = 0;
 1031 
 1032         sb = STATVFSBUF_GET();
 1033         maxcount = bufsize / entry_sz;
 1034         mutex_enter(&mountlist_lock);
 1035         count = 0;
 1036         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 1037              mp = nmp) {
 1038                 if (vfs_busy(mp, &nmp)) {
 1039                         continue;
 1040                 }
 1041                 if (sfsp && count < maxcount) {
 1042                         error = dostatvfs(mp, sb, l, flags, 0);
 1043                         if (error) {
 1044                                 vfs_unbusy(mp, false, &nmp);
 1045                                 error = 0;
 1046                                 continue;
 1047                         }
 1048                         error = copyfn(sb, sfsp, entry_sz);
 1049                         if (error) {
 1050                                 vfs_unbusy(mp, false, NULL);
 1051                                 goto out;
 1052                         }
 1053                         sfsp = (char *)sfsp + entry_sz;
 1054                         root |= strcmp(sb->f_mntonname, "/") == 0;
 1055                 }
 1056                 count++;
 1057                 vfs_unbusy(mp, false, &nmp);
 1058         }
 1059         mutex_exit(&mountlist_lock);
 1060 
 1061         if (root == 0 && p->p_cwdi->cwdi_rdir) {
 1062                 /*
 1063                  * fake a root entry
 1064                  */
 1065                 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
 1066                     sb, l, flags, 1);
 1067                 if (error != 0)
 1068                         goto out;
 1069                 if (sfsp) {
 1070                         error = copyfn(sb, sfsp, entry_sz);
 1071                         if (error != 0)
 1072                                 goto out;
 1073                 }
 1074                 count++;
 1075         }
 1076         if (sfsp && count > maxcount)
 1077                 *retval = maxcount;
 1078         else
 1079                 *retval = count;
 1080 out:
 1081         STATVFSBUF_PUT(sb);
 1082         return error;
 1083 }
 1084 
 1085 int
 1086 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
 1087 {
 1088         /* {
 1089                 syscallarg(struct statvfs *) buf;
 1090                 syscallarg(size_t) bufsize;
 1091                 syscallarg(int) flags;
 1092         } */
 1093 
 1094         return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
 1095             SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
 1096 }
 1097 
 1098 /*
 1099  * Change current working directory to a given file descriptor.
 1100  */
 1101 /* ARGSUSED */
 1102 int
 1103 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
 1104 {
 1105         /* {
 1106                 syscallarg(int) fd;
 1107         } */
 1108         struct proc *p = l->l_proc;
 1109         struct cwdinfo *cwdi;
 1110         struct vnode *vp, *tdp;
 1111         struct mount *mp;
 1112         file_t *fp;
 1113         int error, fd;
 1114 
 1115         /* fd_getvnode() will use the descriptor for us */
 1116         fd = SCARG(uap, fd);
 1117         if ((error = fd_getvnode(fd, &fp)) != 0)
 1118                 return (error);
 1119         vp = fp->f_data;
 1120 
 1121         VREF(vp);
 1122         vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
 1123         if (vp->v_type != VDIR)
 1124                 error = ENOTDIR;
 1125         else
 1126                 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
 1127         if (error) {
 1128                 vput(vp);
 1129                 goto out;
 1130         }
 1131         while ((mp = vp->v_mountedhere) != NULL) {
 1132                 error = vfs_busy(mp, NULL);
 1133                 vput(vp);
 1134                 if (error != 0)
 1135                         goto out;
 1136                 error = VFS_ROOT(mp, &tdp);
 1137                 vfs_unbusy(mp, false, NULL);
 1138                 if (error)
 1139                         goto out;
 1140                 vp = tdp;
 1141         }
 1142         VOP_UNLOCK(vp, 0);
 1143 
 1144         /*
 1145          * Disallow changing to a directory not under the process's
 1146          * current root directory (if there is one).
 1147          */
 1148         cwdi = p->p_cwdi;
 1149         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
 1150         if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
 1151                 vrele(vp);
 1152                 error = EPERM;  /* operation not permitted */
 1153         } else {
 1154                 vrele(cwdi->cwdi_cdir);
 1155                 cwdi->cwdi_cdir = vp;
 1156         }
 1157         rw_exit(&cwdi->cwdi_lock);
 1158 
 1159  out:
 1160         fd_putfile(fd);
 1161         return (error);
 1162 }
 1163 
 1164 /*
 1165  * Change this process's notion of the root directory to a given file
 1166  * descriptor.
 1167  */
 1168 int
 1169 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
 1170 {
 1171         struct proc *p = l->l_proc;
 1172         struct cwdinfo *cwdi;
 1173         struct vnode    *vp;
 1174         file_t  *fp;
 1175         int              error, fd = SCARG(uap, fd);
 1176 
 1177         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
 1178             KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
 1179                 return error;
 1180         /* fd_getvnode() will use the descriptor for us */
 1181         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 1182                 return error;
 1183         vp = fp->f_data;
 1184         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1185         if (vp->v_type != VDIR)
 1186                 error = ENOTDIR;
 1187         else
 1188                 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
 1189         VOP_UNLOCK(vp, 0);
 1190         if (error)
 1191                 goto out;
 1192         VREF(vp);
 1193 
 1194         /*
 1195          * Prevent escaping from chroot by putting the root under
 1196          * the working directory.  Silently chdir to / if we aren't
 1197          * already there.
 1198          */
 1199         cwdi = p->p_cwdi;
 1200         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
 1201         if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
 1202                 /*
 1203                  * XXX would be more failsafe to change directory to a
 1204                  * deadfs node here instead
 1205                  */
 1206                 vrele(cwdi->cwdi_cdir);
 1207                 VREF(vp);
 1208                 cwdi->cwdi_cdir = vp;
 1209         }
 1210 
 1211         if (cwdi->cwdi_rdir != NULL)
 1212                 vrele(cwdi->cwdi_rdir);
 1213         cwdi->cwdi_rdir = vp;
 1214         rw_exit(&cwdi->cwdi_lock);
 1215 
 1216  out:
 1217         fd_putfile(fd);
 1218         return (error);
 1219 }
 1220 
 1221 /*
 1222  * Change current working directory (``.'').
 1223  */
 1224 /* ARGSUSED */
 1225 int
 1226 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
 1227 {
 1228         /* {
 1229                 syscallarg(const char *) path;
 1230         } */
 1231         struct proc *p = l->l_proc;
 1232         struct cwdinfo *cwdi;
 1233         int error;
 1234         struct nameidata nd;
 1235 
 1236         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 1237             SCARG(uap, path));
 1238         if ((error = change_dir(&nd, l)) != 0)
 1239                 return (error);
 1240         cwdi = p->p_cwdi;
 1241         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
 1242         vrele(cwdi->cwdi_cdir);
 1243         cwdi->cwdi_cdir = nd.ni_vp;
 1244         rw_exit(&cwdi->cwdi_lock);
 1245         return (0);
 1246 }
 1247 
 1248 /*
 1249  * Change notion of root (``/'') directory.
 1250  */
 1251 /* ARGSUSED */
 1252 int
 1253 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
 1254 {
 1255         /* {
 1256                 syscallarg(const char *) path;
 1257         } */
 1258         struct proc *p = l->l_proc;
 1259         struct cwdinfo *cwdi;
 1260         struct vnode *vp;
 1261         int error;
 1262         struct nameidata nd;
 1263 
 1264         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
 1265             KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
 1266                 return (error);
 1267         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 1268             SCARG(uap, path));
 1269         if ((error = change_dir(&nd, l)) != 0)
 1270                 return (error);
 1271 
 1272         cwdi = p->p_cwdi;
 1273         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
 1274         if (cwdi->cwdi_rdir != NULL)
 1275                 vrele(cwdi->cwdi_rdir);
 1276         vp = nd.ni_vp;
 1277         cwdi->cwdi_rdir = vp;
 1278 
 1279         /*
 1280          * Prevent escaping from chroot by putting the root under
 1281          * the working directory.  Silently chdir to / if we aren't
 1282          * already there.
 1283          */
 1284         if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
 1285                 /*
 1286                  * XXX would be more failsafe to change directory to a
 1287                  * deadfs node here instead
 1288                  */
 1289                 vrele(cwdi->cwdi_cdir);
 1290                 VREF(vp);
 1291                 cwdi->cwdi_cdir = vp;
 1292         }
 1293         rw_exit(&cwdi->cwdi_lock);
 1294 
 1295         return (0);
 1296 }
 1297 
 1298 /*
 1299  * Common routine for chroot and chdir.
 1300  */
 1301 static int
 1302 change_dir(struct nameidata *ndp, struct lwp *l)
 1303 {
 1304         struct vnode *vp;
 1305         int error;
 1306 
 1307         if ((error = namei(ndp)) != 0)
 1308                 return (error);
 1309         vp = ndp->ni_vp;
 1310         if (vp->v_type != VDIR)
 1311                 error = ENOTDIR;
 1312         else
 1313                 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
 1314 
 1315         if (error)
 1316                 vput(vp);
 1317         else
 1318                 VOP_UNLOCK(vp, 0);
 1319         return (error);
 1320 }
 1321 
 1322 /*
 1323  * Check permissions, allocate an open file structure,
 1324  * and call the device open routine if any.
 1325  */
 1326 int
 1327 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
 1328 {
 1329         /* {
 1330                 syscallarg(const char *) path;
 1331                 syscallarg(int) flags;
 1332                 syscallarg(int) mode;
 1333         } */
 1334         struct proc *p = l->l_proc;
 1335         struct cwdinfo *cwdi = p->p_cwdi;
 1336         file_t *fp;
 1337         struct vnode *vp;
 1338         int flags, cmode;
 1339         int type, indx, error;
 1340         struct flock lf;
 1341         struct nameidata nd;
 1342 
 1343         flags = FFLAGS(SCARG(uap, flags));
 1344         if ((flags & (FREAD | FWRITE)) == 0)
 1345                 return (EINVAL);
 1346         if ((error = fd_allocfile(&fp, &indx)) != 0)
 1347                 return (error);
 1348         /* We're going to read cwdi->cwdi_cmask unlocked here. */
 1349         cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
 1350         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 1351             SCARG(uap, path));
 1352         l->l_dupfd = -indx - 1;                 /* XXX check for fdopen */
 1353         if ((error = vn_open(&nd, flags, cmode)) != 0) {
 1354                 fd_abort(p, fp, indx);
 1355                 if ((error == EDUPFD || error == EMOVEFD) &&
 1356                     l->l_dupfd >= 0 &&                  /* XXX from fdopen */
 1357                     (error =
 1358                         fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
 1359                         *retval = indx;
 1360                         return (0);
 1361                 }
 1362                 if (error == ERESTART)
 1363                         error = EINTR;
 1364                 return (error);
 1365         }
 1366 
 1367         l->l_dupfd = 0;
 1368         vp = nd.ni_vp;
 1369         fp->f_flag = flags & FMASK;
 1370         fp->f_type = DTYPE_VNODE;
 1371         fp->f_ops = &vnops;
 1372         fp->f_data = vp;
 1373         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1374                 lf.l_whence = SEEK_SET;
 1375                 lf.l_start = 0;
 1376                 lf.l_len = 0;
 1377                 if (flags & O_EXLOCK)
 1378                         lf.l_type = F_WRLCK;
 1379                 else
 1380                         lf.l_type = F_RDLCK;
 1381                 type = F_FLOCK;
 1382                 if ((flags & FNONBLOCK) == 0)
 1383                         type |= F_WAIT;
 1384                 VOP_UNLOCK(vp, 0);
 1385                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1386                 if (error) {
 1387                         (void) vn_close(vp, fp->f_flag, fp->f_cred);
 1388                         fd_abort(p, fp, indx);
 1389                         return (error);
 1390                 }
 1391                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1392                 atomic_or_uint(&fp->f_flag, FHASLOCK);
 1393         }
 1394         VOP_UNLOCK(vp, 0);
 1395         *retval = indx;
 1396         fd_affix(p, fp, indx);
 1397         return (0);
 1398 }
 1399 
 1400 static void
 1401 vfs__fhfree(fhandle_t *fhp)
 1402 {
 1403         size_t fhsize;
 1404 
 1405         if (fhp == NULL) {
 1406                 return;
 1407         }
 1408         fhsize = FHANDLE_SIZE(fhp);
 1409         kmem_free(fhp, fhsize);
 1410 }
 1411 
 1412 /*
 1413  * vfs_composefh: compose a filehandle.
 1414  */
 1415 
 1416 int
 1417 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
 1418 {
 1419         struct mount *mp;
 1420         struct fid *fidp;
 1421         int error;
 1422         size_t needfhsize;
 1423         size_t fidsize;
 1424 
 1425         mp = vp->v_mount;
 1426         fidp = NULL;
 1427         if (*fh_size < FHANDLE_SIZE_MIN) {
 1428                 fidsize = 0;
 1429         } else {
 1430                 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
 1431                 if (fhp != NULL) {
 1432                         memset(fhp, 0, *fh_size);
 1433                         fhp->fh_fsid = mp->mnt_stat.f_fsidx;
 1434                         fidp = &fhp->fh_fid;
 1435                 }
 1436         }
 1437         error = VFS_VPTOFH(vp, fidp, &fidsize);
 1438         needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
 1439         if (error == 0 && *fh_size < needfhsize) {
 1440                 error = E2BIG;
 1441         }
 1442         *fh_size = needfhsize;
 1443         return error;
 1444 }
 1445 
 1446 int
 1447 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
 1448 {
 1449         struct mount *mp;
 1450         fhandle_t *fhp;
 1451         size_t fhsize;
 1452         size_t fidsize;
 1453         int error;
 1454 
 1455         *fhpp = NULL;
 1456         mp = vp->v_mount;
 1457         fidsize = 0;
 1458         error = VFS_VPTOFH(vp, NULL, &fidsize);
 1459         KASSERT(error != 0);
 1460         if (error != E2BIG) {
 1461                 goto out;
 1462         }
 1463         fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
 1464         fhp = kmem_zalloc(fhsize, KM_SLEEP);
 1465         if (fhp == NULL) {
 1466                 error = ENOMEM;
 1467                 goto out;
 1468         }
 1469         fhp->fh_fsid = mp->mnt_stat.f_fsidx;
 1470         error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
 1471         if (error == 0) {
 1472                 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
 1473                     FHANDLE_FILEID(fhp)->fid_len == fidsize));
 1474                 *fhpp = fhp;
 1475         } else {
 1476                 kmem_free(fhp, fhsize);
 1477         }
 1478 out:
 1479         return error;
 1480 }
 1481 
 1482 void
 1483 vfs_composefh_free(fhandle_t *fhp)
 1484 {
 1485 
 1486         vfs__fhfree(fhp);
 1487 }
 1488 
 1489 /*
 1490  * vfs_fhtovp: lookup a vnode by a filehandle.
 1491  */
 1492 
 1493 int
 1494 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
 1495 {
 1496         struct mount *mp;
 1497         int error;
 1498 
 1499         *vpp = NULL;
 1500         mp = vfs_getvfs(FHANDLE_FSID(fhp));
 1501         if (mp == NULL) {
 1502                 error = ESTALE;
 1503                 goto out;
 1504         }
 1505         if (mp->mnt_op->vfs_fhtovp == NULL) {
 1506                 error = EOPNOTSUPP;
 1507                 goto out;
 1508         }
 1509         error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
 1510 out:
 1511         return error;
 1512 }
 1513 
 1514 /*
 1515  * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
 1516  * the needed size.
 1517  */
 1518 
 1519 int
 1520 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
 1521 {
 1522         fhandle_t *fhp;
 1523         int error;
 1524 
 1525         *fhpp = NULL;
 1526         if (fhsize > FHANDLE_SIZE_MAX) {
 1527                 return EINVAL;
 1528         }
 1529         if (fhsize < FHANDLE_SIZE_MIN) {
 1530                 return EINVAL;
 1531         }
 1532 again:
 1533         fhp = kmem_alloc(fhsize, KM_SLEEP);
 1534         if (fhp == NULL) {
 1535                 return ENOMEM;
 1536         }
 1537         error = copyin(ufhp, fhp, fhsize);
 1538         if (error == 0) {
 1539                 /* XXX this check shouldn't be here */
 1540                 if (FHANDLE_SIZE(fhp) == fhsize) {
 1541                         *fhpp = fhp;
 1542                         return 0;
 1543                 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
 1544                         /*
 1545                          * a kludge for nfsv2 padded handles.
 1546                          */
 1547                         size_t sz;
 1548 
 1549                         sz = FHANDLE_SIZE(fhp);
 1550                         kmem_free(fhp, fhsize);
 1551                         fhsize = sz;
 1552                         goto again;
 1553                 } else {
 1554                         /*
 1555                          * userland told us wrong size.
 1556                          */
 1557                         error = EINVAL;
 1558                 }
 1559         }
 1560         kmem_free(fhp, fhsize);
 1561         return error;
 1562 }
 1563 
 1564 void
 1565 vfs_copyinfh_free(fhandle_t *fhp)
 1566 {
 1567 
 1568         vfs__fhfree(fhp);
 1569 }
 1570 
 1571 /*
 1572  * Get file handle system call
 1573  */
 1574 int
 1575 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
 1576 {
 1577         /* {
 1578                 syscallarg(char *) fname;
 1579                 syscallarg(fhandle_t *) fhp;
 1580                 syscallarg(size_t *) fh_size;
 1581         } */
 1582         struct vnode *vp;
 1583         fhandle_t *fh;
 1584         int error;
 1585         struct nameidata nd;
 1586         size_t sz;
 1587         size_t usz;
 1588 
 1589         /*
 1590          * Must be super user
 1591          */
 1592         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1593             0, NULL, NULL, NULL);
 1594         if (error)
 1595                 return (error);
 1596         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 1597             SCARG(uap, fname));
 1598         error = namei(&nd);
 1599         if (error)
 1600                 return (error);
 1601         vp = nd.ni_vp;
 1602         error = vfs_composefh_alloc(vp, &fh);
 1603         vput(vp);
 1604         if (error != 0) {
 1605                 goto out;
 1606         }
 1607         error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
 1608         if (error != 0) {
 1609                 goto out;
 1610         }
 1611         sz = FHANDLE_SIZE(fh);
 1612         error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
 1613         if (error != 0) {
 1614                 goto out;
 1615         }
 1616         if (usz >= sz) {
 1617                 error = copyout(fh, SCARG(uap, fhp), sz);
 1618         } else {
 1619                 error = E2BIG;
 1620         }
 1621 out:
 1622         vfs_composefh_free(fh);
 1623         return (error);
 1624 }
 1625 
 1626 /*
 1627  * Open a file given a file handle.
 1628  *
 1629  * Check permissions, allocate an open file structure,
 1630  * and call the device open routine if any.
 1631  */
 1632 
 1633 int
 1634 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
 1635     register_t *retval)
 1636 {
 1637         file_t *fp;
 1638         struct vnode *vp = NULL;
 1639         kauth_cred_t cred = l->l_cred;
 1640         file_t *nfp;
 1641         int type, indx, error=0;
 1642         struct flock lf;
 1643         struct vattr va;
 1644         fhandle_t *fh;
 1645         int flags;
 1646         proc_t *p;
 1647 
 1648         p = curproc;
 1649 
 1650         /*
 1651          * Must be super user
 1652          */
 1653         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1654             0, NULL, NULL, NULL)))
 1655                 return (error);
 1656 
 1657         flags = FFLAGS(oflags);
 1658         if ((flags & (FREAD | FWRITE)) == 0)
 1659                 return (EINVAL);
 1660         if ((flags & O_CREAT))
 1661                 return (EINVAL);
 1662         if ((error = fd_allocfile(&nfp, &indx)) != 0)
 1663                 return (error);
 1664         fp = nfp;
 1665         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1666         if (error != 0) {
 1667                 goto bad;
 1668         }
 1669         error = vfs_fhtovp(fh, &vp);
 1670         if (error != 0) {
 1671                 goto bad;
 1672         }
 1673 
 1674         /* Now do an effective vn_open */
 1675 
 1676         if (vp->v_type == VSOCK) {
 1677                 error = EOPNOTSUPP;
 1678                 goto bad;
 1679         }
 1680         error = vn_openchk(vp, cred, flags);
 1681         if (error != 0)
 1682                 goto bad;
 1683         if (flags & O_TRUNC) {
 1684                 VOP_UNLOCK(vp, 0);                      /* XXX */
 1685                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 1686                 VATTR_NULL(&va);
 1687                 va.va_size = 0;
 1688                 error = VOP_SETATTR(vp, &va, cred);
 1689                 if (error)
 1690                         goto bad;
 1691         }
 1692         if ((error = VOP_OPEN(vp, flags, cred)) != 0)
 1693                 goto bad;
 1694         if (flags & FWRITE) {
 1695                 mutex_enter(&vp->v_interlock);
 1696                 vp->v_writecount++;
 1697                 mutex_exit(&vp->v_interlock);
 1698         }
 1699 
 1700         /* done with modified vn_open, now finish what sys_open does. */
 1701 
 1702         fp->f_flag = flags & FMASK;
 1703         fp->f_type = DTYPE_VNODE;
 1704         fp->f_ops = &vnops;
 1705         fp->f_data = vp;
 1706         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1707                 lf.l_whence = SEEK_SET;
 1708                 lf.l_start = 0;
 1709                 lf.l_len = 0;
 1710                 if (flags & O_EXLOCK)
 1711                         lf.l_type = F_WRLCK;
 1712                 else
 1713                         lf.l_type = F_RDLCK;
 1714                 type = F_FLOCK;
 1715                 if ((flags & FNONBLOCK) == 0)
 1716                         type |= F_WAIT;
 1717                 VOP_UNLOCK(vp, 0);
 1718                 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
 1719                 if (error) {
 1720                         (void) vn_close(vp, fp->f_flag, fp->f_cred);
 1721                         fd_abort(p, fp, indx);
 1722                         return (error);
 1723                 }
 1724                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1725                 atomic_or_uint(&fp->f_flag, FHASLOCK);
 1726         }
 1727         VOP_UNLOCK(vp, 0);
 1728         *retval = indx;
 1729         fd_affix(p, fp, indx);
 1730         vfs_copyinfh_free(fh);
 1731         return (0);
 1732 
 1733 bad:
 1734         fd_abort(p, fp, indx);
 1735         if (vp != NULL)
 1736                 vput(vp);
 1737         vfs_copyinfh_free(fh);
 1738         return (error);
 1739 }
 1740 
 1741 int
 1742 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
 1743 {
 1744         /* {
 1745                 syscallarg(const void *) fhp;
 1746                 syscallarg(size_t) fh_size;
 1747                 syscallarg(int) flags;
 1748         } */
 1749 
 1750         return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
 1751             SCARG(uap, flags), retval);
 1752 }
 1753 
 1754 int
 1755 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
 1756 {
 1757         int error;
 1758         fhandle_t *fh;
 1759         struct vnode *vp;
 1760 
 1761         /*
 1762          * Must be super user
 1763          */
 1764         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1765             0, NULL, NULL, NULL)))
 1766                 return (error);
 1767 
 1768         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1769         if (error != 0)
 1770                 return error;
 1771 
 1772         error = vfs_fhtovp(fh, &vp);
 1773         vfs_copyinfh_free(fh);
 1774         if (error != 0)
 1775                 return error;
 1776 
 1777         error = vn_stat(vp, sb);
 1778         vput(vp);
 1779         return error;
 1780 }
 1781 
 1782 
 1783 /* ARGSUSED */
 1784 int
 1785 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
 1786 {
 1787         /* {
 1788                 syscallarg(const void *) fhp;
 1789                 syscallarg(size_t) fh_size;
 1790                 syscallarg(struct stat *) sb;
 1791         } */
 1792         struct stat sb;
 1793         int error;
 1794 
 1795         error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
 1796         if (error)
 1797                 return error;
 1798         return copyout(&sb, SCARG(uap, sb), sizeof(sb));
 1799 }
 1800 
 1801 int
 1802 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
 1803     int flags)
 1804 {
 1805         fhandle_t *fh;
 1806         struct mount *mp;
 1807         struct vnode *vp;
 1808         int error;
 1809 
 1810         /*
 1811          * Must be super user
 1812          */
 1813         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
 1814             0, NULL, NULL, NULL)))
 1815                 return error;
 1816 
 1817         error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
 1818         if (error != 0)
 1819                 return error;
 1820 
 1821         error = vfs_fhtovp(fh, &vp);
 1822         vfs_copyinfh_free(fh);
 1823         if (error != 0)
 1824                 return error;
 1825 
 1826         mp = vp->v_mount;
 1827         error = dostatvfs(mp, sb, l, flags, 1);
 1828         vput(vp);
 1829         return error;
 1830 }
 1831 
 1832 /* ARGSUSED */
 1833 int
 1834 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
 1835 {
 1836         /* {
 1837                 syscallarg(const void *) fhp;
 1838                 syscallarg(size_t) fh_size;
 1839                 syscallarg(struct statvfs *) buf;
 1840                 syscallarg(int) flags;
 1841         } */
 1842         struct statvfs *sb = STATVFSBUF_GET();
 1843         int error;
 1844 
 1845         error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
 1846             SCARG(uap, flags));
 1847         if (error == 0)
 1848                 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
 1849         STATVFSBUF_PUT(sb);
 1850         return error;
 1851 }
 1852 
 1853 /*
 1854  * Create a special file.
 1855  */
 1856 /* ARGSUSED */
 1857 int
 1858 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
 1859 {
 1860         /* {
 1861                 syscallarg(const char *) path;
 1862                 syscallarg(int) mode;
 1863                 syscallarg(int) dev;
 1864         } */
 1865         struct proc *p = l->l_proc;
 1866         struct vnode *vp;
 1867         struct vattr vattr;
 1868         int error, optype;
 1869         struct nameidata nd;
 1870         char *path;
 1871         const char *cpath;
 1872         enum uio_seg seg = UIO_USERSPACE;
 1873 
 1874         if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
 1875             0, NULL, NULL, NULL)) != 0)
 1876                 return (error);
 1877 
 1878         optype = VOP_MKNOD_DESCOFFSET;
 1879 
 1880         VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
 1881         NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
 1882 
 1883         if ((error = namei(&nd)) != 0)
 1884                 goto out;
 1885         vp = nd.ni_vp;
 1886         if (vp != NULL)
 1887                 error = EEXIST;
 1888         else {
 1889                 VATTR_NULL(&vattr);
 1890                 /* We will read cwdi->cwdi_cmask unlocked. */
 1891                 vattr.va_mode =
 1892                     (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1893                 vattr.va_rdev = SCARG(uap, dev);
 1894 
 1895                 switch (SCARG(uap, mode) & S_IFMT) {
 1896                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1897                         vattr.va_type = VBAD;
 1898                         break;
 1899                 case S_IFCHR:
 1900                         vattr.va_type = VCHR;
 1901                         break;
 1902                 case S_IFBLK:
 1903                         vattr.va_type = VBLK;
 1904                         break;
 1905                 case S_IFWHT:
 1906                         optype = VOP_WHITEOUT_DESCOFFSET;
 1907                         break;
 1908                 case S_IFREG:
 1909 #if NVERIEXEC > 0
 1910                         error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
 1911                             O_CREAT);
 1912 #endif /* NVERIEXEC > 0 */
 1913                         vattr.va_type = VREG;
 1914                         vattr.va_rdev = VNOVAL;
 1915                         optype = VOP_CREATE_DESCOFFSET;
 1916                         break;
 1917                 default:
 1918                         error = EINVAL;
 1919                         break;
 1920                 }
 1921         }
 1922         if (!error) {
 1923                 switch (optype) {
 1924                 case VOP_WHITEOUT_DESCOFFSET:
 1925                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1926                         if (error)
 1927                                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1928                         vput(nd.ni_dvp);
 1929                         break;
 1930 
 1931                 case VOP_MKNOD_DESCOFFSET:
 1932                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1933                                                 &nd.ni_cnd, &vattr);
 1934                         if (error == 0)
 1935                                 vput(nd.ni_vp);
 1936                         break;
 1937 
 1938                 case VOP_CREATE_DESCOFFSET:
 1939                         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
 1940                                                 &nd.ni_cnd, &vattr);
 1941                         if (error == 0)
 1942                                 vput(nd.ni_vp);
 1943                         break;
 1944                 }
 1945         } else {
 1946                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1947                 if (nd.ni_dvp == vp)
 1948                         vrele(nd.ni_dvp);
 1949                 else
 1950                         vput(nd.ni_dvp);
 1951                 if (vp)
 1952                         vrele(vp);
 1953         }
 1954 out:
 1955         VERIEXEC_PATH_PUT(path);
 1956         return (error);
 1957 }
 1958 
 1959 /*
 1960  * Create a named pipe.
 1961  */
 1962 /* ARGSUSED */
 1963 int
 1964 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
 1965 {
 1966         /* {
 1967                 syscallarg(const char *) path;
 1968                 syscallarg(int) mode;
 1969         } */
 1970         struct proc *p = l->l_proc;
 1971         struct vattr vattr;
 1972         int error;
 1973         struct nameidata nd;
 1974 
 1975         NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
 1976             SCARG(uap, path));
 1977         if ((error = namei(&nd)) != 0)
 1978                 return (error);
 1979         if (nd.ni_vp != NULL) {
 1980                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1981                 if (nd.ni_dvp == nd.ni_vp)
 1982                         vrele(nd.ni_dvp);
 1983                 else
 1984                         vput(nd.ni_dvp);
 1985                 vrele(nd.ni_vp);
 1986                 return (EEXIST);
 1987         }
 1988         VATTR_NULL(&vattr);
 1989         vattr.va_type = VFIFO;
 1990         /* We will read cwdi->cwdi_cmask unlocked. */
 1991         vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
 1992         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1993         if (error == 0)
 1994                 vput(nd.ni_vp);
 1995         return (error);
 1996 }
 1997 
 1998 /*
 1999  * Make a hard file link.
 2000  */
 2001 /* ARGSUSED */
 2002 int
 2003 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
 2004 {
 2005         /* {
 2006                 syscallarg(const char *) path;
 2007                 syscallarg(const char *) link;
 2008         } */
 2009         struct vnode *vp;
 2010         struct nameidata nd;
 2011         int error;
 2012 
 2013         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2014             SCARG(uap, path));
 2015         if ((error = namei(&nd)) != 0)
 2016                 return (error);
 2017         vp = nd.ni_vp;
 2018         NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
 2019             SCARG(uap, link));
 2020         if ((error = namei(&nd)) != 0)
 2021                 goto out;
 2022         if (nd.ni_vp) {
 2023                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2024                 if (nd.ni_dvp == nd.ni_vp)
 2025                         vrele(nd.ni_dvp);
 2026                 else
 2027                         vput(nd.ni_dvp);
 2028                 vrele(nd.ni_vp);
 2029                 error = EEXIST;
 2030                 goto out;
 2031         }
 2032         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 2033 out:
 2034         vrele(vp);
 2035         return (error);
 2036 }
 2037 
 2038 /*
 2039  * Make a symbolic link.
 2040  */
 2041 /* ARGSUSED */
 2042 int
 2043 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
 2044 {
 2045         /* {
 2046                 syscallarg(const char *) path;
 2047                 syscallarg(const char *) link;
 2048         } */
 2049         struct proc *p = l->l_proc;
 2050         struct vattr vattr;
 2051         char *path;
 2052         int error;
 2053         struct nameidata nd;
 2054 
 2055         path = PNBUF_GET();
 2056         error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
 2057         if (error)
 2058                 goto out;
 2059         NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
 2060             SCARG(uap, link));
 2061         if ((error = namei(&nd)) != 0)
 2062                 goto out;
 2063         if (nd.ni_vp) {
 2064                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2065                 if (nd.ni_dvp == nd.ni_vp)
 2066                         vrele(nd.ni_dvp);
 2067                 else
 2068                         vput(nd.ni_dvp);
 2069                 vrele(nd.ni_vp);
 2070                 error = EEXIST;
 2071                 goto out;
 2072         }
 2073         VATTR_NULL(&vattr);
 2074         vattr.va_type = VLNK;
 2075         /* We will read cwdi->cwdi_cmask unlocked. */
 2076         vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
 2077         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
 2078         if (error == 0)
 2079                 vput(nd.ni_vp);
 2080 out:
 2081         PNBUF_PUT(path);
 2082         return (error);
 2083 }
 2084 
 2085 /*
 2086  * Delete a whiteout from the filesystem.
 2087  */
 2088 /* ARGSUSED */
 2089 int
 2090 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
 2091 {
 2092         /* {
 2093                 syscallarg(const char *) path;
 2094         } */
 2095         int error;
 2096         struct nameidata nd;
 2097 
 2098         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
 2099             UIO_USERSPACE, SCARG(uap, path));
 2100         error = namei(&nd);
 2101         if (error)
 2102                 return (error);
 2103 
 2104         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 2105                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2106                 if (nd.ni_dvp == nd.ni_vp)
 2107                         vrele(nd.ni_dvp);
 2108                 else
 2109                         vput(nd.ni_dvp);
 2110                 if (nd.ni_vp)
 2111                         vrele(nd.ni_vp);
 2112                 return (EEXIST);
 2113         }
 2114         if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
 2115                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2116         vput(nd.ni_dvp);
 2117         return (error);
 2118 }
 2119 
 2120 /*
 2121  * Delete a name from the filesystem.
 2122  */
 2123 /* ARGSUSED */
 2124 int
 2125 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
 2126 {
 2127         /* {
 2128                 syscallarg(const char *) path;
 2129         } */
 2130 
 2131         return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
 2132 }
 2133 
 2134 int
 2135 do_sys_unlink(const char *arg, enum uio_seg seg)
 2136 {
 2137         struct vnode *vp;
 2138         int error;
 2139         struct nameidata nd;
 2140         kauth_cred_t cred;
 2141         char *path;
 2142         const char *cpath;
 2143 
 2144         VERIEXEC_PATH_GET(arg, seg, cpath, path);
 2145         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
 2146 
 2147         if ((error = namei(&nd)) != 0)
 2148                 goto out;
 2149         vp = nd.ni_vp;
 2150 
 2151         /*
 2152          * The root of a mounted filesystem cannot be deleted.
 2153          */
 2154         if (vp->v_vflag & VV_ROOT) {
 2155                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2156                 if (nd.ni_dvp == vp)
 2157                         vrele(nd.ni_dvp);
 2158                 else
 2159                         vput(nd.ni_dvp);
 2160                 vput(vp);
 2161                 error = EBUSY;
 2162                 goto out;
 2163         }
 2164 
 2165 #if NVERIEXEC > 0
 2166         /* Handle remove requests for veriexec entries. */
 2167         if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
 2168                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 2169                 if (nd.ni_dvp == vp)
 2170                         vrele(nd.ni_dvp);
 2171                 else
 2172                         vput(nd.ni_dvp);
 2173                 vput(vp);
 2174                 goto out;
 2175         }
 2176 #endif /* NVERIEXEC > 0 */
 2177         
 2178         cred = kauth_cred_get();
 2179 #ifdef FILEASSOC
 2180         (void)fileassoc_file_delete(vp);
 2181 #endif /* FILEASSOC */
 2182         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2183 out:
 2184         VERIEXEC_PATH_PUT(path);
 2185         return (error);
 2186 }
 2187 
 2188 /*
 2189  * Reposition read/write file offset.
 2190  */
 2191 int
 2192 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
 2193 {
 2194         /* {
 2195                 syscallarg(int) fd;
 2196                 syscallarg(int) pad;
 2197                 syscallarg(off_t) offset;
 2198                 syscallarg(int) whence;
 2199         } */
 2200         kauth_cred_t cred = l->l_cred;
 2201         file_t *fp;
 2202         struct vnode *vp;
 2203         struct vattr vattr;
 2204         off_t newoff;
 2205         int error, fd;
 2206 
 2207         fd = SCARG(uap, fd);
 2208 
 2209         if ((fp = fd_getfile(fd)) == NULL)
 2210                 return (EBADF);
 2211 
 2212         vp = fp->f_data;
 2213         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2214                 error = ESPIPE;
 2215                 goto out;
 2216         }
 2217 
 2218         switch (SCARG(uap, whence)) {
 2219         case SEEK_CUR:
 2220                 newoff = fp->f_offset + SCARG(uap, offset);
 2221                 break;
 2222         case SEEK_END:
 2223                 error = VOP_GETATTR(vp, &vattr, cred);
 2224                 if (error) {
 2225                         goto out;
 2226                 }
 2227                 newoff = SCARG(uap, offset) + vattr.va_size;
 2228                 break;
 2229         case SEEK_SET:
 2230                 newoff = SCARG(uap, offset);
 2231                 break;
 2232         default:
 2233                 error = EINVAL;
 2234                 goto out;
 2235         }
 2236         if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
 2237                 *(off_t *)retval = fp->f_offset = newoff;
 2238         }
 2239  out:
 2240         fd_putfile(fd);
 2241         return (error);
 2242 }
 2243 
 2244 /*
 2245  * Positional read system call.
 2246  */
 2247 int
 2248 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
 2249 {
 2250         /* {
 2251                 syscallarg(int) fd;
 2252                 syscallarg(void *) buf;
 2253                 syscallarg(size_t) nbyte;
 2254                 syscallarg(off_t) offset;
 2255         } */
 2256         file_t *fp;
 2257         struct vnode *vp;
 2258         off_t offset;
 2259         int error, fd = SCARG(uap, fd);
 2260 
 2261         if ((fp = fd_getfile(fd)) == NULL)
 2262                 return (EBADF);
 2263 
 2264         if ((fp->f_flag & FREAD) == 0) {
 2265                 fd_putfile(fd);
 2266                 return (EBADF);
 2267         }
 2268 
 2269         vp = fp->f_data;
 2270         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2271                 error = ESPIPE;
 2272                 goto out;
 2273         }
 2274 
 2275         offset = SCARG(uap, offset);
 2276 
 2277         /*
 2278          * XXX This works because no file systems actually
 2279          * XXX take any action on the seek operation.
 2280          */
 2281         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2282                 goto out;
 2283 
 2284         /* dofileread() will unuse the descriptor for us */
 2285         return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2286             &offset, 0, retval));
 2287 
 2288  out:
 2289         fd_putfile(fd);
 2290         return (error);
 2291 }
 2292 
 2293 /*
 2294  * Positional scatter read system call.
 2295  */
 2296 int
 2297 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
 2298 {
 2299         /* {
 2300                 syscallarg(int) fd;
 2301                 syscallarg(const struct iovec *) iovp;
 2302                 syscallarg(int) iovcnt;
 2303                 syscallarg(off_t) offset;
 2304         } */
 2305         off_t offset = SCARG(uap, offset);
 2306 
 2307         return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
 2308             SCARG(uap, iovcnt), &offset, 0, retval);
 2309 }
 2310 
 2311 /*
 2312  * Positional write system call.
 2313  */
 2314 int
 2315 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
 2316 {
 2317         /* {
 2318                 syscallarg(int) fd;
 2319                 syscallarg(const void *) buf;
 2320                 syscallarg(size_t) nbyte;
 2321                 syscallarg(off_t) offset;
 2322         } */
 2323         file_t *fp;
 2324         struct vnode *vp;
 2325         off_t offset;
 2326         int error, fd = SCARG(uap, fd);
 2327 
 2328         if ((fp = fd_getfile(fd)) == NULL)
 2329                 return (EBADF);
 2330 
 2331         if ((fp->f_flag & FWRITE) == 0) {
 2332                 fd_putfile(fd);
 2333                 return (EBADF);
 2334         }
 2335 
 2336         vp = fp->f_data;
 2337         if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 2338                 error = ESPIPE;
 2339                 goto out;
 2340         }
 2341 
 2342         offset = SCARG(uap, offset);
 2343 
 2344         /*
 2345          * XXX This works because no file systems actually
 2346          * XXX take any action on the seek operation.
 2347          */
 2348         if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
 2349                 goto out;
 2350 
 2351         /* dofilewrite() will unuse the descriptor for us */
 2352         return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
 2353             &offset, 0, retval));
 2354 
 2355  out:
 2356         fd_putfile(fd);
 2357         return (error);
 2358 }
 2359 
 2360 /*
 2361  * Positional gather write system call.
 2362  */
 2363 int
 2364 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
 2365 {
 2366         /* {
 2367                 syscallarg(int) fd;
 2368                 syscallarg(const struct iovec *) iovp;
 2369                 syscallarg(int) iovcnt;
 2370                 syscallarg(off_t) offset;
 2371         } */
 2372         off_t offset = SCARG(uap, offset);
 2373 
 2374         return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
 2375             SCARG(uap, iovcnt), &offset, 0, retval);
 2376 }
 2377 
 2378 /*
 2379  * Check access permissions.
 2380  */
 2381 int
 2382 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
 2383 {
 2384         /* {
 2385                 syscallarg(const char *) path;
 2386                 syscallarg(int) flags;
 2387         } */
 2388         kauth_cred_t cred;
 2389         struct vnode *vp;
 2390         int error, flags;
 2391         struct nameidata nd;
 2392 
 2393         if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) {
 2394                 /* nonsense flags */
 2395                 return EINVAL;
 2396         }
 2397 
 2398         cred = kauth_cred_dup(l->l_cred);
 2399         kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
 2400         kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
 2401         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 2402             SCARG(uap, path));
 2403         /* Override default credentials */
 2404         nd.ni_cnd.cn_cred = cred;
 2405         if ((error = namei(&nd)) != 0)
 2406                 goto out;
 2407         vp = nd.ni_vp;
 2408 
 2409         /* Flags == 0 means only check for existence. */
 2410         if (SCARG(uap, flags)) {
 2411                 flags = 0;
 2412                 if (SCARG(uap, flags) & R_OK)
 2413                         flags |= VREAD;
 2414                 if (SCARG(uap, flags) & W_OK)
 2415                         flags |= VWRITE;
 2416                 if (SCARG(uap, flags) & X_OK)
 2417                         flags |= VEXEC;
 2418 
 2419                 error = VOP_ACCESS(vp, flags, cred);
 2420                 if (!error && (flags & VWRITE))
 2421                         error = vn_writechk(vp);
 2422         }
 2423         vput(vp);
 2424 out:
 2425         kauth_cred_free(cred);
 2426         return (error);
 2427 }
 2428 
 2429 /*
 2430  * Common code for all sys_stat functions, including compat versions.
 2431  */
 2432 int
 2433 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
 2434 {
 2435         int error;
 2436         struct nameidata nd;
 2437 
 2438         NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
 2439             UIO_USERSPACE, path);
 2440         error = namei(&nd);
 2441         if (error != 0)
 2442                 return error;
 2443         error = vn_stat(nd.ni_vp, sb);
 2444         vput(nd.ni_vp);
 2445         return error;
 2446 }
 2447 
 2448 /*
 2449  * Get file status; this version follows links.
 2450  */
 2451 /* ARGSUSED */
 2452 int
 2453 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
 2454 {
 2455         /* {
 2456                 syscallarg(const char *) path;
 2457                 syscallarg(struct stat *) ub;
 2458         } */
 2459         struct stat sb;
 2460         int error;
 2461 
 2462         error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
 2463         if (error)
 2464                 return error;
 2465         return copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2466 }
 2467 
 2468 /*
 2469  * Get file status; this version does not follow links.
 2470  */
 2471 /* ARGSUSED */
 2472 int
 2473 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
 2474 {
 2475         /* {
 2476                 syscallarg(const char *) path;
 2477                 syscallarg(struct stat *) ub;
 2478         } */
 2479         struct stat sb;
 2480         int error;
 2481 
 2482         error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
 2483         if (error)
 2484                 return error;
 2485         return copyout(&sb, SCARG(uap, ub), sizeof(sb));
 2486 }
 2487 
 2488 /*
 2489  * Get configurable pathname variables.
 2490  */
 2491 /* ARGSUSED */
 2492 int
 2493 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
 2494 {
 2495         /* {
 2496                 syscallarg(const char *) path;
 2497                 syscallarg(int) name;
 2498         } */
 2499         int error;
 2500         struct nameidata nd;
 2501 
 2502         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 2503             SCARG(uap, path));
 2504         if ((error = namei(&nd)) != 0)
 2505                 return (error);
 2506         error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
 2507         vput(nd.ni_vp);
 2508         return (error);
 2509 }
 2510 
 2511 /*
 2512  * Return target name of a symbolic link.
 2513  */
 2514 /* ARGSUSED */
 2515 int
 2516 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
 2517 {
 2518         /* {
 2519                 syscallarg(const char *) path;
 2520                 syscallarg(char *) buf;
 2521                 syscallarg(size_t) count;
 2522         } */
 2523         struct vnode *vp;
 2524         struct iovec aiov;
 2525         struct uio auio;
 2526         int error;
 2527         struct nameidata nd;
 2528 
 2529         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 2530             SCARG(uap, path));
 2531         if ((error = namei(&nd)) != 0)
 2532                 return (error);
 2533         vp = nd.ni_vp;
 2534         if (vp->v_type != VLNK)
 2535                 error = EINVAL;
 2536         else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
 2537             (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
 2538                 aiov.iov_base = SCARG(uap, buf);
 2539                 aiov.iov_len = SCARG(uap, count);
 2540                 auio.uio_iov = &aiov;
 2541                 auio.uio_iovcnt = 1;
 2542                 auio.uio_offset = 0;
 2543                 auio.uio_rw = UIO_READ;
 2544                 KASSERT(l == curlwp);
 2545                 auio.uio_vmspace = l->l_proc->p_vmspace;
 2546                 auio.uio_resid = SCARG(uap, count);
 2547                 error = VOP_READLINK(vp, &auio, l->l_cred);
 2548         }
 2549         vput(vp);
 2550         *retval = SCARG(uap, count) - auio.uio_resid;
 2551         return (error);
 2552 }
 2553 
 2554 /*
 2555  * Change flags of a file given a path name.
 2556  */
 2557 /* ARGSUSED */
 2558 int
 2559 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
 2560 {
 2561         /* {
 2562                 syscallarg(const char *) path;
 2563                 syscallarg(u_long) flags;
 2564         } */
 2565         struct vnode *vp;
 2566         int error;
 2567         struct nameidata nd;
 2568 
 2569         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2570             SCARG(uap, path));
 2571         if ((error = namei(&nd)) != 0)
 2572                 return (error);
 2573         vp = nd.ni_vp;
 2574         error = change_flags(vp, SCARG(uap, flags), l);
 2575         vput(vp);
 2576         return (error);
 2577 }
 2578 
 2579 /*
 2580  * Change flags of a file given a file descriptor.
 2581  */
 2582 /* ARGSUSED */
 2583 int
 2584 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
 2585 {
 2586         /* {
 2587                 syscallarg(int) fd;
 2588                 syscallarg(u_long) flags;
 2589         } */
 2590         struct vnode *vp;
 2591         file_t *fp;
 2592         int error;
 2593 
 2594         /* fd_getvnode() will use the descriptor for us */
 2595         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 2596                 return (error);
 2597         vp = fp->f_data;
 2598         error = change_flags(vp, SCARG(uap, flags), l);
 2599         VOP_UNLOCK(vp, 0);
 2600         fd_putfile(SCARG(uap, fd));
 2601         return (error);
 2602 }
 2603 
 2604 /*
 2605  * Change flags of a file given a path name; this version does
 2606  * not follow links.
 2607  */
 2608 int
 2609 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
 2610 {
 2611         /* {
 2612                 syscallarg(const char *) path;
 2613                 syscallarg(u_long) flags;
 2614         } */
 2615         struct vnode *vp;
 2616         int error;
 2617         struct nameidata nd;
 2618 
 2619         NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2620             SCARG(uap, path));
 2621         if ((error = namei(&nd)) != 0)
 2622                 return (error);
 2623         vp = nd.ni_vp;
 2624         error = change_flags(vp, SCARG(uap, flags), l);
 2625         vput(vp);
 2626         return (error);
 2627 }
 2628 
 2629 /*
 2630  * Common routine to change flags of a file.
 2631  */
 2632 int
 2633 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
 2634 {
 2635         struct vattr vattr;
 2636         int error;
 2637 
 2638         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2639         /*
 2640          * Non-superusers cannot change the flags on devices, even if they
 2641          * own them.
 2642          */
 2643         if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
 2644                 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
 2645                         goto out;
 2646                 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
 2647                         error = EINVAL;
 2648                         goto out;
 2649                 }
 2650         }
 2651         VATTR_NULL(&vattr);
 2652         vattr.va_flags = flags;
 2653         error = VOP_SETATTR(vp, &vattr, l->l_cred);
 2654 out:
 2655         return (error);
 2656 }
 2657 
 2658 /*
 2659  * Change mode of a file given path name; this version follows links.
 2660  */
 2661 /* ARGSUSED */
 2662 int
 2663 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
 2664 {
 2665         /* {
 2666                 syscallarg(const char *) path;
 2667                 syscallarg(int) mode;
 2668         } */
 2669         int error;
 2670         struct nameidata nd;
 2671 
 2672         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2673             SCARG(uap, path));
 2674         if ((error = namei(&nd)) != 0)
 2675                 return (error);
 2676 
 2677         error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
 2678 
 2679         vrele(nd.ni_vp);
 2680         return (error);
 2681 }
 2682 
 2683 /*
 2684  * Change mode of a file given a file descriptor.
 2685  */
 2686 /* ARGSUSED */
 2687 int
 2688 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
 2689 {
 2690         /* {
 2691                 syscallarg(int) fd;
 2692                 syscallarg(int) mode;
 2693         } */
 2694         file_t *fp;
 2695         int error;
 2696 
 2697         /* fd_getvnode() will use the descriptor for us */
 2698         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 2699                 return (error);
 2700         error = change_mode(fp->f_data, SCARG(uap, mode), l);
 2701         fd_putfile(SCARG(uap, fd));
 2702         return (error);
 2703 }
 2704 
 2705 /*
 2706  * Change mode of a file given path name; this version does not follow links.
 2707  */
 2708 /* ARGSUSED */
 2709 int
 2710 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
 2711 {
 2712         /* {
 2713                 syscallarg(const char *) path;
 2714                 syscallarg(int) mode;
 2715         } */
 2716         int error;
 2717         struct nameidata nd;
 2718 
 2719         NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2720             SCARG(uap, path));
 2721         if ((error = namei(&nd)) != 0)
 2722                 return (error);
 2723 
 2724         error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
 2725 
 2726         vrele(nd.ni_vp);
 2727         return (error);
 2728 }
 2729 
 2730 /*
 2731  * Common routine to set mode given a vnode.
 2732  */
 2733 static int
 2734 change_mode(struct vnode *vp, int mode, struct lwp *l)
 2735 {
 2736         struct vattr vattr;
 2737         int error;
 2738 
 2739         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2740         VATTR_NULL(&vattr);
 2741         vattr.va_mode = mode & ALLPERMS;
 2742         error = VOP_SETATTR(vp, &vattr, l->l_cred);
 2743         VOP_UNLOCK(vp, 0);
 2744         return (error);
 2745 }
 2746 
 2747 /*
 2748  * Set ownership given a path name; this version follows links.
 2749  */
 2750 /* ARGSUSED */
 2751 int
 2752 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
 2753 {
 2754         /* {
 2755                 syscallarg(const char *) path;
 2756                 syscallarg(uid_t) uid;
 2757                 syscallarg(gid_t) gid;
 2758         } */
 2759         int error;
 2760         struct nameidata nd;
 2761 
 2762         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2763             SCARG(uap, path));
 2764         if ((error = namei(&nd)) != 0)
 2765                 return (error);
 2766 
 2767         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
 2768 
 2769         vrele(nd.ni_vp);
 2770         return (error);
 2771 }
 2772 
 2773 /*
 2774  * Set ownership given a path name; this version follows links.
 2775  * Provides POSIX semantics.
 2776  */
 2777 /* ARGSUSED */
 2778 int
 2779 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
 2780 {
 2781         /* {
 2782                 syscallarg(const char *) path;
 2783                 syscallarg(uid_t) uid;
 2784                 syscallarg(gid_t) gid;
 2785         } */
 2786         int error;
 2787         struct nameidata nd;
 2788 
 2789         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2790             SCARG(uap, path));
 2791         if ((error = namei(&nd)) != 0)
 2792                 return (error);
 2793 
 2794         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
 2795 
 2796         vrele(nd.ni_vp);
 2797         return (error);
 2798 }
 2799 
 2800 /*
 2801  * Set ownership given a file descriptor.
 2802  */
 2803 /* ARGSUSED */
 2804 int
 2805 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
 2806 {
 2807         /* {
 2808                 syscallarg(int) fd;
 2809                 syscallarg(uid_t) uid;
 2810                 syscallarg(gid_t) gid;
 2811         } */
 2812         int error;
 2813         file_t *fp;
 2814 
 2815         /* fd_getvnode() will use the descriptor for us */
 2816         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 2817                 return (error);
 2818         error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
 2819             l, 0);
 2820         fd_putfile(SCARG(uap, fd));
 2821         return (error);
 2822 }
 2823 
 2824 /*
 2825  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
 2826  */
 2827 /* ARGSUSED */
 2828 int
 2829 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
 2830 {
 2831         /* {
 2832                 syscallarg(int) fd;
 2833                 syscallarg(uid_t) uid;
 2834                 syscallarg(gid_t) gid;
 2835         } */
 2836         int error;
 2837         file_t *fp;
 2838 
 2839         /* fd_getvnode() will use the descriptor for us */
 2840         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 2841                 return (error);
 2842         error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
 2843             l, 1);
 2844         fd_putfile(SCARG(uap, fd));
 2845         return (error);
 2846 }
 2847 
 2848 /*
 2849  * Set ownership given a path name; this version does not follow links.
 2850  */
 2851 /* ARGSUSED */
 2852 int
 2853 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
 2854 {
 2855         /* {
 2856                 syscallarg(const char *) path;
 2857                 syscallarg(uid_t) uid;
 2858                 syscallarg(gid_t) gid;
 2859         } */
 2860         int error;
 2861         struct nameidata nd;
 2862 
 2863         NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2864             SCARG(uap, path));
 2865         if ((error = namei(&nd)) != 0)
 2866                 return (error);
 2867 
 2868         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
 2869 
 2870         vrele(nd.ni_vp);
 2871         return (error);
 2872 }
 2873 
 2874 /*
 2875  * Set ownership given a path name; this version does not follow links.
 2876  * Provides POSIX/XPG semantics.
 2877  */
 2878 /* ARGSUSED */
 2879 int
 2880 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
 2881 {
 2882         /* {
 2883                 syscallarg(const char *) path;
 2884                 syscallarg(uid_t) uid;
 2885                 syscallarg(gid_t) gid;
 2886         } */
 2887         int error;
 2888         struct nameidata nd;
 2889 
 2890         NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
 2891             SCARG(uap, path));
 2892         if ((error = namei(&nd)) != 0)
 2893                 return (error);
 2894 
 2895         error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
 2896 
 2897         vrele(nd.ni_vp);
 2898         return (error);
 2899 }
 2900 
 2901 /*
 2902  * Common routine to set ownership given a vnode.
 2903  */
 2904 static int
 2905 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
 2906     int posix_semantics)
 2907 {
 2908         struct vattr vattr;
 2909         mode_t newmode;
 2910         int error;
 2911 
 2912         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2913         if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
 2914                 goto out;
 2915 
 2916 #define CHANGED(x) ((int)(x) != -1)
 2917         newmode = vattr.va_mode;
 2918         if (posix_semantics) {
 2919                 /*
 2920                  * POSIX/XPG semantics: if the caller is not the super-user,
 2921                  * clear set-user-id and set-group-id bits.  Both POSIX and
 2922                  * the XPG consider the behaviour for calls by the super-user
 2923                  * implementation-defined; we leave the set-user-id and set-
 2924                  * group-id settings intact in that case.
 2925                  */
 2926                 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 2927                                       NULL) != 0)
 2928                         newmode &= ~(S_ISUID | S_ISGID);
 2929         } else {
 2930                 /*
 2931                  * NetBSD semantics: when changing owner and/or group,
 2932                  * clear the respective bit(s).
 2933                  */
 2934                 if (CHANGED(uid))
 2935                         newmode &= ~S_ISUID;
 2936                 if (CHANGED(gid))
 2937                         newmode &= ~S_ISGID;
 2938         }
 2939         /* Update va_mode iff altered. */
 2940         if (vattr.va_mode == newmode)
 2941                 newmode = VNOVAL;
 2942 
 2943         VATTR_NULL(&vattr);
 2944         vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
 2945         vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
 2946         vattr.va_mode = newmode;
 2947         error = VOP_SETATTR(vp, &vattr, l->l_cred);
 2948 #undef CHANGED
 2949 
 2950 out:
 2951         VOP_UNLOCK(vp, 0);
 2952         return (error);
 2953 }
 2954 
 2955 /*
 2956  * Set the access and modification times given a path name; this
 2957  * version follows links.
 2958  */
 2959 /* ARGSUSED */
 2960 int
 2961 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
 2962 {
 2963         /* {
 2964                 syscallarg(const char *) path;
 2965                 syscallarg(const struct timeval *) tptr;
 2966         } */
 2967 
 2968         return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
 2969             SCARG(uap, tptr), UIO_USERSPACE);
 2970 }
 2971 
 2972 /*
 2973  * Set the access and modification times given a file descriptor.
 2974  */
 2975 /* ARGSUSED */
 2976 int
 2977 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
 2978 {
 2979         /* {
 2980                 syscallarg(int) fd;
 2981                 syscallarg(const struct timeval *) tptr;
 2982         } */
 2983         int error;
 2984         file_t *fp;
 2985 
 2986         /* fd_getvnode() will use the descriptor for us */
 2987         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 2988                 return (error);
 2989         error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
 2990             UIO_USERSPACE);
 2991         fd_putfile(SCARG(uap, fd));
 2992         return (error);
 2993 }
 2994 
 2995 /*
 2996  * Set the access and modification times given a path name; this
 2997  * version does not follow links.
 2998  */
 2999 int
 3000 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
 3001 {
 3002         /* {
 3003                 syscallarg(const char *) path;
 3004                 syscallarg(const struct timeval *) tptr;
 3005         } */
 3006 
 3007         return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
 3008             SCARG(uap, tptr), UIO_USERSPACE);
 3009 }
 3010 
 3011 /*
 3012  * Common routine to set access and modification times given a vnode.
 3013  */
 3014 int
 3015 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
 3016     const struct timeval *tptr, enum uio_seg seg)
 3017 {
 3018         struct vattr vattr;
 3019         struct nameidata nd;
 3020         int error;
 3021         bool vanull, setbirthtime;
 3022         struct timespec ts[2];
 3023 
 3024         if (tptr == NULL) {
 3025                 vanull = true;
 3026                 nanotime(&ts[0]);
 3027                 ts[1] = ts[0];
 3028         } else {
 3029                 struct timeval tv[2];
 3030 
 3031                 vanull = false;
 3032                 if (seg != UIO_SYSSPACE) {
 3033                         error = copyin(tptr, &tv, sizeof (tv));
 3034                         if (error != 0)
 3035                                 return error;
 3036                         tptr = tv;
 3037                 }
 3038                 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
 3039                 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
 3040         }
 3041 
 3042         if (vp == NULL) {
 3043                 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
 3044                 if ((error = namei(&nd)) != 0)
 3045                         return error;
 3046                 vp = nd.ni_vp;
 3047         } else
 3048                 nd.ni_vp = NULL;
 3049 
 3050         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3051         setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
 3052             timespeccmp(&ts[1], &vattr.va_birthtime, <));
 3053         VATTR_NULL(&vattr);
 3054         vattr.va_atime = ts[0];
 3055         vattr.va_mtime = ts[1];
 3056         if (setbirthtime)
 3057                 vattr.va_birthtime = ts[1];
 3058         if (vanull)
 3059                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3060         error = VOP_SETATTR(vp, &vattr, l->l_cred);
 3061         VOP_UNLOCK(vp, 0);
 3062 
 3063         if (nd.ni_vp != NULL)
 3064                 vrele(nd.ni_vp);
 3065 
 3066         return error;
 3067 }
 3068 
 3069 /*
 3070  * Truncate a file given its path name.
 3071  */
 3072 /* ARGSUSED */
 3073 int
 3074 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
 3075 {
 3076         /* {
 3077                 syscallarg(const char *) path;
 3078                 syscallarg(int) pad;
 3079                 syscallarg(off_t) length;
 3080         } */
 3081         struct vnode *vp;
 3082         struct vattr vattr;
 3083         int error;
 3084         struct nameidata nd;
 3085 
 3086         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 3087             SCARG(uap, path));
 3088         if ((error = namei(&nd)) != 0)
 3089                 return (error);
 3090         vp = nd.ni_vp;
 3091         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3092         if (vp->v_type == VDIR)
 3093                 error = EISDIR;
 3094         else if ((error = vn_writechk(vp)) == 0 &&
 3095             (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
 3096                 VATTR_NULL(&vattr);
 3097                 vattr.va_size = SCARG(uap, length);
 3098                 error = VOP_SETATTR(vp, &vattr, l->l_cred);
 3099         }
 3100         vput(vp);
 3101         return (error);
 3102 }
 3103 
 3104 /*
 3105  * Truncate a file given a file descriptor.
 3106  */
 3107 /* ARGSUSED */
 3108 int
 3109 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
 3110 {
 3111         /* {
 3112                 syscallarg(int) fd;
 3113                 syscallarg(int) pad;
 3114                 syscallarg(off_t) length;
 3115         } */
 3116         struct vattr vattr;
 3117         struct vnode *vp;
 3118         file_t *fp;
 3119         int error;
 3120 
 3121         /* fd_getvnode() will use the descriptor for us */
 3122         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 3123                 return (error);
 3124         if ((fp->f_flag & FWRITE) == 0) {
 3125                 error = EINVAL;
 3126                 goto out;
 3127         }
 3128         vp = fp->f_data;
 3129         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3130         if (vp->v_type == VDIR)
 3131                 error = EISDIR;
 3132         else if ((error = vn_writechk(vp)) == 0) {
 3133                 VATTR_NULL(&vattr);
 3134                 vattr.va_size = SCARG(uap, length);
 3135                 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
 3136         }
 3137         VOP_UNLOCK(vp, 0);
 3138  out:
 3139         fd_putfile(SCARG(uap, fd));
 3140         return (error);
 3141 }
 3142 
 3143 /*
 3144  * Sync an open file.
 3145  */
 3146 /* ARGSUSED */
 3147 int
 3148 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
 3149 {
 3150         /* {
 3151                 syscallarg(int) fd;
 3152         } */
 3153         struct vnode *vp;
 3154         file_t *fp;
 3155         int error;
 3156 
 3157         /* fd_getvnode() will use the descriptor for us */
 3158         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 3159                 return (error);
 3160         vp = fp->f_data;
 3161         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3162         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
 3163         if (error == 0 && bioopsp != NULL &&
 3164             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3165                 (*bioopsp->io_fsync)(vp, 0);
 3166         VOP_UNLOCK(vp, 0);
 3167         fd_putfile(SCARG(uap, fd));
 3168         return (error);
 3169 }
 3170 
 3171 /*
 3172  * Sync a range of file data.  API modeled after that found in AIX.
 3173  *
 3174  * FDATASYNC indicates that we need only save enough metadata to be able
 3175  * to re-read the written data.  Note we duplicate AIX's requirement that
 3176  * the file be open for writing.
 3177  */
 3178 /* ARGSUSED */
 3179 int
 3180 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
 3181 {
 3182         /* {
 3183                 syscallarg(int) fd;
 3184                 syscallarg(int) flags;
 3185                 syscallarg(off_t) start;
 3186                 syscallarg(off_t) length;
 3187         } */
 3188         struct vnode *vp;
 3189         file_t *fp;
 3190         int flags, nflags;
 3191         off_t s, e, len;
 3192         int error;
 3193 
 3194         /* fd_getvnode() will use the descriptor for us */
 3195         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 3196                 return (error);
 3197 
 3198         if ((fp->f_flag & FWRITE) == 0) {
 3199                 error = EBADF;
 3200                 goto out;
 3201         }
 3202 
 3203         flags = SCARG(uap, flags);
 3204         if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
 3205             ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
 3206                 error = EINVAL;
 3207                 goto out;
 3208         }
 3209         /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
 3210         if (flags & FDATASYNC)
 3211                 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
 3212         else
 3213                 nflags = FSYNC_WAIT;
 3214         if (flags & FDISKSYNC)
 3215                 nflags |= FSYNC_CACHE;
 3216 
 3217         len = SCARG(uap, length);
 3218         /* If length == 0, we do the whole file, and s = l = 0 will do that */
 3219         if (len) {
 3220                 s = SCARG(uap, start);
 3221                 e = s + len;
 3222                 if (e < s) {
 3223                         error = EINVAL;
 3224                         goto out;
 3225                 }
 3226         } else {
 3227                 e = 0;
 3228                 s = 0;
 3229         }
 3230 
 3231         vp = fp->f_data;
 3232         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3233         error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
 3234 
 3235         if (error == 0 && bioopsp != NULL &&
 3236             vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
 3237                 (*bioopsp->io_fsync)(vp, nflags);
 3238 
 3239         VOP_UNLOCK(vp, 0);
 3240 out:
 3241         fd_putfile(SCARG(uap, fd));
 3242         return (error);
 3243 }
 3244 
 3245 /*
 3246  * Sync the data of an open file.
 3247  */
 3248 /* ARGSUSED */
 3249 int
 3250 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
 3251 {
 3252         /* {
 3253                 syscallarg(int) fd;
 3254         } */
 3255         struct vnode *vp;
 3256         file_t *fp;
 3257         int error;
 3258 
 3259         /* fd_getvnode() will use the descriptor for us */
 3260         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 3261                 return (error);
 3262         if ((fp->f_flag & FWRITE) == 0) {
 3263                 fd_putfile(SCARG(uap, fd));
 3264                 return (EBADF);
 3265         }
 3266         vp = fp->f_data;
 3267         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3268         error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
 3269         VOP_UNLOCK(vp, 0);
 3270         fd_putfile(SCARG(uap, fd));
 3271         return (error);
 3272 }
 3273 
 3274 /*
 3275  * Rename files, (standard) BSD semantics frontend.
 3276  */
 3277 /* ARGSUSED */
 3278 int
 3279 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
 3280 {
 3281         /* {
 3282                 syscallarg(const char *) from;
 3283                 syscallarg(const char *) to;
 3284         } */
 3285 
 3286         return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
 3287 }
 3288 
 3289 /*
 3290  * Rename files, POSIX semantics frontend.
 3291  */
 3292 /* ARGSUSED */
 3293 int
 3294 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
 3295 {
 3296         /* {
 3297                 syscallarg(const char *) from;
 3298                 syscallarg(const char *) to;
 3299         } */
 3300 
 3301         return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
 3302 }
 3303 
 3304 /*
 3305  * Rename files.  Source and destination must either both be directories,
 3306  * or both not be directories.  If target is a directory, it must be empty.
 3307  * If `from' and `to' refer to the same object, the value of the `retain'
 3308  * argument is used to determine whether `from' will be
 3309  *
 3310  * (retain == 0)        deleted unless `from' and `to' refer to the same
 3311  *                      object in the file system's name space (BSD).
 3312  * (retain == 1)        always retained (POSIX).
 3313  */
 3314 int
 3315 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
 3316 {
 3317         struct vnode *tvp, *fvp, *tdvp;
 3318         struct nameidata fromnd, tond;
 3319         struct mount *fs;
 3320         struct lwp *l = curlwp;
 3321         struct proc *p;
 3322         uint32_t saveflag;
 3323         int error;
 3324 
 3325         NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME,
 3326             seg, from);
 3327         if ((error = namei(&fromnd)) != 0)
 3328                 return (error);
 3329         if (fromnd.ni_dvp != fromnd.ni_vp)
 3330                 VOP_UNLOCK(fromnd.ni_dvp, 0);
 3331         fvp = fromnd.ni_vp;
 3332 
 3333         fs = fvp->v_mount;
 3334         error = VFS_RENAMELOCK_ENTER(fs);
 3335         if (error) {
 3336                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3337                 vrele(fromnd.ni_dvp);
 3338                 vrele(fvp);
 3339                 goto out1;
 3340         }
 3341 
 3342         /*
 3343          * close, partially, yet another race - ideally we should only
 3344          * go as far as getting fromnd.ni_dvp before getting the per-fs
 3345          * lock, and then continue to get fromnd.ni_vp, but we can't do
 3346          * that with namei as it stands.
 3347          *
 3348          * This still won't prevent rmdir from nuking fromnd.ni_vp
 3349          * under us. The real fix is to get the locks in the right
 3350          * order and do the lookups in the right places, but that's a
 3351          * major rototill.
 3352          *
 3353          * Preserve the SAVESTART in cn_flags, because who knows what
 3354          * might happen if we don't.
 3355          *
 3356          * Note: this logic (as well as this whole function) is cloned
 3357          * in nfs_serv.c. Proceed accordingly.
 3358          */
 3359         vrele(fvp);
 3360         if ((fromnd.ni_cnd.cn_namelen == 1 && 
 3361              fromnd.ni_cnd.cn_nameptr[0] == '.') ||
 3362             (fromnd.ni_cnd.cn_namelen == 2 && 
 3363              fromnd.ni_cnd.cn_nameptr[0] == '.' &&
 3364              fromnd.ni_cnd.cn_nameptr[1] == '.')) {
 3365                 error = EINVAL;
 3366                 VFS_RENAMELOCK_EXIT(fs);
 3367                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3368                 vrele(fromnd.ni_dvp);
 3369                 goto out1;
 3370         }
 3371         saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
 3372         fromnd.ni_cnd.cn_flags &= ~SAVESTART;
 3373         vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
 3374         error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
 3375         fromnd.ni_cnd.cn_flags |= saveflag;
 3376         if (error) {
 3377                 VOP_UNLOCK(fromnd.ni_dvp, 0);
 3378                 VFS_RENAMELOCK_EXIT(fs);
 3379                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3380                 vrele(fromnd.ni_dvp);
 3381                 goto out1;
 3382         }
 3383         VOP_UNLOCK(fromnd.ni_vp, 0);
 3384         if (fromnd.ni_dvp != fromnd.ni_vp)
 3385                 VOP_UNLOCK(fromnd.ni_dvp, 0);
 3386         fvp = fromnd.ni_vp;
 3387 
 3388         NDINIT(&tond, RENAME,
 3389             LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
 3390               | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0),
 3391             seg, to);
 3392         if ((error = namei(&tond)) != 0) {
 3393                 VFS_RENAMELOCK_EXIT(fs);
 3394                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3395                 vrele(fromnd.ni_dvp);
 3396                 vrele(fvp);
 3397                 goto out1;
 3398         }
 3399         tdvp = tond.ni_dvp;
 3400         tvp = tond.ni_vp;
 3401 
 3402         if (tvp != NULL) {
 3403                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3404                         error = ENOTDIR;
 3405                         goto out;
 3406                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3407                         error = EISDIR;
 3408                         goto out;
 3409                 }
 3410         }
 3411 
 3412         if (fvp == tdvp)
 3413                 error = EINVAL;
 3414 
 3415         /*
 3416          * Source and destination refer to the same object.
 3417          */
 3418         if (fvp == tvp) {
 3419                 if (retain)
 3420                         error = -1;
 3421                 else if (fromnd.ni_dvp == tdvp &&
 3422                     fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 3423                     !memcmp(fromnd.ni_cnd.cn_nameptr,
 3424                           tond.ni_cnd.cn_nameptr,
 3425                           fromnd.ni_cnd.cn_namelen))
 3426                 error = -1;
 3427         }
 3428 
 3429 #if NVERIEXEC > 0
 3430         if (!error) {
 3431                 char *f1, *f2;
 3432 
 3433                 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
 3434                 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen + 1);
 3435 
 3436                 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
 3437                 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen + 1);
 3438 
 3439                 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
 3440 
 3441                 free(f1, M_TEMP);
 3442                 free(f2, M_TEMP);
 3443         }
 3444 #endif /* NVERIEXEC > 0 */
 3445 
 3446 out:
 3447         p = l->l_proc;
 3448         if (!error) {
 3449                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3450                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3451                 VFS_RENAMELOCK_EXIT(fs);
 3452         } else {
 3453                 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
 3454                 if (tdvp == tvp)
 3455                         vrele(tdvp);
 3456                 else
 3457                         vput(tdvp);
 3458                 if (tvp)
 3459                         vput(tvp);
 3460                 VFS_RENAMELOCK_EXIT(fs);
 3461                 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
 3462                 vrele(fromnd.ni_dvp);
 3463                 vrele(fvp);
 3464         }
 3465         vrele(tond.ni_startdir);
 3466         PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
 3467 out1:
 3468         if (fromnd.ni_startdir)
 3469                 vrele(fromnd.ni_startdir);
 3470         PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
 3471         return (error == -1 ? 0 : error);
 3472 }
 3473 
 3474 /*
 3475  * Make a directory file.
 3476  */
 3477 /* ARGSUSED */
 3478 int
 3479 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
 3480 {
 3481         /* {
 3482                 syscallarg(const char *) path;
 3483                 syscallarg(int) mode;
 3484         } */
 3485         struct proc *p = l->l_proc;
 3486         struct vnode *vp;
 3487         struct vattr vattr;
 3488         int error;
 3489         struct nameidata nd;
 3490 
 3491         NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
 3492             SCARG(uap, path));
 3493         if ((error = namei(&nd)) != 0)
 3494                 return (error);
 3495         vp = nd.ni_vp;
 3496         if (vp != NULL) {
 3497                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3498                 if (nd.ni_dvp == vp)
 3499                         vrele(nd.ni_dvp);
 3500                 else
 3501                         vput(nd.ni_dvp);
 3502                 vrele(vp);
 3503                 return (EEXIST);
 3504         }
 3505         VATTR_NULL(&vattr);
 3506         vattr.va_type = VDIR;
 3507         /* We will read cwdi->cwdi_cmask unlocked. */
 3508         vattr.va_mode =
 3509             (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
 3510         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3511         if (!error)
 3512                 vput(nd.ni_vp);
 3513         return (error);
 3514 }
 3515 
 3516 /*
 3517  * Remove a directory file.
 3518  */
 3519 /* ARGSUSED */
 3520 int
 3521 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
 3522 {
 3523         /* {
 3524                 syscallarg(const char *) path;
 3525         } */
 3526         struct vnode *vp;
 3527         int error;
 3528         struct nameidata nd;
 3529 
 3530         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
 3531             SCARG(uap, path));
 3532         if ((error = namei(&nd)) != 0)
 3533                 return (error);
 3534         vp = nd.ni_vp;
 3535         if (vp->v_type != VDIR) {
 3536                 error = ENOTDIR;
 3537                 goto out;
 3538         }
 3539         /*
 3540          * No rmdir "." please.
 3541          */
 3542         if (nd.ni_dvp == vp) {
 3543                 error = EINVAL;
 3544                 goto out;
 3545         }
 3546         /*
 3547          * The root of a mounted filesystem cannot be deleted.
 3548          */
 3549         if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
 3550                 error = EBUSY;
 3551                 goto out;
 3552         }
 3553         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3554         return (error);
 3555 
 3556 out:
 3557         VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 3558         if (nd.ni_dvp == vp)
 3559                 vrele(nd.ni_dvp);
 3560         else
 3561                 vput(nd.ni_dvp);
 3562         vput(vp);
 3563         return (error);
 3564 }
 3565 
 3566 /*
 3567  * Read a block of directory entries in a file system independent format.
 3568  */
 3569 int
 3570 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
 3571 {
 3572         /* {
 3573                 syscallarg(int) fd;
 3574                 syscallarg(char *) buf;
 3575                 syscallarg(size_t) count;
 3576         } */
 3577         file_t *fp;
 3578         int error, done;
 3579 
 3580         /* fd_getvnode() will use the descriptor for us */
 3581         if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
 3582                 return (error);
 3583         if ((fp->f_flag & FREAD) == 0) {
 3584                 error = EBADF;
 3585                 goto out;
 3586         }
 3587         error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
 3588                         SCARG(uap, count), &done, l, 0, 0);
 3589         ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
 3590         *retval = done;
 3591  out:
 3592         fd_putfile(SCARG(uap, fd));
 3593         return (error);
 3594 }
 3595 
 3596 /*
 3597  * Set the mode mask for creation of filesystem nodes.
 3598  */
 3599 int
 3600 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
 3601 {
 3602         /* {
 3603                 syscallarg(mode_t) newmask;
 3604         } */
 3605         struct proc *p = l->l_proc;
 3606         struct cwdinfo *cwdi;
 3607 
 3608         /*
 3609          * cwdi->cwdi_cmask will be read unlocked elsewhere.  What's
 3610          * important is that we serialize changes to the mask.  The
 3611          * rw_exit() will issue a write memory barrier on our behalf,
 3612          * and force the changes out to other CPUs (as it must use an
 3613          * atomic operation, draining the local CPU's store buffers).
 3614          */
 3615         cwdi = p->p_cwdi;
 3616         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
 3617         *retval = cwdi->cwdi_cmask;
 3618         cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
 3619         rw_exit(&cwdi->cwdi_lock);
 3620 
 3621         return (0);
 3622 }
 3623 
 3624 int
 3625 dorevoke(struct vnode *vp, kauth_cred_t cred)
 3626 {
 3627         struct vattr vattr;
 3628         int error;
 3629 
 3630         if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
 3631                 return error;
 3632         if (kauth_cred_geteuid(cred) == vattr.va_uid ||
 3633             (error = kauth_authorize_generic(cred,
 3634             KAUTH_GENERIC_ISSUSER, NULL)) == 0)
 3635                 VOP_REVOKE(vp, REVOKEALL);
 3636         return (error);
 3637 }
 3638 
 3639 /*
 3640  * Void all references to file by ripping underlying filesystem
 3641  * away from vnode.
 3642  */
 3643 /* ARGSUSED */
 3644 int
 3645 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
 3646 {
 3647         /* {
 3648                 syscallarg(const char *) path;
 3649         } */
 3650         struct vnode *vp;
 3651         int error;
 3652         struct nameidata nd;
 3653 
 3654         NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
 3655             SCARG(uap, path));
 3656         if ((error = namei(&nd)) != 0)
 3657                 return (error);
 3658         vp = nd.ni_vp;
 3659         error = dorevoke(vp, l->l_cred);
 3660         vrele(vp);
 3661         return (error);
 3662 }
Cache object: 0f1e4a5dd6dd9713afb0f9739bc442da
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c