vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
   36  */
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/buf.h>
   41 #include <sys/conf.h>
   42 #include <sys/sysent.h>
   43 #include <sys/malloc.h>
   44 #include <sys/mount.h>
   45 #include <sys/mountctl.h>
   46 #include <sys/sysproto.h>
   47 #include <sys/filedesc.h>
   48 #include <sys/kernel.h>
   49 #include <sys/fcntl.h>
   50 #include <sys/file.h>
   51 #include <sys/linker.h>
   52 #include <sys/stat.h>
   53 #include <sys/unistd.h>
   54 #include <sys/vnode.h>
   55 #include <sys/proc.h>
   56 #include <sys/priv.h>
   57 #include <sys/jail.h>
   58 #include <sys/namei.h>
   59 #include <sys/nlookup.h>
   60 #include <sys/dirent.h>
   61 #include <sys/extattr.h>
   62 #include <sys/spinlock.h>
   63 #include <sys/kern_syscall.h>
   64 #include <sys/objcache.h>
   65 #include <sys/sysctl.h>
   66 
   67 #include <sys/buf2.h>
   68 #include <sys/file2.h>
   69 #include <sys/spinlock2.h>
   70 #include <sys/mplock2.h>
   71 
   72 #include <vm/vm.h>
   73 #include <vm/vm_object.h>
   74 #include <vm/vm_page.h>
   75 
   76 #include <machine/limits.h>
   77 #include <machine/stdarg.h>
   78 
   79 #include <vfs/union/union.h>
   80 
   81 static void mount_warning(struct mount *mp, const char *ctl, ...)
   82                 __printflike(2, 3);
   83 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
   84 static int checkvp_chdir (struct vnode *vn, struct thread *td);
   85 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
   86 static int chroot_refuse_vdir_fds (struct filedesc *fdp);
   87 static int chroot_visible_mnt(struct mount *mp, struct proc *p);
   88 static int getutimes (const struct timeval *, struct timespec *);
   89 static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
   90 static int setfmode (struct vnode *, int);
   91 static int setfflags (struct vnode *, int);
   92 static int setutimes (struct vnode *, struct vattr *,
   93                         const struct timespec *, int);
   94 static int      usermount = 0;  /* if 1, non-root can mount fs. */
   95 
   96 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);
   97 
   98 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   99     "Allow non-root users to mount filesystems");
  100 
  101 /*
  102  * Virtual File System System Calls
  103  */
  104 
  105 /*
  106  * Mount a file system.
  107  *
  108  * mount_args(char *type, char *path, int flags, caddr_t data)
  109  *
  110  * MPALMOSTSAFE
  111  */
  112 int
  113 sys_mount(struct mount_args *uap)
  114 {
  115         struct thread *td = curthread;
  116         struct vnode *vp;
  117         struct nchandle nch;
  118         struct mount *mp, *nullmp;
  119         struct vfsconf *vfsp;
  120         int error, flag = 0, flag2 = 0;
  121         int hasmount;
  122         struct vattr va;
  123         struct nlookupdata nd;
  124         char fstypename[MFSNAMELEN];
  125         struct ucred *cred;
  126 
  127         cred = td->td_ucred;
  128         if (jailed(cred)) {
  129                 error = EPERM;
  130                 goto done;
  131         }
  132         if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
  133                 goto done;
  134 
  135         /*
  136          * Do not allow NFS export by non-root users.
  137          */
  138         if (uap->flags & MNT_EXPORTED) {
  139                 error = priv_check(td, PRIV_ROOT);
  140                 if (error)
  141                         goto done;
  142         }
  143         /*
  144          * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
  145          */
  146         if (priv_check(td, PRIV_ROOT)) 
  147                 uap->flags |= MNT_NOSUID | MNT_NODEV;
  148 
  149         /*
  150          * Lookup the requested path and extract the nch and vnode.
  151          */
  152         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
  153         if (error == 0) {
  154                 if ((error = nlookup(&nd)) == 0) {
  155                         if (nd.nl_nch.ncp->nc_vp == NULL)
  156                                 error = ENOENT;
  157                 }
  158         }
  159         if (error) {
  160                 nlookup_done(&nd);
  161                 goto done;
  162         }
  163 
  164         /*
  165          * If the target filesystem is resolved via a nullfs mount, then
  166          * nd.nl_nch.mount will be pointing to the nullfs mount structure
  167          * instead of the target file system. We need it in case we are
  168          * doing an update.
  169          */
  170         nullmp = nd.nl_nch.mount;
  171 
  172         /*
  173          * Extract the locked+refd ncp and cleanup the nd structure
  174          */
  175         nch = nd.nl_nch;
  176         cache_zero(&nd.nl_nch);
  177         nlookup_done(&nd);
  178 
  179         if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
  180             (mp = cache_findmount(&nch)) != NULL) {
  181                 cache_dropmount(mp);
  182                 hasmount = 1;
  183         } else {
  184                 hasmount = 0;
  185         }
  186 
  187 
  188         /*
  189          * now we have the locked ref'd nch and unreferenced vnode.
  190          */
  191         vp = nch.ncp->nc_vp;
  192         if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
  193                 cache_put(&nch);
  194                 goto done;
  195         }
  196         cache_unlock(&nch);
  197 
  198         /*
  199          * Extract the file system type. We need to know this early, to take
  200          * appropriate actions if we are dealing with a nullfs.
  201          */
  202         if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
  203                 cache_drop(&nch);
  204                 vput(vp);
  205                 goto done;
  206         }
  207 
  208         /*
  209          * Now we have an unlocked ref'd nch and a locked ref'd vp
  210          */
  211         if (uap->flags & MNT_UPDATE) {
  212                 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
  213                         cache_drop(&nch);
  214                         vput(vp);
  215                         error = EINVAL;
  216                         goto done;
  217                 }
  218 
  219                 if (strncmp(fstypename, "null", 5) == 0) {
  220                         KKASSERT(nullmp);
  221                         mp = nullmp;
  222                 } else {
  223                         mp = vp->v_mount;
  224                 }
  225 
  226                 flag = mp->mnt_flag;
  227                 flag2 = mp->mnt_kern_flag;
  228                 /*
  229                  * We only allow the filesystem to be reloaded if it
  230                  * is currently mounted read-only.
  231                  */
  232                 if ((uap->flags & MNT_RELOAD) &&
  233                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  234                         cache_drop(&nch);
  235                         vput(vp);
  236                         error = EOPNOTSUPP;     /* Needs translation */
  237                         goto done;
  238                 }
  239                 /*
  240                  * Only root, or the user that did the original mount is
  241                  * permitted to update it.
  242                  */
  243                 if (mp->mnt_stat.f_owner != cred->cr_uid &&
  244                     (error = priv_check(td, PRIV_ROOT))) {
  245                         cache_drop(&nch);
  246                         vput(vp);
  247                         goto done;
  248                 }
  249                 if (vfs_busy(mp, LK_NOWAIT)) {
  250                         cache_drop(&nch);
  251                         vput(vp);
  252                         error = EBUSY;
  253                         goto done;
  254                 }
  255                 if (hasmount) {
  256                         cache_drop(&nch);
  257                         vfs_unbusy(mp);
  258                         vput(vp);
  259                         error = EBUSY;
  260                         goto done;
  261                 }
  262                 mp->mnt_flag |=
  263                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
  264                 lwkt_gettoken(&mp->mnt_token);
  265                 vn_unlock(vp);
  266                 goto update;
  267         }
  268 
  269         /*
  270          * If the user is not root, ensure that they own the directory
  271          * onto which we are attempting to mount.
  272          */
  273         if ((error = VOP_GETATTR(vp, &va)) ||
  274             (va.va_uid != cred->cr_uid &&
  275              (error = priv_check(td, PRIV_ROOT)))) {
  276                 cache_drop(&nch);
  277                 vput(vp);
  278                 goto done;
  279         }
  280         if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
  281                 cache_drop(&nch);
  282                 vput(vp);
  283                 goto done;
  284         }
  285         if (vp->v_type != VDIR) {
  286                 cache_drop(&nch);
  287                 vput(vp);
  288                 error = ENOTDIR;
  289                 goto done;
  290         }
  291         if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
  292                 cache_drop(&nch);
  293                 vput(vp);
  294                 error = EPERM;
  295                 goto done;
  296         }
  297         vfsp = vfsconf_find_by_name(fstypename);
  298         if (vfsp == NULL) {
  299                 linker_file_t lf;
  300 
  301                 /* Only load modules for root (very important!) */
  302                 if ((error = priv_check(td, PRIV_ROOT)) != 0) {
  303                         cache_drop(&nch);
  304                         vput(vp);
  305                         goto done;
  306                 }
  307                 error = linker_load_file(fstypename, &lf);
  308                 if (error || lf == NULL) {
  309                         cache_drop(&nch);
  310                         vput(vp);
  311                         if (lf == NULL)
  312                                 error = ENODEV;
  313                         goto done;
  314                 }
  315                 lf->userrefs++;
  316                 /* lookup again, see if the VFS was loaded */
  317                 vfsp = vfsconf_find_by_name(fstypename);
  318                 if (vfsp == NULL) {
  319                         lf->userrefs--;
  320                         linker_file_unload(lf);
  321                         cache_drop(&nch);
  322                         vput(vp);
  323                         error = ENODEV;
  324                         goto done;
  325                 }
  326         }
  327         if (hasmount) {
  328                 cache_drop(&nch);
  329                 vput(vp);
  330                 error = EBUSY;
  331                 goto done;
  332         }
  333 
  334         /*
  335          * Allocate and initialize the filesystem.
  336          */
  337         mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
  338         mount_init(mp);
  339         vfs_busy(mp, LK_NOWAIT);
  340         mp->mnt_op = vfsp->vfc_vfsops;
  341         mp->mnt_vfc = vfsp;
  342         vfsp->vfc_refcount++;
  343         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  344         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  345         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  346         mp->mnt_stat.f_owner = cred->cr_uid;
  347         lwkt_gettoken(&mp->mnt_token);
  348         vn_unlock(vp);
  349 update:
  350         /*
  351          * (per-mount token acquired at this point)
  352          *
  353          * Set the mount level flags.
  354          */
  355         if (uap->flags & MNT_RDONLY)
  356                 mp->mnt_flag |= MNT_RDONLY;
  357         else if (mp->mnt_flag & MNT_RDONLY)
  358                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
  359         mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
  360             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
  361             MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
  362             MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
  363         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
  364             MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
  365             MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
  366             MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
  367         /*
  368          * Mount the filesystem.
  369          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
  370          * get. 
  371          */
  372         error = VFS_MOUNT(mp, uap->path, uap->data, cred);
  373         if (mp->mnt_flag & MNT_UPDATE) {
  374                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
  375                         mp->mnt_flag &= ~MNT_RDONLY;
  376                 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
  377                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
  378                 if (error) {
  379                         mp->mnt_flag = flag;
  380                         mp->mnt_kern_flag = flag2;
  381                 }
  382                 lwkt_reltoken(&mp->mnt_token);
  383                 vfs_unbusy(mp);
  384                 vrele(vp);
  385                 cache_drop(&nch);
  386                 goto done;
  387         }
  388         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  389 
  390         /*
  391          * Put the new filesystem on the mount list after root.  The mount
  392          * point gets its own mnt_ncmountpt (unless the VFS already set one
  393          * up) which represents the root of the mount.  The lookup code
  394          * detects the mount point going forward and checks the root of
  395          * the mount going backwards.
  396          *
  397          * It is not necessary to invalidate or purge the vnode underneath
  398          * because elements under the mount will be given their own glue
  399          * namecache record.
  400          */
  401         if (!error) {
  402                 if (mp->mnt_ncmountpt.ncp == NULL) {
  403                         /* 
  404                          * allocate, then unlock, but leave the ref intact 
  405                          */
  406                         cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
  407                         cache_unlock(&mp->mnt_ncmountpt);
  408                 }
  409                 mp->mnt_ncmounton = nch;                /* inherits ref */
  410                 nch.ncp->nc_flag |= NCF_ISMOUNTPT;
  411                 cache_ismounting(mp);
  412 
  413                 mountlist_insert(mp, MNTINS_LAST);
  414                 vn_unlock(vp);
  415                 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
  416                 error = vfs_allocate_syncvnode(mp);
  417                 lwkt_reltoken(&mp->mnt_token);
  418                 vfs_unbusy(mp);
  419                 error = VFS_START(mp, 0);
  420                 vrele(vp);
  421         } else {
  422                 vn_syncer_thr_stop(mp);
  423                 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
  424                 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
  425                 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
  426                 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
  427                 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
  428                 mp->mnt_vfc->vfc_refcount--;
  429                 lwkt_reltoken(&mp->mnt_token);
  430                 vfs_unbusy(mp);
  431                 kfree(mp, M_MOUNT);
  432                 cache_drop(&nch);
  433                 vput(vp);
  434         }
  435 done:
  436         return (error);
  437 }
  438 
  439 /*
  440  * Scan all active processes to see if any of them have a current
  441  * or root directory onto which the new filesystem has just been
  442  * mounted. If so, replace them with the new mount point.
  443  *
  444  * Both old_nch and new_nch are ref'd on call but not locked.
  445  * new_nch must be temporarily locked so it can be associated with the
  446  * vnode representing the root of the mount point.
  447  */
  448 struct checkdirs_info {
  449         struct nchandle old_nch;
  450         struct nchandle new_nch;
  451         struct vnode *old_vp;
  452         struct vnode *new_vp;
  453 };
  454 
  455 static int checkdirs_callback(struct proc *p, void *data);
  456 
  457 static void
  458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
  459 {
  460         struct checkdirs_info info;
  461         struct vnode *olddp;
  462         struct vnode *newdp;
  463         struct mount *mp;
  464 
  465         /*
  466          * If the old mount point's vnode has a usecount of 1, it is not
  467          * being held as a descriptor anywhere.
  468          */
  469         olddp = old_nch->ncp->nc_vp;
  470         if (olddp == NULL || VREFCNT(olddp) == 1)
  471                 return;
  472 
  473         /*
  474          * Force the root vnode of the new mount point to be resolved
  475          * so we can update any matching processes.
  476          */
  477         mp = new_nch->mount;
  478         if (VFS_ROOT(mp, &newdp))
  479                 panic("mount: lost mount");
  480         vn_unlock(newdp);
  481         cache_lock(new_nch);
  482         vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
  483         cache_setunresolved(new_nch);
  484         cache_setvp(new_nch, newdp);
  485         cache_unlock(new_nch);
  486 
  487         /*
  488          * Special handling of the root node
  489          */
  490         if (rootvnode == olddp) {
  491                 vref(newdp);
  492                 vfs_cache_setroot(newdp, cache_hold(new_nch));
  493         }
  494 
  495         /*
  496          * Pass newdp separately so the callback does not have to access
  497          * it via new_nch->ncp->nc_vp.
  498          */
  499         info.old_nch = *old_nch;
  500         info.new_nch = *new_nch;
  501         info.new_vp = newdp;
  502         allproc_scan(checkdirs_callback, &info);
  503         vput(newdp);
  504 }
  505 
  506 /*
  507  * NOTE: callback is not MP safe because the scanned process's filedesc
  508  * structure can be ripped out from under us, amoung other things.
  509  */
  510 static int
  511 checkdirs_callback(struct proc *p, void *data)
  512 {
  513         struct checkdirs_info *info = data;
  514         struct filedesc *fdp;
  515         struct nchandle ncdrop1;
  516         struct nchandle ncdrop2;
  517         struct vnode *vprele1;
  518         struct vnode *vprele2;
  519 
  520         if ((fdp = p->p_fd) != NULL) {
  521                 cache_zero(&ncdrop1);
  522                 cache_zero(&ncdrop2);
  523                 vprele1 = NULL;
  524                 vprele2 = NULL;
  525 
  526                 /*
  527                  * MPUNSAFE - XXX fdp can be pulled out from under a
  528                  * foreign process.
  529                  *
  530                  * A shared filedesc is ok, we don't have to copy it
  531                  * because we are making this change globally.
  532                  */
  533                 spin_lock(&fdp->fd_spin);
  534                 if (fdp->fd_ncdir.mount == info->old_nch.mount &&
  535                     fdp->fd_ncdir.ncp == info->old_nch.ncp) {
  536                         vprele1 = fdp->fd_cdir;
  537                         vref(info->new_vp);
  538                         fdp->fd_cdir = info->new_vp;
  539                         ncdrop1 = fdp->fd_ncdir;
  540                         cache_copy(&info->new_nch, &fdp->fd_ncdir);
  541                 }
  542                 if (fdp->fd_nrdir.mount == info->old_nch.mount &&
  543                     fdp->fd_nrdir.ncp == info->old_nch.ncp) {
  544                         vprele2 = fdp->fd_rdir;
  545                         vref(info->new_vp);
  546                         fdp->fd_rdir = info->new_vp;
  547                         ncdrop2 = fdp->fd_nrdir;
  548                         cache_copy(&info->new_nch, &fdp->fd_nrdir);
  549                 }
  550                 spin_unlock(&fdp->fd_spin);
  551                 if (ncdrop1.ncp)
  552                         cache_drop(&ncdrop1);
  553                 if (ncdrop2.ncp)
  554                         cache_drop(&ncdrop2);
  555                 if (vprele1)
  556                         vrele(vprele1);
  557                 if (vprele2)
  558                         vrele(vprele2);
  559         }
  560         return(0);
  561 }
  562 
  563 /*
  564  * Unmount a file system.
  565  *
  566  * Note: unmount takes a path to the vnode mounted on as argument,
  567  * not special file (as before).
  568  *
  569  * umount_args(char *path, int flags)
  570  *
  571  * MPALMOSTSAFE
  572  */
  573 int
  574 sys_unmount(struct unmount_args *uap)
  575 {
  576         struct thread *td = curthread;
  577         struct proc *p __debugvar = td->td_proc;
  578         struct mount *mp = NULL;
  579         struct nlookupdata nd;
  580         int error;
  581 
  582         KKASSERT(p);
  583         get_mplock();
  584         if (td->td_ucred->cr_prison != NULL) {
  585                 error = EPERM;
  586                 goto done;
  587         }
  588         if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
  589                 goto done;
  590 
  591         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
  592         if (error == 0)
  593                 error = nlookup(&nd);
  594         if (error)
  595                 goto out;
  596 
  597         mp = nd.nl_nch.mount;
  598 
  599         /*
  600          * Only root, or the user that did the original mount is
  601          * permitted to unmount this filesystem.
  602          */
  603         if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
  604             (error = priv_check(td, PRIV_ROOT)))
  605                 goto out;
  606 
  607         /*
  608          * Don't allow unmounting the root file system.
  609          */
  610         if (mp->mnt_flag & MNT_ROOTFS) {
  611                 error = EINVAL;
  612                 goto out;
  613         }
  614 
  615         /*
  616          * Must be the root of the filesystem
  617          */
  618         if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
  619                 error = EINVAL;
  620                 goto out;
  621         }
  622 
  623 out:
  624         nlookup_done(&nd);
  625         if (error == 0)
  626                 error = dounmount(mp, uap->flags);
  627 done:
  628         rel_mplock();
  629         return (error);
  630 }
  631 
  632 /*
  633  * Do the actual file system unmount.
  634  */
  635 static int
  636 dounmount_interlock(struct mount *mp)
  637 {
  638         if (mp->mnt_kern_flag & MNTK_UNMOUNT)
  639                 return (EBUSY);
  640         mp->mnt_kern_flag |= MNTK_UNMOUNT;
  641         return(0);
  642 }
  643 
  644 static int
  645 unmount_allproc_cb(struct proc *p, void *arg)
  646 {
  647         struct mount *mp;
  648 
  649         if (p->p_textnch.ncp == NULL)
  650                 return 0;
  651 
  652         mp = (struct mount *)arg;
  653         if (p->p_textnch.mount == mp)
  654                 cache_drop(&p->p_textnch);
  655 
  656         return 0;
  657 }
  658 
  659 int
  660 dounmount(struct mount *mp, int flags)
  661 {
  662         struct namecache *ncp;
  663         struct nchandle nch;
  664         struct vnode *vp;
  665         int error;
  666         int async_flag;
  667         int lflags;
  668         int freeok = 1;
  669         int retry;
  670 
  671         lwkt_gettoken(&mp->mnt_token);
  672         /*
  673          * Exclusive access for unmounting purposes
  674          */
  675         if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
  676                 goto out;
  677 
  678         /*
  679          * Allow filesystems to detect that a forced unmount is in progress.
  680          */
  681         if (flags & MNT_FORCE)
  682                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
  683         lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
  684         error = lockmgr(&mp->mnt_lock, lflags);
  685         if (error) {
  686                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
  687                 if (mp->mnt_kern_flag & MNTK_MWAIT) {
  688                         mp->mnt_kern_flag &= ~MNTK_MWAIT;
  689                         wakeup(mp);
  690                 }
  691                 goto out;
  692         }
  693 
  694         if (mp->mnt_flag & MNT_EXPUBLIC)
  695                 vfs_setpublicfs(NULL, NULL, NULL);
  696 
  697         vfs_msync(mp, MNT_WAIT);
  698         async_flag = mp->mnt_flag & MNT_ASYNC;
  699         mp->mnt_flag &=~ MNT_ASYNC;
  700 
  701         /*
  702          * If this filesystem isn't aliasing other filesystems,
  703          * try to invalidate any remaining namecache entries and
  704          * check the count afterwords.
  705          */
  706         if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
  707                 cache_lock(&mp->mnt_ncmountpt);
  708                 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
  709                 cache_unlock(&mp->mnt_ncmountpt);
  710 
  711                 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
  712                     (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
  713                         allproc_scan(&unmount_allproc_cb, mp);
  714                 }
  715 
  716                 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
  717                     (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
  718 
  719                         if ((flags & MNT_FORCE) == 0) {
  720                                 error = EBUSY;
  721                                 mount_warning(mp, "Cannot unmount: "
  722                                                   "%d namecache "
  723                                                   "references still "
  724                                                   "present",
  725                                                   ncp->nc_refs - 1);
  726                         } else {
  727                                 mount_warning(mp, "Forced unmount: "
  728                                                   "%d namecache "
  729                                                   "references still "
  730                                                   "present",
  731                                                   ncp->nc_refs - 1);
  732                                 freeok = 0;
  733                         }
  734                 }
  735         }
  736 
  737         /*
  738          * Decomission our special mnt_syncer vnode.  This also stops
  739          * the vnlru code.  If we are unable to unmount we recommission
  740          * the vnode.
  741          *
  742          * Then sync the filesystem.
  743          */
  744         if ((vp = mp->mnt_syncer) != NULL) {
  745                 mp->mnt_syncer = NULL;
  746                 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
  747                 vrele(vp);
  748         }
  749         if ((mp->mnt_flag & MNT_RDONLY) == 0)
  750                 VFS_SYNC(mp, MNT_WAIT);
  751 
  752         /*
  753          * nchandle records ref the mount structure.  Expect a count of 1
  754          * (our mount->mnt_ncmountpt).
  755          *
  756          * Scans can get temporary refs on a mountpoint (thought really
  757          * heavy duty stuff like cache_findmount() do not).
  758          */
  759         for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
  760                 cache_unmounting(mp);
  761                 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
  762         }
  763         if (mp->mnt_refs != 1) {
  764                 if ((flags & MNT_FORCE) == 0) {
  765                         mount_warning(mp, "Cannot unmount: "
  766                                           "%d mount refs still present",
  767                                           mp->mnt_refs);
  768                         error = EBUSY;
  769                 } else {
  770                         mount_warning(mp, "Forced unmount: "
  771                                           "%d mount refs still present",
  772                                           mp->mnt_refs);
  773                         freeok = 0;
  774                 }
  775         }
  776 
  777         /*
  778          * So far so good, sync the filesystem once more and
  779          * call the VFS unmount code if the sync succeeds.
  780          */
  781         if (error == 0) {
  782                 if (((mp->mnt_flag & MNT_RDONLY) ||
  783                      (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
  784                     (flags & MNT_FORCE)) {
  785                         error = VFS_UNMOUNT(mp, flags);
  786                 }
  787         }
  788 
  789         /*
  790          * If an error occurred we can still recover, restoring the
  791          * syncer vnode and misc flags.
  792          */
  793         if (error) {
  794                 if (mp->mnt_syncer == NULL)
  795                         vfs_allocate_syncvnode(mp);
  796                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
  797                 mp->mnt_flag |= async_flag;
  798                 lockmgr(&mp->mnt_lock, LK_RELEASE);
  799                 if (mp->mnt_kern_flag & MNTK_MWAIT) {
  800                         mp->mnt_kern_flag &= ~MNTK_MWAIT;
  801                         wakeup(mp);
  802                 }
  803                 goto out;
  804         }
  805         /*
  806          * Clean up any journals still associated with the mount after
  807          * filesystem activity has ceased.
  808          */
  809         journal_remove_all_journals(mp, 
  810             ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
  811 
  812         mountlist_remove(mp);
  813 
  814         /*
  815          * Remove any installed vnode ops here so the individual VFSs don't
  816          * have to.
  817          */
  818         vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
  819         vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
  820         vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
  821         vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
  822         vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
  823 
  824         if (mp->mnt_ncmountpt.ncp != NULL) {
  825                 nch = mp->mnt_ncmountpt;
  826                 cache_zero(&mp->mnt_ncmountpt);
  827                 cache_clrmountpt(&nch);
  828                 cache_drop(&nch);
  829         }
  830         if (mp->mnt_ncmounton.ncp != NULL) {
  831                 cache_unmounting(mp);
  832                 nch = mp->mnt_ncmounton;
  833                 cache_zero(&mp->mnt_ncmounton);
  834                 cache_clrmountpt(&nch);
  835                 cache_drop(&nch);
  836         }
  837 
  838         mp->mnt_vfc->vfc_refcount--;
  839         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
  840                 panic("unmount: dangling vnode");
  841         lockmgr(&mp->mnt_lock, LK_RELEASE);
  842         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  843                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  844                 wakeup(mp);
  845         }
  846 
  847         /*
  848          * If we reach here and freeok != 0 we must free the mount.
  849          * If refs > 1 cycle and wait, just in case someone tried
  850          * to busy the mount after we decided to do the unmount.
  851          */
  852         if (freeok) {
  853                 while (mp->mnt_refs > 1) {
  854                         cache_unmounting(mp);
  855                         wakeup(mp);
  856                         tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
  857                 }
  858                 lwkt_reltoken(&mp->mnt_token);
  859                 kfree(mp, M_MOUNT);
  860                 mp = NULL;
  861         }
  862         error = 0;
  863 out:
  864         if (mp)
  865                 lwkt_reltoken(&mp->mnt_token);
  866         return (error);
  867 }
  868 
  869 static
  870 void
  871 mount_warning(struct mount *mp, const char *ctl, ...)
  872 {
  873         char *ptr;
  874         char *buf;
  875         __va_list va;
  876 
  877         __va_start(va, ctl);
  878         if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
  879                            &ptr, &buf, 0) == 0) {
  880                 kprintf("unmount(%s): ", ptr);
  881                 kvprintf(ctl, va);
  882                 kprintf("\n");
  883                 kfree(buf, M_TEMP);
  884         } else {
  885                 kprintf("unmount(%p", mp);
  886                 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
  887                         kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
  888                 kprintf("): ");
  889                 kvprintf(ctl, va);
  890                 kprintf("\n");
  891         }
  892         __va_end(va);
  893 }
  894 
  895 /*
  896  * Shim cache_fullpath() to handle the case where a process is chrooted into
  897  * a subdirectory of a mount.  In this case if the root mount matches the
  898  * process root directory's mount we have to specify the process's root
  899  * directory instead of the mount point, because the mount point might
  900  * be above the root directory.
  901  */
  902 static
  903 int
  904 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
  905 {
  906         struct nchandle *nch;
  907 
  908         if (p && p->p_fd->fd_nrdir.mount == mp)
  909                 nch = &p->p_fd->fd_nrdir;
  910         else
  911                 nch = &mp->mnt_ncmountpt;
  912         return(cache_fullpath(p, nch, NULL, rb, fb, 0));
  913 }
  914 
  915 /*
  916  * Sync each mounted filesystem.
  917  */
  918 
  919 #ifdef DEBUG
  920 static int syncprt = 0;	/* DEBUG-only integer knob; registered read/write by the SYSCTL_INT below */
  921 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  922 #endif /* DEBUG */
  923 
  924 static int sync_callback(struct mount *mp, void *data);	/* per-mount worker that sys_sync() hands to mountlist_scan() */
  925 
  926 int
  927 sys_sync(struct sync_args *uap)
  928 {
  929         mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
  930         return (0);
  931 }
  932 
  933 static
  934 int
  935 sync_callback(struct mount *mp, void *data __unused)
  936 {
  937         int asyncflag;
  938 
  939         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  940                 asyncflag = mp->mnt_flag & MNT_ASYNC;
  941                 mp->mnt_flag &= ~MNT_ASYNC;
  942                 vfs_msync(mp, MNT_NOWAIT);
  943                 VFS_SYNC(mp, MNT_NOWAIT);
  944                 mp->mnt_flag |= asyncflag;
  945         }
  946         return(0);
  947 }
  948 
  949 /* XXX PRISON: could be per prison flag.  While zero, sys_quotactl() returns EPERM to callers whose credential has cr_prison set. */
  950 static int prison_quotas;
  951 #if 0	/* the sysctl that would export the flag is compiled out */
  952 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
  953 #endif
  954 
  955 /*
  956  *  quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
  957  *
  958  * Change filesystem quotas.
  959  *
  960  * MPALMOSTSAFE
  961  */
  962 int
  963 sys_quotactl(struct quotactl_args *uap)
  964 {
  965         struct nlookupdata nd;
  966         struct thread *td;
  967         struct mount *mp;
  968         int error;
  969 
  970         get_mplock();
  971         td = curthread;
  972         if (td->td_ucred->cr_prison && !prison_quotas) {
  973                 error = EPERM;
  974                 goto done;
  975         }
  976 
  977         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
  978         if (error == 0)
  979                 error = nlookup(&nd);
  980         if (error == 0) {
  981                 mp = nd.nl_nch.mount;
  982                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
  983                                     uap->arg, nd.nl_cred);
  984         }
  985         nlookup_done(&nd);
  986 done:
  987         rel_mplock();
  988         return (error);
  989 }
  990 
  991 /*
  992  * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
  993  *              void *buf, int buflen)
  994  *
  995  * This function operates on a mount point and executes the specified
  996  * operation using the specified control data, and possibly returns data.
  997  *
  998  * The actual number of bytes stored in the result buffer is returned, 0
  999  * if none, otherwise an error is returned.
 1000  *
 1001  * MPALMOSTSAFE
 1002  */
 1003 int
 1004 sys_mountctl(struct mountctl_args *uap)
 1005 {
 1006         struct thread *td = curthread;
 1007         struct proc *p = td->td_proc;
 1008         struct file *fp;
 1009         void *ctl = NULL;
 1010         void *buf = NULL;
 1011         char *path = NULL;
 1012         int error;
 1013 
 1014         /*
 1015          * Sanity and permissions checks.  We must be root.
 1016          */
 1017         KKASSERT(p);
 1018         if (td->td_ucred->cr_prison != NULL)
 1019                 return (EPERM);
 1020         if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
 1021             (error = priv_check(td, PRIV_ROOT)) != 0)
 1022                 return (error);
 1023 
 1024         /*
 1025          * Argument length checks
 1026          */
 1027         if (uap->ctllen < 0 || uap->ctllen > 1024)
 1028                 return (EINVAL);
 1029         if (uap->buflen < 0 || uap->buflen > 16 * 1024)
 1030                 return (EINVAL);
 1031         if (uap->path == NULL)
 1032                 return (EINVAL);
 1033 
 1034         /*
 1035          * Allocate the necessary buffers and copyin data
 1036          */
 1037         path = objcache_get(namei_oc, M_WAITOK);
 1038         error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
 1039         if (error)
 1040                 goto done;
 1041 
 1042         if (uap->ctllen) {
 1043                 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
 1044                 error = copyin(uap->ctl, ctl, uap->ctllen);
 1045                 if (error)
 1046                         goto done;
 1047         }
 1048         if (uap->buflen)
 1049                 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
 1050 
 1051         /*
 1052          * Validate the descriptor
 1053          */
 1054         if (uap->fd >= 0) {
 1055                 fp = holdfp(p->p_fd, uap->fd, -1);
 1056                 if (fp == NULL) {
 1057                         error = EBADF;
 1058                         goto done;
 1059                 }
 1060         } else {
 1061                 fp = NULL;
 1062         }
 1063 
 1064         /*
 1065          * Execute the internal kernel function and clean up.
 1066          */
 1067         get_mplock();
 1068         error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
 1069         rel_mplock();
 1070         if (fp)
 1071                 fdrop(fp);
 1072         if (error == 0 && uap->sysmsg_result > 0)
 1073                 error = copyout(buf, uap->buf, uap->sysmsg_result);
 1074 done:
 1075         if (path)
 1076                 objcache_put(namei_oc, path);
 1077         if (ctl)
 1078                 kfree(ctl, M_TEMP);
 1079         if (buf)
 1080                 kfree(buf, M_TEMP);
 1081         return (error);
 1082 }
 1083 
 1084 /*
 1085  * Execute a mount control operation by resolving the path to a mount point
 1086  * and calling vop_mountctl().  
 1087  *
 1088  * Use the mount point from the nch instead of the vnode so nullfs mounts
 1089  * can properly spike the VOP.
 1090  */
 1091 int
 1092 kern_mountctl(const char *path, int op, struct file *fp, 
 1093                 const void *ctl, int ctllen, 
 1094                 void *buf, int buflen, int *res)
 1095 {
 1096         struct vnode *vp;
 1097         struct mount *mp;
 1098         struct nlookupdata nd;
 1099         int error;
 1100 
 1101         *res = 0;
 1102         vp = NULL;
 1103         error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
 1104         if (error == 0)
 1105                 error = nlookup(&nd);
 1106         if (error == 0)
 1107                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
 1108         mp = nd.nl_nch.mount;
 1109         nlookup_done(&nd);
 1110         if (error)
 1111                 return (error);
 1112         vn_unlock(vp);
 1113 
 1114         /*
 1115          * Must be the root of the filesystem
 1116          */
 1117         if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
 1118                 vrele(vp);
 1119                 return (EINVAL);
 1120         }
 1121         error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
 1122                              buf, buflen, res);
 1123         vrele(vp);
 1124         return (error);
 1125 }
 1126 
 1127 int
 1128 kern_statfs(struct nlookupdata *nd, struct statfs *buf)
 1129 {
 1130         struct thread *td = curthread;
 1131         struct proc *p = td->td_proc;
 1132         struct mount *mp;
 1133         struct statfs *sp;
 1134         char *fullpath, *freepath;
 1135         int error;
 1136 
 1137         if ((error = nlookup(nd)) != 0)
 1138                 return (error);
 1139         mp = nd->nl_nch.mount;
 1140         sp = &mp->mnt_stat;
 1141         if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
 1142                 return (error);
 1143 
 1144         error = mount_path(p, mp, &fullpath, &freepath);
 1145         if (error)
 1146                 return(error);
 1147         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1148         strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
 1149         kfree(freepath, M_TEMP);
 1150 
 1151         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 1152         bcopy(sp, buf, sizeof(*buf));
 1153         /* Only root should have access to the fsid's. */
 1154         if (priv_check(td, PRIV_ROOT))
 1155                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
 1156         return (0);
 1157 }
 1158 
 1159 /*
 1160  * statfs_args(char *path, struct statfs *buf)
 1161  *
 1162  * Get filesystem statistics.
 1163  */
 1164 int
 1165 sys_statfs(struct statfs_args *uap)
 1166 {
 1167         struct nlookupdata nd;
 1168         struct statfs buf;
 1169         int error;
 1170 
 1171         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 1172         if (error == 0)
 1173                 error = kern_statfs(&nd, &buf);
 1174         nlookup_done(&nd);
 1175         if (error == 0)
 1176                 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
 1177         return (error);
 1178 }
 1179 
 1180 int
 1181 kern_fstatfs(int fd, struct statfs *buf)
 1182 {
 1183         struct thread *td = curthread;
 1184         struct proc *p = td->td_proc;
 1185         struct file *fp;
 1186         struct mount *mp;
 1187         struct statfs *sp;
 1188         char *fullpath, *freepath;
 1189         int error;
 1190 
 1191         KKASSERT(p);
 1192         if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
 1193                 return (error);
 1194 
 1195         /*
 1196          * Try to use mount info from any overlays rather than the
 1197          * mount info for the underlying vnode, otherwise we will
 1198          * fail when operating on null-mounted paths inside a chroot.
 1199          */
 1200         if ((mp = fp->f_nchandle.mount) == NULL)
 1201                 mp = ((struct vnode *)fp->f_data)->v_mount;
 1202         if (mp == NULL) {
 1203                 error = EBADF;
 1204                 goto done;
 1205         }
 1206         if (fp->f_cred == NULL) {
 1207                 error = EINVAL;
 1208                 goto done;
 1209         }
 1210         sp = &mp->mnt_stat;
 1211         if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
 1212                 goto done;
 1213 
 1214         if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
 1215                 goto done;
 1216         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1217         strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
 1218         kfree(freepath, M_TEMP);
 1219 
 1220         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 1221         bcopy(sp, buf, sizeof(*buf));
 1222 
 1223         /* Only root should have access to the fsid's. */
 1224         if (priv_check(td, PRIV_ROOT))
 1225                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
 1226         error = 0;
 1227 done:
 1228         fdrop(fp);
 1229         return (error);
 1230 }
 1231 
 1232 /*
 1233  * fstatfs_args(int fd, struct statfs *buf)
 1234  *
 1235  * Get filesystem statistics.
 1236  */
 1237 int
 1238 sys_fstatfs(struct fstatfs_args *uap)
 1239 {
 1240         struct statfs buf;
 1241         int error;
 1242 
 1243         error = kern_fstatfs(uap->fd, &buf);
 1244 
 1245         if (error == 0)
 1246                 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
 1247         return (error);
 1248 }
 1249 
 1250 int
 1251 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
 1252 {
 1253         struct mount *mp;
 1254         struct statvfs *sp;
 1255         int error;
 1256 
 1257         if ((error = nlookup(nd)) != 0)
 1258                 return (error);
 1259         mp = nd->nl_nch.mount;
 1260         sp = &mp->mnt_vstat;
 1261         if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
 1262                 return (error);
 1263 
 1264         sp->f_flag = 0;
 1265         if (mp->mnt_flag & MNT_RDONLY)
 1266                 sp->f_flag |= ST_RDONLY;
 1267         if (mp->mnt_flag & MNT_NOSUID)
 1268                 sp->f_flag |= ST_NOSUID;
 1269         bcopy(sp, buf, sizeof(*buf));
 1270         return (0);
 1271 }
 1272 
 1273 /*
 1274  * statfs_args(char *path, struct statfs *buf)
 1275  *
 1276  * Get filesystem statistics.
 1277  */
 1278 int
 1279 sys_statvfs(struct statvfs_args *uap)
 1280 {
 1281         struct nlookupdata nd;
 1282         struct statvfs buf;
 1283         int error;
 1284 
 1285         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 1286         if (error == 0)
 1287                 error = kern_statvfs(&nd, &buf);
 1288         nlookup_done(&nd);
 1289         if (error == 0)
 1290                 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
 1291         return (error);
 1292 }
 1293 
 1294 int
 1295 kern_fstatvfs(int fd, struct statvfs *buf)
 1296 {
 1297         struct thread *td = curthread;
 1298         struct proc *p = td->td_proc;
 1299         struct file *fp;
 1300         struct mount *mp;
 1301         struct statvfs *sp;
 1302         int error;
 1303 
 1304         KKASSERT(p);
 1305         if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
 1306                 return (error);
 1307         if ((mp = fp->f_nchandle.mount) == NULL)
 1308                 mp = ((struct vnode *)fp->f_data)->v_mount;
 1309         if (mp == NULL) {
 1310                 error = EBADF;
 1311                 goto done;
 1312         }
 1313         if (fp->f_cred == NULL) {
 1314                 error = EINVAL;
 1315                 goto done;
 1316         }
 1317         sp = &mp->mnt_vstat;
 1318         if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
 1319                 goto done;
 1320 
 1321         sp->f_flag = 0;
 1322         if (mp->mnt_flag & MNT_RDONLY)
 1323                 sp->f_flag |= ST_RDONLY;
 1324         if (mp->mnt_flag & MNT_NOSUID)
 1325                 sp->f_flag |= ST_NOSUID;
 1326 
 1327         bcopy(sp, buf, sizeof(*buf));
 1328         error = 0;
 1329 done:
 1330         fdrop(fp);
 1331         return (error);
 1332 }
 1333 
 1334 /*
 1335  * fstatfs_args(int fd, struct statfs *buf)
 1336  *
 1337  * Get filesystem statistics.
 1338  */
 1339 int
 1340 sys_fstatvfs(struct fstatvfs_args *uap)
 1341 {
 1342         struct statvfs buf;
 1343         int error;
 1344 
 1345         error = kern_fstatvfs(uap->fd, &buf);
 1346 
 1347         if (error == 0)
 1348                 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
 1349         return (error);
 1350 }
 1351 
 1352 /*
 1353  * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 1354  *
 1355  * Get statistics on all filesystems.
 1356  */
 1357 
 1358 struct getfsstat_info {	/* state shared between sys_getfsstat() and getfsstat_callback() */
 1359         struct statfs *sfsp;	/* next user-space slot to copyout() into; NULL means only count */
 1360         long count;	/* mounts counted so far by the callback */
 1361         long maxcount;	/* number of struct statfs slots that fit in the user buffer */
 1362         int error;	/* error recorded by the callback; returned by sys_getfsstat() */
 1363         int flags;	/* uap->flags; tested against MNT_WAIT, MNT_NOWAIT and MNT_LAZY */
 1364         struct thread *td;	/* calling thread: supplies td_proc and td_ucred to the callback */
 1365 };
 1366 
 1367 static int getfsstat_callback(struct mount *, void *);	/* mountlist_scan() callback; the void * is a struct getfsstat_info */
 1368 
 1369 int
 1370 sys_getfsstat(struct getfsstat_args *uap)
 1371 {
 1372         struct thread *td = curthread;
 1373         struct getfsstat_info info;
 1374 
 1375         bzero(&info, sizeof(info));
 1376 
 1377         info.maxcount = uap->bufsize / sizeof(struct statfs);
 1378         info.sfsp = uap->buf;
 1379         info.count = 0;
 1380         info.flags = uap->flags;
 1381         info.td = td;
 1382 
 1383         mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
 1384         if (info.sfsp && info.count > info.maxcount)
 1385                 uap->sysmsg_result = info.maxcount;
 1386         else
 1387                 uap->sysmsg_result = info.count;
 1388         return (info.error);
 1389 }
 1390 
 1391 static int
 1392 getfsstat_callback(struct mount *mp, void *data)
 1393 {
 1394         struct getfsstat_info *info = data;
 1395         struct statfs *sp;
 1396         char *freepath;
 1397         char *fullpath;
 1398         int error;
 1399 
 1400         if (info->sfsp && info->count < info->maxcount) {
 1401                 if (info->td->td_proc &&
 1402                     !chroot_visible_mnt(mp, info->td->td_proc)) {
 1403                         return(0);
 1404                 }
 1405                 sp = &mp->mnt_stat;
 1406 
 1407                 /*
 1408                  * If MNT_NOWAIT or MNT_LAZY is specified, do not
 1409                  * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
 1410                  * overrides MNT_WAIT.
 1411                  */
 1412                 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
 1413                     (info->flags & MNT_WAIT)) &&
 1414                     (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
 1415                         return(0);
 1416                 }
 1417                 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 1418 
 1419                 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
 1420                 if (error) {
 1421                         info->error = error;
 1422                         return(-1);
 1423                 }
 1424                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1425                 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
 1426                 kfree(freepath, M_TEMP);
 1427 
 1428                 error = copyout(sp, info->sfsp, sizeof(*sp));
 1429                 if (error) {
 1430                         info->error = error;
 1431                         return (-1);
 1432                 }
 1433                 ++info->sfsp;
 1434         }
 1435         info->count++;
 1436         return(0);
 1437 }
 1438 
 1439 /*
 1440  * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 1441  *                 long vbufsize, int flags)
 1442  *
 1443  * Get statistics on all filesystems.
 1444  */
 1445 
 1446 struct getvfsstat_info {	/* state shared between sys_getvfsstat() and getvfsstat_callback() */
 1447         struct statfs *sfsp;	/* next user-space struct statfs slot to copyout() into */
 1448         struct statvfs *vsfsp;	/* next user-space struct statvfs slot; NULL means only count */
 1449         long count;	/* mounts counted so far by the callback */
 1450         long maxcount;	/* uap->vbufsize / sizeof(struct statvfs) */
 1451         int error;	/* error recorded by the callback; returned by sys_getvfsstat() */
 1452         int flags;	/* uap->flags; tested against MNT_WAIT, MNT_NOWAIT and MNT_LAZY */
 1453         struct thread *td;	/* calling thread: supplies td_proc and td_ucred to the callback */
 1454 };
 1455 
 1456 static int getvfsstat_callback(struct mount *, void *);	/* mountlist_scan() callback; the void * is a struct getvfsstat_info */
 1457 
 1458 int
 1459 sys_getvfsstat(struct getvfsstat_args *uap)
 1460 {
 1461         struct thread *td = curthread;
 1462         struct getvfsstat_info info;
 1463 
 1464         bzero(&info, sizeof(info));
 1465 
 1466         info.maxcount = uap->vbufsize / sizeof(struct statvfs);
 1467         info.sfsp = uap->buf;
 1468         info.vsfsp = uap->vbuf;
 1469         info.count = 0;
 1470         info.flags = uap->flags;
 1471         info.td = td;
 1472 
 1473         mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
 1474         if (info.vsfsp && info.count > info.maxcount)
 1475                 uap->sysmsg_result = info.maxcount;
 1476         else
 1477                 uap->sysmsg_result = info.count;
 1478         return (info.error);
 1479 }
 1480 
 1481 static int
 1482 getvfsstat_callback(struct mount *mp, void *data)
 1483 {
 1484         struct getvfsstat_info *info = data;
 1485         struct statfs *sp;
 1486         struct statvfs *vsp;
 1487         char *freepath;
 1488         char *fullpath;
 1489         int error;
 1490 
 1491         if (info->vsfsp && info->count < info->maxcount) {
 1492                 if (info->td->td_proc &&
 1493                     !chroot_visible_mnt(mp, info->td->td_proc)) {
 1494                         return(0);
 1495                 }
 1496                 sp = &mp->mnt_stat;
 1497                 vsp = &mp->mnt_vstat;
 1498 
 1499                 /*
 1500                  * If MNT_NOWAIT or MNT_LAZY is specified, do not
 1501                  * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
 1502                  * overrides MNT_WAIT.
 1503                  */
 1504                 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
 1505                     (info->flags & MNT_WAIT)) &&
 1506                     (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
 1507                         return(0);
 1508                 }
 1509                 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 1510 
 1511                 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
 1512                     (info->flags & MNT_WAIT)) &&
 1513                     (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
 1514                         return(0);
 1515                 }
 1516                 vsp->f_flag = 0;
 1517                 if (mp->mnt_flag & MNT_RDONLY)
 1518                         vsp->f_flag |= ST_RDONLY;
 1519                 if (mp->mnt_flag & MNT_NOSUID)
 1520                         vsp->f_flag |= ST_NOSUID;
 1521 
 1522                 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
 1523                 if (error) {
 1524                         info->error = error;
 1525                         return(-1);
 1526                 }
 1527                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1528                 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
 1529                 kfree(freepath, M_TEMP);
 1530 
 1531                 error = copyout(sp, info->sfsp, sizeof(*sp));
 1532                 if (error == 0)
 1533                         error = copyout(vsp, info->vsfsp, sizeof(*vsp));
 1534                 if (error) {
 1535                         info->error = error;
 1536                         return (-1);
 1537                 }
 1538                 ++info->sfsp;
 1539                 ++info->vsfsp;
 1540         }
 1541         info->count++;
 1542         return(0);
 1543 }
 1544 
 1545 
 1546 /*
 1547  * fchdir_args(int fd)
 1548  *
 1549  * Change current working directory to a given file descriptor.
 1550  */
 1551 int
 1552 sys_fchdir(struct fchdir_args *uap)
 1553 {
 1554         struct thread *td = curthread;
 1555         struct proc *p = td->td_proc;
 1556         struct filedesc *fdp = p->p_fd;
 1557         struct vnode *vp, *ovp;
 1558         struct mount *mp;
 1559         struct file *fp;
 1560         struct nchandle nch, onch, tnch;
 1561         int error;
 1562 
 1563         if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
 1564                 return (error);
 1565         lwkt_gettoken(&p->p_token);
 1566         vp = (struct vnode *)fp->f_data;
 1567         vref(vp);
 1568         vn_lock(vp, LK_SHARED | LK_RETRY);
 1569         if (fp->f_nchandle.ncp == NULL)
 1570                 error = ENOTDIR;
 1571         else
 1572                 error = checkvp_chdir(vp, td);
 1573         if (error) {
 1574                 vput(vp);
 1575                 goto done;
 1576         }
 1577         cache_copy(&fp->f_nchandle, &nch);
 1578 
 1579         /*
 1580          * If the ncp has become a mount point, traverse through
 1581          * the mount point.
 1582          */
 1583 
 1584         while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
 1585                (mp = cache_findmount(&nch)) != NULL
 1586         ) {
 1587                 error = nlookup_mp(mp, &tnch);
 1588                 if (error == 0) {
 1589                         cache_unlock(&tnch);    /* leave ref intact */
 1590                         vput(vp);
 1591                         vp = tnch.ncp->nc_vp;
 1592                         error = vget(vp, LK_SHARED);
 1593                         KKASSERT(error == 0);
 1594                         cache_drop(&nch);
 1595                         nch = tnch;
 1596                 }
 1597                 cache_dropmount(mp);
 1598         }
 1599         if (error == 0) {
 1600                 ovp = fdp->fd_cdir;
 1601                 onch = fdp->fd_ncdir;
 1602                 vn_unlock(vp);          /* leave ref intact */
 1603                 fdp->fd_cdir = vp;
 1604                 fdp->fd_ncdir = nch;
 1605                 cache_drop(&onch);
 1606                 vrele(ovp);
 1607         } else {
 1608                 cache_drop(&nch);
 1609                 vput(vp);
 1610         }
 1611         fdrop(fp);
 1612 done:
 1613         lwkt_reltoken(&p->p_token);
 1614         return (error);
 1615 }
 1616 
 1617 int
 1618 kern_chdir(struct nlookupdata *nd)
 1619 {
 1620         struct thread *td = curthread;
 1621         struct proc *p = td->td_proc;
 1622         struct filedesc *fdp = p->p_fd;
 1623         struct vnode *vp, *ovp;
 1624         struct nchandle onch;
 1625         int error;
 1626 
 1627         nd->nl_flags |= NLC_SHAREDLOCK;
 1628         if ((error = nlookup(nd)) != 0)
 1629                 return (error);
 1630         if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
 1631                 return (ENOENT);
 1632         if ((error = vget(vp, LK_SHARED)) != 0)
 1633                 return (error);
 1634 
 1635         lwkt_gettoken(&p->p_token);
 1636         error = checkvp_chdir(vp, td);
 1637         vn_unlock(vp);
 1638         if (error == 0) {
 1639                 ovp = fdp->fd_cdir;
 1640                 onch = fdp->fd_ncdir;
 1641                 cache_unlock(&nd->nl_nch);      /* leave reference intact */
 1642                 fdp->fd_ncdir = nd->nl_nch;
 1643                 fdp->fd_cdir = vp;
 1644                 cache_drop(&onch);
 1645                 vrele(ovp);
 1646                 cache_zero(&nd->nl_nch);
 1647         } else {
 1648                 vrele(vp);
 1649         }
 1650         lwkt_reltoken(&p->p_token);
 1651         return (error);
 1652 }
 1653 
 1654 /*
 1655  * chdir_args(char *path)
 1656  *
 1657  * Change current working directory (``.'').
 1658  */
 1659 int
 1660 sys_chdir(struct chdir_args *uap)
 1661 {
 1662         struct nlookupdata nd;
 1663         int error;
 1664 
 1665         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 1666         if (error == 0)
 1667                 error = kern_chdir(&nd);
 1668         nlookup_done(&nd);
 1669         return (error);
 1670 }
 1671 
 1672 /*
 1673  * Helper function for raised chroot(2) security function:  Refuse if
 1674  * any filedescriptors are open directories.
 1675  */
 1676 static int
 1677 chroot_refuse_vdir_fds(struct filedesc *fdp)
 1678 {
 1679         struct vnode *vp;
 1680         struct file *fp;
 1681         int error;
 1682         int fd;
 1683 
 1684         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
 1685                 if ((error = holdvnode(fdp, fd, &fp)) != 0)
 1686                         continue;
 1687                 vp = (struct vnode *)fp->f_data;
 1688                 if (vp->v_type != VDIR) {
 1689                         fdrop(fp);
 1690                         continue;
 1691                 }
 1692                 fdrop(fp);
 1693                 return(EPERM);
 1694         }
 1695         return (0);
 1696 }
 1697 
 1698 /*
 1699  * This sysctl determines if we will allow a process to chroot(2) if it
 1700  * has a directory open:
 1701  *      0: disallowed for all processes.
 1702  *      1: allowed for processes that were not already chroot(2)'ed.
 1703  *      2: allowed for all processes.
 1704  */
 1705 
 1706 static int chroot_allow_open_directories = 1;	/* defaults to mode 1; consulted by kern_chroot() before it scans for open directories */
 1707 
 1708 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
 1709      &chroot_allow_open_directories, 0, "");
 1710 
 1711 /*
 1712  * chroot to the specified namecache entry.  We obtain the vp from the
 1713  * namecache data.  The passed ncp must be locked and referenced and will
 1714  * remain locked and referenced on return.
 1715  */
 1716 int
 1717 kern_chroot(struct nchandle *nch)
 1718 {
 1719         struct thread *td = curthread;
 1720         struct proc *p = td->td_proc;
 1721         struct filedesc *fdp = p->p_fd;
 1722         struct vnode *vp;
 1723         int error;
 1724 
 1725         /*
 1726          * Only privileged user can chroot
 1727          */
 1728         error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
 1729         if (error)
 1730                 return (error);
 1731 
 1732         /*
 1733          * Disallow open directory descriptors (fchdir() breakouts).
 1734          */
 1735         if (chroot_allow_open_directories == 0 ||
 1736            (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
 1737                 if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
 1738                         return (error);
 1739         }
 1740         if ((vp = nch->ncp->nc_vp) == NULL)
 1741                 return (ENOENT);
 1742 
 1743         if ((error = vget(vp, LK_SHARED)) != 0)
 1744                 return (error);
 1745 
 1746         /*
 1747          * Check the validity of vp as a directory to change to and 
 1748          * associate it with rdir/jdir.
 1749          */
 1750         error = checkvp_chdir(vp, td);
 1751         vn_unlock(vp);                  /* leave reference intact */
 1752         if (error == 0) {
 1753                 vrele(fdp->fd_rdir);
 1754                 fdp->fd_rdir = vp;      /* reference inherited by fd_rdir */
 1755                 cache_drop(&fdp->fd_nrdir);
 1756                 cache_copy(nch, &fdp->fd_nrdir);
 1757                 if (fdp->fd_jdir == NULL) {
 1758                         fdp->fd_jdir = vp;
 1759                         vref(fdp->fd_jdir);
 1760                         cache_copy(nch, &fdp->fd_njdir);
 1761                 }
 1762         } else {
 1763                 vrele(vp);
 1764         }
 1765         return (error);
 1766 }
 1767 
 1768 /*
 1769  * chroot_args(char *path)
 1770  *
 1771  * Change notion of root (``/'') directory.
 1772  */
 1773 int
 1774 sys_chroot(struct chroot_args *uap)
 1775 {
 1776         struct thread *td __debugvar = curthread;
 1777         struct nlookupdata nd;
 1778         int error;
 1779 
 1780         KKASSERT(td->td_proc);
 1781         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 1782         if (error == 0) {
 1783                 nd.nl_flags |= NLC_EXEC;
 1784                 error = nlookup(&nd);
 1785                 if (error == 0)
 1786                         error = kern_chroot(&nd.nl_nch);
 1787         }
 1788         nlookup_done(&nd);
 1789         return(error);
 1790 }
 1791 
 1792 int
 1793 sys_chroot_kernel(struct chroot_kernel_args *uap)
 1794 {
 1795         struct thread *td = curthread;
 1796         struct nlookupdata nd;
 1797         struct nchandle *nch;
 1798         struct vnode *vp;
 1799         int error;
 1800 
 1801         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 1802         if (error)
 1803                 goto error_nond;
 1804 
 1805         error = nlookup(&nd);
 1806         if (error)
 1807                 goto error_out;
 1808 
 1809         nch = &nd.nl_nch;
 1810 
 1811         error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
 1812         if (error)
 1813                 goto error_out;
 1814 
 1815         if ((vp = nch->ncp->nc_vp) == NULL) {
 1816                 error = ENOENT;
 1817                 goto error_out;
 1818         }
 1819 
 1820         if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
 1821                 goto error_out;
 1822 
 1823         kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
 1824         get_mplock();
 1825         vfs_cache_setroot(vp, cache_hold(nch));
 1826         rel_mplock();
 1827 
 1828 error_out:
 1829         nlookup_done(&nd);
 1830 error_nond:
 1831         return(error);
 1832 }
 1833 
 1834 /*
 1835  * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 1836  * determine whether it is legal to chdir to the vnode.  The vnode's state
 1837  * is not changed by this call.
 1838  */
 1839 int
 1840 checkvp_chdir(struct vnode *vp, struct thread *td)
 1841 {
 1842         int error;
 1843 
 1844         if (vp->v_type != VDIR)
 1845                 error = ENOTDIR;
 1846         else
 1847                 error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
 1848         return (error);
 1849 }
 1850 
      /*
       * Open (or create) the file described by the initialized lookup nd
       * and install it in the calling process's descriptor table.
       *
       * nd     - lookup data; nlookup_done(nd) is called in here right
       *          after vn_open(), on success and on failure alike.
       * oflags - open(2) flags as passed by the caller.
       * mode   - creation mode; masked with the process's fd_cmask and
       *          ALLPERMS, with S_ISTXT stripped.
       * res    - on success receives the new descriptor index.
       *
       * Returns 0 on success.  Returns EINVAL when every O_ACCMODE bit is
       * set in oflags, otherwise the error from falloc(), vn_open(),
       * dupfdopen(), fdalloc(), VOP_ADVLOCK() or fsetfdflags().  On the
       * vn_open() failure path ERESTART is converted to EINTR.
       */
 1851 int
 1852 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
 1853 {
 1854         struct thread *td = curthread;
 1855         struct proc *p = td->td_proc;
 1856         struct lwp *lp = td->td_lwp;
 1857         struct filedesc *fdp = p->p_fd;
 1858         int cmode, flags;
 1859         struct file *nfp;
 1860         struct file *fp;
 1861         struct vnode *vp;
 1862         int type, indx, error = 0;
 1863         struct flock lf;
 1864 
              /* An access mode with all O_ACCMODE bits set is rejected. */
 1865         if ((oflags & O_ACCMODE) == O_ACCMODE)
 1866                 return (EINVAL);
 1867         flags = FFLAGS(oflags);
 1868         error = falloc(lp, &nfp, NULL);
 1869         if (error)
 1870                 return (error);
 1871         fp = nfp;
 1872         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1873 
 1874         /*
 1875          * XXX p_dupfd is a real mess.  It allows a device to return a
 1876          * file descriptor to be duplicated rather than doing the open
 1877          * itself.
 1878          */
 1879         lp->lwp_dupfd = -1;
 1880 
 1881         /*
 1882          * Call vn_open() to do the lookup and assign the vnode to the 
 1883          * file pointer.  vn_open() does not change the ref count on fp
 1884          * and the vnode, on success, will be inherited by the file pointer
 1885          * and unlocked.
 1886          *
 1887          * Request a shared lock on the vnode if possible.
 1888          */
 1889         nd->nl_flags |= NLC_LOCKVP;
 1890         if ((flags & (O_CREAT|O_TRUNC)) == 0)
 1891                 nd->nl_flags |= NLC_SHAREDLOCK;
 1892 
 1893         error = vn_open(nd, fp, flags, cmode);
 1894         nlookup_done(nd);
 1895 
 1896         if (error) {
 1897                 /*
 1898                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1899                  * responsible for dropping the old contents of ofiles[indx]
 1900                  * if it succeeds.
 1901                  *
 1902                  * Note that fsetfd() will add a ref to fp which represents
 1903                  * the fd_files[] assignment.  We must still drop our
 1904                  * reference.
 1905                  */
 1906                 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
 1907                         if (fdalloc(p, 0, &indx) == 0) {
 1908                                 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
 1909                                 if (error == 0) {
 1910                                         *res = indx;
 1911                                         fdrop(fp);      /* our ref */
 1912                                         return (0);
 1913                                 }
                                      /* dup failed: give the reserved slot back */
 1914                                 fsetfd(fdp, NULL, indx);
 1915                         }
 1916                 }
 1917                 fdrop(fp);      /* our ref */
 1918                 if (error == ERESTART)
 1919                         error = EINTR;
 1920                 return (error);
 1921         }
 1922 
 1923         /*
 1924          * ref the vnode for ourselves so it can't be ripped out from under
 1925          * us.  XXX need an ND flag to request that the vnode be returned
 1926          * anyway.
 1927          *
 1928          * Reserve a file descriptor but do not assign it until the open
 1929          * succeeds.
 1930          */
 1931         vp = (struct vnode *)fp->f_data;
 1932         vref(vp);
 1933         if ((error = fdalloc(p, 0, &indx)) != 0) {
 1934                 fdrop(fp);
 1935                 vrele(vp);
 1936                 return (error);
 1937         }
 1938 
 1939         /*
 1940          * If no error occurs the vp will have been assigned to the file
 1941          * pointer.
 1942          */
 1943         lp->lwp_dupfd = 0;
 1944 
              /*
               * O_EXLOCK / O_SHLOCK: take a whole-file advisory lock (write
               * lock for O_EXLOCK, read lock otherwise).  F_WAIT is passed
               * unless FNONBLOCK is set.
               */
 1945         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1946                 lf.l_whence = SEEK_SET;
 1947                 lf.l_start = 0;
 1948                 lf.l_len = 0;
 1949                 if (flags & O_EXLOCK)
 1950                         lf.l_type = F_WRLCK;
 1951                 else
 1952                         lf.l_type = F_RDLCK;
 1953                 if (flags & FNONBLOCK)
 1954                         type = 0;
 1955                 else
 1956                         type = F_WAIT;
 1957 
 1958                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
 1959                         /*
 1960                          * lock request failed.  Clean up the reserved
 1961                          * descriptor.
 1962                          */
 1963                         vrele(vp);
 1964                         fsetfd(fdp, NULL, indx);
 1965                         fdrop(fp);
 1966                         return (error);
 1967                 }
 1968                 fp->f_flag |= FHASLOCK;
 1969         }
 1970 #if 0
 1971         /*
 1972          * Assert that all regular file vnodes were created with a object.
 1973          */
 1974         KASSERT(vp->v_type != VREG || vp->v_object != NULL,
 1975                 ("open: regular file has no backing object after vn_open"));
 1976 #endif
 1977 
              /* Drop the private vnode reference taken by vref() above. */
 1978         vrele(vp);
 1979 
 1980         /*
 1981          * release our private reference, leaving the one associated with the
 1982          * descriptor table intact.
 1983          */
 1984         fsetfd(fdp, fp, indx);
 1985         fdrop(fp);
 1986         *res = indx;
              /*
               * The close-on-exec flag is applied after the descriptor has
               * been installed; a failure here is returned to the caller.
               */
 1987         if (oflags & O_CLOEXEC)
 1988                 error = fsetfdflags(fdp, *res, UF_EXCLOSE);
 1989         return (error);
 1990 }
 1991 
 1992 /*
 1993  * open_args(char *path, int flags, int mode)
 1994  *
 1995  * Check permissions, allocate an open file structure,
 1996  * and call the device open routine if any.
 1997  */
 1998 int
 1999 sys_open(struct open_args *uap)
 2000 {
 2001         struct nlookupdata nd;
 2002         int error;
 2003 
 2004         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2005         if (error == 0) {
 2006                 error = kern_open(&nd, uap->flags,
 2007                                     uap->mode, &uap->sysmsg_result);
 2008         }
 2009         nlookup_done(&nd);
 2010         return (error);
 2011 }
 2012 
 2013 /*
 2014  * openat_args(int fd, char *path, int flags, int mode)
 2015  */
 2016 int
 2017 sys_openat(struct openat_args *uap)
 2018 {
 2019         struct nlookupdata nd;
 2020         int error;
 2021         struct file *fp;
 2022 
 2023         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 2024         if (error == 0) {
 2025                 error = kern_open(&nd, uap->flags, uap->mode, 
 2026                                         &uap->sysmsg_result);
 2027         }
 2028         nlookup_done_at(&nd, fp);
 2029         return (error);
 2030 }
 2031 
 2032 int
 2033 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
 2034 {
 2035         struct thread *td = curthread;
 2036         struct proc *p = td->td_proc;
 2037         struct vnode *vp;
 2038         struct vattr vattr;
 2039         int error;
 2040         int whiteout = 0;
 2041 
 2042         KKASSERT(p);
 2043 
 2044         VATTR_NULL(&vattr);
 2045         vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
 2046         vattr.va_rmajor = rmajor;
 2047         vattr.va_rminor = rminor;
 2048 
 2049         switch (mode & S_IFMT) {
 2050         case S_IFMT:    /* used by badsect to flag bad sectors */
 2051                 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
 2052                 vattr.va_type = VBAD;
 2053                 break;
 2054         case S_IFCHR:
 2055                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 2056                 vattr.va_type = VCHR;
 2057                 break;
 2058         case S_IFBLK:
 2059                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 2060                 vattr.va_type = VBLK;
 2061                 break;
 2062         case S_IFWHT:
 2063                 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
 2064                 whiteout = 1;
 2065                 break;
 2066         case S_IFDIR:   /* special directories support for HAMMER */
 2067                 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
 2068                 vattr.va_type = VDIR;
 2069                 break;
 2070         default:
 2071                 error = EINVAL;
 2072                 break;
 2073         }
 2074 
 2075         if (error)
 2076                 return (error);
 2077 
 2078         bwillinode(1);
 2079         nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
 2080         if ((error = nlookup(nd)) != 0)
 2081                 return (error);
 2082         if (nd->nl_nch.ncp->nc_vp)
 2083                 return (EEXIST);
 2084         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 2085                 return (error);
 2086 
 2087         if (whiteout) {
 2088                 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
 2089                                       nd->nl_cred, NAMEI_CREATE);
 2090         } else {
 2091                 vp = NULL;
 2092                 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
 2093                                    &vp, nd->nl_cred, &vattr);
 2094                 if (error == 0)
 2095                         vput(vp);
 2096         }
 2097         return (error);
 2098 }
 2099 
 2100 /*
 2101  * mknod_args(char *path, int mode, int dev)
 2102  *
 2103  * Create a special file.
 2104  */
 2105 int
 2106 sys_mknod(struct mknod_args *uap)
 2107 {
 2108         struct nlookupdata nd;
 2109         int error;
 2110 
 2111         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2112         if (error == 0) {
 2113                 error = kern_mknod(&nd, uap->mode,
 2114                                    umajor(uap->dev), uminor(uap->dev));
 2115         }
 2116         nlookup_done(&nd);
 2117         return (error);
 2118 }
 2119 
 2120 /*
 2121  * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 2122  *
 2123  * Create a special file.  The path is relative to the directory associated
 2124  * with fd.
 2125  */
 2126 int
 2127 sys_mknodat(struct mknodat_args *uap)
 2128 {
 2129         struct nlookupdata nd;
 2130         struct file *fp;
 2131         int error;
 2132 
 2133         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 2134         if (error == 0) {
 2135                 error = kern_mknod(&nd, uap->mode,
 2136                                    umajor(uap->dev), uminor(uap->dev));
 2137         }
 2138         nlookup_done_at(&nd, fp);
 2139         return (error);
 2140 }
 2141 
 2142 int
 2143 kern_mkfifo(struct nlookupdata *nd, int mode)
 2144 {
 2145         struct thread *td = curthread;
 2146         struct proc *p = td->td_proc;
 2147         struct vattr vattr;
 2148         struct vnode *vp;
 2149         int error;
 2150 
 2151         bwillinode(1);
 2152 
 2153         nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
 2154         if ((error = nlookup(nd)) != 0)
 2155                 return (error);
 2156         if (nd->nl_nch.ncp->nc_vp)
 2157                 return (EEXIST);
 2158         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 2159                 return (error);
 2160 
 2161         VATTR_NULL(&vattr);
 2162         vattr.va_type = VFIFO;
 2163         vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
 2164         vp = NULL;
 2165         error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
 2166         if (error == 0)
 2167                 vput(vp);
 2168         return (error);
 2169 }
 2170 
 2171 /*
 2172  * mkfifo_args(char *path, int mode)
 2173  *
 2174  * Create a named pipe.
 2175  */
 2176 int
 2177 sys_mkfifo(struct mkfifo_args *uap)
 2178 {
 2179         struct nlookupdata nd;
 2180         int error;
 2181 
 2182         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2183         if (error == 0)
 2184                 error = kern_mkfifo(&nd, uap->mode);
 2185         nlookup_done(&nd);
 2186         return (error);
 2187 }
 2188 
 2189 /*
 2190  * mkfifoat_args(int fd, char *path, mode_t mode)
 2191  *
 2192  * Create a named pipe.  The path is relative to the directory associated
 2193  * with fd.
 2194  */
 2195 int
 2196 sys_mkfifoat(struct mkfifoat_args *uap)
 2197 {
 2198         struct nlookupdata nd;
 2199         struct file *fp;
 2200         int error;
 2201 
 2202         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 2203         if (error == 0)
 2204                 error = kern_mkfifo(&nd, uap->mode);
 2205         nlookup_done_at(&nd, fp);
 2206         return (error);
 2207 }
 2208 
      /*
       * Hard link restrictions, consulted by can_hardlink().  Both default
       * to 0 (off); when both are 0 can_hardlink() allows every request.
       *
       * security.hardlink_check_uid: when non-zero, an unprivileged caller
       * may only hard link files whose owner uid matches its own.
       */
 2209 static int hardlink_check_uid = 0;
 2210 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 2211     &hardlink_check_uid, 0, 
 2212     "Unprivileged processes cannot create hard links to files owned by other "
 2213     "users");
      /*
       * security.hardlink_check_gid: when non-zero, an unprivileged caller
       * may only hard link files it owns or whose group it is a member of.
       */
 2214 static int hardlink_check_gid = 0;
 2215 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 2216     &hardlink_check_gid, 0,
 2217     "Unprivileged processes cannot create hard links to files owned by other "
 2218     "groups");
 2219 
 2220 static int
 2221 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
 2222 {
 2223         struct vattr va;
 2224         int error;
 2225 
 2226         /*
 2227          * Shortcut if disabled
 2228          */
 2229         if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
 2230                 return (0);
 2231 
 2232         /*
 2233          * Privileged user can always hardlink
 2234          */
 2235         if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
 2236                 return (0);
 2237 
 2238         /*
 2239          * Otherwise only if the originating file is owned by the
 2240          * same user or group.  Note that any group is allowed if
 2241          * the file is owned by the caller.
 2242          */
 2243         error = VOP_GETATTR(vp, &va);
 2244         if (error != 0)
 2245                 return (error);
 2246         
 2247         if (hardlink_check_uid) {
 2248                 if (cred->cr_uid != va.va_uid)
 2249                         return (EPERM);
 2250         }
 2251         
 2252         if (hardlink_check_gid) {
 2253                 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
 2254                         return (EPERM);
 2255         }
 2256 
 2257         return (0);
 2258 }
 2259 
      /*
       * Create a hard link: nd describes the existing source file, linknd
       * the new name to create.
       *
       * Returns EPERM when the source is a directory or can_hardlink()
       * refuses, EEXIST when the target name already resolves to a vnode,
       * otherwise the error from nlookup(), ncp_writechk(), vget(),
       * vn_lock() or VOP_NLINK().
       */
 2260 int
 2261 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
 2262 {
 2263         struct thread *td = curthread;
 2264         struct vnode *vp;
 2265         int error;
 2266 
 2267         /*
 2268          * Lookup the source and obtain a locked vnode.
 2269          *
 2270          * You may only hardlink a file which you have write permission
 2271          * on or which you own.
 2272          *
 2273          * XXX relookup on vget failure / race ?
 2274          */
 2275         bwillinode(1);
 2276         nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
 2277         if ((error = nlookup(nd)) != 0)
 2278                 return (error);
 2279         vp = nd->nl_nch.ncp->nc_vp;
 2280         KKASSERT(vp != NULL);
 2281         if (vp->v_type == VDIR)
 2282                 return (EPERM);         /* POSIX */
 2283         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 2284                 return (error);
 2285         if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
 2286                 return (error);
 2287 
 2288         /*
 2289          * Unlock the source so we can lookup the target without deadlocking
 2290          * (XXX vp is locked already, possible other deadlock?).  The target
 2291          * must not exist.
 2292          */
 2293         KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
 2294         nd->nl_flags &= ~NLC_NCPISLOCKED;
 2295         cache_unlock(&nd->nl_nch);
              /* Only the vnode lock is dropped; the vget() reference is kept. */
 2296         vn_unlock(vp);
 2297 
 2298         linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
 2299         if ((error = nlookup(linknd)) != 0) {
 2300                 vrele(vp);
 2301                 return (error);
 2302         }
 2303         if (linknd->nl_nch.ncp->nc_vp) {
 2304                 vrele(vp);
 2305                 return (EEXIST);
 2306         }
              /* Re-acquire the source vnode lock before running the VOP. */
 2307         error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
 2308         if (error) {
 2309                 vrele(vp);
 2310                 return (error);
 2311         }
 2312 
 2313         /*
 2314          * Finally run the new API VOP.
 2315          */
 2316         error = can_hardlink(vp, td, td->td_ucred);
 2317         if (error == 0) {
 2318                 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
 2319                                   vp, linknd->nl_cred);
 2320         }
              /* Drops both the lock and the reference on the source vnode. */
 2321         vput(vp);
 2322         return (error);
 2323 }
 2324 
 2325 /*
 2326  * link_args(char *path, char *link)
 2327  *
 2328  * Make a hard file link.
 2329  */
 2330 int
 2331 sys_link(struct link_args *uap)
 2332 {
 2333         struct nlookupdata nd, linknd;
 2334         int error;
 2335 
 2336         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 2337         if (error == 0) {
 2338                 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
 2339                 if (error == 0)
 2340                         error = kern_link(&nd, &linknd);
 2341                 nlookup_done(&linknd);
 2342         }
 2343         nlookup_done(&nd);
 2344         return (error);
 2345 }
 2346 
 2347 /*
 2348  * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 2349  *
 2350  * Make a hard file link. The path1 argument is relative to the directory
 2351  * associated with fd1, and similarly the path2 argument is relative to
 2352  * the directory associated with fd2.
 2353  */
 2354 int
 2355 sys_linkat(struct linkat_args *uap)
 2356 {
 2357         struct nlookupdata nd, linknd;
 2358         struct file *fp1, *fp2;
 2359         int error;
 2360 
 2361         error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
 2362             (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
 2363         if (error == 0) {
 2364                 error = nlookup_init_at(&linknd, &fp2, uap->fd2,
 2365                     uap->path2, UIO_USERSPACE, 0);
 2366                 if (error == 0)
 2367                         error = kern_link(&nd, &linknd);
 2368                 nlookup_done_at(&linknd, fp2);
 2369         }
 2370         nlookup_done_at(&nd, fp1);
 2371         return (error);
 2372 }
 2373 
 2374 int
 2375 kern_symlink(struct nlookupdata *nd, char *path, int mode)
 2376 {
 2377         struct vattr vattr;
 2378         struct vnode *vp;
 2379         struct vnode *dvp;
 2380         int error;
 2381 
 2382         bwillinode(1);
 2383         nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
 2384         if ((error = nlookup(nd)) != 0)
 2385                 return (error);
 2386         if (nd->nl_nch.ncp->nc_vp)
 2387                 return (EEXIST);
 2388         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 2389                 return (error);
 2390         dvp = nd->nl_dvp;
 2391         VATTR_NULL(&vattr);
 2392         vattr.va_mode = mode;
 2393         error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
 2394         if (error == 0)
 2395                 vput(vp);
 2396         return (error);
 2397 }
 2398 
 2399 /*
 2400  * symlink(char *path, char *link)
 2401  *
 2402  * Make a symbolic link.
 2403  */
 2404 int
 2405 sys_symlink(struct symlink_args *uap)
 2406 {
 2407         struct thread *td = curthread;
 2408         struct nlookupdata nd;
 2409         char *path;
 2410         int error;
 2411         int mode;
 2412 
 2413         path = objcache_get(namei_oc, M_WAITOK);
 2414         error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
 2415         if (error == 0) {
 2416                 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
 2417                 if (error == 0) {
 2418                         mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
 2419                         error = kern_symlink(&nd, path, mode);
 2420                 }
 2421                 nlookup_done(&nd);
 2422         }
 2423         objcache_put(namei_oc, path);
 2424         return (error);
 2425 }
 2426 
 2427 /*
 2428  * symlinkat_args(char *path1, int fd, char *path2)
 2429  *
 2430  * Make a symbolic link.  The path2 argument is relative to the directory
 2431  * associated with fd.
 2432  */
 2433 int
 2434 sys_symlinkat(struct symlinkat_args *uap)
 2435 {
 2436         struct thread *td = curthread;
 2437         struct nlookupdata nd;
 2438         struct file *fp;
 2439         char *path1;
 2440         int error;
 2441         int mode;
 2442 
 2443         path1 = objcache_get(namei_oc, M_WAITOK);
 2444         error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
 2445         if (error == 0) {
 2446                 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
 2447                     UIO_USERSPACE, 0);
 2448                 if (error == 0) {
 2449                         mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
 2450                         error = kern_symlink(&nd, path1, mode);
 2451                 }
 2452                 nlookup_done_at(&nd, fp);
 2453         }
 2454         objcache_put(namei_oc, path1);
 2455         return (error);
 2456 }
 2457 
 2458 /*
 2459  * undelete_args(char *path)
 2460  *
 2461  * Delete a whiteout from the filesystem.
 2462  */
 2463 int
 2464 sys_undelete(struct undelete_args *uap)
 2465 {
 2466         struct nlookupdata nd;
 2467         int error;
 2468 
 2469         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2470         bwillinode(1);
 2471         nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
 2472         if (error == 0)
 2473                 error = nlookup(&nd);
 2474         if (error == 0)
 2475                 error = ncp_writechk(&nd.nl_nch);
 2476         if (error == 0) {
 2477                 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
 2478                                       NAMEI_DELETE);
 2479         }
 2480         nlookup_done(&nd);
 2481         return (error);
 2482 }
 2483 
 2484 int
 2485 kern_unlink(struct nlookupdata *nd)
 2486 {
 2487         int error;
 2488 
 2489         bwillinode(1);
 2490         nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
 2491         if ((error = nlookup(nd)) != 0)
 2492                 return (error);
 2493         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 2494                 return (error);
 2495         error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
 2496         return (error);
 2497 }
 2498 
 2499 /*
 2500  * unlink_args(char *path)
 2501  *
 2502  * Delete a name from the filesystem.
 2503  */
 2504 int
 2505 sys_unlink(struct unlink_args *uap)
 2506 {
 2507         struct nlookupdata nd;
 2508         int error;
 2509 
 2510         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2511         if (error == 0)
 2512                 error = kern_unlink(&nd);
 2513         nlookup_done(&nd);
 2514         return (error);
 2515 }
 2516 
 2517 
 2518 /*
 2519  * unlinkat_args(int fd, char *path, int flags)
 2520  *
 2521  * Delete the file or directory entry pointed to by fd/path.
 2522  */
 2523 int
 2524 sys_unlinkat(struct unlinkat_args *uap)
 2525 {
 2526         struct nlookupdata nd;
 2527         struct file *fp;
 2528         int error;
 2529 
 2530         if (uap->flags & ~AT_REMOVEDIR)
 2531                 return (EINVAL);
 2532 
 2533         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 2534         if (error == 0) {
 2535                 if (uap->flags & AT_REMOVEDIR)
 2536                         error = kern_rmdir(&nd);
 2537                 else
 2538                         error = kern_unlink(&nd);
 2539         }
 2540         nlookup_done_at(&nd, fp);
 2541         return (error);
 2542 }
 2543 
 2544 int
 2545 kern_lseek(int fd, off_t offset, int whence, off_t *res)
 2546 {
 2547         struct thread *td = curthread;
 2548         struct proc *p = td->td_proc;
 2549         struct file *fp;
 2550         struct vnode *vp;
 2551         struct vattr vattr;
 2552         off_t new_offset;
 2553         int error;
 2554 
 2555         fp = holdfp(p->p_fd, fd, -1);
 2556         if (fp == NULL)
 2557                 return (EBADF);
 2558         if (fp->f_type != DTYPE_VNODE) {
 2559                 error = ESPIPE;
 2560                 goto done;
 2561         }
 2562         vp = (struct vnode *)fp->f_data;
 2563 
 2564         switch (whence) {
 2565         case L_INCR:
 2566                 spin_lock(&fp->f_spin);
 2567                 new_offset = fp->f_offset + offset;
 2568                 error = 0;
 2569                 break;
 2570         case L_XTND:
 2571                 error = VOP_GETATTR(vp, &vattr);
 2572                 spin_lock(&fp->f_spin);
 2573                 new_offset = offset + vattr.va_size;
 2574                 break;
 2575         case L_SET:
 2576                 new_offset = offset;
 2577                 error = 0;
 2578                 spin_lock(&fp->f_spin);
 2579                 break;
 2580         default:
 2581                 new_offset = 0;
 2582                 error = EINVAL;
 2583                 spin_lock(&fp->f_spin);
 2584                 break;
 2585         }
 2586 
 2587         /*
 2588          * Validate the seek position.  Negative offsets are not allowed
 2589          * for regular files or directories.
 2590          *
 2591          * Normally we would also not want to allow negative offsets for
 2592          * character and block-special devices.  However kvm addresses
 2593          * on 64 bit architectures might appear to be negative and must
 2594          * be allowed.
 2595          */
 2596         if (error == 0) {
 2597                 if (new_offset < 0 &&
 2598                     (vp->v_type == VREG || vp->v_type == VDIR)) {
 2599                         error = EINVAL;
 2600                 } else {
 2601                         fp->f_offset = new_offset;
 2602                 }
 2603         }
 2604         *res = fp->f_offset;
 2605         spin_unlock(&fp->f_spin);
 2606 done:
 2607         fdrop(fp);
 2608         return (error);
 2609 }
 2610 
 2611 /*
 2612  * lseek_args(int fd, int pad, off_t offset, int whence)
 2613  *
 2614  * Reposition read/write file offset.
 2615  */
 2616 int
 2617 sys_lseek(struct lseek_args *uap)
 2618 {
 2619         int error;
 2620 
 2621         error = kern_lseek(uap->fd, uap->offset, uap->whence,
 2622                            &uap->sysmsg_offset);
 2623 
 2624         return (error);
 2625 }
 2626 
 2627 /*
 2628  * Check if current process can access given file.  amode is a bitmask of *_OK
 2629  * access bits.  flags is a bitmask of AT_* flags.
       *
       * Only AT_EACCESS is accepted in flags; any other bit yields EINVAL.
       * The name is looked up with NLC_SHAREDLOCK and its vnode obtained
       * with a shared lock.  When amode is 0 only existence is checked.
       * A W_OK request must also pass vn_writechk() before the access VOP
       * is consulted.
       *
       * Returns 0 when access is granted, otherwise the error from
       * nlookup(), cache_vget(), vn_writechk(), VOP_ACCESS_FLAGS() or
       * cache_resolve().
 2630  */
 2631 int
 2632 kern_access(struct nlookupdata *nd, int amode, int flags)
 2633 {
 2634         struct vnode *vp;
 2635         int error, mode;
 2636 
 2637         if (flags & ~AT_EACCESS)
 2638                 return (EINVAL);
 2639         nd->nl_flags |= NLC_SHAREDLOCK;
 2640         if ((error = nlookup(nd)) != 0)
 2641                 return (error);
 2642 retry:
 2643         error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
 2644         if (error)
 2645                 return (error);
 2646 
 2647         /* amode == 0 means only check for existence. */
 2648         if (amode) {
                      /* Translate the *_OK request bits into V* access bits. */
 2649                 mode = 0;
 2650                 if (amode & R_OK)
 2651                         mode |= VREAD;
 2652                 if (amode & W_OK)
 2653                         mode |= VWRITE;
 2654                 if (amode & X_OK)
 2655                         mode |= VEXEC;
                      /*
                       * vn_writechk() is only consulted for write requests;
                       * if it fails its error is returned and the access VOP
                       * is skipped.
                       */
 2656                 if ((mode & VWRITE) == 0 || 
 2657                     (error = vn_writechk(vp, &nd->nl_nch)) == 0)
 2658                         error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
 2659 
 2660                 /*
 2661                  * If the file handle is stale we have to re-resolve the
 2662                  * entry with the ncp held exclusively.  This is a hack
 2663                  * at the moment.
 2664                  */
 2665                 if (error == ESTALE) {
 2666                         vput(vp);
 2667                         cache_unlock(&nd->nl_nch);
 2668                         cache_lock(&nd->nl_nch);
 2669                         cache_setunresolved(&nd->nl_nch);
 2670                         error = cache_resolve(&nd->nl_nch, nd->nl_cred);
 2671                         if (error == 0) {
 2672                                 vp = NULL;
 2673                                 goto retry;
 2674                         }
 2675                         return(error);
 2676                 }
 2677         }
 2678         vput(vp);
 2679         return (error);
 2680 }
 2681 
 2682 /*
 2683  * access_args(char *path, int flags)
 2684  *
 2685  * Check access permissions.
 2686  */
 2687 int
 2688 sys_access(struct access_args *uap)
 2689 {
 2690         struct nlookupdata nd;
 2691         int error;
 2692 
 2693         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 2694         if (error == 0)
 2695                 error = kern_access(&nd, uap->flags, 0);
 2696         nlookup_done(&nd);
 2697         return (error);
 2698 }
 2699 
 2700 
 2701 /*
 2702  * eaccess_args(char *path, int flags)
 2703  *
 2704  * Check access permissions.
 2705  */
 2706 int
 2707 sys_eaccess(struct eaccess_args *uap)
 2708 {
 2709         struct nlookupdata nd;
 2710         int error;
 2711 
 2712         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 2713         if (error == 0)
 2714                 error = kern_access(&nd, uap->flags, AT_EACCESS);
 2715         nlookup_done(&nd);
 2716         return (error);
 2717 }
 2718 
 2719 
 2720 /*
 2721  * faccessat_args(int fd, char *path, int amode, int flags)
 2722  *
 2723  * Check access permissions.
 2724  */
 2725 int
 2726 sys_faccessat(struct faccessat_args *uap)
 2727 {
 2728         struct nlookupdata nd;
 2729         struct file *fp;
 2730         int error;
 2731 
 2732         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 
 2733                                 NLC_FOLLOW);
 2734         if (error == 0)
 2735                 error = kern_access(&nd, uap->amode, uap->flags);
 2736         nlookup_done_at(&nd, fp);
 2737         return (error);
 2738 }
 2739 
 2740 int
 2741 kern_stat(struct nlookupdata *nd, struct stat *st)
 2742 {
 2743         int error;
 2744         struct vnode *vp;
 2745 
 2746         nd->nl_flags |= NLC_SHAREDLOCK;
 2747         if ((error = nlookup(nd)) != 0)
 2748                 return (error);
 2749 again:
 2750         if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
 2751                 return (ENOENT);
 2752 
 2753         if ((error = vget(vp, LK_SHARED)) != 0)
 2754                 return (error);
 2755         error = vn_stat(vp, st, nd->nl_cred);
 2756 
 2757         /*
 2758          * If the file handle is stale we have to re-resolve the
 2759          * entry with the ncp held exclusively.  This is a hack
 2760          * at the moment.
 2761          */
 2762         if (error == ESTALE) {
 2763                 vput(vp);
 2764                 cache_unlock(&nd->nl_nch);
 2765                 cache_lock(&nd->nl_nch);
 2766                 cache_setunresolved(&nd->nl_nch);
 2767                 error = cache_resolve(&nd->nl_nch, nd->nl_cred);
 2768                 if (error == 0)
 2769                         goto again;
 2770         } else {
 2771                 vput(vp);
 2772         }
 2773         return (error);
 2774 }
 2775 
 2776 /*
 2777  * stat_args(char *path, struct stat *ub)
 2778  *
 2779  * Get file status; this version follows links.
 2780  */
 2781 int
 2782 sys_stat(struct stat_args *uap)
 2783 {
 2784         struct nlookupdata nd;
 2785         struct stat st;
 2786         int error;
 2787 
 2788         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 2789         if (error == 0) {
 2790                 error = kern_stat(&nd, &st);
 2791                 if (error == 0)
 2792                         error = copyout(&st, uap->ub, sizeof(*uap->ub));
 2793         }
 2794         nlookup_done(&nd);
 2795         return (error);
 2796 }
 2797 
 2798 /*
 2799  * lstat_args(char *path, struct stat *ub)
 2800  *
 2801  * Get file status; this version does not follow links.
 2802  */
 2803 int
 2804 sys_lstat(struct lstat_args *uap)
 2805 {
 2806         struct nlookupdata nd;
 2807         struct stat st;
 2808         int error;
 2809 
 2810         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2811         if (error == 0) {
 2812                 error = kern_stat(&nd, &st);
 2813                 if (error == 0)
 2814                         error = copyout(&st, uap->ub, sizeof(*uap->ub));
 2815         }
 2816         nlookup_done(&nd);
 2817         return (error);
 2818 }
 2819 
 2820 /*
 2821  * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 2822  *
 2823  * Get status of file pointed to by fd/path.
 2824  */
 2825 int
 2826 sys_fstatat(struct fstatat_args *uap)
 2827 {
 2828         struct nlookupdata nd;
 2829         struct stat st;
 2830         int error;
 2831         int flags;
 2832         struct file *fp;
 2833 
 2834         if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
 2835                 return (EINVAL);
 2836 
 2837         flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
 2838 
 2839         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 
 2840                                 UIO_USERSPACE, flags);
 2841         if (error == 0) {
 2842                 error = kern_stat(&nd, &st);
 2843                 if (error == 0)
 2844                         error = copyout(&st, uap->sb, sizeof(*uap->sb));
 2845         }
 2846         nlookup_done_at(&nd, fp);
 2847         return (error);
 2848 }
 2849 
 2850 static int
 2851 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
 2852 {
 2853         struct nlookupdata nd;
 2854         struct vnode *vp;
 2855         int error;
 2856 
 2857         vp = NULL;
 2858         error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
 2859         if (error == 0)
 2860                 error = nlookup(&nd);
 2861         if (error == 0)
 2862                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
 2863         nlookup_done(&nd);
 2864         if (error == 0) {
 2865                 error = VOP_PATHCONF(vp, name, sysmsg_regp);
 2866                 vput(vp);
 2867         }
 2868         return (error);
 2869 }
 2870 
 2871 /*
 2872  * pathconf_Args(char *path, int name)
 2873  *
 2874  * Get configurable pathname variables.
 2875  */
 2876 int
 2877 sys_pathconf(struct pathconf_args *uap)
 2878 {
 2879         return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
 2880                 &uap->sysmsg_reg));
 2881 }
 2882 
 2883 /*
 2884  * lpathconf_Args(char *path, int name)
 2885  *
 2886  * Get configurable pathname variables, but don't follow symlinks.
 2887  */
 2888 int
 2889 sys_lpathconf(struct lpathconf_args *uap)
 2890 {
 2891         return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg));
 2892 }
 2893 
 2894 /*
 2895  * XXX: daver
 2896  * kern_readlink isn't properly split yet.  There is a copyin burried
 2897  * in VOP_READLINK().
 2898  */
 2899 int
 2900 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
 2901 {
 2902         struct thread *td = curthread;
 2903         struct vnode *vp;
 2904         struct iovec aiov;
 2905         struct uio auio;
 2906         int error;
 2907 
 2908         nd->nl_flags |= NLC_SHAREDLOCK;
 2909         if ((error = nlookup(nd)) != 0)
 2910                 return (error);
 2911         error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
 2912         if (error)
 2913                 return (error);
 2914         if (vp->v_type != VLNK) {
 2915                 error = EINVAL;
 2916         } else {
 2917                 aiov.iov_base = buf;
 2918                 aiov.iov_len = count;
 2919                 auio.uio_iov = &aiov;
 2920                 auio.uio_iovcnt = 1;
 2921                 auio.uio_offset = 0;
 2922                 auio.uio_rw = UIO_READ;
 2923                 auio.uio_segflg = UIO_USERSPACE;
 2924                 auio.uio_td = td;
 2925                 auio.uio_resid = count;
 2926                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2927         }
 2928         vput(vp);
 2929         *res = count - auio.uio_resid;
 2930         return (error);
 2931 }
 2932 
 2933 /*
 2934  * readlink_args(char *path, char *buf, int count)
 2935  *
 2936  * Return target name of a symbolic link.
 2937  */
 2938 int
 2939 sys_readlink(struct readlink_args *uap)
 2940 {
 2941         struct nlookupdata nd;
 2942         int error;
 2943 
 2944         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 2945         if (error == 0) {
 2946                 error = kern_readlink(&nd, uap->buf, uap->count,
 2947                                         &uap->sysmsg_result);
 2948         }
 2949         nlookup_done(&nd);
 2950         return (error);
 2951 }
 2952 
 2953 /*
 2954  * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 2955  *
 2956  * Return target name of a symbolic link.  The path is relative to the
 2957  * directory associated with fd.
 2958  */
 2959 int
 2960 sys_readlinkat(struct readlinkat_args *uap)
 2961 {
 2962         struct nlookupdata nd;
 2963         struct file *fp;
 2964         int error;
 2965 
 2966         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 2967         if (error == 0) {
 2968                 error = kern_readlink(&nd, uap->buf, uap->bufsize,
 2969                                         &uap->sysmsg_result);
 2970         }
 2971         nlookup_done_at(&nd, fp);
 2972         return (error);
 2973 }
 2974 
 2975 static int
 2976 setfflags(struct vnode *vp, int flags)
 2977 {
 2978         struct thread *td = curthread;
 2979         int error;
 2980         struct vattr vattr;
 2981 
 2982         /*
 2983          * Prevent non-root users from setting flags on devices.  When
 2984          * a device is reused, users can retain ownership of the device
 2985          * if they are allowed to set flags and programs assume that
 2986          * chown can't fail when done as root.
 2987          */
 2988         if ((vp->v_type == VCHR || vp->v_type == VBLK) && 
 2989             ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
 2990                 return (error);
 2991 
 2992         /*
 2993          * note: vget is required for any operation that might mod the vnode
 2994          * so VINACTIVE is properly cleared.
 2995          */
 2996         if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
 2997                 VATTR_NULL(&vattr);
 2998                 vattr.va_flags = flags;
 2999                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3000                 vput(vp);
 3001         }
 3002         return (error);
 3003 }
 3004 
 3005 /*
 3006  * chflags(char *path, int flags)
 3007  *
 3008  * Change flags of a file given a path name.
 3009  */
 3010 int
 3011 sys_chflags(struct chflags_args *uap)
 3012 {
 3013         struct nlookupdata nd;
 3014         struct vnode *vp;
 3015         int error;
 3016 
 3017         vp = NULL;
 3018         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 3019         if (error == 0)
 3020                 error = nlookup(&nd);
 3021         if (error == 0)
 3022                 error = ncp_writechk(&nd.nl_nch);
 3023         if (error == 0)
 3024                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
 3025         nlookup_done(&nd);
 3026         if (error == 0) {
 3027                 error = setfflags(vp, uap->flags);
 3028                 vrele(vp);
 3029         }
 3030         return (error);
 3031 }
 3032 
 3033 /*
 3034  * lchflags(char *path, int flags)
 3035  *
 3036  * Change flags of a file given a path name, but don't follow symlinks.
 3037  */
 3038 int
 3039 sys_lchflags(struct lchflags_args *uap)
 3040 {
 3041         struct nlookupdata nd;
 3042         struct vnode *vp;
 3043         int error;
 3044 
 3045         vp = NULL;
 3046         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 3047         if (error == 0)
 3048                 error = nlookup(&nd);
 3049         if (error == 0)
 3050                 error = ncp_writechk(&nd.nl_nch);
 3051         if (error == 0)
 3052                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
 3053         nlookup_done(&nd);
 3054         if (error == 0) {
 3055                 error = setfflags(vp, uap->flags);
 3056                 vrele(vp);
 3057         }
 3058         return (error);
 3059 }
 3060 
 3061 /*
 3062  * fchflags_args(int fd, int flags)
 3063  *
 3064  * Change flags of a file given a file descriptor.
 3065  */
 3066 int
 3067 sys_fchflags(struct fchflags_args *uap)
 3068 {
 3069         struct thread *td = curthread;
 3070         struct proc *p = td->td_proc;
 3071         struct file *fp;
 3072         int error;
 3073 
 3074         if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
 3075                 return (error);
 3076         if (fp->f_nchandle.ncp)
 3077                 error = ncp_writechk(&fp->f_nchandle);
 3078         if (error == 0)
 3079                 error = setfflags((struct vnode *) fp->f_data, uap->flags);
 3080         fdrop(fp);
 3081         return (error);
 3082 }
 3083 
 3084 static int
 3085 setfmode(struct vnode *vp, int mode)
 3086 {
 3087         struct thread *td = curthread;
 3088         int error;
 3089         struct vattr vattr;
 3090 
 3091         /*
 3092          * note: vget is required for any operation that might mod the vnode
 3093          * so VINACTIVE is properly cleared.
 3094          */
 3095         if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
 3096                 VATTR_NULL(&vattr);
 3097                 vattr.va_mode = mode & ALLPERMS;
 3098                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3099                 vput(vp);
 3100         }
 3101         return error;
 3102 }
 3103 
 3104 int
 3105 kern_chmod(struct nlookupdata *nd, int mode)
 3106 {
 3107         struct vnode *vp;
 3108         int error;
 3109 
 3110         if ((error = nlookup(nd)) != 0)
 3111                 return (error);
 3112         if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
 3113                 return (error);
 3114         if ((error = ncp_writechk(&nd->nl_nch)) == 0)
 3115                 error = setfmode(vp, mode);
 3116         vrele(vp);
 3117         return (error);
 3118 }
 3119 
 3120 /*
 3121  * chmod_args(char *path, int mode)
 3122  *
 3123  * Change mode of a file given path name.
 3124  */
 3125 int
 3126 sys_chmod(struct chmod_args *uap)
 3127 {
 3128         struct nlookupdata nd;
 3129         int error;
 3130 
 3131         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 3132         if (error == 0)
 3133                 error = kern_chmod(&nd, uap->mode);
 3134         nlookup_done(&nd);
 3135         return (error);
 3136 }
 3137 
 3138 /*
 3139  * lchmod_args(char *path, int mode)
 3140  *
 3141  * Change mode of a file given path name (don't follow links.)
 3142  */
 3143 int
 3144 sys_lchmod(struct lchmod_args *uap)
 3145 {
 3146         struct nlookupdata nd;
 3147         int error;
 3148 
 3149         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 3150         if (error == 0)
 3151                 error = kern_chmod(&nd, uap->mode);
 3152         nlookup_done(&nd);
 3153         return (error);
 3154 }
 3155 
 3156 /*
 3157  * fchmod_args(int fd, int mode)
 3158  *
 3159  * Change mode of a file given a file descriptor.
 3160  */
 3161 int
 3162 sys_fchmod(struct fchmod_args *uap)
 3163 {
 3164         struct thread *td = curthread;
 3165         struct proc *p = td->td_proc;
 3166         struct file *fp;
 3167         int error;
 3168 
 3169         if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
 3170                 return (error);
 3171         if (fp->f_nchandle.ncp)
 3172                 error = ncp_writechk(&fp->f_nchandle);
 3173         if (error == 0)
 3174                 error = setfmode((struct vnode *)fp->f_data, uap->mode);
 3175         fdrop(fp);
 3176         return (error);
 3177 }
 3178 
 3179 /*
 3180  * fchmodat_args(char *path, int mode)
 3181  *
 3182  * Change mode of a file pointed to by fd/path.
 3183  */
 3184 int
 3185 sys_fchmodat(struct fchmodat_args *uap)
 3186 {
 3187         struct nlookupdata nd;
 3188         struct file *fp;
 3189         int error;
 3190         int flags;
 3191 
 3192         if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
 3193                 return (EINVAL);
 3194         flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
 3195 
 3196         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 
 3197                                 UIO_USERSPACE, flags);
 3198         if (error == 0)
 3199                 error = kern_chmod(&nd, uap->mode);
 3200         nlookup_done_at(&nd, fp);
 3201         return (error);
 3202 }
 3203 
 3204 static int
 3205 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid)
 3206 {
 3207         struct thread *td = curthread;
 3208         int error;
 3209         struct vattr vattr;
 3210         uid_t o_uid;
 3211         gid_t o_gid;
 3212         uint64_t size;
 3213 
 3214         /*
 3215          * note: vget is required for any operation that might mod the vnode
 3216          * so VINACTIVE is properly cleared.
 3217          */
 3218         if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
 3219                 if ((error = VOP_GETATTR(vp, &vattr)) != 0)
 3220                         return error;
 3221                 o_uid = vattr.va_uid;
 3222                 o_gid = vattr.va_gid;
 3223                 size = vattr.va_size;
 3224 
 3225                 VATTR_NULL(&vattr);
 3226                 vattr.va_uid = uid;
 3227                 vattr.va_gid = gid;
 3228                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3229                 vput(vp);
 3230         }
 3231 
 3232         if (error == 0) {
 3233                 if (uid == -1)
 3234                         uid = o_uid;
 3235                 if (gid == -1)
 3236                         gid = o_gid;
 3237                 VFS_ACCOUNT(mp, o_uid, o_gid, -size);
 3238                 VFS_ACCOUNT(mp,   uid,   gid,  size);
 3239         }
 3240 
 3241         return error;
 3242 }
 3243 
 3244 int
 3245 kern_chown(struct nlookupdata *nd, int uid, int gid)
 3246 {
 3247         struct vnode *vp;
 3248         int error;
 3249 
 3250         if ((error = nlookup(nd)) != 0)
 3251                 return (error);
 3252         if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
 3253                 return (error);
 3254         if ((error = ncp_writechk(&nd->nl_nch)) == 0)
 3255                 error = setfown(nd->nl_nch.mount, vp, uid, gid);
 3256         vrele(vp);
 3257         return (error);
 3258 }
 3259 
 3260 /*
 3261  * chown(char *path, int uid, int gid)
 3262  *
 3263  * Set ownership given a path name.
 3264  */
 3265 int
 3266 sys_chown(struct chown_args *uap)
 3267 {
 3268         struct nlookupdata nd;
 3269         int error;
 3270 
 3271         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 3272         if (error == 0)
 3273                 error = kern_chown(&nd, uap->uid, uap->gid);
 3274         nlookup_done(&nd);
 3275         return (error);
 3276 }
 3277 
 3278 /*
 3279  * lchown_args(char *path, int uid, int gid)
 3280  *
 3281  * Set ownership given a path name, do not cross symlinks.
 3282  */
 3283 int
 3284 sys_lchown(struct lchown_args *uap)
 3285 {
 3286         struct nlookupdata nd;
 3287         int error;
 3288 
 3289         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 3290         if (error == 0)
 3291                 error = kern_chown(&nd, uap->uid, uap->gid);
 3292         nlookup_done(&nd);
 3293         return (error);
 3294 }
 3295 
 3296 /*
 3297  * fchown_args(int fd, int uid, int gid)
 3298  *
 3299  * Set ownership given a file descriptor.
 3300  */
 3301 int
 3302 sys_fchown(struct fchown_args *uap)
 3303 {
 3304         struct thread *td = curthread;
 3305         struct proc *p = td->td_proc;
 3306         struct file *fp;
 3307         int error;
 3308 
 3309         if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
 3310                 return (error);
 3311         if (fp->f_nchandle.ncp)
 3312                 error = ncp_writechk(&fp->f_nchandle);
 3313         if (error == 0)
 3314                 error = setfown(p->p_fd->fd_ncdir.mount,
 3315                         (struct vnode *)fp->f_data, uap->uid, uap->gid);
 3316         fdrop(fp);
 3317         return (error);
 3318 }
 3319 
 3320 /*
 3321  * fchownat(int fd, char *path, int uid, int gid, int flags)
 3322  *
 3323  * Set ownership of file pointed to by fd/path.
 3324  */
 3325 int
 3326 sys_fchownat(struct fchownat_args *uap)
 3327 {
 3328         struct nlookupdata nd;
 3329         struct file *fp;
 3330         int error;
 3331         int flags;
 3332 
 3333         if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
 3334                 return (EINVAL);
 3335         flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
 3336 
 3337         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 
 3338                                 UIO_USERSPACE, flags);
 3339         if (error == 0)
 3340                 error = kern_chown(&nd, uap->uid, uap->gid);
 3341         nlookup_done_at(&nd, fp);
 3342         return (error);
 3343 }
 3344 
 3345 
 3346 static int
 3347 getutimes(const struct timeval *tvp, struct timespec *tsp)
 3348 {
 3349         struct timeval tv[2];
 3350 
 3351         if (tvp == NULL) {
 3352                 microtime(&tv[0]);
 3353                 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
 3354                 tsp[1] = tsp[0];
 3355         } else {
 3356                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3357                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3358         }
 3359         return 0;
 3360 }
 3361 
 3362 static int
 3363 setutimes(struct vnode *vp, struct vattr *vattr,
 3364           const struct timespec *ts, int nullflag)
 3365 {
 3366         struct thread *td = curthread;
 3367         int error;
 3368 
 3369         VATTR_NULL(vattr);
 3370         vattr->va_atime = ts[0];
 3371         vattr->va_mtime = ts[1];
 3372         if (nullflag)
 3373                 vattr->va_vaflags |= VA_UTIMES_NULL;
 3374         error = VOP_SETATTR(vp, vattr, td->td_ucred);
 3375 
 3376         return error;
 3377 }
 3378 
 3379 int
 3380 kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
 3381 {
 3382         struct timespec ts[2];
 3383         struct vnode *vp;
 3384         struct vattr vattr;
 3385         int error;
 3386 
 3387         if ((error = getutimes(tptr, ts)) != 0)
 3388                 return (error);
 3389 
 3390         /*
 3391          * NOTE: utimes() succeeds for the owner even if the file
 3392          * is not user-writable.
 3393          */
 3394         nd->nl_flags |= NLC_OWN | NLC_WRITE;
 3395 
 3396         if ((error = nlookup(nd)) != 0)
 3397                 return (error);
 3398         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 3399                 return (error);
 3400         if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
 3401                 return (error);
 3402 
 3403         /*
 3404          * note: vget is required for any operation that might mod the vnode
 3405          * so VINACTIVE is properly cleared.
 3406          */
 3407         if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
 3408                 error = vget(vp, LK_EXCLUSIVE);
 3409                 if (error == 0) {
 3410                         error = setutimes(vp, &vattr, ts, (tptr == NULL));
 3411                         vput(vp);
 3412                 }
 3413         }
 3414         vrele(vp);
 3415         return (error);
 3416 }
 3417 
 3418 /*
 3419  * utimes_args(char *path, struct timeval *tptr)
 3420  *
 3421  * Set the access and modification times of a file.
 3422  */
 3423 int
 3424 sys_utimes(struct utimes_args *uap)
 3425 {
 3426         struct timeval tv[2];
 3427         struct nlookupdata nd;
 3428         int error;
 3429 
 3430         if (uap->tptr) {
 3431                 error = copyin(uap->tptr, tv, sizeof(tv));
 3432                 if (error)
 3433                         return (error);
 3434         }
 3435         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 3436         if (error == 0)
 3437                 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
 3438         nlookup_done(&nd);
 3439         return (error);
 3440 }
 3441 
 3442 /*
 3443  * lutimes_args(char *path, struct timeval *tptr)
 3444  *
 3445  * Set the access and modification times of a file.
 3446  */
 3447 int
 3448 sys_lutimes(struct lutimes_args *uap)
 3449 {
 3450         struct timeval tv[2];
 3451         struct nlookupdata nd;
 3452         int error;
 3453 
 3454         if (uap->tptr) {
 3455                 error = copyin(uap->tptr, tv, sizeof(tv));
 3456                 if (error)
 3457                         return (error);
 3458         }
 3459         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 3460         if (error == 0)
 3461                 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
 3462         nlookup_done(&nd);
 3463         return (error);
 3464 }
 3465 
 3466 /*
 3467  * Set utimes on a file descriptor.  The creds used to open the
 3468  * file are used to determine whether the operation is allowed
 3469  * or not.
 3470  */
 3471 int
 3472 kern_futimes(int fd, struct timeval *tptr)
 3473 {
 3474         struct thread *td = curthread;
 3475         struct proc *p = td->td_proc;
 3476         struct timespec ts[2];
 3477         struct file *fp;
 3478         struct vnode *vp;
 3479         struct vattr vattr;
 3480         int error;
 3481 
 3482         error = getutimes(tptr, ts);
 3483         if (error)
 3484                 return (error);
 3485         if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
 3486                 return (error);
 3487         if (fp->f_nchandle.ncp)
 3488                 error = ncp_writechk(&fp->f_nchandle);
 3489         if (error == 0) {
 3490                 vp = fp->f_data;
 3491                 error = vget(vp, LK_EXCLUSIVE);
 3492                 if (error == 0) {
 3493                         error = VOP_GETATTR(vp, &vattr);
 3494                         if (error == 0) {
 3495                                 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
 3496                                                    fp->f_cred);
 3497                         }
 3498                         if (error == 0) {
 3499                                 error = setutimes(vp, &vattr, ts,
 3500                                                   (tptr == NULL));
 3501                         }
 3502                         vput(vp);
 3503                 }
 3504         }
 3505         fdrop(fp);
 3506         return (error);
 3507 }
 3508 
 3509 /*
 3510  * futimes_args(int fd, struct timeval *tptr)
 3511  *
 3512  * Set the access and modification times of a file.
 3513  */
 3514 int
 3515 sys_futimes(struct futimes_args *uap)
 3516 {
 3517         struct timeval tv[2];
 3518         int error;
 3519 
 3520         if (uap->tptr) {
 3521                 error = copyin(uap->tptr, tv, sizeof(tv));
 3522                 if (error)
 3523                         return (error);
 3524         }
 3525         error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
 3526 
 3527         return (error);
 3528 }
 3529 
 3530 int
 3531 kern_truncate(struct nlookupdata *nd, off_t length)
 3532 {
 3533         struct vnode *vp;
 3534         struct vattr vattr;
 3535         int error;
 3536         uid_t uid = 0;
 3537         gid_t gid = 0;
 3538         uint64_t old_size = 0;
 3539 
 3540         if (length < 0)
 3541                 return(EINVAL);
 3542         nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
 3543         if ((error = nlookup(nd)) != 0)
 3544                 return (error);
 3545         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 3546                 return (error);
 3547         if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
 3548                 return (error);
 3549         error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
 3550         if (error) {
 3551                 vrele(vp);
 3552                 return (error);
 3553         }
 3554         if (vp->v_type == VDIR) {
 3555                 error = EISDIR;
 3556                 goto done;
 3557         }
 3558         if (vfs_quota_enabled) {
 3559                 error = VOP_GETATTR(vp, &vattr);
 3560                 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
 3561                 uid = vattr.va_uid;
 3562                 gid = vattr.va_gid;
 3563                 old_size = vattr.va_size;
 3564         }
 3565 
 3566         if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
 3567                 VATTR_NULL(&vattr);
 3568                 vattr.va_size = length;
 3569                 error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
 3570                 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
 3571         }
 3572 done:
 3573         vput(vp);
 3574         return (error);
 3575 }
 3576 
 3577 /*
 3578  * truncate(char *path, int pad, off_t length)
 3579  *
 3580  * Truncate a file given its path name.
 3581  */
 3582 int
 3583 sys_truncate(struct truncate_args *uap)
 3584 {
 3585         struct nlookupdata nd;
 3586         int error;
 3587 
 3588         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 3589         if (error == 0)
 3590                 error = kern_truncate(&nd, uap->length);
 3591         nlookup_done(&nd);
 3592         return error;
 3593 }
 3594 
 3595 int
 3596 kern_ftruncate(int fd, off_t length)
 3597 {
 3598         struct thread *td = curthread;
 3599         struct proc *p = td->td_proc;
 3600         struct vattr vattr;
 3601         struct vnode *vp;
 3602         struct file *fp;
 3603         int error;
 3604         uid_t uid = 0;
 3605         gid_t gid = 0;
 3606         uint64_t old_size = 0;
 3607         struct mount *mp;
 3608 
 3609         if (length < 0)
 3610                 return(EINVAL);
 3611         if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
 3612                 return (error);
 3613         if (fp->f_nchandle.ncp) {
 3614                 error = ncp_writechk(&fp->f_nchandle);
 3615                 if (error)
 3616                         goto done;
 3617         }
 3618         if ((fp->f_flag & FWRITE) == 0) {
 3619                 error = EINVAL;
 3620                 goto done;
 3621         }
 3622         if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */
 3623                 error = EINVAL;
 3624                 goto done;
 3625         }
 3626         vp = (struct vnode *)fp->f_data;
 3627         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3628         if (vp->v_type == VDIR) {
 3629                 error = EISDIR;
 3630                 goto done;
 3631         }
 3632 
 3633         if (vfs_quota_enabled) {
 3634                 error = VOP_GETATTR(vp, &vattr);
 3635                 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
 3636                 uid = vattr.va_uid;
 3637                 gid = vattr.va_gid;
 3638                 old_size = vattr.va_size;
 3639         }
 3640 
 3641         if ((error = vn_writechk(vp, NULL)) == 0) {
 3642                 VATTR_NULL(&vattr);
 3643                 vattr.va_size = length;
 3644                 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
 3645                 mp = vq_vptomp(vp);
 3646                 VFS_ACCOUNT(mp, uid, gid, length - old_size);
 3647         }
 3648         vn_unlock(vp);
 3649 done:
 3650         fdrop(fp);
 3651         return (error);
 3652 }
 3653 
 3654 /*
 3655  * ftruncate_args(int fd, int pad, off_t length)
 3656  *
 3657  * Truncate a file given a file descriptor.
 3658  */
 3659 int
 3660 sys_ftruncate(struct ftruncate_args *uap)
 3661 {
 3662         int error;
 3663 
 3664         error = kern_ftruncate(uap->fd, uap->length);
 3665 
 3666         return (error);
 3667 }
 3668 
 3669 /*
 3670  * fsync(int fd)
 3671  *
 3672  * Sync an open file.
 3673  */
 3674 int
 3675 sys_fsync(struct fsync_args *uap)
 3676 {
 3677         struct thread *td = curthread;
 3678         struct proc *p = td->td_proc;
 3679         struct vnode *vp;
 3680         struct file *fp;
 3681         vm_object_t obj;
 3682         int error;
 3683 
 3684         if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
 3685                 return (error);
 3686         vp = (struct vnode *)fp->f_data;
 3687         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3688         if ((obj = vp->v_object) != NULL) {
 3689                 if (vp->v_mount == NULL ||
 3690                     (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
 3691                         vm_object_page_clean(obj, 0, 0, 0);
 3692                 }
 3693         }
 3694         error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
 3695         if (error == 0 && vp->v_mount)
 3696                 error = buf_fsync(vp);
 3697         vn_unlock(vp);
 3698         fdrop(fp);
 3699 
 3700         return (error);
 3701 }
 3702 
 3703 int
 3704 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
 3705 {
 3706         struct nchandle fnchd;
 3707         struct nchandle tnchd;
 3708         struct namecache *ncp;
 3709         struct vnode *fdvp;
 3710         struct vnode *tdvp;
 3711         struct mount *mp;
 3712         int error;
 3713 
 3714         bwillinode(1);
 3715         fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
 3716         if ((error = nlookup(fromnd)) != 0)
 3717                 return (error);
 3718         if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
 3719                 return (ENOENT);
 3720         fnchd.mount = fromnd->nl_nch.mount;
 3721         cache_hold(&fnchd);
 3722 
 3723         /*
 3724          * unlock the source nch so we can lookup the target nch without
 3725          * deadlocking.  The target may or may not exist so we do not check
 3726          * for a target vp like kern_mkdir() and other creation functions do.
 3727          *
 3728          * The source and target directories are ref'd and rechecked after
 3729          * everything is relocked to determine if the source or target file
 3730          * has been renamed.
 3731          */
 3732         KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
 3733         fromnd->nl_flags &= ~NLC_NCPISLOCKED;
 3734         cache_unlock(&fromnd->nl_nch);
 3735 
 3736         tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
 3737         if ((error = nlookup(tond)) != 0) {
 3738                 cache_drop(&fnchd);
 3739                 return (error);
 3740         }
 3741         if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
 3742                 cache_drop(&fnchd);
 3743                 return (ENOENT);
 3744         }
 3745         tnchd.mount = tond->nl_nch.mount;
 3746         cache_hold(&tnchd);
 3747 
 3748         /*
 3749          * If the source and target are the same there is nothing to do
 3750          */
 3751         if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
 3752                 cache_drop(&fnchd);
 3753                 cache_drop(&tnchd);
 3754                 return (0);
 3755         }
 3756 
 3757         /*
 3758          * Mount points cannot be renamed or overwritten
 3759          */
 3760         if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
 3761             NCF_ISMOUNTPT
 3762         ) {
 3763                 cache_drop(&fnchd);
 3764                 cache_drop(&tnchd);
 3765                 return (EINVAL);
 3766         }
 3767 
 3768         /*
 3769          * Relock the source ncp.  cache_relock() will deal with any
 3770          * deadlocks against the already-locked tond and will also
 3771          * make sure both are resolved.
 3772          *
 3773          * NOTE AFTER RELOCKING: The source or target ncp may have become
 3774          * invalid while they were unlocked, nc_vp and nc_mount could
 3775          * be NULL.
 3776          */
 3777         cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
 3778                      &tond->nl_nch, tond->nl_cred);
 3779         fromnd->nl_flags |= NLC_NCPISLOCKED;
 3780 
 3781         /*
 3782          * If either fromnd or tond are marked destroyed a ripout occured
 3783          * out from under us and we must retry.
 3784          */
 3785         if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
 3786             fromnd->nl_nch.ncp->nc_vp == NULL ||
 3787             (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
 3788                 kprintf("kern_rename: retry due to ripout on: "
 3789                         "\"%s\" -> \"%s\"\n",
 3790                         fromnd->nl_nch.ncp->nc_name,
 3791                         tond->nl_nch.ncp->nc_name);
 3792                 cache_drop(&fnchd);
 3793                 cache_drop(&tnchd);
 3794                 return (EAGAIN);
 3795         }
 3796 
 3797         /*
 3798          * make sure the parent directories linkages are the same
 3799          */
 3800         if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
 3801             tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
 3802                 cache_drop(&fnchd);
 3803                 cache_drop(&tnchd);
 3804                 return (ENOENT);
 3805         }
 3806 
 3807         /*
 3808          * Both the source and target must be within the same filesystem and
 3809          * in the same filesystem as their parent directories within the
 3810          * namecache topology.
 3811          *
 3812          * NOTE: fromnd's nc_mount or nc_vp could be NULL.
 3813          */
 3814         mp = fnchd.mount;
 3815         if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
 3816             mp != tond->nl_nch.mount) {
 3817                 cache_drop(&fnchd);
 3818                 cache_drop(&tnchd);
 3819                 return (EXDEV);
 3820         }
 3821 
 3822         /*
 3823          * Make sure the mount point is writable
 3824          */
 3825         if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
 3826                 cache_drop(&fnchd);
 3827                 cache_drop(&tnchd);
 3828                 return (error);
 3829         }
 3830 
 3831         /*
 3832          * If the target exists and either the source or target is a directory,
 3833          * then both must be directories.
 3834          *
 3835          * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
 3836          * have become NULL.
 3837          */
 3838         if (tond->nl_nch.ncp->nc_vp) {
 3839                 if (fromnd->nl_nch.ncp->nc_vp == NULL) {
 3840                         error = ENOENT;
 3841                 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
 3842                         if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
 3843                                 error = ENOTDIR;
 3844                 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
 3845                         error = EISDIR;
 3846                 }
 3847         }
 3848 
 3849         /*
 3850          * You cannot rename a source into itself or a subdirectory of itself.
 3851          * We check this by travsersing the target directory upwards looking
 3852          * for a match against the source.
 3853          *
 3854          * XXX MPSAFE
 3855          */
 3856         if (error == 0) {
 3857                 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
 3858                         if (fromnd->nl_nch.ncp == ncp) {
 3859                                 error = EINVAL;
 3860                                 break;
 3861                         }
 3862                 }
 3863         }
 3864 
 3865         cache_drop(&fnchd);
 3866         cache_drop(&tnchd);
 3867 
 3868         /*
 3869          * Even though the namespaces are different, they may still represent
 3870          * hardlinks to the same file.  The filesystem might have a hard time
 3871          * with this so we issue a NREMOVE of the source instead of a NRENAME
 3872          * when we detect the situation.
 3873          */
 3874         if (error == 0) {
 3875                 fdvp = fromnd->nl_dvp;
 3876                 tdvp = tond->nl_dvp;
 3877                 if (fdvp == NULL || tdvp == NULL) {
 3878                         error = EPERM;
 3879                 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
 3880                         error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
 3881                                             fromnd->nl_cred);
 3882                 } else {
 3883                         error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 
 3884                                             fdvp, tdvp, tond->nl_cred);
 3885                 }
 3886         }
 3887         return (error);
 3888 }
 3889 
 3890 /*
 3891  * rename_args(char *from, char *to)
 3892  *
 3893  * Rename files.  Source and destination must either both be directories,
 3894  * or both not be directories.  If target is a directory, it must be empty.
 3895  */
 3896 int
 3897 sys_rename(struct rename_args *uap)
 3898 {
 3899         struct nlookupdata fromnd, tond;
 3900         int error;
 3901 
 3902         do {
 3903                 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
 3904                 if (error == 0) {
 3905                         error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
 3906                         if (error == 0)
 3907                                 error = kern_rename(&fromnd, &tond);
 3908                         nlookup_done(&tond);
 3909                 }
 3910                 nlookup_done(&fromnd);
 3911         } while (error == EAGAIN);
 3912         return (error);
 3913 }
 3914 
 3915 /*
 3916  * renameat_args(int oldfd, char *old, int newfd, char *new)
 3917  *
 3918  * Rename files using paths relative to the directories associated with
 3919  * oldfd and newfd.  Source and destination must either both be directories,
 3920  * or both not be directories.  If target is a directory, it must be empty.
 3921  */
 3922 int
 3923 sys_renameat(struct renameat_args *uap)
 3924 {
 3925         struct nlookupdata oldnd, newnd;
 3926         struct file *oldfp, *newfp;
 3927         int error;
 3928 
 3929         do {
 3930                 error = nlookup_init_at(&oldnd, &oldfp,
 3931                                         uap->oldfd, uap->old,
 3932                                         UIO_USERSPACE, 0);
 3933                 if (error == 0) {
 3934                         error = nlookup_init_at(&newnd, &newfp,
 3935                                                 uap->newfd, uap->new,
 3936                                                 UIO_USERSPACE, 0);
 3937                         if (error == 0)
 3938                                 error = kern_rename(&oldnd, &newnd);
 3939                         nlookup_done_at(&newnd, newfp);
 3940                 }
 3941                 nlookup_done_at(&oldnd, oldfp);
 3942         } while (error == EAGAIN);
 3943         return (error);
 3944 }
 3945 
 3946 int
 3947 kern_mkdir(struct nlookupdata *nd, int mode)
 3948 {
 3949         struct thread *td = curthread;
 3950         struct proc *p = td->td_proc;
 3951         struct vnode *vp;
 3952         struct vattr vattr;
 3953         int error;
 3954 
 3955         bwillinode(1);
 3956         nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
 3957         if ((error = nlookup(nd)) != 0)
 3958                 return (error);
 3959 
 3960         if (nd->nl_nch.ncp->nc_vp)
 3961                 return (EEXIST);
 3962         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 3963                 return (error);
 3964         VATTR_NULL(&vattr);
 3965         vattr.va_type = VDIR;
 3966         vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
 3967 
 3968         vp = NULL;
 3969         error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
 3970         if (error == 0)
 3971                 vput(vp);
 3972         return (error);
 3973 }
 3974 
 3975 /*
 3976  * mkdir_args(char *path, int mode)
 3977  *
 3978  * Make a directory file.
 3979  */
 3980 int
 3981 sys_mkdir(struct mkdir_args *uap)
 3982 {
 3983         struct nlookupdata nd;
 3984         int error;
 3985 
 3986         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 3987         if (error == 0)
 3988                 error = kern_mkdir(&nd, uap->mode);
 3989         nlookup_done(&nd);
 3990         return (error);
 3991 }
 3992 
 3993 /*
 3994  * mkdirat_args(int fd, char *path, mode_t mode)
 3995  *
 3996  * Make a directory file.  The path is relative to the directory associated
 3997  * with fd.
 3998  */
 3999 int
 4000 sys_mkdirat(struct mkdirat_args *uap)
 4001 {
 4002         struct nlookupdata nd;
 4003         struct file *fp;
 4004         int error;
 4005 
 4006         error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
 4007         if (error == 0)
 4008                 error = kern_mkdir(&nd, uap->mode);
 4009         nlookup_done_at(&nd, fp);
 4010         return (error);
 4011 }
 4012 
 4013 int
 4014 kern_rmdir(struct nlookupdata *nd)
 4015 {
 4016         int error;
 4017 
 4018         bwillinode(1);
 4019         nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
 4020         if ((error = nlookup(nd)) != 0)
 4021                 return (error);
 4022 
 4023         /*
 4024          * Do not allow directories representing mount points to be
 4025          * deleted, even if empty.  Check write perms on mount point
 4026          * in case the vnode is aliased (aka nullfs).
 4027          */
 4028         if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
 4029                 return (EBUSY);
 4030         if ((error = ncp_writechk(&nd->nl_nch)) != 0)
 4031                 return (error);
 4032         error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
 4033         return (error);
 4034 }
 4035 
 4036 /*
 4037  * rmdir_args(char *path)
 4038  *
 4039  * Remove a directory file.
 4040  */
 4041 int
 4042 sys_rmdir(struct rmdir_args *uap)
 4043 {
 4044         struct nlookupdata nd;
 4045         int error;
 4046 
 4047         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
 4048         if (error == 0)
 4049                 error = kern_rmdir(&nd);
 4050         nlookup_done(&nd);
 4051         return (error);
 4052 }
 4053 
 4054 int
 4055 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
 4056                    enum uio_seg direction)
 4057 {
 4058         struct thread *td = curthread;
 4059         struct proc *p = td->td_proc;
 4060         struct vnode *vp;
 4061         struct file *fp;
 4062         struct uio auio;
 4063         struct iovec aiov;
 4064         off_t loff;
 4065         int error, eofflag;
 4066 
 4067         if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
 4068                 return (error);
 4069         if ((fp->f_flag & FREAD) == 0) {
 4070                 error = EBADF;
 4071                 goto done;
 4072         }
 4073         vp = (struct vnode *)fp->f_data;
 4074 unionread:
 4075         if (vp->v_type != VDIR) {
 4076                 error = EINVAL;
 4077                 goto done;
 4078         }
 4079         aiov.iov_base = buf;
 4080         aiov.iov_len = count;
 4081         auio.uio_iov = &aiov;
 4082         auio.uio_iovcnt = 1;
 4083         auio.uio_rw = UIO_READ;
 4084         auio.uio_segflg = direction;
 4085         auio.uio_td = td;
 4086         auio.uio_resid = count;
 4087         loff = auio.uio_offset = fp->f_offset;
 4088         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
 4089         fp->f_offset = auio.uio_offset;
 4090         if (error)
 4091                 goto done;
 4092         if (count == auio.uio_resid) {
 4093                 if (union_dircheckp) {
 4094                         error = union_dircheckp(td, &vp, fp);
 4095                         if (error == -1)
 4096                                 goto unionread;
 4097                         if (error)
 4098                                 goto done;
 4099                 }
 4100 #if 0
 4101                 if ((vp->v_flag & VROOT) &&
 4102                     (vp->v_mount->mnt_flag & MNT_UNION)) {
 4103                         struct vnode *tvp = vp;
 4104                         vp = vp->v_mount->mnt_vnodecovered;
 4105                         vref(vp);
 4106                         fp->f_data = vp;
 4107                         fp->f_offset = 0;
 4108                         vrele(tvp);
 4109                         goto unionread;
 4110                 }
 4111 #endif
 4112         }
 4113 
 4114         /*
 4115          * WARNING!  *basep may not be wide enough to accomodate the
 4116          * seek offset.   XXX should we hack this to return the upper 32 bits
 4117          * for offsets greater then 4G?
 4118          */
 4119         if (basep) {
 4120                 *basep = (long)loff;
 4121         }
 4122         *res = count - auio.uio_resid;
 4123 done:
 4124         fdrop(fp);
 4125         return (error);
 4126 }
 4127 
 4128 /*
 4129  * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
 4130  *
 4131  * Read a block of directory entries in a file system independent format.
 4132  */
 4133 int
 4134 sys_getdirentries(struct getdirentries_args *uap)
 4135 {
 4136         long base;
 4137         int error;
 4138 
 4139         error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
 4140                                    &uap->sysmsg_result, UIO_USERSPACE);
 4141 
 4142         if (error == 0 && uap->basep)
 4143                 error = copyout(&base, uap->basep, sizeof(*uap->basep));
 4144         return (error);
 4145 }
 4146 
 4147 /*
 4148  * getdents_args(int fd, char *buf, size_t count)
 4149  */
 4150 int
 4151 sys_getdents(struct getdents_args *uap)
 4152 {
 4153         int error;
 4154 
 4155         error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
 4156                                    &uap->sysmsg_result, UIO_USERSPACE);
 4157 
 4158         return (error);
 4159 }
 4160 
 4161 /*
 4162  * Set the mode mask for creation of filesystem nodes.
 4163  *
 4164  * umask(int newmask)
 4165  */
 4166 int
 4167 sys_umask(struct umask_args *uap)
 4168 {
 4169         struct thread *td = curthread;
 4170         struct proc *p = td->td_proc;
 4171         struct filedesc *fdp;
 4172 
 4173         fdp = p->p_fd;
 4174         uap->sysmsg_result = fdp->fd_cmask;
 4175         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4176         return (0);
 4177 }
 4178 
 4179 /*
 4180  * revoke(char *path)
 4181  *
 4182  * Void all references to file by ripping underlying filesystem
 4183  * away from vnode.
 4184  */
 4185 int
 4186 sys_revoke(struct revoke_args *uap)
 4187 {
 4188         struct nlookupdata nd;
 4189         struct vattr vattr;
 4190         struct vnode *vp;
 4191         struct ucred *cred;
 4192         int error;
 4193 
 4194         vp = NULL;
 4195         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 4196         if (error == 0)
 4197                 error = nlookup(&nd);
 4198         if (error == 0)
 4199                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
 4200         cred = crhold(nd.nl_cred);
 4201         nlookup_done(&nd);
 4202         if (error == 0) {
 4203                 if (error == 0)
 4204                         error = VOP_GETATTR(vp, &vattr);
 4205                 if (error == 0 && cred->cr_uid != vattr.va_uid)
 4206                         error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0);
 4207                 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
 4208                         if (vcount(vp) > 0)
 4209                                 error = vrevoke(vp, cred);
 4210                 } else if (error == 0) {
 4211                         error = vrevoke(vp, cred);
 4212                 }
 4213                 vrele(vp);
 4214         }
 4215         if (cred)
 4216                 crfree(cred);
 4217         return (error);
 4218 }
 4219 
 4220 /*
 4221  * getfh_args(char *fname, fhandle_t *fhp)
 4222  *
 4223  * Get (NFS) file handle
 4224  *
 4225  * NOTE: We use the fsid of the covering mount, even if it is a nullfs
 4226  * mount.  This allows nullfs mounts to be explicitly exported. 
 4227  *
 4228  * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
 4229  *
 4230  *          nullfs mounts of subdirectories are not safe.  That is, it will
 4231  *          work, but you do not really have protection against access to
 4232  *          the related parent directories.
 4233  */
 4234 int
 4235 sys_getfh(struct getfh_args *uap)
 4236 {
 4237         struct thread *td = curthread;
 4238         struct nlookupdata nd;
 4239         fhandle_t fh;
 4240         struct vnode *vp;
 4241         struct mount *mp;
 4242         int error;
 4243 
 4244         /*
 4245          * Must be super user
 4246          */
 4247         if ((error = priv_check(td, PRIV_ROOT)) != 0)
 4248                 return (error);
 4249 
 4250         vp = NULL;
 4251         error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
 4252         if (error == 0)
 4253                 error = nlookup(&nd);
 4254         if (error == 0)
 4255                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
 4256         mp = nd.nl_nch.mount;
 4257         nlookup_done(&nd);
 4258         if (error == 0) {
 4259                 bzero(&fh, sizeof(fh));
 4260                 fh.fh_fsid = mp->mnt_stat.f_fsid;
 4261                 error = VFS_VPTOFH(vp, &fh.fh_fid);
 4262                 vput(vp);
 4263                 if (error == 0)
 4264                         error = copyout(&fh, uap->fhp, sizeof(fh));
 4265         }
 4266         return (error);
 4267 }
 4268 
 4269 /*
 4270  * fhopen_args(const struct fhandle *u_fhp, int flags)
 4271  *
 4272  * syscall for the rpc.lockd to use to translate a NFS file handle into
 4273  * an open descriptor.
 4274  *
 4275  * warning: do not remove the priv_check() call or this becomes one giant
 4276  * security hole.
 4277  */
 4278 int
 4279 sys_fhopen(struct fhopen_args *uap)
 4280 {
 4281         struct thread *td = curthread;
 4282         struct filedesc *fdp = td->td_proc->p_fd;
 4283         struct mount *mp;
 4284         struct vnode *vp;
 4285         struct fhandle fhp;
 4286         struct vattr vat;
 4287         struct vattr *vap = &vat;
 4288         struct flock lf;
 4289         int fmode, mode, error = 0, type;
 4290         struct file *nfp; 
 4291         struct file *fp;
 4292         int indx;
 4293 
 4294         /*
 4295          * Must be super user
 4296          */
 4297         error = priv_check(td, PRIV_ROOT);
 4298         if (error)
 4299                 return (error);
 4300 
 4301         fmode = FFLAGS(uap->flags);
 4302 
 4303         /*
 4304          * Why not allow a non-read/write open for our lockd?
 4305          */
 4306         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4307                 return (EINVAL);
 4308         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4309         if (error)
 4310                 return(error);
 4311 
 4312         /*
 4313          * Find the mount point
 4314          */
 4315         mp = vfs_getvfs(&fhp.fh_fsid);
 4316         if (mp == NULL) {
 4317                 error = ESTALE;
 4318                 goto  done;
 4319         }
 4320         /* now give me my vnode, it gets returned to me locked */
 4321         error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
 4322         if (error)
 4323                 goto done;
 4324         /*
 4325          * from now on we have to make sure not
 4326          * to forget about the vnode
 4327          * any error that causes an abort must vput(vp) 
 4328          * just set error = err and 'goto bad;'.
 4329          */
 4330 
 4331         /* 
 4332          * from vn_open 
 4333          */
 4334         if (vp->v_type == VLNK) {
 4335                 error = EMLINK;
 4336                 goto bad;
 4337         }
 4338         if (vp->v_type == VSOCK) {
 4339                 error = EOPNOTSUPP;
 4340                 goto bad;
 4341         }
 4342         mode = 0;
 4343         if (fmode & (FWRITE | O_TRUNC)) {
 4344                 if (vp->v_type == VDIR) {
 4345                         error = EISDIR;
 4346                         goto bad;
 4347                 }
 4348                 error = vn_writechk(vp, NULL);
 4349                 if (error)
 4350                         goto bad;
 4351                 mode |= VWRITE;
 4352         }
 4353         if (fmode & FREAD)
 4354                 mode |= VREAD;
 4355         if (mode) {
 4356                 error = VOP_ACCESS(vp, mode, td->td_ucred);
 4357                 if (error)
 4358                         goto bad;
 4359         }
 4360         if (fmode & O_TRUNC) {
 4361                 vn_unlock(vp);                          /* XXX */
 4362                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4363                 VATTR_NULL(vap);
 4364                 vap->va_size = 0;
 4365                 error = VOP_SETATTR(vp, vap, td->td_ucred);
 4366                 if (error)
 4367                         goto bad;
 4368         }
 4369 
 4370         /*
 4371          * VOP_OPEN needs the file pointer so it can potentially override
 4372          * it.
 4373          *
 4374          * WARNING! no f_nchandle will be associated when fhopen()ing a
 4375          * directory.  XXX
 4376          */
 4377         if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
 4378                 goto bad;
 4379         fp = nfp;
 4380 
 4381         error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
 4382         if (error) {
 4383                 /*
 4384                  * setting f_ops this way prevents VOP_CLOSE from being
 4385                  * called or fdrop() releasing the vp from v_data.   Since
 4386                  * the VOP_OPEN failed we don't want to VOP_CLOSE.
 4387                  */
 4388                 fp->f_ops = &badfileops;
 4389                 fp->f_data = NULL;
 4390                 goto bad_drop;
 4391         }
 4392 
 4393         /*
 4394          * The fp is given its own reference, we still have our ref and lock.
 4395          *
 4396          * Assert that all regular files must be created with a VM object.
 4397          */
 4398         if (vp->v_type == VREG && vp->v_object == NULL) {
 4399                 kprintf("fhopen: regular file did not have VM object: %p\n", vp);
 4400                 goto bad_drop;
 4401         }
 4402 
 4403         /*
 4404          * The open was successful.  Handle any locking requirements.
 4405          */
 4406         if (fmode & (O_EXLOCK | O_SHLOCK)) {
 4407                 lf.l_whence = SEEK_SET;
 4408                 lf.l_start = 0;
 4409                 lf.l_len = 0;
 4410                 if (fmode & O_EXLOCK)
 4411                         lf.l_type = F_WRLCK;
 4412                 else
 4413                         lf.l_type = F_RDLCK;
 4414                 if (fmode & FNONBLOCK)
 4415                         type = 0;
 4416                 else
 4417                         type = F_WAIT;
 4418                 vn_unlock(vp);
 4419                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
 4420                         /*
 4421                          * release our private reference.
 4422                          */
 4423                         fsetfd(fdp, NULL, indx);
 4424                         fdrop(fp);
 4425                         vrele(vp);
 4426                         goto done;
 4427                 }
 4428                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4429                 fp->f_flag |= FHASLOCK;
 4430         }
 4431 
 4432         /*
 4433          * Clean up.  Associate the file pointer with the previously
 4434          * reserved descriptor and return it.
 4435          */
 4436         vput(vp);
 4437         fsetfd(fdp, fp, indx);
 4438         fdrop(fp);
 4439         uap->sysmsg_result = indx;
 4440         if (uap->flags & O_CLOEXEC)
 4441                 error = fsetfdflags(fdp, indx, UF_EXCLOSE);
 4442         return (error);
 4443 
 4444 bad_drop:
 4445         fsetfd(fdp, NULL, indx);
 4446         fdrop(fp);
 4447 bad:
 4448         vput(vp);
 4449 done:
 4450         return (error);
 4451 }
 4452 
 4453 /*
 4454  * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 4455  */
 4456 int
 4457 sys_fhstat(struct fhstat_args *uap)
 4458 {
 4459         struct thread *td = curthread;
 4460         struct stat sb;
 4461         fhandle_t fh;
 4462         struct mount *mp;
 4463         struct vnode *vp;
 4464         int error;
 4465 
 4466         /*
 4467          * Must be super user
 4468          */
 4469         error = priv_check(td, PRIV_ROOT);
 4470         if (error)
 4471                 return (error);
 4472         
 4473         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4474         if (error)
 4475                 return (error);
 4476 
 4477         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 4478                 error = ESTALE;
 4479         if (error == 0) {
 4480                 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
 4481                         error = vn_stat(vp, &sb, td->td_ucred);
 4482                         vput(vp);
 4483                 }
 4484         }
 4485         if (error == 0)
 4486                 error = copyout(&sb, uap->sb, sizeof(sb));
 4487         return (error);
 4488 }
 4489 
 4490 /*
 4491  * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 4492  */
 4493 int
 4494 sys_fhstatfs(struct fhstatfs_args *uap)
 4495 {
 4496         struct thread *td = curthread;
 4497         struct proc *p = td->td_proc;
 4498         struct statfs *sp;
 4499         struct mount *mp;
 4500         struct vnode *vp;
 4501         struct statfs sb;
 4502         char *fullpath, *freepath;
 4503         fhandle_t fh;
 4504         int error;
 4505 
 4506         /*
 4507          * Must be super user
 4508          */
 4509         if ((error = priv_check(td, PRIV_ROOT)))
 4510                 return (error);
 4511 
 4512         if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
 4513                 return (error);
 4514 
 4515         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
 4516                 error = ESTALE;
 4517                 goto done;
 4518         }
 4519         if (p != NULL && !chroot_visible_mnt(mp, p)) {
 4520                 error = ESTALE;
 4521                 goto done;
 4522         }
 4523 
 4524         if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
 4525                 goto done;
 4526         mp = vp->v_mount;
 4527         sp = &mp->mnt_stat;
 4528         vput(vp);
 4529         if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
 4530                 goto done;
 4531 
 4532         error = mount_path(p, mp, &fullpath, &freepath);
 4533         if (error)
 4534                 goto done;
 4535         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 4536         strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
 4537         kfree(freepath, M_TEMP);
 4538 
 4539         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4540         if (priv_check(td, PRIV_ROOT)) {
 4541                 bcopy(sp, &sb, sizeof(sb));
 4542                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
 4543                 sp = &sb;
 4544         }
 4545         error = copyout(sp, uap->buf, sizeof(*sp));
 4546 done:
 4547         return (error);
 4548 }
 4549 
 4550 /*
 4551  * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 4552  */
 4553 int
 4554 sys_fhstatvfs(struct fhstatvfs_args *uap)
 4555 {
 4556         struct thread *td = curthread;
 4557         struct proc *p = td->td_proc;
 4558         struct statvfs *sp;
 4559         struct mount *mp;
 4560         struct vnode *vp;
 4561         fhandle_t fh;
 4562         int error;
 4563 
 4564         /*
 4565          * Must be super user
 4566          */
 4567         if ((error = priv_check(td, PRIV_ROOT)))
 4568                 return (error);
 4569 
 4570         if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
 4571                 return (error);
 4572 
 4573         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
 4574                 error = ESTALE;
 4575                 goto done;
 4576         }
 4577         if (p != NULL && !chroot_visible_mnt(mp, p)) {
 4578                 error = ESTALE;
 4579                 goto done;
 4580         }
 4581 
 4582         if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
 4583                 goto done;
 4584         mp = vp->v_mount;
 4585         sp = &mp->mnt_vstat;
 4586         vput(vp);
 4587         if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
 4588                 goto done;
 4589 
 4590         sp->f_flag = 0;
 4591         if (mp->mnt_flag & MNT_RDONLY)
 4592                 sp->f_flag |= ST_RDONLY;
 4593         if (mp->mnt_flag & MNT_NOSUID)
 4594                 sp->f_flag |= ST_NOSUID;
 4595         error = copyout(sp, uap->buf, sizeof(*sp));
 4596 done:
 4597         return (error);
 4598 }
 4599 
 4600 
 4601 /*
 4602  * Syscall to push extended attribute configuration information into the
 4603  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 4604  * a command (int cmd), and attribute name and misc data.  For now, the
 4605  * attribute name is left in userspace for consumption by the VFS_op.
 4606  * It will probably be changed to be copied into sysspace by the
 4607  * syscall in the future, once issues with various consumers of the
 4608  * attribute code have raised their hands.
 4609  *
 4610  * Currently this is used only by UFS Extended Attributes.
 4611  */
 4612 int
 4613 sys_extattrctl(struct extattrctl_args *uap)
 4614 {
 4615         struct nlookupdata nd;
 4616         struct vnode *vp;
 4617         char attrname[EXTATTR_MAXNAMELEN];
 4618         int error;
 4619         size_t size;
 4620 
 4621         attrname[0] = 0;
 4622         vp = NULL;
 4623         error = 0;
 4624 
 4625         if (error == 0 && uap->filename) {
 4626                 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
 4627                                      NLC_FOLLOW);
 4628                 if (error == 0)
 4629                         error = nlookup(&nd);
 4630                 if (error == 0)
 4631                         error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
 4632                 nlookup_done(&nd);
 4633         }
 4634 
 4635         if (error == 0 && uap->attrname) {
 4636                 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
 4637                                   &size);
 4638         }
 4639 
 4640         if (error == 0) {
 4641                 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 4642                 if (error == 0)
 4643                         error = nlookup(&nd);
 4644                 if (error == 0)
 4645                         error = ncp_writechk(&nd.nl_nch);
 4646                 if (error == 0) {
 4647                         error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
 4648                                                uap->attrnamespace,
 4649                                                uap->attrname, nd.nl_cred);
 4650                 }
 4651                 nlookup_done(&nd);
 4652         }
 4653 
 4654         return (error);
 4655 }
 4656 
 4657 /*
 4658  * Syscall to get a named extended attribute on a file or directory.
 4659  */
 4660 int
 4661 sys_extattr_set_file(struct extattr_set_file_args *uap)
 4662 {
 4663         char attrname[EXTATTR_MAXNAMELEN];
 4664         struct nlookupdata nd;
 4665         struct vnode *vp;
 4666         struct uio auio;
 4667         struct iovec aiov;
 4668         int error;
 4669 
 4670         error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
 4671         if (error)
 4672                 return (error);
 4673 
 4674         vp = NULL;
 4675 
 4676         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 4677         if (error == 0)
 4678                 error = nlookup(&nd);
 4679         if (error == 0)
 4680                 error = ncp_writechk(&nd.nl_nch);
 4681         if (error == 0)
 4682                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
 4683         if (error) {
 4684                 nlookup_done(&nd);
 4685                 return (error);
 4686         }
 4687 
 4688         bzero(&auio, sizeof(auio));
 4689         aiov.iov_base = uap->data;
 4690         aiov.iov_len = uap->nbytes;
 4691         auio.uio_iov = &aiov;
 4692         auio.uio_iovcnt = 1;
 4693         auio.uio_offset = 0;
 4694         auio.uio_resid = uap->nbytes;
 4695         auio.uio_rw = UIO_WRITE;
 4696         auio.uio_td = curthread;
 4697 
 4698         error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
 4699                                &auio, nd.nl_cred);
 4700 
 4701         vput(vp);
 4702         nlookup_done(&nd);
 4703         return (error);
 4704 }
 4705 
 4706 /*
 4707  * Syscall to get a named extended attribute on a file or directory.
 4708  */
 4709 int
 4710 sys_extattr_get_file(struct extattr_get_file_args *uap)
 4711 {
 4712         char attrname[EXTATTR_MAXNAMELEN];
 4713         struct nlookupdata nd;
 4714         struct uio auio;
 4715         struct iovec aiov;
 4716         struct vnode *vp;
 4717         int error;
 4718 
 4719         error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
 4720         if (error)
 4721                 return (error);
 4722 
 4723         vp = NULL;
 4724 
 4725         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 4726         if (error == 0)
 4727                 error = nlookup(&nd);
 4728         if (error == 0)
 4729                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
 4730         if (error) {
 4731                 nlookup_done(&nd);
 4732                 return (error);
 4733         }
 4734 
 4735         bzero(&auio, sizeof(auio));
 4736         aiov.iov_base = uap->data;
 4737         aiov.iov_len = uap->nbytes;
 4738         auio.uio_iov = &aiov;
 4739         auio.uio_iovcnt = 1;
 4740         auio.uio_offset = 0;
 4741         auio.uio_resid = uap->nbytes;
 4742         auio.uio_rw = UIO_READ;
 4743         auio.uio_td = curthread;
 4744 
 4745         error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
 4746                                 &auio, nd.nl_cred);
 4747         uap->sysmsg_result = uap->nbytes - auio.uio_resid;
 4748 
 4749         vput(vp);
 4750         nlookup_done(&nd);
 4751         return(error);
 4752 }
 4753 
 4754 /*
 4755  * Syscall to delete a named extended attribute from a file or directory.
 4756  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 4757  */
 4758 int
 4759 sys_extattr_delete_file(struct extattr_delete_file_args *uap)
 4760 {
 4761         char attrname[EXTATTR_MAXNAMELEN];
 4762         struct nlookupdata nd;
 4763         struct vnode *vp;
 4764         int error;
 4765 
 4766         error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
 4767         if (error)
 4768                 return(error);
 4769 
 4770         error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
 4771         if (error == 0)
 4772                 error = nlookup(&nd);
 4773         if (error == 0)
 4774                 error = ncp_writechk(&nd.nl_nch);
 4775         if (error == 0) {
 4776                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
 4777                 if (error == 0) {
 4778                         error = VOP_SETEXTATTR(vp, uap->attrnamespace,
 4779                                                attrname, NULL, nd.nl_cred);
 4780                         vput(vp);
 4781                 }
 4782         }
 4783         nlookup_done(&nd);
 4784         return(error);
 4785 }
 4786 
 4787 /*
 4788  * Determine if the mount is visible to the process.
 4789  */
 4790 static int
 4791 chroot_visible_mnt(struct mount *mp, struct proc *p)
 4792 {
 4793         struct nchandle nch;
 4794 
 4795         /*
 4796          * Traverse from the mount point upwards.  If we hit the process
 4797          * root then the mount point is visible to the process.
 4798          */
 4799         nch = mp->mnt_ncmountpt;
 4800         while (nch.ncp) {
 4801                 if (nch.mount == p->p_fd->fd_nrdir.mount &&
 4802                     nch.ncp == p->p_fd->fd_nrdir.ncp) {
 4803                         return(1);
 4804                 }
 4805                 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
 4806                         nch = nch.mount->mnt_ncmounton;
 4807                 } else {
 4808                         nch.ncp = nch.ncp->nc_parent;
 4809                 }
 4810         }
 4811 
 4812         /*
 4813          * If the mount point is not visible to the process, but the
 4814          * process root is in a subdirectory of the mount, return
 4815          * TRUE anyway.
 4816          */
 4817         if (p->p_fd->fd_nrdir.mount == mp)
 4818                 return(1);
 4819 
 4820         return(0);
 4821 }
 4822
Cache object: 28dd5fe3ab06b2e24069bbbf65d151d0
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c