The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: stable/9/sys/kern/vfs_syscalls.c 301055 2016-05-31 16:58:00Z glebius $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 #include <ufs/ufs/quota.h>
   90 
   91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   92 
   93 SDT_PROVIDER_DEFINE(vfs);
   94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   96 
   97 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
   99 static int setfflags(struct thread *td, struct vnode *, int);
  100 static int setutimes(struct thread *td, struct vnode *,
  101     const struct timespec *, int, int);
  102 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  103     struct thread *td);
  104 
  105 /*
  106  * The module initialization routine for POSIX asynchronous I/O will
  107  * set this to the version of AIO that it implements.  (Zero means
  108  * that it is not implemented.)  This value is used here by pathconf()
  109  * and in kern_descrip.c by fpathconf().
  110  */
  111 int async_io_version;
  112 
  113 #ifdef DEBUG
  114 static int syncprt = 0;
  115 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  116 #endif
  117 
  118 /*
  119  * Sync each mounted filesystem.
  120  */
  121 #ifndef _SYS_SYSPROTO_H_
  122 struct sync_args {
  123         int     dummy;
  124 };
  125 #endif
  126 /* ARGSUSED */
  127 int
  128 sys_sync(td, uap)
  129         struct thread *td;
  130         struct sync_args *uap;
  131 {
  132         struct mount *mp, *nmp;
  133         int save, vfslocked;
  134 
  135         mtx_lock(&mountlist_mtx);
  136         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  137                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  138                         nmp = TAILQ_NEXT(mp, mnt_list);
  139                         continue;
  140                 }
  141                 vfslocked = VFS_LOCK_GIANT(mp);
  142                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  143                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  144                         save = curthread_pflags_set(TDP_SYNCIO);
  145                         vfs_msync(mp, MNT_NOWAIT);
  146                         VFS_SYNC(mp, MNT_NOWAIT);
  147                         curthread_pflags_restore(save);
  148                         vn_finished_write(mp);
  149                 }
  150                 VFS_UNLOCK_GIANT(vfslocked);
  151                 mtx_lock(&mountlist_mtx);
  152                 nmp = TAILQ_NEXT(mp, mnt_list);
  153                 vfs_unbusy(mp);
  154         }
  155         mtx_unlock(&mountlist_mtx);
  156         return (0);
  157 }
  158 
  159 /*
  160  * Change filesystem quotas.
  161  */
  162 #ifndef _SYS_SYSPROTO_H_
  163 struct quotactl_args {
  164         char *path;
  165         int cmd;
  166         int uid;
  167         caddr_t arg;
  168 };
  169 #endif
  170 int
  171 sys_quotactl(td, uap)
  172         struct thread *td;
  173         register struct quotactl_args /* {
  174                 char *path;
  175                 int cmd;
  176                 int uid;
  177                 caddr_t arg;
  178         } */ *uap;
  179 {
  180         struct mount *mp;
  181         int vfslocked;
  182         int error;
  183         struct nameidata nd;
  184 
  185         AUDIT_ARG_CMD(uap->cmd);
  186         AUDIT_ARG_UID(uap->uid);
  187         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  188                 return (EPERM);
  189         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  190            UIO_USERSPACE, uap->path, td);
  191         if ((error = namei(&nd)) != 0)
  192                 return (error);
  193         vfslocked = NDHASGIANT(&nd);
  194         NDFREE(&nd, NDF_ONLY_PNBUF);
  195         mp = nd.ni_vp->v_mount;
  196         vfs_ref(mp);
  197         vput(nd.ni_vp);
  198         error = vfs_busy(mp, 0);
  199         vfs_rel(mp);
  200         if (error) {
  201                 VFS_UNLOCK_GIANT(vfslocked);
  202                 return (error);
  203         }
  204         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  205 
  206         /*
  207          * Since quota on operation typically needs to open quota
  208          * file, the Q_QUOTAON handler needs to unbusy the mount point
  209          * before calling into namei.  Otherwise, unmount might be
  210          * started between two vfs_busy() invocations (first is our,
  211          * second is from mount point cross-walk code in lookup()),
  212          * causing deadlock.
  213          *
  214          * Require that Q_QUOTAON handles the vfs_busy() reference on
  215          * its own, always returning with ubusied mount point.
  216          */
  217         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  218                 vfs_unbusy(mp);
  219         VFS_UNLOCK_GIANT(vfslocked);
  220         return (error);
  221 }
  222 
  223 /*
  224  * Used by statfs conversion routines to scale the block size up if
  225  * necessary so that all of the block counts are <= 'max_size'.  Note
  226  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  227  * value of 'n'.
  228  */
  229 void
  230 statfs_scale_blocks(struct statfs *sf, long max_size)
  231 {
  232         uint64_t count;
  233         int shift;
  234 
  235         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  236 
  237         /*
  238          * Attempt to scale the block counts to give a more accurate
  239          * overview to userland of the ratio of free space to used
  240          * space.  To do this, find the largest block count and compute
  241          * a divisor that lets it fit into a signed integer <= max_size.
  242          */
  243         if (sf->f_bavail < 0)
  244                 count = -sf->f_bavail;
  245         else
  246                 count = sf->f_bavail;
  247         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  248         if (count <= max_size)
  249                 return;
  250 
  251         count >>= flsl(max_size);
  252         shift = 0;
  253         while (count > 0) {
  254                 shift++;
  255                 count >>=1;
  256         }
  257 
  258         sf->f_bsize <<= shift;
  259         sf->f_blocks >>= shift;
  260         sf->f_bfree >>= shift;
  261         sf->f_bavail >>= shift;
  262 }
  263 
  264 /*
  265  * Get filesystem statistics.
  266  */
  267 #ifndef _SYS_SYSPROTO_H_
  268 struct statfs_args {
  269         char *path;
  270         struct statfs *buf;
  271 };
  272 #endif
  273 int
  274 sys_statfs(td, uap)
  275         struct thread *td;
  276         register struct statfs_args /* {
  277                 char *path;
  278                 struct statfs *buf;
  279         } */ *uap;
  280 {
  281         struct statfs sf;
  282         int error;
  283 
  284         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  285         if (error == 0)
  286                 error = copyout(&sf, uap->buf, sizeof(sf));
  287         return (error);
  288 }
  289 
  290 int
  291 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  292     struct statfs *buf)
  293 {
  294         struct mount *mp;
  295         struct statfs *sp, sb;
  296         int vfslocked;
  297         int error;
  298         struct nameidata nd;
  299 
  300         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  301             AUDITVNODE1, pathseg, path, td);
  302         error = namei(&nd);
  303         if (error)
  304                 return (error);
  305         vfslocked = NDHASGIANT(&nd);
  306         mp = nd.ni_vp->v_mount;
  307         vfs_ref(mp);
  308         NDFREE(&nd, NDF_ONLY_PNBUF);
  309         vput(nd.ni_vp);
  310         error = vfs_busy(mp, 0);
  311         vfs_rel(mp);
  312         if (error) {
  313                 VFS_UNLOCK_GIANT(vfslocked);
  314                 return (error);
  315         }
  316 #ifdef MAC
  317         error = mac_mount_check_stat(td->td_ucred, mp);
  318         if (error)
  319                 goto out;
  320 #endif
  321         /*
  322          * Set these in case the underlying filesystem fails to do so.
  323          */
  324         sp = &mp->mnt_stat;
  325         sp->f_version = STATFS_VERSION;
  326         sp->f_namemax = NAME_MAX;
  327         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  328         error = VFS_STATFS(mp, sp);
  329         if (error)
  330                 goto out;
  331         if (priv_check(td, PRIV_VFS_GENERATION)) {
  332                 bcopy(sp, &sb, sizeof(sb));
  333                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  334                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  335                 sp = &sb;
  336         }
  337         *buf = *sp;
  338 out:
  339         vfs_unbusy(mp);
  340         VFS_UNLOCK_GIANT(vfslocked);
  341         return (error);
  342 }
  343 
  344 /*
  345  * Get filesystem statistics.
  346  */
  347 #ifndef _SYS_SYSPROTO_H_
  348 struct fstatfs_args {
  349         int fd;
  350         struct statfs *buf;
  351 };
  352 #endif
  353 int
  354 sys_fstatfs(td, uap)
  355         struct thread *td;
  356         register struct fstatfs_args /* {
  357                 int fd;
  358                 struct statfs *buf;
  359         } */ *uap;
  360 {
  361         struct statfs sf;
  362         int error;
  363 
  364         error = kern_fstatfs(td, uap->fd, &sf);
  365         if (error == 0)
  366                 error = copyout(&sf, uap->buf, sizeof(sf));
  367         return (error);
  368 }
  369 
  370 int
  371 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  372 {
  373         struct file *fp;
  374         struct mount *mp;
  375         struct statfs *sp, sb;
  376         int vfslocked;
  377         struct vnode *vp;
  378         int error;
  379 
  380         AUDIT_ARG_FD(fd);
  381         error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
  382         if (error)
  383                 return (error);
  384         vp = fp->f_vnode;
  385         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  386         vn_lock(vp, LK_SHARED | LK_RETRY);
  387 #ifdef AUDIT
  388         AUDIT_ARG_VNODE1(vp);
  389 #endif
  390         mp = vp->v_mount;
  391         if (mp)
  392                 vfs_ref(mp);
  393         VOP_UNLOCK(vp, 0);
  394         fdrop(fp, td);
  395         if (mp == NULL) {
  396                 error = EBADF;
  397                 goto out;
  398         }
  399         error = vfs_busy(mp, 0);
  400         vfs_rel(mp);
  401         if (error) {
  402                 VFS_UNLOCK_GIANT(vfslocked);
  403                 return (error);
  404         }
  405 #ifdef MAC
  406         error = mac_mount_check_stat(td->td_ucred, mp);
  407         if (error)
  408                 goto out;
  409 #endif
  410         /*
  411          * Set these in case the underlying filesystem fails to do so.
  412          */
  413         sp = &mp->mnt_stat;
  414         sp->f_version = STATFS_VERSION;
  415         sp->f_namemax = NAME_MAX;
  416         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  417         error = VFS_STATFS(mp, sp);
  418         if (error)
  419                 goto out;
  420         if (priv_check(td, PRIV_VFS_GENERATION)) {
  421                 bcopy(sp, &sb, sizeof(sb));
  422                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  423                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  424                 sp = &sb;
  425         }
  426         *buf = *sp;
  427 out:
  428         if (mp)
  429                 vfs_unbusy(mp);
  430         VFS_UNLOCK_GIANT(vfslocked);
  431         return (error);
  432 }
  433 
  434 /*
  435  * Get statistics on all filesystems.
  436  */
  437 #ifndef _SYS_SYSPROTO_H_
  438 struct getfsstat_args {
  439         struct statfs *buf;
  440         long bufsize;
  441         int flags;
  442 };
  443 #endif
  444 int
  445 sys_getfsstat(td, uap)
  446         struct thread *td;
  447         register struct getfsstat_args /* {
  448                 struct statfs *buf;
  449                 long bufsize;
  450                 int flags;
  451         } */ *uap;
  452 {
  453 
  454         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  455             uap->flags));
  456 }
  457 
  458 /*
  459  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  460  *      The caller is responsible for freeing memory which will be allocated
  461  *      in '*buf'.
  462  */
  463 int
  464 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  465     enum uio_seg bufseg, int flags)
  466 {
  467         struct mount *mp, *nmp;
  468         struct statfs *sfsp, *sp, sb;
  469         size_t count, maxcount;
  470         int vfslocked;
  471         int error;
  472 
  473         maxcount = bufsize / sizeof(struct statfs);
  474         if (bufsize == 0)
  475                 sfsp = NULL;
  476         else if (bufseg == UIO_USERSPACE)
  477                 sfsp = *buf;
  478         else /* if (bufseg == UIO_SYSSPACE) */ {
  479                 count = 0;
  480                 mtx_lock(&mountlist_mtx);
  481                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  482                         count++;
  483                 }
  484                 mtx_unlock(&mountlist_mtx);
  485                 if (maxcount > count)
  486                         maxcount = count;
  487                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  488                     M_WAITOK);
  489         }
  490         count = 0;
  491         mtx_lock(&mountlist_mtx);
  492         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  493                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  494                         nmp = TAILQ_NEXT(mp, mnt_list);
  495                         continue;
  496                 }
  497 #ifdef MAC
  498                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  499                         nmp = TAILQ_NEXT(mp, mnt_list);
  500                         continue;
  501                 }
  502 #endif
  503                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  504                         nmp = TAILQ_NEXT(mp, mnt_list);
  505                         continue;
  506                 }
  507                 vfslocked = VFS_LOCK_GIANT(mp);
  508                 if (sfsp && count < maxcount) {
  509                         sp = &mp->mnt_stat;
  510                         /*
  511                          * Set these in case the underlying filesystem
  512                          * fails to do so.
  513                          */
  514                         sp->f_version = STATFS_VERSION;
  515                         sp->f_namemax = NAME_MAX;
  516                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  517                         /*
  518                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  519                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  520                          * overrides MNT_WAIT.
  521                          */
  522                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  523                             (flags & MNT_WAIT)) &&
  524                             (error = VFS_STATFS(mp, sp))) {
  525                                 VFS_UNLOCK_GIANT(vfslocked);
  526                                 mtx_lock(&mountlist_mtx);
  527                                 nmp = TAILQ_NEXT(mp, mnt_list);
  528                                 vfs_unbusy(mp);
  529                                 continue;
  530                         }
  531                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  532                                 bcopy(sp, &sb, sizeof(sb));
  533                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  534                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  535                                 sp = &sb;
  536                         }
  537                         if (bufseg == UIO_SYSSPACE)
  538                                 bcopy(sp, sfsp, sizeof(*sp));
  539                         else /* if (bufseg == UIO_USERSPACE) */ {
  540                                 error = copyout(sp, sfsp, sizeof(*sp));
  541                                 if (error) {
  542                                         vfs_unbusy(mp);
  543                                         VFS_UNLOCK_GIANT(vfslocked);
  544                                         return (error);
  545                                 }
  546                         }
  547                         sfsp++;
  548                 }
  549                 VFS_UNLOCK_GIANT(vfslocked);
  550                 count++;
  551                 mtx_lock(&mountlist_mtx);
  552                 nmp = TAILQ_NEXT(mp, mnt_list);
  553                 vfs_unbusy(mp);
  554         }
  555         mtx_unlock(&mountlist_mtx);
  556         if (sfsp && count > maxcount)
  557                 td->td_retval[0] = maxcount;
  558         else
  559                 td->td_retval[0] = count;
  560         return (0);
  561 }
  562 
  563 #ifdef COMPAT_FREEBSD4
  564 /*
  565  * Get old format filesystem statistics.
  566  */
  567 static void cvtstatfs(struct statfs *, struct ostatfs *);
  568 
  569 #ifndef _SYS_SYSPROTO_H_
  570 struct freebsd4_statfs_args {
  571         char *path;
  572         struct ostatfs *buf;
  573 };
  574 #endif
  575 int
  576 freebsd4_statfs(td, uap)
  577         struct thread *td;
  578         struct freebsd4_statfs_args /* {
  579                 char *path;
  580                 struct ostatfs *buf;
  581         } */ *uap;
  582 {
  583         struct ostatfs osb;
  584         struct statfs sf;
  585         int error;
  586 
  587         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  588         if (error)
  589                 return (error);
  590         cvtstatfs(&sf, &osb);
  591         return (copyout(&osb, uap->buf, sizeof(osb)));
  592 }
  593 
  594 /*
  595  * Get filesystem statistics.
  596  */
  597 #ifndef _SYS_SYSPROTO_H_
  598 struct freebsd4_fstatfs_args {
  599         int fd;
  600         struct ostatfs *buf;
  601 };
  602 #endif
  603 int
  604 freebsd4_fstatfs(td, uap)
  605         struct thread *td;
  606         struct freebsd4_fstatfs_args /* {
  607                 int fd;
  608                 struct ostatfs *buf;
  609         } */ *uap;
  610 {
  611         struct ostatfs osb;
  612         struct statfs sf;
  613         int error;
  614 
  615         error = kern_fstatfs(td, uap->fd, &sf);
  616         if (error)
  617                 return (error);
  618         cvtstatfs(&sf, &osb);
  619         return (copyout(&osb, uap->buf, sizeof(osb)));
  620 }
  621 
  622 /*
  623  * Get statistics on all filesystems.
  624  */
  625 #ifndef _SYS_SYSPROTO_H_
  626 struct freebsd4_getfsstat_args {
  627         struct ostatfs *buf;
  628         long bufsize;
  629         int flags;
  630 };
  631 #endif
  632 int
  633 freebsd4_getfsstat(td, uap)
  634         struct thread *td;
  635         register struct freebsd4_getfsstat_args /* {
  636                 struct ostatfs *buf;
  637                 long bufsize;
  638                 int flags;
  639         } */ *uap;
  640 {
  641         struct statfs *buf, *sp;
  642         struct ostatfs osb;
  643         size_t count, size;
  644         int error;
  645 
  646         count = uap->bufsize / sizeof(struct ostatfs);
  647         size = count * sizeof(struct statfs);
  648         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  649         if (size > 0) {
  650                 count = td->td_retval[0];
  651                 sp = buf;
  652                 while (count > 0 && error == 0) {
  653                         cvtstatfs(sp, &osb);
  654                         error = copyout(&osb, uap->buf, sizeof(osb));
  655                         sp++;
  656                         uap->buf++;
  657                         count--;
  658                 }
  659                 free(buf, M_TEMP);
  660         }
  661         return (error);
  662 }
  663 
  664 /*
  665  * Implement fstatfs() for (NFS) file handles.
  666  */
  667 #ifndef _SYS_SYSPROTO_H_
  668 struct freebsd4_fhstatfs_args {
  669         struct fhandle *u_fhp;
  670         struct ostatfs *buf;
  671 };
  672 #endif
  673 int
  674 freebsd4_fhstatfs(td, uap)
  675         struct thread *td;
  676         struct freebsd4_fhstatfs_args /* {
  677                 struct fhandle *u_fhp;
  678                 struct ostatfs *buf;
  679         } */ *uap;
  680 {
  681         struct ostatfs osb;
  682         struct statfs sf;
  683         fhandle_t fh;
  684         int error;
  685 
  686         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  687         if (error)
  688                 return (error);
  689         error = kern_fhstatfs(td, fh, &sf);
  690         if (error)
  691                 return (error);
  692         cvtstatfs(&sf, &osb);
  693         return (copyout(&osb, uap->buf, sizeof(osb)));
  694 }
  695 
  696 /*
  697  * Convert a new format statfs structure to an old format statfs structure.
  698  */
  699 static void
  700 cvtstatfs(nsp, osp)
  701         struct statfs *nsp;
  702         struct ostatfs *osp;
  703 {
  704 
  705         statfs_scale_blocks(nsp, LONG_MAX);
  706         bzero(osp, sizeof(*osp));
  707         osp->f_bsize = nsp->f_bsize;
  708         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  709         osp->f_blocks = nsp->f_blocks;
  710         osp->f_bfree = nsp->f_bfree;
  711         osp->f_bavail = nsp->f_bavail;
  712         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  713         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  714         osp->f_owner = nsp->f_owner;
  715         osp->f_type = nsp->f_type;
  716         osp->f_flags = nsp->f_flags;
  717         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  718         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  719         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  720         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  721         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  722             MIN(MFSNAMELEN, OMFSNAMELEN));
  723         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  724             MIN(MNAMELEN, OMNAMELEN));
  725         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  726             MIN(MNAMELEN, OMNAMELEN));
  727         osp->f_fsid = nsp->f_fsid;
  728 }
  729 #endif /* COMPAT_FREEBSD4 */
  730 
  731 /*
  732  * Change current working directory to a given file descriptor.
  733  */
  734 #ifndef _SYS_SYSPROTO_H_
  735 struct fchdir_args {
  736         int     fd;
  737 };
  738 #endif
  739 int
  740 sys_fchdir(td, uap)
  741         struct thread *td;
  742         struct fchdir_args /* {
  743                 int fd;
  744         } */ *uap;
  745 {
  746         register struct filedesc *fdp = td->td_proc->p_fd;
  747         struct vnode *vp, *tdp, *vpold;
  748         struct mount *mp;
  749         struct file *fp;
  750         int vfslocked;
  751         int error;
  752 
  753         AUDIT_ARG_FD(uap->fd);
  754         if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
  755                 return (error);
  756         vp = fp->f_vnode;
  757         VREF(vp);
  758         fdrop(fp, td);
  759         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  760         vn_lock(vp, LK_SHARED | LK_RETRY);
  761         AUDIT_ARG_VNODE1(vp);
  762         error = change_dir(vp, td);
  763         while (!error && (mp = vp->v_mountedhere) != NULL) {
  764                 int tvfslocked;
  765                 if (vfs_busy(mp, 0))
  766                         continue;
  767                 tvfslocked = VFS_LOCK_GIANT(mp);
  768                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  769                 vfs_unbusy(mp);
  770                 if (error) {
  771                         VFS_UNLOCK_GIANT(tvfslocked);
  772                         break;
  773                 }
  774                 vput(vp);
  775                 VFS_UNLOCK_GIANT(vfslocked);
  776                 vp = tdp;
  777                 vfslocked = tvfslocked;
  778         }
  779         if (error) {
  780                 vput(vp);
  781                 VFS_UNLOCK_GIANT(vfslocked);
  782                 return (error);
  783         }
  784         VOP_UNLOCK(vp, 0);
  785         VFS_UNLOCK_GIANT(vfslocked);
  786         FILEDESC_XLOCK(fdp);
  787         vpold = fdp->fd_cdir;
  788         fdp->fd_cdir = vp;
  789         FILEDESC_XUNLOCK(fdp);
  790         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  791         vrele(vpold);
  792         VFS_UNLOCK_GIANT(vfslocked);
  793         return (0);
  794 }
  795 
  796 /*
  797  * Change current working directory (``.'').
  798  */
  799 #ifndef _SYS_SYSPROTO_H_
  800 struct chdir_args {
  801         char    *path;
  802 };
  803 #endif
  804 int
  805 sys_chdir(td, uap)
  806         struct thread *td;
  807         struct chdir_args /* {
  808                 char *path;
  809         } */ *uap;
  810 {
  811 
  812         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  813 }
  814 
  815 int
  816 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  817 {
  818         register struct filedesc *fdp = td->td_proc->p_fd;
  819         int error;
  820         struct nameidata nd;
  821         struct vnode *vp;
  822         int vfslocked;
  823 
  824         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  825             MPSAFE, pathseg, path, td);
  826         if ((error = namei(&nd)) != 0)
  827                 return (error);
  828         vfslocked = NDHASGIANT(&nd);
  829         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  830                 vput(nd.ni_vp);
  831                 VFS_UNLOCK_GIANT(vfslocked);
  832                 NDFREE(&nd, NDF_ONLY_PNBUF);
  833                 return (error);
  834         }
  835         VOP_UNLOCK(nd.ni_vp, 0);
  836         VFS_UNLOCK_GIANT(vfslocked);
  837         NDFREE(&nd, NDF_ONLY_PNBUF);
  838         FILEDESC_XLOCK(fdp);
  839         vp = fdp->fd_cdir;
  840         fdp->fd_cdir = nd.ni_vp;
  841         FILEDESC_XUNLOCK(fdp);
  842         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  843         vrele(vp);
  844         VFS_UNLOCK_GIANT(vfslocked);
  845         return (0);
  846 }
  847 
  848 /*
  849  * Helper function for raised chroot(2) security function:  Refuse if
  850  * any filedescriptors are open directories.
  851  */
  852 static int
  853 chroot_refuse_vdir_fds(fdp)
  854         struct filedesc *fdp;
  855 {
  856         struct vnode *vp;
  857         struct file *fp;
  858         int fd;
  859 
  860         FILEDESC_LOCK_ASSERT(fdp);
  861 
  862         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  863                 fp = fget_locked(fdp, fd);
  864                 if (fp == NULL)
  865                         continue;
  866                 if (fp->f_type == DTYPE_VNODE) {
  867                         vp = fp->f_vnode;
  868                         if (vp->v_type == VDIR)
  869                                 return (EPERM);
  870                 }
  871         }
  872         return (0);
  873 }
  874 
  875 /*
  876  * This sysctl determines if we will allow a process to chroot(2) if it
  877  * has a directory open:
  878  *      0: disallowed for all processes.
  879  *      1: allowed for processes that were not already chroot(2)'ed.
  880  *      2: allowed for all processes.
  881  */
  882 
  883 static int chroot_allow_open_directories = 1;
  884 
  885 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  886      &chroot_allow_open_directories, 0, "");
  887 
  888 /*
  889  * Change notion of root (``/'') directory.
  890  */
  891 #ifndef _SYS_SYSPROTO_H_
  892 struct chroot_args {
  893         char    *path;
  894 };
  895 #endif
  896 int
  897 sys_chroot(td, uap)
  898         struct thread *td;
  899         struct chroot_args /* {
  900                 char *path;
  901         } */ *uap;
  902 {
  903         int error;
  904         struct nameidata nd;
  905         int vfslocked;
  906 
  907         error = priv_check(td, PRIV_VFS_CHROOT);
  908         if (error)
  909                 return (error);
  910         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  911             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  912         error = namei(&nd);
  913         if (error)
  914                 goto error;
  915         vfslocked = NDHASGIANT(&nd);
  916         if ((error = change_dir(nd.ni_vp, td)) != 0)
  917                 goto e_vunlock;
  918 #ifdef MAC
  919         if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
  920                 goto e_vunlock;
  921 #endif
  922         VOP_UNLOCK(nd.ni_vp, 0);
  923         error = change_root(nd.ni_vp, td);
  924         vrele(nd.ni_vp);
  925         VFS_UNLOCK_GIANT(vfslocked);
  926         NDFREE(&nd, NDF_ONLY_PNBUF);
  927         return (error);
  928 e_vunlock:
  929         vput(nd.ni_vp);
  930         VFS_UNLOCK_GIANT(vfslocked);
  931 error:
  932         NDFREE(&nd, NDF_ONLY_PNBUF);
  933         return (error);
  934 }
  935 
  936 /*
  937  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  938  * instance.
  939  */
  940 int
  941 change_dir(vp, td)
  942         struct vnode *vp;
  943         struct thread *td;
  944 {
  945         int error;
  946 
  947         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  948         if (vp->v_type != VDIR)
  949                 return (ENOTDIR);
  950 #ifdef MAC
  951         error = mac_vnode_check_chdir(td->td_ucred, vp);
  952         if (error)
  953                 return (error);
  954 #endif
  955         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  956         return (error);
  957 }
  958 
  959 /*
  960  * Common routine for kern_chroot() and jail_attach().  The caller is
  961  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  962  * authorize this operation.
  963  */
  964 int
  965 change_root(vp, td)
  966         struct vnode *vp;
  967         struct thread *td;
  968 {
  969         struct filedesc *fdp;
  970         struct vnode *oldvp;
  971         int vfslocked;
  972         int error;
  973 
  974         VFS_ASSERT_GIANT(vp->v_mount);
  975         fdp = td->td_proc->p_fd;
  976         FILEDESC_XLOCK(fdp);
  977         if (chroot_allow_open_directories == 0 ||
  978             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  979                 error = chroot_refuse_vdir_fds(fdp);
  980                 if (error) {
  981                         FILEDESC_XUNLOCK(fdp);
  982                         return (error);
  983                 }
  984         }
  985         oldvp = fdp->fd_rdir;
  986         fdp->fd_rdir = vp;
  987         VREF(fdp->fd_rdir);
  988         if (!fdp->fd_jdir) {
  989                 fdp->fd_jdir = vp;
  990                 VREF(fdp->fd_jdir);
  991         }
  992         FILEDESC_XUNLOCK(fdp);
  993         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  994         vrele(oldvp);
  995         VFS_UNLOCK_GIANT(vfslocked);
  996         return (0);
  997 }
  998 
  999 static __inline cap_rights_t
 1000 flags_to_rights(int flags)
 1001 {
 1002         cap_rights_t rights = 0;
 1003 
 1004         switch ((flags & O_ACCMODE)) {
 1005         case O_RDONLY:
 1006                 rights |= CAP_READ;
 1007                 break;
 1008 
 1009         case O_RDWR:
 1010                 rights |= CAP_READ;
 1011                 /* fall through */
 1012 
 1013         case O_WRONLY:
 1014                 rights |= CAP_WRITE;
 1015                 break;
 1016 
 1017         case O_EXEC:
 1018                 rights |= CAP_FEXECVE;
 1019                 break;
 1020         }
 1021 
 1022         if (flags & O_CREAT)
 1023                 rights |= CAP_CREATE;
 1024 
 1025         if (flags & O_TRUNC)
 1026                 rights |= CAP_FTRUNCATE;
 1027 
 1028         if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
 1029                 rights |= CAP_FLOCK;
 1030 
 1031         return (rights);
 1032 }
 1033 
 1034 /*
 1035  * Check permissions, allocate an open file structure, and call the device
 1036  * open routine if any.
 1037  */
 1038 #ifndef _SYS_SYSPROTO_H_
 1039 struct open_args {
 1040         char    *path;
 1041         int     flags;
 1042         int     mode;
 1043 };
 1044 #endif
 1045 int
 1046 sys_open(td, uap)
 1047         struct thread *td;
 1048         register struct open_args /* {
 1049                 char *path;
 1050                 int flags;
 1051                 int mode;
 1052         } */ *uap;
 1053 {
 1054 
 1055         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1056 }
 1057 
 1058 #ifndef _SYS_SYSPROTO_H_
 1059 struct openat_args {
 1060         int     fd;
 1061         char    *path;
 1062         int     flag;
 1063         int     mode;
 1064 };
 1065 #endif
 1066 int
 1067 sys_openat(struct thread *td, struct openat_args *uap)
 1068 {
 1069 
 1070         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1071             uap->mode));
 1072 }
 1073 
 1074 int
 1075 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1076     int mode)
 1077 {
 1078 
 1079         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1080 }
 1081 
 1082 int
 1083 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1084     int flags, int mode)
 1085 {
 1086         struct proc *p = td->td_proc;
 1087         struct filedesc *fdp = p->p_fd;
 1088         struct file *fp;
 1089         struct vnode *vp;
 1090         int cmode;
 1091         struct file *nfp;
 1092         int type, indx = -1, error, error_open;
 1093         struct flock lf;
 1094         struct nameidata nd;
 1095         int vfslocked;
 1096         cap_rights_t rights_needed = CAP_LOOKUP;
 1097 
 1098         AUDIT_ARG_FFLAGS(flags);
 1099         AUDIT_ARG_MODE(mode);
 1100         /* XXX: audit dirfd */
 1101         rights_needed |= flags_to_rights(flags);
 1102         /*
 1103          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1104          * may be specified.
 1105          */
 1106         if (flags & O_EXEC) {
 1107                 if (flags & O_ACCMODE)
 1108                         return (EINVAL);
 1109         } else if ((flags & O_ACCMODE) == O_ACCMODE)
 1110                 return (EINVAL);
 1111         else
 1112                 flags = FFLAGS(flags);
 1113 
 1114         /*
 1115          * allocate the file descriptor, but don't install a descriptor yet
 1116          */
 1117         error = falloc_noinstall(td, &nfp);
 1118         if (error)
 1119                 return (error);
 1120         /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
 1121         fp = nfp;
 1122         /* Set the flags early so the finit in devfs can pick them up. */
 1123         fp->f_flag = flags & FMASK;
 1124         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
 1125         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
 1126             path, fd, rights_needed, td);
 1127         td->td_dupfd = -1;              /* XXX check for fdopen */
 1128         error = vn_open(&nd, &flags, cmode, fp);
 1129         if (error) {
 1130                 /*
 1131                  * If the vn_open replaced the method vector, something
 1132                  * wonderous happened deep below and we just pass it up
 1133                  * pretending we know what we do.
 1134                  */
 1135                 if (error == ENXIO && fp->f_ops != &badfileops)
 1136                         goto success;
 1137 
 1138                 /*
 1139                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1140                  * responsible for dropping the old contents of ofiles[indx]
 1141                  * if it succeeds.
 1142                  *
 1143                  * Don't do this for relative (capability) lookups; we don't
 1144                  * understand exactly what would happen, and we don't think
 1145                  * that it ever should.
 1146                  */
 1147                 if ((nd.ni_strictrelative == 0) &&
 1148                     (error == ENODEV || error == ENXIO) &&
 1149                     (td->td_dupfd >= 0)) {
 1150                         /* XXX from fdopen */
 1151                         error_open = error;
 1152                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1153                                 goto bad_unlocked;
 1154                         if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
 1155                             flags, error_open)) == 0)
 1156                                 goto success;
 1157                 }
 1158                 /*
 1159                  * Clean up the descriptor, but only if another thread hadn't
 1160                  * replaced or closed it.
 1161                  */
 1162                 if (indx != -1)
 1163                         fdclose(fdp, fp, indx, td);
 1164                 fdrop(fp, td);
 1165 
 1166                 return (error);
 1167         }
 1168         td->td_dupfd = 0;
 1169         vfslocked = NDHASGIANT(&nd);
 1170         NDFREE(&nd, NDF_ONLY_PNBUF);
 1171         vp = nd.ni_vp;
 1172 
 1173         /*
 1174          * Store the vnode, for any f_type. Typically, the vnode use
 1175          * count is decremented by direct call to vn_closefile() for
 1176          * files that switched type in the cdevsw fdopen() method.
 1177          */
 1178         fp->f_vnode = vp;
 1179         /*
 1180          * If the file wasn't claimed by devfs bind it to the normal
 1181          * vnode operations here.
 1182          */
 1183         if (fp->f_ops == &badfileops) {
 1184                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1185                 fp->f_seqcount = 1;
 1186                 finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
 1187         }
 1188 
 1189         VOP_UNLOCK(vp, 0);
 1190         if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
 1191                 lf.l_whence = SEEK_SET;
 1192                 lf.l_start = 0;
 1193                 lf.l_len = 0;
 1194                 if (flags & O_EXLOCK)
 1195                         lf.l_type = F_WRLCK;
 1196                 else
 1197                         lf.l_type = F_RDLCK;
 1198                 type = F_FLOCK;
 1199                 if ((flags & FNONBLOCK) == 0)
 1200                         type |= F_WAIT;
 1201                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 1202                             type)) != 0)
 1203                         goto bad;
 1204                 atomic_set_int(&fp->f_flag, FHASLOCK);
 1205         }
 1206         if (flags & O_TRUNC) {
 1207                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1208                 if (error)
 1209                         goto bad;
 1210         }
 1211         VFS_UNLOCK_GIANT(vfslocked);
 1212 success:
 1213         /*
 1214          * If we haven't already installed the FD (for dupfdopen), do so now.
 1215          */
 1216         if (indx == -1) {
 1217 #ifdef CAPABILITIES
 1218                 if (nd.ni_strictrelative == 1) {
 1219                         /*
 1220                          * We are doing a strict relative lookup; wrap the
 1221                          * result in a capability.
 1222                          */
 1223                         if ((error = kern_capwrap(td, fp, nd.ni_baserights,
 1224                             &indx)) != 0)
 1225                                 goto bad_unlocked;
 1226                 } else
 1227 #endif
 1228                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1229                                 goto bad_unlocked;
 1230 
 1231         }
 1232 
 1233         /*
 1234          * Release our private reference, leaving the one associated with
 1235          * the descriptor table intact.
 1236          */
 1237         fdrop(fp, td);
 1238         td->td_retval[0] = indx;
 1239         return (0);
 1240 bad:
 1241         VFS_UNLOCK_GIANT(vfslocked);
 1242 bad_unlocked:
 1243         if (indx != -1)
 1244                 fdclose(fdp, fp, indx, td);
 1245         fdrop(fp, td);
 1246         td->td_retval[0] = -1;
 1247         return (error);
 1248 }
 1249 
 1250 #ifdef COMPAT_43
 1251 /*
 1252  * Create a file.
 1253  */
 1254 #ifndef _SYS_SYSPROTO_H_
 1255 struct ocreat_args {
 1256         char    *path;
 1257         int     mode;
 1258 };
 1259 #endif
 1260 int
 1261 ocreat(td, uap)
 1262         struct thread *td;
 1263         register struct ocreat_args /* {
 1264                 char *path;
 1265                 int mode;
 1266         } */ *uap;
 1267 {
 1268 
 1269         return (kern_open(td, uap->path, UIO_USERSPACE,
 1270             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1271 }
 1272 #endif /* COMPAT_43 */
 1273 
 1274 /*
 1275  * Create a special file.
 1276  */
 1277 #ifndef _SYS_SYSPROTO_H_
 1278 struct mknod_args {
 1279         char    *path;
 1280         int     mode;
 1281         int     dev;
 1282 };
 1283 #endif
 1284 int
 1285 sys_mknod(td, uap)
 1286         struct thread *td;
 1287         register struct mknod_args /* {
 1288                 char *path;
 1289                 int mode;
 1290                 int dev;
 1291         } */ *uap;
 1292 {
 1293 
 1294         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1295 }
 1296 
 1297 #ifndef _SYS_SYSPROTO_H_
 1298 struct mknodat_args {
 1299         int     fd;
 1300         char    *path;
 1301         mode_t  mode;
 1302         dev_t   dev;
 1303 };
 1304 #endif
 1305 int
 1306 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1307 {
 1308 
 1309         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1310             uap->dev));
 1311 }
 1312 
 1313 int
 1314 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1315     int dev)
 1316 {
 1317 
 1318         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1319 }
 1320 
 1321 int
 1322 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1323     int mode, int dev)
 1324 {
 1325         struct vnode *vp;
 1326         struct mount *mp;
 1327         struct vattr vattr;
 1328         int error;
 1329         int whiteout = 0;
 1330         struct nameidata nd;
 1331         int vfslocked;
 1332 
 1333         AUDIT_ARG_MODE(mode);
 1334         AUDIT_ARG_DEV(dev);
 1335         switch (mode & S_IFMT) {
 1336         case S_IFCHR:
 1337         case S_IFBLK:
 1338                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1339                 break;
 1340         case S_IFMT:
 1341                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1342                 break;
 1343         case S_IFWHT:
 1344                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1345                 break;
 1346         case S_IFIFO:
 1347                 if (dev == 0)
 1348                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1349                 /* FALLTHROUGH */
 1350         default:
 1351                 error = EINVAL;
 1352                 break;
 1353         }
 1354         if (error)
 1355                 return (error);
 1356 restart:
 1357         bwillwrite();
 1358         NDINIT_ATRIGHTS(&nd, CREATE,
 1359             LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
 1360             CAP_MKFIFO, td);
 1361         if ((error = namei(&nd)) != 0)
 1362                 return (error);
 1363         vfslocked = NDHASGIANT(&nd);
 1364         vp = nd.ni_vp;
 1365         if (vp != NULL) {
 1366                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1367                 if (vp == nd.ni_dvp)
 1368                         vrele(nd.ni_dvp);
 1369                 else
 1370                         vput(nd.ni_dvp);
 1371                 vrele(vp);
 1372                 VFS_UNLOCK_GIANT(vfslocked);
 1373                 return (EEXIST);
 1374         } else {
 1375                 VATTR_NULL(&vattr);
 1376                 vattr.va_mode = (mode & ALLPERMS) &
 1377                     ~td->td_proc->p_fd->fd_cmask;
 1378                 vattr.va_rdev = dev;
 1379                 whiteout = 0;
 1380 
 1381                 switch (mode & S_IFMT) {
 1382                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1383                         vattr.va_type = VBAD;
 1384                         break;
 1385                 case S_IFCHR:
 1386                         vattr.va_type = VCHR;
 1387                         break;
 1388                 case S_IFBLK:
 1389                         vattr.va_type = VBLK;
 1390                         break;
 1391                 case S_IFWHT:
 1392                         whiteout = 1;
 1393                         break;
 1394                 default:
 1395                         panic("kern_mknod: invalid mode");
 1396                 }
 1397         }
 1398         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1399                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1400                 vput(nd.ni_dvp);
 1401                 VFS_UNLOCK_GIANT(vfslocked);
 1402                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1403                         return (error);
 1404                 goto restart;
 1405         }
 1406 #ifdef MAC
 1407         if (error == 0 && !whiteout)
 1408                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1409                     &nd.ni_cnd, &vattr);
 1410 #endif
 1411         if (!error) {
 1412                 if (whiteout)
 1413                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1414                 else {
 1415                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1416                                                 &nd.ni_cnd, &vattr);
 1417                         if (error == 0)
 1418                                 vput(nd.ni_vp);
 1419                 }
 1420         }
 1421         NDFREE(&nd, NDF_ONLY_PNBUF);
 1422         vput(nd.ni_dvp);
 1423         vn_finished_write(mp);
 1424         VFS_UNLOCK_GIANT(vfslocked);
 1425         return (error);
 1426 }
 1427 
 1428 /*
 1429  * Create a named pipe.
 1430  */
 1431 #ifndef _SYS_SYSPROTO_H_
 1432 struct mkfifo_args {
 1433         char    *path;
 1434         int     mode;
 1435 };
 1436 #endif
 1437 int
 1438 sys_mkfifo(td, uap)
 1439         struct thread *td;
 1440         register struct mkfifo_args /* {
 1441                 char *path;
 1442                 int mode;
 1443         } */ *uap;
 1444 {
 1445 
 1446         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1447 }
 1448 
 1449 #ifndef _SYS_SYSPROTO_H_
 1450 struct mkfifoat_args {
 1451         int     fd;
 1452         char    *path;
 1453         mode_t  mode;
 1454 };
 1455 #endif
 1456 int
 1457 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1458 {
 1459 
 1460         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1461             uap->mode));
 1462 }
 1463 
 1464 int
 1465 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1466 {
 1467 
 1468         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1469 }
 1470 
 1471 int
 1472 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1473     int mode)
 1474 {
 1475         struct mount *mp;
 1476         struct vattr vattr;
 1477         int error;
 1478         struct nameidata nd;
 1479         int vfslocked;
 1480 
 1481         AUDIT_ARG_MODE(mode);
 1482 restart:
 1483         bwillwrite();
 1484         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1485             pathseg, path, fd, td);
 1486         if ((error = namei(&nd)) != 0)
 1487                 return (error);
 1488         vfslocked = NDHASGIANT(&nd);
 1489         if (nd.ni_vp != NULL) {
 1490                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1491                 if (nd.ni_vp == nd.ni_dvp)
 1492                         vrele(nd.ni_dvp);
 1493                 else
 1494                         vput(nd.ni_dvp);
 1495                 vrele(nd.ni_vp);
 1496                 VFS_UNLOCK_GIANT(vfslocked);
 1497                 return (EEXIST);
 1498         }
 1499         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1500                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1501                 vput(nd.ni_dvp);
 1502                 VFS_UNLOCK_GIANT(vfslocked);
 1503                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1504                         return (error);
 1505                 goto restart;
 1506         }
 1507         VATTR_NULL(&vattr);
 1508         vattr.va_type = VFIFO;
 1509         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1510 #ifdef MAC
 1511         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1512             &vattr);
 1513         if (error)
 1514                 goto out;
 1515 #endif
 1516         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1517         if (error == 0)
 1518                 vput(nd.ni_vp);
 1519 #ifdef MAC
 1520 out:
 1521 #endif
 1522         vput(nd.ni_dvp);
 1523         vn_finished_write(mp);
 1524         VFS_UNLOCK_GIANT(vfslocked);
 1525         NDFREE(&nd, NDF_ONLY_PNBUF);
 1526         return (error);
 1527 }
 1528 
 1529 /*
 1530  * Make a hard file link.
 1531  */
 1532 #ifndef _SYS_SYSPROTO_H_
 1533 struct link_args {
 1534         char    *path;
 1535         char    *link;
 1536 };
 1537 #endif
 1538 int
 1539 sys_link(td, uap)
 1540         struct thread *td;
 1541         register struct link_args /* {
 1542                 char *path;
 1543                 char *link;
 1544         } */ *uap;
 1545 {
 1546 
 1547         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1548 }
 1549 
 1550 #ifndef _SYS_SYSPROTO_H_
 1551 struct linkat_args {
 1552         int     fd1;
 1553         char    *path1;
 1554         int     fd2;
 1555         char    *path2;
 1556         int     flag;
 1557 };
 1558 #endif
 1559 int
 1560 sys_linkat(struct thread *td, struct linkat_args *uap)
 1561 {
 1562         int flag;
 1563 
 1564         flag = uap->flag;
 1565         if (flag & ~AT_SYMLINK_FOLLOW)
 1566                 return (EINVAL);
 1567 
 1568         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1569             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1570 }
 1571 
 1572 int hardlink_check_uid = 0;
 1573 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1574     &hardlink_check_uid, 0,
 1575     "Unprivileged processes cannot create hard links to files owned by other "
 1576     "users");
 1577 static int hardlink_check_gid = 0;
 1578 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1579     &hardlink_check_gid, 0,
 1580     "Unprivileged processes cannot create hard links to files owned by other "
 1581     "groups");
 1582 
 1583 static int
 1584 can_hardlink(struct vnode *vp, struct ucred *cred)
 1585 {
 1586         struct vattr va;
 1587         int error;
 1588 
 1589         if (!hardlink_check_uid && !hardlink_check_gid)
 1590                 return (0);
 1591 
 1592         error = VOP_GETATTR(vp, &va, cred);
 1593         if (error != 0)
 1594                 return (error);
 1595 
 1596         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1597                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1598                 if (error)
 1599                         return (error);
 1600         }
 1601 
 1602         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1603                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1604                 if (error)
 1605                         return (error);
 1606         }
 1607 
 1608         return (0);
 1609 }
 1610 
 1611 int
 1612 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1613 {
 1614 
 1615         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1616 }
 1617 
 1618 int
 1619 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1620     enum uio_seg segflg, int follow)
 1621 {
 1622         struct vnode *vp;
 1623         struct mount *mp;
 1624         struct nameidata nd;
 1625         int vfslocked;
 1626         int lvfslocked;
 1627         int error;
 1628 
 1629         bwillwrite();
 1630         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
 1631             fd1, td);
 1632 
 1633         if ((error = namei(&nd)) != 0)
 1634                 return (error);
 1635         vfslocked = NDHASGIANT(&nd);
 1636         NDFREE(&nd, NDF_ONLY_PNBUF);
 1637         vp = nd.ni_vp;
 1638         if (vp->v_type == VDIR) {
 1639                 vrele(vp);
 1640                 VFS_UNLOCK_GIANT(vfslocked);
 1641                 return (EPERM);         /* POSIX */
 1642         }
 1643         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1644                 vrele(vp);
 1645                 VFS_UNLOCK_GIANT(vfslocked);
 1646                 return (error);
 1647         }
 1648         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1649             segflg, path2, fd2, td);
 1650         if ((error = namei(&nd)) == 0) {
 1651                 lvfslocked = NDHASGIANT(&nd);
 1652                 if (nd.ni_vp != NULL) {
 1653                         if (nd.ni_dvp == nd.ni_vp)
 1654                                 vrele(nd.ni_dvp);
 1655                         else
 1656                                 vput(nd.ni_dvp);
 1657                         vrele(nd.ni_vp);
 1658                         error = EEXIST;
 1659                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1660                     == 0) {
 1661                         error = can_hardlink(vp, td->td_ucred);
 1662                         if (error == 0)
 1663 #ifdef MAC
 1664                                 error = mac_vnode_check_link(td->td_ucred,
 1665                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1666                         if (error == 0)
 1667 #endif
 1668                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1669                         VOP_UNLOCK(vp, 0);
 1670                         vput(nd.ni_dvp);
 1671                 }
 1672                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1673                 VFS_UNLOCK_GIANT(lvfslocked);
 1674         }
 1675         vrele(vp);
 1676         vn_finished_write(mp);
 1677         VFS_UNLOCK_GIANT(vfslocked);
 1678         return (error);
 1679 }
 1680 
 1681 /*
 1682  * Make a symbolic link.
 1683  */
 1684 #ifndef _SYS_SYSPROTO_H_
 1685 struct symlink_args {
 1686         char    *path;
 1687         char    *link;
 1688 };
 1689 #endif
 1690 int
 1691 sys_symlink(td, uap)
 1692         struct thread *td;
 1693         register struct symlink_args /* {
 1694                 char *path;
 1695                 char *link;
 1696         } */ *uap;
 1697 {
 1698 
 1699         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1700 }
 1701 
 1702 #ifndef _SYS_SYSPROTO_H_
 1703 struct symlinkat_args {
 1704         char    *path;
 1705         int     fd;
 1706         char    *path2;
 1707 };
 1708 #endif
 1709 int
 1710 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1711 {
 1712 
 1713         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1714             UIO_USERSPACE));
 1715 }
 1716 
 1717 int
 1718 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1719 {
 1720 
 1721         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1722 }
 1723 
 1724 int
 1725 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1726     enum uio_seg segflg)
 1727 {
 1728         struct mount *mp;
 1729         struct vattr vattr;
 1730         char *syspath;
 1731         int error;
 1732         struct nameidata nd;
 1733         int vfslocked;
 1734 
 1735         if (segflg == UIO_SYSSPACE) {
 1736                 syspath = path1;
 1737         } else {
 1738                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1739                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1740                         goto out;
 1741         }
 1742         AUDIT_ARG_TEXT(syspath);
 1743 restart:
 1744         bwillwrite();
 1745         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1746             segflg, path2, fd, td);
 1747         if ((error = namei(&nd)) != 0)
 1748                 goto out;
 1749         vfslocked = NDHASGIANT(&nd);
 1750         if (nd.ni_vp) {
 1751                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1752                 if (nd.ni_vp == nd.ni_dvp)
 1753                         vrele(nd.ni_dvp);
 1754                 else
 1755                         vput(nd.ni_dvp);
 1756                 vrele(nd.ni_vp);
 1757                 VFS_UNLOCK_GIANT(vfslocked);
 1758                 error = EEXIST;
 1759                 goto out;
 1760         }
 1761         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1762                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1763                 vput(nd.ni_dvp);
 1764                 VFS_UNLOCK_GIANT(vfslocked);
 1765                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1766                         goto out;
 1767                 goto restart;
 1768         }
 1769         VATTR_NULL(&vattr);
 1770         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1771 #ifdef MAC
 1772         vattr.va_type = VLNK;
 1773         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1774             &vattr);
 1775         if (error)
 1776                 goto out2;
 1777 #endif
 1778         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1779         if (error == 0)
 1780                 vput(nd.ni_vp);
 1781 #ifdef MAC
 1782 out2:
 1783 #endif
 1784         NDFREE(&nd, NDF_ONLY_PNBUF);
 1785         vput(nd.ni_dvp);
 1786         vn_finished_write(mp);
 1787         VFS_UNLOCK_GIANT(vfslocked);
 1788 out:
 1789         if (segflg != UIO_SYSSPACE)
 1790                 uma_zfree(namei_zone, syspath);
 1791         return (error);
 1792 }
 1793 
 1794 /*
 1795  * Delete a whiteout from the filesystem.
 1796  */
 1797 int
 1798 sys_undelete(td, uap)
 1799         struct thread *td;
 1800         register struct undelete_args /* {
 1801                 char *path;
 1802         } */ *uap;
 1803 {
 1804         int error;
 1805         struct mount *mp;
 1806         struct nameidata nd;
 1807         int vfslocked;
 1808 
 1809 restart:
 1810         bwillwrite();
 1811         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1812             UIO_USERSPACE, uap->path, td);
 1813         error = namei(&nd);
 1814         if (error)
 1815                 return (error);
 1816         vfslocked = NDHASGIANT(&nd);
 1817 
 1818         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1819                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1820                 if (nd.ni_vp == nd.ni_dvp)
 1821                         vrele(nd.ni_dvp);
 1822                 else
 1823                         vput(nd.ni_dvp);
 1824                 if (nd.ni_vp)
 1825                         vrele(nd.ni_vp);
 1826                 VFS_UNLOCK_GIANT(vfslocked);
 1827                 return (EEXIST);
 1828         }
 1829         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1830                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1831                 vput(nd.ni_dvp);
 1832                 VFS_UNLOCK_GIANT(vfslocked);
 1833                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1834                         return (error);
 1835                 goto restart;
 1836         }
 1837         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1838         NDFREE(&nd, NDF_ONLY_PNBUF);
 1839         vput(nd.ni_dvp);
 1840         vn_finished_write(mp);
 1841         VFS_UNLOCK_GIANT(vfslocked);
 1842         return (error);
 1843 }
 1844 
 1845 /*
 1846  * Delete a name from the filesystem.
 1847  */
 1848 #ifndef _SYS_SYSPROTO_H_
 1849 struct unlink_args {
 1850         char    *path;
 1851 };
 1852 #endif
 1853 int
 1854 sys_unlink(td, uap)
 1855         struct thread *td;
 1856         struct unlink_args /* {
 1857                 char *path;
 1858         } */ *uap;
 1859 {
 1860 
 1861         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1862 }
 1863 
 1864 #ifndef _SYS_SYSPROTO_H_
 1865 struct unlinkat_args {
 1866         int     fd;
 1867         char    *path;
 1868         int     flag;
 1869 };
 1870 #endif
 1871 int
 1872 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1873 {
 1874         int flag = uap->flag;
 1875         int fd = uap->fd;
 1876         char *path = uap->path;
 1877 
 1878         if (flag & ~AT_REMOVEDIR)
 1879                 return (EINVAL);
 1880 
 1881         if (flag & AT_REMOVEDIR)
 1882                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1883         else
 1884                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1885 }
 1886 
 1887 int
 1888 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1889 {
 1890 
 1891         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1892 }
 1893 
/*
 * Remove the directory entry named by path, looked up relative to fd.
 *
 * pathseg tells namei() which address space path lives in.  When oldinum
 * is non-zero the entry is only removed if vn_stat() reports that inode
 * number for it; a mismatch yields EIDRM.  When oldinum is zero a
 * directory is refused with EPERM.  The root of a mounted filesystem
 * (VV_ROOT) is refused with EBUSY.
 *
 * Returns 0 on success or an errno value.
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
        struct mount *mp;
        struct vnode *vp;
        int error;
        struct nameidata nd;
        struct stat sb;
        int vfslocked;

restart:
        bwillwrite();
        NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
            pathseg, path, fd, td);
        /* A lookup failure of EINVAL is reported to the caller as EPERM. */
        if ((error = namei(&nd)) != 0)
                return (error == EINVAL ? EPERM : error);
        vfslocked = NDHASGIANT(&nd);
        vp = nd.ni_vp;
        /* error is 0 here; the checks below may set it. */
        if (vp->v_type == VDIR && oldinum == 0) {
                error = EPERM;          /* POSIX */
        } else if (oldinum != 0 &&
                  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
                  sb.st_ino != oldinum) {
                        error = EIDRM;  /* Identifier removed */
        } else {
                /*
                 * The root of a mounted filesystem cannot be deleted.
                 *
                 * XXX: can this only be a VDIR case?
                 */
                if (vp->v_vflag & VV_ROOT)
                        error = EBUSY;
        }
        if (error == 0) {
                /*
                 * Try to start the write without sleeping.  On failure,
                 * release everything the lookup returned, wait with
                 * V_XSLEEP, and redo the whole lookup.
                 */
                if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                        vput(nd.ni_dvp);
                        /*
                         * Leaf and parent may be the same vnode; in that
                         * case it gets vrele() rather than a second vput().
                         */
                        if (vp == nd.ni_dvp)
                                vrele(vp);
                        else
                                vput(vp);
                        VFS_UNLOCK_GIANT(vfslocked);
                        if ((error = vn_start_write(NULL, &mp,
                            V_XSLEEP | PCATCH)) != 0)
                                return (error);
                        goto restart;
                }
#ifdef MAC
                error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
                    &nd.ni_cnd);
                if (error)
                        goto out;
#endif
                vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
                error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
                vn_finished_write(mp);
        }
        /* Common exit: release the path buffer, both vnodes and Giant. */
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        if (vp == nd.ni_dvp)
                vrele(vp);
        else
                vput(vp);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}
 1964 
 1965 /*
 1966  * Reposition read/write file offset.
 1967  */
 1968 #ifndef _SYS_SYSPROTO_H_
 1969 struct lseek_args {
 1970         int     fd;
 1971         int     pad;
 1972         off_t   offset;
 1973         int     whence;
 1974 };
 1975 #endif
 1976 int
 1977 sys_lseek(td, uap)
 1978         struct thread *td;
 1979         register struct lseek_args /* {
 1980                 int fd;
 1981                 int pad;
 1982                 off_t offset;
 1983                 int whence;
 1984         } */ *uap;
 1985 {
 1986         struct ucred *cred = td->td_ucred;
 1987         struct file *fp;
 1988         struct vnode *vp;
 1989         struct vattr vattr;
 1990         off_t foffset, offset, size;
 1991         int error, noneg;
 1992         int vfslocked;
 1993 
 1994         AUDIT_ARG_FD(uap->fd);
 1995         if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
 1996                 return (error);
 1997         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 1998                 fdrop(fp, td);
 1999                 return (ESPIPE);
 2000         }
 2001         vp = fp->f_vnode;
 2002         foffset = foffset_lock(fp, 0);
 2003         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2004         noneg = (vp->v_type != VCHR);
 2005         offset = uap->offset;
 2006         switch (uap->whence) {
 2007         case L_INCR:
 2008                 if (noneg &&
 2009                     (foffset < 0 ||
 2010                     (offset > 0 && foffset > OFF_MAX - offset))) {
 2011                         error = EOVERFLOW;
 2012                         break;
 2013                 }
 2014                 offset += foffset;
 2015                 break;
 2016         case L_XTND:
 2017                 vn_lock(vp, LK_SHARED | LK_RETRY);
 2018                 error = VOP_GETATTR(vp, &vattr, cred);
 2019                 VOP_UNLOCK(vp, 0);
 2020                 if (error)
 2021                         break;
 2022 
 2023                 /*
 2024                  * If the file references a disk device, then fetch
 2025                  * the media size and use that to determine the ending
 2026                  * offset.
 2027                  */
 2028                 if (vattr.va_size == 0 && vp->v_type == VCHR &&
 2029                     fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 2030                         vattr.va_size = size;
 2031                 if (noneg &&
 2032                     (vattr.va_size > OFF_MAX ||
 2033                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 2034                         error = EOVERFLOW;
 2035                         break;
 2036                 }
 2037                 offset += vattr.va_size;
 2038                 break;
 2039         case L_SET:
 2040                 break;
 2041         case SEEK_DATA:
 2042                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 2043                 break;
 2044         case SEEK_HOLE:
 2045                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 2046                 break;
 2047         default:
 2048                 error = EINVAL;
 2049         }
 2050         if (error == 0 && noneg && offset < 0)
 2051                 error = EINVAL;
 2052         if (error != 0)
 2053                 goto drop;
 2054         VFS_KNOTE_UNLOCKED(vp, 0);
 2055         *(off_t *)(td->td_retval) = offset;
 2056 drop:
 2057         fdrop(fp, td);
 2058         VFS_UNLOCK_GIANT(vfslocked);
 2059         foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
 2060         return (error);
 2061 }
 2062 
 2063 #if defined(COMPAT_43)
 2064 /*
 2065  * Reposition read/write file offset.
 2066  */
 2067 #ifndef _SYS_SYSPROTO_H_
 2068 struct olseek_args {
 2069         int     fd;
 2070         long    offset;
 2071         int     whence;
 2072 };
 2073 #endif
 2074 int
 2075 olseek(td, uap)
 2076         struct thread *td;
 2077         register struct olseek_args /* {
 2078                 int fd;
 2079                 long offset;
 2080                 int whence;
 2081         } */ *uap;
 2082 {
 2083         struct lseek_args /* {
 2084                 int fd;
 2085                 int pad;
 2086                 off_t offset;
 2087                 int whence;
 2088         } */ nuap;
 2089 
 2090         nuap.fd = uap->fd;
 2091         nuap.offset = uap->offset;
 2092         nuap.whence = uap->whence;
 2093         return (sys_lseek(td, &nuap));
 2094 }
 2095 #endif /* COMPAT_43 */
 2096 
 2097 /* Version with the 'pad' argument */
 2098 int
 2099 freebsd6_lseek(td, uap)
 2100         struct thread *td;
 2101         register struct freebsd6_lseek_args *uap;
 2102 {
 2103         struct lseek_args ouap;
 2104 
 2105         ouap.fd = uap->fd;
 2106         ouap.offset = uap->offset;
 2107         ouap.whence = uap->whence;
 2108         return (sys_lseek(td, &ouap));
 2109 }
 2110 
 2111 /*
 2112  * Check access permissions using passed credentials.
 2113  */
 2114 static int
 2115 vn_access(vp, user_flags, cred, td)
 2116         struct vnode    *vp;
 2117         int             user_flags;
 2118         struct ucred    *cred;
 2119         struct thread   *td;
 2120 {
 2121         int error;
 2122         accmode_t accmode;
 2123 
 2124         /* Flags == 0 means only check for existence. */
 2125         error = 0;
 2126         if (user_flags) {
 2127                 accmode = 0;
 2128                 if (user_flags & R_OK)
 2129                         accmode |= VREAD;
 2130                 if (user_flags & W_OK)
 2131                         accmode |= VWRITE;
 2132                 if (user_flags & X_OK)
 2133                         accmode |= VEXEC;
 2134 #ifdef MAC
 2135                 error = mac_vnode_check_access(cred, vp, accmode);
 2136                 if (error)
 2137                         return (error);
 2138 #endif
 2139                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2140                         error = VOP_ACCESS(vp, accmode, cred, td);
 2141         }
 2142         return (error);
 2143 }
 2144 
 2145 /*
 2146  * Check access permissions using "real" credentials.
 2147  */
 2148 #ifndef _SYS_SYSPROTO_H_
 2149 struct access_args {
 2150         char    *path;
 2151         int     flags;
 2152 };
 2153 #endif
 2154 int
 2155 sys_access(td, uap)
 2156         struct thread *td;
 2157         register struct access_args /* {
 2158                 char *path;
 2159                 int flags;
 2160         } */ *uap;
 2161 {
 2162 
 2163         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 2164 }
 2165 
 2166 #ifndef _SYS_SYSPROTO_H_
 2167 struct faccessat_args {
 2168         int     dirfd;
 2169         char    *path;
 2170         int     mode;
 2171         int     flag;
 2172 }
 2173 #endif
 2174 int
 2175 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2176 {
 2177 
 2178         if (uap->flag & ~AT_EACCESS)
 2179                 return (EINVAL);
 2180         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2181             uap->mode));
 2182 }
 2183 
 2184 int
 2185 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2186 {
 2187 
 2188         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
 2189 }
 2190 
 2191 int
 2192 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2193     int flags, int mode)
 2194 {
 2195         struct ucred *cred, *tmpcred;
 2196         struct vnode *vp;
 2197         struct nameidata nd;
 2198         int vfslocked;
 2199         int error;
 2200 
 2201         /*
 2202          * Create and modify a temporary credential instead of one that
 2203          * is potentially shared.
 2204          */
 2205         if (!(flags & AT_EACCESS)) {
 2206                 cred = td->td_ucred;
 2207                 tmpcred = crdup(cred);
 2208                 tmpcred->cr_uid = cred->cr_ruid;
 2209                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2210                 td->td_ucred = tmpcred;
 2211         } else
 2212                 cred = tmpcred = td->td_ucred;
 2213         AUDIT_ARG_VALUE(mode);
 2214         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2215             AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
 2216         if ((error = namei(&nd)) != 0)
 2217                 goto out1;
 2218         vfslocked = NDHASGIANT(&nd);
 2219         vp = nd.ni_vp;
 2220 
 2221         error = vn_access(vp, mode, tmpcred, td);
 2222         NDFREE(&nd, NDF_ONLY_PNBUF);
 2223         vput(vp);
 2224         VFS_UNLOCK_GIANT(vfslocked);
 2225 out1:
 2226         if (!(flags & AT_EACCESS)) {
 2227                 td->td_ucred = cred;
 2228                 crfree(tmpcred);
 2229         }
 2230         return (error);
 2231 }
 2232 
 2233 /*
 2234  * Check access permissions using "effective" credentials.
 2235  */
 2236 #ifndef _SYS_SYSPROTO_H_
 2237 struct eaccess_args {
 2238         char    *path;
 2239         int     flags;
 2240 };
 2241 #endif
 2242 int
 2243 sys_eaccess(td, uap)
 2244         struct thread *td;
 2245         register struct eaccess_args /* {
 2246                 char *path;
 2247                 int flags;
 2248         } */ *uap;
 2249 {
 2250 
 2251         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 2252 }
 2253 
 2254 int
 2255 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 2256 {
 2257 
 2258         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
 2259 }
 2260 
 2261 #if defined(COMPAT_43)
 2262 /*
 2263  * Get file status; this version follows links.
 2264  */
 2265 #ifndef _SYS_SYSPROTO_H_
 2266 struct ostat_args {
 2267         char    *path;
 2268         struct ostat *ub;
 2269 };
 2270 #endif
 2271 int
 2272 ostat(td, uap)
 2273         struct thread *td;
 2274         register struct ostat_args /* {
 2275                 char *path;
 2276                 struct ostat *ub;
 2277         } */ *uap;
 2278 {
 2279         struct stat sb;
 2280         struct ostat osb;
 2281         int error;
 2282 
 2283         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2284         if (error)
 2285                 return (error);
 2286         cvtstat(&sb, &osb);
 2287         error = copyout(&osb, uap->ub, sizeof (osb));
 2288         return (error);
 2289 }
 2290 
 2291 /*
 2292  * Get file status; this version does not follow links.
 2293  */
 2294 #ifndef _SYS_SYSPROTO_H_
 2295 struct olstat_args {
 2296         char    *path;
 2297         struct ostat *ub;
 2298 };
 2299 #endif
 2300 int
 2301 olstat(td, uap)
 2302         struct thread *td;
 2303         register struct olstat_args /* {
 2304                 char *path;
 2305                 struct ostat *ub;
 2306         } */ *uap;
 2307 {
 2308         struct stat sb;
 2309         struct ostat osb;
 2310         int error;
 2311 
 2312         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2313         if (error)
 2314                 return (error);
 2315         cvtstat(&sb, &osb);
 2316         error = copyout(&osb, uap->ub, sizeof (osb));
 2317         return (error);
 2318 }
 2319 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
 2323 void
 2324 cvtstat(st, ost)
 2325         struct stat *st;
 2326         struct ostat *ost;
 2327 {
 2328 
 2329         bzero(ost, sizeof(*ost));
 2330         ost->st_dev = st->st_dev;
 2331         ost->st_ino = st->st_ino;
 2332         ost->st_mode = st->st_mode;
 2333         ost->st_nlink = st->st_nlink;
 2334         ost->st_uid = st->st_uid;
 2335         ost->st_gid = st->st_gid;
 2336         ost->st_rdev = st->st_rdev;
 2337         if (st->st_size < (quad_t)1 << 32)
 2338                 ost->st_size = st->st_size;
 2339         else
 2340                 ost->st_size = -2;
 2341         ost->st_atim = st->st_atim;
 2342         ost->st_mtim = st->st_mtim;
 2343         ost->st_ctim = st->st_ctim;
 2344         ost->st_blksize = st->st_blksize;
 2345         ost->st_blocks = st->st_blocks;
 2346         ost->st_flags = st->st_flags;
 2347         ost->st_gen = st->st_gen;
 2348 }
 2349 #endif /* COMPAT_43 */
 2350 
 2351 /*
 2352  * Get file status; this version follows links.
 2353  */
 2354 #ifndef _SYS_SYSPROTO_H_
 2355 struct stat_args {
 2356         char    *path;
 2357         struct stat *ub;
 2358 };
 2359 #endif
 2360 int
 2361 sys_stat(td, uap)
 2362         struct thread *td;
 2363         register struct stat_args /* {
 2364                 char *path;
 2365                 struct stat *ub;
 2366         } */ *uap;
 2367 {
 2368         struct stat sb;
 2369         int error;
 2370 
 2371         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2372         if (error == 0)
 2373                 error = copyout(&sb, uap->ub, sizeof (sb));
 2374         return (error);
 2375 }
 2376 
 2377 #ifndef _SYS_SYSPROTO_H_
 2378 struct fstatat_args {
 2379         int     fd;
 2380         char    *path;
 2381         struct stat     *buf;
 2382         int     flag;
 2383 }
 2384 #endif
 2385 int
 2386 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2387 {
 2388         struct stat sb;
 2389         int error;
 2390 
 2391         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2392             UIO_USERSPACE, &sb);
 2393         if (error == 0)
 2394                 error = copyout(&sb, uap->buf, sizeof (sb));
 2395         return (error);
 2396 }
 2397 
 2398 int
 2399 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2400 {
 2401 
 2402         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2403 }
 2404 
 2405 int
 2406 kern_statat(struct thread *td, int flag, int fd, char *path,
 2407     enum uio_seg pathseg, struct stat *sbp)
 2408 {
 2409 
 2410         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2411 }
 2412 
 2413 int
 2414 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2415     enum uio_seg pathseg, struct stat *sbp,
 2416     void (*hook)(struct vnode *vp, struct stat *sbp))
 2417 {
 2418         struct nameidata nd;
 2419         struct stat sb;
 2420         int error, vfslocked;
 2421 
 2422         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2423                 return (EINVAL);
 2424 
 2425         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2426             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
 2427             path, fd, CAP_FSTAT, td);
 2428 
 2429         if ((error = namei(&nd)) != 0)
 2430                 return (error);
 2431         vfslocked = NDHASGIANT(&nd);
 2432         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2433         if (!error) {
 2434                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2435                 if (S_ISREG(sb.st_mode))
 2436                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2437                 if (__predict_false(hook != NULL))
 2438                         hook(nd.ni_vp, &sb);
 2439         }
 2440         NDFREE(&nd, NDF_ONLY_PNBUF);
 2441         vput(nd.ni_vp);
 2442         VFS_UNLOCK_GIANT(vfslocked);
 2443         if (error)
 2444                 return (error);
 2445         *sbp = sb;
 2446 #ifdef KTRACE
 2447         if (KTRPOINT(td, KTR_STRUCT))
 2448                 ktrstat(&sb);
 2449 #endif
 2450         return (0);
 2451 }
 2452 
 2453 /*
 2454  * Get file status; this version does not follow links.
 2455  */
 2456 #ifndef _SYS_SYSPROTO_H_
 2457 struct lstat_args {
 2458         char    *path;
 2459         struct stat *ub;
 2460 };
 2461 #endif
 2462 int
 2463 sys_lstat(td, uap)
 2464         struct thread *td;
 2465         register struct lstat_args /* {
 2466                 char *path;
 2467                 struct stat *ub;
 2468         } */ *uap;
 2469 {
 2470         struct stat sb;
 2471         int error;
 2472 
 2473         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2474         if (error == 0)
 2475                 error = copyout(&sb, uap->ub, sizeof (sb));
 2476         return (error);
 2477 }
 2478 
 2479 int
 2480 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2481 {
 2482 
 2483         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2484             sbp));
 2485 }
 2486 
 2487 /*
 2488  * Implementation of the NetBSD [l]stat() functions.
 2489  */
 2490 void
 2491 cvtnstat(sb, nsb)
 2492         struct stat *sb;
 2493         struct nstat *nsb;
 2494 {
 2495         bzero(nsb, sizeof *nsb);
 2496         nsb->st_dev = sb->st_dev;
 2497         nsb->st_ino = sb->st_ino;
 2498         nsb->st_mode = sb->st_mode;
 2499         nsb->st_nlink = sb->st_nlink;
 2500         nsb->st_uid = sb->st_uid;
 2501         nsb->st_gid = sb->st_gid;
 2502         nsb->st_rdev = sb->st_rdev;
 2503         nsb->st_atim = sb->st_atim;
 2504         nsb->st_mtim = sb->st_mtim;
 2505         nsb->st_ctim = sb->st_ctim;
 2506         nsb->st_size = sb->st_size;
 2507         nsb->st_blocks = sb->st_blocks;
 2508         nsb->st_blksize = sb->st_blksize;
 2509         nsb->st_flags = sb->st_flags;
 2510         nsb->st_gen = sb->st_gen;
 2511         nsb->st_birthtim = sb->st_birthtim;
 2512 }
 2513 
 2514 #ifndef _SYS_SYSPROTO_H_
 2515 struct nstat_args {
 2516         char    *path;
 2517         struct nstat *ub;
 2518 };
 2519 #endif
 2520 int
 2521 sys_nstat(td, uap)
 2522         struct thread *td;
 2523         register struct nstat_args /* {
 2524                 char *path;
 2525                 struct nstat *ub;
 2526         } */ *uap;
 2527 {
 2528         struct stat sb;
 2529         struct nstat nsb;
 2530         int error;
 2531 
 2532         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2533         if (error)
 2534                 return (error);
 2535         cvtnstat(&sb, &nsb);
 2536         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2537         return (error);
 2538 }
 2539 
 2540 /*
 2541  * NetBSD lstat.  Get file status; this version does not follow links.
 2542  */
 2543 #ifndef _SYS_SYSPROTO_H_
 2544 struct lstat_args {
 2545         char    *path;
 2546         struct stat *ub;
 2547 };
 2548 #endif
 2549 int
 2550 sys_nlstat(td, uap)
 2551         struct thread *td;
 2552         register struct nlstat_args /* {
 2553                 char *path;
 2554                 struct nstat *ub;
 2555         } */ *uap;
 2556 {
 2557         struct stat sb;
 2558         struct nstat nsb;
 2559         int error;
 2560 
 2561         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2562         if (error)
 2563                 return (error);
 2564         cvtnstat(&sb, &nsb);
 2565         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2566         return (error);
 2567 }
 2568 
 2569 /*
 2570  * Get configurable pathname variables.
 2571  */
 2572 #ifndef _SYS_SYSPROTO_H_
 2573 struct pathconf_args {
 2574         char    *path;
 2575         int     name;
 2576 };
 2577 #endif
 2578 int
 2579 sys_pathconf(td, uap)
 2580         struct thread *td;
 2581         register struct pathconf_args /* {
 2582                 char *path;
 2583                 int name;
 2584         } */ *uap;
 2585 {
 2586 
 2587         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2588 }
 2589 
 2590 #ifndef _SYS_SYSPROTO_H_
 2591 struct lpathconf_args {
 2592         char    *path;
 2593         int     name;
 2594 };
 2595 #endif
 2596 int
 2597 sys_lpathconf(td, uap)
 2598         struct thread *td;
 2599         register struct lpathconf_args /* {
 2600                 char *path;
 2601                 int name;
 2602         } */ *uap;
 2603 {
 2604 
 2605         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
 2606 }
 2607 
 2608 int
 2609 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2610     u_long flags)
 2611 {
 2612         struct nameidata nd;
 2613         int error, vfslocked;
 2614 
 2615         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
 2616             flags, pathseg, path, td);
 2617         if ((error = namei(&nd)) != 0)
 2618                 return (error);
 2619         vfslocked = NDHASGIANT(&nd);
 2620         NDFREE(&nd, NDF_ONLY_PNBUF);
 2621 
 2622         /* If asynchronous I/O is available, it works for all files. */
 2623         if (name == _PC_ASYNC_IO)
 2624                 td->td_retval[0] = async_io_version;
 2625         else
 2626                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2627         vput(nd.ni_vp);
 2628         VFS_UNLOCK_GIANT(vfslocked);
 2629         return (error);
 2630 }
 2631 
 2632 /*
 2633  * Return target name of a symbolic link.
 2634  */
 2635 #ifndef _SYS_SYSPROTO_H_
 2636 struct readlink_args {
 2637         char    *path;
 2638         char    *buf;
 2639         size_t  count;
 2640 };
 2641 #endif
 2642 int
 2643 sys_readlink(td, uap)
 2644         struct thread *td;
 2645         register struct readlink_args /* {
 2646                 char *path;
 2647                 char *buf;
 2648                 size_t count;
 2649         } */ *uap;
 2650 {
 2651 
 2652         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2653             UIO_USERSPACE, uap->count));
 2654 }
 2655 #ifndef _SYS_SYSPROTO_H_
 2656 struct readlinkat_args {
 2657         int     fd;
 2658         char    *path;
 2659         char    *buf;
 2660         size_t  bufsize;
 2661 };
 2662 #endif
 2663 int
 2664 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2665 {
 2666 
 2667         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2668             uap->buf, UIO_USERSPACE, uap->bufsize));
 2669 }
 2670 
 2671 int
 2672 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2673     enum uio_seg bufseg, size_t count)
 2674 {
 2675 
 2676         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2677             count));
 2678 }
 2679 
 2680 int
 2681 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2682     char *buf, enum uio_seg bufseg, size_t count)
 2683 {
 2684         struct vnode *vp;
 2685         struct iovec aiov;
 2686         struct uio auio;
 2687         int error;
 2688         struct nameidata nd;
 2689         int vfslocked;
 2690 
 2691         if (count > IOSIZE_MAX)
 2692                 return (EINVAL);
 2693 
 2694         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2695             AUDITVNODE1, pathseg, path, fd, td);
 2696 
 2697         if ((error = namei(&nd)) != 0)
 2698                 return (error);
 2699         NDFREE(&nd, NDF_ONLY_PNBUF);
 2700         vfslocked = NDHASGIANT(&nd);
 2701         vp = nd.ni_vp;
 2702 #ifdef MAC
 2703         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2704         if (error) {
 2705                 vput(vp);
 2706                 VFS_UNLOCK_GIANT(vfslocked);
 2707                 return (error);
 2708         }
 2709 #endif
 2710         if (vp->v_type != VLNK)
 2711                 error = EINVAL;
 2712         else {
 2713                 aiov.iov_base = buf;
 2714                 aiov.iov_len = count;
 2715                 auio.uio_iov = &aiov;
 2716                 auio.uio_iovcnt = 1;
 2717                 auio.uio_offset = 0;
 2718                 auio.uio_rw = UIO_READ;
 2719                 auio.uio_segflg = bufseg;
 2720                 auio.uio_td = td;
 2721                 auio.uio_resid = count;
 2722                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2723                 td->td_retval[0] = count - auio.uio_resid;
 2724         }
 2725         vput(vp);
 2726         VFS_UNLOCK_GIANT(vfslocked);
 2727         return (error);
 2728 }
 2729 
 2730 /*
 2731  * Common implementation code for chflags() and fchflags().
 2732  */
 2733 static int
 2734 setfflags(td, vp, flags)
 2735         struct thread *td;
 2736         struct vnode *vp;
 2737         int flags;
 2738 {
 2739         int error;
 2740         struct mount *mp;
 2741         struct vattr vattr;
 2742 
 2743         /* We can't support the value matching VNOVAL. */
 2744         if (flags == VNOVAL)
 2745                 return (EOPNOTSUPP);
 2746 
 2747         /*
 2748          * Prevent non-root users from setting flags on devices.  When
 2749          * a device is reused, users can retain ownership of the device
 2750          * if they are allowed to set flags and programs assume that
 2751          * chown can't fail when done as root.
 2752          */
 2753         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2754                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2755                 if (error)
 2756                         return (error);
 2757         }
 2758 
 2759         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2760                 return (error);
 2761         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2762         VATTR_NULL(&vattr);
 2763         vattr.va_flags = flags;
 2764 #ifdef MAC
 2765         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2766         if (error == 0)
 2767 #endif
 2768                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2769         VOP_UNLOCK(vp, 0);
 2770         vn_finished_write(mp);
 2771         return (error);
 2772 }
 2773 
 2774 /*
 2775  * Change flags of a file given a path name.
       * The path is looked up with FOLLOW, unlike lchflags().
       *
       * Returns the namei() error if the lookup fails, otherwise the result
       * of setfflags().
 2776  */
 2777 #ifndef _SYS_SYSPROTO_H_
 2778 struct chflags_args {
 2779         char    *path;
 2780         int     flags;
 2781 };
 2782 #endif
 2783 int
 2784 sys_chflags(td, uap)
 2785         struct thread *td;
 2786         register struct chflags_args /* {
 2787                 char *path;
 2788                 int flags;
 2789         } */ *uap;
 2790 {
 2791         int error;
 2792         struct nameidata nd;
 2793         int vfslocked;
 2794 
 2795         AUDIT_ARG_FFLAGS(uap->flags);
 2796         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2797             uap->path, td);
 2798         if ((error = namei(&nd)) != 0)
 2799                 return (error);
              /*
               * Record whether the lookup left Giant held before releasing any
               * nameidata resources, matching sys_lchflags() and the other
               * path-based calls in this file.
               */
 2800         vfslocked = NDHASGIANT(&nd);
 2801         NDFREE(&nd, NDF_ONLY_PNBUF);
 2802         error = setfflags(td, nd.ni_vp, uap->flags);
              /* Release the reference on the looked-up vnode. */
 2803         vrele(nd.ni_vp);
 2804         VFS_UNLOCK_GIANT(vfslocked);
 2805         return (error);
 2806 }
 2807 
 2808 /*
 2809  * Same as chflags() but doesn't follow symlinks.
       * The lookup is done with NOFOLLOW instead of FOLLOW.
       *
       * Returns the namei() error if the lookup fails, otherwise the result
       * of setfflags().
 2810  */
 2811 int
 2812 sys_lchflags(td, uap)
 2813         struct thread *td;
 2814         register struct lchflags_args /* {
 2815                 char *path;
 2816                 int flags;
 2817         } */ *uap;
 2818 {
 2819         int error;
 2820         struct nameidata nd;
 2821         int vfslocked;
 2822 
 2823         AUDIT_ARG_FFLAGS(uap->flags);
 2824         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2825             uap->path, td);
 2826         if ((error = namei(&nd)) != 0)
 2827                 return (error);
              /* Remember the Giant state of the lookup so it can be undone below. */
 2828         vfslocked = NDHASGIANT(&nd);
 2829         NDFREE(&nd, NDF_ONLY_PNBUF);
 2830         error = setfflags(td, nd.ni_vp, uap->flags);
              /* Release the reference on the looked-up vnode. */
 2831         vrele(nd.ni_vp);
 2832         VFS_UNLOCK_GIANT(vfslocked);
 2833         return (error);
 2834 }
 2835 
 2836 /*
 2837  * Change flags of a file given a file descriptor.
       *
       * The descriptor is resolved with getvnode(), requesting CAP_FCHFLAGS.
       * Returns the getvnode() error if that fails, otherwise the result of
       * setfflags().  The file reference is dropped before returning.
 2838  */
 2839 #ifndef _SYS_SYSPROTO_H_
 2840 struct fchflags_args {
 2841         int     fd;
 2842         int     flags;
 2843 };
 2844 #endif
 2845 int
 2846 sys_fchflags(td, uap)
 2847         struct thread *td;
 2848         register struct fchflags_args /* {
 2849                 int fd;
 2850                 int flags;
 2851         } */ *uap;
 2852 {
 2853         struct file *fp;
 2854         int vfslocked;
 2855         int error;
 2856 
 2857         AUDIT_ARG_FD(uap->fd);
 2858         AUDIT_ARG_FFLAGS(uap->flags);
 2859         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
 2860             &fp)) != 0)
 2861                 return (error);
 2862         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2863 #ifdef AUDIT
              /* The vnode is held share-locked only while it is audited. */
 2864         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2865         AUDIT_ARG_VNODE1(fp->f_vnode);
 2866         VOP_UNLOCK(fp->f_vnode, 0);
 2867 #endif
              /* setfflags() takes its own exclusive lock on the vnode. */
 2868         error = setfflags(td, fp->f_vnode, uap->flags);
 2869         VFS_UNLOCK_GIANT(vfslocked);
 2870         fdrop(fp, td);
 2871         return (error);
 2872 }
 2873 
 2874 /*
 2875  * Common implementation code for chmod(), lchmod() and fchmod().
       *
       * Sets the mode of vp to (mode & ALLPERMS) on behalf of cred, which is
       * used for both the MAC check and VOP_SETATTR().  Returns 0 on success
       * or the error from vn_start_write(), the MAC check or VOP_SETATTR().
       * vp is locked exclusively here and unlocked again before returning.
 2876  */
 2877 int
 2878 setfmode(td, cred, vp, mode)
 2879         struct thread *td;
 2880         struct ucred *cred;
 2881         struct vnode *vp;
 2882         int mode;
 2883 {
 2884         int error;
 2885         struct mount *mp;
 2886         struct vattr vattr;
 2887 
 2888         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2889                 return (error);
 2890         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2891         VATTR_NULL(&vattr);
              /* Bits outside ALLPERMS are silently discarded. */
 2892         vattr.va_mode = mode & ALLPERMS;
 2893 #ifdef MAC
              /*
               * With MAC the "if" below guards VOP_SETATTR(); without MAC the
               * call is unconditional.
               */
 2894         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2895         if (error == 0)
 2896 #endif
 2897                 error = VOP_SETATTR(vp, &vattr, cred);
 2898         VOP_UNLOCK(vp, 0);
 2899         vn_finished_write(mp);
 2900         return (error);
 2901 }
 2902 
 2903 /*
 2904  * Change mode of a file given path name.
       *
       * System call entry point: forwards the user-space path and the mode
       * to kern_chmod() and returns its result.
 2905  */
 2906 #ifndef _SYS_SYSPROTO_H_
 2907 struct chmod_args {
 2908         char    *path;
 2909         int     mode;
 2910 };
 2911 #endif
 2912 int
 2913 sys_chmod(td, uap)
 2914         struct thread *td;
 2915         register struct chmod_args /* {
 2916                 char *path;
 2917                 int mode;
 2918         } */ *uap;
 2919 {
 2920 
 2921         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2922 }
 2923 
 2924 #ifndef _SYS_SYSPROTO_H_
 2925 struct fchmodat_args {
 2926         int     fd;
 2927         char    *path;
 2928         mode_t  mode;
 2929         int     flag;
 2930 };
 2931 #endif
      /*
       * Change mode of a file given a descriptor, a path name and a flag.
       *
       * System call entry point for fchmodat().  The only flag bit accepted
       * is AT_SYMLINK_NOFOLLOW; any other bit yields EINVAL.  Otherwise the
       * arguments are forwarded to kern_fchmodat() with a user-space path
       * and its result is returned.
       */
 2932 int
 2933 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2934 {
 2935         int flag = uap->flag;
 2936         int fd = uap->fd;
 2937         char *path = uap->path;
 2938         mode_t mode = uap->mode;
 2939 
              /* Reject unknown flag bits before doing any lookup. */
 2940         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2941                 return (EINVAL);
 2942 
 2943         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2944 }
 2945 
 2946 int
 2947 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2948 {
 2949 
              /*
               * chmod() is kern_fchmodat() with AT_FDCWD as the descriptor and
               * no flags, i.e. without AT_SYMLINK_NOFOLLOW.
               */
 2950         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2951 }
 2952 
 2953 /*
 2954  * Change mode of a file given path name (don't follow links.)
       *
       * System call entry point: calls kern_fchmodat() with AT_FDCWD, the
       * user-space path and AT_SYMLINK_NOFOLLOW, and returns its result.
 2955  */
 2956 #ifndef _SYS_SYSPROTO_H_
 2957 struct lchmod_args {
 2958         char    *path;
 2959         int     mode;
 2960 };
 2961 #endif
 2962 int
 2963 sys_lchmod(td, uap)
 2964         struct thread *td;
 2965         register struct lchmod_args /* {
 2966                 char *path;
 2967                 int mode;
 2968         } */ *uap;
 2969 {
 2970 
 2971         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2972             uap->mode, AT_SYMLINK_NOFOLLOW));
 2973 }
 2974 
 2975 
 2976 int
 2977 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2978     mode_t mode, int flag)
 2979 {
              /*
               * Common path-based implementation behind chmod(), lchmod() and
               * fchmodat(): looks up path (passing fd and CAP_FCHMOD to the
               * lookup) and applies mode with setfmode() using the thread's
               * credentials.  Returns the namei() error if the lookup fails,
               * otherwise the result of setfmode().
               */
 2980         int error;
 2981         struct nameidata nd;
 2982         int vfslocked;
 2983         int follow;
 2984 
 2985         AUDIT_ARG_MODE(mode);
              /* AT_SYMLINK_NOFOLLOW is the only flag bit examined here. */
 2986         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2987         NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
 2988             path, fd, CAP_FCHMOD, td);
 2989         if ((error = namei(&nd)) != 0)
 2990                 return (error);
 2991         vfslocked = NDHASGIANT(&nd);
 2992         NDFREE(&nd, NDF_ONLY_PNBUF);
 2993         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
              /* Release the reference on the looked-up vnode. */
 2994         vrele(nd.ni_vp);
 2995         VFS_UNLOCK_GIANT(vfslocked);
 2996         return (error);
 2997 }
 2998 
 2999 /*
 3000  * Change mode of a file given a file descriptor.
       *
       * The descriptor is resolved with fget(), requesting CAP_FCHMOD, and
       * the change is dispatched through fo_chmod() with the thread's
       * credentials rather than by calling setfmode() directly.  Returns the
       * fget() error if that fails, otherwise the result of fo_chmod().
 3001  */
 3002 #ifndef _SYS_SYSPROTO_H_
 3003 struct fchmod_args {
 3004         int     fd;
 3005         int     mode;
 3006 };
 3007 #endif
 3008 int
 3009 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 3010 {
 3011         struct file *fp;
 3012         int error;
 3013 
 3014         AUDIT_ARG_FD(uap->fd);
 3015         AUDIT_ARG_MODE(uap->mode);
 3016 
 3017         error = fget(td, uap->fd, CAP_FCHMOD, &fp);
 3018         if (error != 0)
 3019                 return (error);
 3020         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
              /* Drop the file reference taken by fget(). */
 3021         fdrop(fp, td);
 3022         return (error);
 3023 }
 3024 
 3025 /*
 3026  * Common implementation for chown(), lchown(), and fchown()
       *
       * Sets the owner and group of vp to uid and gid on behalf of cred,
       * which is used for both the MAC check and VOP_SETATTR().  Returns 0
       * on success or the error from vn_start_write(), the MAC check or
       * VOP_SETATTR().  vp is locked exclusively here and unlocked again
       * before returning.
 3027  */
 3028 int
 3029 setfown(td, cred, vp, uid, gid)
 3030         struct thread *td;
 3031         struct ucred *cred;
 3032         struct vnode *vp;
 3033         uid_t uid;
 3034         gid_t gid;
 3035 {
 3036         int error;
 3037         struct mount *mp;
 3038         struct vattr vattr;
 3039 
 3040         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3041                 return (error);
 3042         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3043         VATTR_NULL(&vattr);
              /* Both ids are stored as given; no value is filtered out here. */
 3044         vattr.va_uid = uid;
 3045         vattr.va_gid = gid;
 3046 #ifdef MAC
              /*
               * With MAC the "if" below guards VOP_SETATTR(); without MAC the
               * call is unconditional.
               */
 3047         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 3048             vattr.va_gid);
 3049         if (error == 0)
 3050 #endif
 3051                 error = VOP_SETATTR(vp, &vattr, cred);
 3052         VOP_UNLOCK(vp, 0);
 3053         vn_finished_write(mp);
 3054         return (error);
 3055 }
 3056 
 3057 /*
 3058  * Set ownership given a path name.
       *
       * System call entry point: forwards the user-space path, uid and gid
       * to kern_chown() and returns its result.
 3059  */
 3060 #ifndef _SYS_SYSPROTO_H_
 3061 struct chown_args {
 3062         char    *path;
 3063         int     uid;
 3064         int     gid;
 3065 };
 3066 #endif
 3067 int
 3068 sys_chown(td, uap)
 3069         struct thread *td;
 3070         register struct chown_args /* {
 3071                 char *path;
 3072                 int uid;
 3073                 int gid;
 3074         } */ *uap;
 3075 {
 3076 
 3077         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3078 }
 3079 
 3080 #ifndef _SYS_SYSPROTO_H_
 3081 struct fchownat_args {
 3082         int fd;
 3083         const char * path;
 3084         uid_t uid;
 3085         gid_t gid;
 3086         int flag;
 3087 };
 3088 #endif
      /*
       * Set ownership given a descriptor, a path name and a flag.
       *
       * System call entry point for fchownat().  The only flag bit accepted
       * is AT_SYMLINK_NOFOLLOW; any other bit yields EINVAL.  Otherwise the
       * arguments are forwarded to kern_fchownat() with a user-space path
       * and its result is returned.
       */
 3089 int
 3090 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 3091 {
 3092         int flag;
 3093 
 3094         flag = uap->flag;
              /* Reject unknown flag bits before doing any lookup. */
 3095         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3096                 return (EINVAL);
 3097 
 3098         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3099             uap->gid, uap->flag));
 3100 }
 3101 
 3102 int
 3103 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3104     int gid)
 3105 {
 3106 
              /*
               * chown() is kern_fchownat() with AT_FDCWD as the descriptor and
               * no flags, i.e. without AT_SYMLINK_NOFOLLOW.
               */
 3107         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 3108 }
 3109 
 3110 int
 3111 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3112     int uid, int gid, int flag)
 3113 {
              /*
               * Common path-based implementation behind chown(), lchown() and
               * fchownat(): looks up path (passing fd and CAP_FCHOWN to the
               * lookup) and applies uid/gid with setfown() using the thread's
               * credentials.  Returns the namei() error if the lookup fails,
               * otherwise the result of setfown().
               */
 3114         struct nameidata nd;
 3115         int error, vfslocked, follow;
 3116 
 3117         AUDIT_ARG_OWNER(uid, gid);
              /* AT_SYMLINK_NOFOLLOW is the only flag bit examined here. */
 3118         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3119         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
 3120             path, fd, CAP_FCHOWN, td);
 3121 
 3122         if ((error = namei(&nd)) != 0)
 3123                 return (error);
 3124         vfslocked = NDHASGIANT(&nd);
 3125         NDFREE(&nd, NDF_ONLY_PNBUF);
 3126         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
              /* Release the reference on the looked-up vnode. */
 3127         vrele(nd.ni_vp);
 3128         VFS_UNLOCK_GIANT(vfslocked);
 3129         return (error);
 3130 }
 3131 
 3132 /*
 3133  * Set ownership given a path name, do not cross symlinks.
       *
       * System call entry point: forwards the user-space path, uid and gid
       * to kern_lchown() and returns its result.
 3134  */
 3135 #ifndef _SYS_SYSPROTO_H_
 3136 struct lchown_args {
 3137         char    *path;
 3138         int     uid;
 3139         int     gid;
 3140 };
 3141 #endif
 3142 int
 3143 sys_lchown(td, uap)
 3144         struct thread *td;
 3145         register struct lchown_args /* {
 3146                 char *path;
 3147                 int uid;
 3148                 int gid;
 3149         } */ *uap;
 3150 {
 3151 
 3152         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3153 }
 3154 
 3155 int
 3156 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3157     int gid)
 3158 {
 3159 
              /*
               * lchown() is kern_fchownat() with AT_FDCWD as the descriptor and
               * AT_SYMLINK_NOFOLLOW set.
               */
 3160         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3161             AT_SYMLINK_NOFOLLOW));
 3162 }
 3163 
 3164 /*
 3165  * Set ownership given a file descriptor.
       *
       * The descriptor is resolved with fget(), requesting CAP_FCHOWN, and
       * the change is dispatched through fo_chown() with the thread's
       * credentials rather than by calling setfown() directly.  Returns the
       * fget() error if that fails, otherwise the result of fo_chown().
 3166  */
 3167 #ifndef _SYS_SYSPROTO_H_
 3168 struct fchown_args {
 3169         int     fd;
 3170         int     uid;
 3171         int     gid;
 3172 };
 3173 #endif
 3174 int
 3175 sys_fchown(td, uap)
 3176         struct thread *td;
 3177         register struct fchown_args /* {
 3178                 int fd;
 3179                 int uid;
 3180                 int gid;
 3181         } */ *uap;
 3182 {
 3183         struct file *fp;
 3184         int error;
 3185 
 3186         AUDIT_ARG_FD(uap->fd);
 3187         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3188         error = fget(td, uap->fd, CAP_FCHOWN, &fp);
 3189         if (error != 0)
 3190                 return (error);
 3191         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
              /* Drop the file reference taken by fget(). */
 3192         fdrop(fp, td);
 3193         return (error);
 3194 }
 3195 
 3196 /*
 3197  * Common implementation code for utimes(), lutimes(), and futimes().
       *
       * Produces the two timespecs tsp[0] and tsp[1] from the caller's
       * timeval pair.  If usrtvp is NULL both entries are set to the value
       * returned by vfs_timestamp().  Otherwise the two timevals are read
       * directly when tvpseg is UIO_SYSSPACE, or fetched with copyin() for
       * any other segment, validated and converted.
       *
       * Returns 0 on success, the copyin() error, or EINVAL when either
       * tv_usec is negative or not below 1000000.
 3198  */
 3199 static int
 3200 getutimes(usrtvp, tvpseg, tsp)
 3201         const struct timeval *usrtvp;
 3202         enum uio_seg tvpseg;
 3203         struct timespec *tsp;
 3204 {
 3205         struct timeval tv[2];
 3206         const struct timeval *tvp;
 3207         int error;
 3208 
 3209         if (usrtvp == NULL) {
                      /* No times supplied: use one timestamp for both entries. */
 3210                 vfs_timestamp(&tsp[0]);
 3211                 tsp[1] = tsp[0];
 3212         } else {
 3213                 if (tvpseg == UIO_SYSSPACE) {
 3214                         tvp = usrtvp;
 3215                 } else {
                              /* sizeof(tv) covers both array elements. */
 3216                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3217                                 return (error);
 3218                         tvp = tv;
 3219                 }
 3220 
                      /* Only the microsecond fields are range-checked. */
 3221                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3222                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3223                         return (EINVAL);
 3224                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3225                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3226         }
 3227         return (0);
 3228 }
 3229 
 3230 /*
 3231  * Common implementation code for utimes(), lutimes(), and futimes().
       *
       * Applies timestamps to vp: ts[0] becomes the access time and ts[1]
       * the modification time.  When numtimes is greater than 2, ts[2] is
       * used as the birth time.  When numtimes is less than 3 and ts[1] is
       * earlier than the vnode's current birth time, the birth time is set
       * to ts[1] as well.  A non-zero nullflag sets VA_UTIMES_NULL in the
       * request.
       *
       * Returns 0 on success or the error from vn_start_write(), the MAC
       * check or VOP_SETATTR().  vp is locked exclusively here and unlocked
       * again before returning.
 3232  */
 3233 static int
 3234 setutimes(td, vp, ts, numtimes, nullflag)
 3235         struct thread *td;
 3236         struct vnode *vp;
 3237         const struct timespec *ts;
 3238         int numtimes;
 3239         int nullflag;
 3240 {
 3241         int error, setbirthtime;
 3242         struct mount *mp;
 3243         struct vattr vattr;
 3244 
 3245         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3246                 return (error);
 3247         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3248         setbirthtime = 0;
              /*
               * vattr is used here only to read the current birth time; a
               * failing VOP_GETATTR() simply leaves setbirthtime at 0 and its
               * error is not propagated.
               */
 3249         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3250             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3251                 setbirthtime = 1;
              /* Reuse vattr as the set request from here on. */
 3252         VATTR_NULL(&vattr);
 3253         vattr.va_atime = ts[0];
 3254         vattr.va_mtime = ts[1];
 3255         if (setbirthtime)
 3256                 vattr.va_birthtime = ts[1];
 3257         if (numtimes > 2)
 3258                 vattr.va_birthtime = ts[2];
 3259         if (nullflag)
 3260                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3261 #ifdef MAC
 3262         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3263             vattr.va_mtime);
 3264 #endif
              /*
               * Without MAC, error is still 0 from the successful
               * vn_start_write() above, so VOP_SETATTR() always runs.
               */
 3265         if (error == 0)
 3266                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3267         VOP_UNLOCK(vp, 0);
 3268         vn_finished_write(mp);
 3269         return (error);
 3270 }
 3271 
 3272 /*
 3273  * Set the access and modification times of a file.
       *
       * System call entry point: forwards the user-space path and the
       * user-space timeval pointer to kern_utimes() and returns its result.
 3274  */
 3275 #ifndef _SYS_SYSPROTO_H_
 3276 struct utimes_args {
 3277         char    *path;
 3278         struct  timeval *tptr;
 3279 };
 3280 #endif
 3281 int
 3282 sys_utimes(td, uap)
 3283         struct thread *td;
 3284         register struct utimes_args /* {
 3285                 char *path;
 3286                 struct timeval *tptr;
 3287         } */ *uap;
 3288 {
 3289 
 3290         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3291             UIO_USERSPACE));
 3292 }
 3293 
 3294 #ifndef _SYS_SYSPROTO_H_
 3295 struct futimesat_args {
 3296         int fd;
 3297         const char * path;
 3298         const struct timeval * times;
 3299 };
 3300 #endif
      /*
       * Set the access and modification times of a file given a descriptor
       * and a path name.
       *
       * System call entry point: forwards fd, the user-space path and the
       * user-space timeval pointer to kern_utimesat() and returns its result.
       */
 3301 int
 3302 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3303 {
 3304 
 3305         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3306             uap->times, UIO_USERSPACE));
 3307 }
 3308 
 3309 int
 3310 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3311     struct timeval *tptr, enum uio_seg tptrseg)
 3312 {
 3313 
              /* utimes() is kern_utimesat() with AT_FDCWD as the descriptor. */
 3314         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3315 }
 3316 
 3317 int
 3318 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3319     struct timeval *tptr, enum uio_seg tptrseg)
 3320 {
              /*
               * Common implementation behind utimes() and futimesat(): converts
               * the timeval pair with getutimes(), looks up path with FOLLOW
               * (passing fd and CAP_FUTIMES to the lookup) and applies the two
               * times with setutimes().  Returns the getutimes() or namei()
               * error if either fails, otherwise the result of setutimes().
               */
 3321         struct nameidata nd;
 3322         struct timespec ts[2];
 3323         int error, vfslocked;
 3324 
              /* The times are resolved first, before any lookup is attempted. */
 3325         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3326                 return (error);
 3327         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
 3328             path, fd, CAP_FUTIMES, td);
 3329 
 3330         if ((error = namei(&nd)) != 0)
 3331                 return (error);
 3332         vfslocked = NDHASGIANT(&nd);
 3333         NDFREE(&nd, NDF_ONLY_PNBUF);
              /* A NULL tptr is passed on as setutimes()'s nullflag. */
 3334         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3335         vrele(nd.ni_vp);
 3336         VFS_UNLOCK_GIANT(vfslocked);
 3337         return (error);
 3338 }
 3339 
 3340 /*
 3341  * Set the access and modification times of a file.
       * Unlike utimes(), the path is looked up with NOFOLLOW.
       *
       * System call entry point: forwards the user-space path and the
       * user-space timeval pointer to kern_lutimes() and returns its result.
 3342  */
 3343 #ifndef _SYS_SYSPROTO_H_
 3344 struct lutimes_args {
 3345         char    *path;
 3346         struct  timeval *tptr;
 3347 };
 3348 #endif
 3349 int
 3350 sys_lutimes(td, uap)
 3351         struct thread *td;
 3352         register struct lutimes_args /* {
 3353                 char *path;
 3354                 struct timeval *tptr;
 3355         } */ *uap;
 3356 {
 3357 
 3358         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3359             UIO_USERSPACE));
 3360 }
 3361 
 3362 int
 3363 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3364     struct timeval *tptr, enum uio_seg tptrseg)
 3365 {
              /*
               * Implementation of lutimes(): converts the timeval pair with
               * getutimes(), looks up path with NOFOLLOW and applies the two
               * times with setutimes().  Returns the getutimes() or namei()
               * error if either fails, otherwise the result of setutimes().
               */
 3366         struct timespec ts[2];
 3367         int error;
 3368         struct nameidata nd;
 3369         int vfslocked;
 3370 
 3371         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3372                 return (error);
 3373         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3374         if ((error = namei(&nd)) != 0)
 3375                 return (error);
 3376         vfslocked = NDHASGIANT(&nd);
 3377         NDFREE(&nd, NDF_ONLY_PNBUF);
              /* A NULL tptr is passed on as setutimes()'s nullflag. */
 3378         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3379         vrele(nd.ni_vp);
 3380         VFS_UNLOCK_GIANT(vfslocked);
 3381         return (error);
 3382 }
 3383 
 3384 /*
 3385  * Set the access and modification times of a file.
       * The file is named by an open file descriptor rather than a path.
       *
       * System call entry point: forwards fd and the user-space timeval
       * pointer to kern_futimes() and returns its result.
 3386  */
 3387 #ifndef _SYS_SYSPROTO_H_
 3388 struct futimes_args {
 3389         int     fd;
 3390         struct  timeval *tptr;
 3391 };
 3392 #endif
 3393 int
 3394 sys_futimes(td, uap)
 3395         struct thread *td;
 3396         register struct futimes_args /* {
 3397                 int  fd;
 3398                 struct timeval *tptr;
 3399         } */ *uap;
 3400 {
 3401 
 3402         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3403 }
 3404 
 3405 int
 3406 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3407     enum uio_seg tptrseg)
 3408 {
              /*
               * Implementation of futimes(): converts the timeval pair with
               * getutimes(), resolves fd with getvnode() (requesting
               * CAP_FUTIMES) and applies the two times with setutimes().
               * Returns the getutimes() or getvnode() error if either fails,
               * otherwise the result of setutimes().
               */
 3409         struct timespec ts[2];
 3410         struct file *fp;
 3411         int vfslocked;
 3412         int error;
 3413 
 3414         AUDIT_ARG_FD(fd);
 3415         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3416                 return (error);
 3417         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
 3418             != 0)
 3419                 return (error);
 3420         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3421 #ifdef AUDIT
              /* The vnode is held share-locked only while it is audited. */
 3422         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3423         AUDIT_ARG_VNODE1(fp->f_vnode);
 3424         VOP_UNLOCK(fp->f_vnode, 0);
 3425 #endif
              /* A NULL tptr is passed on as setutimes()'s nullflag. */
 3426         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3427         VFS_UNLOCK_GIANT(vfslocked);
 3428         fdrop(fp, td);
 3429         return (error);
 3430 }
 3431 
 3432 /*
 3433  * Truncate a file given its path name.
       *
       * System call entry point: forwards the user-space path and the
       * length to kern_truncate() and returns its result.  The pad member
       * of the argument structure is not read.
 3434  */
 3435 #ifndef _SYS_SYSPROTO_H_
 3436 struct truncate_args {
 3437         char    *path;
 3438         int     pad;
 3439         off_t   length;
 3440 };
 3441 #endif
 3442 int
 3443 sys_truncate(td, uap)
 3444         struct thread *td;
 3445         register struct truncate_args /* {
 3446                 char *path;
 3447                 int pad;
 3448                 off_t length;
 3449         } */ *uap;
 3450 {
 3451 
 3452         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3453 }
 3454 
 3455 int
 3456 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3457 {
              /*
               * Implementation of truncate(): looks up path with FOLLOW and
               * sets the file size to length through VOP_SETATTR().
               *
               * Returns EINVAL for a negative length, EISDIR when the path
               * names a directory, or the error from namei(),
               * vn_start_write(), the MAC write check, vn_writechk(),
               * VOP_ACCESS() or VOP_SETATTR(); 0 on success.
               */
 3458         struct mount *mp;
 3459         struct vnode *vp;
 3460         void *rl_cookie;
 3461         struct vattr vattr;
 3462         struct nameidata nd;
 3463         int error, vfslocked;
 3464 
 3465         if (length < 0)
 3466                 return(EINVAL);
 3467         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3468         if ((error = namei(&nd)) != 0)
 3469                 return (error);
 3470         vfslocked = NDHASGIANT(&nd);
 3471         vp = nd.ni_vp;
              /*
               * Take a write range lock over [0, OFF_MAX) for the whole
               * operation; it is released on every exit path below.
               */
 3472         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3473         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3474                 vn_rangelock_unlock(vp, rl_cookie);
 3475                 vrele(vp);
 3476                 VFS_UNLOCK_GIANT(vfslocked);
 3477                 return (error);
 3478         }
 3479         NDFREE(&nd, NDF_ONLY_PNBUF);
 3480         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3481         if (vp->v_type == VDIR)
 3482                 error = EISDIR;
 3483 #ifdef MAC
              /*
               * The empty body is intentional: the condition itself stores
               * the MAC error and a non-zero value ends the else-if chain.
               */
 3484         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3485         }
 3486 #endif
 3487         else if ((error = vn_writechk(vp)) == 0 &&
 3488             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3489                 VATTR_NULL(&vattr);
 3490                 vattr.va_size = length;
 3491                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3492         }
              /* Common exit: undo lock, write accounting, range lock, reference. */
 3493         VOP_UNLOCK(vp, 0);
 3494         vn_finished_write(mp);
 3495         vn_rangelock_unlock(vp, rl_cookie);
 3496         vrele(vp);
 3497         VFS_UNLOCK_GIANT(vfslocked);
 3498         return (error);
 3499 }
 3500 
 3501 #if defined(COMPAT_43)
 3502 /*
 3503  * Truncate a file given its path name.
       *
       * Compiled only when COMPAT_43 is defined.  Takes the length as a
       * long, copies path and length into a truncate_args structure and
       * calls sys_truncate(), returning its result.
 3504  */
 3505 #ifndef _SYS_SYSPROTO_H_
 3506 struct otruncate_args {
 3507         char    *path;
 3508         long    length;
 3509 };
 3510 #endif
 3511 int
 3512 otruncate(td, uap)
 3513         struct thread *td;
 3514         register struct otruncate_args /* {
 3515                 char *path;
 3516                 long length;
 3517         } */ *uap;
 3518 {
 3519         struct truncate_args /* {
 3520                 char *path;
 3521                 int pad;
 3522                 off_t length;
 3523         } */ nuap;
 3524 
              /* nuap.pad is left unset; sys_truncate() does not read it. */
 3525         nuap.path = uap->path;
 3526         nuap.length = uap->length;
 3527         return (sys_truncate(td, &nuap));
 3528 }
 3529 #endif /* COMPAT_43 */
 3530 
 3531 /* Versions with the pad argument */
 3532 int
 3533 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3534 {
              /*
               * Copies path and length into a truncate_args structure and
               * calls sys_truncate(), returning its result.  ouap.pad is left
               * unset; sys_truncate() does not read it.
               */
 3535         struct truncate_args ouap;
 3536 
 3537         ouap.path = uap->path;
 3538         ouap.length = uap->length;
 3539         return (sys_truncate(td, &ouap));
 3540 }
 3541 
 3542 int
 3543 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3544 {
              /*
               * Copies fd and length into an ftruncate_args structure and
               * calls sys_ftruncate(), returning its result.
               */
 3545         struct ftruncate_args ouap;
 3546 
 3547         ouap.fd = uap->fd;
 3548         ouap.length = uap->length;
 3549         return (sys_ftruncate(td, &ouap));
 3550 }
 3551 
 3552 /*
 3553  * Sync an open file.
       *
       * Resolves fd with getvnode() (requesting CAP_FSYNC), cleans the pages
       * of the vnode's VM object if it has one, and calls VOP_FSYNC() with
       * MNT_WAIT.  Returns the getvnode() or vn_start_write() error if
       * either fails, otherwise the result of VOP_FSYNC().
 3554  */
 3555 #ifndef _SYS_SYSPROTO_H_
 3556 struct fsync_args {
 3557         int     fd;
 3558 };
 3559 #endif
 3560 int
 3561 sys_fsync(td, uap)
 3562         struct thread *td;
 3563         struct fsync_args /* {
 3564                 int fd;
 3565         } */ *uap;
 3566 {
 3567         struct vnode *vp;
 3568         struct mount *mp;
 3569         struct file *fp;
 3570         int vfslocked;
 3571         int error, lock_flags;
 3572 
 3573         AUDIT_ARG_FD(uap->fd);
 3574         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
 3575             &fp)) != 0)
 3576                 return (error);
 3577         vp = fp->f_vnode;
 3578         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3579         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3580                 goto drop;
              /*
               * Lock the vnode shared when the mount reports shared writes,
               * consulting vp->v_mount if vn_start_write() left mp NULL;
               * otherwise lock it exclusively.
               */
 3581         if (MNT_SHARED_WRITES(mp) ||
 3582             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3583                 lock_flags = LK_SHARED;
 3584         } else {
 3585                 lock_flags = LK_EXCLUSIVE;
 3586         }
 3587         vn_lock(vp, lock_flags | LK_RETRY);
 3588         AUDIT_ARG_VNODE1(vp);
 3589         if (vp->v_object != NULL) {
 3590                 VM_OBJECT_LOCK(vp->v_object);
 3591                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3592                 VM_OBJECT_UNLOCK(vp->v_object);
 3593         }
 3594         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3595 
 3596         VOP_UNLOCK(vp, 0);
 3597         vn_finished_write(mp);
              /* Reached directly when vn_start_write() fails: vp was never locked. */
 3598 drop:
 3599         VFS_UNLOCK_GIANT(vfslocked);
 3600         fdrop(fp, td);
 3601         return (error);
 3602 }
 3603 
 3604 /*
 3605  * Rename files.  Source and destination must either both be directories, or
 3606  * both not be directories.  If target is a directory, it must be empty.
       *
       * System call entry point: forwards the two user-space paths to
       * kern_rename() and returns its result.
 3607  */
 3608 #ifndef _SYS_SYSPROTO_H_
 3609 struct rename_args {
 3610         char    *from;
 3611         char    *to;
 3612 };
 3613 #endif
 3614 int
 3615 sys_rename(td, uap)
 3616         struct thread *td;
 3617         register struct rename_args /* {
 3618                 char *from;
 3619                 char *to;
 3620         } */ *uap;
 3621 {
 3622 
 3623         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3624 }
 3625 
 3626 #ifndef _SYS_SYSPROTO_H_
 3627 struct renameat_args {
 3628         int     oldfd;
 3629         char    *old;
 3630         int     newfd;
 3631         char    *new;
 3632 };
 3633 #endif
      /*
       * Rename a file, with a separate descriptor supplied for each path.
       *
       * System call entry point: forwards both descriptors and both
       * user-space paths to kern_renameat() and returns its result.
       */
 3634 int
 3635 sys_renameat(struct thread *td, struct renameat_args *uap)
 3636 {
 3637 
 3638         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3639             UIO_USERSPACE));
 3640 }
 3641 
 3642 int
 3643 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3644 {
 3645 
              /* rename() is kern_renameat() with AT_FDCWD for both descriptors. */
 3646         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3647 }
 3648 
 3649 int
 3650 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3651     enum uio_seg pathseg)
 3652 {
              /*
               * Common implementation of rename() and renameat().  Looks up
               * "old" with a DELETE lookup and "new" with a RENAME lookup,
               * checks that the two are compatible and hands both to
               * VOP_RENAME().
               *
               * Returns 0 on success or an errno value.  Internally error may
               * be -1, meaning source and target are the same vnode; that
               * value skips VOP_RENAME() and is reported to the caller as 0.
               */
 3653         struct mount *mp = NULL;
 3654         struct vnode *tvp, *fvp, *tdvp;
 3655         struct nameidata fromnd, tond;
 3656         int tvfslocked;
 3657         int fvfslocked;
 3658         int error;
 3659 
 3660         bwillwrite();
              /*
               * With MAC the source lookup asks for parent and leaf locked
               * (LOCKPARENT | LOCKLEAF) for the policy check further down,
               * which unlocks them again; otherwise WANTPARENT is used.
               */
 3661 #ifdef MAC
 3662         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3663             MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3664 #else
 3665         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
 3666             AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3667 #endif
 3668 
 3669         if ((error = namei(&fromnd)) != 0)
 3670                 return (error);
 3671         fvfslocked = NDHASGIANT(&fromnd);
              /*
               * Cleared here because out1 unlocks with tvfslocked even when
               * the target lookup was never reached.
               */
 3672         tvfslocked = 0;
 3673 #ifdef MAC
 3674         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3675             fromnd.ni_vp, &fromnd.ni_cnd);
 3676         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3677         if (fromnd.ni_dvp != fromnd.ni_vp)
 3678                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3679 #endif
 3680         fvp = fromnd.ni_vp;
              /* Without MAC, error is still 0 from the successful namei(). */
 3681         if (error == 0)
 3682                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
 3683         if (error != 0) {
 3684                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3685                 vrele(fromnd.ni_dvp);
 3686                 vrele(fvp);
 3687                 goto out1;
 3688         }
 3689         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3690             SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
 3691             td);
 3692         if (fromnd.ni_vp->v_type == VDIR)
 3693                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3694         if ((error = namei(&tond)) != 0) {
 3695                 /* Translate error code for rename("dir1", "dir2/."). */
 3696                 if (error == EISDIR && fvp->v_type == VDIR)
 3697                         error = EINVAL;
 3698                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3699                 vrele(fromnd.ni_dvp);
 3700                 vrele(fvp);
 3701                 vn_finished_write(mp);
 3702                 goto out1;
 3703         }
 3704         tvfslocked = NDHASGIANT(&tond);
 3705         tdvp = tond.ni_dvp;
 3706         tvp = tond.ni_vp;
              /* An existing target must be of the same kind as the source. */
 3707         if (tvp != NULL) {
 3708                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3709                         error = ENOTDIR;
 3710                         goto out;
 3711                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3712                         error = EISDIR;
 3713                         goto out;
 3714                 }
 3715         }
              /* The source may not be the directory that will hold the target. */
 3716         if (fvp == tdvp) {
 3717                 error = EINVAL;
 3718                 goto out;
 3719         }
 3720         /*
 3721          * If the source is the same as the destination (that is, if they
 3722          * are links to the same vnode), then there is nothing to do.
               * The -1 stored below is an internal marker, not an errno value.
 3723          */
 3724         if (fvp == tvp)
 3725                 error = -1;
 3726 #ifdef MAC
 3727         else
 3728                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3729                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3730 #endif
 3731 out:
 3732         if (!error) {
                      /*
                       * No vnode is released on this path; presumably
                       * VOP_RENAME() disposes of all four vnodes itself.
                       * TODO confirm against VOP_RENAME(9).
                       */
 3733                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3734                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3735                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3736                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3737         } else {
                      /*
                       * Error or no-op path: release everything explicitly.
                       * The target side uses vput(), presumably because the
                       * LOCKPARENT | LOCKLEAF lookup returned those vnodes
                       * locked; when tdvp and tvp are the same vnode, the
                       * second release is a plain vrele().
                       */
 3738                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3739                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3740                 if (tvp)
 3741                         vput(tvp);
 3742                 if (tdvp == tvp)
 3743                         vrele(tdvp);
 3744                 else
 3745                         vput(tdvp);
 3746                 vrele(fromnd.ni_dvp);
 3747                 vrele(fvp);
 3748         }
 3749         vrele(tond.ni_startdir);
 3750         vn_finished_write(mp);
 3751 out1:
 3752         if (fromnd.ni_startdir)
 3753                 vrele(fromnd.ni_startdir);
 3754         VFS_UNLOCK_GIANT(fvfslocked);
 3755         VFS_UNLOCK_GIANT(tvfslocked);
              /* Map the internal "same vnode" marker to success. */
 3756         if (error == -1)
 3757                 return (0);
 3758         return (error);
 3759 }
 3760 
 3761 /*
 3762  * Make a directory file.
       *
       * System call entry point: forwards the user-space path and the mode
       * to kern_mkdir() and returns its result.
 3763  */
 3764 #ifndef _SYS_SYSPROTO_H_
 3765 struct mkdir_args {
 3766         char    *path;
 3767         int     mode;
 3768 };
 3769 #endif
 3770 int
 3771 sys_mkdir(td, uap)
 3772         struct thread *td;
 3773         register struct mkdir_args /* {
 3774                 char *path;
 3775                 int mode;
 3776         } */ *uap;
 3777 {
 3778 
 3779         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3780 }
 3781 
 3782 #ifndef _SYS_SYSPROTO_H_
 3783 struct mkdirat_args {
 3784         int     fd;
 3785         char    *path;
 3786         mode_t  mode;
 3787 };
 3788 #endif
      /*
       * Make a directory file given a descriptor and a path name.
       *
       * System call entry point: forwards fd, the user-space path and the
       * mode to kern_mkdirat() and returns its result.
       */
 3789 int
 3790 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3791 {
 3792 
 3793         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3794 }
 3795 
 3796 int
 3797 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3798 {
 3799 
 3800         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3801 }
 3802 
 3803 int
 3804 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3805     int mode)
 3806 {
 3807         struct mount *mp;
 3808         struct vnode *vp;
 3809         struct vattr vattr;
 3810         int error;
 3811         struct nameidata nd;
 3812         int vfslocked;
 3813 
 3814         AUDIT_ARG_MODE(mode);
 3815 restart:
 3816         bwillwrite();
 3817         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
 3818             AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
 3819         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3820         if ((error = namei(&nd)) != 0)
 3821                 return (error);
 3822         vfslocked = NDHASGIANT(&nd);
 3823         vp = nd.ni_vp;
 3824         if (vp != NULL) {
 3825                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3826                 /*
 3827                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3828                  * the strange behaviour of leaving the vnode unlocked
 3829                  * if the target is the same vnode as the parent.
 3830                  */
 3831                 if (vp == nd.ni_dvp)
 3832                         vrele(nd.ni_dvp);
 3833                 else
 3834                         vput(nd.ni_dvp);
 3835                 vrele(vp);
 3836                 VFS_UNLOCK_GIANT(vfslocked);
 3837                 return (EEXIST);
 3838         }
 3839         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3840                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3841                 vput(nd.ni_dvp);
 3842                 VFS_UNLOCK_GIANT(vfslocked);
 3843                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3844                         return (error);
 3845                 goto restart;
 3846         }
 3847         VATTR_NULL(&vattr);
 3848         vattr.va_type = VDIR;
 3849         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3850 #ifdef MAC
 3851         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3852             &vattr);
 3853         if (error)
 3854                 goto out;
 3855 #endif
 3856         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3857 #ifdef MAC
 3858 out:
 3859 #endif
 3860         NDFREE(&nd, NDF_ONLY_PNBUF);
 3861         vput(nd.ni_dvp);
 3862         if (!error)
 3863                 vput(nd.ni_vp);
 3864         vn_finished_write(mp);
 3865         VFS_UNLOCK_GIANT(vfslocked);
 3866         return (error);
 3867 }
 3868 
 3869 /*
 3870  * Remove a directory file.
 3871  */
 3872 #ifndef _SYS_SYSPROTO_H_
 3873 struct rmdir_args {
 3874         char    *path;
 3875 };
 3876 #endif
 3877 int
 3878 sys_rmdir(td, uap)
 3879         struct thread *td;
 3880         struct rmdir_args /* {
 3881                 char *path;
 3882         } */ *uap;
 3883 {
 3884 
 3885         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3886 }
 3887 
 3888 int
 3889 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3890 {
 3891 
 3892         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3893 }
 3894 
 3895 int
 3896 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3897 {
 3898         struct mount *mp;
 3899         struct vnode *vp;
 3900         int error;
 3901         struct nameidata nd;
 3902         int vfslocked;
 3903 
 3904 restart:
 3905         bwillwrite();
 3906         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
 3907             AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
 3908         if ((error = namei(&nd)) != 0)
 3909                 return (error);
 3910         vfslocked = NDHASGIANT(&nd);
 3911         vp = nd.ni_vp;
 3912         if (vp->v_type != VDIR) {
 3913                 error = ENOTDIR;
 3914                 goto out;
 3915         }
 3916         /*
 3917          * No rmdir "." please.
 3918          */
 3919         if (nd.ni_dvp == vp) {
 3920                 error = EINVAL;
 3921                 goto out;
 3922         }
 3923         /*
 3924          * The root of a mounted filesystem cannot be deleted.
 3925          */
 3926         if (vp->v_vflag & VV_ROOT) {
 3927                 error = EBUSY;
 3928                 goto out;
 3929         }
 3930 #ifdef MAC
 3931         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3932             &nd.ni_cnd);
 3933         if (error)
 3934                 goto out;
 3935 #endif
 3936         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3937                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3938                 vput(vp);
 3939                 if (nd.ni_dvp == vp)
 3940                         vrele(nd.ni_dvp);
 3941                 else
 3942                         vput(nd.ni_dvp);
 3943                 VFS_UNLOCK_GIANT(vfslocked);
 3944                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3945                         return (error);
 3946                 goto restart;
 3947         }
 3948         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3949         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3950         vn_finished_write(mp);
 3951 out:
 3952         NDFREE(&nd, NDF_ONLY_PNBUF);
 3953         vput(vp);
 3954         if (nd.ni_dvp == vp)
 3955                 vrele(nd.ni_dvp);
 3956         else
 3957                 vput(nd.ni_dvp);
 3958         VFS_UNLOCK_GIANT(vfslocked);
 3959         return (error);
 3960 }
 3961 
 3962 #ifdef COMPAT_43
 3963 /*
 3964  * Read a block of directory entries in a filesystem independent format.
 3965  */
 3966 #ifndef _SYS_SYSPROTO_H_
 3967 struct ogetdirentries_args {
 3968         int     fd;
 3969         char    *buf;
 3970         u_int   count;
 3971         long    *basep;
 3972 };
 3973 #endif
 3974 int
 3975 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3976 {
 3977         long loff;
 3978         int error;
 3979 
 3980         error = kern_ogetdirentries(td, uap, &loff);
 3981         if (error == 0)
 3982                 error = copyout(&loff, uap->basep, sizeof(long));
 3983         return (error);
 3984 }
 3985 
      /*
       * Worker for the COMPAT_43 ogetdirentries() call.
       *
       * Reads directory entries from the descriptor uap->fd into the user
       * buffer uap->buf (at most uap->count bytes).  Unless the entries are
       * read straight into the user buffer (big-endian case below), they are
       * first read into a kernel buffer where the d_type/d_namlen bytes of
       * every entry are rewritten, and then copied out.
       *
       * On success the number of bytes transferred is stored in
       * td->td_retval[0], the file offset at which the read started is stored
       * in *ploff, and 0 is returned.  Otherwise an errno value is returned.
       */
 3986 int
 3987 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3988     long *ploff)
 3989 {
 3990         struct vnode *vp;
 3991         struct file *fp;
 3992         struct uio auio, kuio;
 3993         struct iovec aiov, kiov;
 3994         struct dirent *dp, *edp;
 3995         caddr_t dirbuf;
 3996         int error, eofflag, readcnt, vfslocked;
 3997         long loff;
 3998         off_t foffset;
 3999 
 4000         /* XXX arbitrary sanity limit on `count'. */
 4001         if (uap->count > 64 * 1024)
 4002                 return (EINVAL);
 4003         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
 4004             &fp)) != 0)
 4005                 return (error);
              /* The descriptor must have been opened for reading. */
 4006         if ((fp->f_flag & FREAD) == 0) {
 4007                 fdrop(fp, td);
 4008                 return (EBADF);
 4009         }
 4010         vp = fp->f_vnode;
 4011         foffset = foffset_lock(fp, 0);
 4012 unionread:
 4013         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
              /* Only directories can be read through this interface. */
 4014         if (vp->v_type != VDIR) {
 4015                 VFS_UNLOCK_GIANT(vfslocked);
 4016                 foffset_unlock(fp, foffset, 0);
 4017                 fdrop(fp, td);
 4018                 return (EINVAL);
 4019         }
              /* Describe the caller's user-space buffer as a one-segment uio. */
 4020         aiov.iov_base = uap->buf;
 4021         aiov.iov_len = uap->count;
 4022         auio.uio_iov = &aiov;
 4023         auio.uio_iovcnt = 1;
 4024         auio.uio_rw = UIO_READ;
 4025         auio.uio_segflg = UIO_USERSPACE;
 4026         auio.uio_td = td;
 4027         auio.uio_resid = uap->count;
 4028         vn_lock(vp, LK_SHARED | LK_RETRY);
              /* Remember where this read starts; it is reported via *ploff. */
 4029         loff = auio.uio_offset = foffset;
 4030 #ifdef MAC
 4031         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4032         if (error) {
 4033                 VOP_UNLOCK(vp, 0);
 4034                 VFS_UNLOCK_GIANT(vfslocked);
 4035                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 4036                 fdrop(fp, td);
 4037                 return (error);
 4038         }
 4039 #endif
              /*
               * On non-little-endian machines a mount with
               * mnt_maxsymlinklen <= 0 is read directly into the user buffer
               * with no per-entry rewriting.  In every other case the braced
               * block below runs.
               */
 4040 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4041                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4042                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4043                             NULL, NULL);
 4044                         foffset = auio.uio_offset;
 4045                 } else
 4046 #       endif
 4047         {
                      /*
                       * Read into a temporary kernel buffer of uap->count
                       * bytes: kuio is a copy of auio redirected at dirbuf.
                       */
 4048                 kuio = auio;
 4049                 kuio.uio_iov = &kiov;
 4050                 kuio.uio_segflg = UIO_SYSSPACE;
 4051                 kiov.iov_len = uap->count;
 4052                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4053                 kiov.iov_base = dirbuf;
 4054                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4055                             NULL, NULL);
 4056                 foffset = kuio.uio_offset;
 4057                 if (error == 0) {
                              /* readcnt bytes of entries were produced. */
 4058                         readcnt = uap->count - kuio.uio_resid;
 4059                         edp = (struct dirent *)&dirbuf[readcnt];
                              /* Walk the entries, stepping by d_reclen. */
 4060                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4061 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4062                                         /*
 4063                                          * The expected low byte of
 4064                                          * dp->d_namlen is our dp->d_type.
 4065                                          * The high MBZ byte of dp->d_namlen
 4066                                          * is our dp->d_namlen.
 4067                                          */
 4068                                         dp->d_type = dp->d_namlen;
 4069                                         dp->d_namlen = 0;
 4070 #                               else
 4071                                         /*
 4072                                          * The dp->d_type is the high byte
 4073                                          * of the expected dp->d_namlen,
 4074                                          * so must be zero'ed.
 4075                                          */
 4076                                         dp->d_type = 0;
 4077 #                               endif
                                      /*
                                       * A record length that is not positive
                                       * would keep the walk from advancing;
                                       * fail with EIO instead.
                                       */
 4078                                 if (dp->d_reclen > 0) {
 4079                                         dp = (struct dirent *)
 4080                                             ((char *)dp + dp->d_reclen);
 4081                                 } else {
 4082                                         error = EIO;
 4083                                         break;
 4084                                 }
 4085                         }
                              /*
                               * Copy out to the user buffer only if the walk
                               * reached the end (it did not break with EIO).
                               */
 4086                         if (dp >= edp)
 4087                                 error = uiomove(dirbuf, readcnt, &auio);
 4088                 }
 4089                 free(dirbuf, M_TEMP);
 4090         }
 4091         if (error) {
 4092                 VOP_UNLOCK(vp, 0);
 4093                 VFS_UNLOCK_GIANT(vfslocked);
 4094                 foffset_unlock(fp, foffset, 0);
 4095                 fdrop(fp, td);
 4096                 return (error);
 4097         }
              /*
               * Nothing was transferred and vp is the root (VV_ROOT) of a
               * mount flagged MNT_UNION: switch the open file over to the
               * covered vnode, restart at offset 0 and read again.
               */
 4098         if (uap->count == auio.uio_resid &&
 4099             (vp->v_vflag & VV_ROOT) &&
 4100             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4101                 struct vnode *tvp = vp;
 4102                 vp = vp->v_mount->mnt_vnodecovered;
 4103                 VREF(vp);
 4104                 fp->f_vnode = vp;
 4105                 fp->f_data = vp;
 4106                 foffset = 0;
 4107                 vput(tvp);
 4108                 VFS_UNLOCK_GIANT(vfslocked);
 4109                 goto unionread;
 4110         }
 4111         VOP_UNLOCK(vp, 0);
 4112         VFS_UNLOCK_GIANT(vfslocked);
 4113         foffset_unlock(fp, foffset, 0);
 4114         fdrop(fp, td);
              /* Report the number of bytes placed in the user buffer. */
 4115         td->td_retval[0] = uap->count - auio.uio_resid;
              /* error is necessarily 0 here; earlier failures returned above. */
 4116         if (error == 0)
 4117                 *ploff = loff;
 4118         return (error);
 4119 }
 4120 #endif /* COMPAT_43 */
 4121 
 4122 /*
 4123  * Read a block of directory entries in a filesystem independent format.
 4124  */
 4125 #ifndef _SYS_SYSPROTO_H_
 4126 struct getdirentries_args {
 4127         int     fd;
 4128         char    *buf;
 4129         u_int   count;
 4130         long    *basep;
 4131 };
 4132 #endif
 4133 int
 4134 sys_getdirentries(td, uap)
 4135         struct thread *td;
 4136         register struct getdirentries_args /* {
 4137                 int fd;
 4138                 char *buf;
 4139                 u_int count;
 4140                 long *basep;
 4141         } */ *uap;
 4142 {
 4143         long base;
 4144         int error;
 4145 
 4146         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 4147         if (error)
 4148                 return (error);
 4149         if (uap->basep != NULL)
 4150                 error = copyout(&base, uap->basep, sizeof(long));
 4151         return (error);
 4152 }
 4153 
 4154 int
 4155 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4156     long *basep)
 4157 {
 4158         struct vnode *vp;
 4159         struct file *fp;
 4160         struct uio auio;
 4161         struct iovec aiov;
 4162         int vfslocked;
 4163         long loff;
 4164         int error, eofflag;
 4165         off_t foffset;
 4166 
 4167         AUDIT_ARG_FD(fd);
 4168         auio.uio_resid = count;
 4169         if (auio.uio_resid > IOSIZE_MAX)
 4170                 return (EINVAL);
 4171         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
 4172             &fp)) != 0)
 4173                 return (error);
 4174         if ((fp->f_flag & FREAD) == 0) {
 4175                 fdrop(fp, td);
 4176                 return (EBADF);
 4177         }
 4178         vp = fp->f_vnode;
 4179         foffset = foffset_lock(fp, 0);
 4180 unionread:
 4181         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4182         if (vp->v_type != VDIR) {
 4183                 VFS_UNLOCK_GIANT(vfslocked);
 4184                 error = EINVAL;
 4185                 goto fail;
 4186         }
 4187         aiov.iov_base = buf;
 4188         aiov.iov_len = count;
 4189         auio.uio_iov = &aiov;
 4190         auio.uio_iovcnt = 1;
 4191         auio.uio_rw = UIO_READ;
 4192         auio.uio_segflg = UIO_USERSPACE;
 4193         auio.uio_td = td;
 4194         vn_lock(vp, LK_SHARED | LK_RETRY);
 4195         AUDIT_ARG_VNODE1(vp);
 4196         loff = auio.uio_offset = foffset;
 4197 #ifdef MAC
 4198         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4199         if (error == 0)
 4200 #endif
 4201                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4202                     NULL);
 4203         foffset = auio.uio_offset;
 4204         if (error) {
 4205                 VOP_UNLOCK(vp, 0);
 4206                 VFS_UNLOCK_GIANT(vfslocked);
 4207                 goto fail;
 4208         }
 4209         if (count == auio.uio_resid &&
 4210             (vp->v_vflag & VV_ROOT) &&
 4211             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4212                 struct vnode *tvp = vp;
 4213                 vp = vp->v_mount->mnt_vnodecovered;
 4214                 VREF(vp);
 4215                 fp->f_vnode = vp;
 4216                 fp->f_data = vp;
 4217                 foffset = 0;
 4218                 vput(tvp);
 4219                 VFS_UNLOCK_GIANT(vfslocked);
 4220                 goto unionread;
 4221         }
 4222         VOP_UNLOCK(vp, 0);
 4223         VFS_UNLOCK_GIANT(vfslocked);
 4224         *basep = loff;
 4225         td->td_retval[0] = count - auio.uio_resid;
 4226 fail:
 4227         foffset_unlock(fp, foffset, 0);
 4228         fdrop(fp, td);
 4229         return (error);
 4230 }
 4231 
 4232 #ifndef _SYS_SYSPROTO_H_
 4233 struct getdents_args {
 4234         int fd;
 4235         char *buf;
 4236         size_t count;
 4237 };
 4238 #endif
 4239 int
 4240 sys_getdents(td, uap)
 4241         struct thread *td;
 4242         register struct getdents_args /* {
 4243                 int fd;
 4244                 char *buf;
 4245                 u_int count;
 4246         } */ *uap;
 4247 {
 4248         struct getdirentries_args ap;
 4249         ap.fd = uap->fd;
 4250         ap.buf = uap->buf;
 4251         ap.count = uap->count;
 4252         ap.basep = NULL;
 4253         return (sys_getdirentries(td, &ap));
 4254 }
 4255 
 4256 /*
 4257  * Set the mode mask for creation of filesystem nodes.
 4258  */
 4259 #ifndef _SYS_SYSPROTO_H_
 4260 struct umask_args {
 4261         int     newmask;
 4262 };
 4263 #endif
 4264 int
 4265 sys_umask(td, uap)
 4266         struct thread *td;
 4267         struct umask_args /* {
 4268                 int newmask;
 4269         } */ *uap;
 4270 {
 4271         register struct filedesc *fdp;
 4272 
 4273         FILEDESC_XLOCK(td->td_proc->p_fd);
 4274         fdp = td->td_proc->p_fd;
 4275         td->td_retval[0] = fdp->fd_cmask;
 4276         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4277         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4278         return (0);
 4279 }
 4280 
 4281 /*
 4282  * Void all references to file by ripping underlying filesystem away from
 4283  * vnode.
 4284  */
 4285 #ifndef _SYS_SYSPROTO_H_
 4286 struct revoke_args {
 4287         char    *path;
 4288 };
 4289 #endif
 4290 int
 4291 sys_revoke(td, uap)
 4292         struct thread *td;
 4293         register struct revoke_args /* {
 4294                 char *path;
 4295         } */ *uap;
 4296 {
 4297         struct vnode *vp;
 4298         struct vattr vattr;
 4299         int error;
 4300         struct nameidata nd;
 4301         int vfslocked;
 4302 
 4303         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4304             UIO_USERSPACE, uap->path, td);
 4305         if ((error = namei(&nd)) != 0)
 4306                 return (error);
 4307         vfslocked = NDHASGIANT(&nd);
 4308         vp = nd.ni_vp;
 4309         NDFREE(&nd, NDF_ONLY_PNBUF);
 4310         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4311                 error = EINVAL;
 4312                 goto out;
 4313         }
 4314 #ifdef MAC
 4315         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4316         if (error)
 4317                 goto out;
 4318 #endif
 4319         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4320         if (error)
 4321                 goto out;
 4322         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4323                 error = priv_check(td, PRIV_VFS_ADMIN);
 4324                 if (error)
 4325                         goto out;
 4326         }
 4327         if (vcount(vp) > 1)
 4328                 VOP_REVOKE(vp, REVOKEALL);
 4329 out:
 4330         vput(vp);
 4331         VFS_UNLOCK_GIANT(vfslocked);
 4332         return (error);
 4333 }
 4334 
 4335 /*
 4336  * Convert a user file descriptor to a kernel file entry and check that, if it
 4337  * is a capability, the correct rights are present. A reference on the file
 4338  * entry is held upon returning.
 4339  */
 4340 int
 4341 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
 4342     struct file **fpp)
 4343 {
 4344         struct file *fp;
 4345 #ifdef CAPABILITIES
 4346         struct file *fp_fromcap;
 4347 #endif
 4348         int error;
 4349 
 4350         error = 0;
 4351         fp = NULL;
 4352         if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
 4353                 return (EBADF);
 4354 #ifdef CAPABILITIES
 4355         /*
 4356          * If the file descriptor is for a capability, test rights and use the
 4357          * file descriptor referenced by the capability.
 4358          */
 4359         error = cap_funwrap(fp, rights, &fp_fromcap);
 4360         if (error) {
 4361                 fdrop(fp, curthread);
 4362                 return (error);
 4363         }
 4364         if (fp != fp_fromcap) {
 4365                 fhold(fp_fromcap);
 4366                 fdrop(fp, curthread);
 4367                 fp = fp_fromcap;
 4368         }
 4369 #endif /* CAPABILITIES */
 4370 
 4371         /*
 4372          * The file could be not of the vnode type, or it may be not
 4373          * yet fully initialized, in which case the f_vnode pointer
 4374          * may be set, but f_ops is still badfileops.  E.g.,
 4375          * devfs_open() transiently create such situation to
 4376          * facilitate csw d_fdopen().
 4377          *
 4378          * Dupfdopen() handling in kern_openat() installs the
 4379          * half-baked file into the process descriptor table, allowing
 4380          * other thread to dereference it. Guard against the race by
 4381          * checking f_ops.
 4382          */
 4383         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4384                 fdrop(fp, curthread);
 4385                 return (EINVAL);
 4386         }
 4387         *fpp = fp;
 4388         return (0);
 4389 }
 4390 
 4391 
 4392 /*
 4393  * Get an (NFS) file handle.
 4394  */
 4395 #ifndef _SYS_SYSPROTO_H_
 4396 struct lgetfh_args {
 4397         char    *fname;
 4398         fhandle_t *fhp;
 4399 };
 4400 #endif
 4401 int
 4402 sys_lgetfh(td, uap)
 4403         struct thread *td;
 4404         register struct lgetfh_args *uap;
 4405 {
 4406         struct nameidata nd;
 4407         fhandle_t fh;
 4408         register struct vnode *vp;
 4409         int vfslocked;
 4410         int error;
 4411 
 4412         error = priv_check(td, PRIV_VFS_GETFH);
 4413         if (error)
 4414                 return (error);
 4415         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4416             UIO_USERSPACE, uap->fname, td);
 4417         error = namei(&nd);
 4418         if (error)
 4419                 return (error);
 4420         vfslocked = NDHASGIANT(&nd);
 4421         NDFREE(&nd, NDF_ONLY_PNBUF);
 4422         vp = nd.ni_vp;
 4423         bzero(&fh, sizeof(fh));
 4424         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4425         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4426         vput(vp);
 4427         VFS_UNLOCK_GIANT(vfslocked);
 4428         if (error)
 4429                 return (error);
 4430         error = copyout(&fh, uap->fhp, sizeof (fh));
 4431         return (error);
 4432 }
 4433 
 4434 #ifndef _SYS_SYSPROTO_H_
 4435 struct getfh_args {
 4436         char    *fname;
 4437         fhandle_t *fhp;
 4438 };
 4439 #endif
 4440 int
 4441 sys_getfh(td, uap)
 4442         struct thread *td;
 4443         register struct getfh_args *uap;
 4444 {
 4445         struct nameidata nd;
 4446         fhandle_t fh;
 4447         register struct vnode *vp;
 4448         int vfslocked;
 4449         int error;
 4450 
 4451         error = priv_check(td, PRIV_VFS_GETFH);
 4452         if (error)
 4453                 return (error);
 4454         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4455             UIO_USERSPACE, uap->fname, td);
 4456         error = namei(&nd);
 4457         if (error)
 4458                 return (error);
 4459         vfslocked = NDHASGIANT(&nd);
 4460         NDFREE(&nd, NDF_ONLY_PNBUF);
 4461         vp = nd.ni_vp;
 4462         bzero(&fh, sizeof(fh));
 4463         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4464         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4465         vput(vp);
 4466         VFS_UNLOCK_GIANT(vfslocked);
 4467         if (error)
 4468                 return (error);
 4469         error = copyout(&fh, uap->fhp, sizeof (fh));
 4470         return (error);
 4471 }
 4472 
 4473 /*
 4474  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4475  * open descriptor.
 4476  *
 4477  * warning: do not remove the priv_check() call or this becomes one giant
 4478  * security hole.
 4479  */
      /*
       * Summary of the flow below: check PRIV_VFS_FHOPEN, validate the open
       * flags, copy the handle in from uap->u_fhp, find the mount by its
       * fsid, turn the fid into a locked vnode, repeat the checks taken from
       * vn_open, then allocate a descriptor and attach the vnode to it.
       *
       * On success the new descriptor index is returned in td->td_retval[0]
       * and the function returns 0; otherwise it returns an errno value.
       */
 4480 #ifndef _SYS_SYSPROTO_H_
 4481 struct fhopen_args {
 4482         const struct fhandle *u_fhp;
 4483         int flags;
 4484 };
 4485 #endif
 4486 int
 4487 sys_fhopen(td, uap)
 4488         struct thread *td;
 4489         struct fhopen_args /* {
 4490                 const struct fhandle *u_fhp;
 4491                 int flags;
 4492         } */ *uap;
 4493 {
 4494         struct proc *p = td->td_proc;
 4495         struct mount *mp;
 4496         struct vnode *vp;
 4497         struct fhandle fhp;
 4498         struct vattr vat;
 4499         struct vattr *vap = &vat;
 4500         struct flock lf;
 4501         struct file *fp;
 4502         register struct filedesc *fdp = p->p_fd;
 4503         int fmode, error, type;
 4504         accmode_t accmode;
 4505         struct file *nfp;
 4506         int vfslocked;
 4507         int indx;
 4508 
 4509         error = priv_check(td, PRIV_VFS_FHOPEN);
 4510         if (error)
 4511                 return (error);
 4512         fmode = FFLAGS(uap->flags);
 4513         /* why not allow a non-read/write open for our lockd? */
              /* At least one of FREAD/FWRITE is required; O_CREAT is refused. */
 4514         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4515                 return (EINVAL);
 4516         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4517         if (error)
 4518                 return(error);
 4519         /* find the mount point */
 4520         mp = vfs_busyfs(&fhp.fh_fsid);
              /* No mount matches the fsid in the handle. */
 4521         if (mp == NULL)
 4522                 return (ESTALE);
 4523         vfslocked = VFS_LOCK_GIANT(mp);
 4524         /* now give me my vnode, it gets returned to me locked */
 4525         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4526         vfs_unbusy(mp);
 4527         if (error)
 4528                 goto out;
 4529         /*
 4530          * from now on we have to make sure not
 4531          * to forget about the vnode
 4532          * any error that causes an abort must vput(vp)
 4533          * just set error = err and 'goto bad;'.
 4534          */
 4535 
 4536         /*
 4537          * from vn_open
 4538          */
              /* Symbolic links and sockets cannot be opened this way. */
 4539         if (vp->v_type == VLNK) {
 4540                 error = EMLINK;
 4541                 goto bad;
 4542         }
 4543         if (vp->v_type == VSOCK) {
 4544                 error = EOPNOTSUPP;
 4545                 goto bad;
 4546         }
 4547         if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
 4548                 error = ENOTDIR;
 4549                 goto bad;
 4550         }
              /* Build the access mode to check from the open flags. */
 4551         accmode = 0;
 4552         if (fmode & (FWRITE | O_TRUNC)) {
                      /* Directories may not be written or truncated. */
 4553                 if (vp->v_type == VDIR) {
 4554                         error = EISDIR;
 4555                         goto bad;
 4556                 }
 4557                 error = vn_writechk(vp);
 4558                 if (error)
 4559                         goto bad;
 4560                 accmode |= VWRITE;
 4561         }
 4562         if (fmode & FREAD)
 4563                 accmode |= VREAD;
 4564         if ((fmode & O_APPEND) && (fmode & FWRITE))
 4565                 accmode |= VAPPEND;
 4566 #ifdef MAC
 4567         error = mac_vnode_check_open(td->td_ucred, vp, accmode);
 4568         if (error)
 4569                 goto bad;
 4570 #endif
 4571         if (accmode) {
 4572                 error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
 4573                 if (error)
 4574                         goto bad;
 4575         }
 4576         if (fmode & O_TRUNC) {
                      /*
                       * Truncation: the vnode is unlocked while waiting in
                       * vn_start_write() and relocked afterwards.  A
                       * reference on mp is held across that wait.
                       */
 4577                 vfs_ref(mp);
 4578                 VOP_UNLOCK(vp, 0);                              /* XXX */
 4579                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
                              /* vp is unlocked here, so vrele() rather than vput(). */
 4580                         vrele(vp);
 4581                         vfs_rel(mp);
 4582                         goto out;
 4583                 }
 4584                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4585                 vfs_rel(mp);
 4586 #ifdef MAC
 4587                 /*
 4588                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4589                  * should be right.
 4590                  */
 4591                 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
 4592                 if (error == 0) {
 4593 #endif
                              /* Set only the size attribute, to zero. */
 4594                         VATTR_NULL(vap);
 4595                         vap->va_size = 0;
 4596                         error = VOP_SETATTR(vp, vap, td->td_ucred);
 4597 #ifdef MAC
 4598                 }
 4599 #endif
 4600                 vn_finished_write(mp);
 4601                 if (error)
 4602                         goto bad;
 4603         }
 4604         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4605         if (error)
 4606                 goto bad;
 4607 
              /* Account for the new writer on the vnode. */
 4608         if (fmode & FWRITE)
 4609                 vp->v_writecount++;
 4610 
 4611         /*
 4612          * end of vn_open code
 4613          */
 4614 
 4615         if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
                      /* Undo the writer accounting done just above. */
 4616                 if (fmode & FWRITE)
 4617                         vp->v_writecount--;
 4618                 goto bad;
 4619         }
 4620         /* An extra reference on `nfp' has been held for us by falloc(). */
 4621         fp = nfp;
              /* Attach the vnode to the new file as a DTYPE_VNODE using vnops. */
 4622         nfp->f_vnode = vp;
 4623         finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
 4624         if (fmode & (O_EXLOCK | O_SHLOCK)) {
                      /*
                       * The caller asked for an flock-style lock at open time:
                       * a write lock for O_EXLOCK, a read lock otherwise.
                       * F_WAIT is requested unless FNONBLOCK is set.
                       */
 4625                 lf.l_whence = SEEK_SET;
 4626                 lf.l_start = 0;
 4627                 lf.l_len = 0;
 4628                 if (fmode & O_EXLOCK)
 4629                         lf.l_type = F_WRLCK;
 4630                 else
 4631                         lf.l_type = F_RDLCK;
 4632                 type = F_FLOCK;
 4633                 if ((fmode & FNONBLOCK) == 0)
 4634                         type |= F_WAIT;
                      /* The vnode lock is dropped around VOP_ADVLOCK(). */
 4635                 VOP_UNLOCK(vp, 0);
 4636                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4637                             type)) != 0) {
 4638                         /*
 4639                          * The lock request failed.  Normally close the
 4640                          * descriptor but handle the case where someone might
 4641                          * have dup()d or close()d it when we weren't looking.
 4642                          */
 4643                         fdclose(fdp, fp, indx, td);
 4644 
 4645                         /*
 4646                          * release our private reference
 4647                          */
 4648                         fdrop(fp, td);
                              /*
                               * NOTE(review): this path jumps to `out', not
                               * `bad', so vp is not vput() here; presumably
                               * the vnode is released when the file is torn
                               * down -- confirm.
                               */
 4649                         goto out;
 4650                 }
 4651                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4652                 atomic_set_int(&fp->f_flag, FHASLOCK);
 4653         }
 4654 
              /* Success: unlock the vnode and drop the falloc() reference. */
 4655         VOP_UNLOCK(vp, 0);
 4656         fdrop(fp, td);
 4657         VFS_UNLOCK_GIANT(vfslocked);
 4658         td->td_retval[0] = indx;
 4659         return (0);
 4660 
      /* Error exits: `bad' releases the locked vnode first, `out' does not. */
 4661 bad:
 4662         vput(vp);
 4663 out:
 4664         VFS_UNLOCK_GIANT(vfslocked);
 4665         return (error);
 4666 }
 4667 
 4668 /*
 4669  * Stat an (NFS) file handle.
 4670  */
 4671 #ifndef _SYS_SYSPROTO_H_
 4672 struct fhstat_args {
 4673         struct fhandle *u_fhp;
 4674         struct stat *sb;
 4675 };
 4676 #endif
 4677 int
 4678 sys_fhstat(td, uap)
 4679         struct thread *td;
 4680         register struct fhstat_args /* {
 4681                 struct fhandle *u_fhp;
 4682                 struct stat *sb;
 4683         } */ *uap;
 4684 {
 4685         struct stat sb;
 4686         fhandle_t fh;
 4687         struct mount *mp;
 4688         struct vnode *vp;
 4689         int vfslocked;
 4690         int error;
 4691 
 4692         error = priv_check(td, PRIV_VFS_FHSTAT);
 4693         if (error)
 4694                 return (error);
 4695         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4696         if (error)
 4697                 return (error);
 4698         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4699                 return (ESTALE);
 4700         vfslocked = VFS_LOCK_GIANT(mp);
 4701         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4702         vfs_unbusy(mp);
 4703         if (error) {
 4704                 VFS_UNLOCK_GIANT(vfslocked);
 4705                 return (error);
 4706         }
 4707         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4708         vput(vp);
 4709         VFS_UNLOCK_GIANT(vfslocked);
 4710         if (error)
 4711                 return (error);
 4712         error = copyout(&sb, uap->sb, sizeof(sb));
 4713         return (error);
 4714 }
 4715 
 4716 /*
 4717  * Implement fstatfs() for (NFS) file handles.
 4718  */
 4719 #ifndef _SYS_SYSPROTO_H_
 4720 struct fhstatfs_args {
 4721         struct fhandle *u_fhp;
 4722         struct statfs *buf;
 4723 };
 4724 #endif
 4725 int
 4726 sys_fhstatfs(td, uap)
 4727         struct thread *td;
 4728         struct fhstatfs_args /* {
 4729                 struct fhandle *u_fhp;
 4730                 struct statfs *buf;
 4731         } */ *uap;
 4732 {
 4733         struct statfs sf;
 4734         fhandle_t fh;
 4735         int error;
 4736 
 4737         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4738         if (error)
 4739                 return (error);
 4740         error = kern_fhstatfs(td, fh, &sf);
 4741         if (error)
 4742                 return (error);
 4743         return (copyout(&sf, uap->buf, sizeof(sf)));
 4744 }
 4745 
 4746 int
 4747 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4748 {
 4749         struct statfs *sp;
 4750         struct mount *mp;
 4751         struct vnode *vp;
 4752         int vfslocked;
 4753         int error;
 4754 
 4755         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4756         if (error)
 4757                 return (error);
 4758         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4759                 return (ESTALE);
 4760         vfslocked = VFS_LOCK_GIANT(mp);
 4761         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4762         if (error) {
 4763                 vfs_unbusy(mp);
 4764                 VFS_UNLOCK_GIANT(vfslocked);
 4765                 return (error);
 4766         }
 4767         vput(vp);
 4768         error = prison_canseemount(td->td_ucred, mp);
 4769         if (error)
 4770                 goto out;
 4771 #ifdef MAC
 4772         error = mac_mount_check_stat(td->td_ucred, mp);
 4773         if (error)
 4774                 goto out;
 4775 #endif
 4776         /*
 4777          * Set these in case the underlying filesystem fails to do so.
 4778          */
 4779         sp = &mp->mnt_stat;
 4780         sp->f_version = STATFS_VERSION;
 4781         sp->f_namemax = NAME_MAX;
 4782         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4783         error = VFS_STATFS(mp, sp);
 4784         if (error == 0)
 4785                 *buf = *sp;
 4786 out:
 4787         vfs_unbusy(mp);
 4788         VFS_UNLOCK_GIANT(vfslocked);
 4789         return (error);
 4790 }
 4791 
 4792 int
 4793 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4794 {
 4795         struct file *fp;
 4796         struct mount *mp;
 4797         struct vnode *vp;
 4798         off_t olen, ooffset;
 4799         int error, vfslocked;
 4800 
 4801         fp = NULL;
 4802         vfslocked = 0;
 4803         error = fget(td, fd, CAP_WRITE, &fp);
 4804         if (error != 0)
 4805                 goto out;
 4806 
 4807         switch (fp->f_type) {
 4808         case DTYPE_VNODE:
 4809                 break;
 4810         case DTYPE_PIPE:
 4811         case DTYPE_FIFO:
 4812                 error = ESPIPE;
 4813                 goto out;
 4814         default:
 4815                 error = ENODEV;
 4816                 goto out;
 4817         }
 4818         if ((fp->f_flag & FWRITE) == 0) {
 4819                 error = EBADF;
 4820                 goto out;
 4821         }
 4822         vp = fp->f_vnode;
 4823         if (vp->v_type != VREG) {
 4824                 error = ENODEV;
 4825                 goto out;
 4826         }
 4827         if (offset < 0 || len <= 0) {
 4828                 error = EINVAL;
 4829                 goto out;
 4830         }
 4831         /* Check for wrap. */
 4832         if (offset > OFF_MAX - len) {
 4833                 error = EFBIG;
 4834                 goto out;
 4835         }
 4836 
 4837         /* Allocating blocks may take a long time, so iterate. */
 4838         for (;;) {
 4839                 olen = len;
 4840                 ooffset = offset;
 4841 
 4842                 bwillwrite();
 4843                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4844                 mp = NULL;
 4845                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4846                 if (error != 0) {
 4847                         VFS_UNLOCK_GIANT(vfslocked);
 4848                         break;
 4849                 }
 4850                 error = vn_lock(vp, LK_EXCLUSIVE);
 4851                 if (error != 0) {
 4852                         vn_finished_write(mp);
 4853                         VFS_UNLOCK_GIANT(vfslocked);
 4854                         break;
 4855                 }
 4856 #ifdef MAC
 4857                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4858                 if (error == 0)
 4859 #endif
 4860                         error = VOP_ALLOCATE(vp, &offset, &len);
 4861                 VOP_UNLOCK(vp, 0);
 4862                 vn_finished_write(mp);
 4863                 VFS_UNLOCK_GIANT(vfslocked);
 4864 
 4865                 if (olen + ooffset != offset + len) {
 4866                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4867                             ooffset, olen, offset, len);
 4868                 }
 4869                 if (error != 0 || len == 0)
 4870                         break;
 4871                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4872                 maybe_yield();
 4873         }
 4874  out:
 4875         if (fp != NULL)
 4876                 fdrop(fp, td);
 4877         return (error);
 4878 }
 4879 
 4880 int
 4881 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4882 {
 4883 
 4884         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4885             uap->len);
 4886         return (0);
 4887 }
 4888 
 4889 /*
 4890  * Unlike madvise(2), we do not make a best effort to remember every
 4891  * possible caching hint.  Instead, we remember the last setting with
 4892  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4893  * region of any current setting.
 4894  */
 4895 int
 4896 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4897     int advice)
 4898 {
 4899         struct fadvise_info *fa, *new;
 4900         struct file *fp;
 4901         struct vnode *vp;
 4902         off_t end;
 4903         int error;
 4904 
 4905         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4906                 return (EINVAL);
 4907         switch (advice) {
 4908         case POSIX_FADV_SEQUENTIAL:
 4909         case POSIX_FADV_RANDOM:
 4910         case POSIX_FADV_NOREUSE:
 4911                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4912                 break;
 4913         case POSIX_FADV_NORMAL:
 4914         case POSIX_FADV_WILLNEED:
 4915         case POSIX_FADV_DONTNEED:
 4916                 new = NULL;
 4917                 break;
 4918         default:
 4919                 return (EINVAL);
 4920         }
 4921         /* XXX: CAP_POSIX_FADVISE? */
 4922         error = fget(td, fd, 0, &fp);
 4923         if (error != 0)
 4924                 goto out;
 4925         
 4926         switch (fp->f_type) {
 4927         case DTYPE_VNODE:
 4928                 break;
 4929         case DTYPE_PIPE:
 4930         case DTYPE_FIFO:
 4931                 error = ESPIPE;
 4932                 goto out;
 4933         default:
 4934                 error = ENODEV;
 4935                 goto out;
 4936         }
 4937         vp = fp->f_vnode;
 4938         if (vp->v_type != VREG) {
 4939                 error = ENODEV;
 4940                 goto out;
 4941         }
 4942         if (len == 0)
 4943                 end = OFF_MAX;
 4944         else
 4945                 end = offset + len - 1;
 4946         switch (advice) {
 4947         case POSIX_FADV_SEQUENTIAL:
 4948         case POSIX_FADV_RANDOM:
 4949         case POSIX_FADV_NOREUSE:
 4950                 /*
 4951                  * Try to merge any existing non-standard region with
 4952                  * this new region if possible, otherwise create a new
 4953                  * non-standard region for this request.
 4954                  */
 4955                 mtx_pool_lock(mtxpool_sleep, fp);
 4956                 fa = fp->f_advice;
 4957                 if (fa != NULL && fa->fa_advice == advice &&
 4958                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4959                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4960                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4961                         if (offset < fa->fa_start)
 4962                                 fa->fa_start = offset;
 4963                         if (end > fa->fa_end)
 4964                                 fa->fa_end = end;
 4965                 } else {
 4966                         new->fa_advice = advice;
 4967                         new->fa_start = offset;
 4968                         new->fa_end = end;
 4969                         new->fa_prevstart = 0;
 4970                         new->fa_prevend = 0;
 4971                         fp->f_advice = new;
 4972                         new = fa;
 4973                 }
 4974                 mtx_pool_unlock(mtxpool_sleep, fp);
 4975                 break;
 4976         case POSIX_FADV_NORMAL:
 4977                 /*
 4978                  * If a the "normal" region overlaps with an existing
 4979                  * non-standard region, trim or remove the
 4980                  * non-standard region.
 4981                  */
 4982                 mtx_pool_lock(mtxpool_sleep, fp);
 4983                 fa = fp->f_advice;
 4984                 if (fa != NULL) {
 4985                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4986                                 new = fa;
 4987                                 fp->f_advice = NULL;
 4988                         } else if (offset <= fa->fa_start &&
 4989                             end >= fa->fa_start)
 4990                                 fa->fa_start = end + 1;
 4991                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4992                                 fa->fa_end = offset - 1;
 4993                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4994                                 /*
 4995                                  * If the "normal" region is a middle
 4996                                  * portion of the existing
 4997                                  * non-standard region, just remove
 4998                                  * the whole thing rather than picking
 4999                                  * one side or the other to
 5000                                  * preserve.
 5001                                  */
 5002                                 new = fa;
 5003                                 fp->f_advice = NULL;
 5004                         }
 5005                 }
 5006                 mtx_pool_unlock(mtxpool_sleep, fp);
 5007                 break;
 5008         case POSIX_FADV_WILLNEED:
 5009         case POSIX_FADV_DONTNEED:
 5010                 error = VOP_ADVISE(vp, offset, end, advice);
 5011                 break;
 5012         }
 5013 out:
 5014         if (fp != NULL)
 5015                 fdrop(fp, td);
 5016         free(new, M_FADVISE);
 5017         return (error);
 5018 }
 5019 
 5020 int
 5021 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 5022 {
 5023 
 5024         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 5025             uap->len, uap->advice);
 5026         return (0);
 5027 }

Cache object: 3c7bd780a205f73683de48bc8af51cd9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.