The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_capsicum.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <fs/devfs/devfs.h>
   91 
   92 #include <ufs/ufs/quota.h>
   93 
   94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   95 
   96 static int kern_chflagsat(struct thread *td, int fd, const char *path,
   97     enum uio_seg pathseg, u_long flags, int atflag);
   98 static int setfflags(struct thread *td, struct vnode *, u_long);
   99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  100 static int getutimens(const struct timespec *, enum uio_seg,
  101     struct timespec *, int *);
  102 static int setutimes(struct thread *td, struct vnode *,
  103     const struct timespec *, int, int);
  104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  105     struct thread *td);
  106 static int kern_fhlinkat(struct thread *td, int fd, const char *path,
  107     enum uio_seg pathseg, fhandle_t *fhp);
  108 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
  109     size_t count, struct thread *td);
  110 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
  111     const char *path, enum uio_seg segflag);
  112 
  113 static uint64_t
  114 at2cnpflags(u_int at_flags, u_int mask)
  115 {
  116         u_int64_t res;
  117 
  118         MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) !=
  119             (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW));
  120 
  121         res = 0;
  122         at_flags &= mask;
  123         if ((at_flags & AT_RESOLVE_BENEATH) != 0)
  124                 res |= RBENEATH;
  125         if ((at_flags & AT_SYMLINK_FOLLOW) != 0)
  126                 res |= FOLLOW;
  127         /* NOFOLLOW is pseudo flag */
  128         if ((mask & AT_SYMLINK_NOFOLLOW) != 0) {
  129                 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
  130                     FOLLOW;
  131         }
  132         return (res);
  133 }
  134 
  135 int
  136 kern_sync(struct thread *td)
  137 {
  138         struct mount *mp, *nmp;
  139         int save;
  140 
  141         mtx_lock(&mountlist_mtx);
  142         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  143                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  144                         nmp = TAILQ_NEXT(mp, mnt_list);
  145                         continue;
  146                 }
  147                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  148                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  149                         save = curthread_pflags_set(TDP_SYNCIO);
  150                         vfs_periodic(mp, MNT_NOWAIT);
  151                         VFS_SYNC(mp, MNT_NOWAIT);
  152                         curthread_pflags_restore(save);
  153                         vn_finished_write(mp);
  154                 }
  155                 mtx_lock(&mountlist_mtx);
  156                 nmp = TAILQ_NEXT(mp, mnt_list);
  157                 vfs_unbusy(mp);
  158         }
  159         mtx_unlock(&mountlist_mtx);
  160         return (0);
  161 }
  162 
  163 /*
  164  * Sync each mounted filesystem.
  165  */
  166 #ifndef _SYS_SYSPROTO_H_
  167 struct sync_args {
  168         int     dummy;
  169 };
  170 #endif
  171 /* ARGSUSED */
  172 int
  173 sys_sync(struct thread *td, struct sync_args *uap)
  174 {
  175 
  176         return (kern_sync(td));
  177 }
  178 
  179 /*
  180  * Change filesystem quotas.
  181  */
  182 #ifndef _SYS_SYSPROTO_H_
  183 struct quotactl_args {
  184         char *path;
  185         int cmd;
  186         int uid;
  187         caddr_t arg;
  188 };
  189 #endif
  190 int
  191 sys_quotactl(struct thread *td, struct quotactl_args *uap)
  192 {
  193         struct mount *mp;
  194         struct nameidata nd;
  195         int error;
  196 
  197         AUDIT_ARG_CMD(uap->cmd);
  198         AUDIT_ARG_UID(uap->uid);
  199         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  200                 return (EPERM);
  201         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  202             uap->path, td);
  203         if ((error = namei(&nd)) != 0)
  204                 return (error);
  205         NDFREE(&nd, NDF_ONLY_PNBUF);
  206         mp = nd.ni_vp->v_mount;
  207         vfs_ref(mp);
  208         vput(nd.ni_vp);
  209         error = vfs_busy(mp, 0);
  210         if (error != 0) {
  211                 vfs_rel(mp);
  212                 return (error);
  213         }
  214         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  215 
  216         /*
  217          * Since quota on operation typically needs to open quota
  218          * file, the Q_QUOTAON handler needs to unbusy the mount point
  219          * before calling into namei.  Otherwise, unmount might be
  220          * started between two vfs_busy() invocations (first is our,
  221          * second is from mount point cross-walk code in lookup()),
  222          * causing deadlock.
  223          *
  224          * Require that Q_QUOTAON handles the vfs_busy() reference on
  225          * its own, always returning with ubusied mount point.
  226          */
  227         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON &&
  228             (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF)
  229                 vfs_unbusy(mp);
  230         vfs_rel(mp);
  231         return (error);
  232 }
  233 
  234 /*
  235  * Used by statfs conversion routines to scale the block size up if
  236  * necessary so that all of the block counts are <= 'max_size'.  Note
  237  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  238  * value of 'n'.
  239  */
  240 void
  241 statfs_scale_blocks(struct statfs *sf, long max_size)
  242 {
  243         uint64_t count;
  244         int shift;
  245 
  246         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  247 
  248         /*
  249          * Attempt to scale the block counts to give a more accurate
  250          * overview to userland of the ratio of free space to used
  251          * space.  To do this, find the largest block count and compute
  252          * a divisor that lets it fit into a signed integer <= max_size.
  253          */
  254         if (sf->f_bavail < 0)
  255                 count = -sf->f_bavail;
  256         else
  257                 count = sf->f_bavail;
  258         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  259         if (count <= max_size)
  260                 return;
  261 
  262         count >>= flsl(max_size);
  263         shift = 0;
  264         while (count > 0) {
  265                 shift++;
  266                 count >>=1;
  267         }
  268 
  269         sf->f_bsize <<= shift;
  270         sf->f_blocks >>= shift;
  271         sf->f_bfree >>= shift;
  272         sf->f_bavail >>= shift;
  273 }
  274 
  275 static int
  276 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  277 {
  278         int error;
  279 
  280         if (mp == NULL)
  281                 return (EBADF);
  282         error = vfs_busy(mp, 0);
  283         vfs_rel(mp);
  284         if (error != 0)
  285                 return (error);
  286 #ifdef MAC
  287         error = mac_mount_check_stat(td->td_ucred, mp);
  288         if (error != 0)
  289                 goto out;
  290 #endif
  291         error = VFS_STATFS(mp, buf);
  292         if (error != 0)
  293                 goto out;
  294         if (priv_check_cred_vfs_generation(td->td_ucred)) {
  295                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  296                 prison_enforce_statfs(td->td_ucred, mp, buf);
  297         }
  298 out:
  299         vfs_unbusy(mp);
  300         return (error);
  301 }
  302 
  303 /*
  304  * Get filesystem statistics.
  305  */
  306 #ifndef _SYS_SYSPROTO_H_
  307 struct statfs_args {
  308         char *path;
  309         struct statfs *buf;
  310 };
  311 #endif
  312 int
  313 sys_statfs(struct thread *td, struct statfs_args *uap)
  314 {
  315         struct statfs *sfp;
  316         int error;
  317 
  318         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  319         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  320         if (error == 0)
  321                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  322         free(sfp, M_STATFS);
  323         return (error);
  324 }
  325 
  326 int
  327 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
  328     struct statfs *buf)
  329 {
  330         struct mount *mp;
  331         struct nameidata nd;
  332         int error;
  333 
  334         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  335             pathseg, path, td);
  336         error = namei(&nd);
  337         if (error != 0)
  338                 return (error);
  339         mp = nd.ni_vp->v_mount;
  340         vfs_ref(mp);
  341         NDFREE_NOTHING(&nd);
  342         vput(nd.ni_vp);
  343         return (kern_do_statfs(td, mp, buf));
  344 }
  345 
  346 /*
  347  * Get filesystem statistics.
  348  */
  349 #ifndef _SYS_SYSPROTO_H_
  350 struct fstatfs_args {
  351         int fd;
  352         struct statfs *buf;
  353 };
  354 #endif
  355 int
  356 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
  357 {
  358         struct statfs *sfp;
  359         int error;
  360 
  361         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  362         error = kern_fstatfs(td, uap->fd, sfp);
  363         if (error == 0)
  364                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  365         free(sfp, M_STATFS);
  366         return (error);
  367 }
  368 
  369 int
  370 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  371 {
  372         struct file *fp;
  373         struct mount *mp;
  374         struct vnode *vp;
  375         int error;
  376 
  377         AUDIT_ARG_FD(fd);
  378         error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
  379         if (error != 0)
  380                 return (error);
  381         vp = fp->f_vnode;
  382         vn_lock(vp, LK_SHARED | LK_RETRY);
  383 #ifdef AUDIT
  384         AUDIT_ARG_VNODE1(vp);
  385 #endif
  386         mp = vp->v_mount;
  387         if (mp != NULL)
  388                 vfs_ref(mp);
  389         VOP_UNLOCK(vp);
  390         fdrop(fp, td);
  391         return (kern_do_statfs(td, mp, buf));
  392 }
  393 
  394 /*
  395  * Get statistics on all filesystems.
  396  */
  397 #ifndef _SYS_SYSPROTO_H_
  398 struct getfsstat_args {
  399         struct statfs *buf;
  400         long bufsize;
  401         int mode;
  402 };
  403 #endif
  404 int
  405 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
  406 {
  407         size_t count;
  408         int error;
  409 
  410         if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
  411                 return (EINVAL);
  412         error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
  413             UIO_USERSPACE, uap->mode);
  414         if (error == 0)
  415                 td->td_retval[0] = count;
  416         return (error);
  417 }
  418 
  419 /*
       * Produce one struct statfs per mounted filesystem and store the number
       * of entries produced in '*countp'.
       *
       * 'mode' must be MNT_WAIT or MNT_NOWAIT, otherwise EINVAL is returned.
       * With 'bufsize' == 0 nothing is copied; only the mounts that pass the
       * prison (and, under MAC, the MAC) visibility checks are counted.
       * With bufseg == UIO_USERSPACE, '*buf' is the user address to copy to.
       *
  420  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  421  *      The caller is responsible for freeing memory which will be allocated
  422  *      in '*buf'.
       *      On the EINVAL path '*buf' is set to NULL instead.
  423  */
  424 int
  425 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  426     size_t *countp, enum uio_seg bufseg, int mode)
  427 {
  428         struct mount *mp, *nmp;
  429         struct statfs *sfsp, *sp, *sptmp, *tofree;
  430         size_t count, maxcount;
  431         int error;
  432 
              /* Validate 'mode' before touching the mount list. */
  433         switch (mode) {
  434         case MNT_WAIT:
  435         case MNT_NOWAIT:
  436                 break;
  437         default:
  438                 if (bufseg == UIO_SYSSPACE)
  439                         *buf = NULL;
  440                 return (EINVAL);
  441         }
              /*
               * Jumped back to from the MNT_WAIT vfs_busy() failure below,
               * after the kernel buffer of the abandoned pass (if any) has
               * been freed.
               */
  442 restart:
  443         maxcount = bufsize / sizeof(struct statfs);
  444         if (bufsize == 0) {
  445                 sfsp = NULL;
  446                 tofree = NULL;
  447         } else if (bufseg == UIO_USERSPACE) {
                      /* Write straight to the user buffer; nothing to free. */
  448                 sfsp = *buf;
  449                 tofree = NULL;
  450         } else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Clamp the allocation to the number of mounts currently
                       * on the list (counted without any visibility filtering).
                       */
  451                 count = 0;
  452                 mtx_lock(&mountlist_mtx);
  453                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  454                         count++;
  455                 }
  456                 mtx_unlock(&mountlist_mtx);
  457                 if (maxcount > count)
  458                         maxcount = count;
  459                 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
  460                     M_STATFS, M_WAITOK);
  461         }
  462 
  463         count = 0;
  464 
  465         /*
  466          * If there is no target buffer they only want the count.
  467          *
  468          * This could be TAILQ_FOREACH but it is open-coded to match the original
  469          * code below.
  470          */
  471         if (sfsp == NULL) {
  472                 mtx_lock(&mountlist_mtx);
  473                 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  474                         if (prison_canseemount(td->td_ucred, mp) != 0) {
  475                                 nmp = TAILQ_NEXT(mp, mnt_list);
  476                                 continue;
  477                         }
  478 #ifdef MAC
  479                         if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  480                                 nmp = TAILQ_NEXT(mp, mnt_list);
  481                                 continue;
  482                         }
  483 #endif
  484                         count++;
  485                         nmp = TAILQ_NEXT(mp, mnt_list);
  486                 }
  487                 mtx_unlock(&mountlist_mtx);
  488                 *countp = count;
  489                 return (0);
  490         }
  491 
  492         /*
  493          * They want the entire thing.
  494          *
  495          * Short-circuit the corner case of no room for anything, avoids
  496          * relocking below.
  497          */
  498         if (maxcount < 1) {
  499                 goto out;
  500         }
  501 
  502         mtx_lock(&mountlist_mtx);
  503         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /* Skip mounts the caller is not allowed to see. */
  504                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  505                         nmp = TAILQ_NEXT(mp, mnt_list);
  506                         continue;
  507                 }
  508 #ifdef MAC
  509                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  510                         nmp = TAILQ_NEXT(mp, mnt_list);
  511                         continue;
  512                 }
  513 #endif
                      /*
                       * MNT_WAIT busies the mount without MBF_NOWAIT;
                       * MNT_NOWAIT uses MBF_NOWAIT and skips mounts that
                       * cannot be busied.
                       */
  514                 if (mode == MNT_WAIT) {
  515                         if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
  516                                 /*
  517                                  * If vfs_busy() failed, and MBF_NOWAIT
  518                                  * wasn't passed, then the mp is gone.
  519                                  * Furthermore, because of MBF_MNTLSTLOCK,
  520                                  * the mountlist_mtx was dropped.  We have
  521                                  * no other choice than to start over.
  522                                  */
  523                                 mtx_unlock(&mountlist_mtx);
  524                                 free(tofree, M_STATFS);
  525                                 goto restart;
  526                         }
  527                 } else {
  528                         if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
  529                                 nmp = TAILQ_NEXT(mp, mnt_list);
  530                                 continue;
  531                         }
  532                 }
                      /* Start from the statistics cached in the mount itself. */
  533                 sp = &mp->mnt_stat;
  534                 /*
  535                  * If MNT_NOWAIT is specified, do not refresh
  536                  * the fsstat cache.
  537                  */
  538                 if (mode != MNT_NOWAIT) {
  539                         error = VFS_STATFS(mp, sp);
  540                         if (error != 0) {
                                      /*
                                       * A failing filesystem is silently left
                                       * out of the result: retake the list
                                       * lock, pick the successor, then unbusy.
                                       */
  541                                 mtx_lock(&mountlist_mtx);
  542                                 nmp = TAILQ_NEXT(mp, mnt_list);
  543                                 vfs_unbusy(mp);
  544                                 continue;
  545                         }
  546                 }
                      /*
                       * When the credential check asks for it, report from a
                       * private copy with the fsid zeroed and the prison statfs
                       * policy applied, so the cached mnt_stat is not modified.
                       */
  547                 if (priv_check_cred_vfs_generation(td->td_ucred)) {
  548                         sptmp = malloc(sizeof(struct statfs), M_STATFS,
  549                             M_WAITOK);
  550                         *sptmp = *sp;
  551                         sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
  552                         prison_enforce_statfs(td->td_ucred, mp, sptmp);
  553                         sp = sptmp;
  554                 } else
  555                         sptmp = NULL;
  556                 if (bufseg == UIO_SYSSPACE) {
  557                         bcopy(sp, sfsp, sizeof(*sp));
  558                         free(sptmp, M_STATFS);
  559                 } else /* if (bufseg == UIO_USERSPACE) */ {
  560                         error = copyout(sp, sfsp, sizeof(*sp));
  561                         free(sptmp, M_STATFS);
  562                         if (error != 0) {
                                      /*
                                       * Only reachable for user-space buffers,
                                       * where 'tofree' is NULL, so there is no
                                       * kernel buffer to release here.
                                       */
  563                                 vfs_unbusy(mp);
  564                                 return (error);
  565                         }
  566                 }
  567                 sfsp++;
  568                 count++;
  569 
                      /* Buffer full: stop without retaking mountlist_mtx. */
  570                 if (count == maxcount) {
  571                         vfs_unbusy(mp);
  572                         goto out;
  573                 }
  574 
                      /*
                       * Retake the list lock before reading the successor and
                       * only then drop the busy reference on this mount.
                       */
  575                 mtx_lock(&mountlist_mtx);
  576                 nmp = TAILQ_NEXT(mp, mnt_list);
  577                 vfs_unbusy(mp);
  578         }
  579         mtx_unlock(&mountlist_mtx);
  580 out:
  581         *countp = count;
  582         return (0);
  583 }
  584 
  585 #ifdef COMPAT_FREEBSD4
  586 /*
  587  * Get old format filesystem statistics.
  588  */
  589 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
  590 
  591 #ifndef _SYS_SYSPROTO_H_
  592 struct freebsd4_statfs_args {
  593         char *path;
  594         struct ostatfs *buf;
  595 };
  596 #endif
  597 int
  598 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
  599 {
  600         struct ostatfs osb;
  601         struct statfs *sfp;
  602         int error;
  603 
  604         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  605         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  606         if (error == 0) {
  607                 freebsd4_cvtstatfs(sfp, &osb);
  608                 error = copyout(&osb, uap->buf, sizeof(osb));
  609         }
  610         free(sfp, M_STATFS);
  611         return (error);
  612 }
  613 
  614 /*
  615  * Get filesystem statistics.
  616  */
  617 #ifndef _SYS_SYSPROTO_H_
  618 struct freebsd4_fstatfs_args {
  619         int fd;
  620         struct ostatfs *buf;
  621 };
  622 #endif
  623 int
  624 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
  625 {
  626         struct ostatfs osb;
  627         struct statfs *sfp;
  628         int error;
  629 
  630         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  631         error = kern_fstatfs(td, uap->fd, sfp);
  632         if (error == 0) {
  633                 freebsd4_cvtstatfs(sfp, &osb);
  634                 error = copyout(&osb, uap->buf, sizeof(osb));
  635         }
  636         free(sfp, M_STATFS);
  637         return (error);
  638 }
  639 
  640 /*
  641  * Get statistics on all filesystems.
  642  */
  643 #ifndef _SYS_SYSPROTO_H_
  644 struct freebsd4_getfsstat_args {
  645         struct ostatfs *buf;
  646         long bufsize;
  647         int mode;
  648 };
  649 #endif
  650 int
  651 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
  652 {
  653         struct statfs *buf, *sp;
  654         struct ostatfs osb;
  655         size_t count, size;
  656         int error;
  657 
  658         if (uap->bufsize < 0)
  659                 return (EINVAL);
  660         count = uap->bufsize / sizeof(struct ostatfs);
  661         if (count > SIZE_MAX / sizeof(struct statfs))
  662                 return (EINVAL);
  663         size = count * sizeof(struct statfs);
  664         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  665             uap->mode);
  666         if (error == 0)
  667                 td->td_retval[0] = count;
  668         if (size != 0) {
  669                 sp = buf;
  670                 while (count != 0 && error == 0) {
  671                         freebsd4_cvtstatfs(sp, &osb);
  672                         error = copyout(&osb, uap->buf, sizeof(osb));
  673                         sp++;
  674                         uap->buf++;
  675                         count--;
  676                 }
  677                 free(buf, M_STATFS);
  678         }
  679         return (error);
  680 }
  681 
  682 /*
  683  * Implement fstatfs() for (NFS) file handles.
  684  */
  685 #ifndef _SYS_SYSPROTO_H_
  686 struct freebsd4_fhstatfs_args {
  687         struct fhandle *u_fhp;
  688         struct ostatfs *buf;
  689 };
  690 #endif
  691 int
  692 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
  693 {
  694         struct ostatfs osb;
  695         struct statfs *sfp;
  696         fhandle_t fh;
  697         int error;
  698 
  699         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  700         if (error != 0)
  701                 return (error);
  702         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  703         error = kern_fhstatfs(td, fh, sfp);
  704         if (error == 0) {
  705                 freebsd4_cvtstatfs(sfp, &osb);
  706                 error = copyout(&osb, uap->buf, sizeof(osb));
  707         }
  708         free(sfp, M_STATFS);
  709         return (error);
  710 }
  711 
  712 /*
  713  * Convert a new format statfs structure to an old format statfs structure.
  714  */
  715 static void
  716 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
  717 {
  718 
  719         statfs_scale_blocks(nsp, LONG_MAX);
  720         bzero(osp, sizeof(*osp));
  721         osp->f_bsize = nsp->f_bsize;
  722         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  723         osp->f_blocks = nsp->f_blocks;
  724         osp->f_bfree = nsp->f_bfree;
  725         osp->f_bavail = nsp->f_bavail;
  726         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  727         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  728         osp->f_owner = nsp->f_owner;
  729         osp->f_type = nsp->f_type;
  730         osp->f_flags = nsp->f_flags;
  731         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  732         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  733         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  734         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  735         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  736             MIN(MFSNAMELEN, OMFSNAMELEN));
  737         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  738             MIN(MNAMELEN, OMNAMELEN));
  739         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  740             MIN(MNAMELEN, OMNAMELEN));
  741         osp->f_fsid = nsp->f_fsid;
  742 }
  743 #endif /* COMPAT_FREEBSD4 */
  744 
  745 #if defined(COMPAT_FREEBSD11)
  746 /*
  747  * Get old format filesystem statistics.
  748  */
  749 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
  750 
  751 int
  752 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
  753 {
  754         struct freebsd11_statfs osb;
  755         struct statfs *sfp;
  756         int error;
  757 
  758         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  759         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  760         if (error == 0) {
  761                 freebsd11_cvtstatfs(sfp, &osb);
  762                 error = copyout(&osb, uap->buf, sizeof(osb));
  763         }
  764         free(sfp, M_STATFS);
  765         return (error);
  766 }
  767 
  768 /*
  769  * Get filesystem statistics.
  770  */
  771 int
  772 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
  773 {
  774         struct freebsd11_statfs osb;
  775         struct statfs *sfp;
  776         int error;
  777 
  778         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  779         error = kern_fstatfs(td, uap->fd, sfp);
  780         if (error == 0) {
  781                 freebsd11_cvtstatfs(sfp, &osb);
  782                 error = copyout(&osb, uap->buf, sizeof(osb));
  783         }
  784         free(sfp, M_STATFS);
  785         return (error);
  786 }
  787 
  788 /*
  789  * Get statistics on all filesystems.
  790  */
  791 int
  792 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
  793 {
  794         struct freebsd11_statfs osb;
  795         struct statfs *buf, *sp;
  796         size_t count, size;
  797         int error;
  798 
  799         count = uap->bufsize / sizeof(struct ostatfs);
  800         size = count * sizeof(struct statfs);
  801         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  802             uap->mode);
  803         if (error == 0)
  804                 td->td_retval[0] = count;
  805         if (size > 0) {
  806                 sp = buf;
  807                 while (count > 0 && error == 0) {
  808                         freebsd11_cvtstatfs(sp, &osb);
  809                         error = copyout(&osb, uap->buf, sizeof(osb));
  810                         sp++;
  811                         uap->buf++;
  812                         count--;
  813                 }
  814                 free(buf, M_STATFS);
  815         }
  816         return (error);
  817 }
  818 
  819 /*
  820  * Implement fstatfs() for (NFS) file handles.
  821  */
  822 int
  823 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
  824 {
  825         struct freebsd11_statfs osb;
  826         struct statfs *sfp;
  827         fhandle_t fh;
  828         int error;
  829 
  830         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  831         if (error)
  832                 return (error);
  833         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  834         error = kern_fhstatfs(td, fh, sfp);
  835         if (error == 0) {
  836                 freebsd11_cvtstatfs(sfp, &osb);
  837                 error = copyout(&osb, uap->buf, sizeof(osb));
  838         }
  839         free(sfp, M_STATFS);
  840         return (error);
  841 }
  842 
  843 /*
  844  * Convert a new format statfs structure to an old format statfs structure.
       *
       * nsp is the source (current layout) and osp the destination (FreeBSD 11
       * layout).  The destination is zeroed first, so any member that is not
       * assigned below is left as zero.
  845  */
  846 static void
  847 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
  848 {
  849 
  850         bzero(osp, sizeof(*osp));
  851         osp->f_version = FREEBSD11_STATFS_VERSION;
  852         osp->f_type = nsp->f_type;
  853         osp->f_flags = nsp->f_flags;
  854         osp->f_bsize = nsp->f_bsize;
  855         osp->f_iosize = nsp->f_iosize;
  856         osp->f_blocks = nsp->f_blocks;
  857         osp->f_bfree = nsp->f_bfree;
  858         osp->f_bavail = nsp->f_bavail;
  859         osp->f_files = nsp->f_files;
  860         osp->f_ffree = nsp->f_ffree;
  861         osp->f_syncwrites = nsp->f_syncwrites;
  862         osp->f_asyncwrites = nsp->f_asyncwrites;
  863         osp->f_syncreads = nsp->f_syncreads;
  864         osp->f_asyncreads = nsp->f_asyncreads;
  865         osp->f_namemax = nsp->f_namemax;
  866         osp->f_owner = nsp->f_owner;
  867         osp->f_fsid = nsp->f_fsid;
              /*
               * Each name copy is bounded by the smaller of the name-length
               * constant and the size of the destination array, so the copy
               * never writes past the old-format field.
               */
  868         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  869             MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
  870         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  871             MIN(MNAMELEN, sizeof(osp->f_mntonname)));
  872         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  873             MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
  874 }
  875 #endif /* COMPAT_FREEBSD11 */
  876 
  877 /*
  878  * Change current working directory to a given file descriptor.
       *
       * The vnode behind uap->fd is validated with change_dir().  If a
       * filesystem is mounted on that vnode, the root vnode of the mounted
       * filesystem is used instead (repeatedly, for stacked mounts).  Returns 0
       * on success or the error from getvnode(), change_dir() or VFS_ROOT().
  879  */
  880 #ifndef _SYS_SYSPROTO_H_
  881 struct fchdir_args {
  882         int     fd;
  883 };
  884 #endif
  885 int
  886 sys_fchdir(struct thread *td, struct fchdir_args *uap)
  887 {
  888         struct vnode *vp, *tdp;
  889         struct mount *mp;
  890         struct file *fp;
  891         int error;
  892 
  893         AUDIT_ARG_FD(uap->fd);
  894         error = getvnode(td, uap->fd, &cap_fchdir_rights,
  895             &fp);
  896         if (error != 0)
  897                 return (error);
  898         vp = fp->f_vnode;
              /* Take our own vnode reference before the file one is dropped. */
  899         vrefact(vp);
  900         fdrop(fp, td);
  901         vn_lock(vp, LK_SHARED | LK_RETRY);
  902         AUDIT_ARG_VNODE1(vp);
  903         error = change_dir(vp, td);
              /* Descend through any filesystems mounted on top of vp. */
  904         while (!error && (mp = vp->v_mountedhere) != NULL) {
                      /*
                       * If the mount cannot be busied, go back to the loop
                       * condition, which re-reads v_mountedhere.
                       */
  905                 if (vfs_busy(mp, 0))
  906                         continue;
  907                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  908                 vfs_unbusy(mp);
  909                 if (error != 0)
  910                         break;
                      /* Swap the covered vnode for the mounted root. */
  911                 vput(vp);
  912                 vp = tdp;
  913         }
  914         if (error != 0) {
  915                 vput(vp);
  916                 return (error);
  917         }
              /*
               * Only the lock is released here; the reference is handed to
               * pwd_chdir().  NOTE(review): presumably pwd_chdir() takes
               * ownership of that reference -- confirm against its definition.
               */
  918         VOP_UNLOCK(vp);
  919         pwd_chdir(td, vp);
  920         return (0);
  921 }
  922 
  923 /*
  924  * Change current working directory (``.'').
       *
       * Thin system call wrapper: forwards the user-space path to kern_chdir()
       * and returns its result.
  925  */
  926 #ifndef _SYS_SYSPROTO_H_
  927 struct chdir_args {
  928         char    *path;
  929 };
  930 #endif
  931 int
  932 sys_chdir(struct thread *td, struct chdir_args *uap)
  933 {
  934 
  935         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  936 }
  937 
  938 int
  939 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg)
  940 {
              /*
               * Look up path (in address space pathseg), following symbolic
               * links, verify it with change_dir() and make it the current
               * directory.  Returns 0 or the error from namei()/change_dir().
               */
  941         struct nameidata nd;
  942         int error;
  943 
              /* The lookup returns the leaf vnode share-locked. */
  944         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  945             pathseg, path, td);
  946         if ((error = namei(&nd)) != 0)
  947                 return (error);
  948         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  949                 vput(nd.ni_vp);
  950                 NDFREE_NOTHING(&nd);
  951                 return (error);
  952         }
              /*
               * Drop only the lock; the vnode is then passed to pwd_chdir().
               * NOTE(review): presumably pwd_chdir() keeps the reference --
               * confirm against its definition.
               */
  953         VOP_UNLOCK(nd.ni_vp);
  954         NDFREE_NOTHING(&nd);
  955         pwd_chdir(td, nd.ni_vp);
  956         return (0);
  957 }
  958 
  959 /*
  960  * Change notion of root (``/'') directory.
       *
       * Requires PRIV_VFS_CHROOT.  The target is looked up, checked with
       * change_dir() and, when MAC is compiled in, with
       * mac_vnode_check_chroot(), then handed to pwd_chroot().  Returns 0 or
       * the first error encountered.
  961  */
  962 #ifndef _SYS_SYSPROTO_H_
  963 struct chroot_args {
  964         char    *path;
  965 };
  966 #endif
  967 int
  968 sys_chroot(struct thread *td, struct chroot_args *uap)
  969 {
  970         struct nameidata nd;
  971         int error;
  972 
  973         error = priv_check(td, PRIV_VFS_CHROOT);
  974         if (error != 0)
  975                 return (error);
  976         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  977             UIO_USERSPACE, uap->path, td);
  978         error = namei(&nd);
  979         if (error != 0)
  980                 goto error;
  981         error = change_dir(nd.ni_vp, td);
  982         if (error != 0)
  983                 goto e_vunlock;
  984 #ifdef MAC
  985         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  986         if (error != 0)
  987                 goto e_vunlock;
  988 #endif
              /*
               * Unlock before calling pwd_chroot(); the lookup reference is
               * released with vrele() afterwards whether or not it succeeded.
               */
  989         VOP_UNLOCK(nd.ni_vp);
  990         error = pwd_chroot(td, nd.ni_vp);
  991         vrele(nd.ni_vp);
  992         NDFREE_NOTHING(&nd);
  993         return (error);
      /* Failure after the lookup: the vnode is still locked and referenced. */
  994 e_vunlock:
  995         vput(nd.ni_vp);
      /* Failure of the lookup itself: there is no vnode to release. */
  996 error:
  997         NDFREE_NOTHING(&nd);
  998         return (error);
  999 }
 1000 
 1001 /*
 1002  * Common routine for chroot and chdir.  Callers must provide a locked vnode
 1003  * instance.
       *
       * Returns ENOTDIR if vp is not a directory, the MAC verdict when MAC is
       * compiled in and denies the operation, and otherwise the result of the
       * VEXEC (search) access check.  The vnode's lock and reference are left
       * untouched; the caller releases them.
 1004  */
 1005 int
 1006 change_dir(struct vnode *vp, struct thread *td)
 1007 {
 1008 #ifdef MAC
 1009         int error;
 1010 #endif
 1011 
 1012         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
 1013         if (vp->v_type != VDIR)
 1014                 return (ENOTDIR);
 1015 #ifdef MAC
 1016         error = mac_vnode_check_chdir(td->td_ucred, vp);
 1017         if (error != 0)
 1018                 return (error);
 1019 #endif
 1020         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
 1021 }
 1022 
 1023 static __inline void
 1024 flags_to_rights(int flags, cap_rights_t *rightsp)
 1025 {
              /*
               * Add to *rightsp the capability rights implied by the open(2)
               * flags.  Rights are only ever added; the caller initializes the
               * set beforehand.
               */
 1026 
 1027         if (flags & O_EXEC) {
 1028                 cap_rights_set_one(rightsp, CAP_FEXECVE);
 1029         } else {
 1030                 switch ((flags & O_ACCMODE)) {
 1031                 case O_RDONLY:
 1032                         cap_rights_set_one(rightsp, CAP_READ);
 1033                         break;
 1034                 case O_RDWR:
 1035                         cap_rights_set_one(rightsp, CAP_READ);
 1036                         /* FALLTHROUGH */
 1037                 case O_WRONLY:
 1038                         cap_rights_set_one(rightsp, CAP_WRITE);
                              /* CAP_SEEK is skipped for O_APPEND and O_TRUNC. */
 1039                         if (!(flags & (O_APPEND | O_TRUNC)))
 1040                                 cap_rights_set_one(rightsp, CAP_SEEK);
 1041                         break;
 1042                 }
 1043         }
 1044 
 1045         if (flags & O_CREAT)
 1046                 cap_rights_set_one(rightsp, CAP_CREATE);
 1047 
 1048         if (flags & O_TRUNC)
 1049                 cap_rights_set_one(rightsp, CAP_FTRUNCATE);
 1050 
 1051         if (flags & (O_SYNC | O_FSYNC))
 1052                 cap_rights_set_one(rightsp, CAP_FSYNC);
 1053 
 1054         if (flags & (O_EXLOCK | O_SHLOCK))
 1055                 cap_rights_set_one(rightsp, CAP_FLOCK);
 1056 }
 1057 
 1058 /*
 1059  * Check permissions, allocate an open file structure, and call the device
 1060  * open routine if any.
       *
       * The work itself is done by kern_openat(); this wrapper passes AT_FDCWD
       * as the directory descriptor and a user-space path.
 1061  */
 1062 #ifndef _SYS_SYSPROTO_H_
 1063 struct open_args {
 1064         char    *path;
 1065         int     flags;
 1066         int     mode;
 1067 };
 1068 #endif
 1069 int
 1070 sys_open(struct thread *td, struct open_args *uap)
 1071 {
 1072 
 1073         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1074             uap->flags, uap->mode));
 1075 }
 1076 
 1077 #ifndef _SYS_SYSPROTO_H_
 1078 struct openat_args {
 1079         int     fd;
 1080         char    *path;
 1081         int     flag;
 1082         int     mode;
 1083 };
 1084 #endif
      /*
       * System call wrapper: records uap->fd for audit and forwards the
       * user-space path, flags and mode to kern_openat() with uap->fd as the
       * directory descriptor.
       */
 1085 int
 1086 sys_openat(struct thread *td, struct openat_args *uap)
 1087 {
 1088 
 1089         AUDIT_ARG_FD(uap->fd);
 1090         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1091             uap->mode));
 1092 }
 1093 
 1094 int
 1095 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
 1096     int flags, int mode)
 1097 {
              /*
               * Common implementation behind the open-style entry points in
               * this file.  Derives the capability rights implied by flags,
               * allocates a not-yet-installed struct file, opens path (passed
               * to the lookup together with fd) with vn_open(), truncates the
               * file if O_TRUNC is set, and finally installs the file in the
               * descriptor table.  On success the descriptor index is stored
               * in td->td_retval[0] and 0 is returned; otherwise an errno
               * value is returned and the struct file is discarded.
               */
 1098         struct proc *p = td->td_proc;
 1099         struct filedesc *fdp;
 1100         struct pwddesc *pdp;
 1101         struct file *fp;
 1102         struct vnode *vp;
 1103         struct nameidata nd;
 1104         cap_rights_t rights;
 1105         int cmode, error, indx;
 1106 
              /* -1 means that no descriptor has been installed yet. */
 1107         indx = -1;
 1108         fdp = p->p_fd;
 1109         pdp = p->p_pd;
 1110 
 1111         AUDIT_ARG_FFLAGS(flags);
 1112         AUDIT_ARG_MODE(mode);
 1113         cap_rights_init_one(&rights, CAP_LOOKUP);
 1114         flags_to_rights(flags, &rights);
 1115         /*
 1116          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1117          * may be specified.
 1118          */
 1119         if (flags & O_EXEC) {
 1120                 if (flags & O_ACCMODE)
 1121                         return (EINVAL);
 1122         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1123                 return (EINVAL);
 1124         } else {
                      /* Only the non-O_EXEC case is passed through FFLAGS(). */
 1125                 flags = FFLAGS(flags);
 1126         }
 1127 
 1128         /*
 1129          * Allocate a file structure. The descriptor to reference it
 1130          * is allocated and used by finstall_refed() below.
 1131          */
 1132         error = falloc_noinstall(td, &fp);
 1133         if (error != 0)
 1134                 return (error);
 1135         /* Set the flags early so the finit in devfs can pick them up. */
 1136         fp->f_flag = flags & FMASK;
              /* Apply the process creation mask and strip S_ISTXT. */
 1137         cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
 1138         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1139             &rights, td);
 1140         td->td_dupfd = -1;              /* XXX check for fdopen */
 1141         error = vn_open(&nd, &flags, cmode, fp);
 1142         if (error != 0) {
 1143                 /*
 1144                  * If the vn_open replaced the method vector, something
 1145                  * wondrous happened deep below and we just pass it up
 1146                  * pretending we know what we do.
 1147                  */
 1148                 if (error == ENXIO && fp->f_ops != &badfileops)
 1149                         goto success;
 1150 
 1151                 /*
 1152                  * Handle special fdopen() case. bleh.
 1153                  *
 1154                  * Don't do this for relative (capability) lookups; we don't
 1155                  * understand exactly what would happen, and we don't think
 1156                  * that it ever should.
 1157                  */
 1158                 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
 1159                     (error == ENODEV || error == ENXIO) &&
 1160                     td->td_dupfd >= 0) {
                              /* On success dupfdopen() has already set indx. */
 1161                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1162                             &indx);
 1163                         if (error == 0)
 1164                                 goto success;
 1165                 }
 1166 
 1167                 goto bad;
 1168         }
 1169         td->td_dupfd = 0;
 1170         NDFREE(&nd, NDF_ONLY_PNBUF);
 1171         vp = nd.ni_vp;
 1172 
 1173         /*
 1174          * Store the vnode, for any f_type. Typically, the vnode use
 1175          * count is decremented by direct call to vn_closefile() for
 1176          * files that switched type in the cdevsw fdopen() method.
 1177          */
 1178         fp->f_vnode = vp;
 1179         /*
 1180          * If the file wasn't claimed by devfs bind it to the normal
 1181          * vnode operations here.
 1182          */
 1183         if (fp->f_ops == &badfileops) {
 1184                 KASSERT(vp->v_type != VFIFO,
 1185                     ("Unexpected fifo fp %p vp %p", fp, vp));
 1186                 finit_vnode(fp, flags, NULL, &vnops);
 1187         }
 1188 
              /* The truncation below goes through fp, after vp is unlocked. */
 1189         VOP_UNLOCK(vp);
 1190         if (flags & O_TRUNC) {
 1191                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1192                 if (error != 0)
 1193                         goto bad;
 1194         }
 1195 success:
 1196         /*
 1197          * If we haven't already installed the FD (for dupfdopen), do so now.
 1198          */
 1199         if (indx == -1) {
 1200                 struct filecaps *fcaps;
 1201 
 1202 #ifdef CAPABILITIES
 1203                 if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
 1204                         fcaps = &nd.ni_filecaps;
 1205                 else
 1206 #endif
 1207                         fcaps = NULL;
 1208                 error = finstall_refed(td, fp, &indx, flags, fcaps);
 1209                 /* On success finstall_refed() consumes fcaps. */
 1210                 if (error != 0) {
 1211                         filecaps_free(&nd.ni_filecaps);
 1212                         goto bad;
 1213                 }
 1214         } else {
                      /*
                       * A descriptor was installed by dupfdopen(); the file
                       * allocated above is not needed and is discarded.
                       */
 1215                 filecaps_free(&nd.ni_filecaps);
 1216                 falloc_abort(td, fp);
 1217         }
 1218 
 1219         td->td_retval[0] = indx;
 1220         return (0);
 1221 bad:
              /* Failure paths arrive here with no descriptor installed. */
 1222         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1223         falloc_abort(td, fp);
 1224         return (error);
 1225 }
 1226 
 1227 #ifdef COMPAT_43
 1228 /*
 1229  * Create a file.
       *
       * COMPAT_43 entry point, implemented as a kern_openat() call relative to
       * AT_FDCWD with the fixed flags O_WRONLY | O_CREAT | O_TRUNC and the
       * caller-supplied mode.
 1230  */
 1231 #ifndef _SYS_SYSPROTO_H_
 1232 struct ocreat_args {
 1233         char    *path;
 1234         int     mode;
 1235 };
 1236 #endif
 1237 int
 1238 ocreat(struct thread *td, struct ocreat_args *uap)
 1239 {
 1240 
 1241         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1242             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1243 }
 1244 #endif /* COMPAT_43 */
 1245 
 1246 /*
 1247  * Create a special file.
       *
       * System call wrapper: forwards the directory descriptor, user-space
       * path, mode and device number to kern_mknodat().
 1248  */
 1249 #ifndef _SYS_SYSPROTO_H_
 1250 struct mknodat_args {
 1251         int     fd;
 1252         char    *path;
 1253         mode_t  mode;
 1254         dev_t   dev;
 1255 };
 1256 #endif
 1257 int
 1258 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1259 {
 1260 
 1261         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1262             uap->dev));
 1263 }
 1264 
 1265 #if defined(COMPAT_FREEBSD11)
 1266 int
 1267 freebsd11_mknod(struct thread *td,
 1268     struct freebsd11_mknod_args *uap)
 1269 {
              /*
               * FreeBSD 11 compatibility wrapper: same as kern_mknodat() with
               * the path resolved relative to AT_FDCWD.
               */
 1270 
 1271         return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1272             uap->mode, uap->dev));
 1273 }
 1274 
 1275 int
 1276 freebsd11_mknodat(struct thread *td,
 1277     struct freebsd11_mknodat_args *uap)
 1278 {
              /*
               * FreeBSD 11 compatibility wrapper: forwards uap->fd, the
               * user-space path, mode and dev unchanged to kern_mknodat().
               */
 1279 
 1280         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1281             uap->dev));
 1282 }
 1283 #endif /* COMPAT_FREEBSD11 */
 1284 
 1285 int
 1286 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
 1287     int mode, dev_t dev)
 1288 {
              /*
               * Create a character device, block device or whiteout entry at
               * path (passed to the lookup together with fd).  A FIFO request
               * with dev == 0 is delegated to kern_mkfifoat(); every other
               * file type yields EINVAL.  Returns EEXIST if the name already
               * exists, otherwise 0 or the error from the privilege check,
               * the lookup, the MAC check or the VOP.
               */
 1289         struct vnode *vp;
 1290         struct mount *mp;
 1291         struct vattr vattr;
 1292         struct nameidata nd;
 1293         int error, whiteout = 0;
 1294 
 1295         AUDIT_ARG_MODE(mode);
 1296         AUDIT_ARG_DEV(dev);
              /* Validate the requested type and check privilege up front. */
 1297         switch (mode & S_IFMT) {
 1298         case S_IFCHR:
 1299         case S_IFBLK:
 1300                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1301                 if (error == 0 && dev == VNOVAL)
 1302                         error = EINVAL;
 1303                 break;
 1304         case S_IFWHT:
 1305                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1306                 break;
 1307         case S_IFIFO:
 1308                 if (dev == 0)
 1309                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1310                 /* FALLTHROUGH */
 1311         default:
 1312                 error = EINVAL;
 1313                 break;
 1314         }
 1315         if (error != 0)
 1316                 return (error);
      /* The lookup is redone from here after a write-suspension wait or ERELOOKUP. */
 1317 restart:
 1318         bwillwrite();
 1319         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1320             NOCACHE, pathseg, path, fd, &cap_mknodat_rights,
 1321             td);
 1322         if ((error = namei(&nd)) != 0)
 1323                 return (error);
 1324         vp = nd.ni_vp;
 1325         if (vp != NULL) {
                      /* The name already exists: release everything and fail. */
 1326                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1327                 if (vp == nd.ni_dvp)
 1328                         vrele(nd.ni_dvp);
 1329                 else
 1330                         vput(nd.ni_dvp);
 1331                 vrele(vp);
 1332                 return (EEXIST);
 1333         } else {
 1334                 VATTR_NULL(&vattr);
 1335                 vattr.va_mode = (mode & ALLPERMS) &
 1336                     ~td->td_proc->p_pd->pd_cmask;
 1337                 vattr.va_rdev = dev;
 1338                 whiteout = 0;
 1339 
                      /* Other types were rejected by the switch above. */
 1340                 switch (mode & S_IFMT) {
 1341                 case S_IFCHR:
 1342                         vattr.va_type = VCHR;
 1343                         break;
 1344                 case S_IFBLK:
 1345                         vattr.va_type = VBLK;
 1346                         break;
 1347                 case S_IFWHT:
 1348                         whiteout = 1;
 1349                         break;
 1350                 default:
 1351                         panic("kern_mknod: invalid mode");
 1352                 }
 1353         }
              /*
               * If write access cannot be started without waiting, release
               * the parent directory, wait, and redo the lookup.
               */
 1354         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1355                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1356                 vput(nd.ni_dvp);
 1357                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1358                         return (error);
 1359                 goto restart;
 1360         }
 1361 #ifdef MAC
              /* The MAC create check is not applied to whiteouts. */
 1362         if (error == 0 && !whiteout)
 1363                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1364                     &nd.ni_cnd, &vattr);
 1365 #endif
 1366         if (error == 0) {
 1367                 if (whiteout)
 1368                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1369                 else {
 1370                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1371                                                 &nd.ni_cnd, &vattr);
 1372                 }
 1373         }
              /* A new vnode is passed only when VOP_MKNOD() succeeded. */
 1374         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
 1375             true);
 1376         vn_finished_write(mp);
 1377         NDFREE(&nd, NDF_ONLY_PNBUF);
 1378         if (error == ERELOOKUP)
 1379                 goto restart;
 1380         return (error);
 1381 }
 1382 
 1383 /*
 1384  * Create a named pipe.
       *
       * System call wrapper: calls kern_mkfifoat() with AT_FDCWD as the
       * directory descriptor and a user-space path.
 1385  */
 1386 #ifndef _SYS_SYSPROTO_H_
 1387 struct mkfifo_args {
 1388         char    *path;
 1389         int     mode;
 1390 };
 1391 #endif
 1392 int
 1393 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 1394 {
 1395 
 1396         return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1397             uap->mode));
 1398 }
 1399 
 1400 #ifndef _SYS_SYSPROTO_H_
 1401 struct mkfifoat_args {
 1402         int     fd;
 1403         char    *path;
 1404         mode_t  mode;
 1405 };
 1406 #endif
      /*
       * System call wrapper: forwards uap->fd, the user-space path and the mode
       * to kern_mkfifoat().
       */
 1407 int
 1408 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1409 {
 1410 
 1411         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1412             uap->mode));
 1413 }
 1414 
 1415 int
 1416 kern_mkfifoat(struct thread *td, int fd, const char *path,
 1417     enum uio_seg pathseg, int mode)
 1418 {
              /*
               * Create a FIFO at path (passed to the lookup together with fd).
               * The permission bits of mode are filtered through the process
               * creation mask.  Returns EEXIST if the name already exists,
               * otherwise 0 or the error from the lookup, the MAC check or
               * VOP_MKNOD().
               */
 1419         struct mount *mp;
 1420         struct vattr vattr;
 1421         struct nameidata nd;
 1422         int error;
 1423 
 1424         AUDIT_ARG_MODE(mode);
      /* The lookup is redone from here after a write-suspension wait or ERELOOKUP. */
 1425 restart:
 1426         bwillwrite();
 1427         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1428             NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights,
 1429             td);
 1430         if ((error = namei(&nd)) != 0)
 1431                 return (error);
 1432         if (nd.ni_vp != NULL) {
                      /* The name already exists: release everything and fail. */
 1433                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1434                 if (nd.ni_vp == nd.ni_dvp)
 1435                         vrele(nd.ni_dvp);
 1436                 else
 1437                         vput(nd.ni_dvp);
 1438                 vrele(nd.ni_vp);
 1439                 return (EEXIST);
 1440         }
              /*
               * If write access cannot be started without waiting, release
               * the parent directory, wait, and redo the lookup.
               */
 1441         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1442                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1443                 vput(nd.ni_dvp);
 1444                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1445                         return (error);
 1446                 goto restart;
 1447         }
 1448         VATTR_NULL(&vattr);
 1449         vattr.va_type = VFIFO;
 1450         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
 1451 #ifdef MAC
 1452         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1453             &vattr);
 1454         if (error != 0)
 1455                 goto out;
 1456 #endif
 1457         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1458 #ifdef MAC
 1459 out:
 1460 #endif
              /* A new vnode is passed only when the creation succeeded. */
 1461         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 1462         vn_finished_write(mp);
 1463         NDFREE(&nd, NDF_ONLY_PNBUF);
 1464         if (error == ERELOOKUP)
 1465                 goto restart;
 1466         return (error);
 1467 }
 1468 
 1469 /*
 1470  * Make a hard file link.
       *
       * System call wrapper: both paths are resolved relative to AT_FDCWD and
       * FOLLOW is passed as the lookup flag for the source path.
 1471  */
 1472 #ifndef _SYS_SYSPROTO_H_
 1473 struct link_args {
 1474         char    *path;
 1475         char    *link;
 1476 };
 1477 #endif
 1478 int
 1479 sys_link(struct thread *td, struct link_args *uap)
 1480 {
 1481 
 1482         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 1483             UIO_USERSPACE, FOLLOW));
 1484 }
 1485 
 1486 #ifndef _SYS_SYSPROTO_H_
 1487 struct linkat_args {
 1488         int     fd1;
 1489         char    *path1;
 1490         int     fd2;
 1491         char    *path2;
 1492         int     flag;
 1493 };
 1494 #endif
      /*
       * System call wrapper for kern_linkat().  Only AT_SYMLINK_FOLLOW and
       * AT_RESOLVE_BENEATH are accepted in uap->flag; any other bit yields
       * EINVAL.  The accepted AT_* bits are translated to lookup flags with
       * at2cnpflags().
       */
 1495 int
 1496 sys_linkat(struct thread *td, struct linkat_args *uap)
 1497 {
 1498         int flag;
 1499 
 1500         flag = uap->flag;
 1501         if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH)) != 0)
 1502                 return (EINVAL);
 1503 
 1504         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1505             UIO_USERSPACE, at2cnpflags(flag, AT_SYMLINK_FOLLOW |
 1506             AT_RESOLVE_BENEATH)));
 1507 }
 1508 
 1509 int hardlink_check_uid = 0;     /* read by can_hardlink(); off by default */
 1510 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1511     &hardlink_check_uid, 0,
 1512     "Unprivileged processes cannot create hard links to files owned by other "
 1513     "users");
      /* Group counterpart of the knob above; also read by can_hardlink(). */
 1514 static int hardlink_check_gid = 0;
 1515 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1516     &hardlink_check_gid, 0,
 1517     "Unprivileged processes cannot create hard links to files owned by other "
 1518     "groups");
 1519 
 1520 static int
 1521 can_hardlink(struct vnode *vp, struct ucred *cred)
 1522 {
              /*
               * Apply the optional hardlink_check_uid / hardlink_check_gid
               * policies to a prospective link to vp.  Returns 0 when the
               * link is allowed, the VOP_GETATTR() error if the attributes
               * cannot be read, or the priv_check_cred() error when a policy
               * applies and cred lacks PRIV_VFS_LINK.
               */
 1523         struct vattr va;
 1524         int error;
 1525 
              /* Fast path: neither policy is enabled, so skip the getattr. */
 1526         if (!hardlink_check_uid && !hardlink_check_gid)
 1527                 return (0);
 1528 
 1529         error = VOP_GETATTR(vp, &va, cred);
 1530         if (error != 0)
 1531                 return (error);
 1532 
              /* File owned by another user: requires PRIV_VFS_LINK. */
 1533         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1534                 error = priv_check_cred(cred, PRIV_VFS_LINK);
 1535                 if (error != 0)
 1536                         return (error);
 1537         }
 1538 
              /* File's group not among cred's groups: same requirement. */
 1539         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1540                 error = priv_check_cred(cred, PRIV_VFS_LINK);
 1541                 if (error != 0)
 1542                         return (error);
 1543         }
 1544 
 1545         return (0);
 1546 }
 1547 
 1548 int
 1549 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
 1550     const char *path2, enum uio_seg segflag, int follow)
 1551 {
              /*
               * Create a hard link named path2 (looked up with fd2) to the
               * file named by path1 (looked up with fd1).  follow is OR-ed
               * into the lookup flags for the source path.  The source vnode
               * from each lookup is handed to kern_linkat_vp(); the whole
               * sequence, source lookup included, is repeated while that
               * helper returns EAGAIN or ERELOOKUP.
               */
 1552         struct nameidata nd;
 1553         int error;
 1554 
 1555         do {
 1556                 bwillwrite();
 1557                 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag,
 1558                     path1, fd1, &cap_linkat_source_rights, td);
 1559                 if ((error = namei(&nd)) != 0)
 1560                         return (error);
 1561                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1562                 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
 1563         } while (error ==  EAGAIN || error == ERELOOKUP);
 1564         return (error);
 1565 }
 1566 
 1567 static int
 1568 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
 1569     enum uio_seg segflag)
 1570 {
              /*
               * Link the already looked-up source vnode vp under the name
               * path (looked up with fd).  The caller's reference on vp is
               * released on every return path.  Returns EPERM for a
               * directory source, EEXIST if the target name exists, EXDEV if
               * the target directory is on a different mount, and EAGAIN
               * when the caller should redo its lookup and call again.
               */
 1571         struct nameidata nd;
 1572         struct mount *mp;
 1573         int error;
 1574 
 1575         if (vp->v_type == VDIR) {
 1576                 vrele(vp);
 1577                 return (EPERM);         /* POSIX */
 1578         }
 1579         NDINIT_ATRIGHTS(&nd, CREATE,
 1580             LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 1581             &cap_linkat_target_rights, td);
 1582         if ((error = namei(&nd)) == 0) {
 1583                 if (nd.ni_vp != NULL) {
                              /* The target name is already in use. */
 1584                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1585                         if (nd.ni_dvp == nd.ni_vp)
 1586                                 vrele(nd.ni_dvp);
 1587                         else
 1588                                 vput(nd.ni_dvp);
 1589                         vrele(nd.ni_vp);
 1590                         vrele(vp);
 1591                         return (EEXIST);
 1592                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1593                         /*
 1594                          * Cross-device link.  No need to recheck
 1595                          * vp->v_type, since it cannot change, except
 1596                          * to VBAD.
 1597                          */
 1598                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1599                         vput(nd.ni_dvp);
 1600                         vrele(vp);
 1601                         return (EXDEV);
 1602                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1603                         error = can_hardlink(vp, td->td_ucred);
 1604 #ifdef MAC
 1605                         if (error == 0)
 1606                                 error = mac_vnode_check_link(td->td_ucred,
 1607                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1608 #endif
 1609                         if (error != 0) {
 1610                                 vput(vp);
 1611                                 vput(nd.ni_dvp);
 1612                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1613                                 return (error);
 1614                         }
 1615                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1616                         if (error != 0) {
                                      /*
                                       * Writes cannot start right now: drop
                                       * both vnodes, wait, and ask the caller
                                       * to retry with EAGAIN.
                                       */
 1617                                 vput(vp);
 1618                                 vput(nd.ni_dvp);
 1619                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1620                                 error = vn_start_write(NULL, &mp,
 1621                                     V_XSLEEP | PCATCH);
 1622                                 if (error != 0)
 1623                                         return (error);
 1624                                 return (EAGAIN);
 1625                         }
 1626                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1627                         VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 1628                         vn_finished_write(mp);
 1629                         NDFREE(&nd, NDF_ONLY_PNBUF);
                              /* Cleared so the final vrele() below is skipped. */
 1630                         vp = NULL;
 1631                 } else {
                              /* Locking vp failed: have the caller retry. */
 1632                         vput(nd.ni_dvp);
 1633                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1634                         vrele(vp);
 1635                         return (EAGAIN);
 1636                 }
 1637         }
              /* Still non-NULL only when the target lookup itself failed. */
 1638         if (vp != NULL)
 1639                 vrele(vp);
 1640         return (error);
 1641 }
 1642 
 1643 /*
 1644  * Make a symbolic link.
       *
       * System call wrapper: uap->path is passed as the first path argument
       * of kern_symlinkat() and uap->link, resolved relative to AT_FDCWD, as
       * the name to create.
 1645  */
 1646 #ifndef _SYS_SYSPROTO_H_
 1647 struct symlink_args {
 1648         char    *path;
 1649         char    *link;
 1650 };
 1651 #endif
 1652 int
 1653 sys_symlink(struct thread *td, struct symlink_args *uap)
 1654 {
 1655 
 1656         return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 1657             UIO_USERSPACE));
 1658 }
 1659 
 1660 #ifndef _SYS_SYSPROTO_H_
 1661 struct symlinkat_args {
 1662         char    *path1;
 1663         int     fd;
 1664         char    *path2;
 1665 };
 1666 #endif
      /*
       * System call wrapper: uap->path1 is passed as the first path argument
       * of kern_symlinkat(), and uap->path2, looked up with uap->fd, is the
       * name to create.  The fallback argument structure above names its first
       * member path1 so that it matches the member dereferenced below.
       */
 1667 int
 1668 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1669 {
 1670 
 1671         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1672             UIO_USERSPACE));
 1673 }
 1674 
 1675 int
 1676 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2,
 1677     enum uio_seg segflg)
 1678 {
              /*
               * Create a symbolic link named path2 (looked up with fd) whose
               * target string is path1.  When segflg is not UIO_SYSSPACE the
               * target string is first copied into a namei_zone buffer, which
               * is released at "out".  Returns EEXIST if path2 already
               * exists, otherwise 0 or the error from copyinstr(), the
               * lookup, the MAC check or VOP_SYMLINK().
               */
 1679         struct mount *mp;
 1680         struct vattr vattr;
 1681         const char *syspath;
 1682         char *tmppath;
 1683         struct nameidata nd;
 1684         int error;
 1685 
 1686         if (segflg == UIO_SYSSPACE) {
 1687                 syspath = path1;
 1688         } else {
 1689                 tmppath = uma_zalloc(namei_zone, M_WAITOK);
 1690                 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0)
 1691                         goto out;
 1692                 syspath = tmppath;
 1693         }
 1694         AUDIT_ARG_TEXT(syspath);
      /* The lookup is redone from here after a write-suspension wait or ERELOOKUP. */
 1695 restart:
 1696         bwillwrite();
 1697         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1698             NOCACHE, segflg, path2, fd, &cap_symlinkat_rights,
 1699             td);
 1700         if ((error = namei(&nd)) != 0)
 1701                 goto out;
 1702         if (nd.ni_vp) {
                      /* The link name already exists. */
 1703                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1704                 if (nd.ni_vp == nd.ni_dvp)
 1705                         vrele(nd.ni_dvp);
 1706                 else
 1707                         vput(nd.ni_dvp);
 1708                 vrele(nd.ni_vp);
 1709                 nd.ni_vp = NULL;
 1710                 error = EEXIST;
 1711                 goto out;
 1712         }
              /*
               * If write access cannot be started without waiting, release
               * the parent directory, wait, and redo the lookup.
               */
 1713         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1714                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1715                 vput(nd.ni_dvp);
 1716                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1717                         goto out;
 1718                 goto restart;
 1719         }
 1720         VATTR_NULL(&vattr);
 1721         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask;
 1722 #ifdef MAC
 1723         vattr.va_type = VLNK;
 1724         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1725             &vattr);
 1726         if (error != 0)
 1727                 goto out2;
 1728 #endif
 1729         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1730 #ifdef MAC
 1731 out2:
 1732 #endif
              /* A new vnode is passed only when the creation succeeded. */
 1733         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 1734         vn_finished_write(mp);
 1735         NDFREE(&nd, NDF_ONLY_PNBUF);
 1736         if (error == ERELOOKUP)
 1737                 goto restart;
 1738 out:
              /* tmppath was allocated only on the non-UIO_SYSSPACE path. */
 1739         if (segflg != UIO_SYSSPACE)
 1740                 uma_zfree(namei_zone, tmppath);
 1741         return (error);
 1742 }
 1743 
 1744 /*
 1745  * Delete a whiteout from the filesystem.
       *
       * The path is looked up with DOWHITEOUT.  If the lookup finds a vnode,
       * or the name is not marked ISWHITEOUT, EEXIST is returned.  Otherwise
       * the whiteout is removed with VOP_WHITEOUT(..., DELETE) and its result
       * is returned.
 1746  */
 1747 #ifndef _SYS_SYSPROTO_H_
 1748 struct undelete_args {
 1749         char *path;
 1750 };
 1751 #endif
 1752 int
 1753 sys_undelete(struct thread *td, struct undelete_args *uap)
 1754 {
 1755         struct mount *mp;
 1756         struct nameidata nd;
 1757         int error;
 1758 
      /* The lookup is redone from here after a write-suspension wait or ERELOOKUP. */
 1759 restart:
 1760         bwillwrite();
 1761         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1762             UIO_USERSPACE, uap->path, td);
 1763         error = namei(&nd);
 1764         if (error != 0)
 1765                 return (error);
 1766 
 1767         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1768                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1769                 if (nd.ni_vp == nd.ni_dvp)
 1770                         vrele(nd.ni_dvp);
 1771                 else
 1772                         vput(nd.ni_dvp);
 1773                 if (nd.ni_vp)
 1774                         vrele(nd.ni_vp);
 1775                 return (EEXIST);
 1776         }
              /*
               * If write access cannot be started without waiting, release
               * the parent directory, wait, and redo the lookup.
               */
 1777         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1778                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1779                 vput(nd.ni_dvp);
 1780                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1781                         return (error);
 1782                 goto restart;
 1783         }
 1784         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1785         NDFREE(&nd, NDF_ONLY_PNBUF);
 1786         vput(nd.ni_dvp);
 1787         vn_finished_write(mp);
 1788         if (error == ERELOOKUP)
 1789                 goto restart;
 1790         return (error);
 1791 }
 1792 
 1793 /*
 1794  * Delete a name from the filesystem.
 1795  */
 1796 #ifndef _SYS_SYSPROTO_H_
 1797 struct unlink_args {
 1798         char    *path;
 1799 };
 1800 #endif
 1801 int
 1802 sys_unlink(struct thread *td, struct unlink_args *uap)
 1803 {
 1804 
 1805         return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 1806             0, 0));
 1807 }
 1808 
 1809 static int
 1810 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd,
 1811     int flag, enum uio_seg pathseg, ino_t oldinum)
 1812 {
 1813 
 1814         if ((flag & ~AT_REMOVEDIR) != 0)
 1815                 return (EINVAL);
 1816 
 1817         if ((flag & AT_REMOVEDIR) != 0)
 1818                 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0));
 1819 
 1820         return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0));
 1821 }
 1822 
 1823 #ifndef _SYS_SYSPROTO_H_
 1824 struct unlinkat_args {
 1825         int     fd;
 1826         char    *path;
 1827         int     flag;
 1828 };
 1829 #endif
 1830 int
 1831 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1832 {
 1833 
 1834         return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag,
 1835             UIO_USERSPACE, 0));
 1836 }
 1837 
 1838 #ifndef _SYS_SYSPROTO_H_
 1839 struct funlinkat_args {
 1840         int             dfd;
 1841         const char      *path;
 1842         int             fd;
 1843         int             flag;
 1844 };
 1845 #endif
 1846 int
 1847 sys_funlinkat(struct thread *td, struct funlinkat_args *uap)
 1848 {
 1849 
 1850         return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag,
 1851             UIO_USERSPACE, 0));
 1852 }
 1853 
 1854 int
 1855 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 1856     enum uio_seg pathseg, int flag, ino_t oldinum)
 1857 {
 1858         struct mount *mp;
 1859         struct file *fp;
 1860         struct vnode *vp;
 1861         struct nameidata nd;
 1862         struct stat sb;
 1863         int error;
 1864 
 1865         fp = NULL;
 1866         if (fd != FD_NONE) {
 1867                 error = getvnode(td, fd, &cap_no_rights, &fp);
 1868                 if (error != 0)
 1869                         return (error);
 1870         }
 1871 
 1872 restart:
 1873         bwillwrite();
 1874         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 1875             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 1876             pathseg, path, dfd, &cap_unlinkat_rights, td);
 1877         if ((error = namei(&nd)) != 0) {
 1878                 if (error == EINVAL)
 1879                         error = EPERM;
 1880                 goto fdout;
 1881         }
 1882         vp = nd.ni_vp;
 1883         if (vp->v_type == VDIR && oldinum == 0) {
 1884                 error = EPERM;          /* POSIX */
 1885         } else if (oldinum != 0 &&
 1886                   ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1887                   sb.st_ino != oldinum) {
 1888                 error = EIDRM;  /* Identifier removed */
 1889         } else if (fp != NULL && fp->f_vnode != vp) {
 1890                 if (VN_IS_DOOMED(fp->f_vnode))
 1891                         error = EBADF;
 1892                 else
 1893                         error = EDEADLK;
 1894         } else {
 1895                 /*
 1896                  * The root of a mounted filesystem cannot be deleted.
 1897                  *
 1898                  * XXX: can this only be a VDIR case?
 1899                  */
 1900                 if (vp->v_vflag & VV_ROOT)
 1901                         error = EBUSY;
 1902         }
 1903         if (error == 0) {
 1904                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1905                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1906                         vput(nd.ni_dvp);
 1907                         if (vp == nd.ni_dvp)
 1908                                 vrele(vp);
 1909                         else
 1910                                 vput(vp);
 1911                         if ((error = vn_start_write(NULL, &mp,
 1912                             V_XSLEEP | PCATCH)) != 0) {
 1913                                 goto fdout;
 1914                         }
 1915                         goto restart;
 1916                 }
 1917 #ifdef MAC
 1918                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1919                     &nd.ni_cnd);
 1920                 if (error != 0)
 1921                         goto out;
 1922 #endif
 1923                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1924                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1925 #ifdef MAC
 1926 out:
 1927 #endif
 1928                 vn_finished_write(mp);
 1929         }
 1930         NDFREE(&nd, NDF_ONLY_PNBUF);
 1931         vput(nd.ni_dvp);
 1932         if (vp == nd.ni_dvp)
 1933                 vrele(vp);
 1934         else
 1935                 vput(vp);
 1936         if (error == ERELOOKUP)
 1937                 goto restart;
 1938 fdout:
 1939         if (fp != NULL)
 1940                 fdrop(fp, td);
 1941         return (error);
 1942 }
 1943 
 1944 /*
 1945  * Reposition read/write file offset.
 1946  */
 1947 #ifndef _SYS_SYSPROTO_H_
 1948 struct lseek_args {
 1949         int     fd;
 1950         int     pad;
 1951         off_t   offset;
 1952         int     whence;
 1953 };
 1954 #endif
 1955 int
 1956 sys_lseek(struct thread *td, struct lseek_args *uap)
 1957 {
 1958 
 1959         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1960 }
 1961 
 1962 int
 1963 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 1964 {
 1965         struct file *fp;
 1966         int error;
 1967 
 1968         AUDIT_ARG_FD(fd);
 1969         error = fget(td, fd, &cap_seek_rights, &fp);
 1970         if (error != 0)
 1971                 return (error);
 1972         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1973             fo_seek(fp, offset, whence, td) : ESPIPE;
 1974         fdrop(fp, td);
 1975         return (error);
 1976 }
 1977 
 1978 #if defined(COMPAT_43)
 1979 /*
 1980  * Reposition read/write file offset.
 1981  */
 1982 #ifndef _SYS_SYSPROTO_H_
 1983 struct olseek_args {
 1984         int     fd;
 1985         long    offset;
 1986         int     whence;
 1987 };
 1988 #endif
 1989 int
 1990 olseek(struct thread *td, struct olseek_args *uap)
 1991 {
 1992 
 1993         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1994 }
 1995 #endif /* COMPAT_43 */
 1996 
 1997 #if defined(COMPAT_FREEBSD6)
 1998 /* Version with the 'pad' argument */
 1999 int
 2000 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 2001 {
 2002 
 2003         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 2004 }
 2005 #endif
 2006 
 2007 /*
 2008  * Check access permissions using passed credentials.
 2009  */
 2010 static int
 2011 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
 2012      struct thread *td)
 2013 {
 2014         accmode_t accmode;
 2015         int error;
 2016 
 2017         /* Flags == 0 means only check for existence. */
 2018         if (user_flags == 0)
 2019                 return (0);
 2020 
 2021         accmode = 0;
 2022         if (user_flags & R_OK)
 2023                 accmode |= VREAD;
 2024         if (user_flags & W_OK)
 2025                 accmode |= VWRITE;
 2026         if (user_flags & X_OK)
 2027                 accmode |= VEXEC;
 2028 #ifdef MAC
 2029         error = mac_vnode_check_access(cred, vp, accmode);
 2030         if (error != 0)
 2031                 return (error);
 2032 #endif
 2033         if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2034                 error = VOP_ACCESS(vp, accmode, cred, td);
 2035         return (error);
 2036 }
 2037 
 2038 /*
 2039  * Check access permissions using "real" credentials.
 2040  */
 2041 #ifndef _SYS_SYSPROTO_H_
 2042 struct access_args {
 2043         char    *path;
 2044         int     amode;
 2045 };
 2046 #endif
 2047 int
 2048 sys_access(struct thread *td, struct access_args *uap)
 2049 {
 2050 
 2051         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2052             0, uap->amode));
 2053 }
 2054 
 2055 #ifndef _SYS_SYSPROTO_H_
 2056 struct faccessat_args {
 2057         int     dirfd;
 2058         char    *path;
 2059         int     amode;
 2060         int     flag;
 2061 }
 2062 #endif
 2063 int
 2064 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2065 {
 2066 
 2067         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2068             uap->amode));
 2069 }
 2070 
 2071 int
 2072 kern_accessat(struct thread *td, int fd, const char *path,
 2073     enum uio_seg pathseg, int flag, int amode)
 2074 {
 2075         struct ucred *cred, *usecred;
 2076         struct vnode *vp;
 2077         struct nameidata nd;
 2078         int error;
 2079 
 2080         if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH)) != 0)
 2081                 return (EINVAL);
 2082         if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 2083                 return (EINVAL);
 2084 
 2085         /*
 2086          * Create and modify a temporary credential instead of one that
 2087          * is potentially shared (if we need one).
 2088          */
 2089         cred = td->td_ucred;
 2090         if ((flag & AT_EACCESS) == 0 &&
 2091             ((cred->cr_uid != cred->cr_ruid ||
 2092             cred->cr_rgid != cred->cr_groups[0]))) {
 2093                 usecred = crdup(cred);
 2094                 usecred->cr_uid = cred->cr_ruid;
 2095                 usecred->cr_groups[0] = cred->cr_rgid;
 2096                 td->td_ucred = usecred;
 2097         } else
 2098                 usecred = cred;
 2099         AUDIT_ARG_VALUE(amode);
 2100         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2101             AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH),
 2102             pathseg, path, fd, &cap_fstat_rights, td);
 2103         if ((error = namei(&nd)) != 0)
 2104                 goto out;
 2105         vp = nd.ni_vp;
 2106 
 2107         error = vn_access(vp, amode, usecred, td);
 2108         NDFREE_NOTHING(&nd);
 2109         vput(vp);
 2110 out:
 2111         if (usecred != cred) {
 2112                 td->td_ucred = cred;
 2113                 crfree(usecred);
 2114         }
 2115         return (error);
 2116 }
 2117 
 2118 /*
 2119  * Check access permissions using "effective" credentials.
 2120  */
 2121 #ifndef _SYS_SYSPROTO_H_
 2122 struct eaccess_args {
 2123         char    *path;
 2124         int     amode;
 2125 };
 2126 #endif
 2127 int
 2128 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 2129 {
 2130 
 2131         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2132             AT_EACCESS, uap->amode));
 2133 }
 2134 
 2135 #if defined(COMPAT_43)
 2136 /*
 2137  * Get file status; this version follows links.
 2138  */
 2139 #ifndef _SYS_SYSPROTO_H_
 2140 struct ostat_args {
 2141         char    *path;
 2142         struct ostat *ub;
 2143 };
 2144 #endif
 2145 int
 2146 ostat(struct thread *td, struct ostat_args *uap)
 2147 {
 2148         struct stat sb;
 2149         struct ostat osb;
 2150         int error;
 2151 
 2152         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2153             &sb, NULL);
 2154         if (error != 0)
 2155                 return (error);
 2156         cvtstat(&sb, &osb);
 2157         return (copyout(&osb, uap->ub, sizeof (osb)));
 2158 }
 2159 
 2160 /*
 2161  * Get file status; this version does not follow links.
 2162  */
 2163 #ifndef _SYS_SYSPROTO_H_
 2164 struct olstat_args {
 2165         char    *path;
 2166         struct ostat *ub;
 2167 };
 2168 #endif
 2169 int
 2170 olstat(struct thread *td, struct olstat_args *uap)
 2171 {
 2172         struct stat sb;
 2173         struct ostat osb;
 2174         int error;
 2175 
 2176         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2177             UIO_USERSPACE, &sb, NULL);
 2178         if (error != 0)
 2179                 return (error);
 2180         cvtstat(&sb, &osb);
 2181         return (copyout(&osb, uap->ub, sizeof (osb)));
 2182 }
 2183 
 2184 /*
 2185  * Convert from an old to a new stat structure.
 2186  * XXX: many values are blindly truncated.
 2187  */
 2188 void
 2189 cvtstat(struct stat *st, struct ostat *ost)
 2190 {
 2191 
 2192         bzero(ost, sizeof(*ost));
 2193         ost->st_dev = st->st_dev;
 2194         ost->st_ino = st->st_ino;
 2195         ost->st_mode = st->st_mode;
 2196         ost->st_nlink = st->st_nlink;
 2197         ost->st_uid = st->st_uid;
 2198         ost->st_gid = st->st_gid;
 2199         ost->st_rdev = st->st_rdev;
 2200         ost->st_size = MIN(st->st_size, INT32_MAX);
 2201         ost->st_atim = st->st_atim;
 2202         ost->st_mtim = st->st_mtim;
 2203         ost->st_ctim = st->st_ctim;
 2204         ost->st_blksize = st->st_blksize;
 2205         ost->st_blocks = st->st_blocks;
 2206         ost->st_flags = st->st_flags;
 2207         ost->st_gen = st->st_gen;
 2208 }
 2209 #endif /* COMPAT_43 */
 2210 
 2211 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 2212 int ino64_trunc_error;
 2213 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW,
 2214     &ino64_trunc_error, 0,
 2215     "Error on truncation of device, file or inode number, or link count");
 2216 
 2217 int
 2218 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
 2219 {
 2220 
 2221         ost->st_dev = st->st_dev;
 2222         if (ost->st_dev != st->st_dev) {
 2223                 switch (ino64_trunc_error) {
 2224                 default:
 2225                         /*
 2226                          * Since dev_t is almost raw, don't clamp to the
 2227                          * maximum for case 2, but ignore the error.
 2228                          */
 2229                         break;
 2230                 case 1:
 2231                         return (EOVERFLOW);
 2232                 }
 2233         }
 2234         ost->st_ino = st->st_ino;
 2235         if (ost->st_ino != st->st_ino) {
 2236                 switch (ino64_trunc_error) {
 2237                 default:
 2238                 case 0:
 2239                         break;
 2240                 case 1:
 2241                         return (EOVERFLOW);
 2242                 case 2:
 2243                         ost->st_ino = UINT32_MAX;
 2244                         break;
 2245                 }
 2246         }
 2247         ost->st_mode = st->st_mode;
 2248         ost->st_nlink = st->st_nlink;
 2249         if (ost->st_nlink != st->st_nlink) {
 2250                 switch (ino64_trunc_error) {
 2251                 default:
 2252                 case 0:
 2253                         break;
 2254                 case 1:
 2255                         return (EOVERFLOW);
 2256                 case 2:
 2257                         ost->st_nlink = UINT16_MAX;
 2258                         break;
 2259                 }
 2260         }
 2261         ost->st_uid = st->st_uid;
 2262         ost->st_gid = st->st_gid;
 2263         ost->st_rdev = st->st_rdev;
 2264         if (ost->st_rdev != st->st_rdev) {
 2265                 switch (ino64_trunc_error) {
 2266                 default:
 2267                         break;
 2268                 case 1:
 2269                         return (EOVERFLOW);
 2270                 }
 2271         }
 2272         ost->st_atim = st->st_atim;
 2273         ost->st_mtim = st->st_mtim;
 2274         ost->st_ctim = st->st_ctim;
 2275         ost->st_size = st->st_size;
 2276         ost->st_blocks = st->st_blocks;
 2277         ost->st_blksize = st->st_blksize;
 2278         ost->st_flags = st->st_flags;
 2279         ost->st_gen = st->st_gen;
 2280         ost->st_lspare = 0;
 2281         ost->st_birthtim = st->st_birthtim;
 2282         bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
 2283             sizeof(*ost) - offsetof(struct freebsd11_stat,
 2284             st_birthtim) - sizeof(ost->st_birthtim));
 2285         return (0);
 2286 }
 2287 
 2288 int
 2289 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 2290 {
 2291         struct stat sb;
 2292         struct freebsd11_stat osb;
 2293         int error;
 2294 
 2295         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2296             &sb, NULL);
 2297         if (error != 0)
 2298                 return (error);
 2299         error = freebsd11_cvtstat(&sb, &osb);
 2300         if (error == 0)
 2301                 error = copyout(&osb, uap->ub, sizeof(osb));
 2302         return (error);
 2303 }
 2304 
 2305 int
 2306 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
 2307 {
 2308         struct stat sb;
 2309         struct freebsd11_stat osb;
 2310         int error;
 2311 
 2312         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2313             UIO_USERSPACE, &sb, NULL);
 2314         if (error != 0)
 2315                 return (error);
 2316         error = freebsd11_cvtstat(&sb, &osb);
 2317         if (error == 0)
 2318                 error = copyout(&osb, uap->ub, sizeof(osb));
 2319         return (error);
 2320 }
 2321 
 2322 int
 2323 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
 2324 {
 2325         struct fhandle fh;
 2326         struct stat sb;
 2327         struct freebsd11_stat osb;
 2328         int error;
 2329 
 2330         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 2331         if (error != 0)
 2332                 return (error);
 2333         error = kern_fhstat(td, fh, &sb);
 2334         if (error != 0)
 2335                 return (error);
 2336         error = freebsd11_cvtstat(&sb, &osb);
 2337         if (error == 0)
 2338                 error = copyout(&osb, uap->sb, sizeof(osb));
 2339         return (error);
 2340 }
 2341 
 2342 int
 2343 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
 2344 {
 2345         struct stat sb;
 2346         struct freebsd11_stat osb;
 2347         int error;
 2348 
 2349         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2350             UIO_USERSPACE, &sb, NULL);
 2351         if (error != 0)
 2352                 return (error);
 2353         error = freebsd11_cvtstat(&sb, &osb);
 2354         if (error == 0)
 2355                 error = copyout(&osb, uap->buf, sizeof(osb));
 2356         return (error);
 2357 }
 2358 #endif  /* COMPAT_FREEBSD11 */
 2359 
 2360 /*
 2361  * Get file status
 2362  */
 2363 #ifndef _SYS_SYSPROTO_H_
 2364 struct fstatat_args {
 2365         int     fd;
 2366         char    *path;
 2367         struct stat     *buf;
 2368         int     flag;
 2369 }
 2370 #endif
 2371 int
 2372 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2373 {
 2374         struct stat sb;
 2375         int error;
 2376 
 2377         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2378             UIO_USERSPACE, &sb, NULL);
 2379         if (error == 0)
 2380                 error = copyout(&sb, uap->buf, sizeof (sb));
 2381         return (error);
 2382 }
 2383 
 2384 int
 2385 kern_statat(struct thread *td, int flag, int fd, const char *path,
 2386     enum uio_seg pathseg, struct stat *sbp,
 2387     void (*hook)(struct vnode *vp, struct stat *sbp))
 2388 {
 2389         struct nameidata nd;
 2390         int error;
 2391 
 2392         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2393                 return (EINVAL);
 2394 
 2395         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH |
 2396             AT_SYMLINK_NOFOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2397             pathseg, path, fd, &cap_fstat_rights, td);
 2398 
 2399         if ((error = namei(&nd)) != 0)
 2400                 return (error);
 2401         error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
 2402         if (error == 0) {
 2403                 if (__predict_false(hook != NULL))
 2404                         hook(nd.ni_vp, sbp);
 2405         }
 2406         NDFREE_NOTHING(&nd);
 2407         vput(nd.ni_vp);
 2408 #ifdef __STAT_TIME_T_EXT
 2409         sbp->st_atim_ext = 0;
 2410         sbp->st_mtim_ext = 0;
 2411         sbp->st_ctim_ext = 0;
 2412         sbp->st_btim_ext = 0;
 2413 #endif
 2414 #ifdef KTRACE
 2415         if (KTRPOINT(td, KTR_STRUCT))
 2416                 ktrstat_error(sbp, error);
 2417 #endif
 2418         return (error);
 2419 }
 2420 
 2421 #if defined(COMPAT_FREEBSD11)
 2422 /*
 2423  * Implementation of the NetBSD [l]stat() functions.
 2424  */
 2425 void
 2426 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 2427 {
 2428 
 2429         bzero(nsb, sizeof(*nsb));
 2430         nsb->st_dev = sb->st_dev;
 2431         nsb->st_ino = sb->st_ino;
 2432         nsb->st_mode = sb->st_mode;
 2433         nsb->st_nlink = sb->st_nlink;
 2434         nsb->st_uid = sb->st_uid;
 2435         nsb->st_gid = sb->st_gid;
 2436         nsb->st_rdev = sb->st_rdev;
 2437         nsb->st_atim = sb->st_atim;
 2438         nsb->st_mtim = sb->st_mtim;
 2439         nsb->st_ctim = sb->st_ctim;
 2440         nsb->st_size = sb->st_size;
 2441         nsb->st_blocks = sb->st_blocks;
 2442         nsb->st_blksize = sb->st_blksize;
 2443         nsb->st_flags = sb->st_flags;
 2444         nsb->st_gen = sb->st_gen;
 2445         nsb->st_birthtim = sb->st_birthtim;
 2446 }
 2447 
 2448 #ifndef _SYS_SYSPROTO_H_
 2449 struct freebsd11_nstat_args {
 2450         char    *path;
 2451         struct nstat *ub;
 2452 };
 2453 #endif
 2454 int
 2455 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 2456 {
 2457         struct stat sb;
 2458         struct nstat nsb;
 2459         int error;
 2460 
 2461         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2462             &sb, NULL);
 2463         if (error != 0)
 2464                 return (error);
 2465         freebsd11_cvtnstat(&sb, &nsb);
 2466         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2467 }
 2468 
 2469 /*
 2470  * NetBSD lstat.  Get file status; this version does not follow links.
 2471  */
 2472 #ifndef _SYS_SYSPROTO_H_
 2473 struct freebsd11_nlstat_args {
 2474         char    *path;
 2475         struct nstat *ub;
 2476 };
 2477 #endif
 2478 int
 2479 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 2480 {
 2481         struct stat sb;
 2482         struct nstat nsb;
 2483         int error;
 2484 
 2485         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2486             UIO_USERSPACE, &sb, NULL);
 2487         if (error != 0)
 2488                 return (error);
 2489         freebsd11_cvtnstat(&sb, &nsb);
 2490         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2491 }
 2492 #endif /* COMPAT_FREEBSD11 */
 2493 
 2494 /*
 2495  * Get configurable pathname variables.
 2496  */
 2497 #ifndef _SYS_SYSPROTO_H_
 2498 struct pathconf_args {
 2499         char    *path;
 2500         int     name;
 2501 };
 2502 #endif
 2503 int
 2504 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 2505 {
 2506         long value;
 2507         int error;
 2508 
 2509         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW,
 2510             &value);
 2511         if (error == 0)
 2512                 td->td_retval[0] = value;
 2513         return (error);
 2514 }
 2515 
 2516 #ifndef _SYS_SYSPROTO_H_
 2517 struct lpathconf_args {
 2518         char    *path;
 2519         int     name;
 2520 };
 2521 #endif
 2522 int
 2523 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 2524 {
 2525         long value;
 2526         int error;
 2527 
 2528         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2529             NOFOLLOW, &value);
 2530         if (error == 0)
 2531                 td->td_retval[0] = value;
 2532         return (error);
 2533 }
 2534 
 2535 int
 2536 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg,
 2537     int name, u_long flags, long *valuep)
 2538 {
 2539         struct nameidata nd;
 2540         int error;
 2541 
 2542         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2543             pathseg, path, td);
 2544         if ((error = namei(&nd)) != 0)
 2545                 return (error);
 2546         NDFREE_NOTHING(&nd);
 2547 
 2548         error = VOP_PATHCONF(nd.ni_vp, name, valuep);
 2549         vput(nd.ni_vp);
 2550         return (error);
 2551 }
 2552 
 2553 /*
 2554  * Return target name of a symbolic link.
 2555  */
 2556 #ifndef _SYS_SYSPROTO_H_
 2557 struct readlink_args {
 2558         char    *path;
 2559         char    *buf;
 2560         size_t  count;
 2561 };
 2562 #endif
 2563 int
 2564 sys_readlink(struct thread *td, struct readlink_args *uap)
 2565 {
 2566 
 2567         return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2568             uap->buf, UIO_USERSPACE, uap->count));
 2569 }
 2570 #ifndef _SYS_SYSPROTO_H_
 2571 struct readlinkat_args {
 2572         int     fd;
 2573         char    *path;
 2574         char    *buf;
 2575         size_t  bufsize;
 2576 };
 2577 #endif
 2578 int
 2579 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2580 {
 2581 
 2582         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2583             uap->buf, UIO_USERSPACE, uap->bufsize));
 2584 }
 2585 
 2586 int
 2587 kern_readlinkat(struct thread *td, int fd, const char *path,
 2588     enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
 2589 {
 2590         struct vnode *vp;
 2591         struct nameidata nd;
 2592         int error;
 2593 
 2594         if (count > IOSIZE_MAX)
 2595                 return (EINVAL);
 2596 
 2597         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2598             pathseg, path, fd, td);
 2599 
 2600         if ((error = namei(&nd)) != 0)
 2601                 return (error);
 2602         NDFREE_NOTHING(&nd);
 2603         vp = nd.ni_vp;
 2604 
 2605         error = kern_readlink_vp(vp, buf, bufseg, count, td);
 2606         vput(vp);
 2607 
 2608         return (error);
 2609 }
 2610 
 2611 /*
 2612  * Helper function to readlink from a vnode
 2613  */
 2614 static int
 2615 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
 2616     struct thread *td)
 2617 {
 2618         struct iovec aiov;
 2619         struct uio auio;
 2620         int error;
 2621 
 2622         ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
 2623 #ifdef MAC
 2624         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2625         if (error != 0)
 2626                 return (error);
 2627 #endif
 2628         if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
 2629                 return (EINVAL);
 2630 
 2631         aiov.iov_base = buf;
 2632         aiov.iov_len = count;
 2633         auio.uio_iov = &aiov;
 2634         auio.uio_iovcnt = 1;
 2635         auio.uio_offset = 0;
 2636         auio.uio_rw = UIO_READ;
 2637         auio.uio_segflg = bufseg;
 2638         auio.uio_td = td;
 2639         auio.uio_resid = count;
 2640         error = VOP_READLINK(vp, &auio, td->td_ucred);
 2641         td->td_retval[0] = count - auio.uio_resid;
 2642         return (error);
 2643 }
 2644 
 2645 /*
 2646  * Common implementation code for chflags() and fchflags().
 2647  */
 2648 static int
 2649 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 2650 {
 2651         struct mount *mp;
 2652         struct vattr vattr;
 2653         int error;
 2654 
 2655         /* We can't support the value matching VNOVAL. */
 2656         if (flags == VNOVAL)
 2657                 return (EOPNOTSUPP);
 2658 
 2659         /*
 2660          * Prevent non-root users from setting flags on devices.  When
 2661          * a device is reused, users can retain ownership of the device
 2662          * if they are allowed to set flags and programs assume that
 2663          * chown can't fail when done as root.
 2664          */
 2665         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2666                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2667                 if (error != 0)
 2668                         return (error);
 2669         }
 2670 
 2671         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2672                 return (error);
 2673         VATTR_NULL(&vattr);
 2674         vattr.va_flags = flags;
 2675         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2676 #ifdef MAC
 2677         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2678         if (error == 0)
 2679 #endif
 2680                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2681         VOP_UNLOCK(vp);
 2682         vn_finished_write(mp);
 2683         return (error);
 2684 }
 2685 
 2686 /*
 2687  * Change flags of a file given a path name.
 2688  */
 2689 #ifndef _SYS_SYSPROTO_H_
 2690 struct chflags_args {
 2691         const char *path;
 2692         u_long  flags;
 2693 };
 2694 #endif
 2695 int
 2696 sys_chflags(struct thread *td, struct chflags_args *uap)
 2697 {
 2698 
 2699         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2700             uap->flags, 0));
 2701 }
 2702 
 2703 #ifndef _SYS_SYSPROTO_H_
 2704 struct chflagsat_args {
 2705         int     fd;
 2706         const char *path;
 2707         u_long  flags;
 2708         int     atflag;
 2709 }
 2710 #endif
 2711 int
 2712 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2713 {
 2714 
 2715         if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2716                 return (EINVAL);
 2717 
 2718         return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE,
 2719             uap->flags, uap->atflag));
 2720 }
 2721 
 2722 /*
 2723  * Same as chflags() but doesn't follow symlinks.
 2724  */
 2725 #ifndef _SYS_SYSPROTO_H_
 2726 struct lchflags_args {
 2727         const char *path;
 2728         u_long flags;
 2729 };
 2730 #endif
 2731 int
 2732 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 2733 {
 2734 
 2735         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2736             uap->flags, AT_SYMLINK_NOFOLLOW));
 2737 }
 2738 
 2739 static int
 2740 kern_chflagsat(struct thread *td, int fd, const char *path,
 2741     enum uio_seg pathseg, u_long flags, int atflag)
 2742 {
 2743         struct nameidata nd;
 2744         int error;
 2745 
 2746         AUDIT_ARG_FFLAGS(flags);
 2747         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW |
 2748             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2749             &cap_fchflags_rights, td);
 2750         if ((error = namei(&nd)) != 0)
 2751                 return (error);
 2752         NDFREE_NOTHING(&nd);
 2753         error = setfflags(td, nd.ni_vp, flags);
 2754         vrele(nd.ni_vp);
 2755         return (error);
 2756 }
 2757 
 2758 /*
 2759  * Change flags of a file given a file descriptor.
 2760  */
 2761 #ifndef _SYS_SYSPROTO_H_
 2762 struct fchflags_args {
 2763         int     fd;
 2764         u_long  flags;
 2765 };
 2766 #endif
 2767 int
 2768 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 2769 {
 2770         struct file *fp;
 2771         int error;
 2772 
 2773         AUDIT_ARG_FD(uap->fd);
 2774         AUDIT_ARG_FFLAGS(uap->flags);
 2775         error = getvnode(td, uap->fd, &cap_fchflags_rights,
 2776             &fp);
 2777         if (error != 0)
 2778                 return (error);
 2779 #ifdef AUDIT
 2780         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2781         AUDIT_ARG_VNODE1(fp->f_vnode);
 2782         VOP_UNLOCK(fp->f_vnode);
 2783 #endif
 2784         error = setfflags(td, fp->f_vnode, uap->flags);
 2785         fdrop(fp, td);
 2786         return (error);
 2787 }
 2788 
 2789 /*
 2790  * Common implementation code for chmod(), lchmod() and fchmod().
 2791  */
 2792 int
 2793 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 2794 {
 2795         struct mount *mp;
 2796         struct vattr vattr;
 2797         int error;
 2798 
 2799         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2800                 return (error);
 2801         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2802         VATTR_NULL(&vattr);
 2803         vattr.va_mode = mode & ALLPERMS;
 2804 #ifdef MAC
 2805         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2806         if (error == 0)
 2807 #endif
 2808                 error = VOP_SETATTR(vp, &vattr, cred);
 2809         VOP_UNLOCK(vp);
 2810         vn_finished_write(mp);
 2811         return (error);
 2812 }
 2813 
 2814 /*
 2815  * Change mode of a file given path name.
 2816  */
 2817 #ifndef _SYS_SYSPROTO_H_
 2818 struct chmod_args {
 2819         char    *path;
 2820         int     mode;
 2821 };
 2822 #endif
 2823 int
 2824 sys_chmod(struct thread *td, struct chmod_args *uap)
 2825 {
 2826 
 2827         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2828             uap->mode, 0));
 2829 }
 2830 
 2831 #ifndef _SYS_SYSPROTO_H_
 2832 struct fchmodat_args {
 2833         int     dirfd;
 2834         char    *path;
 2835         mode_t  mode;
 2836         int     flag;
 2837 }
 2838 #endif
 2839 int
 2840 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2841 {
 2842 
 2843         if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2844                 return (EINVAL);
 2845 
 2846         return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE,
 2847             uap->mode, uap->flag));
 2848 }
 2849 
 2850 /*
 2851  * Change mode of a file given path name (don't follow links.)
 2852  */
 2853 #ifndef _SYS_SYSPROTO_H_
 2854 struct lchmod_args {
 2855         char    *path;
 2856         int     mode;
 2857 };
 2858 #endif
 2859 int
 2860 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 2861 {
 2862 
 2863         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2864             uap->mode, AT_SYMLINK_NOFOLLOW));
 2865 }
 2866 
 2867 int
 2868 kern_fchmodat(struct thread *td, int fd, const char *path,
 2869     enum uio_seg pathseg, mode_t mode, int flag)
 2870 {
 2871         struct nameidata nd;
 2872         int error;
 2873 
 2874         AUDIT_ARG_MODE(mode);
 2875         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 2876             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2877             &cap_fchmod_rights, td);
 2878         if ((error = namei(&nd)) != 0)
 2879                 return (error);
 2880         NDFREE_NOTHING(&nd);
 2881         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2882         vrele(nd.ni_vp);
 2883         return (error);
 2884 }
 2885 
 2886 /*
 2887  * Change mode of a file given a file descriptor.
 2888  */
 2889 #ifndef _SYS_SYSPROTO_H_
 2890 struct fchmod_args {
 2891         int     fd;
 2892         int     mode;
 2893 };
 2894 #endif
 2895 int
 2896 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2897 {
 2898         struct file *fp;
 2899         int error;
 2900 
 2901         AUDIT_ARG_FD(uap->fd);
 2902         AUDIT_ARG_MODE(uap->mode);
 2903 
 2904         error = fget(td, uap->fd, &cap_fchmod_rights, &fp);
 2905         if (error != 0)
 2906                 return (error);
 2907         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2908         fdrop(fp, td);
 2909         return (error);
 2910 }
 2911 
 2912 /*
 2913  * Common implementation for chown(), lchown(), and fchown()
 2914  */
 2915 int
 2916 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
 2917     gid_t gid)
 2918 {
 2919         struct mount *mp;
 2920         struct vattr vattr;
 2921         int error;
 2922 
 2923         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2924                 return (error);
 2925         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2926         VATTR_NULL(&vattr);
 2927         vattr.va_uid = uid;
 2928         vattr.va_gid = gid;
 2929 #ifdef MAC
 2930         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2931             vattr.va_gid);
 2932         if (error == 0)
 2933 #endif
 2934                 error = VOP_SETATTR(vp, &vattr, cred);
 2935         VOP_UNLOCK(vp);
 2936         vn_finished_write(mp);
 2937         return (error);
 2938 }
 2939 
 2940 /*
 2941  * Set ownership given a path name.
 2942  */
 2943 #ifndef _SYS_SYSPROTO_H_
 2944 struct chown_args {
 2945         char    *path;
 2946         int     uid;
 2947         int     gid;
 2948 };
 2949 #endif
 2950 int
 2951 sys_chown(struct thread *td, struct chown_args *uap)
 2952 {
 2953 
 2954         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 2955             uap->gid, 0));
 2956 }
 2957 
 2958 #ifndef _SYS_SYSPROTO_H_
 2959 struct fchownat_args {
 2960         int fd;
 2961         const char * path;
 2962         uid_t uid;
 2963         gid_t gid;
 2964         int flag;
 2965 };
 2966 #endif
 2967 int
 2968 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2969 {
 2970 
 2971         if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2972                 return (EINVAL);
 2973 
 2974         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2975             uap->gid, uap->flag));
 2976 }
 2977 
 2978 int
 2979 kern_fchownat(struct thread *td, int fd, const char *path,
 2980     enum uio_seg pathseg, int uid, int gid, int flag)
 2981 {
 2982         struct nameidata nd;
 2983         int error;
 2984 
 2985         AUDIT_ARG_OWNER(uid, gid);
 2986         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 2987             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2988             &cap_fchown_rights, td);
 2989 
 2990         if ((error = namei(&nd)) != 0)
 2991                 return (error);
 2992         NDFREE_NOTHING(&nd);
 2993         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2994         vrele(nd.ni_vp);
 2995         return (error);
 2996 }
 2997 
 2998 /*
 2999  * Set ownership given a path name, do not cross symlinks.
 3000  */
 3001 #ifndef _SYS_SYSPROTO_H_
 3002 struct lchown_args {
 3003         char    *path;
 3004         int     uid;
 3005         int     gid;
 3006 };
 3007 #endif
 3008 int
 3009 sys_lchown(struct thread *td, struct lchown_args *uap)
 3010 {
 3011 
 3012         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3013             uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 3014 }
 3015 
 3016 /*
 3017  * Set ownership given a file descriptor.
 3018  */
 3019 #ifndef _SYS_SYSPROTO_H_
 3020 struct fchown_args {
 3021         int     fd;
 3022         int     uid;
 3023         int     gid;
 3024 };
 3025 #endif
 3026 int
 3027 sys_fchown(struct thread *td, struct fchown_args *uap)
 3028 {
 3029         struct file *fp;
 3030         int error;
 3031 
 3032         AUDIT_ARG_FD(uap->fd);
 3033         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3034         error = fget(td, uap->fd, &cap_fchown_rights, &fp);
 3035         if (error != 0)
 3036                 return (error);
 3037         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3038         fdrop(fp, td);
 3039         return (error);
 3040 }
 3041 
 3042 /*
 3043  * Common implementation code for utimes(), lutimes(), and futimes().
 3044  */
 3045 static int
 3046 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
 3047     struct timespec *tsp)
 3048 {
 3049         struct timeval tv[2];
 3050         const struct timeval *tvp;
 3051         int error;
 3052 
 3053         if (usrtvp == NULL) {
 3054                 vfs_timestamp(&tsp[0]);
 3055                 tsp[1] = tsp[0];
 3056         } else {
 3057                 if (tvpseg == UIO_SYSSPACE) {
 3058                         tvp = usrtvp;
 3059                 } else {
 3060                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3061                                 return (error);
 3062                         tvp = tv;
 3063                 }
 3064 
 3065                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3066                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3067                         return (EINVAL);
 3068                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3069                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3070         }
 3071         return (0);
 3072 }
 3073 
 3074 /*
 3075  * Common implementation code for futimens(), utimensat().
 3076  */
 3077 #define UTIMENS_NULL    0x1
 3078 #define UTIMENS_EXIT    0x2
 3079 static int
 3080 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3081     struct timespec *tsp, int *retflags)
 3082 {
 3083         struct timespec tsnow;
 3084         int error;
 3085 
 3086         vfs_timestamp(&tsnow);
 3087         *retflags = 0;
 3088         if (usrtsp == NULL) {
 3089                 tsp[0] = tsnow;
 3090                 tsp[1] = tsnow;
 3091                 *retflags |= UTIMENS_NULL;
 3092                 return (0);
 3093         }
 3094         if (tspseg == UIO_SYSSPACE) {
 3095                 tsp[0] = usrtsp[0];
 3096                 tsp[1] = usrtsp[1];
 3097         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3098                 return (error);
 3099         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3100                 *retflags |= UTIMENS_EXIT;
 3101         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3102                 *retflags |= UTIMENS_NULL;
 3103         if (tsp[0].tv_nsec == UTIME_OMIT)
 3104                 tsp[0].tv_sec = VNOVAL;
 3105         else if (tsp[0].tv_nsec == UTIME_NOW)
 3106                 tsp[0] = tsnow;
 3107         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3108                 return (EINVAL);
 3109         if (tsp[1].tv_nsec == UTIME_OMIT)
 3110                 tsp[1].tv_sec = VNOVAL;
 3111         else if (tsp[1].tv_nsec == UTIME_NOW)
 3112                 tsp[1] = tsnow;
 3113         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3114                 return (EINVAL);
 3115 
 3116         return (0);
 3117 }
 3118 
 3119 /*
 3120  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3121  * and utimensat().
 3122  */
 3123 static int
 3124 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
 3125     int numtimes, int nullflag)
 3126 {
 3127         struct mount *mp;
 3128         struct vattr vattr;
 3129         int error, setbirthtime;
 3130 
 3131         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3132                 return (error);
 3133         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3134         setbirthtime = 0;
 3135         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3136             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3137                 setbirthtime = 1;
 3138         VATTR_NULL(&vattr);
 3139         vattr.va_atime = ts[0];
 3140         vattr.va_mtime = ts[1];
 3141         if (setbirthtime)
 3142                 vattr.va_birthtime = ts[1];
 3143         if (numtimes > 2)
 3144                 vattr.va_birthtime = ts[2];
 3145         if (nullflag)
 3146                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3147 #ifdef MAC
 3148         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3149             vattr.va_mtime);
 3150 #endif
 3151         if (error == 0)
 3152                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3153         VOP_UNLOCK(vp);
 3154         vn_finished_write(mp);
 3155         return (error);
 3156 }
 3157 
 3158 /*
 3159  * Set the access and modification times of a file.
 3160  */
 3161 #ifndef _SYS_SYSPROTO_H_
 3162 struct utimes_args {
 3163         char    *path;
 3164         struct  timeval *tptr;
 3165 };
 3166 #endif
 3167 int
 3168 sys_utimes(struct thread *td, struct utimes_args *uap)
 3169 {
 3170 
 3171         return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3172             uap->tptr, UIO_USERSPACE));
 3173 }
 3174 
 3175 #ifndef _SYS_SYSPROTO_H_
 3176 struct futimesat_args {
 3177         int fd;
 3178         const char * path;
 3179         const struct timeval * times;
 3180 };
 3181 #endif
 3182 int
 3183 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3184 {
 3185 
 3186         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3187             uap->times, UIO_USERSPACE));
 3188 }
 3189 
 3190 int
 3191 kern_utimesat(struct thread *td, int fd, const char *path,
 3192     enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg)
 3193 {
 3194         struct nameidata nd;
 3195         struct timespec ts[2];
 3196         int error;
 3197 
 3198         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3199                 return (error);
 3200         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3201             &cap_futimes_rights, td);
 3202 
 3203         if ((error = namei(&nd)) != 0)
 3204                 return (error);
 3205         NDFREE_NOTHING(&nd);
 3206         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3207         vrele(nd.ni_vp);
 3208         return (error);
 3209 }
 3210 
 3211 /*
 3212  * Set the access and modification times of a file.
 3213  */
 3214 #ifndef _SYS_SYSPROTO_H_
 3215 struct lutimes_args {
 3216         char    *path;
 3217         struct  timeval *tptr;
 3218 };
 3219 #endif
 3220 int
 3221 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 3222 {
 3223 
 3224         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3225             UIO_USERSPACE));
 3226 }
 3227 
 3228 int
 3229 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
 3230     struct timeval *tptr, enum uio_seg tptrseg)
 3231 {
 3232         struct timespec ts[2];
 3233         struct nameidata nd;
 3234         int error;
 3235 
 3236         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3237                 return (error);
 3238         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3239         if ((error = namei(&nd)) != 0)
 3240                 return (error);
 3241         NDFREE_NOTHING(&nd);
 3242         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3243         vrele(nd.ni_vp);
 3244         return (error);
 3245 }
 3246 
 3247 /*
 3248  * Set the access and modification times of a file.
 3249  */
 3250 #ifndef _SYS_SYSPROTO_H_
 3251 struct futimes_args {
 3252         int     fd;
 3253         struct  timeval *tptr;
 3254 };
 3255 #endif
 3256 int
 3257 sys_futimes(struct thread *td, struct futimes_args *uap)
 3258 {
 3259 
 3260         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3261 }
 3262 
 3263 int
 3264 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3265     enum uio_seg tptrseg)
 3266 {
 3267         struct timespec ts[2];
 3268         struct file *fp;
 3269         int error;
 3270 
 3271         AUDIT_ARG_FD(fd);
 3272         error = getutimes(tptr, tptrseg, ts);
 3273         if (error != 0)
 3274                 return (error);
 3275         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3276         if (error != 0)
 3277                 return (error);
 3278 #ifdef AUDIT
 3279         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3280         AUDIT_ARG_VNODE1(fp->f_vnode);
 3281         VOP_UNLOCK(fp->f_vnode);
 3282 #endif
 3283         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3284         fdrop(fp, td);
 3285         return (error);
 3286 }
 3287 
 3288 int
 3289 sys_futimens(struct thread *td, struct futimens_args *uap)
 3290 {
 3291 
 3292         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3293 }
 3294 
 3295 int
 3296 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3297     enum uio_seg tptrseg)
 3298 {
 3299         struct timespec ts[2];
 3300         struct file *fp;
 3301         int error, flags;
 3302 
 3303         AUDIT_ARG_FD(fd);
 3304         error = getutimens(tptr, tptrseg, ts, &flags);
 3305         if (error != 0)
 3306                 return (error);
 3307         if (flags & UTIMENS_EXIT)
 3308                 return (0);
 3309         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3310         if (error != 0)
 3311                 return (error);
 3312 #ifdef AUDIT
 3313         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3314         AUDIT_ARG_VNODE1(fp->f_vnode);
 3315         VOP_UNLOCK(fp->f_vnode);
 3316 #endif
 3317         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3318         fdrop(fp, td);
 3319         return (error);
 3320 }
 3321 
 3322 int
 3323 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3324 {
 3325 
 3326         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3327             uap->times, UIO_USERSPACE, uap->flag));
 3328 }
 3329 
 3330 int
 3331 kern_utimensat(struct thread *td, int fd, const char *path,
 3332     enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
 3333     int flag)
 3334 {
 3335         struct nameidata nd;
 3336         struct timespec ts[2];
 3337         int error, flags;
 3338 
 3339         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 3340                 return (EINVAL);
 3341 
 3342         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3343                 return (error);
 3344         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 3345             AT_RESOLVE_BENEATH) | AUDITVNODE1,
 3346             pathseg, path, fd, &cap_futimes_rights, td);
 3347         if ((error = namei(&nd)) != 0)
 3348                 return (error);
 3349         /*
 3350          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3351          * POSIX states:
 3352          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3353          * "Search permission is denied by a component of the path prefix."
 3354          */
 3355         NDFREE_NOTHING(&nd);
 3356         if ((flags & UTIMENS_EXIT) == 0)
 3357                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3358         vrele(nd.ni_vp);
 3359         return (error);
 3360 }
 3361 
 3362 /*
 3363  * Truncate a file given its path name.
 3364  */
 3365 #ifndef _SYS_SYSPROTO_H_
 3366 struct truncate_args {
 3367         char    *path;
 3368         int     pad;
 3369         off_t   length;
 3370 };
 3371 #endif
 3372 int
 3373 sys_truncate(struct thread *td, struct truncate_args *uap)
 3374 {
 3375 
 3376         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3377 }
 3378 
 3379 int
 3380 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
 3381     off_t length)
 3382 {
 3383         struct mount *mp;
 3384         struct vnode *vp;
 3385         void *rl_cookie;
 3386         struct vattr vattr;
 3387         struct nameidata nd;
 3388         int error;
 3389 
 3390         if (length < 0)
 3391                 return (EINVAL);
 3392 retry:
 3393         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3394         if ((error = namei(&nd)) != 0)
 3395                 return (error);
 3396         vp = nd.ni_vp;
 3397         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3398         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3399                 vn_rangelock_unlock(vp, rl_cookie);
 3400                 vrele(vp);
 3401                 return (error);
 3402         }
 3403         NDFREE(&nd, NDF_ONLY_PNBUF);
 3404         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3405         if (vp->v_type == VDIR)
 3406                 error = EISDIR;
 3407 #ifdef MAC
 3408         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3409         }
 3410 #endif
 3411         else if ((error = vn_writechk(vp)) == 0 &&
 3412             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3413                 VATTR_NULL(&vattr);
 3414                 vattr.va_size = length;
 3415                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3416         }
 3417         VOP_UNLOCK(vp);
 3418         vn_finished_write(mp);
 3419         vn_rangelock_unlock(vp, rl_cookie);
 3420         vrele(vp);
 3421         if (error == ERELOOKUP)
 3422                 goto retry;
 3423         return (error);
 3424 }
 3425 
 3426 #if defined(COMPAT_43)
 3427 /*
 3428  * Truncate a file given its path name.
 3429  */
 3430 #ifndef _SYS_SYSPROTO_H_
 3431 struct otruncate_args {
 3432         char    *path;
 3433         long    length;
 3434 };
 3435 #endif
 3436 int
 3437 otruncate(struct thread *td, struct otruncate_args *uap)
 3438 {
 3439 
 3440         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3441 }
 3442 #endif /* COMPAT_43 */
 3443 
 3444 #if defined(COMPAT_FREEBSD6)
 3445 /* Versions with the pad argument */
 3446 int
 3447 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3448 {
 3449 
 3450         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3451 }
 3452 
 3453 int
 3454 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3455 {
 3456 
 3457         return (kern_ftruncate(td, uap->fd, uap->length));
 3458 }
 3459 #endif
 3460 
 3461 int
 3462 kern_fsync(struct thread *td, int fd, bool fullsync)
 3463 {
 3464         struct vnode *vp;
 3465         struct mount *mp;
 3466         struct file *fp;
 3467         int error, lock_flags;
 3468 
 3469         AUDIT_ARG_FD(fd);
 3470         error = getvnode(td, fd, &cap_fsync_rights, &fp);
 3471         if (error != 0)
 3472                 return (error);
 3473         vp = fp->f_vnode;
 3474 #if 0
 3475         if (!fullsync)
 3476                 /* XXXKIB: compete outstanding aio writes */;
 3477 #endif
 3478 retry:
 3479         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3480         if (error != 0)
 3481                 goto drop;
 3482         if (MNT_SHARED_WRITES(mp) ||
 3483             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3484                 lock_flags = LK_SHARED;
 3485         } else {
 3486                 lock_flags = LK_EXCLUSIVE;
 3487         }
 3488         vn_lock(vp, lock_flags | LK_RETRY);
 3489         AUDIT_ARG_VNODE1(vp);
 3490         if (vp->v_object != NULL) {
 3491                 VM_OBJECT_WLOCK(vp->v_object);
 3492                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3493                 VM_OBJECT_WUNLOCK(vp->v_object);
 3494         }
 3495         error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 3496         VOP_UNLOCK(vp);
 3497         vn_finished_write(mp);
 3498         if (error == ERELOOKUP)
 3499                 goto retry;
 3500 drop:
 3501         fdrop(fp, td);
 3502         return (error);
 3503 }
 3504 
 3505 /*
 3506  * Sync an open file.
 3507  */
 3508 #ifndef _SYS_SYSPROTO_H_
 3509 struct fsync_args {
 3510         int     fd;
 3511 };
 3512 #endif
 3513 int
 3514 sys_fsync(struct thread *td, struct fsync_args *uap)
 3515 {
 3516 
 3517         return (kern_fsync(td, uap->fd, true));
 3518 }
 3519 
 3520 int
 3521 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 3522 {
 3523 
 3524         return (kern_fsync(td, uap->fd, false));
 3525 }
 3526 
 3527 /*
 3528  * Rename files.  Source and destination must either both be directories, or
 3529  * both not be directories.  If target is a directory, it must be empty.
 3530  */
 3531 #ifndef _SYS_SYSPROTO_H_
 3532 struct rename_args {
 3533         char    *from;
 3534         char    *to;
 3535 };
 3536 #endif
 3537 int
 3538 sys_rename(struct thread *td, struct rename_args *uap)
 3539 {
 3540 
 3541         return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 3542             uap->to, UIO_USERSPACE));
 3543 }
 3544 
 3545 #ifndef _SYS_SYSPROTO_H_
 3546 struct renameat_args {
 3547         int     oldfd;
 3548         char    *old;
 3549         int     newfd;
 3550         char    *new;
 3551 };
 3552 #endif
 3553 int
 3554 sys_renameat(struct thread *td, struct renameat_args *uap)
 3555 {
 3556 
 3557         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3558             UIO_USERSPACE));
 3559 }
 3560 
 3561 #ifdef MAC
 3562 static int
 3563 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
 3564     const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
 3565 {
 3566         int error;
 3567 
 3568         NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3569             AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td);
 3570         if ((error = namei(fromnd)) != 0)
 3571                 return (error);
 3572         error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
 3573             fromnd->ni_vp, &fromnd->ni_cnd);
 3574         VOP_UNLOCK(fromnd->ni_dvp);
 3575         if (fromnd->ni_dvp != fromnd->ni_vp)
 3576                 VOP_UNLOCK(fromnd->ni_vp);
 3577         if (error != 0) {
 3578                 NDFREE(fromnd, NDF_ONLY_PNBUF);
 3579                 vrele(fromnd->ni_dvp);
 3580                 vrele(fromnd->ni_vp);
 3581                 if (fromnd->ni_startdir)
 3582                         vrele(fromnd->ni_startdir);
 3583         }
 3584         return (error);
 3585 }
 3586 #endif
 3587 
/*
 * Common implementation of rename() and renameat().
 *
 * Looks up the source ("old", relative to oldfd) and the target ("new",
 * relative to newfd), validates the pair and hands both to VOP_RENAME().
 *
 * Errors produced directly here:
 *   EINVAL  - the target lookup returned EISDIR for a directory source,
 *             or the source vnode is the target's parent directory;
 *   ENOTDIR - the source is a directory but an existing target is not;
 *   EISDIR  - the source is not a directory but an existing target is.
 * Other errno values come from the lookups, the capability and MAC
 * checks, or VOP_RENAME().
 *
 * The whole operation restarts from "again" after waiting for a
 * suspended write, and when the result is ERELOOKUP.
 */
int
kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg)
{
        struct mount *mp = NULL;
        struct vnode *tvp, *fvp, *tdvp;
        struct nameidata fromnd, tond;
        u_int64_t tondflags;
        int error;

again:
        bwillwrite();
        /*
         * Source lookup.  With the MAC rename-from check enabled the
         * lookup and the check are both done by kern_renameat_mac().
         */
#ifdef MAC
        if (mac_vnode_check_rename_from_enabled()) {
                error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg,
                    &fromnd);
                if (error != 0)
                        return (error);
        } else {
#endif
        NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
            pathseg, old, oldfd, &cap_renameat_source_rights, td);
        if ((error = namei(&fromnd)) != 0)
                return (error);
#ifdef MAC
        }
#endif
        fvp = fromnd.ni_vp;
        /* Target lookup; WILLBEDIR is added when the source is a directory. */
        tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2;
        if (fromnd.ni_vp->v_type == VDIR)
                tondflags |= WILLBEDIR;
        NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd,
            &cap_renameat_target_rights, td);
        if ((error = namei(&tond)) != 0) {
                /* Translate error code for rename("dir1", "dir2/."). */
                if (error == EISDIR && fvp->v_type == VDIR)
                        error = EINVAL;
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                goto out1;
        }
        tdvp = tond.ni_dvp;
        tvp = tond.ni_vp;
        /*
         * Try to start the write without sleeping.  On failure release
         * everything obtained so far, wait with V_XSLEEP, and redo the
         * lookups from the top.
         */
        error = vn_start_write(fvp, &mp, V_NOWAIT);
        if (error != 0) {
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
                if (tvp != NULL)
                        vput(tvp);
                if (tdvp == tvp)
                        vrele(tdvp);
                else
                        vput(tdvp);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                vrele(tond.ni_startdir);
                if (fromnd.ni_startdir != NULL)
                        vrele(fromnd.ni_startdir);
                error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
                if (error != 0)
                        return (error);
                goto again;
        }
        /* An existing target must be of the same kind as the source. */
        if (tvp != NULL) {
                if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
                        error = ENOTDIR;
                        goto out;
                } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
                        error = EISDIR;
                        goto out;
                }
#ifdef CAPABILITIES
                if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
                        /*
                         * If the target already exists we require CAP_UNLINKAT
                         * from 'newfd', when newfd was used for the lookup.
                         */
                        error = cap_check(&tond.ni_filecaps.fc_rights,
                            &cap_unlinkat_rights);
                        if (error != 0)
                                goto out;
                }
#endif
        }
        /* The source may not be the directory that will hold the target. */
        if (fvp == tdvp) {
                error = EINVAL;
                goto out;
        }
        /*
         * If the source is the same as the destination (that is, if they
         * are links to the same vnode), then there is nothing to do.
         * ERESTART is used as an internal marker for this case; it takes
         * the cleanup path below and is turned into 0 before returning.
         */
        if (fvp == tvp)
                error = ERESTART;
#ifdef MAC
        else
                error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
                    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
        if (error == 0) {
                /*
                 * NOTE(review): no vrele()/vput() of the four vnodes is
                 * done on this path; presumably VOP_RENAME() releases
                 * them itself -- confirm against VOP_RENAME(9).
                 */
                error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
        } else {
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
                if (tvp != NULL)
                        vput(tvp);
                /* When tdvp == tvp the vput() above already unlocked it. */
                if (tdvp == tvp)
                        vrele(tdvp);
                else
                        vput(tdvp);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
        }
        vrele(tond.ni_startdir);
        vn_finished_write(mp);
out1:
        if (fromnd.ni_startdir)
                vrele(fromnd.ni_startdir);
        /* Source and target were the same vnode: report success. */
        if (error == ERESTART)
                return (0);
        if (error == ERELOOKUP)
                goto again;
        return (error);
}
 3717 
 3718 /*
 3719  * Make a directory file.
 3720  */
 3721 #ifndef _SYS_SYSPROTO_H_
 3722 struct mkdir_args {
 3723         char    *path;
 3724         int     mode;
 3725 };
 3726 #endif
 3727 int
 3728 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 3729 {
 3730 
 3731         return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3732             uap->mode));
 3733 }
 3734 
 3735 #ifndef _SYS_SYSPROTO_H_
 3736 struct mkdirat_args {
 3737         int     fd;
 3738         char    *path;
 3739         mode_t  mode;
 3740 };
 3741 #endif
 3742 int
 3743 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3744 {
 3745 
 3746         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3747 }
 3748 
 3749 int
 3750 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
 3751     int mode)
 3752 {
 3753         struct mount *mp;
 3754         struct vattr vattr;
 3755         struct nameidata nd;
 3756         int error;
 3757 
 3758         AUDIT_ARG_MODE(mode);
 3759 restart:
 3760         bwillwrite();
 3761         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3762             NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR,
 3763             segflg, path, fd, &cap_mkdirat_rights, td);
 3764         if ((error = namei(&nd)) != 0)
 3765                 return (error);
 3766         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3767                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3768                 vput(nd.ni_dvp);
 3769                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3770                         return (error);
 3771                 goto restart;
 3772         }
 3773         VATTR_NULL(&vattr);
 3774         vattr.va_type = VDIR;
 3775         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask;
 3776 #ifdef MAC
 3777         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3778             &vattr);
 3779         if (error != 0)
 3780                 goto out;
 3781 #endif
 3782         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3783 #ifdef MAC
 3784 out:
 3785 #endif
 3786         NDFREE(&nd, NDF_ONLY_PNBUF);
 3787         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 3788         vn_finished_write(mp);
 3789         if (error == ERELOOKUP)
 3790                 goto restart;
 3791         return (error);
 3792 }
 3793 
 3794 /*
 3795  * Remove a directory file.
 3796  */
 3797 #ifndef _SYS_SYSPROTO_H_
 3798 struct rmdir_args {
 3799         char    *path;
 3800 };
 3801 #endif
 3802 int
 3803 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 3804 {
 3805 
 3806         return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 3807             0));
 3808 }
 3809 
 3810 int
 3811 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
 3812     enum uio_seg pathseg, int flag)
 3813 {
 3814         struct mount *mp;
 3815         struct vnode *vp;
 3816         struct file *fp;
 3817         struct nameidata nd;
 3818         cap_rights_t rights;
 3819         int error;
 3820 
 3821         fp = NULL;
 3822         if (fd != FD_NONE) {
 3823                 error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP),
 3824                     &fp);
 3825                 if (error != 0)
 3826                         return (error);
 3827         }
 3828 
 3829 restart:
 3830         bwillwrite();
 3831         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 3832             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 3833             pathseg, path, dfd, &cap_unlinkat_rights, td);
 3834         if ((error = namei(&nd)) != 0)
 3835                 goto fdout;
 3836         vp = nd.ni_vp;
 3837         if (vp->v_type != VDIR) {
 3838                 error = ENOTDIR;
 3839                 goto out;
 3840         }
 3841         /*
 3842          * No rmdir "." please.
 3843          */
 3844         if (nd.ni_dvp == vp) {
 3845                 error = EINVAL;
 3846                 goto out;
 3847         }
 3848         /*
 3849          * The root of a mounted filesystem cannot be deleted.
 3850          */
 3851         if (vp->v_vflag & VV_ROOT) {
 3852                 error = EBUSY;
 3853                 goto out;
 3854         }
 3855 
 3856         if (fp != NULL && fp->f_vnode != vp) {
 3857                 if (VN_IS_DOOMED(fp->f_vnode))
 3858                         error = EBADF;
 3859                 else
 3860                         error = EDEADLK;
 3861                 goto out;
 3862         }
 3863 
 3864 #ifdef MAC
 3865         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3866             &nd.ni_cnd);
 3867         if (error != 0)
 3868                 goto out;
 3869 #endif
 3870         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3871                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3872                 vput(vp);
 3873                 if (nd.ni_dvp == vp)
 3874                         vrele(nd.ni_dvp);
 3875                 else
 3876                         vput(nd.ni_dvp);
 3877                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3878                         goto fdout;
 3879                 goto restart;
 3880         }
 3881         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3882         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3883         vn_finished_write(mp);
 3884 out:
 3885         NDFREE(&nd, NDF_ONLY_PNBUF);
 3886         vput(vp);
 3887         if (nd.ni_dvp == vp)
 3888                 vrele(nd.ni_dvp);
 3889         else
 3890                 vput(nd.ni_dvp);
 3891         if (error == ERELOOKUP)
 3892                 goto restart;
 3893 fdout:
 3894         if (fp != NULL)
 3895                 fdrop(fp, td);
 3896         return (error);
 3897 }
 3898 
 3899 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 3900 int
 3901 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
 3902     long *basep, void (*func)(struct freebsd11_dirent *))
 3903 {
 3904         struct freebsd11_dirent dstdp;
 3905         struct dirent *dp, *edp;
 3906         char *dirbuf;
 3907         off_t base;
 3908         ssize_t resid, ucount;
 3909         int error;
 3910 
 3911         /* XXX arbitrary sanity limit on `count'. */
 3912         count = min(count, 64 * 1024);
 3913 
 3914         dirbuf = malloc(count, M_TEMP, M_WAITOK);
 3915 
 3916         error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
 3917             UIO_SYSSPACE);
 3918         if (error != 0)
 3919                 goto done;
 3920         if (basep != NULL)
 3921                 *basep = base;
 3922 
 3923         ucount = 0;
 3924         for (dp = (struct dirent *)dirbuf,
 3925             edp = (struct dirent *)&dirbuf[count - resid];
 3926             ucount < count && dp < edp; ) {
 3927                 if (dp->d_reclen == 0)
 3928                         break;
 3929                 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
 3930                 if (dp->d_namlen >= sizeof(dstdp.d_name))
 3931                         continue;
 3932                 dstdp.d_type = dp->d_type;
 3933                 dstdp.d_namlen = dp->d_namlen;
 3934                 dstdp.d_fileno = dp->d_fileno;          /* truncate */
 3935                 if (dstdp.d_fileno != dp->d_fileno) {
 3936                         switch (ino64_trunc_error) {
 3937                         default:
 3938                         case 0:
 3939                                 break;
 3940                         case 1:
 3941                                 error = EOVERFLOW;
 3942                                 goto done;
 3943                         case 2:
 3944                                 dstdp.d_fileno = UINT32_MAX;
 3945                                 break;
 3946                         }
 3947                 }
 3948                 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
 3949                     ((dp->d_namlen + 1 + 3) &~ 3);
 3950                 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 3951                 bzero(dstdp.d_name + dstdp.d_namlen,
 3952                     dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
 3953                     dstdp.d_namlen);
 3954                 MPASS(dstdp.d_reclen <= dp->d_reclen);
 3955                 MPASS(ucount + dstdp.d_reclen <= count);
 3956                 if (func != NULL)
 3957                         func(&dstdp);
 3958                 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
 3959                 if (error != 0)
 3960                         break;
 3961                 dp = (struct dirent *)((char *)dp + dp->d_reclen);
 3962                 ucount += dstdp.d_reclen;
 3963         }
 3964 
 3965 done:
 3966         free(dirbuf, M_TEMP);
 3967         if (error == 0)
 3968                 td->td_retval[0] = ucount;
 3969         return (error);
 3970 }
 3971 #endif /* COMPAT */
 3972 
 3973 #ifdef COMPAT_43
 3974 static void
 3975 ogetdirentries_cvt(struct freebsd11_dirent *dp)
 3976 {
 3977 #if (BYTE_ORDER == LITTLE_ENDIAN)
 3978         /*
 3979          * The expected low byte of dp->d_namlen is our dp->d_type.
 3980          * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
 3981          */
 3982         dp->d_type = dp->d_namlen;
 3983         dp->d_namlen = 0;
 3984 #else
 3985         /*
 3986          * The dp->d_type is the high byte of the expected dp->d_namlen,
 3987          * so must be zero'ed.
 3988          */
 3989         dp->d_type = 0;
 3990 #endif
 3991 }
 3992 
 3993 /*
 3994  * Read a block of directory entries in a filesystem independent format.
 3995  */
 3996 #ifndef _SYS_SYSPROTO_H_
 3997 struct ogetdirentries_args {
 3998         int     fd;
 3999         char    *buf;
 4000         u_int   count;
 4001         long    *basep;
 4002 };
 4003 #endif
 4004 int
 4005 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 4006 {
 4007         long loff;
 4008         int error;
 4009 
 4010         error = kern_ogetdirentries(td, uap, &loff);
 4011         if (error == 0)
 4012                 error = copyout(&loff, uap->basep, sizeof(long));
 4013         return (error);
 4014 }
 4015 
 4016 int
 4017 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 4018     long *ploff)
 4019 {
 4020         long base;
 4021         int error;
 4022 
 4023         /* XXX arbitrary sanity limit on `count'. */
 4024         if (uap->count > 64 * 1024)
 4025                 return (EINVAL);
 4026 
 4027         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 4028             &base, ogetdirentries_cvt);
 4029 
 4030         if (error == 0 && uap->basep != NULL)
 4031                 error = copyout(&base, uap->basep, sizeof(long));
 4032 
 4033         return (error);
 4034 }
 4035 #endif /* COMPAT_43 */
 4036 
 4037 #if defined(COMPAT_FREEBSD11)
 4038 #ifndef _SYS_SYSPROTO_H_
 4039 struct freebsd11_getdirentries_args {
 4040         int     fd;
 4041         char    *buf;
 4042         u_int   count;
 4043         long    *basep;
 4044 };
 4045 #endif
 4046 int
 4047 freebsd11_getdirentries(struct thread *td,
 4048     struct freebsd11_getdirentries_args *uap)
 4049 {
 4050         long base;
 4051         int error;
 4052 
 4053         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 4054             &base, NULL);
 4055 
 4056         if (error == 0 && uap->basep != NULL)
 4057                 error = copyout(&base, uap->basep, sizeof(long));
 4058         return (error);
 4059 }
 4060 
 4061 int
 4062 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
 4063 {
 4064         struct freebsd11_getdirentries_args ap;
 4065 
 4066         ap.fd = uap->fd;
 4067         ap.buf = uap->buf;
 4068         ap.count = uap->count;
 4069         ap.basep = NULL;
 4070         return (freebsd11_getdirentries(td, &ap));
 4071 }
 4072 #endif /* COMPAT_FREEBSD11 */
 4073 
 4074 /*
 4075  * Read a block of directory entries in a filesystem independent format.
 4076  */
 4077 int
 4078 sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
 4079 {
 4080         off_t base;
 4081         int error;
 4082 
 4083         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4084             NULL, UIO_USERSPACE);
 4085         if (error != 0)
 4086                 return (error);
 4087         if (uap->basep != NULL)
 4088                 error = copyout(&base, uap->basep, sizeof(off_t));
 4089         return (error);
 4090 }
 4091 
 4092 int
 4093 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
 4094     off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 4095 {
 4096         struct vnode *vp;
 4097         struct file *fp;
 4098         struct uio auio;
 4099         struct iovec aiov;
 4100         off_t loff;
 4101         int error, eofflag;
 4102         off_t foffset;
 4103 
 4104         AUDIT_ARG_FD(fd);
 4105         if (count > IOSIZE_MAX)
 4106                 return (EINVAL);
 4107         auio.uio_resid = count;
 4108         error = getvnode(td, fd, &cap_read_rights, &fp);
 4109         if (error != 0)
 4110                 return (error);
 4111         if ((fp->f_flag & FREAD) == 0) {
 4112                 fdrop(fp, td);
 4113                 return (EBADF);
 4114         }
 4115         vp = fp->f_vnode;
 4116         foffset = foffset_lock(fp, 0);
 4117 unionread:
 4118         if (vp->v_type != VDIR) {
 4119                 error = EINVAL;
 4120                 goto fail;
 4121         }
 4122         aiov.iov_base = buf;
 4123         aiov.iov_len = count;
 4124         auio.uio_iov = &aiov;
 4125         auio.uio_iovcnt = 1;
 4126         auio.uio_rw = UIO_READ;
 4127         auio.uio_segflg = bufseg;
 4128         auio.uio_td = td;
 4129         vn_lock(vp, LK_SHARED | LK_RETRY);
 4130         AUDIT_ARG_VNODE1(vp);
 4131         loff = auio.uio_offset = foffset;
 4132 #ifdef MAC
 4133         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4134         if (error == 0)
 4135 #endif
 4136                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4137                     NULL);
 4138         foffset = auio.uio_offset;
 4139         if (error != 0) {
 4140                 VOP_UNLOCK(vp);
 4141                 goto fail;
 4142         }
 4143         if (count == auio.uio_resid &&
 4144             (vp->v_vflag & VV_ROOT) &&
 4145             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4146                 struct vnode *tvp = vp;
 4147 
 4148                 vp = vp->v_mount->mnt_vnodecovered;
 4149                 VREF(vp);
 4150                 fp->f_vnode = vp;
 4151                 foffset = 0;
 4152                 vput(tvp);
 4153                 goto unionread;
 4154         }
 4155         VOP_UNLOCK(vp);
 4156         *basep = loff;
 4157         if (residp != NULL)
 4158                 *residp = auio.uio_resid;
 4159         td->td_retval[0] = count - auio.uio_resid;
 4160 fail:
 4161         foffset_unlock(fp, foffset, 0);
 4162         fdrop(fp, td);
 4163         return (error);
 4164 }
 4165 
 4166 /*
 4167  * Set the mode mask for creation of filesystem nodes.
 4168  */
 4169 #ifndef _SYS_SYSPROTO_H_
 4170 struct umask_args {
 4171         int     newmask;
 4172 };
 4173 #endif
 4174 int
 4175 sys_umask(struct thread *td, struct umask_args *uap)
 4176 {
 4177         struct pwddesc *pdp;
 4178 
 4179         pdp = td->td_proc->p_pd;
 4180         PWDDESC_XLOCK(pdp);
 4181         td->td_retval[0] = pdp->pd_cmask;
 4182         pdp->pd_cmask = uap->newmask & ALLPERMS;
 4183         PWDDESC_XUNLOCK(pdp);
 4184         return (0);
 4185 }
 4186 
 4187 /*
 4188  * Void all references to file by ripping underlying filesystem away from
 4189  * vnode.
 4190  */
 4191 #ifndef _SYS_SYSPROTO_H_
 4192 struct revoke_args {
 4193         char    *path;
 4194 };
 4195 #endif
 4196 int
 4197 sys_revoke(struct thread *td, struct revoke_args *uap)
 4198 {
 4199         struct vnode *vp;
 4200         struct vattr vattr;
 4201         struct nameidata nd;
 4202         int error;
 4203 
 4204         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4205             uap->path, td);
 4206         if ((error = namei(&nd)) != 0)
 4207                 return (error);
 4208         vp = nd.ni_vp;
 4209         NDFREE_NOTHING(&nd);
 4210         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4211                 error = EINVAL;
 4212                 goto out;
 4213         }
 4214 #ifdef MAC
 4215         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4216         if (error != 0)
 4217                 goto out;
 4218 #endif
 4219         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4220         if (error != 0)
 4221                 goto out;
 4222         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4223                 error = priv_check(td, PRIV_VFS_ADMIN);
 4224                 if (error != 0)
 4225                         goto out;
 4226         }
 4227         if (devfs_usecount(vp) > 0)
 4228                 VOP_REVOKE(vp, REVOKEALL);
 4229 out:
 4230         vput(vp);
 4231         return (error);
 4232 }
 4233 
 4234 /*
 4235  * Convert a user file descriptor to a kernel file entry and check that, if it
 4236  * is a capability, the correct rights are present. A reference on the file
 4237  * entry is held upon returning.
 4238  */
 4239 int
 4240 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 4241 {
 4242         struct file *fp;
 4243         int error;
 4244 
 4245         error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp);
 4246         if (error != 0)
 4247                 return (error);
 4248 
 4249         /*
 4250          * The file could be not of the vnode type, or it may be not
 4251          * yet fully initialized, in which case the f_vnode pointer
 4252          * may be set, but f_ops is still badfileops.  E.g.,
 4253          * devfs_open() transiently create such situation to
 4254          * facilitate csw d_fdopen().
 4255          *
 4256          * Dupfdopen() handling in kern_openat() installs the
 4257          * half-baked file into the process descriptor table, allowing
 4258          * other thread to dereference it. Guard against the race by
 4259          * checking f_ops.
 4260          */
 4261         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4262                 fdrop(fp, td);
 4263                 return (EINVAL);
 4264         }
 4265         *fpp = fp;
 4266         return (0);
 4267 }
 4268 
 4269 /*
 4270  * Get an (NFS) file handle.
 4271  */
 4272 #ifndef _SYS_SYSPROTO_H_
 4273 struct lgetfh_args {
 4274         char *fname;
 4275         fhandle_t *fhp;
 4276 };
 4277 #endif
 4278 int
 4279 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 4280 {
 4281 
 4282         return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname,
 4283             UIO_USERSPACE, uap->fhp, UIO_USERSPACE));
 4284 }
 4285 
 4286 #ifndef _SYS_SYSPROTO_H_
 4287 struct getfh_args {
 4288         char *fname;
 4289         fhandle_t *fhp;
 4290 };
 4291 #endif
 4292 int
 4293 sys_getfh(struct thread *td, struct getfh_args *uap)
 4294 {
 4295 
 4296         return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE,
 4297             uap->fhp, UIO_USERSPACE));
 4298 }
 4299 
 4300 /*
 4301  * syscall for the rpc.lockd to use to translate an open descriptor into
 4302  * a NFS file handle.
 4303  *
 4304  * warning: do not remove the priv_check() call or this becomes one giant
 4305  * security hole.
 4306  */
 4307 #ifndef _SYS_SYSPROTO_H_
 4308 struct getfhat_args {
 4309         int fd;
 4310         char *path;
 4311         fhandle_t *fhp;
 4312         int flags;
 4313 };
 4314 #endif
 4315 int
 4316 sys_getfhat(struct thread *td, struct getfhat_args *uap)
 4317 {
 4318 
 4319         if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 4320                 return (EINVAL);
 4321         return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE,
 4322             uap->fhp, UIO_USERSPACE));
 4323 }
 4324 
 4325 int
 4326 kern_getfhat(struct thread *td, int flags, int fd, const char *path,
 4327     enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg)
 4328 {
 4329         struct nameidata nd;
 4330         fhandle_t fh;
 4331         struct vnode *vp;
 4332         int error;
 4333 
 4334         error = priv_check(td, PRIV_VFS_GETFH);
 4335         if (error != 0)
 4336                 return (error);
 4337         NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW |
 4338             AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path,
 4339             fd, td);
 4340         error = namei(&nd);
 4341         if (error != 0)
 4342                 return (error);
 4343         NDFREE_NOTHING(&nd);
 4344         vp = nd.ni_vp;
 4345         bzero(&fh, sizeof(fh));
 4346         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4347         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4348         vput(vp);
 4349         if (error == 0) {
 4350                 if (fhseg == UIO_USERSPACE)
 4351                         error = copyout(&fh, fhp, sizeof (fh));
 4352                 else
 4353                         memcpy(fhp, &fh, sizeof(fh));
 4354         }
 4355         return (error);
 4356 }
 4357 
 4358 #ifndef _SYS_SYSPROTO_H_
 4359 struct fhlink_args {
 4360         fhandle_t *fhp;
 4361         const char *to;
 4362 };
 4363 #endif
 4364 int
 4365 sys_fhlink(struct thread *td, struct fhlink_args *uap)
 4366 {
 4367 
 4368         return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp));
 4369 }
 4370 
 4371 #ifndef _SYS_SYSPROTO_H_
 4372 struct fhlinkat_args {
 4373         fhandle_t *fhp;
 4374         int tofd;
 4375         const char *to;
 4376 };
 4377 #endif
 4378 int
 4379 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap)
 4380 {
 4381 
 4382         return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp));
 4383 }
 4384 
 4385 static int
 4386 kern_fhlinkat(struct thread *td, int fd, const char *path,
 4387     enum uio_seg pathseg, fhandle_t *fhp)
 4388 {
 4389         fhandle_t fh;
 4390         struct mount *mp;
 4391         struct vnode *vp;
 4392         int error;
 4393 
 4394         error = priv_check(td, PRIV_VFS_GETFH);
 4395         if (error != 0)
 4396                 return (error);
 4397         error = copyin(fhp, &fh, sizeof(fh));
 4398         if (error != 0)
 4399                 return (error);
 4400         do {
 4401                 bwillwrite();
 4402                 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4403                         return (ESTALE);
 4404                 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4405                 vfs_unbusy(mp);
 4406                 if (error != 0)
 4407                         return (error);
 4408                 VOP_UNLOCK(vp);
 4409                 error = kern_linkat_vp(td, vp, fd, path, pathseg);
 4410         } while (error == EAGAIN || error == ERELOOKUP);
 4411         return (error);
 4412 }
 4413 
 4414 #ifndef _SYS_SYSPROTO_H_
 4415 struct fhreadlink_args {
 4416         fhandle_t *fhp;
 4417         char *buf;
 4418         size_t bufsize;
 4419 };
 4420 #endif
 4421 int
 4422 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap)
 4423 {
 4424         fhandle_t fh;
 4425         struct mount *mp;
 4426         struct vnode *vp;
 4427         int error;
 4428 
 4429         error = priv_check(td, PRIV_VFS_GETFH);
 4430         if (error != 0)
 4431                 return (error);
 4432         if (uap->bufsize > IOSIZE_MAX)
 4433                 return (EINVAL);
 4434         error = copyin(uap->fhp, &fh, sizeof(fh));
 4435         if (error != 0)
 4436                 return (error);
 4437         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4438                 return (ESTALE);
 4439         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4440         vfs_unbusy(mp);
 4441         if (error != 0)
 4442                 return (error);
 4443         error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td);
 4444         vput(vp);
 4445         return (error);
 4446 }
 4447 
 4448 /*
 4449  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4450  * open descriptor.
 4451  *
 4452  * warning: do not remove the priv_check() call or this becomes one giant
 4453  * security hole.
 4454  */
 4455 #ifndef _SYS_SYSPROTO_H_
 4456 struct fhopen_args {
 4457         const struct fhandle *u_fhp;
 4458         int flags;
 4459 };
 4460 #endif
 4461 int
 4462 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 4463 {
 4464         return (kern_fhopen(td, uap->u_fhp, uap->flags));
 4465 }
 4466 
 4467 int
 4468 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
 4469 {
 4470         struct mount *mp;
 4471         struct vnode *vp;
 4472         struct fhandle fhp;
 4473         struct file *fp;
 4474         int fmode, error;
 4475         int indx;
 4476 
 4477         error = priv_check(td, PRIV_VFS_FHOPEN);
 4478         if (error != 0)
 4479                 return (error);
 4480         indx = -1;
 4481         fmode = FFLAGS(flags);
 4482         /* why not allow a non-read/write open for our lockd? */
 4483         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4484                 return (EINVAL);
 4485         error = copyin(u_fhp, &fhp, sizeof(fhp));
 4486         if (error != 0)
 4487                 return(error);
 4488         /* find the mount point */
 4489         mp = vfs_busyfs(&fhp.fh_fsid);
 4490         if (mp == NULL)
 4491                 return (ESTALE);
 4492         /* now give me my vnode, it gets returned to me locked */
 4493         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4494         vfs_unbusy(mp);
 4495         if (error != 0)
 4496                 return (error);
 4497 
 4498         error = falloc_noinstall(td, &fp);
 4499         if (error != 0) {
 4500                 vput(vp);
 4501                 return (error);
 4502         }
 4503         /*
 4504          * An extra reference on `fp' has been held for us by
 4505          * falloc_noinstall().
 4506          */
 4507 
 4508 #ifdef INVARIANTS
 4509         td->td_dupfd = -1;
 4510 #endif
 4511         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4512         if (error != 0) {
 4513                 KASSERT(fp->f_ops == &badfileops,
 4514                     ("VOP_OPEN in fhopen() set f_ops"));
 4515                 KASSERT(td->td_dupfd < 0,
 4516                     ("fhopen() encountered fdopen()"));
 4517 
 4518                 vput(vp);
 4519                 goto bad;
 4520         }
 4521 #ifdef INVARIANTS
 4522         td->td_dupfd = 0;
 4523 #endif
 4524         fp->f_vnode = vp;
 4525         finit_vnode(fp, fmode, NULL, &vnops);
 4526         VOP_UNLOCK(vp);
 4527         if ((fmode & O_TRUNC) != 0) {
 4528                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4529                 if (error != 0)
 4530                         goto bad;
 4531         }
 4532 
 4533         error = finstall(td, fp, &indx, fmode, NULL);
 4534 bad:
 4535         fdrop(fp, td);
 4536         td->td_retval[0] = indx;
 4537         return (error);
 4538 }
 4539 
 4540 /*
 4541  * Stat an (NFS) file handle.
 4542  */
 4543 #ifndef _SYS_SYSPROTO_H_
 4544 struct fhstat_args {
 4545         struct fhandle *u_fhp;
 4546         struct stat *sb;
 4547 };
 4548 #endif
 4549 int
 4550 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 4551 {
 4552         struct stat sb;
 4553         struct fhandle fh;
 4554         int error;
 4555 
 4556         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4557         if (error != 0)
 4558                 return (error);
 4559         error = kern_fhstat(td, fh, &sb);
 4560         if (error == 0)
 4561                 error = copyout(&sb, uap->sb, sizeof(sb));
 4562         return (error);
 4563 }
 4564 
 4565 int
 4566 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4567 {
 4568         struct mount *mp;
 4569         struct vnode *vp;
 4570         int error;
 4571 
 4572         error = priv_check(td, PRIV_VFS_FHSTAT);
 4573         if (error != 0)
 4574                 return (error);
 4575         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4576                 return (ESTALE);
 4577         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4578         vfs_unbusy(mp);
 4579         if (error != 0)
 4580                 return (error);
 4581         error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
 4582         vput(vp);
 4583         return (error);
 4584 }
 4585 
 4586 /*
 4587  * Implement fstatfs() for (NFS) file handles.
 4588  */
 4589 #ifndef _SYS_SYSPROTO_H_
 4590 struct fhstatfs_args {
 4591         struct fhandle *u_fhp;
 4592         struct statfs *buf;
 4593 };
 4594 #endif
 4595 int
 4596 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 4597 {
 4598         struct statfs *sfp;
 4599         fhandle_t fh;
 4600         int error;
 4601 
 4602         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4603         if (error != 0)
 4604                 return (error);
 4605         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 4606         error = kern_fhstatfs(td, fh, sfp);
 4607         if (error == 0)
 4608                 error = copyout(sfp, uap->buf, sizeof(*sfp));
 4609         free(sfp, M_STATFS);
 4610         return (error);
 4611 }
 4612 
 4613 int
 4614 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4615 {
 4616         struct mount *mp;
 4617         struct vnode *vp;
 4618         int error;
 4619 
 4620         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4621         if (error != 0)
 4622                 return (error);
 4623         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4624                 return (ESTALE);
 4625         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4626         if (error != 0) {
 4627                 vfs_unbusy(mp);
 4628                 return (error);
 4629         }
 4630         vput(vp);
 4631         error = prison_canseemount(td->td_ucred, mp);
 4632         if (error != 0)
 4633                 goto out;
 4634 #ifdef MAC
 4635         error = mac_mount_check_stat(td->td_ucred, mp);
 4636         if (error != 0)
 4637                 goto out;
 4638 #endif
 4639         error = VFS_STATFS(mp, buf);
 4640 out:
 4641         vfs_unbusy(mp);
 4642         return (error);
 4643 }
 4644 
 4645 /*
 4646  * Unlike madvise(2), we do not make a best effort to remember every
 4647  * possible caching hint.  Instead, we remember the last setting with
 4648  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4649  * region of any current setting.
 4650  */
 4651 int
 4652 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4653     int advice)
 4654 {
 4655         struct fadvise_info *fa, *new;
 4656         struct file *fp;
 4657         struct vnode *vp;
 4658         off_t end;
 4659         int error;
 4660 
 4661         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4662                 return (EINVAL);
 4663         AUDIT_ARG_VALUE(advice);
 4664         switch (advice) {
 4665         case POSIX_FADV_SEQUENTIAL:
 4666         case POSIX_FADV_RANDOM:
 4667         case POSIX_FADV_NOREUSE:
 4668                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4669                 break;
 4670         case POSIX_FADV_NORMAL:
 4671         case POSIX_FADV_WILLNEED:
 4672         case POSIX_FADV_DONTNEED:
 4673                 new = NULL;
 4674                 break;
 4675         default:
 4676                 return (EINVAL);
 4677         }
 4678         /* XXX: CAP_POSIX_FADVISE? */
 4679         AUDIT_ARG_FD(fd);
 4680         error = fget(td, fd, &cap_no_rights, &fp);
 4681         if (error != 0)
 4682                 goto out;
 4683         AUDIT_ARG_FILE(td->td_proc, fp);
 4684         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4685                 error = ESPIPE;
 4686                 goto out;
 4687         }
 4688         if (fp->f_type != DTYPE_VNODE) {
 4689                 error = ENODEV;
 4690                 goto out;
 4691         }
 4692         vp = fp->f_vnode;
 4693         if (vp->v_type != VREG) {
 4694                 error = ENODEV;
 4695                 goto out;
 4696         }
 4697         if (len == 0)
 4698                 end = OFF_MAX;
 4699         else
 4700                 end = offset + len - 1;
 4701         switch (advice) {
 4702         case POSIX_FADV_SEQUENTIAL:
 4703         case POSIX_FADV_RANDOM:
 4704         case POSIX_FADV_NOREUSE:
 4705                 /*
 4706                  * Try to merge any existing non-standard region with
 4707                  * this new region if possible, otherwise create a new
 4708                  * non-standard region for this request.
 4709                  */
 4710                 mtx_pool_lock(mtxpool_sleep, fp);
 4711                 fa = fp->f_advice;
 4712                 if (fa != NULL && fa->fa_advice == advice &&
 4713                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4714                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4715                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4716                         if (offset < fa->fa_start)
 4717                                 fa->fa_start = offset;
 4718                         if (end > fa->fa_end)
 4719                                 fa->fa_end = end;
 4720                 } else {
 4721                         new->fa_advice = advice;
 4722                         new->fa_start = offset;
 4723                         new->fa_end = end;
 4724                         fp->f_advice = new;
 4725                         new = fa;
 4726                 }
 4727                 mtx_pool_unlock(mtxpool_sleep, fp);
 4728                 break;
 4729         case POSIX_FADV_NORMAL:
 4730                 /*
 4731                  * If a the "normal" region overlaps with an existing
 4732                  * non-standard region, trim or remove the
 4733                  * non-standard region.
 4734                  */
 4735                 mtx_pool_lock(mtxpool_sleep, fp);
 4736                 fa = fp->f_advice;
 4737                 if (fa != NULL) {
 4738                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4739                                 new = fa;
 4740                                 fp->f_advice = NULL;
 4741                         } else if (offset <= fa->fa_start &&
 4742                             end >= fa->fa_start)
 4743                                 fa->fa_start = end + 1;
 4744                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4745                                 fa->fa_end = offset - 1;
 4746                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4747                                 /*
 4748                                  * If the "normal" region is a middle
 4749                                  * portion of the existing
 4750                                  * non-standard region, just remove
 4751                                  * the whole thing rather than picking
 4752                                  * one side or the other to
 4753                                  * preserve.
 4754                                  */
 4755                                 new = fa;
 4756                                 fp->f_advice = NULL;
 4757                         }
 4758                 }
 4759                 mtx_pool_unlock(mtxpool_sleep, fp);
 4760                 break;
 4761         case POSIX_FADV_WILLNEED:
 4762         case POSIX_FADV_DONTNEED:
 4763                 error = VOP_ADVISE(vp, offset, end, advice);
 4764                 break;
 4765         }
 4766 out:
 4767         if (fp != NULL)
 4768                 fdrop(fp, td);
 4769         free(new, M_FADVISE);
 4770         return (error);
 4771 }
 4772 
 4773 int
 4774 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4775 {
 4776         int error;
 4777 
 4778         error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4779             uap->advice);
 4780         return (kern_posix_error(td, error));
 4781 }
 4782 
 4783 int
 4784 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 4785     off_t *outoffp, size_t len, unsigned int flags)
 4786 {
 4787         struct file *infp, *outfp;
 4788         struct vnode *invp, *outvp;
 4789         int error;
 4790         size_t retlen;
 4791         void *rl_rcookie, *rl_wcookie;
 4792         off_t savinoff, savoutoff;
 4793 
 4794         infp = outfp = NULL;
 4795         rl_rcookie = rl_wcookie = NULL;
 4796         savinoff = -1;
 4797         error = 0;
 4798         retlen = 0;
 4799 
 4800         if (flags != 0) {
 4801                 error = EINVAL;
 4802                 goto out;
 4803         }
 4804         if (len > SSIZE_MAX)
 4805                 /*
 4806                  * Although the len argument is size_t, the return argument
 4807                  * is ssize_t (which is signed).  Therefore a size that won't
 4808                  * fit in ssize_t can't be returned.
 4809                  */
 4810                 len = SSIZE_MAX;
 4811 
 4812         /* Get the file structures for the file descriptors. */
 4813         error = fget_read(td, infd, &cap_read_rights, &infp);
 4814         if (error != 0)
 4815                 goto out;
 4816         if (infp->f_ops == &badfileops) {
 4817                 error = EBADF;
 4818                 goto out;
 4819         }
 4820         if (infp->f_vnode == NULL) {
 4821                 error = EINVAL;
 4822                 goto out;
 4823         }
 4824         error = fget_write(td, outfd, &cap_write_rights, &outfp);
 4825         if (error != 0)
 4826                 goto out;
 4827         if (outfp->f_ops == &badfileops) {
 4828                 error = EBADF;
 4829                 goto out;
 4830         }
 4831         if (outfp->f_vnode == NULL) {
 4832                 error = EINVAL;
 4833                 goto out;
 4834         }
 4835 
 4836         /* Set the offset pointers to the correct place. */
 4837         if (inoffp == NULL)
 4838                 inoffp = &infp->f_offset;
 4839         if (outoffp == NULL)
 4840                 outoffp = &outfp->f_offset;
 4841         savinoff = *inoffp;
 4842         savoutoff = *outoffp;
 4843 
 4844         invp = infp->f_vnode;
 4845         outvp = outfp->f_vnode;
 4846         /* Sanity check the f_flag bits. */
 4847         if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
 4848             (infp->f_flag & FREAD) == 0) {
 4849                 error = EBADF;
 4850                 goto out;
 4851         }
 4852 
 4853         /* If len == 0, just return 0. */
 4854         if (len == 0)
 4855                 goto out;
 4856 
 4857         /*
 4858          * If infp and outfp refer to the same file, the byte ranges cannot
 4859          * overlap.
 4860          */
 4861         if (invp == outvp && ((savinoff <= savoutoff && savinoff + len >
 4862             savoutoff) || (savinoff > savoutoff && savoutoff + len >
 4863             savinoff))) {
 4864                 error = EINVAL;
 4865                 goto out;
 4866         }
 4867 
 4868         /* Range lock the byte ranges for both invp and outvp. */
 4869         for (;;) {
 4870                 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
 4871                     len);
 4872                 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
 4873                     len);
 4874                 if (rl_rcookie != NULL)
 4875                         break;
 4876                 vn_rangelock_unlock(outvp, rl_wcookie);
 4877                 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
 4878                 vn_rangelock_unlock(invp, rl_rcookie);
 4879         }
 4880 
 4881         retlen = len;
 4882         error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
 4883             flags, infp->f_cred, outfp->f_cred, td);
 4884 out:
 4885         if (rl_rcookie != NULL)
 4886                 vn_rangelock_unlock(invp, rl_rcookie);
 4887         if (rl_wcookie != NULL)
 4888                 vn_rangelock_unlock(outvp, rl_wcookie);
 4889         if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
 4890                 *inoffp = savinoff;
 4891                 *outoffp = savoutoff;
 4892         }
 4893         if (outfp != NULL)
 4894                 fdrop(outfp, td);
 4895         if (infp != NULL)
 4896                 fdrop(infp, td);
 4897         td->td_retval[0] = retlen;
 4898         return (error);
 4899 }
 4900 
 4901 int
 4902 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
 4903 {
 4904         off_t inoff, outoff, *inoffp, *outoffp;
 4905         int error;
 4906 
 4907         inoffp = outoffp = NULL;
 4908         if (uap->inoffp != NULL) {
 4909                 error = copyin(uap->inoffp, &inoff, sizeof(off_t));
 4910                 if (error != 0)
 4911                         return (error);
 4912                 inoffp = &inoff;
 4913         }
 4914         if (uap->outoffp != NULL) {
 4915                 error = copyin(uap->outoffp, &outoff, sizeof(off_t));
 4916                 if (error != 0)
 4917                         return (error);
 4918                 outoffp = &outoff;
 4919         }
 4920         error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
 4921             outoffp, uap->len, uap->flags);
 4922         if (error == 0 && uap->inoffp != NULL)
 4923                 error = copyout(inoffp, uap->inoffp, sizeof(off_t));
 4924         if (error == 0 && uap->outoffp != NULL)
 4925                 error = copyout(outoffp, uap->outoffp, sizeof(off_t));
 4926         return (error);
 4927 }

Cache object: 124a233c736507d4eaaae0a5888679d3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.