The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_capsicum.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #ifdef COMPAT_FREEBSD11
   48 #include <sys/abi_compat.h>
   49 #endif
   50 #include <sys/bio.h>
   51 #include <sys/buf.h>
   52 #include <sys/capsicum.h>
   53 #include <sys/disk.h>
   54 #include <sys/malloc.h>
   55 #include <sys/mount.h>
   56 #include <sys/mutex.h>
   57 #include <sys/sysproto.h>
   58 #include <sys/namei.h>
   59 #include <sys/filedesc.h>
   60 #include <sys/kernel.h>
   61 #include <sys/fcntl.h>
   62 #include <sys/file.h>
   63 #include <sys/filio.h>
   64 #include <sys/limits.h>
   65 #include <sys/linker.h>
   66 #include <sys/rwlock.h>
   67 #include <sys/sdt.h>
   68 #include <sys/stat.h>
   69 #include <sys/sx.h>
   70 #include <sys/unistd.h>
   71 #include <sys/vnode.h>
   72 #include <sys/priv.h>
   73 #include <sys/proc.h>
   74 #include <sys/dirent.h>
   75 #include <sys/jail.h>
   76 #include <sys/syscallsubr.h>
   77 #include <sys/sysctl.h>
   78 #ifdef KTRACE
   79 #include <sys/ktrace.h>
   80 #endif
   81 
   82 #include <machine/stdarg.h>
   83 
   84 #include <security/audit/audit.h>
   85 #include <security/mac/mac_framework.h>
   86 
   87 #include <vm/vm.h>
   88 #include <vm/vm_object.h>
   89 #include <vm/vm_page.h>
   90 #include <vm/uma.h>
   91 
   92 #include <fs/devfs/devfs.h>
   93 
   /* malloc(9) type "fadvise", described as "posix_fadvise(2) information". */
   94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   95 
      /*
       * Prototypes for static helpers private to this file.  The matching
       * definitions are not visible in this section of the source.
       */
   96 static int kern_chflagsat(struct thread *td, int fd, const char *path,
   97     enum uio_seg pathseg, u_long flags, int atflag);
   98 static int setfflags(struct thread *td, struct vnode *, u_long);
   99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  100 static int getutimens(const struct timespec *, enum uio_seg,
  101     struct timespec *, int *);
  102 static int setutimes(struct thread *td, struct vnode *,
  103     const struct timespec *, int, int);
  104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  105     struct thread *td);
  106 static int kern_fhlinkat(struct thread *td, int fd, const char *path,
  107     enum uio_seg pathseg, fhandle_t *fhp);
  108 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
  109     size_t count, struct thread *td);
  110 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
  111     const char *path, enum uio_seg segflag);
  112 
  113 static uint64_t
  114 at2cnpflags(u_int at_flags, u_int mask)
  115 {
  116         uint64_t res;
  117 
  118         MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) !=
  119             (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW));
  120 
  121         res = 0;
  122         at_flags &= mask;
  123         if ((at_flags & AT_RESOLVE_BENEATH) != 0)
  124                 res |= RBENEATH;
  125         if ((at_flags & AT_SYMLINK_FOLLOW) != 0)
  126                 res |= FOLLOW;
  127         /* NOFOLLOW is pseudo flag */
  128         if ((mask & AT_SYMLINK_NOFOLLOW) != 0) {
  129                 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
  130                     FOLLOW;
  131         }
  132         if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0)
  133                 res |= EMPTYPATH;
  134         return (res);
  135 }
  136 
  /*
   * Walk the global mount list and, for every mount that is not read-only,
   * call vfs_periodic() and VFS_SYNC() with MNT_NOWAIT.
   *
   * Mounts for which vfs_busy(MBF_NOWAIT | MBF_MNTLSTLOCK) fails, or for
   * which vn_start_write(V_NOWAIT) fails, are passed over.  'td' is not
   * referenced in the body.  Always returns 0; the return values of
   * vfs_periodic() and VFS_SYNC() are not examined.
   */
  137 int
  138 kern_sync(struct thread *td)
  139 {
  140         struct mount *mp, *nmp;
  141         int save;
  142 
  143         mtx_lock(&mountlist_mtx);
  144         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
              /* Could not busy this mount: advance, list mutex still held. */
  145                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  146                         nmp = TAILQ_NEXT(mp, mnt_list);
  147                         continue;
  148                 }
  149                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  150                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
                      /* TDP_SYNCIO is set only for the duration of the sync. */
  151                         save = curthread_pflags_set(TDP_SYNCIO);
  152                         vfs_periodic(mp, MNT_NOWAIT);
  153                         VFS_SYNC(mp, MNT_NOWAIT);
  154                         curthread_pflags_restore(save);
  155                         vn_finished_write(mp);
  156                 }
              /*
               * Re-take the list mutex before reading the successor and
               * only then drop the busy reference on 'mp'.
               * NOTE(review): this implies a successful vfs_busy() with
               * MBF_MNTLSTLOCK returns with mountlist_mtx released -- confirm.
               */
  157                 mtx_lock(&mountlist_mtx);
  158                 nmp = TAILQ_NEXT(mp, mnt_list);
  159                 vfs_unbusy(mp);
  160         }
  161         mtx_unlock(&mountlist_mtx);
  162         return (0);
  163 }
  164 
  /*
   * sync(2) entry point: sync each mounted filesystem by delegating to
   * kern_sync() and passing its result back unchanged.
   */
  #ifndef _SYS_SYSPROTO_H_
  struct sync_args {
          int     dummy;
  };
  #endif
  /* ARGSUSED */
  int
  sys_sync(struct thread *td, struct sync_args *uap)
  {
          int rc;

          /* 'uap' carries nothing this call needs. */
          rc = kern_sync(td);
          return (rc);
  }
  180 
  181 /*
  182  * Change filesystem quotas.
  183  */
  184 #ifndef _SYS_SYSPROTO_H_
  185 struct quotactl_args {
  186         char *path;
  187         int cmd;
  188         int uid;
  189         caddr_t arg;
  190 };
  191 #endif
  192 int
  193 sys_quotactl(struct thread *td, struct quotactl_args *uap)
  194 {
  195         struct mount *mp;
  196         struct nameidata nd;
  197         int error;
  198         bool mp_busy;
  199 
  200         AUDIT_ARG_CMD(uap->cmd);
  201         AUDIT_ARG_UID(uap->uid);
  202         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  203                 return (EPERM);
  204         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  205             uap->path);
  206         if ((error = namei(&nd)) != 0)
  207                 return (error);
  208         NDFREE_PNBUF(&nd);
  209         mp = nd.ni_vp->v_mount;
  210         vfs_ref(mp);
  211         vput(nd.ni_vp);
  212         error = vfs_busy(mp, 0);
  213         if (error != 0) {
  214                 vfs_rel(mp);
  215                 return (error);
  216         }
  217         mp_busy = true;
  218         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy);
  219 
  220         /*
  221          * Since quota on/off operations typically need to open quota
  222          * files, the implementation may need to unbusy the mount point
  223          * before calling into namei.  Otherwise, unmount might be
  224          * started between two vfs_busy() invocations (first is ours,
  225          * second is from mount point cross-walk code in lookup()),
  226          * causing deadlock.
  227          *
  228          * Avoid unbusying mp if the implementation indicates it has
  229          * already done so.
  230          */
  231         if (mp_busy)
  232                 vfs_unbusy(mp);
  233         vfs_rel(mp);
  234         return (error);
  235 }
  236 
  237 /*
  238  * Used by statfs conversion routines to scale the block size up if
  239  * necessary so that all of the block counts are <= 'max_size'.  Note
  240  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  241  * value of 'n'.
  242  */
  243 void
  244 statfs_scale_blocks(struct statfs *sf, long max_size)
  245 {
  246         uint64_t count;
  247         int shift;
  248 
  249         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  250 
  251         /*
  252          * Attempt to scale the block counts to give a more accurate
  253          * overview to userland of the ratio of free space to used
  254          * space.  To do this, find the largest block count and compute
  255          * a divisor that lets it fit into a signed integer <= max_size.
  256          */
  257         if (sf->f_bavail < 0)
  258                 count = -sf->f_bavail;
  259         else
  260                 count = sf->f_bavail;
  261         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  262         if (count <= max_size)
  263                 return;
  264 
  265         count >>= flsl(max_size);
  266         shift = 0;
  267         while (count > 0) {
  268                 shift++;
  269                 count >>=1;
  270         }
  271 
  272         sf->f_bsize <<= shift;
  273         sf->f_blocks >>= shift;
  274         sf->f_bfree >>= shift;
  275         sf->f_bavail >>= shift;
  276 }
  277 
  278 static int
  279 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  280 {
  281         int error;
  282 
  283         if (mp == NULL)
  284                 return (EBADF);
  285         error = vfs_busy(mp, 0);
  286         vfs_rel(mp);
  287         if (error != 0)
  288                 return (error);
  289 #ifdef MAC
  290         error = mac_mount_check_stat(td->td_ucred, mp);
  291         if (error != 0)
  292                 goto out;
  293 #endif
  294         error = VFS_STATFS(mp, buf);
  295         if (error != 0)
  296                 goto out;
  297         if (priv_check_cred_vfs_generation(td->td_ucred)) {
  298                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  299                 prison_enforce_statfs(td->td_ucred, mp, buf);
  300         }
  301 out:
  302         vfs_unbusy(mp);
  303         return (error);
  304 }
  305 
  306 /*
  307  * Get filesystem statistics.
  308  */
  309 #ifndef _SYS_SYSPROTO_H_
  310 struct statfs_args {
  311         char *path;
  312         struct statfs *buf;
  313 };
  314 #endif
  315 int
  316 sys_statfs(struct thread *td, struct statfs_args *uap)
  317 {
  318         struct statfs *sfp;
  319         int error;
  320 
  321         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  322         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  323         if (error == 0)
  324                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  325         free(sfp, M_STATFS);
  326         return (error);
  327 }
  328 
  329 int
  330 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
  331     struct statfs *buf)
  332 {
  333         struct mount *mp;
  334         struct nameidata nd;
  335         int error;
  336 
  337         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path);
  338         error = namei(&nd);
  339         if (error != 0)
  340                 return (error);
  341         NDFREE_PNBUF(&nd);
  342         mp = vfs_ref_from_vp(nd.ni_vp);
  343         vrele(nd.ni_vp);
  344         return (kern_do_statfs(td, mp, buf));
  345 }
  346 
  347 /*
  348  * Get filesystem statistics.
  349  */
  350 #ifndef _SYS_SYSPROTO_H_
  351 struct fstatfs_args {
  352         int fd;
  353         struct statfs *buf;
  354 };
  355 #endif
  356 int
  357 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
  358 {
  359         struct statfs *sfp;
  360         int error;
  361 
  362         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  363         error = kern_fstatfs(td, uap->fd, sfp);
  364         if (error == 0)
  365                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  366         free(sfp, M_STATFS);
  367         return (error);
  368 }
  369 
  370 int
  371 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  372 {
  373         struct file *fp;
  374         struct mount *mp;
  375         struct vnode *vp;
  376         int error;
  377 
  378         AUDIT_ARG_FD(fd);
  379         error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
  380         if (error != 0)
  381                 return (error);
  382         vp = fp->f_vnode;
  383 #ifdef AUDIT
  384         if (AUDITING_TD(td)) {
  385                 vn_lock(vp, LK_SHARED | LK_RETRY);
  386                 AUDIT_ARG_VNODE1(vp);
  387                 VOP_UNLOCK(vp);
  388         }
  389 #endif
  390         mp = vfs_ref_from_vp(vp);
  391         fdrop(fp, td);
  392         return (kern_do_statfs(td, mp, buf));
  393 }
  394 
  395 /*
  396  * Get statistics on all filesystems.
  397  */
  398 #ifndef _SYS_SYSPROTO_H_
  399 struct getfsstat_args {
  400         struct statfs *buf;
  401         long bufsize;
  402         int mode;
  403 };
  404 #endif
  405 int
  406 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
  407 {
  408         size_t count;
  409         int error;
  410 
  411         if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
  412                 return (EINVAL);
  413         error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
  414             UIO_USERSPACE, uap->mode);
  415         if (error == 0)
  416                 td->td_retval[0] = count;
  417         return (error);
  418 }
  419 
  420 /*
       * Gather struct statfs records for the mounts on the global mount list
       * that pass prison_canseemount() (and, with MAC, mac_mount_check_stat()).
       *
       * td      - calling thread; its credential drives the visibility checks.
       * buf     - for UIO_USERSPACE, '*buf' is the destination written with
       *           copyout(); for UIO_SYSSPACE, '*buf' is set to a freshly
       *           malloc'ed array (M_STATFS), or to NULL when 'mode' is invalid.
       * bufsize - destination capacity in bytes; 0 requests the count only.
       * countp  - receives the number of records stored, or the number of
       *           visible mounts when bufsize is 0.
       * bufseg  - UIO_USERSPACE or UIO_SYSSPACE.
       * mode    - MNT_WAIT or MNT_NOWAIT; anything else yields EINVAL.
       *
       * Returns 0, EINVAL, or the copyout() error.  '*countp' is not written
       * on the error returns.
       *
  421  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  422  *      The caller is responsible for freeing memory which will be allocated
  423  *      in '*buf'.
  424  */
  425 int
  426 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  427     size_t *countp, enum uio_seg bufseg, int mode)
  428 {
  429         struct mount *mp, *nmp;
  430         struct statfs *sfsp, *sp, *sptmp, *tofree;
  431         size_t count, maxcount;
  432         int error;
  433 
  434         switch (mode) {
  435         case MNT_WAIT:
  436         case MNT_NOWAIT:
  437                 break;
  438         default:
  439                 if (bufseg == UIO_SYSSPACE)
  440                         *buf = NULL;
  441                 return (EINVAL);
  442         }
  443 restart:
              /* Number of whole records that fit in the caller's capacity. */
  444         maxcount = bufsize / sizeof(struct statfs);
  445         if (bufsize == 0) {
  446                 sfsp = NULL;
  447                 tofree = NULL;
  448         } else if (bufseg == UIO_USERSPACE) {
  449                 sfsp = *buf;
  450                 tofree = NULL;
  451         } else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Size the kernel array by the current length of the
                       * mount list, capped at what the caller asked for.
                       */
  452                 count = 0;
  453                 mtx_lock(&mountlist_mtx);
  454                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  455                         count++;
  456                 }
  457                 mtx_unlock(&mountlist_mtx);
  458                 if (maxcount > count)
  459                         maxcount = count;
  460                 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
  461                     M_STATFS, M_WAITOK);
  462         }
  463 
  464         count = 0;
  465 
  466         /*
  467          * If there is no target buffer they only want the count.
  468          *
  469          * This could be TAILQ_FOREACH but it is open-coded to match the original
  470          * code below.
  471          */
  472         if (sfsp == NULL) {
  473                 mtx_lock(&mountlist_mtx);
  474                 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  475                         if (prison_canseemount(td->td_ucred, mp) != 0) {
  476                                 nmp = TAILQ_NEXT(mp, mnt_list);
  477                                 continue;
  478                         }
  479 #ifdef MAC
  480                         if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  481                                 nmp = TAILQ_NEXT(mp, mnt_list);
  482                                 continue;
  483                         }
  484 #endif
  485                         count++;
  486                         nmp = TAILQ_NEXT(mp, mnt_list);
  487                 }
  488                 mtx_unlock(&mountlist_mtx);
  489                 *countp = count;
  490                 return (0);
  491         }
  492 
  493         /*
  494          * They want the entire thing.
  495          *
  496          * Short-circuit the corner case of no room for anything, avoids
  497          * relocking below.
  498          */
  499         if (maxcount < 1) {
  500                 goto out;
  501         }
  502 
  503         mtx_lock(&mountlist_mtx);
  504         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  505                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  506                         nmp = TAILQ_NEXT(mp, mnt_list);
  507                         continue;
  508                 }
  509 #ifdef MAC
  510                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  511                         nmp = TAILQ_NEXT(mp, mnt_list);
  512                         continue;
  513                 }
  514 #endif
  515                 if (mode == MNT_WAIT) {
  516                         if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
  517                                 /*
  518                                  * If vfs_busy() failed, and MBF_NOWAIT
  519                                  * wasn't passed, then the mp is gone.
  520                                  * Furthermore, because of MBF_MNTLSTLOCK,
  521                                  * the mountlist_mtx was dropped.  We have
  522                                  * no other choice than to start over.
  523                                  */
  524                                 mtx_unlock(&mountlist_mtx);
                                      /* Drop any kernel array before redoing the setup. */
  525                                 free(tofree, M_STATFS);
  526                                 goto restart;
  527                         }
  528                 } else {
  529                         if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
  530                                 nmp = TAILQ_NEXT(mp, mnt_list);
  531                                 continue;
  532                         }
  533                 }
  534                 sp = &mp->mnt_stat;
  535                 /*
  536                  * If MNT_NOWAIT is specified, do not refresh
  537                  * the fsstat cache.
  538                  */
  539                 if (mode != MNT_NOWAIT) {
  540                         error = VFS_STATFS(mp, sp);
  541                         if (error != 0) {
                                      /* Skip this mount; the error is not reported. */
  542                                 mtx_lock(&mountlist_mtx);
  543                                 nmp = TAILQ_NEXT(mp, mnt_list);
  544                                 vfs_unbusy(mp);
  545                                 continue;
  546                         }
  547                 }
                      /*
                       * When the check below returns non-zero, hand out an edited
                       * private copy (f_fsid zeroed, prison_enforce_statfs()
                       * applied) instead of the mount's own mnt_stat.
                       */
  548                 if (priv_check_cred_vfs_generation(td->td_ucred)) {
  549                         sptmp = malloc(sizeof(struct statfs), M_STATFS,
  550                             M_WAITOK);
  551                         *sptmp = *sp;
  552                         sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
  553                         prison_enforce_statfs(td->td_ucred, mp, sptmp);
  554                         sp = sptmp;
  555                 } else
  556                         sptmp = NULL;
  557                 if (bufseg == UIO_SYSSPACE) {
  558                         bcopy(sp, sfsp, sizeof(*sp));
  559                         free(sptmp, M_STATFS);
  560                 } else /* if (bufseg == UIO_USERSPACE) */ {
  561                         error = copyout(sp, sfsp, sizeof(*sp));
  562                         free(sptmp, M_STATFS);
  563                         if (error != 0) {
  564                                 vfs_unbusy(mp);
  565                                 return (error);
  566                         }
  567                 }
  568                 sfsp++;
  569                 count++;
  570 
                      /* Destination full: stop without re-taking the list mutex. */
  571                 if (count == maxcount) {
  572                         vfs_unbusy(mp);
  573                         goto out;
  574                 }
  575 
                      /* Read the successor under the list mutex, then unbusy. */
  576                 mtx_lock(&mountlist_mtx);
  577                 nmp = TAILQ_NEXT(mp, mnt_list);
  578                 vfs_unbusy(mp);
  579         }
  580         mtx_unlock(&mountlist_mtx);
  581 out:
  582         *countp = count;
  583         return (0);
  584 }
  585 
  586 #ifdef COMPAT_FREEBSD4
  587 /*
  588  * Get old format filesystem statistics.
  589  */
  590 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
  591 
  592 #ifndef _SYS_SYSPROTO_H_
  593 struct freebsd4_statfs_args {
  594         char *path;
  595         struct ostatfs *buf;
  596 };
  597 #endif
  598 int
  599 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
  600 {
  601         struct ostatfs osb;
  602         struct statfs *sfp;
  603         int error;
  604 
  605         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  606         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  607         if (error == 0) {
  608                 freebsd4_cvtstatfs(sfp, &osb);
  609                 error = copyout(&osb, uap->buf, sizeof(osb));
  610         }
  611         free(sfp, M_STATFS);
  612         return (error);
  613 }
  614 
  615 /*
  616  * Get filesystem statistics.
  617  */
  618 #ifndef _SYS_SYSPROTO_H_
  619 struct freebsd4_fstatfs_args {
  620         int fd;
  621         struct ostatfs *buf;
  622 };
  623 #endif
  624 int
  625 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
  626 {
  627         struct ostatfs osb;
  628         struct statfs *sfp;
  629         int error;
  630 
  631         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  632         error = kern_fstatfs(td, uap->fd, sfp);
  633         if (error == 0) {
  634                 freebsd4_cvtstatfs(sfp, &osb);
  635                 error = copyout(&osb, uap->buf, sizeof(osb));
  636         }
  637         free(sfp, M_STATFS);
  638         return (error);
  639 }
  640 
  641 /*
  642  * Get statistics on all filesystems.
  643  */
  644 #ifndef _SYS_SYSPROTO_H_
  645 struct freebsd4_getfsstat_args {
  646         struct ostatfs *buf;
  647         long bufsize;
  648         int mode;
  649 };
  650 #endif
  651 int
  652 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
  653 {
  654         struct statfs *buf, *sp;
  655         struct ostatfs osb;
  656         size_t count, size;
  657         int error;
  658 
  659         if (uap->bufsize < 0)
  660                 return (EINVAL);
  661         count = uap->bufsize / sizeof(struct ostatfs);
  662         if (count > SIZE_MAX / sizeof(struct statfs))
  663                 return (EINVAL);
  664         size = count * sizeof(struct statfs);
  665         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  666             uap->mode);
  667         if (error == 0)
  668                 td->td_retval[0] = count;
  669         if (size != 0) {
  670                 sp = buf;
  671                 while (count != 0 && error == 0) {
  672                         freebsd4_cvtstatfs(sp, &osb);
  673                         error = copyout(&osb, uap->buf, sizeof(osb));
  674                         sp++;
  675                         uap->buf++;
  676                         count--;
  677                 }
  678                 free(buf, M_STATFS);
  679         }
  680         return (error);
  681 }
  682 
  683 /*
  684  * Implement fstatfs() for (NFS) file handles.
  685  */
  686 #ifndef _SYS_SYSPROTO_H_
  687 struct freebsd4_fhstatfs_args {
  688         struct fhandle *u_fhp;
  689         struct ostatfs *buf;
  690 };
  691 #endif
  692 int
  693 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
  694 {
  695         struct ostatfs osb;
  696         struct statfs *sfp;
  697         fhandle_t fh;
  698         int error;
  699 
  700         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  701         if (error != 0)
  702                 return (error);
  703         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  704         error = kern_fhstatfs(td, fh, sfp);
  705         if (error == 0) {
  706                 freebsd4_cvtstatfs(sfp, &osb);
  707                 error = copyout(&osb, uap->buf, sizeof(osb));
  708         }
  709         free(sfp, M_STATFS);
  710         return (error);
  711 }
  712 
  713 /*
  714  * Convert a new format statfs structure to an old format statfs structure.
  715  */
  716 static void
  717 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
  718 {
  719 
  720         statfs_scale_blocks(nsp, LONG_MAX);
  721         bzero(osp, sizeof(*osp));
  722         osp->f_bsize = nsp->f_bsize;
  723         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  724         osp->f_blocks = nsp->f_blocks;
  725         osp->f_bfree = nsp->f_bfree;
  726         osp->f_bavail = nsp->f_bavail;
  727         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  728         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  729         osp->f_owner = nsp->f_owner;
  730         osp->f_type = nsp->f_type;
  731         osp->f_flags = nsp->f_flags;
  732         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  733         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  734         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  735         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  736         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  737             MIN(MFSNAMELEN, OMFSNAMELEN));
  738         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  739             MIN(MNAMELEN, OMNAMELEN));
  740         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  741             MIN(MNAMELEN, OMNAMELEN));
  742         osp->f_fsid = nsp->f_fsid;
  743 }
  744 #endif /* COMPAT_FREEBSD4 */
  745 
  746 #if defined(COMPAT_FREEBSD11)
  747 /*
  748  * Get old format filesystem statistics.
  749  */
  750 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
  751 
  752 int
  753 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
  754 {
  755         struct freebsd11_statfs osb;
  756         struct statfs *sfp;
  757         int error;
  758 
  759         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  760         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  761         if (error == 0) {
  762                 freebsd11_cvtstatfs(sfp, &osb);
  763                 error = copyout(&osb, uap->buf, sizeof(osb));
  764         }
  765         free(sfp, M_STATFS);
  766         return (error);
  767 }
  768 
  769 /*
  770  * Get filesystem statistics.
  771  */
  772 int
  773 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
  774 {
  775         struct freebsd11_statfs osb;
  776         struct statfs *sfp;
  777         int error;
  778 
  779         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  780         error = kern_fstatfs(td, uap->fd, sfp);
  781         if (error == 0) {
  782                 freebsd11_cvtstatfs(sfp, &osb);
  783                 error = copyout(&osb, uap->buf, sizeof(osb));
  784         }
  785         free(sfp, M_STATFS);
  786         return (error);
  787 }
  788 
  789 /*
  790  * Get statistics on all filesystems.
  791  */
  792 int
  793 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
  794 {
  795         return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode));
  796 }
  797 
  798 int
  799 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf,
  800     long bufsize, int mode)
  801 {
  802         struct freebsd11_statfs osb;
  803         struct statfs *buf, *sp;
  804         size_t count, size;
  805         int error;
  806 
  807         if (bufsize < 0)
  808                 return (EINVAL);
  809 
  810         count = bufsize / sizeof(struct ostatfs);
  811         size = count * sizeof(struct statfs);
  812         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode);
  813         if (error == 0)
  814                 td->td_retval[0] = count;
  815         if (size > 0) {
  816                 sp = buf;
  817                 while (count > 0 && error == 0) {
  818                         freebsd11_cvtstatfs(sp, &osb);
  819                         error = copyout(&osb, ubuf, sizeof(osb));
  820                         sp++;
  821                         ubuf++;
  822                         count--;
  823                 }
  824                 free(buf, M_STATFS);
  825         }
  826         return (error);
  827 }
  828 
  829 /*
  830  * Implement fstatfs() for (NFS) file handles.
  831  */
  832 int
  833 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
  834 {
  835         struct freebsd11_statfs osb;
  836         struct statfs *sfp;
  837         fhandle_t fh;
  838         int error;
  839 
  840         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  841         if (error)
  842                 return (error);
  843         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  844         error = kern_fhstatfs(td, fh, sfp);
  845         if (error == 0) {
  846                 freebsd11_cvtstatfs(sfp, &osb);
  847                 error = copyout(&osb, uap->buf, sizeof(osb));
  848         }
  849         free(sfp, M_STATFS);
  850         return (error);
  851 }
  852 
  853 /*
  854  * Convert a new format statfs structure to an old format statfs structure.
  855  */
  856 static void
  857 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
  858 {
  859 
  860         bzero(osp, sizeof(*osp));
  861         osp->f_version = FREEBSD11_STATFS_VERSION;
  862         osp->f_type = nsp->f_type;
  863         osp->f_flags = nsp->f_flags;
  864         osp->f_bsize = nsp->f_bsize;
  865         osp->f_iosize = nsp->f_iosize;
  866         osp->f_blocks = nsp->f_blocks;
  867         osp->f_bfree = nsp->f_bfree;
  868         osp->f_bavail = nsp->f_bavail;
  869         osp->f_files = nsp->f_files;
  870         osp->f_ffree = nsp->f_ffree;
  871         osp->f_syncwrites = nsp->f_syncwrites;
  872         osp->f_asyncwrites = nsp->f_asyncwrites;
  873         osp->f_syncreads = nsp->f_syncreads;
  874         osp->f_asyncreads = nsp->f_asyncreads;
  875         osp->f_namemax = nsp->f_namemax;
  876         osp->f_owner = nsp->f_owner;
  877         osp->f_fsid = nsp->f_fsid;
  878         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  879             MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
  880         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  881             MIN(MNAMELEN, sizeof(osp->f_mntonname)));
  882         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  883             MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
  884 }
  885 #endif /* COMPAT_FREEBSD11 */
  886 
  887 /*
  888  * Change current working directory to a given file descriptor.
  889  */
  890 #ifndef _SYS_SYSPROTO_H_
  891 struct fchdir_args {
  892         int     fd;
  893 };
  894 #endif
  895 int
  896 sys_fchdir(struct thread *td, struct fchdir_args *uap)
  897 {
  898         struct vnode *vp, *tdp;
  899         struct mount *mp;
  900         struct file *fp;
  901         int error;
  902 
  903         AUDIT_ARG_FD(uap->fd);
  904         error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
  905             &fp);
  906         if (error != 0)
  907                 return (error);
  908         vp = fp->f_vnode;
  909         vrefact(vp);
  910         fdrop(fp, td);
  911         vn_lock(vp, LK_SHARED | LK_RETRY);
  912         AUDIT_ARG_VNODE1(vp);
  913         error = change_dir(vp, td);
  914         while (!error && (mp = vp->v_mountedhere) != NULL) {
  915                 if (vfs_busy(mp, 0))
  916                         continue;
  917                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  918                 vfs_unbusy(mp);
  919                 if (error != 0)
  920                         break;
  921                 vput(vp);
  922                 vp = tdp;
  923         }
  924         if (error != 0) {
  925                 vput(vp);
  926                 return (error);
  927         }
  928         VOP_UNLOCK(vp);
  929         pwd_chdir(td, vp);
  930         return (0);
  931 }
  932 
  933 /*
  934  * Change current working directory (``.'').
  935  */
  936 #ifndef _SYS_SYSPROTO_H_
  937 struct chdir_args {
  938         char    *path;
  939 };
  940 #endif
  941 int
  942 sys_chdir(struct thread *td, struct chdir_args *uap)
  943 {
  944 
  945         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  946 }
  947 
  948 int
  949 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg)
  950 {
  951         struct nameidata nd;
  952         int error;
  953 
  954         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  955             pathseg, path);
  956         if ((error = namei(&nd)) != 0)
  957                 return (error);
  958         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  959                 vput(nd.ni_vp);
  960                 NDFREE_PNBUF(&nd);
  961                 return (error);
  962         }
  963         VOP_UNLOCK(nd.ni_vp);
  964         NDFREE_PNBUF(&nd);
  965         pwd_chdir(td, nd.ni_vp);
  966         return (0);
  967 }
  968 
  969 static int unprivileged_chroot = 0;
  970 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW,
  971     &unprivileged_chroot, 0,
  972     "Unprivileged processes can use chroot(2)");
  973 /*
  974  * Change notion of root (``/'') directory.
  975  */
  976 #ifndef _SYS_SYSPROTO_H_
  977 struct chroot_args {
  978         char    *path;
  979 };
  980 #endif
  981 int
  982 sys_chroot(struct thread *td, struct chroot_args *uap)
  983 {
  984         struct nameidata nd;
  985         struct proc *p;
  986         int error;
  987 
  988         error = priv_check(td, PRIV_VFS_CHROOT);
  989         if (error != 0) {
  990                 p = td->td_proc;
  991                 PROC_LOCK(p);
  992                 if (unprivileged_chroot == 0 ||
  993                     (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) {
  994                         PROC_UNLOCK(p);
  995                         return (error);
  996                 }
  997                 PROC_UNLOCK(p);
  998         }
  999         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 1000             UIO_USERSPACE, uap->path);
 1001         error = namei(&nd);
 1002         if (error != 0)
 1003                 return (error);
 1004         NDFREE_PNBUF(&nd);
 1005         error = change_dir(nd.ni_vp, td);
 1006         if (error != 0)
 1007                 goto e_vunlock;
 1008 #ifdef MAC
 1009         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
 1010         if (error != 0)
 1011                 goto e_vunlock;
 1012 #endif
 1013         VOP_UNLOCK(nd.ni_vp);
 1014         error = pwd_chroot(td, nd.ni_vp);
 1015         vrele(nd.ni_vp);
 1016         return (error);
 1017 e_vunlock:
 1018         vput(nd.ni_vp);
 1019         return (error);
 1020 }
 1021 
 1022 /*
 1023  * Common routine for chroot and chdir.  Callers must provide a locked vnode
 1024  * instance.
 1025  */
 1026 int
 1027 change_dir(struct vnode *vp, struct thread *td)
 1028 {
 1029 #ifdef MAC
 1030         int error;
 1031 #endif
 1032 
 1033         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
 1034         if (vp->v_type != VDIR)
 1035                 return (ENOTDIR);
 1036 #ifdef MAC
 1037         error = mac_vnode_check_chdir(td->td_ucred, vp);
 1038         if (error != 0)
 1039                 return (error);
 1040 #endif
 1041         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
 1042 }
 1043 
 1044 static __inline void
 1045 flags_to_rights(int flags, cap_rights_t *rightsp)
 1046 {
 1047         if (flags & O_EXEC) {
 1048                 cap_rights_set_one(rightsp, CAP_FEXECVE);
 1049                 if (flags & O_PATH)
 1050                         return;
 1051         } else {
 1052                 switch ((flags & O_ACCMODE)) {
 1053                 case O_RDONLY:
 1054                         cap_rights_set_one(rightsp, CAP_READ);
 1055                         break;
 1056                 case O_RDWR:
 1057                         cap_rights_set_one(rightsp, CAP_READ);
 1058                         /* FALLTHROUGH */
 1059                 case O_WRONLY:
 1060                         cap_rights_set_one(rightsp, CAP_WRITE);
 1061                         if (!(flags & (O_APPEND | O_TRUNC)))
 1062                                 cap_rights_set_one(rightsp, CAP_SEEK);
 1063                         break;
 1064                 }
 1065         }
 1066 
 1067         if (flags & O_CREAT)
 1068                 cap_rights_set_one(rightsp, CAP_CREATE);
 1069 
 1070         if (flags & O_TRUNC)
 1071                 cap_rights_set_one(rightsp, CAP_FTRUNCATE);
 1072 
 1073         if (flags & (O_SYNC | O_FSYNC))
 1074                 cap_rights_set_one(rightsp, CAP_FSYNC);
 1075 
 1076         if (flags & (O_EXLOCK | O_SHLOCK))
 1077                 cap_rights_set_one(rightsp, CAP_FLOCK);
 1078 }
 1079 
 1080 /*
 1081  * Check permissions, allocate an open file structure, and call the device
 1082  * open routine if any.
 1083  */
 1084 #ifndef _SYS_SYSPROTO_H_
 1085 struct open_args {
 1086         char    *path;
 1087         int     flags;
 1088         int     mode;
 1089 };
 1090 #endif
 1091 int
 1092 sys_open(struct thread *td, struct open_args *uap)
 1093 {
 1094 
 1095         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1096             uap->flags, uap->mode));
 1097 }
 1098 
 1099 #ifndef _SYS_SYSPROTO_H_
 1100 struct openat_args {
 1101         int     fd;
 1102         char    *path;
 1103         int     flag;
 1104         int     mode;
 1105 };
 1106 #endif
 1107 int
 1108 sys_openat(struct thread *td, struct openat_args *uap)
 1109 {
 1110 
 1111         AUDIT_ARG_FD(uap->fd);
 1112         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1113             uap->mode));
 1114 }
 1115 
/*
 * Common implementation behind the open-family entry points in this file.
 *
 * Opens `path' (located in the address space given by `pathseg', resolved
 * relative to descriptor `fd') with open flags `flags' and creation mode
 * `mode'.  On success the new descriptor index is stored in
 * td->td_retval[0] and 0 is returned; on failure the file structure
 * allocated here is aborted and an errno value is returned.
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
        struct proc *p = td->td_proc;
        struct filedesc *fdp;
        struct pwddesc *pdp;
        struct file *fp;
        struct vnode *vp;
        struct nameidata nd;
        cap_rights_t rights;
        int cmode, error, indx;

        /* indx stays -1 until a descriptor has actually been installed. */
        indx = -1;
        fdp = p->p_fd;
        pdp = p->p_pd;

        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* Rights needed on `fd': CAP_LOOKUP plus whatever the flags imply. */
        cap_rights_init_one(&rights, CAP_LOOKUP);
        flags_to_rights(flags, &rights);

        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.  On the other hand, for O_PATH any mode
         * except O_EXEC is ignored.
         */
        if ((flags & O_PATH) != 0) {
                flags &= ~(O_CREAT | O_ACCMODE);
        } else if ((flags & O_EXEC) != 0) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE) {
                return (EINVAL);
        } else {
                flags = FFLAGS(flags);
        }

        /*
         * Allocate a file structure. The descriptor to reference it
         * is allocated and used by finstall_refed() below.
         */
        error = falloc_noinstall(td, &fp);
        if (error != 0)
                return (error);
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        /* Creation mode: apply the process cmask, keep permission bits, drop S_ISTXT. */
        cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
            pathseg, path, fd, &rights);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
            td->td_ucred, fp);
        if (error != 0) {
                /*
                 * If the vn_open replaced the method vector, something
                 * wondrous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
                if (error == ENXIO && fp->f_ops != &badfileops) {
                        MPASS((flags & O_PATH) == 0);
                        goto success;
                }

                /*
                 * Handle special fdopen() case. bleh.
                 *
                 * Don't do this for relative (capability) lookups; we don't
                 * understand exactly what would happen, and we don't think
                 * that it ever should.
                 */
                if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
                    (error == ENODEV || error == ENXIO) &&
                    td->td_dupfd >= 0) {
                        /* On success dupfdopen() sets indx to the new descriptor. */
                        error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
                            &indx);
                        if (error == 0)
                                goto success;
                }

                goto bad;
        }
        td->td_dupfd = 0;
        NDFREE_PNBUF(&nd);
        vp = nd.ni_vp;

        /*
         * Store the vnode, for any f_type. Typically, the vnode use
         * count is decremented by direct call to vn_closefile() for
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;

        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
                KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
                    ("Unexpected fifo fp %p vp %p", fp, vp));
                if ((flags & O_PATH) != 0) {
                        /* O_PATH descriptors get path_fileops, not vnops. */
                        finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
                            DTYPE_VNODE, NULL, &path_fileops);
                } else {
                        finit_vnode(fp, flags, NULL, &vnops);
                }
        }

        VOP_UNLOCK(vp);
        /* Truncation is done through the file ops, after the vnode is unlocked. */
        if (flags & O_TRUNC) {
                error = fo_truncate(fp, 0, td->td_ucred, td);
                if (error != 0)
                        goto bad;
        }
success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
        if (indx == -1) {
                struct filecaps *fcaps;

#ifdef CAPABILITIES
                if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
                        fcaps = &nd.ni_filecaps;
                else
#endif
                        fcaps = NULL;
                error = finstall_refed(td, fp, &indx, flags, fcaps);
                /* On success finstall_refed() consumes fcaps. */
                if (error != 0) {
                        goto bad;
                }
        } else {
                /*
                 * dupfdopen() already produced the descriptor; the file
                 * structure allocated above is no longer needed.
                 */
                NDFREE_IOCTLCAPS(&nd);
                falloc_abort(td, fp);
        }

        td->td_retval[0] = indx;
        return (0);
bad:
        /* Failure: no descriptor may have been installed at this point. */
        KASSERT(indx == -1, ("indx=%d, should be -1", indx));
        NDFREE_IOCTLCAPS(&nd);
        falloc_abort(td, fp);
        return (error);
}
 1261 
 1262 #ifdef COMPAT_43
 1263 /*
 1264  * Create a file.
 1265  */
 1266 #ifndef _SYS_SYSPROTO_H_
 1267 struct ocreat_args {
 1268         char    *path;
 1269         int     mode;
 1270 };
 1271 #endif
 1272 int
 1273 ocreat(struct thread *td, struct ocreat_args *uap)
 1274 {
 1275 
 1276         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1277             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1278 }
 1279 #endif /* COMPAT_43 */
 1280 
 1281 /*
 1282  * Create a special file.
 1283  */
 1284 #ifndef _SYS_SYSPROTO_H_
 1285 struct mknodat_args {
 1286         int     fd;
 1287         char    *path;
 1288         mode_t  mode;
 1289         dev_t   dev;
 1290 };
 1291 #endif
 1292 int
 1293 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1294 {
 1295 
 1296         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1297             uap->dev));
 1298 }
 1299 
 1300 #if defined(COMPAT_FREEBSD11)
 1301 int
 1302 freebsd11_mknod(struct thread *td,
 1303     struct freebsd11_mknod_args *uap)
 1304 {
 1305 
 1306         return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1307             uap->mode, uap->dev));
 1308 }
 1309 
 1310 int
 1311 freebsd11_mknodat(struct thread *td,
 1312     struct freebsd11_mknodat_args *uap)
 1313 {
 1314 
 1315         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1316             uap->dev));
 1317 }
 1318 #endif /* COMPAT_FREEBSD11 */
 1319 
 1320 int
 1321 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
 1322     int mode, dev_t dev)
 1323 {
 1324         struct vnode *vp;
 1325         struct mount *mp;
 1326         struct vattr vattr;
 1327         struct nameidata nd;
 1328         int error, whiteout = 0;
 1329 
 1330         AUDIT_ARG_MODE(mode);
 1331         AUDIT_ARG_DEV(dev);
 1332         switch (mode & S_IFMT) {
 1333         case S_IFCHR:
 1334         case S_IFBLK:
 1335                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1336                 if (error == 0 && dev == VNOVAL)
 1337                         error = EINVAL;
 1338                 break;
 1339         case S_IFWHT:
 1340                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1341                 break;
 1342         case S_IFIFO:
 1343                 if (dev == 0)
 1344                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1345                 /* FALLTHROUGH */
 1346         default:
 1347                 error = EINVAL;
 1348                 break;
 1349         }
 1350         if (error != 0)
 1351                 return (error);
 1352         NDPREINIT(&nd);
 1353 restart:
 1354         bwillwrite();
 1355         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
 1356             pathseg, path, fd, &cap_mknodat_rights);
 1357         if ((error = namei(&nd)) != 0)
 1358                 return (error);
 1359         vp = nd.ni_vp;
 1360         if (vp != NULL) {
 1361                 NDFREE_PNBUF(&nd);
 1362                 if (vp == nd.ni_dvp)
 1363                         vrele(nd.ni_dvp);
 1364                 else
 1365                         vput(nd.ni_dvp);
 1366                 vrele(vp);
 1367                 return (EEXIST);
 1368         } else {
 1369                 VATTR_NULL(&vattr);
 1370                 vattr.va_mode = (mode & ALLPERMS) &
 1371                     ~td->td_proc->p_pd->pd_cmask;
 1372                 vattr.va_rdev = dev;
 1373                 whiteout = 0;
 1374 
 1375                 switch (mode & S_IFMT) {
 1376                 case S_IFCHR:
 1377                         vattr.va_type = VCHR;
 1378                         break;
 1379                 case S_IFBLK:
 1380                         vattr.va_type = VBLK;
 1381                         break;
 1382                 case S_IFWHT:
 1383                         whiteout = 1;
 1384                         break;
 1385                 default:
 1386                         panic("kern_mknod: invalid mode");
 1387                 }
 1388         }
 1389         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1390                 NDFREE_PNBUF(&nd);
 1391                 vput(nd.ni_dvp);
 1392                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 1393                         return (error);
 1394                 goto restart;
 1395         }
 1396 #ifdef MAC
 1397         if (error == 0 && !whiteout)
 1398                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1399                     &nd.ni_cnd, &vattr);
 1400 #endif
 1401         if (error == 0) {
 1402                 if (whiteout)
 1403                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1404                 else {
 1405                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1406                                                 &nd.ni_cnd, &vattr);
 1407                 }
 1408         }
 1409         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
 1410             true);
 1411         vn_finished_write(mp);
 1412         NDFREE_PNBUF(&nd);
 1413         if (error == ERELOOKUP)
 1414                 goto restart;
 1415         return (error);
 1416 }
 1417 
 1418 /*
 1419  * Create a named pipe.
 1420  */
 1421 #ifndef _SYS_SYSPROTO_H_
 1422 struct mkfifo_args {
 1423         char    *path;
 1424         int     mode;
 1425 };
 1426 #endif
 1427 int
 1428 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 1429 {
 1430 
 1431         return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1432             uap->mode));
 1433 }
 1434 
 1435 #ifndef _SYS_SYSPROTO_H_
 1436 struct mkfifoat_args {
 1437         int     fd;
 1438         char    *path;
 1439         mode_t  mode;
 1440 };
 1441 #endif
 1442 int
 1443 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1444 {
 1445 
 1446         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1447             uap->mode));
 1448 }
 1449 
 1450 int
 1451 kern_mkfifoat(struct thread *td, int fd, const char *path,
 1452     enum uio_seg pathseg, int mode)
 1453 {
 1454         struct mount *mp;
 1455         struct vattr vattr;
 1456         struct nameidata nd;
 1457         int error;
 1458 
 1459         AUDIT_ARG_MODE(mode);
 1460         NDPREINIT(&nd);
 1461 restart:
 1462         bwillwrite();
 1463         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
 1464             pathseg, path, fd, &cap_mkfifoat_rights);
 1465         if ((error = namei(&nd)) != 0)
 1466                 return (error);
 1467         if (nd.ni_vp != NULL) {
 1468                 NDFREE_PNBUF(&nd);
 1469                 if (nd.ni_vp == nd.ni_dvp)
 1470                         vrele(nd.ni_dvp);
 1471                 else
 1472                         vput(nd.ni_dvp);
 1473                 vrele(nd.ni_vp);
 1474                 return (EEXIST);
 1475         }
 1476         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1477                 NDFREE_PNBUF(&nd);
 1478                 vput(nd.ni_dvp);
 1479                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 1480                         return (error);
 1481                 goto restart;
 1482         }
 1483         VATTR_NULL(&vattr);
 1484         vattr.va_type = VFIFO;
 1485         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
 1486 #ifdef MAC
 1487         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1488             &vattr);
 1489         if (error != 0)
 1490                 goto out;
 1491 #endif
 1492         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1493 #ifdef MAC
 1494 out:
 1495 #endif
 1496         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 1497         vn_finished_write(mp);
 1498         NDFREE_PNBUF(&nd);
 1499         if (error == ERELOOKUP)
 1500                 goto restart;
 1501         return (error);
 1502 }
 1503 
 1504 /*
 1505  * Make a hard file link.
 1506  */
 1507 #ifndef _SYS_SYSPROTO_H_
 1508 struct link_args {
 1509         char    *path;
 1510         char    *link;
 1511 };
 1512 #endif
 1513 int
 1514 sys_link(struct thread *td, struct link_args *uap)
 1515 {
 1516 
 1517         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 1518             UIO_USERSPACE, AT_SYMLINK_FOLLOW));
 1519 }
 1520 
 1521 #ifndef _SYS_SYSPROTO_H_
 1522 struct linkat_args {
 1523         int     fd1;
 1524         char    *path1;
 1525         int     fd2;
 1526         char    *path2;
 1527         int     flag;
 1528 };
 1529 #endif
 1530 int
 1531 sys_linkat(struct thread *td, struct linkat_args *uap)
 1532 {
 1533 
 1534         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1535             UIO_USERSPACE, uap->flag));
 1536 }
 1537 
 1538 int hardlink_check_uid = 0;
 1539 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1540     &hardlink_check_uid, 0,
 1541     "Unprivileged processes cannot create hard links to files owned by other "
 1542     "users");
 1543 static int hardlink_check_gid = 0;
 1544 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1545     &hardlink_check_gid, 0,
 1546     "Unprivileged processes cannot create hard links to files owned by other "
 1547     "groups");
 1548 
 1549 static int
 1550 can_hardlink(struct vnode *vp, struct ucred *cred)
 1551 {
 1552         struct vattr va;
 1553         int error;
 1554 
 1555         if (!hardlink_check_uid && !hardlink_check_gid)
 1556                 return (0);
 1557 
 1558         error = VOP_GETATTR(vp, &va, cred);
 1559         if (error != 0)
 1560                 return (error);
 1561 
 1562         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1563                 error = priv_check_cred(cred, PRIV_VFS_LINK);
 1564                 if (error != 0)
 1565                         return (error);
 1566         }
 1567 
 1568         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1569                 error = priv_check_cred(cred, PRIV_VFS_LINK);
 1570                 if (error != 0)
 1571                         return (error);
 1572         }
 1573 
 1574         return (0);
 1575 }
 1576 
 1577 int
 1578 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
 1579     const char *path2, enum uio_seg segflag, int flag)
 1580 {
 1581         struct nameidata nd;
 1582         int error;
 1583 
 1584         if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
 1585             AT_EMPTY_PATH)) != 0)
 1586                 return (EINVAL);
 1587 
 1588         NDPREINIT(&nd);
 1589         do {
 1590                 bwillwrite();
 1591                 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
 1592                     AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
 1593                     segflag, path1, fd1, &cap_linkat_source_rights);
 1594                 if ((error = namei(&nd)) != 0)
 1595                         return (error);
 1596                 NDFREE_PNBUF(&nd);
 1597                 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
 1598                         error = priv_check(td, PRIV_VFS_FHOPEN);
 1599                         if (error != 0) {
 1600                                 vrele(nd.ni_vp);
 1601                                 return (error);
 1602                         }
 1603                 }
 1604                 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
 1605         } while (error ==  EAGAIN || error == ERELOOKUP);
 1606         return (error);
 1607 }
 1608 
 1609 static int
 1610 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
 1611     enum uio_seg segflag)
 1612 {
 1613         struct nameidata nd;
 1614         struct mount *mp;
 1615         int error;
 1616 
 1617         if (vp->v_type == VDIR) {
 1618                 vrele(vp);
 1619                 return (EPERM);         /* POSIX */
 1620         }
 1621         NDINIT_ATRIGHTS(&nd, CREATE,
 1622             LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 1623             &cap_linkat_target_rights);
 1624         if ((error = namei(&nd)) == 0) {
 1625                 if (nd.ni_vp != NULL) {
 1626                         NDFREE_PNBUF(&nd);
 1627                         if (nd.ni_dvp == nd.ni_vp)
 1628                                 vrele(nd.ni_dvp);
 1629                         else
 1630                                 vput(nd.ni_dvp);
 1631                         vrele(nd.ni_vp);
 1632                         vrele(vp);
 1633                         return (EEXIST);
 1634                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1635                         /*
 1636                          * Cross-device link.  No need to recheck
 1637                          * vp->v_type, since it cannot change, except
 1638                          * to VBAD.
 1639                          */
 1640                         NDFREE_PNBUF(&nd);
 1641                         vput(nd.ni_dvp);
 1642                         vrele(vp);
 1643                         return (EXDEV);
 1644                 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
 1645                         error = can_hardlink(vp, td->td_ucred);
 1646 #ifdef MAC
 1647                         if (error == 0)
 1648                                 error = mac_vnode_check_link(td->td_ucred,
 1649                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1650 #endif
 1651                         if (error != 0) {
 1652                                 vput(vp);
 1653                                 vput(nd.ni_dvp);
 1654                                 NDFREE_PNBUF(&nd);
 1655                                 return (error);
 1656                         }
 1657                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1658                         if (error != 0) {
 1659                                 vput(vp);
 1660                                 vput(nd.ni_dvp);
 1661                                 NDFREE_PNBUF(&nd);
 1662                                 error = vn_start_write(NULL, &mp,
 1663                                     V_XSLEEP | V_PCATCH);
 1664                                 if (error != 0)
 1665                                         return (error);
 1666                                 return (EAGAIN);
 1667                         }
 1668                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1669                         VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 1670                         vn_finished_write(mp);
 1671                         NDFREE_PNBUF(&nd);
 1672                         vp = NULL;
 1673                 } else {
 1674                         vput(nd.ni_dvp);
 1675                         NDFREE_PNBUF(&nd);
 1676                         vrele(vp);
 1677                         return (EAGAIN);
 1678                 }
 1679         }
 1680         if (vp != NULL)
 1681                 vrele(vp);
 1682         return (error);
 1683 }
 1684 
 1685 /*
 1686  * Make a symbolic link.
 1687  */
 1688 #ifndef _SYS_SYSPROTO_H_
 1689 struct symlink_args {
 1690         char    *path;
 1691         char    *link;
 1692 };
 1693 #endif
 1694 int
 1695 sys_symlink(struct thread *td, struct symlink_args *uap)
 1696 {
 1697 
 1698         return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 1699             UIO_USERSPACE));
 1700 }
 1701 
 1702 #ifndef _SYS_SYSPROTO_H_
 1703 struct symlinkat_args {
 1704         char    *path;
 1705         int     fd;
 1706         char    *path2;
 1707 };
 1708 #endif
 1709 int
 1710 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1711 {
 1712 
 1713         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1714             UIO_USERSPACE));
 1715 }
 1716 
 1717 int
 1718 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2,
 1719     enum uio_seg segflg)
 1720 {
 1721         struct mount *mp;
 1722         struct vattr vattr;
 1723         const char *syspath;
 1724         char *tmppath;
 1725         struct nameidata nd;
 1726         int error;
 1727 
 1728         if (segflg == UIO_SYSSPACE) {
 1729                 syspath = path1;
 1730         } else {
 1731                 tmppath = uma_zalloc(namei_zone, M_WAITOK);
 1732                 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0)
 1733                         goto out;
 1734                 syspath = tmppath;
 1735         }
 1736         AUDIT_ARG_TEXT(syspath);
 1737         NDPREINIT(&nd);
 1738 restart:
 1739         bwillwrite();
 1740         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg,
 1741             path2, fd, &cap_symlinkat_rights);
 1742         if ((error = namei(&nd)) != 0)
 1743                 goto out;
 1744         if (nd.ni_vp) {
 1745                 NDFREE_PNBUF(&nd);
 1746                 if (nd.ni_vp == nd.ni_dvp)
 1747                         vrele(nd.ni_dvp);
 1748                 else
 1749                         vput(nd.ni_dvp);
 1750                 vrele(nd.ni_vp);
 1751                 nd.ni_vp = NULL;
 1752                 error = EEXIST;
 1753                 goto out;
 1754         }
 1755         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1756                 NDFREE_PNBUF(&nd);
 1757                 vput(nd.ni_dvp);
 1758                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 1759                         goto out;
 1760                 goto restart;
 1761         }
 1762         VATTR_NULL(&vattr);
 1763         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask;
 1764 #ifdef MAC
 1765         vattr.va_type = VLNK;
 1766         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1767             &vattr);
 1768         if (error != 0)
 1769                 goto out2;
 1770 #endif
 1771         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1772 #ifdef MAC
 1773 out2:
 1774 #endif
 1775         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 1776         vn_finished_write(mp);
 1777         NDFREE_PNBUF(&nd);
 1778         if (error == ERELOOKUP)
 1779                 goto restart;
 1780 out:
 1781         if (segflg != UIO_SYSSPACE)
 1782                 uma_zfree(namei_zone, tmppath);
 1783         return (error);
 1784 }
 1785 
 1786 /*
 1787  * Delete a whiteout from the filesystem.
 1788  */
 1789 #ifndef _SYS_SYSPROTO_H_
 1790 struct undelete_args {
 1791         char *path;
 1792 };
 1793 #endif
 1794 int
 1795 sys_undelete(struct thread *td, struct undelete_args *uap)
 1796 {
 1797         struct mount *mp;
 1798         struct nameidata nd;
 1799         int error;
 1800 
 1801         NDPREINIT(&nd);
 1802 restart:
 1803         bwillwrite();
 1804         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1805             UIO_USERSPACE, uap->path);
 1806         error = namei(&nd);
 1807         if (error != 0)
 1808                 return (error);
 1809 
 1810         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1811                 NDFREE_PNBUF(&nd);
 1812                 if (nd.ni_vp == nd.ni_dvp)
 1813                         vrele(nd.ni_dvp);
 1814                 else
 1815                         vput(nd.ni_dvp);
 1816                 if (nd.ni_vp)
 1817                         vrele(nd.ni_vp);
 1818                 return (EEXIST);
 1819         }
 1820         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1821                 NDFREE_PNBUF(&nd);
 1822                 vput(nd.ni_dvp);
 1823                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 1824                         return (error);
 1825                 goto restart;
 1826         }
 1827         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1828         NDFREE_PNBUF(&nd);
 1829         vput(nd.ni_dvp);
 1830         vn_finished_write(mp);
 1831         if (error == ERELOOKUP)
 1832                 goto restart;
 1833         return (error);
 1834 }
 1835 
 1836 /*
 1837  * Delete a name from the filesystem.
 1838  */
 1839 #ifndef _SYS_SYSPROTO_H_
 1840 struct unlink_args {
 1841         char    *path;
 1842 };
 1843 #endif
 1844 int
 1845 sys_unlink(struct thread *td, struct unlink_args *uap)
 1846 {
 1847 
 1848         return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 1849             0, 0));
 1850 }
 1851 
 1852 static int
 1853 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd,
 1854     int flag, enum uio_seg pathseg, ino_t oldinum)
 1855 {
 1856 
 1857         if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0)
 1858                 return (EINVAL);
 1859 
 1860         if ((flag & AT_REMOVEDIR) != 0)
 1861                 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0));
 1862 
 1863         return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0));
 1864 }
 1865 
 1866 #ifndef _SYS_SYSPROTO_H_
 1867 struct unlinkat_args {
 1868         int     fd;
 1869         char    *path;
 1870         int     flag;
 1871 };
 1872 #endif
 1873 int
 1874 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1875 {
 1876 
 1877         return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag,
 1878             UIO_USERSPACE, 0));
 1879 }
 1880 
 1881 #ifndef _SYS_SYSPROTO_H_
 1882 struct funlinkat_args {
 1883         int             dfd;
 1884         const char      *path;
 1885         int             fd;
 1886         int             flag;
 1887 };
 1888 #endif
 1889 int
 1890 sys_funlinkat(struct thread *td, struct funlinkat_args *uap)
 1891 {
 1892 
 1893         return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag,
 1894             UIO_USERSPACE, 0));
 1895 }
 1896 
 1897 int
 1898 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 1899     enum uio_seg pathseg, int flag, ino_t oldinum)
 1900 {
 1901         struct mount *mp;
 1902         struct file *fp;
 1903         struct vnode *vp;
 1904         struct nameidata nd;
 1905         struct stat sb;
 1906         int error;
 1907 
 1908         fp = NULL;
 1909         if (fd != FD_NONE) {
 1910                 error = getvnode_path(td, fd, &cap_no_rights, &fp);
 1911                 if (error != 0)
 1912                         return (error);
 1913         }
 1914 
 1915         NDPREINIT(&nd);
 1916 restart:
 1917         bwillwrite();
 1918         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 1919             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 1920             pathseg, path, dfd, &cap_unlinkat_rights);
 1921         if ((error = namei(&nd)) != 0) {
 1922                 if (error == EINVAL)
 1923                         error = EPERM;
 1924                 goto fdout;
 1925         }
 1926         vp = nd.ni_vp;
 1927         if (vp->v_type == VDIR && oldinum == 0) {
 1928                 error = EPERM;          /* POSIX */
 1929         } else if (oldinum != 0 &&
 1930             ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) &&
 1931             sb.st_ino != oldinum) {
 1932                 error = EIDRM;  /* Identifier removed */
 1933         } else if (fp != NULL && fp->f_vnode != vp) {
 1934                 if (VN_IS_DOOMED(fp->f_vnode))
 1935                         error = EBADF;
 1936                 else
 1937                         error = EDEADLK;
 1938         } else {
 1939                 /*
 1940                  * The root of a mounted filesystem cannot be deleted.
 1941                  *
 1942                  * XXX: can this only be a VDIR case?
 1943                  */
 1944                 if (vp->v_vflag & VV_ROOT)
 1945                         error = EBUSY;
 1946         }
 1947         if (error == 0) {
 1948                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1949                         NDFREE_PNBUF(&nd);
 1950                         vput(nd.ni_dvp);
 1951                         if (vp == nd.ni_dvp)
 1952                                 vrele(vp);
 1953                         else
 1954                                 vput(vp);
 1955                         if ((error = vn_start_write(NULL, &mp,
 1956                             V_XSLEEP | V_PCATCH)) != 0) {
 1957                                 goto fdout;
 1958                         }
 1959                         goto restart;
 1960                 }
 1961 #ifdef MAC
 1962                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1963                     &nd.ni_cnd);
 1964                 if (error != 0)
 1965                         goto out;
 1966 #endif
 1967                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1968                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1969 #ifdef MAC
 1970 out:
 1971 #endif
 1972                 vn_finished_write(mp);
 1973         }
 1974         NDFREE_PNBUF(&nd);
 1975         vput(nd.ni_dvp);
 1976         if (vp == nd.ni_dvp)
 1977                 vrele(vp);
 1978         else
 1979                 vput(vp);
 1980         if (error == ERELOOKUP)
 1981                 goto restart;
 1982 fdout:
 1983         if (fp != NULL)
 1984                 fdrop(fp, td);
 1985         return (error);
 1986 }
 1987 
 1988 /*
 1989  * Reposition read/write file offset.
 1990  */
 1991 #ifndef _SYS_SYSPROTO_H_
 1992 struct lseek_args {
 1993         int     fd;
 1994         int     pad;
 1995         off_t   offset;
 1996         int     whence;
 1997 };
 1998 #endif
 1999 int
 2000 sys_lseek(struct thread *td, struct lseek_args *uap)
 2001 {
 2002 
 2003         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 2004 }
 2005 
 2006 int
 2007 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 2008 {
 2009         struct file *fp;
 2010         int error;
 2011 
 2012         AUDIT_ARG_FD(fd);
 2013         error = fget(td, fd, &cap_seek_rights, &fp);
 2014         if (error != 0)
 2015                 return (error);
 2016         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 2017             fo_seek(fp, offset, whence, td) : ESPIPE;
 2018         fdrop(fp, td);
 2019         return (error);
 2020 }
 2021 
 2022 #if defined(COMPAT_43)
 2023 /*
 2024  * Reposition read/write file offset.
 2025  */
 2026 #ifndef _SYS_SYSPROTO_H_
 2027 struct olseek_args {
 2028         int     fd;
 2029         long    offset;
 2030         int     whence;
 2031 };
 2032 #endif
 2033 int
 2034 olseek(struct thread *td, struct olseek_args *uap)
 2035 {
 2036 
 2037         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 2038 }
 2039 #endif /* COMPAT_43 */
 2040 
 2041 #if defined(COMPAT_FREEBSD6)
 2042 /* Version with the 'pad' argument */
 2043 int
 2044 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 2045 {
 2046 
 2047         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 2048 }
 2049 #endif
 2050 
 2051 /*
 2052  * Check access permissions using passed credentials.
 2053  */
 2054 static int
 2055 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
 2056      struct thread *td)
 2057 {
 2058         accmode_t accmode;
 2059         int error;
 2060 
 2061         /* Flags == 0 means only check for existence. */
 2062         if (user_flags == 0)
 2063                 return (0);
 2064 
 2065         accmode = 0;
 2066         if (user_flags & R_OK)
 2067                 accmode |= VREAD;
 2068         if (user_flags & W_OK)
 2069                 accmode |= VWRITE;
 2070         if (user_flags & X_OK)
 2071                 accmode |= VEXEC;
 2072 #ifdef MAC
 2073         error = mac_vnode_check_access(cred, vp, accmode);
 2074         if (error != 0)
 2075                 return (error);
 2076 #endif
 2077         if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2078                 error = VOP_ACCESS(vp, accmode, cred, td);
 2079         return (error);
 2080 }
 2081 
 2082 /*
 2083  * Check access permissions using "real" credentials.
 2084  */
 2085 #ifndef _SYS_SYSPROTO_H_
 2086 struct access_args {
 2087         char    *path;
 2088         int     amode;
 2089 };
 2090 #endif
 2091 int
 2092 sys_access(struct thread *td, struct access_args *uap)
 2093 {
 2094 
 2095         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2096             0, uap->amode));
 2097 }
 2098 
 2099 #ifndef _SYS_SYSPROTO_H_
 2100 struct faccessat_args {
 2101         int     dirfd;
 2102         char    *path;
 2103         int     amode;
 2104         int     flag;
 2105 }
 2106 #endif
 2107 int
 2108 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2109 {
 2110 
 2111         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2112             uap->amode));
 2113 }
 2114 
 2115 int
 2116 kern_accessat(struct thread *td, int fd, const char *path,
 2117     enum uio_seg pathseg, int flag, int amode)
 2118 {
 2119         struct ucred *cred, *usecred;
 2120         struct vnode *vp;
 2121         struct nameidata nd;
 2122         int error;
 2123 
 2124         if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0)
 2125                 return (EINVAL);
 2126         if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 2127                 return (EINVAL);
 2128 
 2129         /*
 2130          * Create and modify a temporary credential instead of one that
 2131          * is potentially shared (if we need one).
 2132          */
 2133         cred = td->td_ucred;
 2134         if ((flag & AT_EACCESS) == 0 &&
 2135             ((cred->cr_uid != cred->cr_ruid ||
 2136             cred->cr_rgid != cred->cr_groups[0]))) {
 2137                 usecred = crdup(cred);
 2138                 usecred->cr_uid = cred->cr_ruid;
 2139                 usecred->cr_groups[0] = cred->cr_rgid;
 2140                 td->td_ucred = usecred;
 2141         } else
 2142                 usecred = cred;
 2143         AUDIT_ARG_VALUE(amode);
 2144         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2145             AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH |
 2146             AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights);
 2147         if ((error = namei(&nd)) != 0)
 2148                 goto out;
 2149         vp = nd.ni_vp;
 2150 
 2151         error = vn_access(vp, amode, usecred, td);
 2152         NDFREE_PNBUF(&nd);
 2153         vput(vp);
 2154 out:
 2155         if (usecred != cred) {
 2156                 td->td_ucred = cred;
 2157                 crfree(usecred);
 2158         }
 2159         return (error);
 2160 }
 2161 
 2162 /*
 2163  * Check access permissions using "effective" credentials.
 2164  */
 2165 #ifndef _SYS_SYSPROTO_H_
 2166 struct eaccess_args {
 2167         char    *path;
 2168         int     amode;
 2169 };
 2170 #endif
 2171 int
 2172 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 2173 {
 2174 
 2175         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2176             AT_EACCESS, uap->amode));
 2177 }
 2178 
 2179 #if defined(COMPAT_43)
 2180 /*
 2181  * Get file status; this version follows links.
 2182  */
 2183 #ifndef _SYS_SYSPROTO_H_
 2184 struct ostat_args {
 2185         char    *path;
 2186         struct ostat *ub;
 2187 };
 2188 #endif
 2189 int
 2190 ostat(struct thread *td, struct ostat_args *uap)
 2191 {
 2192         struct stat sb;
 2193         struct ostat osb;
 2194         int error;
 2195 
 2196         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2197             &sb, NULL);
 2198         if (error != 0)
 2199                 return (error);
 2200         cvtstat(&sb, &osb);
 2201         return (copyout(&osb, uap->ub, sizeof (osb)));
 2202 }
 2203 
 2204 /*
 2205  * Get file status; this version does not follow links.
 2206  */
 2207 #ifndef _SYS_SYSPROTO_H_
 2208 struct olstat_args {
 2209         char    *path;
 2210         struct ostat *ub;
 2211 };
 2212 #endif
 2213 int
 2214 olstat(struct thread *td, struct olstat_args *uap)
 2215 {
 2216         struct stat sb;
 2217         struct ostat osb;
 2218         int error;
 2219 
 2220         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2221             UIO_USERSPACE, &sb, NULL);
 2222         if (error != 0)
 2223                 return (error);
 2224         cvtstat(&sb, &osb);
 2225         return (copyout(&osb, uap->ub, sizeof (osb)));
 2226 }
 2227 
 2228 /*
 2229  * Convert from an old to a new stat structure.
 2230  * XXX: many values are blindly truncated.
 2231  */
 2232 void
 2233 cvtstat(struct stat *st, struct ostat *ost)
 2234 {
 2235 
 2236         bzero(ost, sizeof(*ost));
 2237         ost->st_dev = st->st_dev;
 2238         ost->st_ino = st->st_ino;
 2239         ost->st_mode = st->st_mode;
 2240         ost->st_nlink = st->st_nlink;
 2241         ost->st_uid = st->st_uid;
 2242         ost->st_gid = st->st_gid;
 2243         ost->st_rdev = st->st_rdev;
 2244         ost->st_size = MIN(st->st_size, INT32_MAX);
 2245         ost->st_atim = st->st_atim;
 2246         ost->st_mtim = st->st_mtim;
 2247         ost->st_ctim = st->st_ctim;
 2248         ost->st_blksize = st->st_blksize;
 2249         ost->st_blocks = st->st_blocks;
 2250         ost->st_flags = st->st_flags;
 2251         ost->st_gen = st->st_gen;
 2252 }
 2253 #endif /* COMPAT_43 */
 2254 
 2255 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 2256 int ino64_trunc_error;
 2257 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW,
 2258     &ino64_trunc_error, 0,
 2259     "Error on truncation of device, file or inode number, or link count");
 2260 
 2261 int
 2262 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
 2263 {
 2264 
 2265         ost->st_dev = st->st_dev;
 2266         if (ost->st_dev != st->st_dev) {
 2267                 switch (ino64_trunc_error) {
 2268                 default:
 2269                         /*
 2270                          * Since dev_t is almost raw, don't clamp to the
 2271                          * maximum for case 2, but ignore the error.
 2272                          */
 2273                         break;
 2274                 case 1:
 2275                         return (EOVERFLOW);
 2276                 }
 2277         }
 2278         ost->st_ino = st->st_ino;
 2279         if (ost->st_ino != st->st_ino) {
 2280                 switch (ino64_trunc_error) {
 2281                 default:
 2282                 case 0:
 2283                         break;
 2284                 case 1:
 2285                         return (EOVERFLOW);
 2286                 case 2:
 2287                         ost->st_ino = UINT32_MAX;
 2288                         break;
 2289                 }
 2290         }
 2291         ost->st_mode = st->st_mode;
 2292         ost->st_nlink = st->st_nlink;
 2293         if (ost->st_nlink != st->st_nlink) {
 2294                 switch (ino64_trunc_error) {
 2295                 default:
 2296                 case 0:
 2297                         break;
 2298                 case 1:
 2299                         return (EOVERFLOW);
 2300                 case 2:
 2301                         ost->st_nlink = UINT16_MAX;
 2302                         break;
 2303                 }
 2304         }
 2305         ost->st_uid = st->st_uid;
 2306         ost->st_gid = st->st_gid;
 2307         ost->st_rdev = st->st_rdev;
 2308         if (ost->st_rdev != st->st_rdev) {
 2309                 switch (ino64_trunc_error) {
 2310                 default:
 2311                         break;
 2312                 case 1:
 2313                         return (EOVERFLOW);
 2314                 }
 2315         }
 2316         ost->st_atim = st->st_atim;
 2317         ost->st_mtim = st->st_mtim;
 2318         ost->st_ctim = st->st_ctim;
 2319         ost->st_size = st->st_size;
 2320         ost->st_blocks = st->st_blocks;
 2321         ost->st_blksize = st->st_blksize;
 2322         ost->st_flags = st->st_flags;
 2323         ost->st_gen = st->st_gen;
 2324         ost->st_lspare = 0;
 2325         ost->st_birthtim = st->st_birthtim;
 2326         bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
 2327             sizeof(*ost) - offsetof(struct freebsd11_stat,
 2328             st_birthtim) - sizeof(ost->st_birthtim));
 2329         return (0);
 2330 }
 2331 
 2332 int
 2333 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 2334 {
 2335         struct stat sb;
 2336         struct freebsd11_stat osb;
 2337         int error;
 2338 
 2339         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2340             &sb, NULL);
 2341         if (error != 0)
 2342                 return (error);
 2343         error = freebsd11_cvtstat(&sb, &osb);
 2344         if (error == 0)
 2345                 error = copyout(&osb, uap->ub, sizeof(osb));
 2346         return (error);
 2347 }
 2348 
 2349 int
 2350 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
 2351 {
 2352         struct stat sb;
 2353         struct freebsd11_stat osb;
 2354         int error;
 2355 
 2356         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2357             UIO_USERSPACE, &sb, NULL);
 2358         if (error != 0)
 2359                 return (error);
 2360         error = freebsd11_cvtstat(&sb, &osb);
 2361         if (error == 0)
 2362                 error = copyout(&osb, uap->ub, sizeof(osb));
 2363         return (error);
 2364 }
 2365 
 2366 int
 2367 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
 2368 {
 2369         struct fhandle fh;
 2370         struct stat sb;
 2371         struct freebsd11_stat osb;
 2372         int error;
 2373 
 2374         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 2375         if (error != 0)
 2376                 return (error);
 2377         error = kern_fhstat(td, fh, &sb);
 2378         if (error != 0)
 2379                 return (error);
 2380         error = freebsd11_cvtstat(&sb, &osb);
 2381         if (error == 0)
 2382                 error = copyout(&osb, uap->sb, sizeof(osb));
 2383         return (error);
 2384 }
 2385 
 2386 int
 2387 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
 2388 {
 2389         struct stat sb;
 2390         struct freebsd11_stat osb;
 2391         int error;
 2392 
 2393         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2394             UIO_USERSPACE, &sb, NULL);
 2395         if (error != 0)
 2396                 return (error);
 2397         error = freebsd11_cvtstat(&sb, &osb);
 2398         if (error == 0)
 2399                 error = copyout(&osb, uap->buf, sizeof(osb));
 2400         return (error);
 2401 }
 2402 #endif  /* COMPAT_43 || COMPAT_FREEBSD11 */
 2403 
 2404 /*
 2405  * Get file status
 2406  */
 2407 #ifndef _SYS_SYSPROTO_H_
 2408 struct fstatat_args {
 2409         int     fd;
 2410         char    *path;
 2411         struct stat     *buf;
 2412         int     flag;
 2413 }
 2414 #endif
 2415 int
 2416 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2417 {
 2418         struct stat sb;
 2419         int error;
 2420 
 2421         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2422             UIO_USERSPACE, &sb, NULL);
 2423         if (error == 0)
 2424                 error = copyout(&sb, uap->buf, sizeof (sb));
 2425         return (error);
 2426 }
 2427 
 2428 int
 2429 kern_statat(struct thread *td, int flag, int fd, const char *path,
 2430     enum uio_seg pathseg, struct stat *sbp,
 2431     void (*hook)(struct vnode *vp, struct stat *sbp))
 2432 {
 2433         struct nameidata nd;
 2434         int error;
 2435 
 2436         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 2437             AT_EMPTY_PATH)) != 0)
 2438                 return (EINVAL);
 2439 
 2440         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH |
 2441             AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF |
 2442             AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights);
 2443 
 2444         if ((error = namei(&nd)) != 0) {
 2445                 if (error == ENOTDIR &&
 2446                     (nd.ni_resflags & NIRES_EMPTYPATH) != 0)
 2447                         error = kern_fstat(td, fd, sbp);
 2448                 return (error);
 2449         }
 2450         error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED);
 2451         if (__predict_false(hook != NULL)) {
 2452                 if (error == 0) {
 2453                         hook(nd.ni_vp, sbp);
 2454                 }
 2455         }
 2456         NDFREE_PNBUF(&nd);
 2457         vput(nd.ni_vp);
 2458 #ifdef __STAT_TIME_T_EXT
 2459         sbp->st_atim_ext = 0;
 2460         sbp->st_mtim_ext = 0;
 2461         sbp->st_ctim_ext = 0;
 2462         sbp->st_btim_ext = 0;
 2463 #endif
 2464 #ifdef KTRACE
 2465         if (KTRPOINT(td, KTR_STRUCT))
 2466                 ktrstat_error(sbp, error);
 2467 #endif
 2468         return (error);
 2469 }
 2470 
 2471 #if defined(COMPAT_FREEBSD11)
 2472 /*
 2473  * Implementation of the NetBSD [l]stat() functions.
 2474  */
 2475 int
 2476 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 2477 {
 2478         struct freebsd11_stat sb11;
 2479         int error;
 2480 
 2481         error = freebsd11_cvtstat(sb, &sb11);
 2482         if (error != 0)
 2483                 return (error);
 2484 
 2485         bzero(nsb, sizeof(*nsb));
 2486         CP(sb11, *nsb, st_dev);
 2487         CP(sb11, *nsb, st_ino);
 2488         CP(sb11, *nsb, st_mode);
 2489         CP(sb11, *nsb, st_nlink);
 2490         CP(sb11, *nsb, st_uid);
 2491         CP(sb11, *nsb, st_gid);
 2492         CP(sb11, *nsb, st_rdev);
 2493         CP(sb11, *nsb, st_atim);
 2494         CP(sb11, *nsb, st_mtim);
 2495         CP(sb11, *nsb, st_ctim);
 2496         CP(sb11, *nsb, st_size);
 2497         CP(sb11, *nsb, st_blocks);
 2498         CP(sb11, *nsb, st_blksize);
 2499         CP(sb11, *nsb, st_flags);
 2500         CP(sb11, *nsb, st_gen);
 2501         CP(sb11, *nsb, st_birthtim);
 2502         return (0);
 2503 }
 2504 
 2505 #ifndef _SYS_SYSPROTO_H_
 2506 struct freebsd11_nstat_args {
 2507         char    *path;
 2508         struct nstat *ub;
 2509 };
 2510 #endif
 2511 int
 2512 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 2513 {
 2514         struct stat sb;
 2515         struct nstat nsb;
 2516         int error;
 2517 
 2518         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2519             &sb, NULL);
 2520         if (error != 0)
 2521                 return (error);
 2522         error = freebsd11_cvtnstat(&sb, &nsb);
 2523         if (error == 0)
 2524                 error = copyout(&nsb, uap->ub, sizeof (nsb));
 2525         return (error);
 2526 }
 2527 
 2528 /*
 2529  * NetBSD lstat.  Get file status; this version does not follow links.
 2530  */
 2531 #ifndef _SYS_SYSPROTO_H_
 2532 struct freebsd11_nlstat_args {
 2533         char    *path;
 2534         struct nstat *ub;
 2535 };
 2536 #endif
 2537 int
 2538 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 2539 {
 2540         struct stat sb;
 2541         struct nstat nsb;
 2542         int error;
 2543 
 2544         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2545             UIO_USERSPACE, &sb, NULL);
 2546         if (error != 0)
 2547                 return (error);
 2548         error = freebsd11_cvtnstat(&sb, &nsb);
 2549         if (error == 0)
 2550                 error = copyout(&nsb, uap->ub, sizeof (nsb));
 2551         return (error);
 2552 }
 2553 #endif /* COMPAT_FREEBSD11 */
 2554 
 2555 /*
 2556  * Get configurable pathname variables.
 2557  */
 2558 #ifndef _SYS_SYSPROTO_H_
 2559 struct pathconf_args {
 2560         char    *path;
 2561         int     name;
 2562 };
 2563 #endif
 2564 int
 2565 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 2566 {
 2567         long value;
 2568         int error;
 2569 
 2570         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW,
 2571             &value);
 2572         if (error == 0)
 2573                 td->td_retval[0] = value;
 2574         return (error);
 2575 }
 2576 
 2577 #ifndef _SYS_SYSPROTO_H_
 2578 struct lpathconf_args {
 2579         char    *path;
 2580         int     name;
 2581 };
 2582 #endif
 2583 int
 2584 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 2585 {
 2586         long value;
 2587         int error;
 2588 
 2589         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2590             NOFOLLOW, &value);
 2591         if (error == 0)
 2592                 td->td_retval[0] = value;
 2593         return (error);
 2594 }
 2595 
 2596 int
 2597 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg,
 2598     int name, u_long flags, long *valuep)
 2599 {
 2600         struct nameidata nd;
 2601         int error;
 2602 
 2603         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2604                pathseg, path);
 2605         if ((error = namei(&nd)) != 0)
 2606                 return (error);
 2607         NDFREE_PNBUF(&nd);
 2608 
 2609         error = VOP_PATHCONF(nd.ni_vp, name, valuep);
 2610         vput(nd.ni_vp);
 2611         return (error);
 2612 }
 2613 
 2614 /*
 2615  * Return target name of a symbolic link.
 2616  */
 2617 #ifndef _SYS_SYSPROTO_H_
 2618 struct readlink_args {
 2619         char    *path;
 2620         char    *buf;
 2621         size_t  count;
 2622 };
 2623 #endif
 2624 int
 2625 sys_readlink(struct thread *td, struct readlink_args *uap)
 2626 {
 2627 
 2628         return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2629             uap->buf, UIO_USERSPACE, uap->count));
 2630 }
 2631 #ifndef _SYS_SYSPROTO_H_
 2632 struct readlinkat_args {
 2633         int     fd;
 2634         char    *path;
 2635         char    *buf;
 2636         size_t  bufsize;
 2637 };
 2638 #endif
 2639 int
 2640 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2641 {
 2642 
 2643         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2644             uap->buf, UIO_USERSPACE, uap->bufsize));
 2645 }
 2646 
 2647 int
 2648 kern_readlinkat(struct thread *td, int fd, const char *path,
 2649     enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
 2650 {
 2651         struct vnode *vp;
 2652         struct nameidata nd;
 2653         int error;
 2654 
 2655         if (count > IOSIZE_MAX)
 2656                 return (EINVAL);
 2657 
 2658         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
 2659             EMPTYPATH, pathseg, path, fd);
 2660 
 2661         if ((error = namei(&nd)) != 0)
 2662                 return (error);
 2663         NDFREE_PNBUF(&nd);
 2664         vp = nd.ni_vp;
 2665 
 2666         error = kern_readlink_vp(vp, buf, bufseg, count, td);
 2667         vput(vp);
 2668 
 2669         return (error);
 2670 }
 2671 
 2672 /*
 2673  * Helper function to readlink from a vnode
 2674  */
 2675 static int
 2676 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
 2677     struct thread *td)
 2678 {
 2679         struct iovec aiov;
 2680         struct uio auio;
 2681         int error;
 2682 
 2683         ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
 2684 #ifdef MAC
 2685         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2686         if (error != 0)
 2687                 return (error);
 2688 #endif
 2689         if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
 2690                 return (EINVAL);
 2691 
 2692         aiov.iov_base = buf;
 2693         aiov.iov_len = count;
 2694         auio.uio_iov = &aiov;
 2695         auio.uio_iovcnt = 1;
 2696         auio.uio_offset = 0;
 2697         auio.uio_rw = UIO_READ;
 2698         auio.uio_segflg = bufseg;
 2699         auio.uio_td = td;
 2700         auio.uio_resid = count;
 2701         error = VOP_READLINK(vp, &auio, td->td_ucred);
 2702         td->td_retval[0] = count - auio.uio_resid;
 2703         return (error);
 2704 }
 2705 
 2706 /*
 2707  * Common implementation code for chflags() and fchflags().
 2708  */
 2709 static int
 2710 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 2711 {
 2712         struct mount *mp;
 2713         struct vattr vattr;
 2714         int error;
 2715 
 2716         /* We can't support the value matching VNOVAL. */
 2717         if (flags == VNOVAL)
 2718                 return (EOPNOTSUPP);
 2719 
 2720         /*
 2721          * Prevent non-root users from setting flags on devices.  When
 2722          * a device is reused, users can retain ownership of the device
 2723          * if they are allowed to set flags and programs assume that
 2724          * chown can't fail when done as root.
 2725          */
 2726         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2727                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2728                 if (error != 0)
 2729                         return (error);
 2730         }
 2731 
 2732         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2733                 return (error);
 2734         VATTR_NULL(&vattr);
 2735         vattr.va_flags = flags;
 2736         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2737 #ifdef MAC
 2738         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2739         if (error == 0)
 2740 #endif
 2741                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2742         VOP_UNLOCK(vp);
 2743         vn_finished_write(mp);
 2744         return (error);
 2745 }
 2746 
 2747 /*
 2748  * Change flags of a file given a path name.
 2749  */
 2750 #ifndef _SYS_SYSPROTO_H_
 2751 struct chflags_args {
 2752         const char *path;
 2753         u_long  flags;
 2754 };
 2755 #endif
 2756 int
 2757 sys_chflags(struct thread *td, struct chflags_args *uap)
 2758 {
 2759 
 2760         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2761             uap->flags, 0));
 2762 }
 2763 
 2764 #ifndef _SYS_SYSPROTO_H_
 2765 struct chflagsat_args {
 2766         int     fd;
 2767         const char *path;
 2768         u_long  flags;
 2769         int     atflag;
 2770 }
 2771 #endif
 2772 int
 2773 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2774 {
 2775 
 2776         return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE,
 2777             uap->flags, uap->atflag));
 2778 }
 2779 
 2780 /*
 2781  * Same as chflags() but doesn't follow symlinks.
 2782  */
 2783 #ifndef _SYS_SYSPROTO_H_
 2784 struct lchflags_args {
 2785         const char *path;
 2786         u_long flags;
 2787 };
 2788 #endif
 2789 int
 2790 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 2791 {
 2792 
 2793         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2794             uap->flags, AT_SYMLINK_NOFOLLOW));
 2795 }
 2796 
 2797 static int
 2798 kern_chflagsat(struct thread *td, int fd, const char *path,
 2799     enum uio_seg pathseg, u_long flags, int atflag)
 2800 {
 2801         struct nameidata nd;
 2802         int error;
 2803 
 2804         if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 2805             AT_EMPTY_PATH)) != 0)
 2806                 return (EINVAL);
 2807 
 2808         AUDIT_ARG_FFLAGS(flags);
 2809         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW |
 2810             AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 2811             fd, &cap_fchflags_rights);
 2812         if ((error = namei(&nd)) != 0)
 2813                 return (error);
 2814         NDFREE_PNBUF(&nd);
 2815         error = setfflags(td, nd.ni_vp, flags);
 2816         vrele(nd.ni_vp);
 2817         return (error);
 2818 }
 2819 
 2820 /*
 2821  * Change flags of a file given a file descriptor.
 2822  */
 2823 #ifndef _SYS_SYSPROTO_H_
 2824 struct fchflags_args {
 2825         int     fd;
 2826         u_long  flags;
 2827 };
 2828 #endif
 2829 int
 2830 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 2831 {
 2832         struct file *fp;
 2833         int error;
 2834 
 2835         AUDIT_ARG_FD(uap->fd);
 2836         AUDIT_ARG_FFLAGS(uap->flags);
 2837         error = getvnode(td, uap->fd, &cap_fchflags_rights,
 2838             &fp);
 2839         if (error != 0)
 2840                 return (error);
 2841 #ifdef AUDIT
 2842         if (AUDITING_TD(td)) {
 2843                 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2844                 AUDIT_ARG_VNODE1(fp->f_vnode);
 2845                 VOP_UNLOCK(fp->f_vnode);
 2846         }
 2847 #endif
 2848         error = setfflags(td, fp->f_vnode, uap->flags);
 2849         fdrop(fp, td);
 2850         return (error);
 2851 }
 2852 
 2853 /*
 2854  * Common implementation code for chmod(), lchmod() and fchmod().
 2855  */
 2856 int
 2857 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 2858 {
 2859         struct mount *mp;
 2860         struct vattr vattr;
 2861         int error;
 2862 
 2863         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2864                 return (error);
 2865         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2866         VATTR_NULL(&vattr);
 2867         vattr.va_mode = mode & ALLPERMS;
 2868 #ifdef MAC
 2869         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2870         if (error == 0)
 2871 #endif
 2872                 error = VOP_SETATTR(vp, &vattr, cred);
 2873         VOP_UNLOCK(vp);
 2874         vn_finished_write(mp);
 2875         return (error);
 2876 }
 2877 
 2878 /*
 2879  * Change mode of a file given path name.
 2880  */
 2881 #ifndef _SYS_SYSPROTO_H_
 2882 struct chmod_args {
 2883         char    *path;
 2884         int     mode;
 2885 };
 2886 #endif
 2887 int
 2888 sys_chmod(struct thread *td, struct chmod_args *uap)
 2889 {
 2890 
 2891         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2892             uap->mode, 0));
 2893 }
 2894 
 2895 #ifndef _SYS_SYSPROTO_H_
 2896 struct fchmodat_args {
 2897         int     dirfd;
 2898         char    *path;
 2899         mode_t  mode;
 2900         int     flag;
 2901 }
 2902 #endif
 2903 int
 2904 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2905 {
 2906 
 2907         return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE,
 2908             uap->mode, uap->flag));
 2909 }
 2910 
 2911 /*
 2912  * Change mode of a file given path name (don't follow links.)
 2913  */
 2914 #ifndef _SYS_SYSPROTO_H_
 2915 struct lchmod_args {
 2916         char    *path;
 2917         int     mode;
 2918 };
 2919 #endif
 2920 int
 2921 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 2922 {
 2923 
 2924         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2925             uap->mode, AT_SYMLINK_NOFOLLOW));
 2926 }
 2927 
 2928 int
 2929 kern_fchmodat(struct thread *td, int fd, const char *path,
 2930     enum uio_seg pathseg, mode_t mode, int flag)
 2931 {
 2932         struct nameidata nd;
 2933         int error;
 2934 
 2935         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 2936             AT_EMPTY_PATH)) != 0)
 2937                 return (EINVAL);
 2938 
 2939         AUDIT_ARG_MODE(mode);
 2940         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 2941             AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 2942             fd, &cap_fchmod_rights);
 2943         if ((error = namei(&nd)) != 0)
 2944                 return (error);
 2945         NDFREE_PNBUF(&nd);
 2946         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2947         vrele(nd.ni_vp);
 2948         return (error);
 2949 }
 2950 
 2951 /*
 2952  * Change mode of a file given a file descriptor.
 2953  */
 2954 #ifndef _SYS_SYSPROTO_H_
 2955 struct fchmod_args {
 2956         int     fd;
 2957         int     mode;
 2958 };
 2959 #endif
 2960 int
 2961 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2962 {
 2963         struct file *fp;
 2964         int error;
 2965 
 2966         AUDIT_ARG_FD(uap->fd);
 2967         AUDIT_ARG_MODE(uap->mode);
 2968 
 2969         error = fget(td, uap->fd, &cap_fchmod_rights, &fp);
 2970         if (error != 0)
 2971                 return (error);
 2972         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2973         fdrop(fp, td);
 2974         return (error);
 2975 }
 2976 
 2977 /*
 2978  * Common implementation for chown(), lchown(), and fchown()
 2979  */
 2980 int
 2981 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
 2982     gid_t gid)
 2983 {
 2984         struct mount *mp;
 2985         struct vattr vattr;
 2986         int error;
 2987 
 2988         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 2989                 return (error);
 2990         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2991         VATTR_NULL(&vattr);
 2992         vattr.va_uid = uid;
 2993         vattr.va_gid = gid;
 2994 #ifdef MAC
 2995         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2996             vattr.va_gid);
 2997         if (error == 0)
 2998 #endif
 2999                 error = VOP_SETATTR(vp, &vattr, cred);
 3000         VOP_UNLOCK(vp);
 3001         vn_finished_write(mp);
 3002         return (error);
 3003 }
 3004 
 3005 /*
 3006  * Set ownership given a path name.
 3007  */
 3008 #ifndef _SYS_SYSPROTO_H_
 3009 struct chown_args {
 3010         char    *path;
 3011         int     uid;
 3012         int     gid;
 3013 };
 3014 #endif
 3015 int
 3016 sys_chown(struct thread *td, struct chown_args *uap)
 3017 {
 3018 
 3019         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 3020             uap->gid, 0));
 3021 }
 3022 
 3023 #ifndef _SYS_SYSPROTO_H_
 3024 struct fchownat_args {
 3025         int fd;
 3026         const char * path;
 3027         uid_t uid;
 3028         gid_t gid;
 3029         int flag;
 3030 };
 3031 #endif
 3032 int
 3033 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 3034 {
 3035 
 3036         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3037             uap->gid, uap->flag));
 3038 }
 3039 
 3040 int
 3041 kern_fchownat(struct thread *td, int fd, const char *path,
 3042     enum uio_seg pathseg, int uid, int gid, int flag)
 3043 {
 3044         struct nameidata nd;
 3045         int error;
 3046 
 3047         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 3048             AT_EMPTY_PATH)) != 0)
 3049                 return (EINVAL);
 3050 
 3051         AUDIT_ARG_OWNER(uid, gid);
 3052         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 3053             AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 3054             fd, &cap_fchown_rights);
 3055 
 3056         if ((error = namei(&nd)) != 0)
 3057                 return (error);
 3058         NDFREE_PNBUF(&nd);
 3059         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3060         vrele(nd.ni_vp);
 3061         return (error);
 3062 }
 3063 
 3064 /*
 3065  * Set ownership given a path name, do not cross symlinks.
 3066  */
 3067 #ifndef _SYS_SYSPROTO_H_
 3068 struct lchown_args {
 3069         char    *path;
 3070         int     uid;
 3071         int     gid;
 3072 };
 3073 #endif
 3074 int
 3075 sys_lchown(struct thread *td, struct lchown_args *uap)
 3076 {
 3077 
 3078         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3079             uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 3080 }
 3081 
 3082 /*
 3083  * Set ownership given a file descriptor.
 3084  */
 3085 #ifndef _SYS_SYSPROTO_H_
 3086 struct fchown_args {
 3087         int     fd;
 3088         int     uid;
 3089         int     gid;
 3090 };
 3091 #endif
 3092 int
 3093 sys_fchown(struct thread *td, struct fchown_args *uap)
 3094 {
 3095         struct file *fp;
 3096         int error;
 3097 
 3098         AUDIT_ARG_FD(uap->fd);
 3099         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3100         error = fget(td, uap->fd, &cap_fchown_rights, &fp);
 3101         if (error != 0)
 3102                 return (error);
 3103         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3104         fdrop(fp, td);
 3105         return (error);
 3106 }
 3107 
 3108 /*
 3109  * Common implementation code for utimes(), lutimes(), and futimes().
 3110  */
 3111 static int
 3112 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
 3113     struct timespec *tsp)
 3114 {
 3115         struct timeval tv[2];
 3116         const struct timeval *tvp;
 3117         int error;
 3118 
 3119         if (usrtvp == NULL) {
 3120                 vfs_timestamp(&tsp[0]);
 3121                 tsp[1] = tsp[0];
 3122         } else {
 3123                 if (tvpseg == UIO_SYSSPACE) {
 3124                         tvp = usrtvp;
 3125                 } else {
 3126                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3127                                 return (error);
 3128                         tvp = tv;
 3129                 }
 3130 
 3131                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3132                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3133                         return (EINVAL);
 3134                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3135                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3136         }
 3137         return (0);
 3138 }
 3139 
 3140 /*
 3141  * Common implementation code for futimens(), utimensat().
 3142  */
 3143 #define UTIMENS_NULL    0x1
 3144 #define UTIMENS_EXIT    0x2
 3145 static int
 3146 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3147     struct timespec *tsp, int *retflags)
 3148 {
 3149         struct timespec tsnow;
 3150         int error;
 3151 
 3152         vfs_timestamp(&tsnow);
 3153         *retflags = 0;
 3154         if (usrtsp == NULL) {
 3155                 tsp[0] = tsnow;
 3156                 tsp[1] = tsnow;
 3157                 *retflags |= UTIMENS_NULL;
 3158                 return (0);
 3159         }
 3160         if (tspseg == UIO_SYSSPACE) {
 3161                 tsp[0] = usrtsp[0];
 3162                 tsp[1] = usrtsp[1];
 3163         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3164                 return (error);
 3165         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3166                 *retflags |= UTIMENS_EXIT;
 3167         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3168                 *retflags |= UTIMENS_NULL;
 3169         if (tsp[0].tv_nsec == UTIME_OMIT)
 3170                 tsp[0].tv_sec = VNOVAL;
 3171         else if (tsp[0].tv_nsec == UTIME_NOW)
 3172                 tsp[0] = tsnow;
 3173         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3174                 return (EINVAL);
 3175         if (tsp[1].tv_nsec == UTIME_OMIT)
 3176                 tsp[1].tv_sec = VNOVAL;
 3177         else if (tsp[1].tv_nsec == UTIME_NOW)
 3178                 tsp[1] = tsnow;
 3179         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3180                 return (EINVAL);
 3181 
 3182         return (0);
 3183 }
 3184 
 3185 /*
 3186  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3187  * and utimensat().
 3188  */
 3189 static int
 3190 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
 3191     int numtimes, int nullflag)
 3192 {
 3193         struct mount *mp;
 3194         struct vattr vattr;
 3195         int error;
 3196         bool setbirthtime;
 3197 
 3198         setbirthtime = false;
 3199         vattr.va_birthtime.tv_sec = VNOVAL;
 3200         vattr.va_birthtime.tv_nsec = 0;
 3201 
 3202         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
 3203                 return (error);
 3204         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3205         if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 &&
 3206             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3207                 setbirthtime = true;
 3208         VATTR_NULL(&vattr);
 3209         vattr.va_atime = ts[0];
 3210         vattr.va_mtime = ts[1];
 3211         if (setbirthtime)
 3212                 vattr.va_birthtime = ts[1];
 3213         if (numtimes > 2)
 3214                 vattr.va_birthtime = ts[2];
 3215         if (nullflag)
 3216                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3217 #ifdef MAC
 3218         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3219             vattr.va_mtime);
 3220 #endif
 3221         if (error == 0)
 3222                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3223         VOP_UNLOCK(vp);
 3224         vn_finished_write(mp);
 3225         return (error);
 3226 }
 3227 
 3228 /*
 3229  * Set the access and modification times of a file.
 3230  */
 3231 #ifndef _SYS_SYSPROTO_H_
 3232 struct utimes_args {
 3233         char    *path;
 3234         struct  timeval *tptr;
 3235 };
 3236 #endif
 3237 int
 3238 sys_utimes(struct thread *td, struct utimes_args *uap)
 3239 {
 3240 
 3241         return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3242             uap->tptr, UIO_USERSPACE));
 3243 }
 3244 
 3245 #ifndef _SYS_SYSPROTO_H_
 3246 struct futimesat_args {
 3247         int fd;
 3248         const char * path;
 3249         const struct timeval * times;
 3250 };
 3251 #endif
 3252 int
 3253 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3254 {
 3255 
 3256         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3257             uap->times, UIO_USERSPACE));
 3258 }
 3259 
 3260 int
 3261 kern_utimesat(struct thread *td, int fd, const char *path,
 3262     enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg)
 3263 {
 3264         struct nameidata nd;
 3265         struct timespec ts[2];
 3266         int error;
 3267 
 3268         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3269                 return (error);
 3270         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3271             &cap_futimes_rights);
 3272 
 3273         if ((error = namei(&nd)) != 0)
 3274                 return (error);
 3275         NDFREE_PNBUF(&nd);
 3276         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3277         vrele(nd.ni_vp);
 3278         return (error);
 3279 }
 3280 
 3281 /*
 3282  * Set the access and modification times of a file.
 3283  */
 3284 #ifndef _SYS_SYSPROTO_H_
 3285 struct lutimes_args {
 3286         char    *path;
 3287         struct  timeval *tptr;
 3288 };
 3289 #endif
 3290 int
 3291 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 3292 {
 3293 
 3294         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3295             UIO_USERSPACE));
 3296 }
 3297 
 3298 int
 3299 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
 3300     const struct timeval *tptr, enum uio_seg tptrseg)
 3301 {
 3302         struct timespec ts[2];
 3303         struct nameidata nd;
 3304         int error;
 3305 
 3306         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3307                 return (error);
 3308         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path);
 3309         if ((error = namei(&nd)) != 0)
 3310                 return (error);
 3311         NDFREE_PNBUF(&nd);
 3312         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3313         vrele(nd.ni_vp);
 3314         return (error);
 3315 }
 3316 
 3317 /*
 3318  * Set the access and modification times of a file.
 3319  */
 3320 #ifndef _SYS_SYSPROTO_H_
 3321 struct futimes_args {
 3322         int     fd;
 3323         struct  timeval *tptr;
 3324 };
 3325 #endif
 3326 int
 3327 sys_futimes(struct thread *td, struct futimes_args *uap)
 3328 {
 3329 
 3330         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3331 }
 3332 
 3333 int
 3334 kern_futimes(struct thread *td, int fd, const struct timeval *tptr,
 3335     enum uio_seg tptrseg)
 3336 {
 3337         struct timespec ts[2];
 3338         struct file *fp;
 3339         int error;
 3340 
 3341         AUDIT_ARG_FD(fd);
 3342         error = getutimes(tptr, tptrseg, ts);
 3343         if (error != 0)
 3344                 return (error);
 3345         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3346         if (error != 0)
 3347                 return (error);
 3348 #ifdef AUDIT
 3349         if (AUDITING_TD(td)) {
 3350                 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3351                 AUDIT_ARG_VNODE1(fp->f_vnode);
 3352                 VOP_UNLOCK(fp->f_vnode);
 3353         }
 3354 #endif
 3355         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3356         fdrop(fp, td);
 3357         return (error);
 3358 }
 3359 
 3360 int
 3361 sys_futimens(struct thread *td, struct futimens_args *uap)
 3362 {
 3363 
 3364         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3365 }
 3366 
 3367 int
 3368 kern_futimens(struct thread *td, int fd, const struct timespec *tptr,
 3369     enum uio_seg tptrseg)
 3370 {
 3371         struct timespec ts[2];
 3372         struct file *fp;
 3373         int error, flags;
 3374 
 3375         AUDIT_ARG_FD(fd);
 3376         error = getutimens(tptr, tptrseg, ts, &flags);
 3377         if (error != 0)
 3378                 return (error);
 3379         if (flags & UTIMENS_EXIT)
 3380                 return (0);
 3381         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3382         if (error != 0)
 3383                 return (error);
 3384 #ifdef AUDIT
 3385         if (AUDITING_TD(td)) {
 3386                 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3387                 AUDIT_ARG_VNODE1(fp->f_vnode);
 3388                 VOP_UNLOCK(fp->f_vnode);
 3389         }
 3390 #endif
 3391         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3392         fdrop(fp, td);
 3393         return (error);
 3394 }
 3395 
 3396 int
 3397 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3398 {
 3399 
 3400         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3401             uap->times, UIO_USERSPACE, uap->flag));
 3402 }
 3403 
 3404 int
 3405 kern_utimensat(struct thread *td, int fd, const char *path,
 3406     enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg,
 3407     int flag)
 3408 {
 3409         struct nameidata nd;
 3410         struct timespec ts[2];
 3411         int error, flags;
 3412 
 3413         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 3414             AT_EMPTY_PATH)) != 0)
 3415                 return (EINVAL);
 3416 
 3417         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3418                 return (error);
 3419         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 3420             AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1,
 3421             pathseg, path, fd, &cap_futimes_rights);
 3422         if ((error = namei(&nd)) != 0)
 3423                 return (error);
 3424         /*
 3425          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3426          * POSIX states:
 3427          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3428          * "Search permission is denied by a component of the path prefix."
 3429          */
 3430         NDFREE_PNBUF(&nd);
 3431         if ((flags & UTIMENS_EXIT) == 0)
 3432                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3433         vrele(nd.ni_vp);
 3434         return (error);
 3435 }
 3436 
 3437 /*
 3438  * Truncate a file given its path name.
 3439  */
 3440 #ifndef _SYS_SYSPROTO_H_
 3441 struct truncate_args {
 3442         char    *path;
 3443         int     pad;
 3444         off_t   length;
 3445 };
 3446 #endif
 3447 int
 3448 sys_truncate(struct thread *td, struct truncate_args *uap)
 3449 {
 3450 
 3451         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3452 }
 3453 
 3454 int
 3455 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
 3456     off_t length)
 3457 {
 3458         struct mount *mp;
 3459         struct vnode *vp;
 3460         void *rl_cookie;
 3461         struct nameidata nd;
 3462         int error;
 3463 
 3464         if (length < 0)
 3465                 return (EINVAL);
 3466         NDPREINIT(&nd);
 3467 retry:
 3468         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path);
 3469         if ((error = namei(&nd)) != 0)
 3470                 return (error);
 3471         vp = nd.ni_vp;
 3472         NDFREE_PNBUF(&nd);
 3473         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3474         if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
 3475                 vn_rangelock_unlock(vp, rl_cookie);
 3476                 vrele(vp);
 3477                 return (error);
 3478         }
 3479         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3480         if (vp->v_type == VDIR) {
 3481                 error = EISDIR;
 3482                 goto out;
 3483         }
 3484 #ifdef MAC
 3485         error = mac_vnode_check_write(td->td_ucred, NOCRED, vp);
 3486         if (error != 0)
 3487                 goto out;
 3488 #endif
 3489         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 3490         if (error != 0)
 3491                 goto out;
 3492 
 3493         error = vn_truncate_locked(vp, length, false, td->td_ucred);
 3494 out:
 3495         VOP_UNLOCK(vp);
 3496         vn_finished_write(mp);
 3497         vn_rangelock_unlock(vp, rl_cookie);
 3498         vrele(vp);
 3499         if (error == ERELOOKUP)
 3500                 goto retry;
 3501         return (error);
 3502 }
 3503 
 3504 #if defined(COMPAT_43)
 3505 /*
 3506  * Truncate a file given its path name.
 3507  */
 3508 #ifndef _SYS_SYSPROTO_H_
 3509 struct otruncate_args {
 3510         char    *path;
 3511         long    length;
 3512 };
 3513 #endif
 3514 int
 3515 otruncate(struct thread *td, struct otruncate_args *uap)
 3516 {
 3517 
 3518         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3519 }
 3520 #endif /* COMPAT_43 */
 3521 
 3522 #if defined(COMPAT_FREEBSD6)
 3523 /* Versions with the pad argument */
 3524 int
 3525 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3526 {
 3527 
 3528         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3529 }
 3530 
 3531 int
 3532 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3533 {
 3534 
 3535         return (kern_ftruncate(td, uap->fd, uap->length));
 3536 }
 3537 #endif
 3538 
 3539 int
 3540 kern_fsync(struct thread *td, int fd, bool fullsync)
 3541 {
 3542         struct vnode *vp;
 3543         struct mount *mp;
 3544         struct file *fp;
 3545         int error;
 3546 
 3547         AUDIT_ARG_FD(fd);
 3548         error = getvnode(td, fd, &cap_fsync_rights, &fp);
 3549         if (error != 0)
 3550                 return (error);
 3551         vp = fp->f_vnode;
 3552 #if 0
 3553         if (!fullsync)
 3554                 /* XXXKIB: compete outstanding aio writes */;
 3555 #endif
 3556 retry:
 3557         error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
 3558         if (error != 0)
 3559                 goto drop;
 3560         vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY);
 3561         AUDIT_ARG_VNODE1(vp);
 3562         if (vp->v_object != NULL) {
 3563                 VM_OBJECT_WLOCK(vp->v_object);
 3564                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3565                 VM_OBJECT_WUNLOCK(vp->v_object);
 3566         }
 3567         error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 3568         VOP_UNLOCK(vp);
 3569         vn_finished_write(mp);
 3570         if (error == ERELOOKUP)
 3571                 goto retry;
 3572 drop:
 3573         fdrop(fp, td);
 3574         return (error);
 3575 }
 3576 
 3577 /*
 3578  * Sync an open file.
 3579  */
 3580 #ifndef _SYS_SYSPROTO_H_
 3581 struct fsync_args {
 3582         int     fd;
 3583 };
 3584 #endif
 3585 int
 3586 sys_fsync(struct thread *td, struct fsync_args *uap)
 3587 {
 3588 
 3589         return (kern_fsync(td, uap->fd, true));
 3590 }
 3591 
 3592 int
 3593 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 3594 {
 3595 
 3596         return (kern_fsync(td, uap->fd, false));
 3597 }
 3598 
 3599 /*
 3600  * Rename files.  Source and destination must either both be directories, or
 3601  * both not be directories.  If target is a directory, it must be empty.
 3602  */
 3603 #ifndef _SYS_SYSPROTO_H_
 3604 struct rename_args {
 3605         char    *from;
 3606         char    *to;
 3607 };
 3608 #endif
 3609 int
 3610 sys_rename(struct thread *td, struct rename_args *uap)
 3611 {
 3612 
 3613         return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 3614             uap->to, UIO_USERSPACE));
 3615 }
 3616 
 3617 #ifndef _SYS_SYSPROTO_H_
 3618 struct renameat_args {
 3619         int     oldfd;
 3620         char    *old;
 3621         int     newfd;
 3622         char    *new;
 3623 };
 3624 #endif
 3625 int
 3626 sys_renameat(struct thread *td, struct renameat_args *uap)
 3627 {
 3628 
 3629         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3630             UIO_USERSPACE));
 3631 }
 3632 
 3633 #ifdef MAC
 3634 static int
 3635 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
 3636     const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
 3637 {
 3638         int error;
 3639 
 3640         NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3641             pathseg, old, oldfd, &cap_renameat_source_rights);
 3642         if ((error = namei(fromnd)) != 0)
 3643                 return (error);
 3644         error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
 3645             fromnd->ni_vp, &fromnd->ni_cnd);
 3646         VOP_UNLOCK(fromnd->ni_dvp);
 3647         if (fromnd->ni_dvp != fromnd->ni_vp)
 3648                 VOP_UNLOCK(fromnd->ni_vp);
 3649         if (error != 0) {
 3650                 NDFREE_PNBUF(fromnd);
 3651                 vrele(fromnd->ni_dvp);
 3652                 vrele(fromnd->ni_vp);
 3653         }
 3654         return (error);
 3655 }
 3656 #endif
 3657 
 3658 int
 3659 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
 3660     const char *new, enum uio_seg pathseg)
 3661 {
 3662         struct mount *mp = NULL;
 3663         struct vnode *tvp, *fvp, *tdvp;
 3664         struct nameidata fromnd, tond;
 3665         uint64_t tondflags;
 3666         int error;
 3667 
 3668 again:
 3669         bwillwrite();
 3670 #ifdef MAC
 3671         if (mac_vnode_check_rename_from_enabled()) {
 3672                 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg,
 3673                     &fromnd);
 3674                 if (error != 0)
 3675                         return (error);
 3676         } else {
 3677 #endif
 3678         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1,
 3679             pathseg, old, oldfd, &cap_renameat_source_rights);
 3680         if ((error = namei(&fromnd)) != 0)
 3681                 return (error);
 3682 #ifdef MAC
 3683         }
 3684 #endif
 3685         fvp = fromnd.ni_vp;
 3686         tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2;
 3687         if (fromnd.ni_vp->v_type == VDIR)
 3688                 tondflags |= WILLBEDIR;
 3689         NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd,
 3690             &cap_renameat_target_rights);
 3691         if ((error = namei(&tond)) != 0) {
 3692                 /* Translate error code for rename("dir1", "dir2/."). */
 3693                 if (error == EISDIR && fvp->v_type == VDIR)
 3694                         error = EINVAL;
 3695                 NDFREE_PNBUF(&fromnd);
 3696                 vrele(fromnd.ni_dvp);
 3697                 vrele(fvp);
 3698                 goto out1;
 3699         }
 3700         tdvp = tond.ni_dvp;
 3701         tvp = tond.ni_vp;
 3702         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3703         if (error != 0) {
 3704                 NDFREE_PNBUF(&fromnd);
 3705                 NDFREE_PNBUF(&tond);
 3706                 if (tvp != NULL)
 3707                         vput(tvp);
 3708                 if (tdvp == tvp)
 3709                         vrele(tdvp);
 3710                 else
 3711                         vput(tdvp);
 3712                 vrele(fromnd.ni_dvp);
 3713                 vrele(fvp);
 3714                 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH);
 3715                 if (error != 0)
 3716                         return (error);
 3717                 goto again;
 3718         }
 3719         if (tvp != NULL) {
 3720                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3721                         error = ENOTDIR;
 3722                         goto out;
 3723                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3724                         error = EISDIR;
 3725                         goto out;
 3726                 }
 3727 #ifdef CAPABILITIES
 3728                 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
 3729                         /*
 3730                          * If the target already exists we require CAP_UNLINKAT
 3731                          * from 'newfd', when newfd was used for the lookup.
 3732                          */
 3733                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3734                             &cap_unlinkat_rights);
 3735                         if (error != 0)
 3736                                 goto out;
 3737                 }
 3738 #endif
 3739         }
 3740         if (fvp == tdvp) {
 3741                 error = EINVAL;
 3742                 goto out;
 3743         }
 3744         /*
 3745          * If the source is the same as the destination (that is, if they
 3746          * are links to the same vnode), then there is nothing to do.
 3747          */
 3748         if (fvp == tvp)
 3749                 error = ERESTART;
 3750 #ifdef MAC
 3751         else
 3752                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3753                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3754 #endif
 3755 out:
 3756         if (error == 0) {
 3757                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3758                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3759                 NDFREE_PNBUF(&fromnd);
 3760                 NDFREE_PNBUF(&tond);
 3761         } else {
 3762                 NDFREE_PNBUF(&fromnd);
 3763                 NDFREE_PNBUF(&tond);
 3764                 if (tvp != NULL)
 3765                         vput(tvp);
 3766                 if (tdvp == tvp)
 3767                         vrele(tdvp);
 3768                 else
 3769                         vput(tdvp);
 3770                 vrele(fromnd.ni_dvp);
 3771                 vrele(fvp);
 3772         }
 3773         vn_finished_write(mp);
 3774 out1:
 3775         if (error == ERESTART)
 3776                 return (0);
 3777         if (error == ERELOOKUP)
 3778                 goto again;
 3779         return (error);
 3780 }
 3781 
 3782 /*
 3783  * Make a directory file.
 3784  */
 3785 #ifndef _SYS_SYSPROTO_H_
 3786 struct mkdir_args {
 3787         char    *path;
 3788         int     mode;
 3789 };
 3790 #endif
 3791 int
 3792 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 3793 {
 3794 
 3795         return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3796             uap->mode));
 3797 }
 3798 
 3799 #ifndef _SYS_SYSPROTO_H_
 3800 struct mkdirat_args {
 3801         int     fd;
 3802         char    *path;
 3803         mode_t  mode;
 3804 };
 3805 #endif
 3806 int
 3807 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3808 {
 3809 
 3810         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3811 }
 3812 
 3813 int
 3814 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
 3815     int mode)
 3816 {
 3817         struct mount *mp;
 3818         struct vattr vattr;
 3819         struct nameidata nd;
 3820         int error;
 3821 
 3822         AUDIT_ARG_MODE(mode);
 3823         NDPREINIT(&nd);
 3824 restart:
 3825         bwillwrite();
 3826         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 |
 3827             NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR,
 3828             segflg, path, fd, &cap_mkdirat_rights);
 3829         if ((error = namei(&nd)) != 0)
 3830                 return (error);
 3831         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3832                 NDFREE_PNBUF(&nd);
 3833                 vput(nd.ni_dvp);
 3834                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 3835                         return (error);
 3836                 goto restart;
 3837         }
 3838         VATTR_NULL(&vattr);
 3839         vattr.va_type = VDIR;
 3840         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask;
 3841 #ifdef MAC
 3842         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3843             &vattr);
 3844         if (error != 0)
 3845                 goto out;
 3846 #endif
 3847         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3848 #ifdef MAC
 3849 out:
 3850 #endif
 3851         NDFREE_PNBUF(&nd);
 3852         VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 3853         vn_finished_write(mp);
 3854         if (error == ERELOOKUP)
 3855                 goto restart;
 3856         return (error);
 3857 }
 3858 
 3859 /*
 3860  * Remove a directory file.
 3861  */
 3862 #ifndef _SYS_SYSPROTO_H_
 3863 struct rmdir_args {
 3864         char    *path;
 3865 };
 3866 #endif
 3867 int
 3868 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 3869 {
 3870 
 3871         return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 3872             0));
 3873 }
 3874 
 3875 int
 3876 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
 3877     enum uio_seg pathseg, int flag)
 3878 {
 3879         struct mount *mp;
 3880         struct vnode *vp;
 3881         struct file *fp;
 3882         struct nameidata nd;
 3883         cap_rights_t rights;
 3884         int error;
 3885 
 3886         fp = NULL;
 3887         if (fd != FD_NONE) {
 3888                 error = getvnode(td, fd, cap_rights_init_one(&rights,
 3889                     CAP_LOOKUP), &fp);
 3890                 if (error != 0)
 3891                         return (error);
 3892         }
 3893 
 3894         NDPREINIT(&nd);
 3895 restart:
 3896         bwillwrite();
 3897         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 3898             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 3899             pathseg, path, dfd, &cap_unlinkat_rights);
 3900         if ((error = namei(&nd)) != 0)
 3901                 goto fdout;
 3902         vp = nd.ni_vp;
 3903         if (vp->v_type != VDIR) {
 3904                 error = ENOTDIR;
 3905                 goto out;
 3906         }
 3907         /*
 3908          * No rmdir "." please.
 3909          */
 3910         if (nd.ni_dvp == vp) {
 3911                 error = EINVAL;
 3912                 goto out;
 3913         }
 3914         /*
 3915          * The root of a mounted filesystem cannot be deleted.
 3916          */
 3917         if (vp->v_vflag & VV_ROOT) {
 3918                 error = EBUSY;
 3919                 goto out;
 3920         }
 3921 
 3922         if (fp != NULL && fp->f_vnode != vp) {
 3923                 if (VN_IS_DOOMED(fp->f_vnode))
 3924                         error = EBADF;
 3925                 else
 3926                         error = EDEADLK;
 3927                 goto out;
 3928         }
 3929 
 3930 #ifdef MAC
 3931         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3932             &nd.ni_cnd);
 3933         if (error != 0)
 3934                 goto out;
 3935 #endif
 3936         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3937                 NDFREE_PNBUF(&nd);
 3938                 vput(vp);
 3939                 if (nd.ni_dvp == vp)
 3940                         vrele(nd.ni_dvp);
 3941                 else
 3942                         vput(nd.ni_dvp);
 3943                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
 3944                         goto fdout;
 3945                 goto restart;
 3946         }
 3947         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3948         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3949         vn_finished_write(mp);
 3950 out:
 3951         NDFREE_PNBUF(&nd);
 3952         vput(vp);
 3953         if (nd.ni_dvp == vp)
 3954                 vrele(nd.ni_dvp);
 3955         else
 3956                 vput(nd.ni_dvp);
 3957         if (error == ERELOOKUP)
 3958                 goto restart;
 3959 fdout:
 3960         if (fp != NULL)
 3961                 fdrop(fp, td);
 3962         return (error);
 3963 }
 3964 
 3965 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 3966 int
 3967 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
 3968     long *basep, void (*func)(struct freebsd11_dirent *))
 3969 {
 3970         struct freebsd11_dirent dstdp;
 3971         struct dirent *dp, *edp;
 3972         char *dirbuf;
 3973         off_t base;
 3974         ssize_t resid, ucount;
 3975         int error;
 3976 
 3977         /* XXX arbitrary sanity limit on `count'. */
 3978         count = min(count, 64 * 1024);
 3979 
 3980         dirbuf = malloc(count, M_TEMP, M_WAITOK);
 3981 
 3982         error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
 3983             UIO_SYSSPACE);
 3984         if (error != 0)
 3985                 goto done;
 3986         if (basep != NULL)
 3987                 *basep = base;
 3988 
 3989         ucount = 0;
 3990         for (dp = (struct dirent *)dirbuf,
 3991             edp = (struct dirent *)&dirbuf[count - resid];
 3992             ucount < count && dp < edp; ) {
 3993                 if (dp->d_reclen == 0)
 3994                         break;
 3995                 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
 3996                 if (dp->d_namlen >= sizeof(dstdp.d_name))
 3997                         continue;
 3998                 dstdp.d_type = dp->d_type;
 3999                 dstdp.d_namlen = dp->d_namlen;
 4000                 dstdp.d_fileno = dp->d_fileno;          /* truncate */
 4001                 if (dstdp.d_fileno != dp->d_fileno) {
 4002                         switch (ino64_trunc_error) {
 4003                         default:
 4004                         case 0:
 4005                                 break;
 4006                         case 1:
 4007                                 error = EOVERFLOW;
 4008                                 goto done;
 4009                         case 2:
 4010                                 dstdp.d_fileno = UINT32_MAX;
 4011                                 break;
 4012                         }
 4013                 }
 4014                 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
 4015                     ((dp->d_namlen + 1 + 3) &~ 3);
 4016                 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 4017                 bzero(dstdp.d_name + dstdp.d_namlen,
 4018                     dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
 4019                     dstdp.d_namlen);
 4020                 MPASS(dstdp.d_reclen <= dp->d_reclen);
 4021                 MPASS(ucount + dstdp.d_reclen <= count);
 4022                 if (func != NULL)
 4023                         func(&dstdp);
 4024                 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
 4025                 if (error != 0)
 4026                         break;
 4027                 dp = (struct dirent *)((char *)dp + dp->d_reclen);
 4028                 ucount += dstdp.d_reclen;
 4029         }
 4030 
 4031 done:
 4032         free(dirbuf, M_TEMP);
 4033         if (error == 0)
 4034                 td->td_retval[0] = ucount;
 4035         return (error);
 4036 }
 4037 #endif /* COMPAT */
 4038 
 4039 #ifdef COMPAT_43
 4040 static void
 4041 ogetdirentries_cvt(struct freebsd11_dirent *dp)
 4042 {
 4043 #if (BYTE_ORDER == LITTLE_ENDIAN)
 4044         /*
 4045          * The expected low byte of dp->d_namlen is our dp->d_type.
 4046          * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
 4047          */
 4048         dp->d_type = dp->d_namlen;
 4049         dp->d_namlen = 0;
 4050 #else
 4051         /*
 4052          * The dp->d_type is the high byte of the expected dp->d_namlen,
 4053          * so must be zero'ed.
 4054          */
 4055         dp->d_type = 0;
 4056 #endif
 4057 }
 4058 
 4059 /*
 4060  * Read a block of directory entries in a filesystem independent format.
 4061  */
 4062 #ifndef _SYS_SYSPROTO_H_
 4063 struct ogetdirentries_args {
 4064         int     fd;
 4065         char    *buf;
 4066         u_int   count;
 4067         long    *basep;
 4068 };
 4069 #endif
 4070 int
 4071 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 4072 {
 4073         long loff;
 4074         int error;
 4075 
 4076         error = kern_ogetdirentries(td, uap, &loff);
 4077         if (error == 0)
 4078                 error = copyout(&loff, uap->basep, sizeof(long));
 4079         return (error);
 4080 }
 4081 
 4082 int
 4083 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 4084     long *ploff)
 4085 {
 4086         long base;
 4087         int error;
 4088 
 4089         /* XXX arbitrary sanity limit on `count'. */
 4090         if (uap->count > 64 * 1024)
 4091                 return (EINVAL);
 4092 
 4093         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 4094             &base, ogetdirentries_cvt);
 4095 
 4096         if (error == 0 && uap->basep != NULL)
 4097                 error = copyout(&base, uap->basep, sizeof(long));
 4098 
 4099         return (error);
 4100 }
 4101 #endif /* COMPAT_43 */
 4102 
 4103 #if defined(COMPAT_FREEBSD11)
 4104 #ifndef _SYS_SYSPROTO_H_
 4105 struct freebsd11_getdirentries_args {
 4106         int     fd;
 4107         char    *buf;
 4108         u_int   count;
 4109         long    *basep;
 4110 };
 4111 #endif
 4112 int
 4113 freebsd11_getdirentries(struct thread *td,
 4114     struct freebsd11_getdirentries_args *uap)
 4115 {
 4116         long base;
 4117         int error;
 4118 
 4119         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 4120             &base, NULL);
 4121 
 4122         if (error == 0 && uap->basep != NULL)
 4123                 error = copyout(&base, uap->basep, sizeof(long));
 4124         return (error);
 4125 }
 4126 
 4127 int
 4128 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
 4129 {
 4130         struct freebsd11_getdirentries_args ap;
 4131 
 4132         ap.fd = uap->fd;
 4133         ap.buf = uap->buf;
 4134         ap.count = uap->count;
 4135         ap.basep = NULL;
 4136         return (freebsd11_getdirentries(td, &ap));
 4137 }
 4138 #endif /* COMPAT_FREEBSD11 */
 4139 
 4140 /*
 4141  * Read a block of directory entries in a filesystem independent format.
 4142  */
 4143 int
 4144 sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
 4145 {
 4146         off_t base;
 4147         int error;
 4148 
 4149         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4150             NULL, UIO_USERSPACE);
 4151         if (error != 0)
 4152                 return (error);
 4153         if (uap->basep != NULL)
 4154                 error = copyout(&base, uap->basep, sizeof(off_t));
 4155         return (error);
 4156 }
 4157 
 4158 int
 4159 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
 4160     off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 4161 {
 4162         struct vnode *vp;
 4163         struct file *fp;
 4164         struct uio auio;
 4165         struct iovec aiov;
 4166         off_t loff;
 4167         int error, eofflag;
 4168         off_t foffset;
 4169 
 4170         AUDIT_ARG_FD(fd);
 4171         if (count > IOSIZE_MAX)
 4172                 return (EINVAL);
 4173         auio.uio_resid = count;
 4174         error = getvnode(td, fd, &cap_read_rights, &fp);
 4175         if (error != 0)
 4176                 return (error);
 4177         if ((fp->f_flag & FREAD) == 0) {
 4178                 fdrop(fp, td);
 4179                 return (EBADF);
 4180         }
 4181         vp = fp->f_vnode;
 4182         foffset = foffset_lock(fp, 0);
 4183 unionread:
 4184         if (vp->v_type != VDIR) {
 4185                 error = EINVAL;
 4186                 goto fail;
 4187         }
 4188         if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) {
 4189                 error = ENOENT;
 4190                 goto fail;
 4191         }
 4192         aiov.iov_base = buf;
 4193         aiov.iov_len = count;
 4194         auio.uio_iov = &aiov;
 4195         auio.uio_iovcnt = 1;
 4196         auio.uio_rw = UIO_READ;
 4197         auio.uio_segflg = bufseg;
 4198         auio.uio_td = td;
 4199         vn_lock(vp, LK_SHARED | LK_RETRY);
 4200         AUDIT_ARG_VNODE1(vp);
 4201         loff = auio.uio_offset = foffset;
 4202 #ifdef MAC
 4203         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4204         if (error == 0)
 4205 #endif
 4206                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4207                     NULL);
 4208         foffset = auio.uio_offset;
 4209         if (error != 0) {
 4210                 VOP_UNLOCK(vp);
 4211                 goto fail;
 4212         }
 4213         if (count == auio.uio_resid &&
 4214             (vp->v_vflag & VV_ROOT) &&
 4215             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4216                 struct vnode *tvp = vp;
 4217 
 4218                 vp = vp->v_mount->mnt_vnodecovered;
 4219                 VREF(vp);
 4220                 fp->f_vnode = vp;
 4221                 foffset = 0;
 4222                 vput(tvp);
 4223                 goto unionread;
 4224         }
 4225         VOP_UNLOCK(vp);
 4226         *basep = loff;
 4227         if (residp != NULL)
 4228                 *residp = auio.uio_resid;
 4229         td->td_retval[0] = count - auio.uio_resid;
 4230 fail:
 4231         foffset_unlock(fp, foffset, 0);
 4232         fdrop(fp, td);
 4233         return (error);
 4234 }
 4235 
 4236 /*
 4237  * Set the mode mask for creation of filesystem nodes.
 4238  */
 4239 #ifndef _SYS_SYSPROTO_H_
 4240 struct umask_args {
 4241         int     newmask;
 4242 };
 4243 #endif
 4244 int
 4245 sys_umask(struct thread *td, struct umask_args *uap)
 4246 {
 4247         struct pwddesc *pdp;
 4248 
 4249         pdp = td->td_proc->p_pd;
 4250         PWDDESC_XLOCK(pdp);
 4251         td->td_retval[0] = pdp->pd_cmask;
 4252         pdp->pd_cmask = uap->newmask & ALLPERMS;
 4253         PWDDESC_XUNLOCK(pdp);
 4254         return (0);
 4255 }
 4256 
 4257 /*
 4258  * Void all references to file by ripping underlying filesystem away from
 4259  * vnode.
 4260  */
 4261 #ifndef _SYS_SYSPROTO_H_
 4262 struct revoke_args {
 4263         char    *path;
 4264 };
 4265 #endif
 4266 int
 4267 sys_revoke(struct thread *td, struct revoke_args *uap)
 4268 {
 4269         struct vnode *vp;
 4270         struct vattr vattr;
 4271         struct nameidata nd;
 4272         int error;
 4273 
 4274         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4275             uap->path);
 4276         if ((error = namei(&nd)) != 0)
 4277                 return (error);
 4278         vp = nd.ni_vp;
 4279         NDFREE_PNBUF(&nd);
 4280         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4281                 error = EINVAL;
 4282                 goto out;
 4283         }
 4284 #ifdef MAC
 4285         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4286         if (error != 0)
 4287                 goto out;
 4288 #endif
 4289         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4290         if (error != 0)
 4291                 goto out;
 4292         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4293                 error = priv_check(td, PRIV_VFS_ADMIN);
 4294                 if (error != 0)
 4295                         goto out;
 4296         }
 4297         if (devfs_usecount(vp) > 0)
 4298                 VOP_REVOKE(vp, REVOKEALL);
 4299 out:
 4300         vput(vp);
 4301         return (error);
 4302 }
 4303 
 4304 /*
 4305  * This variant of getvnode() allows O_PATH files.  Caller should
 4306  * ensure that returned file and vnode are only used for compatible
 4307  * semantics.
 4308  */
 4309 int
 4310 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
 4311     struct file **fpp)
 4312 {
 4313         struct file *fp;
 4314         int error;
 4315 
 4316         error = fget_unlocked(td, fd, rightsp, &fp);
 4317         if (error != 0)
 4318                 return (error);
 4319 
 4320         /*
 4321          * The file could be not of the vnode type, or it may be not
 4322          * yet fully initialized, in which case the f_vnode pointer
 4323          * may be set, but f_ops is still badfileops.  E.g.,
 4324          * devfs_open() transiently create such situation to
 4325          * facilitate csw d_fdopen().
 4326          *
 4327          * Dupfdopen() handling in kern_openat() installs the
 4328          * half-baked file into the process descriptor table, allowing
 4329          * other thread to dereference it. Guard against the race by
 4330          * checking f_ops.
 4331          */
 4332         if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) {
 4333                 fdrop(fp, td);
 4334                 *fpp = NULL;
 4335                 return (EINVAL);
 4336         }
 4337 
 4338         *fpp = fp;
 4339         return (0);
 4340 }
 4341 
 4342 /*
 4343  * Convert a user file descriptor to a kernel file entry and check
 4344  * that, if it is a capability, the correct rights are present.
 4345  * A reference on the file entry is held upon returning.
 4346  */
 4347 int
 4348 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 4349 {
 4350         int error;
 4351 
 4352         error = getvnode_path(td, fd, rightsp, fpp);
 4353         if (__predict_false(error != 0))
 4354                 return (error);
 4355 
 4356         /*
 4357          * Filter out O_PATH file descriptors, most getvnode() callers
 4358          * do not call fo_ methods.
 4359          */
 4360         if (__predict_false((*fpp)->f_ops == &path_fileops)) {
 4361                 fdrop(*fpp, td);
 4362                 *fpp = NULL;
 4363                 error = EBADF;
 4364         }
 4365 
 4366         return (error);
 4367 }
 4368 
 4369 /*
 4370  * Get an (NFS) file handle.
 4371  */
 4372 #ifndef _SYS_SYSPROTO_H_
 4373 struct lgetfh_args {
 4374         char *fname;
 4375         fhandle_t *fhp;
 4376 };
 4377 #endif
 4378 int
 4379 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 4380 {
 4381 
 4382         return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname,
 4383             UIO_USERSPACE, uap->fhp, UIO_USERSPACE));
 4384 }
 4385 
 4386 #ifndef _SYS_SYSPROTO_H_
 4387 struct getfh_args {
 4388         char *fname;
 4389         fhandle_t *fhp;
 4390 };
 4391 #endif
 4392 int
 4393 sys_getfh(struct thread *td, struct getfh_args *uap)
 4394 {
 4395 
 4396         return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE,
 4397             uap->fhp, UIO_USERSPACE));
 4398 }
 4399 
 4400 /*
 4401  * syscall for the rpc.lockd to use to translate an open descriptor into
 4402  * a NFS file handle.
 4403  *
 4404  * warning: do not remove the priv_check() call or this becomes one giant
 4405  * security hole.
 4406  */
 4407 #ifndef _SYS_SYSPROTO_H_
 4408 struct getfhat_args {
 4409         int fd;
 4410         char *path;
 4411         fhandle_t *fhp;
 4412         int flags;
 4413 };
 4414 #endif
 4415 int
 4416 sys_getfhat(struct thread *td, struct getfhat_args *uap)
 4417 {
 4418 
 4419         return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE,
 4420             uap->fhp, UIO_USERSPACE));
 4421 }
 4422 
 4423 int
 4424 kern_getfhat(struct thread *td, int flags, int fd, const char *path,
 4425     enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg)
 4426 {
 4427         struct nameidata nd;
 4428         fhandle_t fh;
 4429         struct vnode *vp;
 4430         int error;
 4431 
 4432         if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 4433                 return (EINVAL);
 4434         error = priv_check(td, PRIV_VFS_GETFH);
 4435         if (error != 0)
 4436                 return (error);
 4437         NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW |
 4438             AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path,
 4439             fd);
 4440         error = namei(&nd);
 4441         if (error != 0)
 4442                 return (error);
 4443         NDFREE_PNBUF(&nd);
 4444         vp = nd.ni_vp;
 4445         bzero(&fh, sizeof(fh));
 4446         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4447         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4448         vput(vp);
 4449         if (error == 0) {
 4450                 if (fhseg == UIO_USERSPACE)
 4451                         error = copyout(&fh, fhp, sizeof (fh));
 4452                 else
 4453                         memcpy(fhp, &fh, sizeof(fh));
 4454         }
 4455         return (error);
 4456 }
 4457 
 4458 #ifndef _SYS_SYSPROTO_H_
 4459 struct fhlink_args {
 4460         fhandle_t *fhp;
 4461         const char *to;
 4462 };
 4463 #endif
 4464 int
 4465 sys_fhlink(struct thread *td, struct fhlink_args *uap)
 4466 {
 4467 
 4468         return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp));
 4469 }
 4470 
 4471 #ifndef _SYS_SYSPROTO_H_
 4472 struct fhlinkat_args {
 4473         fhandle_t *fhp;
 4474         int tofd;
 4475         const char *to;
 4476 };
 4477 #endif
 4478 int
 4479 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap)
 4480 {
 4481 
 4482         return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp));
 4483 }
 4484 
 4485 static int
 4486 kern_fhlinkat(struct thread *td, int fd, const char *path,
 4487     enum uio_seg pathseg, fhandle_t *fhp)
 4488 {
 4489         fhandle_t fh;
 4490         struct mount *mp;
 4491         struct vnode *vp;
 4492         int error;
 4493 
 4494         error = priv_check(td, PRIV_VFS_GETFH);
 4495         if (error != 0)
 4496                 return (error);
 4497         error = copyin(fhp, &fh, sizeof(fh));
 4498         if (error != 0)
 4499                 return (error);
 4500         do {
 4501                 bwillwrite();
 4502                 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4503                         return (ESTALE);
 4504                 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4505                 vfs_unbusy(mp);
 4506                 if (error != 0)
 4507                         return (error);
 4508                 VOP_UNLOCK(vp);
 4509                 error = kern_linkat_vp(td, vp, fd, path, pathseg);
 4510         } while (error == EAGAIN || error == ERELOOKUP);
 4511         return (error);
 4512 }
 4513 
 4514 #ifndef _SYS_SYSPROTO_H_
 4515 struct fhreadlink_args {
 4516         fhandle_t *fhp;
 4517         char *buf;
 4518         size_t bufsize;
 4519 };
 4520 #endif
 4521 int
 4522 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap)
 4523 {
 4524         fhandle_t fh;
 4525         struct mount *mp;
 4526         struct vnode *vp;
 4527         int error;
 4528 
 4529         error = priv_check(td, PRIV_VFS_GETFH);
 4530         if (error != 0)
 4531                 return (error);
 4532         if (uap->bufsize > IOSIZE_MAX)
 4533                 return (EINVAL);
 4534         error = copyin(uap->fhp, &fh, sizeof(fh));
 4535         if (error != 0)
 4536                 return (error);
 4537         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4538                 return (ESTALE);
 4539         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4540         vfs_unbusy(mp);
 4541         if (error != 0)
 4542                 return (error);
 4543         error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td);
 4544         vput(vp);
 4545         return (error);
 4546 }
 4547 
 4548 /*
 4549  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4550  * open descriptor.
 4551  *
 4552  * warning: do not remove the priv_check() call or this becomes one giant
 4553  * security hole.
 4554  */
 4555 #ifndef _SYS_SYSPROTO_H_
 4556 struct fhopen_args {
 4557         const struct fhandle *u_fhp;
 4558         int flags;
 4559 };
 4560 #endif
 4561 int
 4562 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 4563 {
 4564         return (kern_fhopen(td, uap->u_fhp, uap->flags));
 4565 }
 4566 
 4567 int
 4568 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
 4569 {
 4570         struct mount *mp;
 4571         struct vnode *vp;
 4572         struct fhandle fhp;
 4573         struct file *fp;
 4574         int fmode, error;
 4575         int indx;
 4576 
 4577         error = priv_check(td, PRIV_VFS_FHOPEN);
 4578         if (error != 0)
 4579                 return (error);
 4580         indx = -1;
 4581         fmode = FFLAGS(flags);
 4582         /* why not allow a non-read/write open for our lockd? */
 4583         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4584                 return (EINVAL);
 4585         error = copyin(u_fhp, &fhp, sizeof(fhp));
 4586         if (error != 0)
 4587                 return(error);
 4588         /* find the mount point */
 4589         mp = vfs_busyfs(&fhp.fh_fsid);
 4590         if (mp == NULL)
 4591                 return (ESTALE);
 4592         /* now give me my vnode, it gets returned to me locked */
 4593         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4594         vfs_unbusy(mp);
 4595         if (error != 0)
 4596                 return (error);
 4597 
 4598         error = falloc_noinstall(td, &fp);
 4599         if (error != 0) {
 4600                 vput(vp);
 4601                 return (error);
 4602         }
 4603         /*
 4604          * An extra reference on `fp' has been held for us by
 4605          * falloc_noinstall().
 4606          */
 4607 
 4608 #ifdef INVARIANTS
 4609         td->td_dupfd = -1;
 4610 #endif
 4611         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4612         if (error != 0) {
 4613                 KASSERT(fp->f_ops == &badfileops,
 4614                     ("VOP_OPEN in fhopen() set f_ops"));
 4615                 KASSERT(td->td_dupfd < 0,
 4616                     ("fhopen() encountered fdopen()"));
 4617 
 4618                 vput(vp);
 4619                 goto bad;
 4620         }
 4621 #ifdef INVARIANTS
 4622         td->td_dupfd = 0;
 4623 #endif
 4624         fp->f_vnode = vp;
 4625         finit_vnode(fp, fmode, NULL, &vnops);
 4626         VOP_UNLOCK(vp);
 4627         if ((fmode & O_TRUNC) != 0) {
 4628                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4629                 if (error != 0)
 4630                         goto bad;
 4631         }
 4632 
 4633         error = finstall(td, fp, &indx, fmode, NULL);
 4634 bad:
 4635         fdrop(fp, td);
 4636         td->td_retval[0] = indx;
 4637         return (error);
 4638 }
 4639 
 4640 /*
 4641  * Stat an (NFS) file handle.
 4642  */
 4643 #ifndef _SYS_SYSPROTO_H_
 4644 struct fhstat_args {
 4645         struct fhandle *u_fhp;
 4646         struct stat *sb;
 4647 };
 4648 #endif
 4649 int
 4650 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 4651 {
 4652         struct stat sb;
 4653         struct fhandle fh;
 4654         int error;
 4655 
 4656         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4657         if (error != 0)
 4658                 return (error);
 4659         error = kern_fhstat(td, fh, &sb);
 4660         if (error == 0)
 4661                 error = copyout(&sb, uap->sb, sizeof(sb));
 4662         return (error);
 4663 }
 4664 
 4665 int
 4666 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4667 {
 4668         struct mount *mp;
 4669         struct vnode *vp;
 4670         int error;
 4671 
 4672         error = priv_check(td, PRIV_VFS_FHSTAT);
 4673         if (error != 0)
 4674                 return (error);
 4675         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4676                 return (ESTALE);
 4677         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4678         vfs_unbusy(mp);
 4679         if (error != 0)
 4680                 return (error);
 4681         error = VOP_STAT(vp, sb, td->td_ucred, NOCRED);
 4682         vput(vp);
 4683         return (error);
 4684 }
 4685 
 4686 /*
 4687  * Implement fstatfs() for (NFS) file handles.
 4688  */
 4689 #ifndef _SYS_SYSPROTO_H_
 4690 struct fhstatfs_args {
 4691         struct fhandle *u_fhp;
 4692         struct statfs *buf;
 4693 };
 4694 #endif
 4695 int
 4696 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 4697 {
 4698         struct statfs *sfp;
 4699         fhandle_t fh;
 4700         int error;
 4701 
 4702         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4703         if (error != 0)
 4704                 return (error);
 4705         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 4706         error = kern_fhstatfs(td, fh, sfp);
 4707         if (error == 0)
 4708                 error = copyout(sfp, uap->buf, sizeof(*sfp));
 4709         free(sfp, M_STATFS);
 4710         return (error);
 4711 }
 4712 
 4713 int
 4714 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4715 {
 4716         struct mount *mp;
 4717         struct vnode *vp;
 4718         int error;
 4719 
 4720         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4721         if (error != 0)
 4722                 return (error);
 4723         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4724                 return (ESTALE);
 4725         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4726         if (error != 0) {
 4727                 vfs_unbusy(mp);
 4728                 return (error);
 4729         }
 4730         vput(vp);
 4731         error = prison_canseemount(td->td_ucred, mp);
 4732         if (error != 0)
 4733                 goto out;
 4734 #ifdef MAC
 4735         error = mac_mount_check_stat(td->td_ucred, mp);
 4736         if (error != 0)
 4737                 goto out;
 4738 #endif
 4739         error = VFS_STATFS(mp, buf);
 4740 out:
 4741         vfs_unbusy(mp);
 4742         return (error);
 4743 }
 4744 
 4745 /*
 4746  * Unlike madvise(2), we do not make a best effort to remember every
 4747  * possible caching hint.  Instead, we remember the last setting with
 4748  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4749  * region of any current setting.
 4750  */
 4751 int
 4752 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4753     int advice)
 4754 {
 4755         struct fadvise_info *fa, *new;
 4756         struct file *fp;
 4757         struct vnode *vp;
 4758         off_t end;
 4759         int error;
 4760 
 4761         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4762                 return (EINVAL);
 4763         AUDIT_ARG_VALUE(advice);
 4764         switch (advice) {
 4765         case POSIX_FADV_SEQUENTIAL:
 4766         case POSIX_FADV_RANDOM:
 4767         case POSIX_FADV_NOREUSE:
 4768                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4769                 break;
 4770         case POSIX_FADV_NORMAL:
 4771         case POSIX_FADV_WILLNEED:
 4772         case POSIX_FADV_DONTNEED:
 4773                 new = NULL;
 4774                 break;
 4775         default:
 4776                 return (EINVAL);
 4777         }
 4778         /* XXX: CAP_POSIX_FADVISE? */
 4779         AUDIT_ARG_FD(fd);
 4780         error = fget(td, fd, &cap_no_rights, &fp);
 4781         if (error != 0)
 4782                 goto out;
 4783         AUDIT_ARG_FILE(td->td_proc, fp);
 4784         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4785                 error = ESPIPE;
 4786                 goto out;
 4787         }
 4788         if (fp->f_type != DTYPE_VNODE) {
 4789                 error = ENODEV;
 4790                 goto out;
 4791         }
 4792         vp = fp->f_vnode;
 4793         if (vp->v_type != VREG) {
 4794                 error = ENODEV;
 4795                 goto out;
 4796         }
 4797         if (len == 0)
 4798                 end = OFF_MAX;
 4799         else
 4800                 end = offset + len - 1;
 4801         switch (advice) {
 4802         case POSIX_FADV_SEQUENTIAL:
 4803         case POSIX_FADV_RANDOM:
 4804         case POSIX_FADV_NOREUSE:
 4805                 /*
 4806                  * Try to merge any existing non-standard region with
 4807                  * this new region if possible, otherwise create a new
 4808                  * non-standard region for this request.
 4809                  */
 4810                 mtx_pool_lock(mtxpool_sleep, fp);
 4811                 fa = fp->f_advice;
 4812                 if (fa != NULL && fa->fa_advice == advice &&
 4813                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4814                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4815                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4816                         if (offset < fa->fa_start)
 4817                                 fa->fa_start = offset;
 4818                         if (end > fa->fa_end)
 4819                                 fa->fa_end = end;
 4820                 } else {
 4821                         new->fa_advice = advice;
 4822                         new->fa_start = offset;
 4823                         new->fa_end = end;
 4824                         fp->f_advice = new;
 4825                         new = fa;
 4826                 }
 4827                 mtx_pool_unlock(mtxpool_sleep, fp);
 4828                 break;
 4829         case POSIX_FADV_NORMAL:
 4830                 /*
 4831                  * If a the "normal" region overlaps with an existing
 4832                  * non-standard region, trim or remove the
 4833                  * non-standard region.
 4834                  */
 4835                 mtx_pool_lock(mtxpool_sleep, fp);
 4836                 fa = fp->f_advice;
 4837                 if (fa != NULL) {
 4838                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4839                                 new = fa;
 4840                                 fp->f_advice = NULL;
 4841                         } else if (offset <= fa->fa_start &&
 4842                             end >= fa->fa_start)
 4843                                 fa->fa_start = end + 1;
 4844                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4845                                 fa->fa_end = offset - 1;
 4846                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4847                                 /*
 4848                                  * If the "normal" region is a middle
 4849                                  * portion of the existing
 4850                                  * non-standard region, just remove
 4851                                  * the whole thing rather than picking
 4852                                  * one side or the other to
 4853                                  * preserve.
 4854                                  */
 4855                                 new = fa;
 4856                                 fp->f_advice = NULL;
 4857                         }
 4858                 }
 4859                 mtx_pool_unlock(mtxpool_sleep, fp);
 4860                 break;
 4861         case POSIX_FADV_WILLNEED:
 4862         case POSIX_FADV_DONTNEED:
 4863                 error = VOP_ADVISE(vp, offset, end, advice);
 4864                 break;
 4865         }
 4866 out:
 4867         if (fp != NULL)
 4868                 fdrop(fp, td);
 4869         free(new, M_FADVISE);
 4870         return (error);
 4871 }
 4872 
 4873 int
 4874 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4875 {
 4876         int error;
 4877 
 4878         error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4879             uap->advice);
 4880         return (kern_posix_error(td, error));
 4881 }
 4882 
 4883 int
 4884 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 4885     off_t *outoffp, size_t len, unsigned int flags)
 4886 {
 4887         struct file *infp, *outfp;
 4888         struct vnode *invp, *outvp;
 4889         int error;
 4890         size_t retlen;
 4891         void *rl_rcookie, *rl_wcookie;
 4892         off_t savinoff, savoutoff;
 4893 
 4894         infp = outfp = NULL;
 4895         rl_rcookie = rl_wcookie = NULL;
 4896         savinoff = -1;
 4897         error = 0;
 4898         retlen = 0;
 4899 
 4900         if (flags != 0) {
 4901                 error = EINVAL;
 4902                 goto out;
 4903         }
 4904         if (len > SSIZE_MAX)
 4905                 /*
 4906                  * Although the len argument is size_t, the return argument
 4907                  * is ssize_t (which is signed).  Therefore a size that won't
 4908                  * fit in ssize_t can't be returned.
 4909                  */
 4910                 len = SSIZE_MAX;
 4911 
 4912         /* Get the file structures for the file descriptors. */
 4913         error = fget_read(td, infd, &cap_read_rights, &infp);
 4914         if (error != 0)
 4915                 goto out;
 4916         if (infp->f_ops == &badfileops) {
 4917                 error = EBADF;
 4918                 goto out;
 4919         }
 4920         if (infp->f_vnode == NULL) {
 4921                 error = EINVAL;
 4922                 goto out;
 4923         }
 4924         error = fget_write(td, outfd, &cap_write_rights, &outfp);
 4925         if (error != 0)
 4926                 goto out;
 4927         if (outfp->f_ops == &badfileops) {
 4928                 error = EBADF;
 4929                 goto out;
 4930         }
 4931         if (outfp->f_vnode == NULL) {
 4932                 error = EINVAL;
 4933                 goto out;
 4934         }
 4935 
 4936         /* Set the offset pointers to the correct place. */
 4937         if (inoffp == NULL)
 4938                 inoffp = &infp->f_offset;
 4939         if (outoffp == NULL)
 4940                 outoffp = &outfp->f_offset;
 4941         savinoff = *inoffp;
 4942         savoutoff = *outoffp;
 4943 
 4944         invp = infp->f_vnode;
 4945         outvp = outfp->f_vnode;
 4946         /* Sanity check the f_flag bits. */
 4947         if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
 4948             (infp->f_flag & FREAD) == 0) {
 4949                 error = EBADF;
 4950                 goto out;
 4951         }
 4952 
 4953         /* If len == 0, just return 0. */
 4954         if (len == 0)
 4955                 goto out;
 4956 
 4957         /*
 4958          * If infp and outfp refer to the same file, the byte ranges cannot
 4959          * overlap.
 4960          */
 4961         if (invp == outvp && ((savinoff <= savoutoff && savinoff + len >
 4962             savoutoff) || (savinoff > savoutoff && savoutoff + len >
 4963             savinoff))) {
 4964                 error = EINVAL;
 4965                 goto out;
 4966         }
 4967 
 4968         /* Range lock the byte ranges for both invp and outvp. */
 4969         for (;;) {
 4970                 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
 4971                     len);
 4972                 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
 4973                     len);
 4974                 if (rl_rcookie != NULL)
 4975                         break;
 4976                 vn_rangelock_unlock(outvp, rl_wcookie);
 4977                 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
 4978                 vn_rangelock_unlock(invp, rl_rcookie);
 4979         }
 4980 
 4981         retlen = len;
 4982         error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
 4983             flags, infp->f_cred, outfp->f_cred, td);
 4984 out:
 4985         if (rl_rcookie != NULL)
 4986                 vn_rangelock_unlock(invp, rl_rcookie);
 4987         if (rl_wcookie != NULL)
 4988                 vn_rangelock_unlock(outvp, rl_wcookie);
 4989         if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
 4990                 *inoffp = savinoff;
 4991                 *outoffp = savoutoff;
 4992         }
 4993         if (outfp != NULL)
 4994                 fdrop(outfp, td);
 4995         if (infp != NULL)
 4996                 fdrop(infp, td);
 4997         td->td_retval[0] = retlen;
 4998         return (error);
 4999 }
 5000 
 5001 int
 5002 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
 5003 {
 5004         off_t inoff, outoff, *inoffp, *outoffp;
 5005         int error;
 5006 
 5007         inoffp = outoffp = NULL;
 5008         if (uap->inoffp != NULL) {
 5009                 error = copyin(uap->inoffp, &inoff, sizeof(off_t));
 5010                 if (error != 0)
 5011                         return (error);
 5012                 inoffp = &inoff;
 5013         }
 5014         if (uap->outoffp != NULL) {
 5015                 error = copyin(uap->outoffp, &outoff, sizeof(off_t));
 5016                 if (error != 0)
 5017                         return (error);
 5018                 outoffp = &outoff;
 5019         }
 5020         error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
 5021             outoffp, uap->len, uap->flags);
 5022         if (error == 0 && uap->inoffp != NULL)
 5023                 error = copyout(inoffp, uap->inoffp, sizeof(off_t));
 5024         if (error == 0 && uap->outoffp != NULL)
 5025                 error = copyout(outoffp, uap->outoffp, sizeof(off_t));
 5026         return (error);
 5027 }

Cache object: 063af18b053906c53371c14ac52830b0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.