The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_capsicum.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
   98 static int kern_chflagsat(struct thread *td, int fd, const char *path,
   99     enum uio_seg pathseg, u_long flags, int atflag);
  100 static int setfflags(struct thread *td, struct vnode *, u_long);
  101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  102 static int getutimens(const struct timespec *, enum uio_seg,
  103     struct timespec *, int *);
  104 static int setutimes(struct thread *td, struct vnode *,
  105     const struct timespec *, int, int);
  106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  107     struct thread *td);
  108 static int kern_fhlinkat(struct thread *td, int fd, const char *path,
  109     enum uio_seg pathseg, fhandle_t *fhp);
  110 static int kern_getfhat(struct thread *td, int flags, int fd,
  111     const char *path, enum uio_seg pathseg, fhandle_t *fhp);
  112 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
  113     size_t count, struct thread *td);
  114 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
  115     const char *path, enum uio_seg segflag);
  116 
  117 static uint64_t
  118 at2cnpflags(u_int at_flags, u_int mask)
  119 {
  120         u_int64_t res;
  121 
  122         MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) !=
  123             (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW));
  124 
  125         res = 0;
  126         at_flags &= mask;
  127         if ((at_flags & AT_RESOLVE_BENEATH) != 0)
  128                 res |= RBENEATH;
  129         if ((at_flags & AT_SYMLINK_FOLLOW) != 0)
  130                 res |= FOLLOW;
  131         /* NOFOLLOW is pseudo flag */
  132         if ((mask & AT_SYMLINK_NOFOLLOW) != 0) {
  133                 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
  134                     FOLLOW;
  135         }
  136         return (res);
  137 }
  138 
  139 /*
  140  * Sync each mounted filesystem.
  141  */
  142 #ifndef _SYS_SYSPROTO_H_
  143 struct sync_args {
  144         int     dummy;
  145 };
  146 #endif
  147 /* ARGSUSED */
  148 int
  149 sys_sync(struct thread *td, struct sync_args *uap)
  150 {
  151         struct mount *mp, *nmp;
  152         int save;
  153 
  154         mtx_lock(&mountlist_mtx);
  155         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  156                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  157                         nmp = TAILQ_NEXT(mp, mnt_list);
  158                         continue;
  159                 }
  160                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  161                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  162                         save = curthread_pflags_set(TDP_SYNCIO);
  163                         vfs_msync(mp, MNT_NOWAIT);
  164                         VFS_SYNC(mp, MNT_NOWAIT);
  165                         curthread_pflags_restore(save);
  166                         vn_finished_write(mp);
  167                 }
  168                 mtx_lock(&mountlist_mtx);
  169                 nmp = TAILQ_NEXT(mp, mnt_list);
  170                 vfs_unbusy(mp);
  171         }
  172         mtx_unlock(&mountlist_mtx);
  173         return (0);
  174 }
  175 
  176 /*
  177  * Change filesystem quotas.
  178  */
  179 #ifndef _SYS_SYSPROTO_H_
  180 struct quotactl_args {
  181         char *path;
  182         int cmd;
  183         int uid;
  184         caddr_t arg;
  185 };
  186 #endif
  187 int
  188 sys_quotactl(struct thread *td, struct quotactl_args *uap)
  189 {
  190         struct mount *mp;
  191         struct nameidata nd;
  192         int error;
  193 
  194         AUDIT_ARG_CMD(uap->cmd);
  195         AUDIT_ARG_UID(uap->uid);
  196         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  197                 return (EPERM);
  198         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  199             uap->path, td);
  200         if ((error = namei(&nd)) != 0)
  201                 return (error);
  202         NDFREE(&nd, NDF_ONLY_PNBUF);
  203         mp = nd.ni_vp->v_mount;
  204         vfs_ref(mp);
  205         vput(nd.ni_vp);
  206         error = vfs_busy(mp, 0);
  207         vfs_rel(mp);
  208         if (error != 0)
  209                 return (error);
  210         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  211 
  212         /*
  213          * Since quota on operation typically needs to open quota
  214          * file, the Q_QUOTAON handler needs to unbusy the mount point
  215          * before calling into namei.  Otherwise, unmount might be
  216          * started between two vfs_busy() invocations (first is our,
  217          * second is from mount point cross-walk code in lookup()),
  218          * causing deadlock.
  219          *
  220          * Require that Q_QUOTAON handles the vfs_busy() reference on
  221          * its own, always returning with ubusied mount point.
  222          */
  223         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON &&
  224             (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF)
  225                 vfs_unbusy(mp);
  226         return (error);
  227 }
  228 
  229 /*
  230  * Used by statfs conversion routines to scale the block size up if
  231  * necessary so that all of the block counts are <= 'max_size'.  Note
  232  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  233  * value of 'n'.
  234  */
  235 void
  236 statfs_scale_blocks(struct statfs *sf, long max_size)
  237 {
  238         uint64_t count;
  239         int shift;
  240 
  241         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  242 
  243         /*
  244          * Attempt to scale the block counts to give a more accurate
  245          * overview to userland of the ratio of free space to used
  246          * space.  To do this, find the largest block count and compute
  247          * a divisor that lets it fit into a signed integer <= max_size.
  248          */
  249         if (sf->f_bavail < 0)
  250                 count = -sf->f_bavail;
  251         else
  252                 count = sf->f_bavail;
  253         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  254         if (count <= max_size)
  255                 return;
  256 
  257         count >>= flsl(max_size);
  258         shift = 0;
  259         while (count > 0) {
  260                 shift++;
  261                 count >>=1;
  262         }
  263 
  264         sf->f_bsize <<= shift;
  265         sf->f_blocks >>= shift;
  266         sf->f_bfree >>= shift;
  267         sf->f_bavail >>= shift;
  268 }
  269 
  270 static int
  271 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  272 {
  273         struct statfs *sp;
  274         int error;
  275 
  276         if (mp == NULL)
  277                 return (EBADF);
  278         error = vfs_busy(mp, 0);
  279         vfs_rel(mp);
  280         if (error != 0)
  281                 return (error);
  282 #ifdef MAC
  283         error = mac_mount_check_stat(td->td_ucred, mp);
  284         if (error != 0)
  285                 goto out;
  286 #endif
  287         /*
  288          * Set these in case the underlying filesystem fails to do so.
  289          */
  290         sp = &mp->mnt_stat;
  291         sp->f_version = STATFS_VERSION;
  292         sp->f_namemax = NAME_MAX;
  293         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  294         error = VFS_STATFS(mp, sp);
  295         if (error != 0)
  296                 goto out;
  297         *buf = *sp;
  298         if (priv_check(td, PRIV_VFS_GENERATION)) {
  299                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  300                 prison_enforce_statfs(td->td_ucred, mp, buf);
  301         }
  302 out:
  303         vfs_unbusy(mp);
  304         return (error);
  305 }
  306 
  307 /*
  308  * Get filesystem statistics.
  309  */
  310 #ifndef _SYS_SYSPROTO_H_
  311 struct statfs_args {
  312         char *path;
  313         struct statfs *buf;
  314 };
  315 #endif
  316 int
  317 sys_statfs(struct thread *td, struct statfs_args *uap)
  318 {
  319         struct statfs *sfp;
  320         int error;
  321 
  322         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  323         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  324         if (error == 0)
  325                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  326         free(sfp, M_STATFS);
  327         return (error);
  328 }
  329 
  330 int
  331 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  332     struct statfs *buf)
  333 {
  334         struct mount *mp;
  335         struct nameidata nd;
  336         int error;
  337 
  338         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  339             pathseg, path, td);
  340         error = namei(&nd);
  341         if (error != 0)
  342                 return (error);
  343         mp = nd.ni_vp->v_mount;
  344         vfs_ref(mp);
  345         NDFREE(&nd, NDF_ONLY_PNBUF);
  346         vput(nd.ni_vp);
  347         return (kern_do_statfs(td, mp, buf));
  348 }
  349 
  350 /*
  351  * Get filesystem statistics.
  352  */
  353 #ifndef _SYS_SYSPROTO_H_
  354 struct fstatfs_args {
  355         int fd;
  356         struct statfs *buf;
  357 };
  358 #endif
  359 int
  360 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
  361 {
  362         struct statfs *sfp;
  363         int error;
  364 
  365         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  366         error = kern_fstatfs(td, uap->fd, sfp);
  367         if (error == 0)
  368                 error = copyout(sfp, uap->buf, sizeof(struct statfs));
  369         free(sfp, M_STATFS);
  370         return (error);
  371 }
  372 
  373 int
  374 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  375 {
  376         struct file *fp;
  377         struct mount *mp;
  378         struct vnode *vp;
  379         int error;
  380 
  381         AUDIT_ARG_FD(fd);
  382         error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
  383         if (error != 0)
  384                 return (error);
  385         vp = fp->f_vnode;
  386         vn_lock(vp, LK_SHARED | LK_RETRY);
  387 #ifdef AUDIT
  388         AUDIT_ARG_VNODE1(vp);
  389 #endif
  390         mp = vp->v_mount;
  391         if (mp != NULL)
  392                 vfs_ref(mp);
  393         VOP_UNLOCK(vp, 0);
  394         fdrop(fp, td);
  395         return (kern_do_statfs(td, mp, buf));
  396 }
  397 
  398 /*
  399  * Get statistics on all filesystems.
  400  */
  401 #ifndef _SYS_SYSPROTO_H_
  402 struct getfsstat_args {
  403         struct statfs *buf;
  404         long bufsize;
  405         int mode;
  406 };
  407 #endif
  408 int
  409 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
  410 {
  411         size_t count;
  412         int error;
  413 
  414         if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
  415                 return (EINVAL);
  416         error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
  417             UIO_USERSPACE, uap->mode);
  418         if (error == 0)
  419                 td->td_retval[0] = count;
  420         return (error);
  421 }
  422 
  423 /*
  424  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  425  *      The caller is responsible for freeing memory which will be allocated
  426  *      in '*buf'.
  427  */
  428 int
  429 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  430     size_t *countp, enum uio_seg bufseg, int mode)
  431 {
  432         struct mount *mp, *nmp;
  433         struct statfs *sfsp, *sp, *sptmp, *tofree;
  434         size_t count, maxcount;
  435         int error;
  436 
  437         switch (mode) {
  438         case MNT_WAIT:
  439         case MNT_NOWAIT:
  440                 break;
  441         default:
  442                 if (bufseg == UIO_SYSSPACE)
  443                         *buf = NULL;
  444                 return (EINVAL);
  445         }
  446 restart:
  447         maxcount = bufsize / sizeof(struct statfs);
  448         if (bufsize == 0) {
  449                 sfsp = NULL;
  450                 tofree = NULL;
  451         } else if (bufseg == UIO_USERSPACE) {
  452                 sfsp = *buf;
  453                 tofree = NULL;
  454         } else /* if (bufseg == UIO_SYSSPACE) */ {
  455                 count = 0;
  456                 mtx_lock(&mountlist_mtx);
  457                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  458                         count++;
  459                 }
  460                 mtx_unlock(&mountlist_mtx);
  461                 if (maxcount > count)
  462                         maxcount = count;
  463                 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
  464                     M_STATFS, M_WAITOK);
  465         }
  466         count = 0;
  467         mtx_lock(&mountlist_mtx);
  468         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  469                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  470                         nmp = TAILQ_NEXT(mp, mnt_list);
  471                         continue;
  472                 }
  473 #ifdef MAC
  474                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  475                         nmp = TAILQ_NEXT(mp, mnt_list);
  476                         continue;
  477                 }
  478 #endif
  479                 if (mode == MNT_WAIT) {
  480                         if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
  481                                 /*
  482                                  * If vfs_busy() failed, and MBF_NOWAIT
  483                                  * wasn't passed, then the mp is gone.
  484                                  * Furthermore, because of MBF_MNTLSTLOCK,
  485                                  * the mountlist_mtx was dropped.  We have
  486                                  * no other choice than to start over.
  487                                  */
  488                                 mtx_unlock(&mountlist_mtx);
  489                                 free(tofree, M_STATFS);
  490                                 goto restart;
  491                         }
  492                 } else {
  493                         if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
  494                                 nmp = TAILQ_NEXT(mp, mnt_list);
  495                                 continue;
  496                         }
  497                 }
  498                 if (sfsp != NULL && count < maxcount) {
  499                         sp = &mp->mnt_stat;
  500                         /*
  501                          * Set these in case the underlying filesystem
  502                          * fails to do so.
  503                          */
  504                         sp->f_version = STATFS_VERSION;
  505                         sp->f_namemax = NAME_MAX;
  506                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  507                         /*
  508                          * If MNT_NOWAIT is specified, do not refresh
  509                          * the fsstat cache.
  510                          */
  511                         if (mode != MNT_NOWAIT) {
  512                                 error = VFS_STATFS(mp, sp);
  513                                 if (error != 0) {
  514                                         mtx_lock(&mountlist_mtx);
  515                                         nmp = TAILQ_NEXT(mp, mnt_list);
  516                                         vfs_unbusy(mp);
  517                                         continue;
  518                                 }
  519                         }
  520                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  521                                 sptmp = malloc(sizeof(struct statfs), M_STATFS,
  522                                     M_WAITOK);
  523                                 *sptmp = *sp;
  524                                 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
  525                                 prison_enforce_statfs(td->td_ucred, mp, sptmp);
  526                                 sp = sptmp;
  527                         } else
  528                                 sptmp = NULL;
  529                         if (bufseg == UIO_SYSSPACE) {
  530                                 bcopy(sp, sfsp, sizeof(*sp));
  531                                 free(sptmp, M_STATFS);
  532                         } else /* if (bufseg == UIO_USERSPACE) */ {
  533                                 error = copyout(sp, sfsp, sizeof(*sp));
  534                                 free(sptmp, M_STATFS);
  535                                 if (error != 0) {
  536                                         vfs_unbusy(mp);
  537                                         return (error);
  538                                 }
  539                         }
  540                         sfsp++;
  541                 }
  542                 count++;
  543                 mtx_lock(&mountlist_mtx);
  544                 nmp = TAILQ_NEXT(mp, mnt_list);
  545                 vfs_unbusy(mp);
  546         }
  547         mtx_unlock(&mountlist_mtx);
  548         if (sfsp != NULL && count > maxcount)
  549                 *countp = maxcount;
  550         else
  551                 *countp = count;
  552         return (0);
  553 }
  554 
  555 #ifdef COMPAT_FREEBSD4
  556 /*
  557  * Get old format filesystem statistics.
  558  */
  559 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
  560 
  561 #ifndef _SYS_SYSPROTO_H_
  562 struct freebsd4_statfs_args {
  563         char *path;
  564         struct ostatfs *buf;
  565 };
  566 #endif
  567 int
  568 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
  569 {
  570         struct ostatfs osb;
  571         struct statfs *sfp;
  572         int error;
  573 
  574         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  575         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  576         if (error == 0) {
  577                 freebsd4_cvtstatfs(sfp, &osb);
  578                 error = copyout(&osb, uap->buf, sizeof(osb));
  579         }
  580         free(sfp, M_STATFS);
  581         return (error);
  582 }
  583 
  584 /*
  585  * Get filesystem statistics.
  586  */
  587 #ifndef _SYS_SYSPROTO_H_
  588 struct freebsd4_fstatfs_args {
  589         int fd;
  590         struct ostatfs *buf;
  591 };
  592 #endif
  593 int
  594 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
  595 {
  596         struct ostatfs osb;
  597         struct statfs *sfp;
  598         int error;
  599 
  600         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  601         error = kern_fstatfs(td, uap->fd, sfp);
  602         if (error == 0) {
  603                 freebsd4_cvtstatfs(sfp, &osb);
  604                 error = copyout(&osb, uap->buf, sizeof(osb));
  605         }
  606         free(sfp, M_STATFS);
  607         return (error);
  608 }
  609 
  610 /*
  611  * Get statistics on all filesystems.
  612  */
  613 #ifndef _SYS_SYSPROTO_H_
  614 struct freebsd4_getfsstat_args {
  615         struct ostatfs *buf;
  616         long bufsize;
  617         int mode;
  618 };
  619 #endif
  620 int
  621 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
  622 {
  623         struct statfs *buf, *sp;
  624         struct ostatfs osb;
  625         size_t count, size;
  626         int error;
  627 
  628         if (uap->bufsize < 0)
  629                 return (EINVAL);
  630         count = uap->bufsize / sizeof(struct ostatfs);
  631         if (count > SIZE_MAX / sizeof(struct statfs))
  632                 return (EINVAL);
  633         size = count * sizeof(struct statfs);
  634         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  635             uap->mode);
  636         if (error == 0)
  637                 td->td_retval[0] = count;
  638         if (size != 0) {
  639                 sp = buf;
  640                 while (count != 0 && error == 0) {
  641                         freebsd4_cvtstatfs(sp, &osb);
  642                         error = copyout(&osb, uap->buf, sizeof(osb));
  643                         sp++;
  644                         uap->buf++;
  645                         count--;
  646                 }
  647                 free(buf, M_STATFS);
  648         }
  649         return (error);
  650 }
  651 
  652 /*
  653  * Implement fstatfs() for (NFS) file handles.
  654  */
  655 #ifndef _SYS_SYSPROTO_H_
  656 struct freebsd4_fhstatfs_args {
  657         struct fhandle *u_fhp;
  658         struct ostatfs *buf;
  659 };
  660 #endif
  661 int
  662 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
  663 {
  664         struct ostatfs osb;
  665         struct statfs *sfp;
  666         fhandle_t fh;
  667         int error;
  668 
  669         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  670         if (error != 0)
  671                 return (error);
  672         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  673         error = kern_fhstatfs(td, fh, sfp);
  674         if (error == 0) {
  675                 freebsd4_cvtstatfs(sfp, &osb);
  676                 error = copyout(&osb, uap->buf, sizeof(osb));
  677         }
  678         free(sfp, M_STATFS);
  679         return (error);
  680 }
  681 
  682 /*
  683  * Convert a new format statfs structure to an old format statfs structure.
  684  */
  685 static void
  686 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
  687 {
  688 
  689         statfs_scale_blocks(nsp, LONG_MAX);
  690         bzero(osp, sizeof(*osp));
  691         osp->f_bsize = nsp->f_bsize;
  692         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  693         osp->f_blocks = nsp->f_blocks;
  694         osp->f_bfree = nsp->f_bfree;
  695         osp->f_bavail = nsp->f_bavail;
  696         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  697         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  698         osp->f_owner = nsp->f_owner;
  699         osp->f_type = nsp->f_type;
  700         osp->f_flags = nsp->f_flags;
  701         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  702         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  703         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  704         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  705         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  706             MIN(MFSNAMELEN, OMFSNAMELEN));
  707         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  708             MIN(MNAMELEN, OMNAMELEN));
  709         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  710             MIN(MNAMELEN, OMNAMELEN));
  711         osp->f_fsid = nsp->f_fsid;
  712 }
  713 #endif /* COMPAT_FREEBSD4 */
  714 
  715 #if defined(COMPAT_FREEBSD11)
  716 /*
  717  * Get old format filesystem statistics.
  718  */
  719 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
  720 
  721 int
  722 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
  723 {
  724         struct freebsd11_statfs osb;
  725         struct statfs *sfp;
  726         int error;
  727 
  728         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  729         error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
  730         if (error == 0) {
  731                 freebsd11_cvtstatfs(sfp, &osb);
  732                 error = copyout(&osb, uap->buf, sizeof(osb));
  733         }
  734         free(sfp, M_STATFS);
  735         return (error);
  736 }
  737 
  738 /*
  739  * Get filesystem statistics.
  740  */
  741 int
  742 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
  743 {
  744         struct freebsd11_statfs osb;
  745         struct statfs *sfp;
  746         int error;
  747 
  748         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  749         error = kern_fstatfs(td, uap->fd, sfp);
  750         if (error == 0) {
  751                 freebsd11_cvtstatfs(sfp, &osb);
  752                 error = copyout(&osb, uap->buf, sizeof(osb));
  753         }
  754         free(sfp, M_STATFS);
  755         return (error);
  756 }
  757 
  758 /*
  759  * Get statistics on all filesystems.
  760  */
  761 int
  762 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
  763 {
  764         struct freebsd11_statfs osb;
  765         struct statfs *buf, *sp;
  766         size_t count, size;
  767         int error;
  768 
  769         count = uap->bufsize / sizeof(struct ostatfs);
  770         size = count * sizeof(struct statfs);
  771         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  772             uap->mode);
  773         if (error == 0)
  774                 td->td_retval[0] = count;
  775         if (size > 0) {
  776                 sp = buf;
  777                 while (count > 0 && error == 0) {
  778                         freebsd11_cvtstatfs(sp, &osb);
  779                         error = copyout(&osb, uap->buf, sizeof(osb));
  780                         sp++;
  781                         uap->buf++;
  782                         count--;
  783                 }
  784                 free(buf, M_STATFS);
  785         }
  786         return (error);
  787 }
  788 
  789 /*
  790  * Implement fstatfs() for (NFS) file handles.
  791  */
  792 int
  793 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
  794 {
  795         struct freebsd11_statfs osb;
  796         struct statfs *sfp;
  797         fhandle_t fh;
  798         int error;
  799 
  800         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  801         if (error)
  802                 return (error);
  803         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  804         error = kern_fhstatfs(td, fh, sfp);
  805         if (error == 0) {
  806                 freebsd11_cvtstatfs(sfp, &osb);
  807                 error = copyout(&osb, uap->buf, sizeof(osb));
  808         }
  809         free(sfp, M_STATFS);
  810         return (error);
  811 }
  812 
  813 /*
  814  * Convert a new format statfs structure to an old format statfs structure.
  815  */
  816 static void
  817 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
  818 {
  819 
  820         bzero(osp, sizeof(*osp));
  821         osp->f_version = FREEBSD11_STATFS_VERSION;
  822         osp->f_type = nsp->f_type;
  823         osp->f_flags = nsp->f_flags;
  824         osp->f_bsize = nsp->f_bsize;
  825         osp->f_iosize = nsp->f_iosize;
  826         osp->f_blocks = nsp->f_blocks;
  827         osp->f_bfree = nsp->f_bfree;
  828         osp->f_bavail = nsp->f_bavail;
  829         osp->f_files = nsp->f_files;
  830         osp->f_ffree = nsp->f_ffree;
  831         osp->f_syncwrites = nsp->f_syncwrites;
  832         osp->f_asyncwrites = nsp->f_asyncwrites;
  833         osp->f_syncreads = nsp->f_syncreads;
  834         osp->f_asyncreads = nsp->f_asyncreads;
  835         osp->f_namemax = nsp->f_namemax;
  836         osp->f_owner = nsp->f_owner;
  837         osp->f_fsid = nsp->f_fsid;
  838         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  839             MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
  840         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  841             MIN(MNAMELEN, sizeof(osp->f_mntonname)));
  842         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  843             MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
  844 }
  845 #endif /* COMPAT_FREEBSD11 */
  846 
  847 /*
  848  * Change current working directory to a given file descriptor.
  849  */
  850 #ifndef _SYS_SYSPROTO_H_
  851 struct fchdir_args {
  852         int     fd;
  853 };
  854 #endif
  855 int
  856 sys_fchdir(struct thread *td, struct fchdir_args *uap)
  857 {
  858         struct vnode *vp, *tdp;
  859         struct mount *mp;
  860         struct file *fp;
  861         int error;
  862 
  863         AUDIT_ARG_FD(uap->fd);
  864         error = getvnode(td, uap->fd, &cap_fchdir_rights,
  865             &fp);
  866         if (error != 0)
  867                 return (error);
  868         vp = fp->f_vnode;
  869         vrefact(vp);
  870         fdrop(fp, td);
  871         vn_lock(vp, LK_SHARED | LK_RETRY);
  872         AUDIT_ARG_VNODE1(vp);
  873         error = change_dir(vp, td);
  874         while (!error && (mp = vp->v_mountedhere) != NULL) {
  875                 if (vfs_busy(mp, 0))
  876                         continue;
  877                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  878                 vfs_unbusy(mp);
  879                 if (error != 0)
  880                         break;
  881                 vput(vp);
  882                 vp = tdp;
  883         }
  884         if (error != 0) {
  885                 vput(vp);
  886                 return (error);
  887         }
  888         VOP_UNLOCK(vp, 0);
  889         pwd_chdir(td, vp);
  890         return (0);
  891 }
  892 
  893 /*
  894  * Change current working directory (``.'').
  895  */
  896 #ifndef _SYS_SYSPROTO_H_
  897 struct chdir_args {
  898         char    *path;
  899 };
  900 #endif
  901 int
  902 sys_chdir(struct thread *td, struct chdir_args *uap)
  903 {
  904 
  905         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  906 }
  907 
  908 int
  909 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  910 {
  911         struct nameidata nd;
  912         int error;
  913 
  914         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  915             pathseg, path, td);
  916         if ((error = namei(&nd)) != 0)
  917                 return (error);
  918         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  919                 vput(nd.ni_vp);
  920                 NDFREE(&nd, NDF_ONLY_PNBUF);
  921                 return (error);
  922         }
  923         VOP_UNLOCK(nd.ni_vp, 0);
  924         NDFREE(&nd, NDF_ONLY_PNBUF);
  925         pwd_chdir(td, nd.ni_vp);
  926         return (0);
  927 }
  928 
  929 /*
  930  * Change notion of root (``/'') directory.
  931  */
  932 #ifndef _SYS_SYSPROTO_H_
  933 struct chroot_args {
  934         char    *path;
  935 };
  936 #endif
  937 int
  938 sys_chroot(struct thread *td, struct chroot_args *uap)
  939 {
  940         struct nameidata nd;
  941         int error;
  942 
  943         error = priv_check(td, PRIV_VFS_CHROOT);
  944         if (error != 0)
  945                 return (error);
  946         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  947             UIO_USERSPACE, uap->path, td);
  948         error = namei(&nd);
  949         if (error != 0)
  950                 goto error;
  951         error = change_dir(nd.ni_vp, td);
  952         if (error != 0)
  953                 goto e_vunlock;
  954 #ifdef MAC
  955         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  956         if (error != 0)
  957                 goto e_vunlock;
  958 #endif
  959         VOP_UNLOCK(nd.ni_vp, 0);
  960         error = pwd_chroot(td, nd.ni_vp);
  961         vrele(nd.ni_vp);
  962         NDFREE(&nd, NDF_ONLY_PNBUF);
  963         return (error);
  964 e_vunlock:
  965         vput(nd.ni_vp);
  966 error:
  967         NDFREE(&nd, NDF_ONLY_PNBUF);
  968         return (error);
  969 }
  970 
  971 /*
  972  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  973  * instance.
  974  */
  975 int
  976 change_dir(struct vnode *vp, struct thread *td)
  977 {
  978 #ifdef MAC
  979         int error;
  980 #endif
  981 
  982         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  983         if (vp->v_type != VDIR)
  984                 return (ENOTDIR);
  985 #ifdef MAC
  986         error = mac_vnode_check_chdir(td->td_ucred, vp);
  987         if (error != 0)
  988                 return (error);
  989 #endif
  990         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  991 }
  992 
  993 static __inline void
  994 flags_to_rights(int flags, cap_rights_t *rightsp)
  995 {
  996 
  997         if (flags & O_EXEC) {
  998                 cap_rights_set(rightsp, CAP_FEXECVE);
  999         } else {
 1000                 switch ((flags & O_ACCMODE)) {
 1001                 case O_RDONLY:
 1002                         cap_rights_set(rightsp, CAP_READ);
 1003                         break;
 1004                 case O_RDWR:
 1005                         cap_rights_set(rightsp, CAP_READ);
 1006                         /* FALLTHROUGH */
 1007                 case O_WRONLY:
 1008                         cap_rights_set(rightsp, CAP_WRITE);
 1009                         if (!(flags & (O_APPEND | O_TRUNC)))
 1010                                 cap_rights_set(rightsp, CAP_SEEK);
 1011                         break;
 1012                 }
 1013         }
 1014 
 1015         if (flags & O_CREAT)
 1016                 cap_rights_set(rightsp, CAP_CREATE);
 1017 
 1018         if (flags & O_TRUNC)
 1019                 cap_rights_set(rightsp, CAP_FTRUNCATE);
 1020 
 1021         if (flags & (O_SYNC | O_FSYNC))
 1022                 cap_rights_set(rightsp, CAP_FSYNC);
 1023 
 1024         if (flags & (O_EXLOCK | O_SHLOCK))
 1025                 cap_rights_set(rightsp, CAP_FLOCK);
 1026 }
 1027 
 1028 /*
 1029  * Check permissions, allocate an open file structure, and call the device
 1030  * open routine if any.
 1031  */
 1032 #ifndef _SYS_SYSPROTO_H_
 1033 struct open_args {
 1034         char    *path;
 1035         int     flags;
 1036         int     mode;
 1037 };
 1038 #endif
 1039 int
 1040 sys_open(struct thread *td, struct open_args *uap)
 1041 {
 1042 
 1043         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1044             uap->flags, uap->mode));
 1045 }
 1046 
 1047 #ifndef _SYS_SYSPROTO_H_
 1048 struct openat_args {
 1049         int     fd;
 1050         char    *path;
 1051         int     flag;
 1052         int     mode;
 1053 };
 1054 #endif
 1055 int
 1056 sys_openat(struct thread *td, struct openat_args *uap)
 1057 {
 1058 
 1059         AUDIT_ARG_FD(uap->fd);
 1060         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1061             uap->mode));
 1062 }
 1063 
      /*
       * Open the file named by `path' and install it in the descriptor table
       * of the calling thread's process.  Shared by sys_open(), sys_openat()
       * and ocreat().
       *
       * fd, path and pathseg are handed to NDINIT_ATRIGHTS() for the lookup,
       * together with the capability rights derived from `flags' by
       * flags_to_rights().  `mode' is masked with fdp->fd_cmask and ALLPERMS,
       * and has S_ISTXT cleared, before it is passed to vn_open().
       *
       * Returns 0 and stores the new descriptor number in td->td_retval[0] on
       * success, or an errno value on failure.
       */
 1064 int
 1065 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1066     int flags, int mode)
 1067 {
 1068         struct proc *p = td->td_proc;
 1069         struct filedesc *fdp = p->p_fd;
 1070         struct file *fp;
 1071         struct vnode *vp;
 1072         struct nameidata nd;
 1073         cap_rights_t rights;
 1074         int cmode, error, indx;
 1075 
              /* -1 means "no descriptor installed yet"; tested at success: and bad:. */
 1076         indx = -1;
 1077 
 1078         AUDIT_ARG_FFLAGS(flags);
 1079         AUDIT_ARG_MODE(mode);
 1080         cap_rights_init(&rights, CAP_LOOKUP);
 1081         flags_to_rights(flags, &rights);
 1082         /*
 1083          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1084          * may be specified.
 1085          */
 1086         if (flags & O_EXEC) {
 1087                 if (flags & O_ACCMODE)
 1088                         return (EINVAL);
 1089         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1090                 return (EINVAL);
 1091         } else {
                      /* Only non-O_EXEC opens are run through FFLAGS(). */
 1092                 flags = FFLAGS(flags);
 1093         }
 1094 
 1095         /*
 1096          * Allocate a file structure. The descriptor to reference it
 1097          * is allocated and set by finstall() below.
 1098          */
 1099         error = falloc_noinstall(td, &fp);
 1100         if (error != 0)
 1101                 return (error);
 1102         /*
 1103          * An extra reference on `fp' has been held for us by
 1104          * falloc_noinstall().
 1105          */
 1106         /* Set the flags early so the finit in devfs can pick them up. */
 1107         fp->f_flag = flags & FMASK;
 1108         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1109         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1110             &rights, td);
 1111         td->td_dupfd = -1;              /* XXX check for fdopen */
 1112         error = vn_open(&nd, &flags, cmode, fp);
 1113         if (error != 0) {
 1114                 /*
 1115                  * If the vn_open replaced the method vector, something
 1116                  * wonderous happened deep below and we just pass it up
 1117                  * pretending we know what we do.
 1118                  */
 1119                 if (error == ENXIO && fp->f_ops != &badfileops)
 1120                         goto success;
 1121 
 1122                 /*
 1123                  * Handle special fdopen() case. bleh.
 1124                  *
 1125                  * Don't do this for relative (capability) lookups; we don't
 1126                  * understand exactly what would happen, and we don't think
 1127                  * that it ever should.
 1128                  */
 1129                 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
 1130                     (error == ENODEV || error == ENXIO) &&
 1131                     td->td_dupfd >= 0) {
                              /* On success dupfdopen() sets indx, so finstall() is skipped below. */
 1132                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1133                             &indx);
 1134                         if (error == 0)
 1135                                 goto success;
 1136                 }
 1137 
 1138                 goto bad;
 1139         }
 1140         td->td_dupfd = 0;
 1141         NDFREE(&nd, NDF_ONLY_PNBUF);
 1142         vp = nd.ni_vp;
 1143 
 1144         /*
 1145          * Store the vnode, for any f_type. Typically, the vnode use
 1146          * count is decremented by direct call to vn_closefile() for
 1147          * files that switched type in the cdevsw fdopen() method.
 1148          */
 1149         fp->f_vnode = vp;
 1150         /*
 1151          * If the file wasn't claimed by devfs bind it to the normal
 1152          * vnode operations here.
 1153          */
 1154         if (fp->f_ops == &badfileops) {
 1155                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1156                 fp->f_seqcount = 1;
 1157                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1158                     DTYPE_VNODE, vp, &vnops);
 1159         }
 1160 
              /* NOTE(review): vn_open() presumably returns vp locked -- confirm. */
 1161         VOP_UNLOCK(vp, 0);
 1162         if (flags & O_TRUNC) {
 1163                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1164                 if (error != 0)
 1165                         goto bad;
 1166         }
 1167 success:
 1168         /*
 1169          * If we haven't already installed the FD (for dupfdopen), do so now.
 1170          */
 1171         if (indx == -1) {
 1172                 struct filecaps *fcaps;
 1173 
 1174 #ifdef CAPABILITIES
 1175                 if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
 1176                         fcaps = &nd.ni_filecaps;
 1177                 else
 1178 #endif
 1179                         fcaps = NULL;
 1180                 error = finstall(td, fp, &indx, flags, fcaps);
 1181                 /* On success finstall() consumes fcaps. */
 1182                 if (error != 0) {
 1183                         filecaps_free(&nd.ni_filecaps);
 1184                         goto bad;
 1185                 }
 1186         } else {
 1187                 filecaps_free(&nd.ni_filecaps);
 1188         }
 1189 
 1190         /*
 1191          * Release our private reference, leaving the one associated with
 1192          * the descriptor table intact.
 1193          */
 1194         fdrop(fp, td);
 1195         td->td_retval[0] = indx;
 1196         return (0);
 1197 bad:
              /*
               * Error exit: the KASSERT states that no descriptor may have
               * been installed on this path, so only the reference obtained
               * from falloc_noinstall() is dropped.
               */
 1198         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1199         fdrop(fp, td);
 1200         return (error);
 1201 }
 1202 
 1203 #ifdef COMPAT_43
 1204 /*
 1205  * Create a file.
 1206  */
 1207 #ifndef _SYS_SYSPROTO_H_
 1208 struct ocreat_args {
 1209         char    *path;
 1210         int     mode;
 1211 };
 1212 #endif
 1213 int
 1214 ocreat(struct thread *td, struct ocreat_args *uap)
 1215 {
 1216 
 1217         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1218             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1219 }
 1220 #endif /* COMPAT_43 */
 1221 
 1222 /*
 1223  * Create a special file.
 1224  */
 1225 #ifndef _SYS_SYSPROTO_H_
 1226 struct mknodat_args {
 1227         int     fd;
 1228         char    *path;
 1229         mode_t  mode;
 1230         dev_t   dev;
 1231 };
 1232 #endif
 1233 int
 1234 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1235 {
 1236 
 1237         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1238             uap->dev));
 1239 }
 1240 
 1241 #if defined(COMPAT_FREEBSD11)
 1242 int
 1243 freebsd11_mknod(struct thread *td,
 1244     struct freebsd11_mknod_args *uap)
 1245 {
 1246 
 1247         return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1248             uap->mode, uap->dev));
 1249 }
 1250 
 1251 int
 1252 freebsd11_mknodat(struct thread *td,
 1253     struct freebsd11_mknodat_args *uap)
 1254 {
 1255 
 1256         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1257             uap->dev));
 1258 }
 1259 #endif /* COMPAT_FREEBSD11 */
 1260 
 1261 int
 1262 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1263     int mode, dev_t dev)
 1264 {
 1265         struct vnode *vp;
 1266         struct mount *mp;
 1267         struct vattr vattr;
 1268         struct nameidata nd;
 1269         int error, whiteout = 0;
 1270 
 1271         AUDIT_ARG_MODE(mode);
 1272         AUDIT_ARG_DEV(dev);
 1273         switch (mode & S_IFMT) {
 1274         case S_IFCHR:
 1275         case S_IFBLK:
 1276                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1277                 if (error == 0 && dev == VNOVAL)
 1278                         error = EINVAL;
 1279                 break;
 1280         case S_IFWHT:
 1281                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1282                 break;
 1283         case S_IFIFO:
 1284                 if (dev == 0)
 1285                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1286                 /* FALLTHROUGH */
 1287         default:
 1288                 error = EINVAL;
 1289                 break;
 1290         }
 1291         if (error != 0)
 1292                 return (error);
 1293 restart:
 1294         bwillwrite();
 1295         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1296             NOCACHE, pathseg, path, fd, &cap_mknodat_rights,
 1297             td);
 1298         if ((error = namei(&nd)) != 0)
 1299                 return (error);
 1300         vp = nd.ni_vp;
 1301         if (vp != NULL) {
 1302                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1303                 if (vp == nd.ni_dvp)
 1304                         vrele(nd.ni_dvp);
 1305                 else
 1306                         vput(nd.ni_dvp);
 1307                 vrele(vp);
 1308                 return (EEXIST);
 1309         } else {
 1310                 VATTR_NULL(&vattr);
 1311                 vattr.va_mode = (mode & ALLPERMS) &
 1312                     ~td->td_proc->p_fd->fd_cmask;
 1313                 vattr.va_rdev = dev;
 1314                 whiteout = 0;
 1315 
 1316                 switch (mode & S_IFMT) {
 1317                 case S_IFCHR:
 1318                         vattr.va_type = VCHR;
 1319                         break;
 1320                 case S_IFBLK:
 1321                         vattr.va_type = VBLK;
 1322                         break;
 1323                 case S_IFWHT:
 1324                         whiteout = 1;
 1325                         break;
 1326                 default:
 1327                         panic("kern_mknod: invalid mode");
 1328                 }
 1329         }
 1330         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1331                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1332                 vput(nd.ni_dvp);
 1333                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1334                         return (error);
 1335                 goto restart;
 1336         }
 1337 #ifdef MAC
 1338         if (error == 0 && !whiteout)
 1339                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1340                     &nd.ni_cnd, &vattr);
 1341 #endif
 1342         if (error == 0) {
 1343                 if (whiteout)
 1344                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1345                 else {
 1346                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1347                                                 &nd.ni_cnd, &vattr);
 1348                         if (error == 0)
 1349                                 vput(nd.ni_vp);
 1350                 }
 1351         }
 1352         NDFREE(&nd, NDF_ONLY_PNBUF);
 1353         vput(nd.ni_dvp);
 1354         vn_finished_write(mp);
 1355         return (error);
 1356 }
 1357 
 1358 /*
 1359  * Create a named pipe.
 1360  */
 1361 #ifndef _SYS_SYSPROTO_H_
 1362 struct mkfifo_args {
 1363         char    *path;
 1364         int     mode;
 1365 };
 1366 #endif
 1367 int
 1368 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 1369 {
 1370 
 1371         return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1372             uap->mode));
 1373 }
 1374 
 1375 #ifndef _SYS_SYSPROTO_H_
 1376 struct mkfifoat_args {
 1377         int     fd;
 1378         char    *path;
 1379         mode_t  mode;
 1380 };
 1381 #endif
 1382 int
 1383 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1384 {
 1385 
 1386         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1387             uap->mode));
 1388 }
 1389 
 1390 int
 1391 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1392     int mode)
 1393 {
 1394         struct mount *mp;
 1395         struct vattr vattr;
 1396         struct nameidata nd;
 1397         int error;
 1398 
 1399         AUDIT_ARG_MODE(mode);
 1400 restart:
 1401         bwillwrite();
 1402         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1403             NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights,
 1404             td);
 1405         if ((error = namei(&nd)) != 0)
 1406                 return (error);
 1407         if (nd.ni_vp != NULL) {
 1408                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1409                 if (nd.ni_vp == nd.ni_dvp)
 1410                         vrele(nd.ni_dvp);
 1411                 else
 1412                         vput(nd.ni_dvp);
 1413                 vrele(nd.ni_vp);
 1414                 return (EEXIST);
 1415         }
 1416         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1417                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1418                 vput(nd.ni_dvp);
 1419                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1420                         return (error);
 1421                 goto restart;
 1422         }
 1423         VATTR_NULL(&vattr);
 1424         vattr.va_type = VFIFO;
 1425         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1426 #ifdef MAC
 1427         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1428             &vattr);
 1429         if (error != 0)
 1430                 goto out;
 1431 #endif
 1432         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1433         if (error == 0)
 1434                 vput(nd.ni_vp);
 1435 #ifdef MAC
 1436 out:
 1437 #endif
 1438         vput(nd.ni_dvp);
 1439         vn_finished_write(mp);
 1440         NDFREE(&nd, NDF_ONLY_PNBUF);
 1441         return (error);
 1442 }
 1443 
 1444 /*
 1445  * Make a hard file link.
 1446  */
 1447 #ifndef _SYS_SYSPROTO_H_
 1448 struct link_args {
 1449         char    *path;
 1450         char    *link;
 1451 };
 1452 #endif
 1453 int
 1454 sys_link(struct thread *td, struct link_args *uap)
 1455 {
 1456 
 1457         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 1458             UIO_USERSPACE, FOLLOW));
 1459 }
 1460 
 1461 #ifndef _SYS_SYSPROTO_H_
 1462 struct linkat_args {
 1463         int     fd1;
 1464         char    *path1;
 1465         int     fd2;
 1466         char    *path2;
 1467         int     flag;
 1468 };
 1469 #endif
 1470 int
 1471 sys_linkat(struct thread *td, struct linkat_args *uap)
 1472 {
 1473         int flag;
 1474 
 1475         flag = uap->flag;
 1476         if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH)) != 0)
 1477                 return (EINVAL);
 1478 
 1479         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1480             UIO_USERSPACE, at2cnpflags(flag, AT_SYMLINK_FOLLOW |
 1481             AT_RESOLVE_BENEATH)));
 1482 }
 1483 
      /*
       * Hard link policy knobs, exported as read-write integers under the
       * _security_bsd sysctl node.  They are consulted by can_hardlink(),
       * which performs no ownership checks while both are zero.  Note that
       * hardlink_check_uid has external linkage whereas hardlink_check_gid
       * is file-local.
       */
 1484 int hardlink_check_uid = 0;
 1485 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1486     &hardlink_check_uid, 0,
 1487     "Unprivileged processes cannot create hard links to files owned by other "
 1488     "users");
 1489 static int hardlink_check_gid = 0;
 1490 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1491     &hardlink_check_gid, 0,
 1492     "Unprivileged processes cannot create hard links to files owned by other "
 1493     "groups");
 1494 
 1495 static int
 1496 can_hardlink(struct vnode *vp, struct ucred *cred)
 1497 {
 1498         struct vattr va;
 1499         int error;
 1500 
 1501         if (!hardlink_check_uid && !hardlink_check_gid)
 1502                 return (0);
 1503 
 1504         error = VOP_GETATTR(vp, &va, cred);
 1505         if (error != 0)
 1506                 return (error);
 1507 
 1508         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1509                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1510                 if (error != 0)
 1511                         return (error);
 1512         }
 1513 
 1514         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1515                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1516                 if (error != 0)
 1517                         return (error);
 1518         }
 1519 
 1520         return (0);
 1521 }
 1522 
 1523 int
 1524 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1525     enum uio_seg segflag, int follow)
 1526 {
 1527         struct nameidata nd;
 1528         int error;
 1529 
 1530         do {
 1531                 bwillwrite();
 1532                 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag,
 1533                     path1, fd1, &cap_linkat_source_rights, td);
 1534                 if ((error = namei(&nd)) != 0)
 1535                         return (error);
 1536                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1537                 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
 1538         } while (error ==  EAGAIN);
 1539         return (error);
 1540 }
 1541 
      /*
       * Create a hard link to vp under the name `path', looked up through
       * NDINIT_ATRIGHTS() with `fd' as the directory descriptor.
       *
       * The caller's reference on vp is released on every return path, by
       * either vrele() or vput().
       *
       * Returns 0 on success or an errno value:
       *   EPERM   vp is a directory.
       *   EEXIST  the target name already exists.
       *   EXDEV   the target directory is on a different mount than vp.
       *   EAGAIN  vp could not be locked, or write access had to be waited
       *           for; kern_linkat() redoes the source lookup and calls again.
       */
 1542 static int
 1543 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
 1544     enum uio_seg segflag)
 1545 {
 1546         struct nameidata nd;
 1547         struct mount *mp;
 1548         int error;
 1549 
 1550         if (vp->v_type == VDIR) {
 1551                 vrele(vp);
 1552                 return (EPERM);         /* POSIX */
 1553         }
 1554         NDINIT_ATRIGHTS(&nd, CREATE,
 1555             LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 1556             &cap_linkat_target_rights, td);
 1557         if ((error = namei(&nd)) == 0) {
 1558                 if (nd.ni_vp != NULL) {
                              /* Target name already exists. */
 1559                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1560                         if (nd.ni_dvp == nd.ni_vp)
 1561                                 vrele(nd.ni_dvp);
 1562                         else
 1563                                 vput(nd.ni_dvp);
 1564                         vrele(nd.ni_vp);
 1565                         vrele(vp);
 1566                         return (EEXIST);
 1567                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1568                         /*
 1569                          * Cross-device link.  No need to recheck
 1570                          * vp->v_type, since it cannot change, except
 1571                          * to VBAD.
 1572                          */
 1573                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1574                         vput(nd.ni_dvp);
 1575                         vrele(vp);
 1576                         return (EXDEV);
 1577                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
                              /* Policy checks are made with vp exclusively locked. */
 1578                         error = can_hardlink(vp, td->td_ucred);
 1579 #ifdef MAC
 1580                         if (error == 0)
 1581                                 error = mac_vnode_check_link(td->td_ucred,
 1582                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1583 #endif
 1584                         if (error != 0) {
 1585                                 vput(vp);
 1586                                 vput(nd.ni_dvp);
 1587                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1588                                 return (error);
 1589                         }
 1590                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1591                         if (error != 0) {
                                      /*
                                       * Release everything before waiting for
                                       * write access, then ask the caller to
                                       * start over with EAGAIN.
                                       */
 1592                                 vput(vp);
 1593                                 vput(nd.ni_dvp);
 1594                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1595                                 error = vn_start_write(NULL, &mp,
 1596                                     V_XSLEEP | PCATCH);
 1597                                 if (error != 0)
 1598                                         return (error);
 1599                                 return (EAGAIN);
 1600                         }
 1601                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
                              /* Only unlock here; the final vrele(vp) drops the reference. */
 1602                         VOP_UNLOCK(vp, 0);
 1603                         vput(nd.ni_dvp);
 1604                         vn_finished_write(mp);
 1605                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1606                 } else {
                              /* vn_lock() failed: report EAGAIN rather than its error. */
 1607                         vput(nd.ni_dvp);
 1608                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1609                         vrele(vp);
 1610                         return (EAGAIN);
 1611                 }
 1612         }
              /* Reached after a failed namei() and after the VOP_LINK() attempt. */
 1613         vrele(vp);
 1614         return (error);
 1615 }
 1616 
 1617 /*
 1618  * Make a symbolic link.
 1619  */
 1620 #ifndef _SYS_SYSPROTO_H_
 1621 struct symlink_args {
 1622         char    *path;
 1623         char    *link;
 1624 };
 1625 #endif
 1626 int
 1627 sys_symlink(struct thread *td, struct symlink_args *uap)
 1628 {
 1629 
 1630         return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 1631             UIO_USERSPACE));
 1632 }
 1633 
 1634 #ifndef _SYS_SYSPROTO_H_
 1635 struct symlinkat_args {
 1636         char    *path;
 1637         int     fd;
 1638         char    *path2;
 1639 };
 1640 #endif
 1641 int
 1642 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1643 {
 1644 
 1645         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1646             UIO_USERSPACE));
 1647 }
 1648 
 1649 int
 1650 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1651     enum uio_seg segflg)
 1652 {
 1653         struct mount *mp;
 1654         struct vattr vattr;
 1655         char *syspath;
 1656         struct nameidata nd;
 1657         int error;
 1658 
 1659         if (segflg == UIO_SYSSPACE) {
 1660                 syspath = path1;
 1661         } else {
 1662                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1663                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1664                         goto out;
 1665         }
 1666         AUDIT_ARG_TEXT(syspath);
 1667 restart:
 1668         bwillwrite();
 1669         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1670             NOCACHE, segflg, path2, fd, &cap_symlinkat_rights,
 1671             td);
 1672         if ((error = namei(&nd)) != 0)
 1673                 goto out;
 1674         if (nd.ni_vp) {
 1675                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1676                 if (nd.ni_vp == nd.ni_dvp)
 1677                         vrele(nd.ni_dvp);
 1678                 else
 1679                         vput(nd.ni_dvp);
 1680                 vrele(nd.ni_vp);
 1681                 error = EEXIST;
 1682                 goto out;
 1683         }
 1684         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1685                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1686                 vput(nd.ni_dvp);
 1687                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1688                         goto out;
 1689                 goto restart;
 1690         }
 1691         VATTR_NULL(&vattr);
 1692         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1693 #ifdef MAC
 1694         vattr.va_type = VLNK;
 1695         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1696             &vattr);
 1697         if (error != 0)
 1698                 goto out2;
 1699 #endif
 1700         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1701         if (error == 0)
 1702                 vput(nd.ni_vp);
 1703 #ifdef MAC
 1704 out2:
 1705 #endif
 1706         NDFREE(&nd, NDF_ONLY_PNBUF);
 1707         vput(nd.ni_dvp);
 1708         vn_finished_write(mp);
 1709 out:
 1710         if (segflg != UIO_SYSSPACE)
 1711                 uma_zfree(namei_zone, syspath);
 1712         return (error);
 1713 }
 1714 
 1715 /*
 1716  * Delete a whiteout from the filesystem.
 1717  */
 1718 #ifndef _SYS_SYSPROTO_H_
 1719 struct undelete_args {
 1720         char *path;
 1721 };
 1722 #endif
 1723 int
 1724 sys_undelete(struct thread *td, struct undelete_args *uap)
 1725 {
 1726         struct mount *mp;
 1727         struct nameidata nd;
 1728         int error;
 1729 
 1730 restart:
 1731         bwillwrite();
 1732         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1733             UIO_USERSPACE, uap->path, td);
 1734         error = namei(&nd);
 1735         if (error != 0)
 1736                 return (error);
 1737 
 1738         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1739                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1740                 if (nd.ni_vp == nd.ni_dvp)
 1741                         vrele(nd.ni_dvp);
 1742                 else
 1743                         vput(nd.ni_dvp);
 1744                 if (nd.ni_vp)
 1745                         vrele(nd.ni_vp);
 1746                 return (EEXIST);
 1747         }
 1748         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1749                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1750                 vput(nd.ni_dvp);
 1751                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1752                         return (error);
 1753                 goto restart;
 1754         }
 1755         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1756         NDFREE(&nd, NDF_ONLY_PNBUF);
 1757         vput(nd.ni_dvp);
 1758         vn_finished_write(mp);
 1759         return (error);
 1760 }
 1761 
 1762 /*
 1763  * Delete a name from the filesystem.
 1764  */
 1765 #ifndef _SYS_SYSPROTO_H_
 1766 struct unlink_args {
 1767         char    *path;
 1768 };
 1769 #endif
 1770 int
 1771 sys_unlink(struct thread *td, struct unlink_args *uap)
 1772 {
 1773 
 1774         return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, 0));
 1775 }
 1776 
 1777 #ifndef _SYS_SYSPROTO_H_
 1778 struct unlinkat_args {
 1779         int     fd;
 1780         char    *path;
 1781         int     flag;
 1782 };
 1783 #endif
 1784 int
 1785 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1786 {
 1787         int fd, flag;
 1788         char *path;
 1789 
 1790         flag = uap->flag;
 1791         fd = uap->fd;
 1792         path = uap->path;
 1793 
 1794         if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0)
 1795                 return (EINVAL);
 1796 
 1797         if ((uap->flag & AT_REMOVEDIR) != 0)
 1798                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE, flag));
 1799         else
 1800                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, flag, 0));
 1801 }
 1802 
 1803 int
 1804 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1805     int flag, ino_t oldinum)
 1806 {
 1807         struct mount *mp;
 1808         struct vnode *vp;
 1809         struct nameidata nd;
 1810         struct stat sb;
 1811         int error;
 1812 
 1813 restart:
 1814         bwillwrite();
 1815         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 1816             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 1817             pathseg, path, fd, &cap_unlinkat_rights, td);
 1818         if ((error = namei(&nd)) != 0)
 1819                 return (error == EINVAL ? EPERM : error);
 1820         vp = nd.ni_vp;
 1821         if (vp->v_type == VDIR && oldinum == 0) {
 1822                 error = EPERM;          /* POSIX */
 1823         } else if (oldinum != 0 &&
 1824                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1825                   sb.st_ino != oldinum) {
 1826                         error = EIDRM;  /* Identifier removed */
 1827         } else {
 1828                 /*
 1829                  * The root of a mounted filesystem cannot be deleted.
 1830                  *
 1831                  * XXX: can this only be a VDIR case?
 1832                  */
 1833                 if (vp->v_vflag & VV_ROOT)
 1834                         error = EBUSY;
 1835         }
 1836         if (error == 0) {
 1837                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1838                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1839                         vput(nd.ni_dvp);
 1840                         if (vp == nd.ni_dvp)
 1841                                 vrele(vp);
 1842                         else
 1843                                 vput(vp);
 1844                         if ((error = vn_start_write(NULL, &mp,
 1845                             V_XSLEEP | PCATCH)) != 0)
 1846                                 return (error);
 1847                         goto restart;
 1848                 }
 1849 #ifdef MAC
 1850                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1851                     &nd.ni_cnd);
 1852                 if (error != 0)
 1853                         goto out;
 1854 #endif
 1855                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1856                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1857 #ifdef MAC
 1858 out:
 1859 #endif
 1860                 vn_finished_write(mp);
 1861         }
 1862         NDFREE(&nd, NDF_ONLY_PNBUF);
 1863         vput(nd.ni_dvp);
 1864         if (vp == nd.ni_dvp)
 1865                 vrele(vp);
 1866         else
 1867                 vput(vp);
 1868         return (error);
 1869 }
 1870 
 1871 /*
 1872  * Reposition read/write file offset.
 1873  */
 1874 #ifndef _SYS_SYSPROTO_H_
 1875 struct lseek_args {
 1876         int     fd;
 1877         int     pad;
 1878         off_t   offset;
 1879         int     whence;
 1880 };
 1881 #endif
 1882 int
 1883 sys_lseek(struct thread *td, struct lseek_args *uap)
 1884 {
 1885 
 1886         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1887 }
 1888 
 1889 int
 1890 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 1891 {
 1892         struct file *fp;
 1893         int error;
 1894 
 1895         AUDIT_ARG_FD(fd);
 1896         error = fget(td, fd, &cap_seek_rights, &fp);
 1897         if (error != 0)
 1898                 return (error);
 1899         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1900             fo_seek(fp, offset, whence, td) : ESPIPE;
 1901         fdrop(fp, td);
 1902         return (error);
 1903 }
 1904 
 1905 #if defined(COMPAT_43)
 1906 /*
 1907  * Reposition read/write file offset.
 1908  */
 1909 #ifndef _SYS_SYSPROTO_H_
 1910 struct olseek_args {
 1911         int     fd;
 1912         long    offset;
 1913         int     whence;
 1914 };
 1915 #endif
 1916 int
 1917 olseek(struct thread *td, struct olseek_args *uap)
 1918 {
 1919 
 1920         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1921 }
 1922 #endif /* COMPAT_43 */
 1923 
 1924 #if defined(COMPAT_FREEBSD6)
 1925 /* Version with the 'pad' argument */
 1926 int
 1927 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 1928 {
 1929 
 1930         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1931 }
 1932 #endif
 1933 
 1934 /*
 1935  * Check access permissions using passed credentials.
 1936  */
 1937 static int
 1938 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
 1939      struct thread *td)
 1940 {
 1941         accmode_t accmode;
 1942         int error;
 1943 
 1944         /* Flags == 0 means only check for existence. */
 1945         if (user_flags == 0)
 1946                 return (0);
 1947 
 1948         accmode = 0;
 1949         if (user_flags & R_OK)
 1950                 accmode |= VREAD;
 1951         if (user_flags & W_OK)
 1952                 accmode |= VWRITE;
 1953         if (user_flags & X_OK)
 1954                 accmode |= VEXEC;
 1955 #ifdef MAC
 1956         error = mac_vnode_check_access(cred, vp, accmode);
 1957         if (error != 0)
 1958                 return (error);
 1959 #endif
 1960         if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1961                 error = VOP_ACCESS(vp, accmode, cred, td);
 1962         return (error);
 1963 }
 1964 
 1965 /*
 1966  * Check access permissions using "real" credentials.
 1967  */
 1968 #ifndef _SYS_SYSPROTO_H_
 1969 struct access_args {
 1970         char    *path;
 1971         int     amode;
 1972 };
 1973 #endif
 1974 int
 1975 sys_access(struct thread *td, struct access_args *uap)
 1976 {
 1977 
 1978         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1979             0, uap->amode));
 1980 }
 1981 
 1982 #ifndef _SYS_SYSPROTO_H_
 1983 struct faccessat_args {
 1984         int     dirfd;
 1985         char    *path;
 1986         int     amode;
 1987         int     flag;
 1988 }
 1989 #endif
 1990 int
 1991 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 1992 {
 1993 
 1994         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1995             uap->amode));
 1996 }
 1997 
 1998 int
 1999 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2000     int flag, int amode)
 2001 {
 2002         struct ucred *cred, *usecred;
 2003         struct vnode *vp;
 2004         struct nameidata nd;
 2005         int error;
 2006 
 2007         if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH)) != 0)
 2008                 return (EINVAL);
 2009         if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 2010                 return (EINVAL);
 2011 
 2012         /*
 2013          * Create and modify a temporary credential instead of one that
 2014          * is potentially shared (if we need one).
 2015          */
 2016         cred = td->td_ucred;
 2017         if ((flag & AT_EACCESS) == 0 &&
 2018             ((cred->cr_uid != cred->cr_ruid ||
 2019             cred->cr_rgid != cred->cr_groups[0]))) {
 2020                 usecred = crdup(cred);
 2021                 usecred->cr_uid = cred->cr_ruid;
 2022                 usecred->cr_groups[0] = cred->cr_rgid;
 2023                 td->td_ucred = usecred;
 2024         } else
 2025                 usecred = cred;
 2026         AUDIT_ARG_VALUE(amode);
 2027         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2028             AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH),
 2029             pathseg, path, fd, &cap_fstat_rights, td);
 2030         if ((error = namei(&nd)) != 0)
 2031                 goto out;
 2032         vp = nd.ni_vp;
 2033 
 2034         error = vn_access(vp, amode, usecred, td);
 2035         NDFREE(&nd, NDF_ONLY_PNBUF);
 2036         vput(vp);
 2037 out:
 2038         if (usecred != cred) {
 2039                 td->td_ucred = cred;
 2040                 crfree(usecred);
 2041         }
 2042         return (error);
 2043 }
 2044 
 2045 /*
 2046  * Check access permissions using "effective" credentials.
 2047  */
 2048 #ifndef _SYS_SYSPROTO_H_
 2049 struct eaccess_args {
 2050         char    *path;
 2051         int     amode;
 2052 };
 2053 #endif
 2054 int
 2055 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 2056 {
 2057 
 2058         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2059             AT_EACCESS, uap->amode));
 2060 }
 2061 
 2062 #if defined(COMPAT_43)
 2063 /*
 2064  * Get file status; this version follows links.
 2065  */
 2066 #ifndef _SYS_SYSPROTO_H_
 2067 struct ostat_args {
 2068         char    *path;
 2069         struct ostat *ub;
 2070 };
 2071 #endif
 2072 int
 2073 ostat(struct thread *td, struct ostat_args *uap)
 2074 {
 2075         struct stat sb;
 2076         struct ostat osb;
 2077         int error;
 2078 
 2079         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2080             &sb, NULL);
 2081         if (error != 0)
 2082                 return (error);
 2083         cvtstat(&sb, &osb);
 2084         return (copyout(&osb, uap->ub, sizeof (osb)));
 2085 }
 2086 
 2087 /*
 2088  * Get file status; this version does not follow links.
 2089  */
 2090 #ifndef _SYS_SYSPROTO_H_
 2091 struct olstat_args {
 2092         char    *path;
 2093         struct ostat *ub;
 2094 };
 2095 #endif
 2096 int
 2097 olstat(struct thread *td, struct olstat_args *uap)
 2098 {
 2099         struct stat sb;
 2100         struct ostat osb;
 2101         int error;
 2102 
 2103         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2104             UIO_USERSPACE, &sb, NULL);
 2105         if (error != 0)
 2106                 return (error);
 2107         cvtstat(&sb, &osb);
 2108         return (copyout(&osb, uap->ub, sizeof (osb)));
 2109 }
 2110 
 2111 /*
 2112  * Convert from an old to a new stat structure.
 2113  * XXX: many values are blindly truncated.
 2114  */
 2115 void
 2116 cvtstat(struct stat *st, struct ostat *ost)
 2117 {
 2118 
 2119         bzero(ost, sizeof(*ost));
 2120         ost->st_dev = st->st_dev;
 2121         ost->st_ino = st->st_ino;
 2122         ost->st_mode = st->st_mode;
 2123         ost->st_nlink = st->st_nlink;
 2124         ost->st_uid = st->st_uid;
 2125         ost->st_gid = st->st_gid;
 2126         ost->st_rdev = st->st_rdev;
 2127         ost->st_size = MIN(st->st_size, INT32_MAX);
 2128         ost->st_atim = st->st_atim;
 2129         ost->st_mtim = st->st_mtim;
 2130         ost->st_ctim = st->st_ctim;
 2131         ost->st_blksize = st->st_blksize;
 2132         ost->st_blocks = st->st_blocks;
 2133         ost->st_flags = st->st_flags;
 2134         ost->st_gen = st->st_gen;
 2135 }
 2136 #endif /* COMPAT_43 */
 2137 
 2138 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 2139 int ino64_trunc_error;
 2140 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW,
 2141     &ino64_trunc_error, 0,
 2142     "Error on truncation of device, file or inode number, or link count");
 2143 
 2144 int
 2145 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
 2146 {
 2147 
 2148         ost->st_dev = st->st_dev;
 2149         if (ost->st_dev != st->st_dev) {
 2150                 switch (ino64_trunc_error) {
 2151                 default:
 2152                         /*
 2153                          * Since dev_t is almost raw, don't clamp to the
 2154                          * maximum for case 2, but ignore the error.
 2155                          */
 2156                         break;
 2157                 case 1:
 2158                         return (EOVERFLOW);
 2159                 }
 2160         }
 2161         ost->st_ino = st->st_ino;
 2162         if (ost->st_ino != st->st_ino) {
 2163                 switch (ino64_trunc_error) {
 2164                 default:
 2165                 case 0:
 2166                         break;
 2167                 case 1:
 2168                         return (EOVERFLOW);
 2169                 case 2:
 2170                         ost->st_ino = UINT32_MAX;
 2171                         break;
 2172                 }
 2173         }
 2174         ost->st_mode = st->st_mode;
 2175         ost->st_nlink = st->st_nlink;
 2176         if (ost->st_nlink != st->st_nlink) {
 2177                 switch (ino64_trunc_error) {
 2178                 default:
 2179                 case 0:
 2180                         break;
 2181                 case 1:
 2182                         return (EOVERFLOW);
 2183                 case 2:
 2184                         ost->st_nlink = UINT16_MAX;
 2185                         break;
 2186                 }
 2187         }
 2188         ost->st_uid = st->st_uid;
 2189         ost->st_gid = st->st_gid;
 2190         ost->st_rdev = st->st_rdev;
 2191         if (ost->st_rdev != st->st_rdev) {
 2192                 switch (ino64_trunc_error) {
 2193                 default:
 2194                         break;
 2195                 case 1:
 2196                         return (EOVERFLOW);
 2197                 }
 2198         }
 2199         ost->st_atim = st->st_atim;
 2200         ost->st_mtim = st->st_mtim;
 2201         ost->st_ctim = st->st_ctim;
 2202         ost->st_size = st->st_size;
 2203         ost->st_blocks = st->st_blocks;
 2204         ost->st_blksize = st->st_blksize;
 2205         ost->st_flags = st->st_flags;
 2206         ost->st_gen = st->st_gen;
 2207         ost->st_lspare = 0;
 2208         ost->st_birthtim = st->st_birthtim;
 2209         bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
 2210             sizeof(*ost) - offsetof(struct freebsd11_stat,
 2211             st_birthtim) - sizeof(ost->st_birthtim));
 2212         return (0);
 2213 }
 2214 
 2215 int
 2216 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 2217 {
 2218         struct stat sb;
 2219         struct freebsd11_stat osb;
 2220         int error;
 2221 
 2222         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2223             &sb, NULL);
 2224         if (error != 0)
 2225                 return (error);
 2226         error = freebsd11_cvtstat(&sb, &osb);
 2227         if (error == 0)
 2228                 error = copyout(&osb, uap->ub, sizeof(osb));
 2229         return (error);
 2230 }
 2231 
 2232 int
 2233 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
 2234 {
 2235         struct stat sb;
 2236         struct freebsd11_stat osb;
 2237         int error;
 2238 
 2239         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2240             UIO_USERSPACE, &sb, NULL);
 2241         if (error != 0)
 2242                 return (error);
 2243         error = freebsd11_cvtstat(&sb, &osb);
 2244         if (error == 0)
 2245                 error = copyout(&osb, uap->ub, sizeof(osb));
 2246         return (error);
 2247 }
 2248 
 2249 int
 2250 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
 2251 {
 2252         struct fhandle fh;
 2253         struct stat sb;
 2254         struct freebsd11_stat osb;
 2255         int error;
 2256 
 2257         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 2258         if (error != 0)
 2259                 return (error);
 2260         error = kern_fhstat(td, fh, &sb);
 2261         if (error != 0)
 2262                 return (error);
 2263         error = freebsd11_cvtstat(&sb, &osb);
 2264         if (error == 0)
 2265                 error = copyout(&osb, uap->sb, sizeof(osb));
 2266         return (error);
 2267 }
 2268 
 2269 int
 2270 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
 2271 {
 2272         struct stat sb;
 2273         struct freebsd11_stat osb;
 2274         int error;
 2275 
 2276         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2277             UIO_USERSPACE, &sb, NULL);
 2278         if (error != 0)
 2279                 return (error);
 2280         error = freebsd11_cvtstat(&sb, &osb);
 2281         if (error == 0)
 2282                 error = copyout(&osb, uap->buf, sizeof(osb));
 2283         return (error);
 2284 }
#endif	/* COMPAT_43 || COMPAT_FREEBSD11 */
 2286 
 2287 /*
 2288  * Get file status
 2289  */
 2290 #ifndef _SYS_SYSPROTO_H_
 2291 struct fstatat_args {
 2292         int     fd;
 2293         char    *path;
 2294         struct stat     *buf;
 2295         int     flag;
 2296 }
 2297 #endif
 2298 int
 2299 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2300 {
 2301         struct stat sb;
 2302         int error;
 2303 
 2304         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2305             UIO_USERSPACE, &sb, NULL);
 2306         if (error == 0)
 2307                 error = copyout(&sb, uap->buf, sizeof (sb));
 2308         return (error);
 2309 }
 2310 
 2311 int
 2312 kern_statat(struct thread *td, int flag, int fd, char *path,
 2313     enum uio_seg pathseg, struct stat *sbp,
 2314     void (*hook)(struct vnode *vp, struct stat *sbp))
 2315 {
 2316         struct nameidata nd;
 2317         struct stat sb;
 2318         int error;
 2319 
 2320         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2321                 return (EINVAL);
 2322 
 2323         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH |
 2324             AT_SYMLINK_NOFOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2325             pathseg, path, fd, &cap_fstat_rights, td);
 2326 
 2327         if ((error = namei(&nd)) != 0)
 2328                 return (error);
 2329         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2330         if (error == 0) {
 2331                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2332                 if (S_ISREG(sb.st_mode))
 2333                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2334                 if (__predict_false(hook != NULL))
 2335                         hook(nd.ni_vp, &sb);
 2336         }
 2337         NDFREE(&nd, NDF_ONLY_PNBUF);
 2338         vput(nd.ni_vp);
 2339         if (error != 0)
 2340                 return (error);
 2341 #ifdef __STAT_TIME_T_EXT
 2342         sb.st_atim_ext = 0;
 2343         sb.st_mtim_ext = 0;
 2344         sb.st_ctim_ext = 0;
 2345         sb.st_btim_ext = 0;
 2346 #endif
 2347         *sbp = sb;
 2348 #ifdef KTRACE
 2349         if (KTRPOINT(td, KTR_STRUCT))
 2350                 ktrstat(&sb);
 2351 #endif
 2352         return (0);
 2353 }
 2354 
 2355 #if defined(COMPAT_FREEBSD11)
 2356 /*
 2357  * Implementation of the NetBSD [l]stat() functions.
 2358  */
 2359 void
 2360 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 2361 {
 2362 
 2363         bzero(nsb, sizeof(*nsb));
 2364         nsb->st_dev = sb->st_dev;
 2365         nsb->st_ino = sb->st_ino;
 2366         nsb->st_mode = sb->st_mode;
 2367         nsb->st_nlink = sb->st_nlink;
 2368         nsb->st_uid = sb->st_uid;
 2369         nsb->st_gid = sb->st_gid;
 2370         nsb->st_rdev = sb->st_rdev;
 2371         nsb->st_atim = sb->st_atim;
 2372         nsb->st_mtim = sb->st_mtim;
 2373         nsb->st_ctim = sb->st_ctim;
 2374         nsb->st_size = sb->st_size;
 2375         nsb->st_blocks = sb->st_blocks;
 2376         nsb->st_blksize = sb->st_blksize;
 2377         nsb->st_flags = sb->st_flags;
 2378         nsb->st_gen = sb->st_gen;
 2379         nsb->st_birthtim = sb->st_birthtim;
 2380 }
 2381 
 2382 #ifndef _SYS_SYSPROTO_H_
 2383 struct freebsd11_nstat_args {
 2384         char    *path;
 2385         struct nstat *ub;
 2386 };
 2387 #endif
 2388 int
 2389 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 2390 {
 2391         struct stat sb;
 2392         struct nstat nsb;
 2393         int error;
 2394 
 2395         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2396             &sb, NULL);
 2397         if (error != 0)
 2398                 return (error);
 2399         freebsd11_cvtnstat(&sb, &nsb);
 2400         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2401 }
 2402 
 2403 /*
 2404  * NetBSD lstat.  Get file status; this version does not follow links.
 2405  */
 2406 #ifndef _SYS_SYSPROTO_H_
 2407 struct freebsd11_nlstat_args {
 2408         char    *path;
 2409         struct nstat *ub;
 2410 };
 2411 #endif
 2412 int
 2413 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 2414 {
 2415         struct stat sb;
 2416         struct nstat nsb;
 2417         int error;
 2418 
 2419         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2420             UIO_USERSPACE, &sb, NULL);
 2421         if (error != 0)
 2422                 return (error);
 2423         freebsd11_cvtnstat(&sb, &nsb);
 2424         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2425 }
 2426 #endif /* COMPAT_FREEBSD11 */
 2427 
 2428 /*
 2429  * Get configurable pathname variables.
 2430  */
 2431 #ifndef _SYS_SYSPROTO_H_
 2432 struct pathconf_args {
 2433         char    *path;
 2434         int     name;
 2435 };
 2436 #endif
 2437 int
 2438 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 2439 {
 2440         long value;
 2441         int error;
 2442 
 2443         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW,
 2444             &value);
 2445         if (error == 0)
 2446                 td->td_retval[0] = value;
 2447         return (error);
 2448 }
 2449 
 2450 #ifndef _SYS_SYSPROTO_H_
 2451 struct lpathconf_args {
 2452         char    *path;
 2453         int     name;
 2454 };
 2455 #endif
 2456 int
 2457 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 2458 {
 2459         long value;
 2460         int error;
 2461 
 2462         error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2463             NOFOLLOW, &value);
 2464         if (error == 0)
 2465                 td->td_retval[0] = value;
 2466         return (error);
 2467 }
 2468 
 2469 int
 2470 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2471     u_long flags, long *valuep)
 2472 {
 2473         struct nameidata nd;
 2474         int error;
 2475 
 2476         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2477             pathseg, path, td);
 2478         if ((error = namei(&nd)) != 0)
 2479                 return (error);
 2480         NDFREE(&nd, NDF_ONLY_PNBUF);
 2481 
 2482         error = VOP_PATHCONF(nd.ni_vp, name, valuep);
 2483         vput(nd.ni_vp);
 2484         return (error);
 2485 }
 2486 
 2487 /*
 2488  * Return target name of a symbolic link.
 2489  */
 2490 #ifndef _SYS_SYSPROTO_H_
 2491 struct readlink_args {
 2492         char    *path;
 2493         char    *buf;
 2494         size_t  count;
 2495 };
 2496 #endif
 2497 int
 2498 sys_readlink(struct thread *td, struct readlink_args *uap)
 2499 {
 2500 
 2501         return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2502             uap->buf, UIO_USERSPACE, uap->count));
 2503 }
 2504 #ifndef _SYS_SYSPROTO_H_
 2505 struct readlinkat_args {
 2506         int     fd;
 2507         char    *path;
 2508         char    *buf;
 2509         size_t  bufsize;
 2510 };
 2511 #endif
 2512 int
 2513 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2514 {
 2515 
 2516         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2517             uap->buf, UIO_USERSPACE, uap->bufsize));
 2518 }
 2519 
 2520 int
 2521 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2522     char *buf, enum uio_seg bufseg, size_t count)
 2523 {
 2524         struct vnode *vp;
 2525         struct nameidata nd;
 2526         int error;
 2527 
 2528         if (count > IOSIZE_MAX)
 2529                 return (EINVAL);
 2530 
 2531         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2532             pathseg, path, fd, td);
 2533 
 2534         if ((error = namei(&nd)) != 0)
 2535                 return (error);
 2536         NDFREE(&nd, NDF_ONLY_PNBUF);
 2537         vp = nd.ni_vp;
 2538 
 2539         error = kern_readlink_vp(vp, buf, bufseg, count, td);
 2540         vput(vp);
 2541 
 2542         return (error);
 2543 }
 2544 
 2545 /*
 2546  * Helper function to readlink from a vnode
 2547  */
 2548 static int
 2549 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
 2550     struct thread *td)
 2551 {
 2552         struct iovec aiov;
 2553         struct uio auio;
 2554         int error;
 2555 
 2556         ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
 2557 #ifdef MAC
 2558         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2559         if (error != 0)
 2560                 return (error);
 2561 #endif
 2562         if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
 2563                 return (EINVAL);
 2564 
 2565         aiov.iov_base = buf;
 2566         aiov.iov_len = count;
 2567         auio.uio_iov = &aiov;
 2568         auio.uio_iovcnt = 1;
 2569         auio.uio_offset = 0;
 2570         auio.uio_rw = UIO_READ;
 2571         auio.uio_segflg = bufseg;
 2572         auio.uio_td = td;
 2573         auio.uio_resid = count;
 2574         error = VOP_READLINK(vp, &auio, td->td_ucred);
 2575         td->td_retval[0] = count - auio.uio_resid;
 2576         return (error);
 2577 }
 2578 
 2579 /*
 2580  * Common implementation code for chflags() and fchflags().
 2581  */
 2582 static int
 2583 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 2584 {
 2585         struct mount *mp;
 2586         struct vattr vattr;
 2587         int error;
 2588 
 2589         /* We can't support the value matching VNOVAL. */
 2590         if (flags == VNOVAL)
 2591                 return (EOPNOTSUPP);
 2592 
 2593         /*
 2594          * Prevent non-root users from setting flags on devices.  When
 2595          * a device is reused, users can retain ownership of the device
 2596          * if they are allowed to set flags and programs assume that
 2597          * chown can't fail when done as root.
 2598          */
 2599         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2600                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2601                 if (error != 0)
 2602                         return (error);
 2603         }
 2604 
 2605         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2606                 return (error);
 2607         VATTR_NULL(&vattr);
 2608         vattr.va_flags = flags;
 2609         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2610 #ifdef MAC
 2611         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2612         if (error == 0)
 2613 #endif
 2614                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2615         VOP_UNLOCK(vp, 0);
 2616         vn_finished_write(mp);
 2617         return (error);
 2618 }
 2619 
 2620 /*
 2621  * Change flags of a file given a path name.
 2622  */
 2623 #ifndef _SYS_SYSPROTO_H_
 2624 struct chflags_args {
 2625         const char *path;
 2626         u_long  flags;
 2627 };
 2628 #endif
 2629 int
 2630 sys_chflags(struct thread *td, struct chflags_args *uap)
 2631 {
 2632 
 2633         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2634             uap->flags, 0));
 2635 }
 2636 
 2637 #ifndef _SYS_SYSPROTO_H_
 2638 struct chflagsat_args {
 2639         int     fd;
 2640         const char *path;
 2641         u_long  flags;
 2642         int     atflag;
 2643 }
 2644 #endif
 2645 int
 2646 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2647 {
 2648 
 2649         if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2650                 return (EINVAL);
 2651 
 2652         return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE,
 2653             uap->flags, uap->atflag));
 2654 }
 2655 
 2656 /*
 2657  * Same as chflags() but doesn't follow symlinks.
 2658  */
 2659 #ifndef _SYS_SYSPROTO_H_
 2660 struct lchflags_args {
 2661         const char *path;
 2662         u_long flags;
 2663 };
 2664 #endif
 2665 int
 2666 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 2667 {
 2668 
 2669         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2670             uap->flags, AT_SYMLINK_NOFOLLOW));
 2671 }
 2672 
 2673 static int
 2674 kern_chflagsat(struct thread *td, int fd, const char *path,
 2675     enum uio_seg pathseg, u_long flags, int atflag)
 2676 {
 2677         struct nameidata nd;
 2678         int error;
 2679 
 2680         AUDIT_ARG_FFLAGS(flags);
 2681         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW |
 2682             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2683             &cap_fchflags_rights, td);
 2684         if ((error = namei(&nd)) != 0)
 2685                 return (error);
 2686         NDFREE(&nd, NDF_ONLY_PNBUF);
 2687         error = setfflags(td, nd.ni_vp, flags);
 2688         vrele(nd.ni_vp);
 2689         return (error);
 2690 }
 2691 
 2692 /*
 2693  * Change flags of a file given a file descriptor.
 2694  */
 2695 #ifndef _SYS_SYSPROTO_H_
 2696 struct fchflags_args {
 2697         int     fd;
 2698         u_long  flags;
 2699 };
 2700 #endif
 2701 int
 2702 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 2703 {
 2704         struct file *fp;
 2705         int error;
 2706 
 2707         AUDIT_ARG_FD(uap->fd);
 2708         AUDIT_ARG_FFLAGS(uap->flags);
 2709         error = getvnode(td, uap->fd, &cap_fchflags_rights,
 2710             &fp);
 2711         if (error != 0)
 2712                 return (error);
 2713 #ifdef AUDIT
 2714         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2715         AUDIT_ARG_VNODE1(fp->f_vnode);
 2716         VOP_UNLOCK(fp->f_vnode, 0);
 2717 #endif
 2718         error = setfflags(td, fp->f_vnode, uap->flags);
 2719         fdrop(fp, td);
 2720         return (error);
 2721 }
 2722 
 2723 /*
 2724  * Common implementation code for chmod(), lchmod() and fchmod().
 2725  */
 2726 int
 2727 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 2728 {
 2729         struct mount *mp;
 2730         struct vattr vattr;
 2731         int error;
 2732 
 2733         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2734                 return (error);
 2735         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2736         VATTR_NULL(&vattr);
 2737         vattr.va_mode = mode & ALLPERMS;
 2738 #ifdef MAC
 2739         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2740         if (error == 0)
 2741 #endif
 2742                 error = VOP_SETATTR(vp, &vattr, cred);
 2743         VOP_UNLOCK(vp, 0);
 2744         vn_finished_write(mp);
 2745         return (error);
 2746 }
 2747 
 2748 /*
 2749  * Change mode of a file given path name.
 2750  */
 2751 #ifndef _SYS_SYSPROTO_H_
 2752 struct chmod_args {
 2753         char    *path;
 2754         int     mode;
 2755 };
 2756 #endif
 2757 int
 2758 sys_chmod(struct thread *td, struct chmod_args *uap)
 2759 {
 2760 
 2761         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2762             uap->mode, 0));
 2763 }
 2764 
 2765 #ifndef _SYS_SYSPROTO_H_
 2766 struct fchmodat_args {
 2767         int     dirfd;
 2768         char    *path;
 2769         mode_t  mode;
 2770         int     flag;
 2771 }
 2772 #endif
 2773 int
 2774 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2775 {
 2776 
 2777         if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2778                 return (EINVAL);
 2779 
 2780         return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE,
 2781             uap->mode, uap->flag));
 2782 }
 2783 
 2784 /*
 2785  * Change mode of a file given path name (don't follow links.)
 2786  */
 2787 #ifndef _SYS_SYSPROTO_H_
 2788 struct lchmod_args {
 2789         char    *path;
 2790         int     mode;
 2791 };
 2792 #endif
 2793 int
 2794 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 2795 {
 2796 
 2797         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2798             uap->mode, AT_SYMLINK_NOFOLLOW));
 2799 }
 2800 
 2801 int
 2802 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2803     mode_t mode, int flag)
 2804 {
 2805         struct nameidata nd;
 2806         int error;
 2807 
 2808         AUDIT_ARG_MODE(mode);
 2809         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 2810             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2811             &cap_fchmod_rights, td);
 2812         if ((error = namei(&nd)) != 0)
 2813                 return (error);
 2814         NDFREE(&nd, NDF_ONLY_PNBUF);
 2815         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2816         vrele(nd.ni_vp);
 2817         return (error);
 2818 }
 2819 
 2820 /*
 2821  * Change mode of a file given a file descriptor.
 2822  */
 2823 #ifndef _SYS_SYSPROTO_H_
 2824 struct fchmod_args {
 2825         int     fd;
 2826         int     mode;
 2827 };
 2828 #endif
 2829 int
 2830 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2831 {
 2832         struct file *fp;
 2833         int error;
 2834 
 2835         AUDIT_ARG_FD(uap->fd);
 2836         AUDIT_ARG_MODE(uap->mode);
 2837 
 2838         error = fget(td, uap->fd, &cap_fchmod_rights, &fp);
 2839         if (error != 0)
 2840                 return (error);
 2841         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2842         fdrop(fp, td);
 2843         return (error);
 2844 }
 2845 
 2846 /*
 2847  * Common implementation for chown(), lchown(), and fchown()
 2848  */
 2849 int
 2850 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
 2851     gid_t gid)
 2852 {
 2853         struct mount *mp;
 2854         struct vattr vattr;
 2855         int error;
 2856 
 2857         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2858                 return (error);
 2859         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2860         VATTR_NULL(&vattr);
 2861         vattr.va_uid = uid;
 2862         vattr.va_gid = gid;
 2863 #ifdef MAC
 2864         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2865             vattr.va_gid);
 2866         if (error == 0)
 2867 #endif
 2868                 error = VOP_SETATTR(vp, &vattr, cred);
 2869         VOP_UNLOCK(vp, 0);
 2870         vn_finished_write(mp);
 2871         return (error);
 2872 }
 2873 
 2874 /*
 2875  * Set ownership given a path name.
 2876  */
 2877 #ifndef _SYS_SYSPROTO_H_
 2878 struct chown_args {
 2879         char    *path;
 2880         int     uid;
 2881         int     gid;
 2882 };
 2883 #endif
 2884 int
 2885 sys_chown(struct thread *td, struct chown_args *uap)
 2886 {
 2887 
 2888         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 2889             uap->gid, 0));
 2890 }
 2891 
 2892 #ifndef _SYS_SYSPROTO_H_
 2893 struct fchownat_args {
 2894         int fd;
 2895         const char * path;
 2896         uid_t uid;
 2897         gid_t gid;
 2898         int flag;
 2899 };
 2900 #endif
 2901 int
 2902 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2903 {
 2904 
 2905         if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 2906                 return (EINVAL);
 2907 
 2908         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2909             uap->gid, uap->flag));
 2910 }
 2911 
 2912 int
 2913 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2914     int uid, int gid, int flag)
 2915 {
 2916         struct nameidata nd;
 2917         int error;
 2918 
 2919         AUDIT_ARG_OWNER(uid, gid);
 2920         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 2921             AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd,
 2922             &cap_fchown_rights, td);
 2923 
 2924         if ((error = namei(&nd)) != 0)
 2925                 return (error);
 2926         NDFREE(&nd, NDF_ONLY_PNBUF);
 2927         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2928         vrele(nd.ni_vp);
 2929         return (error);
 2930 }
 2931 
 2932 /*
 2933  * Set ownership given a path name, do not cross symlinks.
 2934  */
 2935 #ifndef _SYS_SYSPROTO_H_
 2936 struct lchown_args {
 2937         char    *path;
 2938         int     uid;
 2939         int     gid;
 2940 };
 2941 #endif
 2942 int
 2943 sys_lchown(struct thread *td, struct lchown_args *uap)
 2944 {
 2945 
 2946         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2947             uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 2948 }
 2949 
 2950 /*
 2951  * Set ownership given a file descriptor.
 2952  */
 2953 #ifndef _SYS_SYSPROTO_H_
 2954 struct fchown_args {
 2955         int     fd;
 2956         int     uid;
 2957         int     gid;
 2958 };
 2959 #endif
 2960 int
 2961 sys_fchown(struct thread *td, struct fchown_args *uap)
 2962 {
 2963         struct file *fp;
 2964         int error;
 2965 
 2966         AUDIT_ARG_FD(uap->fd);
 2967         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 2968         error = fget(td, uap->fd, &cap_fchown_rights, &fp);
 2969         if (error != 0)
 2970                 return (error);
 2971         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 2972         fdrop(fp, td);
 2973         return (error);
 2974 }
 2975 
 2976 /*
 2977  * Common implementation code for utimes(), lutimes(), and futimes().
 2978  */
 2979 static int
 2980 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
 2981     struct timespec *tsp)
 2982 {
 2983         struct timeval tv[2];
 2984         const struct timeval *tvp;
 2985         int error;
 2986 
 2987         if (usrtvp == NULL) {
 2988                 vfs_timestamp(&tsp[0]);
 2989                 tsp[1] = tsp[0];
 2990         } else {
 2991                 if (tvpseg == UIO_SYSSPACE) {
 2992                         tvp = usrtvp;
 2993                 } else {
 2994                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 2995                                 return (error);
 2996                         tvp = tv;
 2997                 }
 2998 
 2999                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3000                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3001                         return (EINVAL);
 3002                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3003                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3004         }
 3005         return (0);
 3006 }
 3007 
 3008 /*
 3009  * Common implementation code for futimens(), utimensat().
 3010  */
 3011 #define UTIMENS_NULL    0x1
 3012 #define UTIMENS_EXIT    0x2
 3013 static int
 3014 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3015     struct timespec *tsp, int *retflags)
 3016 {
 3017         struct timespec tsnow;
 3018         int error;
 3019 
 3020         vfs_timestamp(&tsnow);
 3021         *retflags = 0;
 3022         if (usrtsp == NULL) {
 3023                 tsp[0] = tsnow;
 3024                 tsp[1] = tsnow;
 3025                 *retflags |= UTIMENS_NULL;
 3026                 return (0);
 3027         }
 3028         if (tspseg == UIO_SYSSPACE) {
 3029                 tsp[0] = usrtsp[0];
 3030                 tsp[1] = usrtsp[1];
 3031         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3032                 return (error);
 3033         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3034                 *retflags |= UTIMENS_EXIT;
 3035         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3036                 *retflags |= UTIMENS_NULL;
 3037         if (tsp[0].tv_nsec == UTIME_OMIT)
 3038                 tsp[0].tv_sec = VNOVAL;
 3039         else if (tsp[0].tv_nsec == UTIME_NOW)
 3040                 tsp[0] = tsnow;
 3041         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3042                 return (EINVAL);
 3043         if (tsp[1].tv_nsec == UTIME_OMIT)
 3044                 tsp[1].tv_sec = VNOVAL;
 3045         else if (tsp[1].tv_nsec == UTIME_NOW)
 3046                 tsp[1] = tsnow;
 3047         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3048                 return (EINVAL);
 3049 
 3050         return (0);
 3051 }
 3052 
 3053 /*
 3054  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3055  * and utimensat().
 3056  */
 3057 static int
 3058 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
 3059     int numtimes, int nullflag)
 3060 {
 3061         struct mount *mp;
 3062         struct vattr vattr;
 3063         int error, setbirthtime;
 3064 
 3065         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3066                 return (error);
 3067         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3068         setbirthtime = 0;
 3069         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3070             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3071                 setbirthtime = 1;
 3072         VATTR_NULL(&vattr);
 3073         vattr.va_atime = ts[0];
 3074         vattr.va_mtime = ts[1];
 3075         if (setbirthtime)
 3076                 vattr.va_birthtime = ts[1];
 3077         if (numtimes > 2)
 3078                 vattr.va_birthtime = ts[2];
 3079         if (nullflag)
 3080                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3081 #ifdef MAC
 3082         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3083             vattr.va_mtime);
 3084 #endif
 3085         if (error == 0)
 3086                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3087         VOP_UNLOCK(vp, 0);
 3088         vn_finished_write(mp);
 3089         return (error);
 3090 }
 3091 
 3092 /*
 3093  * Set the access and modification times of a file.
 3094  */
 3095 #ifndef _SYS_SYSPROTO_H_
 3096 struct utimes_args {
 3097         char    *path;
 3098         struct  timeval *tptr;
 3099 };
 3100 #endif
 3101 int
 3102 sys_utimes(struct thread *td, struct utimes_args *uap)
 3103 {
 3104 
 3105         return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3106             uap->tptr, UIO_USERSPACE));
 3107 }
 3108 
 3109 #ifndef _SYS_SYSPROTO_H_
 3110 struct futimesat_args {
 3111         int fd;
 3112         const char * path;
 3113         const struct timeval * times;
 3114 };
 3115 #endif
 3116 int
 3117 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3118 {
 3119 
 3120         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3121             uap->times, UIO_USERSPACE));
 3122 }
 3123 
 3124 int
 3125 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3126     struct timeval *tptr, enum uio_seg tptrseg)
 3127 {
 3128         struct nameidata nd;
 3129         struct timespec ts[2];
 3130         int error;
 3131 
 3132         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3133                 return (error);
 3134         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3135             &cap_futimes_rights, td);
 3136 
 3137         if ((error = namei(&nd)) != 0)
 3138                 return (error);
 3139         NDFREE(&nd, NDF_ONLY_PNBUF);
 3140         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3141         vrele(nd.ni_vp);
 3142         return (error);
 3143 }
 3144 
 3145 /*
 3146  * Set the access and modification times of a file.
 3147  */
 3148 #ifndef _SYS_SYSPROTO_H_
 3149 struct lutimes_args {
 3150         char    *path;
 3151         struct  timeval *tptr;
 3152 };
 3153 #endif
 3154 int
 3155 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 3156 {
 3157 
 3158         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3159             UIO_USERSPACE));
 3160 }
 3161 
 3162 int
 3163 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3164     struct timeval *tptr, enum uio_seg tptrseg)
 3165 {
 3166         struct timespec ts[2];
 3167         struct nameidata nd;
 3168         int error;
 3169 
 3170         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3171                 return (error);
 3172         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3173         if ((error = namei(&nd)) != 0)
 3174                 return (error);
 3175         NDFREE(&nd, NDF_ONLY_PNBUF);
 3176         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3177         vrele(nd.ni_vp);
 3178         return (error);
 3179 }
 3180 
 3181 /*
 3182  * Set the access and modification times of a file.
 3183  */
 3184 #ifndef _SYS_SYSPROTO_H_
 3185 struct futimes_args {
 3186         int     fd;
 3187         struct  timeval *tptr;
 3188 };
 3189 #endif
 3190 int
 3191 sys_futimes(struct thread *td, struct futimes_args *uap)
 3192 {
 3193 
 3194         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3195 }
 3196 
 3197 int
 3198 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3199     enum uio_seg tptrseg)
 3200 {
 3201         struct timespec ts[2];
 3202         struct file *fp;
 3203         int error;
 3204 
 3205         AUDIT_ARG_FD(fd);
 3206         error = getutimes(tptr, tptrseg, ts);
 3207         if (error != 0)
 3208                 return (error);
 3209         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3210         if (error != 0)
 3211                 return (error);
 3212 #ifdef AUDIT
 3213         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3214         AUDIT_ARG_VNODE1(fp->f_vnode);
 3215         VOP_UNLOCK(fp->f_vnode, 0);
 3216 #endif
 3217         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3218         fdrop(fp, td);
 3219         return (error);
 3220 }
 3221 
 3222 int
 3223 sys_futimens(struct thread *td, struct futimens_args *uap)
 3224 {
 3225 
 3226         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3227 }
 3228 
 3229 int
 3230 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3231     enum uio_seg tptrseg)
 3232 {
 3233         struct timespec ts[2];
 3234         struct file *fp;
 3235         int error, flags;
 3236 
 3237         AUDIT_ARG_FD(fd);
 3238         error = getutimens(tptr, tptrseg, ts, &flags);
 3239         if (error != 0)
 3240                 return (error);
 3241         if (flags & UTIMENS_EXIT)
 3242                 return (0);
 3243         error = getvnode(td, fd, &cap_futimes_rights, &fp);
 3244         if (error != 0)
 3245                 return (error);
 3246 #ifdef AUDIT
 3247         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3248         AUDIT_ARG_VNODE1(fp->f_vnode);
 3249         VOP_UNLOCK(fp->f_vnode, 0);
 3250 #endif
 3251         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3252         fdrop(fp, td);
 3253         return (error);
 3254 }
 3255 
 3256 int
 3257 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3258 {
 3259 
 3260         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3261             uap->times, UIO_USERSPACE, uap->flag));
 3262 }
 3263 
 3264 int
 3265 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3266     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3267 {
 3268         struct nameidata nd;
 3269         struct timespec ts[2];
 3270         int error, flags;
 3271 
 3272         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 3273                 return (EINVAL);
 3274 
 3275         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3276                 return (error);
 3277         NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 3278             AT_RESOLVE_BENEATH) | AUDITVNODE1,
 3279             pathseg, path, fd, &cap_futimes_rights, td);
 3280         if ((error = namei(&nd)) != 0)
 3281                 return (error);
 3282         /*
 3283          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3284          * POSIX states:
 3285          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3286          * "Search permission is denied by a component of the path prefix."
 3287          */
 3288         NDFREE(&nd, NDF_ONLY_PNBUF);
 3289         if ((flags & UTIMENS_EXIT) == 0)
 3290                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3291         vrele(nd.ni_vp);
 3292         return (error);
 3293 }
 3294 
 3295 /*
 3296  * Truncate a file given its path name.
 3297  */
 3298 #ifndef _SYS_SYSPROTO_H_
 3299 struct truncate_args {
 3300         char    *path;
 3301         int     pad;
 3302         off_t   length;
 3303 };
 3304 #endif
 3305 int
 3306 sys_truncate(struct thread *td, struct truncate_args *uap)
 3307 {
 3308 
 3309         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3310 }
 3311 
 3312 int
 3313 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3314 {
 3315         struct mount *mp;
 3316         struct vnode *vp;
 3317         void *rl_cookie;
 3318         struct vattr vattr;
 3319         struct nameidata nd;
 3320         int error;
 3321 
 3322         if (length < 0)
 3323                 return(EINVAL);
 3324         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3325         if ((error = namei(&nd)) != 0)
 3326                 return (error);
 3327         vp = nd.ni_vp;
 3328         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3329         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3330                 vn_rangelock_unlock(vp, rl_cookie);
 3331                 vrele(vp);
 3332                 return (error);
 3333         }
 3334         NDFREE(&nd, NDF_ONLY_PNBUF);
 3335         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3336         if (vp->v_type == VDIR)
 3337                 error = EISDIR;
 3338 #ifdef MAC
 3339         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3340         }
 3341 #endif
 3342         else if ((error = vn_writechk(vp)) == 0 &&
 3343             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3344                 VATTR_NULL(&vattr);
 3345                 vattr.va_size = length;
 3346                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3347         }
 3348         VOP_UNLOCK(vp, 0);
 3349         vn_finished_write(mp);
 3350         vn_rangelock_unlock(vp, rl_cookie);
 3351         vrele(vp);
 3352         return (error);
 3353 }
 3354 
 3355 #if defined(COMPAT_43)
 3356 /*
 3357  * Truncate a file given its path name.
 3358  */
 3359 #ifndef _SYS_SYSPROTO_H_
 3360 struct otruncate_args {
 3361         char    *path;
 3362         long    length;
 3363 };
 3364 #endif
 3365 int
 3366 otruncate(struct thread *td, struct otruncate_args *uap)
 3367 {
 3368 
 3369         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3370 }
 3371 #endif /* COMPAT_43 */
 3372 
 3373 #if defined(COMPAT_FREEBSD6)
 3374 /* Versions with the pad argument */
 3375 int
 3376 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3377 {
 3378 
 3379         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3380 }
 3381 
 3382 int
 3383 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3384 {
 3385 
 3386         return (kern_ftruncate(td, uap->fd, uap->length));
 3387 }
 3388 #endif
 3389 
 3390 int
 3391 kern_fsync(struct thread *td, int fd, bool fullsync)
 3392 {
 3393         struct vnode *vp;
 3394         struct mount *mp;
 3395         struct file *fp;
 3396         int error, lock_flags;
 3397 
 3398         AUDIT_ARG_FD(fd);
 3399         error = getvnode(td, fd, &cap_fsync_rights, &fp);
 3400         if (error != 0)
 3401                 return (error);
 3402         vp = fp->f_vnode;
 3403 #if 0
 3404         if (!fullsync)
 3405                 /* XXXKIB: compete outstanding aio writes */;
 3406 #endif
 3407         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3408         if (error != 0)
 3409                 goto drop;
 3410         if (MNT_SHARED_WRITES(mp) ||
 3411             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3412                 lock_flags = LK_SHARED;
 3413         } else {
 3414                 lock_flags = LK_EXCLUSIVE;
 3415         }
 3416         vn_lock(vp, lock_flags | LK_RETRY);
 3417         AUDIT_ARG_VNODE1(vp);
 3418         if (vp->v_object != NULL) {
 3419                 VM_OBJECT_WLOCK(vp->v_object);
 3420                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3421                 VM_OBJECT_WUNLOCK(vp->v_object);
 3422         }
 3423         error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 3424         VOP_UNLOCK(vp, 0);
 3425         vn_finished_write(mp);
 3426 drop:
 3427         fdrop(fp, td);
 3428         return (error);
 3429 }
 3430 
 3431 /*
 3432  * Sync an open file.
 3433  */
 3434 #ifndef _SYS_SYSPROTO_H_
 3435 struct fsync_args {
 3436         int     fd;
 3437 };
 3438 #endif
 3439 int
 3440 sys_fsync(struct thread *td, struct fsync_args *uap)
 3441 {
 3442 
 3443         return (kern_fsync(td, uap->fd, true));
 3444 }
 3445 
 3446 int
 3447 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 3448 {
 3449 
 3450         return (kern_fsync(td, uap->fd, false));
 3451 }
 3452 
 3453 /*
 3454  * Rename files.  Source and destination must either both be directories, or
 3455  * both not be directories.  If target is a directory, it must be empty.
 3456  */
 3457 #ifndef _SYS_SYSPROTO_H_
 3458 struct rename_args {
 3459         char    *from;
 3460         char    *to;
 3461 };
 3462 #endif
 3463 int
 3464 sys_rename(struct thread *td, struct rename_args *uap)
 3465 {
 3466 
 3467         return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 3468             uap->to, UIO_USERSPACE));
 3469 }
 3470 
 3471 #ifndef _SYS_SYSPROTO_H_
 3472 struct renameat_args {
 3473         int     oldfd;
 3474         char    *old;
 3475         int     newfd;
 3476         char    *new;
 3477 };
 3478 #endif
 3479 int
 3480 sys_renameat(struct thread *td, struct renameat_args *uap)
 3481 {
 3482 
 3483         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3484             UIO_USERSPACE));
 3485 }
 3486 
 3487 int
 3488 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3489     enum uio_seg pathseg)
 3490 {
 3491         struct mount *mp = NULL;
 3492         struct vnode *tvp, *fvp, *tdvp;
 3493         struct nameidata fromnd, tond;
 3494         int error;
 3495 
 3496 again:
 3497         bwillwrite();
 3498 #ifdef MAC
 3499         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3500             AUDITVNODE1, pathseg, old, oldfd,
 3501             &cap_renameat_source_rights, td);
 3502         if ((error = namei(&fromnd)) != 0)
 3503                 return (error);
 3504         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3505             fromnd.ni_vp, &fromnd.ni_cnd);
 3506         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3507         if (fromnd.ni_dvp != fromnd.ni_vp)
 3508                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3509         if (error != 0) {
 3510                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3511                 vrele(fromnd.ni_dvp);
 3512                 vrele(fromnd.ni_vp);
 3513                 if (fromnd.ni_startdir)
 3514                         vrele(fromnd.ni_startdir);
 3515                 return (error);
 3516         }
 3517 #else
 3518         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3519             pathseg, old, oldfd,
 3520             &cap_renameat_source_rights, td);
 3521         if ((error = namei(&fromnd)) != 0)
 3522                 return (error);
 3523 #endif
 3524         fvp = fromnd.ni_vp;
 3525         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3526             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3527             &cap_renameat_target_rights, td);
 3528         if (fromnd.ni_vp->v_type == VDIR)
 3529                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3530         if ((error = namei(&tond)) != 0) {
 3531                 /* Translate error code for rename("dir1", "dir2/."). */
 3532                 if (error == EISDIR && fvp->v_type == VDIR)
 3533                         error = EINVAL;
 3534                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3535                 vrele(fromnd.ni_dvp);
 3536                 vrele(fvp);
 3537                 goto out1;
 3538         }
 3539         tdvp = tond.ni_dvp;
 3540         tvp = tond.ni_vp;
 3541         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3542         if (error != 0) {
 3543                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3544                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3545                 if (tvp != NULL)
 3546                         vput(tvp);
 3547                 if (tdvp == tvp)
 3548                         vrele(tdvp);
 3549                 else
 3550                         vput(tdvp);
 3551                 vrele(fromnd.ni_dvp);
 3552                 vrele(fvp);
 3553                 vrele(tond.ni_startdir);
 3554                 if (fromnd.ni_startdir != NULL)
 3555                         vrele(fromnd.ni_startdir);
 3556                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3557                 if (error != 0)
 3558                         return (error);
 3559                 goto again;
 3560         }
 3561         if (tvp != NULL) {
 3562                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3563                         error = ENOTDIR;
 3564                         goto out;
 3565                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3566                         error = EISDIR;
 3567                         goto out;
 3568                 }
 3569 #ifdef CAPABILITIES
 3570                 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
 3571                         /*
 3572                          * If the target already exists we require CAP_UNLINKAT
 3573                          * from 'newfd', when newfd was used for the lookup.
 3574                          */
 3575                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3576                             &cap_unlinkat_rights);
 3577                         if (error != 0)
 3578                                 goto out;
 3579                 }
 3580 #endif
 3581         }
 3582         if (fvp == tdvp) {
 3583                 error = EINVAL;
 3584                 goto out;
 3585         }
 3586         /*
 3587          * If the source is the same as the destination (that is, if they
 3588          * are links to the same vnode), then there is nothing to do.
 3589          */
 3590         if (fvp == tvp)
 3591                 error = -1;
 3592 #ifdef MAC
 3593         else
 3594                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3595                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3596 #endif
 3597 out:
 3598         if (error == 0) {
 3599                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3600                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3601                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3602                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3603         } else {
 3604                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3605                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3606                 if (tvp != NULL)
 3607                         vput(tvp);
 3608                 if (tdvp == tvp)
 3609                         vrele(tdvp);
 3610                 else
 3611                         vput(tdvp);
 3612                 vrele(fromnd.ni_dvp);
 3613                 vrele(fvp);
 3614         }
 3615         vrele(tond.ni_startdir);
 3616         vn_finished_write(mp);
 3617 out1:
 3618         if (fromnd.ni_startdir)
 3619                 vrele(fromnd.ni_startdir);
 3620         if (error == -1)
 3621                 return (0);
 3622         return (error);
 3623 }
 3624 
 3625 /*
 3626  * Make a directory file.
 3627  */
 3628 #ifndef _SYS_SYSPROTO_H_
 3629 struct mkdir_args {
 3630         char    *path;
 3631         int     mode;
 3632 };
 3633 #endif
 3634 int
 3635 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 3636 {
 3637 
 3638         return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3639             uap->mode));
 3640 }
 3641 
 3642 #ifndef _SYS_SYSPROTO_H_
 3643 struct mkdirat_args {
 3644         int     fd;
 3645         char    *path;
 3646         mode_t  mode;
 3647 };
 3648 #endif
 3649 int
 3650 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3651 {
 3652 
 3653         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3654 }
 3655 
 3656 int
 3657 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3658     int mode)
 3659 {
 3660         struct mount *mp;
 3661         struct vnode *vp;
 3662         struct vattr vattr;
 3663         struct nameidata nd;
 3664         int error;
 3665 
 3666         AUDIT_ARG_MODE(mode);
 3667 restart:
 3668         bwillwrite();
 3669         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3670             NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
 3671             td);
 3672         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3673         if ((error = namei(&nd)) != 0)
 3674                 return (error);
 3675         vp = nd.ni_vp;
 3676         if (vp != NULL) {
 3677                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3678                 /*
 3679                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3680                  * the strange behaviour of leaving the vnode unlocked
 3681                  * if the target is the same vnode as the parent.
 3682                  */
 3683                 if (vp == nd.ni_dvp)
 3684                         vrele(nd.ni_dvp);
 3685                 else
 3686                         vput(nd.ni_dvp);
 3687                 vrele(vp);
 3688                 return (EEXIST);
 3689         }
 3690         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3691                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3692                 vput(nd.ni_dvp);
 3693                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3694                         return (error);
 3695                 goto restart;
 3696         }
 3697         VATTR_NULL(&vattr);
 3698         vattr.va_type = VDIR;
 3699         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3700 #ifdef MAC
 3701         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3702             &vattr);
 3703         if (error != 0)
 3704                 goto out;
 3705 #endif
 3706         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3707 #ifdef MAC
 3708 out:
 3709 #endif
 3710         NDFREE(&nd, NDF_ONLY_PNBUF);
 3711         vput(nd.ni_dvp);
 3712         if (error == 0)
 3713                 vput(nd.ni_vp);
 3714         vn_finished_write(mp);
 3715         return (error);
 3716 }
 3717 
 3718 /*
 3719  * Remove a directory file.
 3720  */
 3721 #ifndef _SYS_SYSPROTO_H_
 3722 struct rmdir_args {
 3723         char    *path;
 3724 };
 3725 #endif
 3726 int
 3727 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 3728 {
 3729 
 3730         return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
 3731 }
 3732 
 3733 int
 3734 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3735     int flag)
 3736 {
 3737         struct mount *mp;
 3738         struct vnode *vp;
 3739         struct nameidata nd;
 3740         int error;
 3741 
 3742 restart:
 3743         bwillwrite();
 3744         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 3745             at2cnpflags(flag, AT_RESOLVE_BENEATH),
 3746             pathseg, path, fd, &cap_unlinkat_rights, td);
 3747         if ((error = namei(&nd)) != 0)
 3748                 return (error);
 3749         vp = nd.ni_vp;
 3750         if (vp->v_type != VDIR) {
 3751                 error = ENOTDIR;
 3752                 goto out;
 3753         }
 3754         /*
 3755          * No rmdir "." please.
 3756          */
 3757         if (nd.ni_dvp == vp) {
 3758                 error = EINVAL;
 3759                 goto out;
 3760         }
 3761         /*
 3762          * The root of a mounted filesystem cannot be deleted.
 3763          */
 3764         if (vp->v_vflag & VV_ROOT) {
 3765                 error = EBUSY;
 3766                 goto out;
 3767         }
 3768 #ifdef MAC
 3769         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3770             &nd.ni_cnd);
 3771         if (error != 0)
 3772                 goto out;
 3773 #endif
 3774         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3775                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3776                 vput(vp);
 3777                 if (nd.ni_dvp == vp)
 3778                         vrele(nd.ni_dvp);
 3779                 else
 3780                         vput(nd.ni_dvp);
 3781                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3782                         return (error);
 3783                 goto restart;
 3784         }
 3785         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3786         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3787         vn_finished_write(mp);
 3788 out:
 3789         NDFREE(&nd, NDF_ONLY_PNBUF);
 3790         vput(vp);
 3791         if (nd.ni_dvp == vp)
 3792                 vrele(nd.ni_dvp);
 3793         else
 3794                 vput(nd.ni_dvp);
 3795         return (error);
 3796 }
 3797 
 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 /*
  * Read directory entries from descriptor `fd' and copy them out to the
  * user buffer `ubuf' in the struct freebsd11_dirent layout.
  *
  * The entries are first read by kern_getdirentries() into a temporary
  * kernel buffer of at most 64KB and then converted one record at a time.
  * If `basep' is not NULL it receives the offset reported by
  * kern_getdirentries().  If `func' is not NULL it is called on every
  * converted record right before that record is copied out.
  *
  * Records whose name does not fit into freebsd11_dirent's d_name are
  * skipped.  A file number that changes when stored in the old d_fileno
  * is handled according to ino64_trunc_error: 1 fails the call with
  * EOVERFLOW, 2 substitutes UINT32_MAX, any other value keeps the
  * truncated number.
  *
  * Returns 0 and stores the number of bytes copied out in
  * td->td_retval[0] on success, otherwise an errno value.
  */
 int
 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
     long *basep, void (*func)(struct freebsd11_dirent *))
 {
         struct freebsd11_dirent dstdp;
         struct dirent *dp, *edp;
         char *dirbuf;
         off_t base;
         ssize_t resid, ucount;
         int error;

         /* XXX arbitrary sanity limit on `count'. */
         count = min(count, 64 * 1024);

         dirbuf = malloc(count, M_TEMP, M_WAITOK);

         error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
             UIO_SYSSPACE);
         if (error != 0)
                 goto done;
         if (basep != NULL)
                 *basep = base;

         ucount = 0;
         for (dp = (struct dirent *)dirbuf,
             edp = (struct dirent *)&dirbuf[count - resid];
             ucount < count && dp < edp; ) {
                 if (dp->d_reclen == 0)
                         break;
                 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
                 if (dp->d_namlen >= sizeof(dstdp.d_name)) {
                         /*
                          * The name cannot be represented in the old
                          * layout, so drop the entry.  Step to the next
                          * source record first: the loop has no increment
                          * clause, and a bare `continue' would look at the
                          * same record forever.
                          */
                         dp = (struct dirent *)((char *)dp + dp->d_reclen);
                         continue;
                 }
                 dstdp.d_type = dp->d_type;
                 dstdp.d_namlen = dp->d_namlen;
                 dstdp.d_fileno = dp->d_fileno;          /* truncate */
                 if (dstdp.d_fileno != dp->d_fileno) {
                         switch (ino64_trunc_error) {
                         default:
                         case 0:
                                 break;
                         case 1:
                                 error = EOVERFLOW;
                                 goto done;
                         case 2:
                                 dstdp.d_fileno = UINT32_MAX;
                                 break;
                         }
                 }
                 /* Header plus the NUL-terminated name, padded to 4 bytes. */
                 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
                     ((dp->d_namlen + 1 + 3) &~ 3);
                 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
                 /* Zero the terminator and padding that follow the name. */
                 bzero(dstdp.d_name + dstdp.d_namlen,
                     dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
                     dstdp.d_namlen);
                 MPASS(dstdp.d_reclen <= dp->d_reclen);
                 MPASS(ucount + dstdp.d_reclen <= count);
                 if (func != NULL)
                         func(&dstdp);
                 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
                 if (error != 0)
                         break;
                 dp = (struct dirent *)((char *)dp + dp->d_reclen);
                 ucount += dstdp.d_reclen;
         }

 done:
         free(dirbuf, M_TEMP);
         if (error == 0)
                 td->td_retval[0] = ucount;
         return (error);
 }
 #endif /* COMPAT */
 3871 
 3872 #ifdef COMPAT_43
 3873 static void
 3874 ogetdirentries_cvt(struct freebsd11_dirent *dp)
 3875 {
 3876 #if (BYTE_ORDER == LITTLE_ENDIAN)
 3877         /*
 3878          * The expected low byte of dp->d_namlen is our dp->d_type.
 3879          * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
 3880          */
 3881         dp->d_type = dp->d_namlen;
 3882         dp->d_namlen = 0;
 3883 #else
 3884         /*
 3885          * The dp->d_type is the high byte of the expected dp->d_namlen,
 3886          * so must be zero'ed.
 3887          */
 3888         dp->d_type = 0;
 3889 #endif
 3890 }
 3891 
 3892 /*
 3893  * Read a block of directory entries in a filesystem independent format.
 3894  */
 3895 #ifndef _SYS_SYSPROTO_H_
 3896 struct ogetdirentries_args {
 3897         int     fd;
 3898         char    *buf;
 3899         u_int   count;
 3900         long    *basep;
 3901 };
 3902 #endif
 3903 int
 3904 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3905 {
 3906         long loff;
 3907         int error;
 3908 
 3909         error = kern_ogetdirentries(td, uap, &loff);
 3910         if (error == 0)
 3911                 error = copyout(&loff, uap->basep, sizeof(long));
 3912         return (error);
 3913 }
 3914 
 3915 int
 3916 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3917     long *ploff)
 3918 {
 3919         long base;
 3920         int error;
 3921 
 3922         /* XXX arbitrary sanity limit on `count'. */
 3923         if (uap->count > 64 * 1024)
 3924                 return (EINVAL);
 3925 
 3926         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 3927             &base, ogetdirentries_cvt);
 3928 
 3929         if (error == 0 && uap->basep != NULL)
 3930                 error = copyout(&base, uap->basep, sizeof(long));
 3931 
 3932         return (error);
 3933 }
 3934 #endif /* COMPAT_43 */
 3935 
 3936 #if defined(COMPAT_FREEBSD11)
 3937 #ifndef _SYS_SYSPROTO_H_
 3938 struct freebsd11_getdirentries_args {
 3939         int     fd;
 3940         char    *buf;
 3941         u_int   count;
 3942         long    *basep;
 3943 };
 3944 #endif
 3945 int
 3946 freebsd11_getdirentries(struct thread *td,
 3947     struct freebsd11_getdirentries_args *uap)
 3948 {
 3949         long base;
 3950         int error;
 3951 
 3952         error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 3953             &base, NULL);
 3954 
 3955         if (error == 0 && uap->basep != NULL)
 3956                 error = copyout(&base, uap->basep, sizeof(long));
 3957         return (error);
 3958 }
 3959 
 3960 int
 3961 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
 3962 {
 3963         struct freebsd11_getdirentries_args ap;
 3964 
 3965         ap.fd = uap->fd;
 3966         ap.buf = uap->buf;
 3967         ap.count = uap->count;
 3968         ap.basep = NULL;
 3969         return (freebsd11_getdirentries(td, &ap));
 3970 }
 3971 #endif /* COMPAT_FREEBSD11 */
 3972 
 3973 /*
 3974  * Read a block of directory entries in a filesystem independent format.
 3975  */
 3976 int
 3977 sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
 3978 {
 3979         off_t base;
 3980         int error;
 3981 
 3982         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 3983             NULL, UIO_USERSPACE);
 3984         if (error != 0)
 3985                 return (error);
 3986         if (uap->basep != NULL)
 3987                 error = copyout(&base, uap->basep, sizeof(off_t));
 3988         return (error);
 3989 }
 3990 
 3991 int
 3992 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
 3993     off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 3994 {
 3995         struct vnode *vp;
 3996         struct file *fp;
 3997         struct uio auio;
 3998         struct iovec aiov;
 3999         off_t loff;
 4000         int error, eofflag;
 4001         off_t foffset;
 4002 
 4003         AUDIT_ARG_FD(fd);
 4004         if (count > IOSIZE_MAX)
 4005                 return (EINVAL);
 4006         auio.uio_resid = count;
 4007         error = getvnode(td, fd, &cap_read_rights, &fp);
 4008         if (error != 0)
 4009                 return (error);
 4010         if ((fp->f_flag & FREAD) == 0) {
 4011                 fdrop(fp, td);
 4012                 return (EBADF);
 4013         }
 4014         vp = fp->f_vnode;
 4015         foffset = foffset_lock(fp, 0);
 4016 unionread:
 4017         if (vp->v_type != VDIR) {
 4018                 error = EINVAL;
 4019                 goto fail;
 4020         }
 4021         aiov.iov_base = buf;
 4022         aiov.iov_len = count;
 4023         auio.uio_iov = &aiov;
 4024         auio.uio_iovcnt = 1;
 4025         auio.uio_rw = UIO_READ;
 4026         auio.uio_segflg = bufseg;
 4027         auio.uio_td = td;
 4028         vn_lock(vp, LK_SHARED | LK_RETRY);
 4029         AUDIT_ARG_VNODE1(vp);
 4030         loff = auio.uio_offset = foffset;
 4031 #ifdef MAC
 4032         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4033         if (error == 0)
 4034 #endif
 4035                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4036                     NULL);
 4037         foffset = auio.uio_offset;
 4038         if (error != 0) {
 4039                 VOP_UNLOCK(vp, 0);
 4040                 goto fail;
 4041         }
 4042         if (count == auio.uio_resid &&
 4043             (vp->v_vflag & VV_ROOT) &&
 4044             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4045                 struct vnode *tvp = vp;
 4046 
 4047                 vp = vp->v_mount->mnt_vnodecovered;
 4048                 VREF(vp);
 4049                 fp->f_vnode = vp;
 4050                 fp->f_data = vp;
 4051                 foffset = 0;
 4052                 vput(tvp);
 4053                 goto unionread;
 4054         }
 4055         VOP_UNLOCK(vp, 0);
 4056         *basep = loff;
 4057         if (residp != NULL)
 4058                 *residp = auio.uio_resid;
 4059         td->td_retval[0] = count - auio.uio_resid;
 4060 fail:
 4061         foffset_unlock(fp, foffset, 0);
 4062         fdrop(fp, td);
 4063         return (error);
 4064 }
 4065 
 4066 /*
 4067  * Set the mode mask for creation of filesystem nodes.
 4068  */
 4069 #ifndef _SYS_SYSPROTO_H_
 4070 struct umask_args {
 4071         int     newmask;
 4072 };
 4073 #endif
 4074 int
 4075 sys_umask(struct thread *td, struct umask_args *uap)
 4076 {
 4077         struct filedesc *fdp;
 4078 
 4079         fdp = td->td_proc->p_fd;
 4080         FILEDESC_XLOCK(fdp);
 4081         td->td_retval[0] = fdp->fd_cmask;
 4082         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4083         FILEDESC_XUNLOCK(fdp);
 4084         return (0);
 4085 }
 4086 
 4087 /*
 4088  * Void all references to file by ripping underlying filesystem away from
 4089  * vnode.
 4090  */
 4091 #ifndef _SYS_SYSPROTO_H_
 4092 struct revoke_args {
 4093         char    *path;
 4094 };
 4095 #endif
 4096 int
 4097 sys_revoke(struct thread *td, struct revoke_args *uap)
 4098 {
 4099         struct vnode *vp;
 4100         struct vattr vattr;
 4101         struct nameidata nd;
 4102         int error;
 4103 
 4104         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4105             uap->path, td);
 4106         if ((error = namei(&nd)) != 0)
 4107                 return (error);
 4108         vp = nd.ni_vp;
 4109         NDFREE(&nd, NDF_ONLY_PNBUF);
 4110         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4111                 error = EINVAL;
 4112                 goto out;
 4113         }
 4114 #ifdef MAC
 4115         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4116         if (error != 0)
 4117                 goto out;
 4118 #endif
 4119         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4120         if (error != 0)
 4121                 goto out;
 4122         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4123                 error = priv_check(td, PRIV_VFS_ADMIN);
 4124                 if (error != 0)
 4125                         goto out;
 4126         }
 4127         if (vcount(vp) > 1)
 4128                 VOP_REVOKE(vp, REVOKEALL);
 4129 out:
 4130         vput(vp);
 4131         return (error);
 4132 }
 4133 
 4134 /*
 4135  * Convert a user file descriptor to a kernel file entry and check that, if it
 4136  * is a capability, the correct rights are present. A reference on the file
 4137  * entry is held upon returning.
 4138  */
 4139 int
 4140 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 4141 {
 4142         struct file *fp;
 4143         int error;
 4144 
 4145         error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
 4146         if (error != 0)
 4147                 return (error);
 4148 
 4149         /*
 4150          * The file could be not of the vnode type, or it may be not
 4151          * yet fully initialized, in which case the f_vnode pointer
 4152          * may be set, but f_ops is still badfileops.  E.g.,
 4153          * devfs_open() transiently create such situation to
 4154          * facilitate csw d_fdopen().
 4155          *
 4156          * Dupfdopen() handling in kern_openat() installs the
 4157          * half-baked file into the process descriptor table, allowing
 4158          * other thread to dereference it. Guard against the race by
 4159          * checking f_ops.
 4160          */
 4161         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4162                 fdrop(fp, td);
 4163                 return (EINVAL);
 4164         }
 4165         *fpp = fp;
 4166         return (0);
 4167 }
 4168 
 4169 
 4170 /*
 4171  * Get an (NFS) file handle.
 4172  */
 4173 #ifndef _SYS_SYSPROTO_H_
 4174 struct lgetfh_args {
 4175         char *fname;
 4176         fhandle_t *fhp;
 4177 };
 4178 #endif
 4179 int
 4180 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 4181 {
 4182 
 4183         return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname,
 4184             UIO_USERSPACE, uap->fhp));
 4185 }
 4186 
 4187 #ifndef _SYS_SYSPROTO_H_
 4188 struct getfh_args {
 4189         char *fname;
 4190         fhandle_t *fhp;
 4191 };
 4192 #endif
 4193 int
 4194 sys_getfh(struct thread *td, struct getfh_args *uap)
 4195 {
 4196 
 4197         return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE,
 4198             uap->fhp));
 4199 }
 4200 
 4201 /*
 4202  * syscall for the rpc.lockd to use to translate an open descriptor into
 4203  * a NFS file handle.
 4204  *
 4205  * warning: do not remove the priv_check() call or this becomes one giant
 4206  * security hole.
 4207  */
 4208 #ifndef _SYS_SYSPROTO_H_
 4209 struct getfhat_args {
 4210         int fd;
 4211         char *path;
 4212         fhandle_t *fhp;
 4213         int flags;
 4214 };
 4215 #endif
 4216 int
 4217 sys_getfhat(struct thread *td, struct getfhat_args *uap)
 4218 {
 4219 
 4220         if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 4221                 return (EINVAL);
 4222         return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE,
 4223             uap->fhp));
 4224 }
 4225 
 4226 static int
 4227 kern_getfhat(struct thread *td, int flags, int fd, const char *path,
 4228     enum uio_seg pathseg, fhandle_t *fhp)
 4229 {
 4230         struct nameidata nd;
 4231         fhandle_t fh;
 4232         struct vnode *vp;
 4233         int error;
 4234 
 4235         error = priv_check(td, PRIV_VFS_GETFH);
 4236         if (error != 0)
 4237                 return (error);
 4238         NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW |
 4239             AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path,
 4240             fd, td);
 4241         error = namei(&nd);
 4242         if (error != 0)
 4243                 return (error);
 4244         NDFREE(&nd, NDF_ONLY_PNBUF);
 4245         vp = nd.ni_vp;
 4246         bzero(&fh, sizeof(fh));
 4247         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4248         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4249         vput(vp);
 4250         if (error == 0)
 4251                 error = copyout(&fh, fhp, sizeof (fh));
 4252         return (error);
 4253 }
 4254 
 4255 #ifndef _SYS_SYSPROTO_H_
 4256 struct fhlink_args {
 4257         fhandle_t *fhp;
 4258         const char *to;
 4259 };
 4260 #endif
 4261 int
 4262 sys_fhlink(struct thread *td, struct fhlink_args *uap)
 4263 {
 4264 
 4265         return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp));
 4266 }
 4267 
 4268 #ifndef _SYS_SYSPROTO_H_
 4269 struct fhlinkat_args {
 4270         fhandle_t *fhp;
 4271         int tofd;
 4272         const char *to;
 4273 };
 4274 #endif
 4275 int
 4276 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap)
 4277 {
 4278 
 4279         return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp));
 4280 }
 4281 
 4282 static int
 4283 kern_fhlinkat(struct thread *td, int fd, const char *path,
 4284     enum uio_seg pathseg, fhandle_t *fhp)
 4285 {
 4286         fhandle_t fh;
 4287         struct mount *mp;
 4288         struct vnode *vp;
 4289         int error;
 4290 
 4291         error = priv_check(td, PRIV_VFS_GETFH);
 4292         if (error != 0)
 4293                 return (error);
 4294         error = copyin(fhp, &fh, sizeof(fh));
 4295         if (error != 0)
 4296                 return (error);
 4297         do {
 4298                 bwillwrite();
 4299                 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4300                         return (ESTALE);
 4301                 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4302                 vfs_unbusy(mp);
 4303                 if (error != 0)
 4304                         return (error);
 4305                 VOP_UNLOCK(vp, 0);
 4306         } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
 4307         return (error);
 4308 }
 4309 
 4310 #ifndef _SYS_SYSPROTO_H_
 4311 struct fhreadlink_args {
 4312         fhandle_t *fhp;
 4313         char *buf;
 4314         size_t bufsize;
 4315 };
 4316 #endif
 4317 int
 4318 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap)
 4319 {
 4320         fhandle_t fh;
 4321         struct mount *mp;
 4322         struct vnode *vp;
 4323         int error;
 4324 
 4325         error = priv_check(td, PRIV_VFS_GETFH);
 4326         if (error != 0)
 4327                 return (error);
 4328         if (uap->bufsize > IOSIZE_MAX)
 4329                 return (EINVAL);
 4330         error = copyin(uap->fhp, &fh, sizeof(fh));
 4331         if (error != 0)
 4332                 return (error);
 4333         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4334                 return (ESTALE);
 4335         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 4336         vfs_unbusy(mp);
 4337         if (error != 0)
 4338                 return (error);
 4339         error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td);
 4340         vput(vp);
 4341         return (error);
 4342 }
 4343 
 4344 /*
 4345  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4346  * open descriptor.
 4347  *
 4348  * warning: do not remove the priv_check() call or this becomes one giant
 4349  * security hole.
 4350  */
 4351 #ifndef _SYS_SYSPROTO_H_
 4352 struct fhopen_args {
 4353         const struct fhandle *u_fhp;
 4354         int flags;
 4355 };
 4356 #endif
 4357 int
 4358 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 4359 {
 4360         struct mount *mp;
 4361         struct vnode *vp;
 4362         struct fhandle fhp;
 4363         struct file *fp;
 4364         int fmode, error;
 4365         int indx;
 4366 
 4367         error = priv_check(td, PRIV_VFS_FHOPEN);
 4368         if (error != 0)
 4369                 return (error);
 4370         indx = -1;
 4371         fmode = FFLAGS(uap->flags);
 4372         /* why not allow a non-read/write open for our lockd? */
 4373         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4374                 return (EINVAL);
 4375         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4376         if (error != 0)
 4377                 return(error);
 4378         /* find the mount point */
 4379         mp = vfs_busyfs(&fhp.fh_fsid);
 4380         if (mp == NULL)
 4381                 return (ESTALE);
 4382         /* now give me my vnode, it gets returned to me locked */
 4383         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4384         vfs_unbusy(mp);
 4385         if (error != 0)
 4386                 return (error);
 4387 
 4388         error = falloc_noinstall(td, &fp);
 4389         if (error != 0) {
 4390                 vput(vp);
 4391                 return (error);
 4392         }
 4393         /*
 4394          * An extra reference on `fp' has been held for us by
 4395          * falloc_noinstall().
 4396          */
 4397 
 4398 #ifdef INVARIANTS
 4399         td->td_dupfd = -1;
 4400 #endif
 4401         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4402         if (error != 0) {
 4403                 KASSERT(fp->f_ops == &badfileops,
 4404                     ("VOP_OPEN in fhopen() set f_ops"));
 4405                 KASSERT(td->td_dupfd < 0,
 4406                     ("fhopen() encountered fdopen()"));
 4407 
 4408                 vput(vp);
 4409                 goto bad;
 4410         }
 4411 #ifdef INVARIANTS
 4412         td->td_dupfd = 0;
 4413 #endif
 4414         fp->f_vnode = vp;
 4415         fp->f_seqcount = 1;
 4416         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4417             &vnops);
 4418         VOP_UNLOCK(vp, 0);
 4419         if ((fmode & O_TRUNC) != 0) {
 4420                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4421                 if (error != 0)
 4422                         goto bad;
 4423         }
 4424 
 4425         error = finstall(td, fp, &indx, fmode, NULL);
 4426 bad:
 4427         fdrop(fp, td);
 4428         td->td_retval[0] = indx;
 4429         return (error);
 4430 }
 4431 
 4432 /*
 4433  * Stat an (NFS) file handle.
 4434  */
 4435 #ifndef _SYS_SYSPROTO_H_
 4436 struct fhstat_args {
 4437         struct fhandle *u_fhp;
 4438         struct stat *sb;
 4439 };
 4440 #endif
 4441 int
 4442 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 4443 {
 4444         struct stat sb;
 4445         struct fhandle fh;
 4446         int error;
 4447 
 4448         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4449         if (error != 0)
 4450                 return (error);
 4451         error = kern_fhstat(td, fh, &sb);
 4452         if (error == 0)
 4453                 error = copyout(&sb, uap->sb, sizeof(sb));
 4454         return (error);
 4455 }
 4456 
 4457 int
 4458 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4459 {
 4460         struct mount *mp;
 4461         struct vnode *vp;
 4462         int error;
 4463 
 4464         error = priv_check(td, PRIV_VFS_FHSTAT);
 4465         if (error != 0)
 4466                 return (error);
 4467         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4468                 return (ESTALE);
 4469         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4470         vfs_unbusy(mp);
 4471         if (error != 0)
 4472                 return (error);
 4473         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4474         vput(vp);
 4475         return (error);
 4476 }
 4477 
 4478 /*
 4479  * Implement fstatfs() for (NFS) file handles.
 4480  */
 4481 #ifndef _SYS_SYSPROTO_H_
 4482 struct fhstatfs_args {
 4483         struct fhandle *u_fhp;
 4484         struct statfs *buf;
 4485 };
 4486 #endif
 4487 int
 4488 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 4489 {
 4490         struct statfs *sfp;
 4491         fhandle_t fh;
 4492         int error;
 4493 
 4494         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4495         if (error != 0)
 4496                 return (error);
 4497         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 4498         error = kern_fhstatfs(td, fh, sfp);
 4499         if (error == 0)
 4500                 error = copyout(sfp, uap->buf, sizeof(*sfp));
 4501         free(sfp, M_STATFS);
 4502         return (error);
 4503 }
 4504 
 4505 int
 4506 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4507 {
 4508         struct statfs *sp;
 4509         struct mount *mp;
 4510         struct vnode *vp;
 4511         int error;
 4512 
 4513         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4514         if (error != 0)
 4515                 return (error);
 4516         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4517                 return (ESTALE);
 4518         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4519         if (error != 0) {
 4520                 vfs_unbusy(mp);
 4521                 return (error);
 4522         }
 4523         vput(vp);
 4524         error = prison_canseemount(td->td_ucred, mp);
 4525         if (error != 0)
 4526                 goto out;
 4527 #ifdef MAC
 4528         error = mac_mount_check_stat(td->td_ucred, mp);
 4529         if (error != 0)
 4530                 goto out;
 4531 #endif
 4532         /*
 4533          * Set these in case the underlying filesystem fails to do so.
 4534          */
 4535         sp = &mp->mnt_stat;
 4536         sp->f_version = STATFS_VERSION;
 4537         sp->f_namemax = NAME_MAX;
 4538         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4539         error = VFS_STATFS(mp, sp);
 4540         if (error == 0)
 4541                 *buf = *sp;
 4542 out:
 4543         vfs_unbusy(mp);
 4544         return (error);
 4545 }
 4546 
 4547 int
 4548 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4549 {
 4550         struct file *fp;
 4551         struct mount *mp;
 4552         struct vnode *vp;
 4553         off_t olen, ooffset;
 4554         int error;
 4555 #ifdef AUDIT
 4556         int audited_vnode1 = 0;
 4557 #endif
 4558 
 4559         AUDIT_ARG_FD(fd);
 4560         if (offset < 0 || len <= 0)
 4561                 return (EINVAL);
 4562         /* Check for wrap. */
 4563         if (offset > OFF_MAX - len)
 4564                 return (EFBIG);
 4565         AUDIT_ARG_FD(fd);
 4566         error = fget(td, fd, &cap_pwrite_rights, &fp);
 4567         if (error != 0)
 4568                 return (error);
 4569         AUDIT_ARG_FILE(td->td_proc, fp);
 4570         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4571                 error = ESPIPE;
 4572                 goto out;
 4573         }
 4574         if ((fp->f_flag & FWRITE) == 0) {
 4575                 error = EBADF;
 4576                 goto out;
 4577         }
 4578         if (fp->f_type != DTYPE_VNODE) {
 4579                 error = ENODEV;
 4580                 goto out;
 4581         }
 4582         vp = fp->f_vnode;
 4583         if (vp->v_type != VREG) {
 4584                 error = ENODEV;
 4585                 goto out;
 4586         }
 4587 
 4588         /* Allocating blocks may take a long time, so iterate. */
 4589         for (;;) {
 4590                 olen = len;
 4591                 ooffset = offset;
 4592 
 4593                 bwillwrite();
 4594                 mp = NULL;
 4595                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4596                 if (error != 0)
 4597                         break;
 4598                 error = vn_lock(vp, LK_EXCLUSIVE);
 4599                 if (error != 0) {
 4600                         vn_finished_write(mp);
 4601                         break;
 4602                 }
 4603 #ifdef AUDIT
 4604                 if (!audited_vnode1) {
 4605                         AUDIT_ARG_VNODE1(vp);
 4606                         audited_vnode1 = 1;
 4607                 }
 4608 #endif
 4609 #ifdef MAC
 4610                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4611                 if (error == 0)
 4612 #endif
 4613                         error = VOP_ALLOCATE(vp, &offset, &len);
 4614                 VOP_UNLOCK(vp, 0);
 4615                 vn_finished_write(mp);
 4616 
 4617                 if (olen + ooffset != offset + len) {
 4618                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4619                             ooffset, olen, offset, len);
 4620                 }
 4621                 if (error != 0 || len == 0)
 4622                         break;
 4623                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4624                 maybe_yield();
 4625         }
 4626  out:
 4627         fdrop(fp, td);
 4628         return (error);
 4629 }
 4630 
 4631 /* Unpack uap for kern_posix_fallocate(), then apply kern_posix_error(). */
 4632 int
 4633 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4634 {
 4635 
 4636         return (kern_posix_error(td,
 4637             kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)));
 4638 }
 4639 
 4640 /*
 4641  * Apply posix_fadvise(2) advice to fd for the range starting at offset;
 4642  * a len of 0 extends the range to OFF_MAX.
 4643  *
 4644  * Unlike madvise(2), we do not make a best effort to remember every
 4645  * possible caching hint.  Instead, we remember the last setting with
 4646  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4647  * region of any current setting.
 4648  *
 4649  * Returns 0 or an errno value: EINVAL for a bad range or advice, ESPIPE
 4650  * for a non-seekable file, ENODEV for anything but a regular-file vnode,
 4651  * otherwise the result of fget() or VOP_ADVISE().
 4652  */
 4653 int
 4654 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4655     int advice)
 4656 {
 4657         struct fadvise_info *fa, *new;
 4658         struct file *fp;
 4659         struct vnode *vp;
 4660         off_t end;
 4661         int error;
 4662 
 4663         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4664                 return (EINVAL);
 4665         AUDIT_ARG_VALUE(advice);
 4666         switch (advice) {
 4667         case POSIX_FADV_SEQUENTIAL:
 4668         case POSIX_FADV_RANDOM:
 4669         case POSIX_FADV_NOREUSE:
 4670                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4671                 break;
 4672         case POSIX_FADV_NORMAL:
 4673         case POSIX_FADV_WILLNEED:
 4674         case POSIX_FADV_DONTNEED:
 4675                 new = NULL;
 4676                 break;
 4677         default:
 4678                 return (EINVAL);
 4679         }
 4680         /* Clear fp first: the out path tests it even when fget() fails. */
 4681         fp = NULL;
 4682         /* XXX: CAP_POSIX_FADVISE? */
 4683         AUDIT_ARG_FD(fd);
 4684         error = fget(td, fd, &cap_no_rights, &fp);
 4685         if (error != 0)
 4686                 goto out;
 4687         AUDIT_ARG_FILE(td->td_proc, fp);
 4688         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4689                 error = ESPIPE;
 4690                 goto out;
 4691         }
 4692         if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VREG) {
 4693                 error = ENODEV;
 4694                 goto out;
 4695         }
 4696         vp = fp->f_vnode;
 4697         /* "end" is inclusive; the range check above rules out overflow. */
 4698         end = (len == 0) ? OFF_MAX : offset + len - 1;
 4699         switch (advice) {
 4700         case POSIX_FADV_SEQUENTIAL:
 4701         case POSIX_FADV_RANDOM:
 4702         case POSIX_FADV_NOREUSE:
 4703                 /*
 4704                  * Try to merge any existing non-standard region with
 4705                  * this new region if possible, otherwise create a new
 4706                  * non-standard region for this request.
 4707                  */
 4708                 mtx_pool_lock(mtxpool_sleep, fp);
 4709                 fa = fp->f_advice;
 4710                 if (fa != NULL && fa->fa_advice == advice &&
 4711                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4712                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4713                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4714                         if (offset < fa->fa_start)
 4715                                 fa->fa_start = offset;
 4716                         if (end > fa->fa_end)
 4717                                 fa->fa_end = end;
 4718                 } else {
 4719                         new->fa_advice = advice;
 4720                         new->fa_start = offset;
 4721                         new->fa_end = end;
 4722                         fp->f_advice = new;
 4723                         new = fa;       /* old region, if any, freed at out */
 4724                 }
 4725                 mtx_pool_unlock(mtxpool_sleep, fp);
 4726                 break;
 4727         case POSIX_FADV_NORMAL:
 4728                 /*
 4729                  * If the "normal" region overlaps with an existing
 4730                  * non-standard region, trim or remove the non-standard region.
 4731                  */
 4732                 mtx_pool_lock(mtxpool_sleep, fp);
 4733                 fa = fp->f_advice;
 4734                 if (fa != NULL) {
 4735                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4736                                 new = fa; /* freed at out */
 4737                                 fp->f_advice = NULL;
 4738                         } else if (offset <= fa->fa_start &&
 4739                             end >= fa->fa_start)
 4740                                 fa->fa_start = end + 1;
 4741                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4742                                 fa->fa_end = offset - 1;
 4743                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4744                                 /*
 4745                                  * If the "normal" region is a middle portion
 4746                                  * of the existing non-standard region, just
 4747                                  * remove the whole thing rather than picking
 4748                                  * one side or the other to preserve.
 4749                                  */
 4750                                 new = fa; /* freed at out */
 4751                                 fp->f_advice = NULL;
 4752                         }
 4753                 }
 4754                 mtx_pool_unlock(mtxpool_sleep, fp);
 4755                 break;
 4756         case POSIX_FADV_WILLNEED:
 4757         case POSIX_FADV_DONTNEED:
 4758                 error = VOP_ADVISE(vp, offset, end, advice);
 4759                 break;
 4760         }
 4761 out:
 4762         if (fp != NULL)
 4763                 fdrop(fp, td);
 4764         free(new, M_FADVISE);
 4765         return (error);
 4766 }
 4767 
 4768 /* Unpack uap for kern_posix_fadvise(), then apply kern_posix_error(). */
 4769 int
 4770 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4771 {
 4772 
 4773         return (kern_posix_error(td,
 4774             kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4775             uap->advice)));
 4776 }

Cache object: 9e0d25826335314080ef9dc003ac7bcc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.