The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: stable/10/sys/kern/vfs_syscalls.c 325099 2017-10-29 09:48:28Z kib $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");  /* malloc type M_FADVISE */
   93 
      /*
       * Tracing provider "vfs" and two probes declared against it; each probe
       * is declared with two arguments typed "char *" and "int".
       */
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
      /*
       * Forward declarations of file-local (static) helpers.  Of these,
       * chroot_refuse_vdir_fds() is defined in the part of the file shown
       * here; the definitions of the others are not part of this excerpt.
       */
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int kern_chflags(struct thread *td, const char *path,
  100     enum uio_seg pathseg, u_long flags);
  101 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  102     enum uio_seg pathseg, u_long flags, int atflag);
  103 static int setfflags(struct thread *td, struct vnode *, u_long);
  104 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  105 static int getutimens(const struct timespec *, enum uio_seg,
  106     struct timespec *, int *);
  107 static int setutimes(struct thread *td, struct vnode *,
  108     const struct timespec *, int, int);
  109 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  110     struct thread *td);
  111 
  112 /*
  113  * The module initialization routine for POSIX asynchronous I/O will
  114  * set this to the version of AIO that it implements.  (Zero means
  115  * that it is not implemented.)  This value is used here by pathconf()
  116  * and in kern_descrip.c by fpathconf().
       *
       * No initializer is given, so as a file-scope object it starts out as
       * zero, i.e. "not implemented", until something stores a version here.
  117  */
  118 int async_io_version;
  119 
  120 /*
  121  * Sync each mounted filesystem.
       *
       * Walks the global mount list.  Every mount that can be busied without
       * sleeping, is not read-only, and accepts a non-blocking
       * vn_start_write() gets vfs_msync() followed by VFS_SYNC(), both with
       * MNT_NOWAIT.  Mounts failing any of those tests are skipped.  The
       * return values of the per-mount sync calls are ignored; the system
       * call always returns 0.  'uap' is unused.
  122  */
  123 #ifndef _SYS_SYSPROTO_H_
  124 struct sync_args {
  125         int     dummy;
  126 };
  127 #endif
  128 /* ARGSUSED */
  129 int
  130 sys_sync(td, uap)
  131         struct thread *td;
  132         struct sync_args *uap;
  133 {
  134         struct mount *mp, *nmp;
  135         int save;
  136 
  137         mtx_lock(&mountlist_mtx);
  138         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * Could not busy the mount without sleeping: skip it.
                       * This path does not re-lock, so mountlist_mtx is
                       * presumably still held after a failed vfs_busy() with
                       * MBF_MNTLSTLOCK -- confirm against vfs_busy().
                       */
  139                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  140                         nmp = TAILQ_NEXT(mp, mnt_list);
  141                         continue;
  142                 }
                      /*
                       * The sync runs with TDP_SYNCIO set in the thread's
                       * private flags; the previous state is restored right
                       * afterwards.
                       */
  143                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  144                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  145                         save = curthread_pflags_set(TDP_SYNCIO);
  146                         vfs_msync(mp, MNT_NOWAIT);
  147                         VFS_SYNC(mp, MNT_NOWAIT);
  148                         curthread_pflags_restore(save);
  149                         vn_finished_write(mp);
  150                 }
                      /*
                       * Re-take the list lock before reading the successor,
                       * and only then drop the busy hold on this mount.
                       */
  151                 mtx_lock(&mountlist_mtx);
  152                 nmp = TAILQ_NEXT(mp, mnt_list);
  153                 vfs_unbusy(mp);
  154         }
  155         mtx_unlock(&mountlist_mtx);
  156         return (0);
  157 }
  158 
  159 /*
  160  * Change filesystem quotas.
       *
       * Looks up 'path', takes the mount the resulting vnode lives on, and
       * forwards 'cmd', 'uid' and 'arg' to that filesystem's VFS_QUOTACTL().
       * Returns EPERM when prison_allow() denies PR_ALLOW_QUOTAS for the
       * caller's credentials; otherwise returns the error from namei(),
       * vfs_busy() or VFS_QUOTACTL().
  161  */
  162 #ifndef _SYS_SYSPROTO_H_
  163 struct quotactl_args {
  164         char *path;
  165         int cmd;
  166         int uid;
  167         caddr_t arg;
  168 };
  169 #endif
  170 int
  171 sys_quotactl(td, uap)
  172         struct thread *td;
  173         register struct quotactl_args /* {
  174                 char *path;
  175                 int cmd;
  176                 int uid;
  177                 caddr_t arg;
  178         } */ *uap;
  179 {
  180         struct mount *mp;
  181         struct nameidata nd;
  182         int error;
  183 
  184         AUDIT_ARG_CMD(uap->cmd);
  185         AUDIT_ARG_UID(uap->uid);
  186         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  187                 return (EPERM);
  188         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  189             uap->path, td);
  190         if ((error = namei(&nd)) != 0)
  191                 return (error);
  192         NDFREE(&nd, NDF_ONLY_PNBUF);
              /*
               * Take a reference on the mount before releasing the vnode, then
               * exchange that reference for a busy hold.  The reference is
               * dropped whether or not vfs_busy() succeeds.
               */
  193         mp = nd.ni_vp->v_mount;
  194         vfs_ref(mp);
  195         vput(nd.ni_vp);
  196         error = vfs_busy(mp, 0);
  197         vfs_rel(mp);
  198         if (error != 0)
  199                 return (error);
  200         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  201 
  202         /*
  203          * Since quota on operation typically needs to open quota
  204          * file, the Q_QUOTAON handler needs to unbusy the mount point
  205          * before calling into namei.  Otherwise, unmount might be
  206          * started between two vfs_busy() invocations (first is our,
  207          * second is from mount point cross-walk code in lookup()),
  208          * causing deadlock.
  209          *
  210          * Require that Q_QUOTAON handles the vfs_busy() reference on
  211          * its own, always returning with unbusied mount point.
  212          */
  213         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  214                 vfs_unbusy(mp);
  215         return (error);
  216 }
  217 
  218 /*
  219  * Used by statfs conversion routines to scale the block size up if
  220  * necessary so that all of the block counts are <= 'max_size'.  Note
  221  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  222  * value of 'n'.
  223  */
  224 void
  225 statfs_scale_blocks(struct statfs *sf, long max_size)
  226 {
  227         uint64_t count;
  228         int shift;
  229 
  230         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  231 
  232         /*
  233          * Attempt to scale the block counts to give a more accurate
  234          * overview to userland of the ratio of free space to used
  235          * space.  To do this, find the largest block count and compute
  236          * a divisor that lets it fit into a signed integer <= max_size.
  237          */
  238         if (sf->f_bavail < 0)
  239                 count = -sf->f_bavail;
  240         else
  241                 count = sf->f_bavail;
  242         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  243         if (count <= max_size)
  244                 return;
  245 
  246         count >>= flsl(max_size);
  247         shift = 0;
  248         while (count > 0) {
  249                 shift++;
  250                 count >>=1;
  251         }
  252 
  253         sf->f_bsize <<= shift;
  254         sf->f_blocks >>= shift;
  255         sf->f_bfree >>= shift;
  256         sf->f_bavail >>= shift;
  257 }
  258 
  259 static int
  260 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  261 {
  262         struct statfs *sp;
  263         int error;
  264 
  265         if (mp == NULL)
  266                 return (EBADF);
  267         error = vfs_busy(mp, 0);
  268         vfs_rel(mp);
  269         if (error != 0)
  270                 return (error);
  271 #ifdef MAC
  272         error = mac_mount_check_stat(td->td_ucred, mp);
  273         if (error != 0)
  274                 goto out;
  275 #endif
  276         /*
  277          * Set these in case the underlying filesystem fails to do so.
  278          */
  279         sp = &mp->mnt_stat;
  280         sp->f_version = STATFS_VERSION;
  281         sp->f_namemax = NAME_MAX;
  282         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  283         error = VFS_STATFS(mp, sp);
  284         if (error != 0)
  285                 goto out;
  286         *buf = *sp;
  287         if (priv_check(td, PRIV_VFS_GENERATION)) {
  288                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  289                 prison_enforce_statfs(td->td_ucred, mp, buf);
  290         }
  291 out:
  292         vfs_unbusy(mp);
  293         return (error);
  294 }
  295 
  296 /*
  297  * Get filesystem statistics.
  298  */
  299 #ifndef _SYS_SYSPROTO_H_
  300 struct statfs_args {
  301         char *path;
  302         struct statfs *buf;
  303 };
  304 #endif
  305 int
  306 sys_statfs(td, uap)
  307         struct thread *td;
  308         register struct statfs_args /* {
  309                 char *path;
  310                 struct statfs *buf;
  311         } */ *uap;
  312 {
  313         struct statfs sf;
  314         int error;
  315 
  316         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  317         if (error == 0)
  318                 error = copyout(&sf, uap->buf, sizeof(sf));
  319         return (error);
  320 }
  321 
  322 int
  323 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  324     struct statfs *buf)
  325 {
  326         struct mount *mp;
  327         struct nameidata nd;
  328         int error;
  329 
  330         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  331             pathseg, path, td);
  332         error = namei(&nd);
  333         if (error != 0)
  334                 return (error);
  335         mp = nd.ni_vp->v_mount;
  336         vfs_ref(mp);
  337         NDFREE(&nd, NDF_ONLY_PNBUF);
  338         vput(nd.ni_vp);
  339         return (kern_do_statfs(td, mp, buf));
  340 }
  341 
  342 /*
  343  * Get filesystem statistics.
  344  */
  345 #ifndef _SYS_SYSPROTO_H_
  346 struct fstatfs_args {
  347         int fd;
  348         struct statfs *buf;
  349 };
  350 #endif
  351 int
  352 sys_fstatfs(td, uap)
  353         struct thread *td;
  354         register struct fstatfs_args /* {
  355                 int fd;
  356                 struct statfs *buf;
  357         } */ *uap;
  358 {
  359         struct statfs sf;
  360         int error;
  361 
  362         error = kern_fstatfs(td, uap->fd, &sf);
  363         if (error == 0)
  364                 error = copyout(&sf, uap->buf, sizeof(sf));
  365         return (error);
  366 }
  367 
  368 int
  369 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  370 {
  371         struct file *fp;
  372         struct mount *mp;
  373         struct vnode *vp;
  374         cap_rights_t rights;
  375         int error;
  376 
  377         AUDIT_ARG_FD(fd);
  378         error = getvnode(td->td_proc->p_fd, fd,
  379             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  380         if (error != 0)
  381                 return (error);
  382         vp = fp->f_vnode;
  383         vn_lock(vp, LK_SHARED | LK_RETRY);
  384 #ifdef AUDIT
  385         AUDIT_ARG_VNODE1(vp);
  386 #endif
  387         mp = vp->v_mount;
  388         if (mp != NULL)
  389                 vfs_ref(mp);
  390         VOP_UNLOCK(vp, 0);
  391         fdrop(fp, td);
  392         return (kern_do_statfs(td, mp, buf));
  393 }
  394 
  395 /*
  396  * Get statistics on all filesystems.
  397  */
  398 #ifndef _SYS_SYSPROTO_H_
  399 struct getfsstat_args {
  400         struct statfs *buf;
  401         long bufsize;
  402         int flags;
  403 };
  404 #endif
  405 int
  406 sys_getfsstat(td, uap)
  407         struct thread *td;
  408         register struct getfsstat_args /* {
  409                 struct statfs *buf;
  410                 long bufsize;
  411                 int flags;
  412         } */ *uap;
  413 {
  414 
  415         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  416             uap->flags));
  417 }
  418 
  419 /*
       * Collect statfs records for the mounted filesystems the calling thread
       * is allowed to see.
       *
       * bufsize == 0:  nothing is stored; the mounts are only counted.
       * UIO_USERSPACE: '*buf' is a user address; records are copied out one
       *                at a time, and a copyout() failure is returned.
       * UIO_SYSSPACE:  see below.
       *
  420  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  421  *      The caller is responsible for freeing memory which will be allocated
  422  *      in '*buf'.
       *      The array is allocated with malloc type M_TEMP and without M_ZERO,
       *      so only the entries reported back are initialized.
       *
       * The number of entries is returned in td->td_retval[0]: the number of
       * mounts counted, capped at the buffer capacity when a buffer was
       * supplied.  The function itself returns 0 except for the copyout()
       * failure mentioned above.
  423  */
  424 int
  425 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  426     enum uio_seg bufseg, int flags)
  427 {
  428         struct mount *mp, *nmp;
  429         struct statfs *sfsp, *sp, sb;
  430         size_t count, maxcount;
  431         int error;
  432 
  433         maxcount = bufsize / sizeof(struct statfs);
  434         if (bufsize == 0)
  435                 sfsp = NULL;
  436         else if (bufseg == UIO_USERSPACE)
  437                 sfsp = *buf;
  438         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Size the kernel buffer from the current length of the
                       * mount list, but never beyond what the caller asked
                       * for.  The list lock is dropped before allocating, so
                       * the list may change afterwards; the fill loop below is
                       * bounded by maxcount.
                       */
  439                 count = 0;
  440                 mtx_lock(&mountlist_mtx);
  441                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  442                         count++;
  443                 }
  444                 mtx_unlock(&mountlist_mtx);
  445                 if (maxcount > count)
  446                         maxcount = count;
  447                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  448                     M_WAITOK);
  449         }
  450         count = 0;
  451         mtx_lock(&mountlist_mtx);
  452         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * Skip mounts hidden from the caller by the prison or
                       * MAC checks, and mounts that cannot be busied without
                       * sleeping.  None of these skip paths re-lock, so
                       * mountlist_mtx is presumably still held on them --
                       * confirm against vfs_busy().
                       */
  453                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  454                         nmp = TAILQ_NEXT(mp, mnt_list);
  455                         continue;
  456                 }
  457 #ifdef MAC
  458                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  459                         nmp = TAILQ_NEXT(mp, mnt_list);
  460                         continue;
  461                 }
  462 #endif
  463                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  464                         nmp = TAILQ_NEXT(mp, mnt_list);
  465                         continue;
  466                 }
                      /* Store a record only while the buffer has room. */
  467                 if (sfsp != NULL && count < maxcount) {
  468                         sp = &mp->mnt_stat;
  469                         /*
  470                          * Set these in case the underlying filesystem
  471                          * fails to do so.
  472                          */
  473                         sp->f_version = STATFS_VERSION;
  474                         sp->f_namemax = NAME_MAX;
  475                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  476                         /*
  477                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  478                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  479                          * overrides MNT_WAIT.
                               *
                               * A mount whose VFS_STATFS() fails is skipped and
                               * not counted.
  480                          */
  481                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  482                             (flags & MNT_WAIT)) &&
  483                             (error = VFS_STATFS(mp, sp))) {
  484                                 mtx_lock(&mountlist_mtx);
  485                                 nmp = TAILQ_NEXT(mp, mnt_list);
  486                                 vfs_unbusy(mp);
  487                                 continue;
  488                         }
                              /*
                               * Callers without PRIV_VFS_GENERATION get a
                               * private copy with a zeroed fsid that has been
                               * passed through prison_enforce_statfs(); the
                               * mount's cached record is left untouched.
                               */
  489                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  490                                 bcopy(sp, &sb, sizeof(sb));
  491                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  492                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  493                                 sp = &sb;
  494                         }
  495                         if (bufseg == UIO_SYSSPACE)
  496                                 bcopy(sp, sfsp, sizeof(*sp));
  497                         else /* if (bufseg == UIO_USERSPACE) */ {
  498                                 error = copyout(sp, sfsp, sizeof(*sp));
                                      /*
                                       * The list lock is not re-taken on this
                                       * path; only the busy hold is dropped.
                                       */
  499                                 if (error != 0) {
  500                                         vfs_unbusy(mp);
  501                                         return (error);
  502                                 }
  503                         }
  504                         sfsp++;
  505                 }
                      /*
                       * Counted even when the record was not stored, so that
                       * 'count' can exceed maxcount.
                       */
  506                 count++;
  507                 mtx_lock(&mountlist_mtx);
  508                 nmp = TAILQ_NEXT(mp, mnt_list);
  509                 vfs_unbusy(mp);
  510         }
  511         mtx_unlock(&mountlist_mtx);
  512         if (sfsp != NULL && count > maxcount)
  513                 td->td_retval[0] = maxcount;
  514         else
  515                 td->td_retval[0] = count;
  516         return (0);
  517 }
  518 
  519 #ifdef COMPAT_FREEBSD4
  520 /*
  521  * Get old format filesystem statistics.
  522  */
  523 static void cvtstatfs(struct statfs *, struct ostatfs *);
  524 
  525 #ifndef _SYS_SYSPROTO_H_
  526 struct freebsd4_statfs_args {
  527         char *path;
  528         struct ostatfs *buf;
  529 };
  530 #endif
  531 int
  532 freebsd4_statfs(td, uap)
  533         struct thread *td;
  534         struct freebsd4_statfs_args /* {
  535                 char *path;
  536                 struct ostatfs *buf;
  537         } */ *uap;
  538 {
  539         struct ostatfs osb;
  540         struct statfs sf;
  541         int error;
  542 
  543         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  544         if (error != 0)
  545                 return (error);
  546         cvtstatfs(&sf, &osb);
  547         return (copyout(&osb, uap->buf, sizeof(osb)));
  548 }
  549 
  550 /*
  551  * Get filesystem statistics.
  552  */
  553 #ifndef _SYS_SYSPROTO_H_
  554 struct freebsd4_fstatfs_args {
  555         int fd;
  556         struct ostatfs *buf;
  557 };
  558 #endif
  559 int
  560 freebsd4_fstatfs(td, uap)
  561         struct thread *td;
  562         struct freebsd4_fstatfs_args /* {
  563                 int fd;
  564                 struct ostatfs *buf;
  565         } */ *uap;
  566 {
  567         struct ostatfs osb;
  568         struct statfs sf;
  569         int error;
  570 
  571         error = kern_fstatfs(td, uap->fd, &sf);
  572         if (error != 0)
  573                 return (error);
  574         cvtstatfs(&sf, &osb);
  575         return (copyout(&osb, uap->buf, sizeof(osb)));
  576 }
  577 
  578 /*
  579  * Get statistics on all filesystems.
  580  */
  581 #ifndef _SYS_SYSPROTO_H_
  582 struct freebsd4_getfsstat_args {
  583         struct ostatfs *buf;
  584         long bufsize;
  585         int flags;
  586 };
  587 #endif
  588 int
  589 freebsd4_getfsstat(td, uap)
  590         struct thread *td;
  591         register struct freebsd4_getfsstat_args /* {
  592                 struct ostatfs *buf;
  593                 long bufsize;
  594                 int flags;
  595         } */ *uap;
  596 {
  597         struct statfs *buf, *sp;
  598         struct ostatfs osb;
  599         size_t count, size;
  600         int error;
  601 
  602         count = uap->bufsize / sizeof(struct ostatfs);
  603         size = count * sizeof(struct statfs);
  604         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  605         if (size > 0) {
  606                 count = td->td_retval[0];
  607                 sp = buf;
  608                 while (count > 0 && error == 0) {
  609                         cvtstatfs(sp, &osb);
  610                         error = copyout(&osb, uap->buf, sizeof(osb));
  611                         sp++;
  612                         uap->buf++;
  613                         count--;
  614                 }
  615                 free(buf, M_TEMP);
  616         }
  617         return (error);
  618 }
  619 
  620 /*
  621  * Implement fstatfs() for (NFS) file handles.
  622  */
  623 #ifndef _SYS_SYSPROTO_H_
  624 struct freebsd4_fhstatfs_args {
  625         struct fhandle *u_fhp;
  626         struct ostatfs *buf;
  627 };
  628 #endif
  629 int
  630 freebsd4_fhstatfs(td, uap)
  631         struct thread *td;
  632         struct freebsd4_fhstatfs_args /* {
  633                 struct fhandle *u_fhp;
  634                 struct ostatfs *buf;
  635         } */ *uap;
  636 {
  637         struct ostatfs osb;
  638         struct statfs sf;
  639         fhandle_t fh;
  640         int error;
  641 
  642         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  643         if (error != 0)
  644                 return (error);
  645         error = kern_fhstatfs(td, fh, &sf);
  646         if (error != 0)
  647                 return (error);
  648         cvtstatfs(&sf, &osb);
  649         return (copyout(&osb, uap->buf, sizeof(osb)));
  650 }
  651 
  652 /*
  653  * Convert a new format statfs structure to an old format statfs structure.
  654  */
  655 static void
  656 cvtstatfs(nsp, osp)
  657         struct statfs *nsp;
  658         struct ostatfs *osp;
  659 {
  660 
  661         statfs_scale_blocks(nsp, LONG_MAX);
  662         bzero(osp, sizeof(*osp));
  663         osp->f_bsize = nsp->f_bsize;
  664         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  665         osp->f_blocks = nsp->f_blocks;
  666         osp->f_bfree = nsp->f_bfree;
  667         osp->f_bavail = nsp->f_bavail;
  668         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  669         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  670         osp->f_owner = nsp->f_owner;
  671         osp->f_type = nsp->f_type;
  672         osp->f_flags = nsp->f_flags;
  673         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  674         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  675         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  676         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  677         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  678             MIN(MFSNAMELEN, OMFSNAMELEN));
  679         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  680             MIN(MNAMELEN, OMNAMELEN));
  681         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  682             MIN(MNAMELEN, OMNAMELEN));
  683         osp->f_fsid = nsp->f_fsid;
  684 }
  685 #endif /* COMPAT_FREEBSD4 */
  686 
  687 /*
  688  * Change current working directory to a given file descriptor.
       *
       * The descriptor must pass getvnode() with the CAP_FCHDIR right and its
       * vnode must pass change_dir().  If a filesystem is mounted on the
       * resulting directory, the root of that filesystem is used instead,
       * repeatedly for stacked mounts.  On success the process's fd_cdir is
       * replaced and the old directory vnode is released; on failure fd_cdir
       * is left unchanged and the error is returned.
  689  */
  690 #ifndef _SYS_SYSPROTO_H_
  691 struct fchdir_args {
  692         int     fd;
  693 };
  694 #endif
  695 int
  696 sys_fchdir(td, uap)
  697         struct thread *td;
  698         struct fchdir_args /* {
  699                 int fd;
  700         } */ *uap;
  701 {
  702         register struct filedesc *fdp = td->td_proc->p_fd;
  703         struct vnode *vp, *tdp, *vpold;
  704         struct mount *mp;
  705         struct file *fp;
  706         cap_rights_t rights;
  707         int error;
  708 
  709         AUDIT_ARG_FD(uap->fd);
  710         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  711             &fp);
  712         if (error != 0)
  713                 return (error);
              /*
               * Take our own reference on the vnode so that the file can be
               * dropped right away.
               */
  714         vp = fp->f_vnode;
  715         VREF(vp);
  716         fdrop(fp, td);
  717         vn_lock(vp, LK_SHARED | LK_RETRY);
  718         AUDIT_ARG_VNODE1(vp);
  719         error = change_dir(vp, td);
              /*
               * Cross into whatever is mounted here.  When vfs_busy() fails the
               * loop simply re-reads v_mountedhere and tries again.  VFS_ROOT()
               * is asked for a shared-locked root vnode, which replaces 'vp'
               * after the covered vnode has been unlocked and released.
               */
  720         while (!error && (mp = vp->v_mountedhere) != NULL) {
  721                 if (vfs_busy(mp, 0))
  722                         continue;
  723                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  724                 vfs_unbusy(mp);
  725                 if (error != 0)
  726                         break;
  727                 vput(vp);
  728                 vp = tdp;
  729         }
  730         if (error != 0) {
  731                 vput(vp);
  732                 return (error);
  733         }
              /*
               * Keep the reference but drop the lock, install the vnode as the
               * current directory under the exclusive filedesc lock, and
               * release the previous directory only after that lock is dropped.
               */
  734         VOP_UNLOCK(vp, 0);
  735         FILEDESC_XLOCK(fdp);
  736         vpold = fdp->fd_cdir;
  737         fdp->fd_cdir = vp;
  738         FILEDESC_XUNLOCK(fdp);
  739         vrele(vpold);
  740         return (0);
  741 }
  742 
  743 /*
  744  * Change current working directory (``.'').
  745  */
  746 #ifndef _SYS_SYSPROTO_H_
  747 struct chdir_args {
  748         char    *path;
  749 };
  750 #endif
  751 int
  752 sys_chdir(td, uap)
  753         struct thread *td;
  754         struct chdir_args /* {
  755                 char *path;
  756         } */ *uap;
  757 {
  758 
  759         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  760 }
  761 
  762 int
  763 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  764 {
  765         register struct filedesc *fdp = td->td_proc->p_fd;
  766         struct nameidata nd;
  767         struct vnode *vp;
  768         int error;
  769 
  770         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  771             pathseg, path, td);
  772         if ((error = namei(&nd)) != 0)
  773                 return (error);
  774         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  775                 vput(nd.ni_vp);
  776                 NDFREE(&nd, NDF_ONLY_PNBUF);
  777                 return (error);
  778         }
  779         VOP_UNLOCK(nd.ni_vp, 0);
  780         NDFREE(&nd, NDF_ONLY_PNBUF);
  781         FILEDESC_XLOCK(fdp);
  782         vp = fdp->fd_cdir;
  783         fdp->fd_cdir = nd.ni_vp;
  784         FILEDESC_XUNLOCK(fdp);
  785         vrele(vp);
  786         return (0);
  787 }
  788 
  789 /*
  790  * Helper function for raised chroot(2) security function:  Refuse if
  791  * any filedescriptors are open directories.
  792  */
  793 static int
  794 chroot_refuse_vdir_fds(fdp)
  795         struct filedesc *fdp;
  796 {
  797         struct vnode *vp;
  798         struct file *fp;
  799         int fd;
  800 
  801         FILEDESC_LOCK_ASSERT(fdp);
  802 
  803         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
  804                 fp = fget_locked(fdp, fd);
  805                 if (fp == NULL)
  806                         continue;
  807                 if (fp->f_type == DTYPE_VNODE) {
  808                         vp = fp->f_vnode;
  809                         if (vp->v_type == VDIR)
  810                                 return (EPERM);
  811                 }
  812         }
  813         return (0);
  814 }
  815 
  816 /*
  817  * This sysctl determines if we will allow a process to chroot(2) if it
  818  * has a directory open:
  819  *      0: disallowed for all processes.
  820  *      1: allowed for processes that were not already chroot(2)'ed.
  821  *      2: allowed for all processes.
       *
       * The default is 1.  change_root() only tests for the values 0 and 1,
       * so any other value behaves like 2.  The variable is exported
       * read-write under the _kern sysctl node.
  822  */
  823 
  824 static int chroot_allow_open_directories = 1;
  825 
  826 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  827      &chroot_allow_open_directories, 0,
  828      "Allow a process to chroot(2) if it has a directory open");
  829 
  830 /*
  831  * Change notion of root (``/'') directory.
  832  */
  833 #ifndef _SYS_SYSPROTO_H_
  834 struct chroot_args {
  835         char    *path;
  836 };
  837 #endif
  838 int
  839 sys_chroot(td, uap)
  840         struct thread *td;
  841         struct chroot_args /* {
  842                 char *path;
  843         } */ *uap;
  844 {
  845         struct nameidata nd;
  846         int error;
  847 
  848         error = priv_check(td, PRIV_VFS_CHROOT);
  849         if (error != 0)
  850                 return (error);
  851         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  852             UIO_USERSPACE, uap->path, td);
  853         error = namei(&nd);
  854         if (error != 0)
  855                 goto error;
  856         error = change_dir(nd.ni_vp, td);
  857         if (error != 0)
  858                 goto e_vunlock;
  859 #ifdef MAC
  860         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  861         if (error != 0)
  862                 goto e_vunlock;
  863 #endif
  864         VOP_UNLOCK(nd.ni_vp, 0);
  865         error = change_root(nd.ni_vp, td);
  866         vrele(nd.ni_vp);
  867         NDFREE(&nd, NDF_ONLY_PNBUF);
  868         return (error);
  869 e_vunlock:
  870         vput(nd.ni_vp);
  871 error:
  872         NDFREE(&nd, NDF_ONLY_PNBUF);
  873         return (error);
  874 }
  875 
  876 /*
  877  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  878  * instance.
  879  */
  880 int
  881 change_dir(vp, td)
  882         struct vnode *vp;
  883         struct thread *td;
  884 {
  885 #ifdef MAC
  886         int error;
  887 #endif
  888 
  889         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  890         if (vp->v_type != VDIR)
  891                 return (ENOTDIR);
  892 #ifdef MAC
  893         error = mac_vnode_check_chdir(td->td_ucred, vp);
  894         if (error != 0)
  895                 return (error);
  896 #endif
  897         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  898 }
  899 
  900 /*
  901  * Common routine for kern_chroot() and jail_attach().  The caller is
  902  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  903  * authorize this operation.
  904  */
  905 int
  906 change_root(vp, td)
  907         struct vnode *vp;
  908         struct thread *td;
  909 {
  910         struct filedesc *fdp;
  911         struct vnode *oldvp;
  912         int error;
  913 
  914         fdp = td->td_proc->p_fd;
  915         FILEDESC_XLOCK(fdp);
  916         if (chroot_allow_open_directories == 0 ||
  917             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  918                 error = chroot_refuse_vdir_fds(fdp);
  919                 if (error != 0) {
  920                         FILEDESC_XUNLOCK(fdp);
  921                         return (error);
  922                 }
  923         }
  924         oldvp = fdp->fd_rdir;
  925         fdp->fd_rdir = vp;
  926         VREF(fdp->fd_rdir);
  927         if (!fdp->fd_jdir) {
  928                 fdp->fd_jdir = vp;
  929                 VREF(fdp->fd_jdir);
  930         }
  931         FILEDESC_XUNLOCK(fdp);
  932         vrele(oldvp);
  933         return (0);
  934 }
  935 
  936 static __inline void
  937 flags_to_rights(int flags, cap_rights_t *rightsp)
  938 {
  939 
  940         if (flags & O_EXEC) {
  941                 cap_rights_set(rightsp, CAP_FEXECVE);
  942         } else {
  943                 switch ((flags & O_ACCMODE)) {
  944                 case O_RDONLY:
  945                         cap_rights_set(rightsp, CAP_READ);
  946                         break;
  947                 case O_RDWR:
  948                         cap_rights_set(rightsp, CAP_READ);
  949                         /* FALLTHROUGH */
  950                 case O_WRONLY:
  951                         cap_rights_set(rightsp, CAP_WRITE);
  952                         if (!(flags & (O_APPEND | O_TRUNC)))
  953                                 cap_rights_set(rightsp, CAP_SEEK);
  954                         break;
  955                 }
  956         }
  957 
  958         if (flags & O_CREAT)
  959                 cap_rights_set(rightsp, CAP_CREATE);
  960 
  961         if (flags & O_TRUNC)
  962                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  963 
  964         if (flags & (O_SYNC | O_FSYNC))
  965                 cap_rights_set(rightsp, CAP_FSYNC);
  966 
  967         if (flags & (O_EXLOCK | O_SHLOCK))
  968                 cap_rights_set(rightsp, CAP_FLOCK);
  969 }
  970 
  971 /*
  972  * Check permissions, allocate an open file structure, and call the device
  973  * open routine if any.
  974  */
  975 #ifndef _SYS_SYSPROTO_H_
  976 struct open_args {
  977         char    *path;
  978         int     flags;
  979         int     mode;
  980 };
  981 #endif
  982 int
  983 sys_open(td, uap)
  984         struct thread *td;
  985         register struct open_args /* {
  986                 char *path;
  987                 int flags;
  988                 int mode;
  989         } */ *uap;
  990 {
  991 
  992         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
  993 }
  994 
  995 #ifndef _SYS_SYSPROTO_H_
  996 struct openat_args {
  997         int     fd;
  998         char    *path;
  999         int     flag;
 1000         int     mode;
 1001 };
 1002 #endif
 1003 int
 1004 sys_openat(struct thread *td, struct openat_args *uap)
 1005 {
 1006 
 1007         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1008             uap->mode));
 1009 }
 1010 
 1011 int
 1012 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1013     int mode)
 1014 {
 1015 
 1016         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1017 }
 1018 
 1019 int
 1020 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1021     int flags, int mode)
 1022 {
 1023         struct proc *p = td->td_proc;
 1024         struct filedesc *fdp = p->p_fd;
 1025         struct file *fp;
 1026         struct vnode *vp;
 1027         struct nameidata nd;
 1028         cap_rights_t rights;
 1029         int cmode, error, indx;
 1030 
 1031         indx = -1;
 1032 
 1033         AUDIT_ARG_FFLAGS(flags);
 1034         AUDIT_ARG_MODE(mode);
 1035         /* XXX: audit dirfd */
 1036         cap_rights_init(&rights, CAP_LOOKUP);
 1037         flags_to_rights(flags, &rights);
 1038         /*
 1039          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1040          * may be specified.
 1041          */
 1042         if (flags & O_EXEC) {
 1043                 if (flags & O_ACCMODE)
 1044                         return (EINVAL);
 1045         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1046                 return (EINVAL);
 1047         } else {
 1048                 flags = FFLAGS(flags);
 1049         }
 1050 
 1051         /*
 1052          * Allocate the file descriptor, but don't install a descriptor yet.
 1053          */
 1054         error = falloc_noinstall(td, &fp);
 1055         if (error != 0)
 1056                 return (error);
 1057         /*
 1058          * An extra reference on `fp' has been held for us by
 1059          * falloc_noinstall().
 1060          */
 1061         /* Set the flags early so the finit in devfs can pick them up. */
 1062         fp->f_flag = flags & FMASK;
 1063         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1064         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1065             &rights, td);
 1066         td->td_dupfd = -1;              /* XXX check for fdopen */
 1067         error = vn_open(&nd, &flags, cmode, fp);
 1068         if (error != 0) {
 1069                 /*
 1070                  * If the vn_open replaced the method vector, something
 1071                  * wonderous happened deep below and we just pass it up
 1072                  * pretending we know what we do.
 1073                  */
 1074                 if (error == ENXIO && fp->f_ops != &badfileops)
 1075                         goto success;
 1076 
 1077                 /*
 1078                  * Handle special fdopen() case. bleh.
 1079                  *
 1080                  * Don't do this for relative (capability) lookups; we don't
 1081                  * understand exactly what would happen, and we don't think
 1082                  * that it ever should.
 1083                  */
 1084                 if (nd.ni_strictrelative == 0 &&
 1085                     (error == ENODEV || error == ENXIO) &&
 1086                     td->td_dupfd >= 0) {
 1087                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1088                             &indx);
 1089                         if (error == 0)
 1090                                 goto success;
 1091                 }
 1092 
 1093                 goto bad;
 1094         }
 1095         td->td_dupfd = 0;
 1096         NDFREE(&nd, NDF_ONLY_PNBUF);
 1097         vp = nd.ni_vp;
 1098 
 1099         /*
 1100          * Store the vnode, for any f_type. Typically, the vnode use
 1101          * count is decremented by direct call to vn_closefile() for
 1102          * files that switched type in the cdevsw fdopen() method.
 1103          */
 1104         fp->f_vnode = vp;
 1105         /*
 1106          * If the file wasn't claimed by devfs bind it to the normal
 1107          * vnode operations here.
 1108          */
 1109         if (fp->f_ops == &badfileops) {
 1110                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1111                 fp->f_seqcount = 1;
 1112                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1113                     DTYPE_VNODE, vp, &vnops);
 1114         }
 1115 
 1116         VOP_UNLOCK(vp, 0);
 1117         if (flags & O_TRUNC) {
 1118                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1119                 if (error != 0)
 1120                         goto bad;
 1121         }
 1122 success:
 1123         /*
 1124          * If we haven't already installed the FD (for dupfdopen), do so now.
 1125          */
 1126         if (indx == -1) {
 1127                 struct filecaps *fcaps;
 1128 
 1129 #ifdef CAPABILITIES
 1130                 if (nd.ni_strictrelative == 1)
 1131                         fcaps = &nd.ni_filecaps;
 1132                 else
 1133 #endif
 1134                         fcaps = NULL;
 1135                 error = finstall(td, fp, &indx, flags, fcaps);
 1136                 /* On success finstall() consumes fcaps. */
 1137                 if (error != 0) {
 1138                         filecaps_free(&nd.ni_filecaps);
 1139                         goto bad;
 1140                 }
 1141         } else {
 1142                 filecaps_free(&nd.ni_filecaps);
 1143         }
 1144 
 1145         /*
 1146          * Release our private reference, leaving the one associated with
 1147          * the descriptor table intact.
 1148          */
 1149         fdrop(fp, td);
 1150         td->td_retval[0] = indx;
 1151         return (0);
 1152 bad:
 1153         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1154         fdrop(fp, td);
 1155         return (error);
 1156 }
 1157 
 1158 #ifdef COMPAT_43
 1159 /*
 1160  * Create a file.
 1161  */
 1162 #ifndef _SYS_SYSPROTO_H_
 1163 struct ocreat_args {
 1164         char    *path;
 1165         int     mode;
 1166 };
 1167 #endif
 1168 int
 1169 ocreat(td, uap)
 1170         struct thread *td;
 1171         register struct ocreat_args /* {
 1172                 char *path;
 1173                 int mode;
 1174         } */ *uap;
 1175 {
 1176 
 1177         return (kern_open(td, uap->path, UIO_USERSPACE,
 1178             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1179 }
 1180 #endif /* COMPAT_43 */
 1181 
 1182 /*
 1183  * Create a special file.
 1184  */
 1185 #ifndef _SYS_SYSPROTO_H_
 1186 struct mknod_args {
 1187         char    *path;
 1188         int     mode;
 1189         int     dev;
 1190 };
 1191 #endif
 1192 int
 1193 sys_mknod(td, uap)
 1194         struct thread *td;
 1195         register struct mknod_args /* {
 1196                 char *path;
 1197                 int mode;
 1198                 int dev;
 1199         } */ *uap;
 1200 {
 1201 
 1202         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1203 }
 1204 
 1205 #ifndef _SYS_SYSPROTO_H_
 1206 struct mknodat_args {
 1207         int     fd;
 1208         char    *path;
 1209         mode_t  mode;
 1210         dev_t   dev;
 1211 };
 1212 #endif
 1213 int
 1214 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1215 {
 1216 
 1217         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1218             uap->dev));
 1219 }
 1220 
 1221 int
 1222 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1223     int dev)
 1224 {
 1225 
 1226         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1227 }
 1228 
 1229 int
 1230 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1231     int mode, int dev)
 1232 {
 1233         struct vnode *vp;
 1234         struct mount *mp;
 1235         struct vattr vattr;
 1236         struct nameidata nd;
 1237         cap_rights_t rights;
 1238         int error, whiteout = 0;
 1239 
 1240         AUDIT_ARG_MODE(mode);
 1241         AUDIT_ARG_DEV(dev);
 1242         switch (mode & S_IFMT) {
 1243         case S_IFCHR:
 1244         case S_IFBLK:
 1245                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1246                 if (error == 0 && dev == VNOVAL)
 1247                         error = EINVAL;
 1248                 break;
 1249         case S_IFWHT:
 1250                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1251                 break;
 1252         case S_IFIFO:
 1253                 if (dev == 0)
 1254                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1255                 /* FALLTHROUGH */
 1256         default:
 1257                 error = EINVAL;
 1258                 break;
 1259         }
 1260         if (error != 0)
 1261                 return (error);
 1262 restart:
 1263         bwillwrite();
 1264         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1265             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1266             td);
 1267         if ((error = namei(&nd)) != 0)
 1268                 return (error);
 1269         vp = nd.ni_vp;
 1270         if (vp != NULL) {
 1271                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1272                 if (vp == nd.ni_dvp)
 1273                         vrele(nd.ni_dvp);
 1274                 else
 1275                         vput(nd.ni_dvp);
 1276                 vrele(vp);
 1277                 return (EEXIST);
 1278         } else {
 1279                 VATTR_NULL(&vattr);
 1280                 vattr.va_mode = (mode & ALLPERMS) &
 1281                     ~td->td_proc->p_fd->fd_cmask;
 1282                 vattr.va_rdev = dev;
 1283                 whiteout = 0;
 1284 
 1285                 switch (mode & S_IFMT) {
 1286                 case S_IFCHR:
 1287                         vattr.va_type = VCHR;
 1288                         break;
 1289                 case S_IFBLK:
 1290                         vattr.va_type = VBLK;
 1291                         break;
 1292                 case S_IFWHT:
 1293                         whiteout = 1;
 1294                         break;
 1295                 default:
 1296                         panic("kern_mknod: invalid mode");
 1297                 }
 1298         }
 1299         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1300                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1301                 vput(nd.ni_dvp);
 1302                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1303                         return (error);
 1304                 goto restart;
 1305         }
 1306 #ifdef MAC
 1307         if (error == 0 && !whiteout)
 1308                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1309                     &nd.ni_cnd, &vattr);
 1310 #endif
 1311         if (error == 0) {
 1312                 if (whiteout)
 1313                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1314                 else {
 1315                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1316                                                 &nd.ni_cnd, &vattr);
 1317                         if (error == 0)
 1318                                 vput(nd.ni_vp);
 1319                 }
 1320         }
 1321         NDFREE(&nd, NDF_ONLY_PNBUF);
 1322         vput(nd.ni_dvp);
 1323         vn_finished_write(mp);
 1324         return (error);
 1325 }
 1326 
 1327 /*
 1328  * Create a named pipe.
 1329  */
 1330 #ifndef _SYS_SYSPROTO_H_
 1331 struct mkfifo_args {
 1332         char    *path;
 1333         int     mode;
 1334 };
 1335 #endif
 1336 int
 1337 sys_mkfifo(td, uap)
 1338         struct thread *td;
 1339         register struct mkfifo_args /* {
 1340                 char *path;
 1341                 int mode;
 1342         } */ *uap;
 1343 {
 1344 
 1345         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1346 }
 1347 
 1348 #ifndef _SYS_SYSPROTO_H_
 1349 struct mkfifoat_args {
 1350         int     fd;
 1351         char    *path;
 1352         mode_t  mode;
 1353 };
 1354 #endif
 1355 int
 1356 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1357 {
 1358 
 1359         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1360             uap->mode));
 1361 }
 1362 
 1363 int
 1364 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1365 {
 1366 
 1367         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1368 }
 1369 
 1370 int
 1371 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1372     int mode)
 1373 {
 1374         struct mount *mp;
 1375         struct vattr vattr;
 1376         struct nameidata nd;
 1377         cap_rights_t rights;
 1378         int error;
 1379 
 1380         AUDIT_ARG_MODE(mode);
 1381 restart:
 1382         bwillwrite();
 1383         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1384             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1385             td);
 1386         if ((error = namei(&nd)) != 0)
 1387                 return (error);
 1388         if (nd.ni_vp != NULL) {
 1389                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1390                 if (nd.ni_vp == nd.ni_dvp)
 1391                         vrele(nd.ni_dvp);
 1392                 else
 1393                         vput(nd.ni_dvp);
 1394                 vrele(nd.ni_vp);
 1395                 return (EEXIST);
 1396         }
 1397         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1398                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1399                 vput(nd.ni_dvp);
 1400                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1401                         return (error);
 1402                 goto restart;
 1403         }
 1404         VATTR_NULL(&vattr);
 1405         vattr.va_type = VFIFO;
 1406         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1407 #ifdef MAC
 1408         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1409             &vattr);
 1410         if (error != 0)
 1411                 goto out;
 1412 #endif
 1413         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1414         if (error == 0)
 1415                 vput(nd.ni_vp);
 1416 #ifdef MAC
 1417 out:
 1418 #endif
 1419         vput(nd.ni_dvp);
 1420         vn_finished_write(mp);
 1421         NDFREE(&nd, NDF_ONLY_PNBUF);
 1422         return (error);
 1423 }
 1424 
 1425 /*
 1426  * Make a hard file link.
 1427  */
 1428 #ifndef _SYS_SYSPROTO_H_
 1429 struct link_args {
 1430         char    *path;
 1431         char    *link;
 1432 };
 1433 #endif
 1434 int
 1435 sys_link(td, uap)
 1436         struct thread *td;
 1437         register struct link_args /* {
 1438                 char *path;
 1439                 char *link;
 1440         } */ *uap;
 1441 {
 1442 
 1443         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1444 }
 1445 
 1446 #ifndef _SYS_SYSPROTO_H_
 1447 struct linkat_args {
 1448         int     fd1;
 1449         char    *path1;
 1450         int     fd2;
 1451         char    *path2;
 1452         int     flag;
 1453 };
 1454 #endif
 1455 int
 1456 sys_linkat(struct thread *td, struct linkat_args *uap)
 1457 {
 1458         int flag;
 1459 
 1460         flag = uap->flag;
 1461         if (flag & ~AT_SYMLINK_FOLLOW)
 1462                 return (EINVAL);
 1463 
 1464         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1465             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1466 }
 1467 
 1468 int hardlink_check_uid = 0;
 1469 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1470     &hardlink_check_uid, 0,
 1471     "Unprivileged processes cannot create hard links to files owned by other "
 1472     "users");
 1473 static int hardlink_check_gid = 0;
 1474 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1475     &hardlink_check_gid, 0,
 1476     "Unprivileged processes cannot create hard links to files owned by other "
 1477     "groups");
 1478 
 1479 static int
 1480 can_hardlink(struct vnode *vp, struct ucred *cred)
 1481 {
 1482         struct vattr va;
 1483         int error;
 1484 
 1485         if (!hardlink_check_uid && !hardlink_check_gid)
 1486                 return (0);
 1487 
 1488         error = VOP_GETATTR(vp, &va, cred);
 1489         if (error != 0)
 1490                 return (error);
 1491 
 1492         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1493                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1494                 if (error != 0)
 1495                         return (error);
 1496         }
 1497 
 1498         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1499                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1500                 if (error != 0)
 1501                         return (error);
 1502         }
 1503 
 1504         return (0);
 1505 }
 1506 
 1507 int
 1508 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1509 {
 1510 
 1511         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1512 }
 1513 
 1514 int
 1515 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1516     enum uio_seg segflg, int follow)
 1517 {
 1518         struct vnode *vp;
 1519         struct mount *mp;
 1520         struct nameidata nd;
 1521         cap_rights_t rights;
 1522         int error;
 1523 
 1524 again:
 1525         bwillwrite();
 1526         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1527 
 1528         if ((error = namei(&nd)) != 0)
 1529                 return (error);
 1530         NDFREE(&nd, NDF_ONLY_PNBUF);
 1531         vp = nd.ni_vp;
 1532         if (vp->v_type == VDIR) {
 1533                 vrele(vp);
 1534                 return (EPERM);         /* POSIX */
 1535         }
 1536         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 |
 1537             NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT),
 1538             td);
 1539         if ((error = namei(&nd)) == 0) {
 1540                 if (nd.ni_vp != NULL) {
 1541                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1542                         if (nd.ni_dvp == nd.ni_vp)
 1543                                 vrele(nd.ni_dvp);
 1544                         else
 1545                                 vput(nd.ni_dvp);
 1546                         vrele(nd.ni_vp);
 1547                         vrele(vp);
 1548                         return (EEXIST);
 1549                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1550                         /*
 1551                          * Cross-device link.  No need to recheck
 1552                          * vp->v_type, since it cannot change, except
 1553                          * to VBAD.
 1554                          */
 1555                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1556                         vput(nd.ni_dvp);
 1557                         vrele(vp);
 1558                         return (EXDEV);
 1559                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1560                         error = can_hardlink(vp, td->td_ucred);
 1561 #ifdef MAC
 1562                         if (error == 0)
 1563                                 error = mac_vnode_check_link(td->td_ucred,
 1564                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1565 #endif
 1566                         if (error != 0) {
 1567                                 vput(vp);
 1568                                 vput(nd.ni_dvp);
 1569                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1570                                 return (error);
 1571                         }
 1572                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1573                         if (error != 0) {
 1574                                 vput(vp);
 1575                                 vput(nd.ni_dvp);
 1576                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1577                                 error = vn_start_write(NULL, &mp,
 1578                                     V_XSLEEP | PCATCH);
 1579                                 if (error != 0)
 1580                                         return (error);
 1581                                 goto again;
 1582                         }
 1583                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1584                         VOP_UNLOCK(vp, 0);
 1585                         vput(nd.ni_dvp);
 1586                         vn_finished_write(mp);
 1587                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1588                 } else {
 1589                         vput(nd.ni_dvp);
 1590                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1591                         vrele(vp);
 1592                         goto again;
 1593                 }
 1594         }
 1595         vrele(vp);
 1596         return (error);
 1597 }
 1598 
 1599 /*
 1600  * Make a symbolic link.
 1601  */
 1602 #ifndef _SYS_SYSPROTO_H_
 1603 struct symlink_args {
 1604         char    *path;
 1605         char    *link;
 1606 };
 1607 #endif
 1608 int
 1609 sys_symlink(td, uap)
 1610         struct thread *td;
 1611         register struct symlink_args /* {
 1612                 char *path;
 1613                 char *link;
 1614         } */ *uap;
 1615 {
 1616 
 1617         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1618 }
 1619 
 1620 #ifndef _SYS_SYSPROTO_H_
 1621 struct symlinkat_args {
 1622         char    *path;
 1623         int     fd;
 1624         char    *path2;
 1625 };
 1626 #endif
 1627 int
 1628 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1629 {
 1630 
 1631         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1632             UIO_USERSPACE));
 1633 }
 1634 
 1635 int
 1636 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1637 {
 1638 
 1639         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1640 }
 1641 
 1642 int
 1643 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1644     enum uio_seg segflg)
 1645 {
 1646         struct mount *mp;
 1647         struct vattr vattr;
 1648         char *syspath;
 1649         struct nameidata nd;
 1650         int error;
 1651         cap_rights_t rights;
 1652 
 1653         if (segflg == UIO_SYSSPACE) {
 1654                 syspath = path1;
 1655         } else {
 1656                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1657                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1658                         goto out;
 1659         }
 1660         AUDIT_ARG_TEXT(syspath);
 1661 restart:
 1662         bwillwrite();
 1663         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1664             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1665             td);
 1666         if ((error = namei(&nd)) != 0)
 1667                 goto out;
 1668         if (nd.ni_vp) {
 1669                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1670                 if (nd.ni_vp == nd.ni_dvp)
 1671                         vrele(nd.ni_dvp);
 1672                 else
 1673                         vput(nd.ni_dvp);
 1674                 vrele(nd.ni_vp);
 1675                 error = EEXIST;
 1676                 goto out;
 1677         }
 1678         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1679                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1680                 vput(nd.ni_dvp);
 1681                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1682                         goto out;
 1683                 goto restart;
 1684         }
 1685         VATTR_NULL(&vattr);
 1686         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1687 #ifdef MAC
 1688         vattr.va_type = VLNK;
 1689         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1690             &vattr);
 1691         if (error != 0)
 1692                 goto out2;
 1693 #endif
 1694         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1695         if (error == 0)
 1696                 vput(nd.ni_vp);
 1697 #ifdef MAC
 1698 out2:
 1699 #endif
 1700         NDFREE(&nd, NDF_ONLY_PNBUF);
 1701         vput(nd.ni_dvp);
 1702         vn_finished_write(mp);
 1703 out:
 1704         if (segflg != UIO_SYSSPACE)
 1705                 uma_zfree(namei_zone, syspath);
 1706         return (error);
 1707 }
 1708 
 1709 /*
 1710  * Delete a whiteout from the filesystem.
 1711  */
 1712 int
 1713 sys_undelete(td, uap)
 1714         struct thread *td;
 1715         register struct undelete_args /* {
 1716                 char *path;
 1717         } */ *uap;
 1718 {
 1719         struct mount *mp;
 1720         struct nameidata nd;
 1721         int error;
 1722 
 1723 restart:
 1724         bwillwrite();
 1725         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1726             UIO_USERSPACE, uap->path, td);
 1727         error = namei(&nd);
 1728         if (error != 0)
 1729                 return (error);
 1730 
 1731         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1732                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1733                 if (nd.ni_vp == nd.ni_dvp)
 1734                         vrele(nd.ni_dvp);
 1735                 else
 1736                         vput(nd.ni_dvp);
 1737                 if (nd.ni_vp)
 1738                         vrele(nd.ni_vp);
 1739                 return (EEXIST);
 1740         }
 1741         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1742                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1743                 vput(nd.ni_dvp);
 1744                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1745                         return (error);
 1746                 goto restart;
 1747         }
 1748         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1749         NDFREE(&nd, NDF_ONLY_PNBUF);
 1750         vput(nd.ni_dvp);
 1751         vn_finished_write(mp);
 1752         return (error);
 1753 }
 1754 
 1755 /*
 1756  * Delete a name from the filesystem.
 1757  */
 1758 #ifndef _SYS_SYSPROTO_H_
 1759 struct unlink_args {
 1760         char    *path;
 1761 };
 1762 #endif
 1763 int
 1764 sys_unlink(td, uap)
 1765         struct thread *td;
 1766         struct unlink_args /* {
 1767                 char *path;
 1768         } */ *uap;
 1769 {
 1770 
 1771         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1772 }
 1773 
 1774 #ifndef _SYS_SYSPROTO_H_
 1775 struct unlinkat_args {
 1776         int     fd;
 1777         char    *path;
 1778         int     flag;
 1779 };
 1780 #endif
 1781 int
 1782 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1783 {
 1784         int flag = uap->flag;
 1785         int fd = uap->fd;
 1786         char *path = uap->path;
 1787 
 1788         if (flag & ~AT_REMOVEDIR)
 1789                 return (EINVAL);
 1790 
 1791         if (flag & AT_REMOVEDIR)
 1792                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1793         else
 1794                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1795 }
 1796 
 1797 int
 1798 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1799 {
 1800 
 1801         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1802 }
 1803 
 1804 int
 1805 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1806     ino_t oldinum)
 1807 {
 1808         struct mount *mp;
 1809         struct vnode *vp;
 1810         struct nameidata nd;
 1811         struct stat sb;
 1812         cap_rights_t rights;
 1813         int error;
 1814 
 1815 restart:
 1816         bwillwrite();
 1817         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1818             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1819         if ((error = namei(&nd)) != 0)
 1820                 return (error == EINVAL ? EPERM : error);
 1821         vp = nd.ni_vp;
 1822         if (vp->v_type == VDIR && oldinum == 0) {
 1823                 error = EPERM;          /* POSIX */
 1824         } else if (oldinum != 0 &&
 1825                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1826                   sb.st_ino != oldinum) {
 1827                         error = EIDRM;  /* Identifier removed */
 1828         } else {
 1829                 /*
 1830                  * The root of a mounted filesystem cannot be deleted.
 1831                  *
 1832                  * XXX: can this only be a VDIR case?
 1833                  */
 1834                 if (vp->v_vflag & VV_ROOT)
 1835                         error = EBUSY;
 1836         }
 1837         if (error == 0) {
 1838                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1839                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1840                         vput(nd.ni_dvp);
 1841                         if (vp == nd.ni_dvp)
 1842                                 vrele(vp);
 1843                         else
 1844                                 vput(vp);
 1845                         if ((error = vn_start_write(NULL, &mp,
 1846                             V_XSLEEP | PCATCH)) != 0)
 1847                                 return (error);
 1848                         goto restart;
 1849                 }
 1850 #ifdef MAC
 1851                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1852                     &nd.ni_cnd);
 1853                 if (error != 0)
 1854                         goto out;
 1855 #endif
 1856                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1857                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1858 #ifdef MAC
 1859 out:
 1860 #endif
 1861                 vn_finished_write(mp);
 1862         }
 1863         NDFREE(&nd, NDF_ONLY_PNBUF);
 1864         vput(nd.ni_dvp);
 1865         if (vp == nd.ni_dvp)
 1866                 vrele(vp);
 1867         else
 1868                 vput(vp);
 1869         return (error);
 1870 }
 1871 
 1872 /*
 1873  * Reposition read/write file offset.
 1874  */
 1875 #ifndef _SYS_SYSPROTO_H_
 1876 struct lseek_args {
 1877         int     fd;
 1878         int     pad;
 1879         off_t   offset;
 1880         int     whence;
 1881 };
 1882 #endif
 1883 int
 1884 sys_lseek(td, uap)
 1885         struct thread *td;
 1886         register struct lseek_args /* {
 1887                 int fd;
 1888                 int pad;
 1889                 off_t offset;
 1890                 int whence;
 1891         } */ *uap;
 1892 {
 1893         struct file *fp;
 1894         cap_rights_t rights;
 1895         int error;
 1896 
 1897         AUDIT_ARG_FD(uap->fd);
 1898         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1899         if (error != 0)
 1900                 return (error);
 1901         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1902             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1903         fdrop(fp, td);
 1904         return (error);
 1905 }
 1906 
 1907 #if defined(COMPAT_43)
 1908 /*
 1909  * Reposition read/write file offset.
 1910  */
 1911 #ifndef _SYS_SYSPROTO_H_
 1912 struct olseek_args {
 1913         int     fd;
 1914         long    offset;
 1915         int     whence;
 1916 };
 1917 #endif
 1918 int
 1919 olseek(td, uap)
 1920         struct thread *td;
 1921         register struct olseek_args /* {
 1922                 int fd;
 1923                 long offset;
 1924                 int whence;
 1925         } */ *uap;
 1926 {
 1927         struct lseek_args /* {
 1928                 int fd;
 1929                 int pad;
 1930                 off_t offset;
 1931                 int whence;
 1932         } */ nuap;
 1933 
 1934         nuap.fd = uap->fd;
 1935         nuap.offset = uap->offset;
 1936         nuap.whence = uap->whence;
 1937         return (sys_lseek(td, &nuap));
 1938 }
 1939 #endif /* COMPAT_43 */
 1940 
 1941 /* Version with the 'pad' argument */
 1942 int
 1943 freebsd6_lseek(td, uap)
 1944         struct thread *td;
 1945         register struct freebsd6_lseek_args *uap;
 1946 {
 1947         struct lseek_args ouap;
 1948 
 1949         ouap.fd = uap->fd;
 1950         ouap.offset = uap->offset;
 1951         ouap.whence = uap->whence;
 1952         return (sys_lseek(td, &ouap));
 1953 }
 1954 
 1955 /*
 1956  * Check access permissions using passed credentials.
 1957  */
 1958 static int
 1959 vn_access(vp, user_flags, cred, td)
 1960         struct vnode    *vp;
 1961         int             user_flags;
 1962         struct ucred    *cred;
 1963         struct thread   *td;
 1964 {
 1965         accmode_t accmode;
 1966         int error;
 1967 
 1968         /* Flags == 0 means only check for existence. */
 1969         error = 0;
 1970         if (user_flags) {
 1971                 accmode = 0;
 1972                 if (user_flags & R_OK)
 1973                         accmode |= VREAD;
 1974                 if (user_flags & W_OK)
 1975                         accmode |= VWRITE;
 1976                 if (user_flags & X_OK)
 1977                         accmode |= VEXEC;
 1978 #ifdef MAC
 1979                 error = mac_vnode_check_access(cred, vp, accmode);
 1980                 if (error != 0)
 1981                         return (error);
 1982 #endif
 1983                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1984                         error = VOP_ACCESS(vp, accmode, cred, td);
 1985         }
 1986         return (error);
 1987 }
 1988 
 1989 /*
 1990  * Check access permissions using "real" credentials.
 1991  */
 1992 #ifndef _SYS_SYSPROTO_H_
 1993 struct access_args {
 1994         char    *path;
 1995         int     amode;
 1996 };
 1997 #endif
 1998 int
 1999 sys_access(td, uap)
 2000         struct thread *td;
 2001         register struct access_args /* {
 2002                 char *path;
 2003                 int amode;
 2004         } */ *uap;
 2005 {
 2006 
 2007         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2008 }
 2009 
 2010 #ifndef _SYS_SYSPROTO_H_
 2011 struct faccessat_args {
 2012         int     dirfd;
 2013         char    *path;
 2014         int     amode;
 2015         int     flag;
 2016 }
 2017 #endif
 2018 int
 2019 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2020 {
 2021 
 2022         if (uap->flag & ~AT_EACCESS)
 2023                 return (EINVAL);
 2024         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2025             uap->amode));
 2026 }
 2027 
 2028 int
 2029 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2030 {
 2031 
 2032         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2033 }
 2034 
 2035 int
 2036 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2037     int flag, int amode)
 2038 {
 2039         struct ucred *cred, *tmpcred;
 2040         struct vnode *vp;
 2041         struct nameidata nd;
 2042         cap_rights_t rights;
 2043         int error;
 2044 
 2045         /*
 2046          * Create and modify a temporary credential instead of one that
 2047          * is potentially shared.
 2048          */
 2049         if (!(flag & AT_EACCESS)) {
 2050                 cred = td->td_ucred;
 2051                 tmpcred = crdup(cred);
 2052                 tmpcred->cr_uid = cred->cr_ruid;
 2053                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2054                 td->td_ucred = tmpcred;
 2055         } else
 2056                 cred = tmpcred = td->td_ucred;
 2057         AUDIT_ARG_VALUE(amode);
 2058         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2059             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2060             td);
 2061         if ((error = namei(&nd)) != 0)
 2062                 goto out1;
 2063         vp = nd.ni_vp;
 2064 
 2065         error = vn_access(vp, amode, tmpcred, td);
 2066         NDFREE(&nd, NDF_ONLY_PNBUF);
 2067         vput(vp);
 2068 out1:
 2069         if (!(flag & AT_EACCESS)) {
 2070                 td->td_ucred = cred;
 2071                 crfree(tmpcred);
 2072         }
 2073         return (error);
 2074 }
 2075 
 2076 /*
 2077  * Check access permissions using "effective" credentials.
 2078  */
 2079 #ifndef _SYS_SYSPROTO_H_
 2080 struct eaccess_args {
 2081         char    *path;
 2082         int     amode;
 2083 };
 2084 #endif
 2085 int
 2086 sys_eaccess(td, uap)
 2087         struct thread *td;
 2088         register struct eaccess_args /* {
 2089                 char *path;
 2090                 int amode;
 2091         } */ *uap;
 2092 {
 2093 
 2094         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2095 }
 2096 
 2097 int
 2098 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2099 {
 2100 
 2101         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2102 }
 2103 
 2104 #if defined(COMPAT_43)
 2105 /*
 2106  * Get file status; this version follows links.
 2107  */
 2108 #ifndef _SYS_SYSPROTO_H_
 2109 struct ostat_args {
 2110         char    *path;
 2111         struct ostat *ub;
 2112 };
 2113 #endif
 2114 int
 2115 ostat(td, uap)
 2116         struct thread *td;
 2117         register struct ostat_args /* {
 2118                 char *path;
 2119                 struct ostat *ub;
 2120         } */ *uap;
 2121 {
 2122         struct stat sb;
 2123         struct ostat osb;
 2124         int error;
 2125 
 2126         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2127         if (error != 0)
 2128                 return (error);
 2129         cvtstat(&sb, &osb);
 2130         return (copyout(&osb, uap->ub, sizeof (osb)));
 2131 }
 2132 
 2133 /*
 2134  * Get file status; this version does not follow links.
 2135  */
 2136 #ifndef _SYS_SYSPROTO_H_
 2137 struct olstat_args {
 2138         char    *path;
 2139         struct ostat *ub;
 2140 };
 2141 #endif
 2142 int
 2143 olstat(td, uap)
 2144         struct thread *td;
 2145         register struct olstat_args /* {
 2146                 char *path;
 2147                 struct ostat *ub;
 2148         } */ *uap;
 2149 {
 2150         struct stat sb;
 2151         struct ostat osb;
 2152         int error;
 2153 
 2154         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2155         if (error != 0)
 2156                 return (error);
 2157         cvtstat(&sb, &osb);
 2158         return (copyout(&osb, uap->ub, sizeof (osb)));
 2159 }
 2160 
 2161 /*
 2162  * Convert from an old to a new stat structure.
 2163  */
 2164 void
 2165 cvtstat(st, ost)
 2166         struct stat *st;
 2167         struct ostat *ost;
 2168 {
 2169 
 2170         bzero(ost, sizeof(*ost));
 2171         ost->st_dev = st->st_dev;
 2172         ost->st_ino = st->st_ino;
 2173         ost->st_mode = st->st_mode;
 2174         ost->st_nlink = st->st_nlink;
 2175         ost->st_uid = st->st_uid;
 2176         ost->st_gid = st->st_gid;
 2177         ost->st_rdev = st->st_rdev;
 2178         if (st->st_size < (quad_t)1 << 32)
 2179                 ost->st_size = st->st_size;
 2180         else
 2181                 ost->st_size = -2;
 2182         ost->st_atim = st->st_atim;
 2183         ost->st_mtim = st->st_mtim;
 2184         ost->st_ctim = st->st_ctim;
 2185         ost->st_blksize = st->st_blksize;
 2186         ost->st_blocks = st->st_blocks;
 2187         ost->st_flags = st->st_flags;
 2188         ost->st_gen = st->st_gen;
 2189 }
 2190 #endif /* COMPAT_43 */
 2191 
 2192 /*
 2193  * Get file status; this version follows links.
 2194  */
 2195 #ifndef _SYS_SYSPROTO_H_
 2196 struct stat_args {
 2197         char    *path;
 2198         struct stat *ub;
 2199 };
 2200 #endif
 2201 int
 2202 sys_stat(td, uap)
 2203         struct thread *td;
 2204         register struct stat_args /* {
 2205                 char *path;
 2206                 struct stat *ub;
 2207         } */ *uap;
 2208 {
 2209         struct stat sb;
 2210         int error;
 2211 
 2212         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2213         if (error == 0)
 2214                 error = copyout(&sb, uap->ub, sizeof (sb));
 2215         return (error);
 2216 }
 2217 
 2218 #ifndef _SYS_SYSPROTO_H_
 2219 struct fstatat_args {
 2220         int     fd;
 2221         char    *path;
 2222         struct stat     *buf;
 2223         int     flag;
 2224 }
 2225 #endif
 2226 int
 2227 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2228 {
 2229         struct stat sb;
 2230         int error;
 2231 
 2232         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2233             UIO_USERSPACE, &sb);
 2234         if (error == 0)
 2235                 error = copyout(&sb, uap->buf, sizeof (sb));
 2236         return (error);
 2237 }
 2238 
 2239 int
 2240 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2241 {
 2242 
 2243         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2244 }
 2245 
 2246 int
 2247 kern_statat(struct thread *td, int flag, int fd, char *path,
 2248     enum uio_seg pathseg, struct stat *sbp)
 2249 {
 2250 
 2251         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2252 }
 2253 
 2254 int
 2255 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2256     enum uio_seg pathseg, struct stat *sbp,
 2257     void (*hook)(struct vnode *vp, struct stat *sbp))
 2258 {
 2259         struct nameidata nd;
 2260         struct stat sb;
 2261         cap_rights_t rights;
 2262         int error;
 2263 
 2264         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2265                 return (EINVAL);
 2266 
 2267         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2268             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2269             cap_rights_init(&rights, CAP_FSTAT), td);
 2270 
 2271         if ((error = namei(&nd)) != 0)
 2272                 return (error);
 2273         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2274         if (error == 0) {
 2275                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2276                 if (S_ISREG(sb.st_mode))
 2277                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2278                 if (__predict_false(hook != NULL))
 2279                         hook(nd.ni_vp, &sb);
 2280         }
 2281         NDFREE(&nd, NDF_ONLY_PNBUF);
 2282         vput(nd.ni_vp);
 2283         if (error != 0)
 2284                 return (error);
 2285         *sbp = sb;
 2286 #ifdef KTRACE
 2287         if (KTRPOINT(td, KTR_STRUCT))
 2288                 ktrstat(&sb);
 2289 #endif
 2290         return (0);
 2291 }
 2292 
 2293 /*
 2294  * Get file status; this version does not follow links.
 2295  */
 2296 #ifndef _SYS_SYSPROTO_H_
 2297 struct lstat_args {
 2298         char    *path;
 2299         struct stat *ub;
 2300 };
 2301 #endif
 2302 int
 2303 sys_lstat(td, uap)
 2304         struct thread *td;
 2305         register struct lstat_args /* {
 2306                 char *path;
 2307                 struct stat *ub;
 2308         } */ *uap;
 2309 {
 2310         struct stat sb;
 2311         int error;
 2312 
 2313         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2314         if (error == 0)
 2315                 error = copyout(&sb, uap->ub, sizeof (sb));
 2316         return (error);
 2317 }
 2318 
 2319 int
 2320 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2321 {
 2322 
 2323         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2324             sbp));
 2325 }
 2326 
 2327 /*
 2328  * Implementation of the NetBSD [l]stat() functions.
 2329  */
 2330 void
 2331 cvtnstat(sb, nsb)
 2332         struct stat *sb;
 2333         struct nstat *nsb;
 2334 {
 2335 
 2336         bzero(nsb, sizeof *nsb);
 2337         nsb->st_dev = sb->st_dev;
 2338         nsb->st_ino = sb->st_ino;
 2339         nsb->st_mode = sb->st_mode;
 2340         nsb->st_nlink = sb->st_nlink;
 2341         nsb->st_uid = sb->st_uid;
 2342         nsb->st_gid = sb->st_gid;
 2343         nsb->st_rdev = sb->st_rdev;
 2344         nsb->st_atim = sb->st_atim;
 2345         nsb->st_mtim = sb->st_mtim;
 2346         nsb->st_ctim = sb->st_ctim;
 2347         nsb->st_size = sb->st_size;
 2348         nsb->st_blocks = sb->st_blocks;
 2349         nsb->st_blksize = sb->st_blksize;
 2350         nsb->st_flags = sb->st_flags;
 2351         nsb->st_gen = sb->st_gen;
 2352         nsb->st_birthtim = sb->st_birthtim;
 2353 }
 2354 
 2355 #ifndef _SYS_SYSPROTO_H_
 2356 struct nstat_args {
 2357         char    *path;
 2358         struct nstat *ub;
 2359 };
 2360 #endif
 2361 int
 2362 sys_nstat(td, uap)
 2363         struct thread *td;
 2364         register struct nstat_args /* {
 2365                 char *path;
 2366                 struct nstat *ub;
 2367         } */ *uap;
 2368 {
 2369         struct stat sb;
 2370         struct nstat nsb;
 2371         int error;
 2372 
 2373         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2374         if (error != 0)
 2375                 return (error);
 2376         cvtnstat(&sb, &nsb);
 2377         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2378 }
 2379 
 2380 /*
 2381  * NetBSD lstat.  Get file status; this version does not follow links.
 2382  */
 2383 #ifndef _SYS_SYSPROTO_H_
 2384 struct lstat_args {
 2385         char    *path;
 2386         struct stat *ub;
 2387 };
 2388 #endif
 2389 int
 2390 sys_nlstat(td, uap)
 2391         struct thread *td;
 2392         register struct nlstat_args /* {
 2393                 char *path;
 2394                 struct nstat *ub;
 2395         } */ *uap;
 2396 {
 2397         struct stat sb;
 2398         struct nstat nsb;
 2399         int error;
 2400 
 2401         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2402         if (error != 0)
 2403                 return (error);
 2404         cvtnstat(&sb, &nsb);
 2405         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2406 }
 2407 
 2408 /*
 2409  * Get configurable pathname variables.
 2410  */
 2411 #ifndef _SYS_SYSPROTO_H_
 2412 struct pathconf_args {
 2413         char    *path;
 2414         int     name;
 2415 };
 2416 #endif
 2417 int
 2418 sys_pathconf(td, uap)
 2419         struct thread *td;
 2420         register struct pathconf_args /* {
 2421                 char *path;
 2422                 int name;
 2423         } */ *uap;
 2424 {
 2425 
 2426         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2427 }
 2428 
 2429 #ifndef _SYS_SYSPROTO_H_
 2430 struct lpathconf_args {
 2431         char    *path;
 2432         int     name;
 2433 };
 2434 #endif
 2435 int
 2436 sys_lpathconf(td, uap)
 2437         struct thread *td;
 2438         register struct lpathconf_args /* {
 2439                 char *path;
 2440                 int name;
 2441         } */ *uap;
 2442 {
 2443 
 2444         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2445             NOFOLLOW));
 2446 }
 2447 
 2448 int
 2449 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2450     u_long flags)
 2451 {
 2452         struct nameidata nd;
 2453         int error;
 2454 
 2455         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2456             pathseg, path, td);
 2457         if ((error = namei(&nd)) != 0)
 2458                 return (error);
 2459         NDFREE(&nd, NDF_ONLY_PNBUF);
 2460 
 2461         /* If asynchronous I/O is available, it works for all files. */
 2462         if (name == _PC_ASYNC_IO)
 2463                 td->td_retval[0] = async_io_version;
 2464         else
 2465                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2466         vput(nd.ni_vp);
 2467         return (error);
 2468 }
 2469 
 2470 /*
 2471  * Return target name of a symbolic link.
 2472  */
 2473 #ifndef _SYS_SYSPROTO_H_
 2474 struct readlink_args {
 2475         char    *path;
 2476         char    *buf;
 2477         size_t  count;
 2478 };
 2479 #endif
 2480 int
 2481 sys_readlink(td, uap)
 2482         struct thread *td;
 2483         register struct readlink_args /* {
 2484                 char *path;
 2485                 char *buf;
 2486                 size_t count;
 2487         } */ *uap;
 2488 {
 2489 
 2490         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2491             UIO_USERSPACE, uap->count));
 2492 }
 2493 #ifndef _SYS_SYSPROTO_H_
 2494 struct readlinkat_args {
 2495         int     fd;
 2496         char    *path;
 2497         char    *buf;
 2498         size_t  bufsize;
 2499 };
 2500 #endif
 2501 int
 2502 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2503 {
 2504 
 2505         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2506             uap->buf, UIO_USERSPACE, uap->bufsize));
 2507 }
 2508 
 2509 int
 2510 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2511     enum uio_seg bufseg, size_t count)
 2512 {
 2513 
 2514         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2515             count));
 2516 }
 2517 
 2518 int
 2519 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2520     char *buf, enum uio_seg bufseg, size_t count)
 2521 {
 2522         struct vnode *vp;
 2523         struct iovec aiov;
 2524         struct uio auio;
 2525         struct nameidata nd;
 2526         int error;
 2527 
 2528         if (count > IOSIZE_MAX)
 2529                 return (EINVAL);
 2530 
 2531         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2532             pathseg, path, fd, td);
 2533 
 2534         if ((error = namei(&nd)) != 0)
 2535                 return (error);
 2536         NDFREE(&nd, NDF_ONLY_PNBUF);
 2537         vp = nd.ni_vp;
 2538 #ifdef MAC
 2539         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2540         if (error != 0) {
 2541                 vput(vp);
 2542                 return (error);
 2543         }
 2544 #endif
 2545         if (vp->v_type != VLNK)
 2546                 error = EINVAL;
 2547         else {
 2548                 aiov.iov_base = buf;
 2549                 aiov.iov_len = count;
 2550                 auio.uio_iov = &aiov;
 2551                 auio.uio_iovcnt = 1;
 2552                 auio.uio_offset = 0;
 2553                 auio.uio_rw = UIO_READ;
 2554                 auio.uio_segflg = bufseg;
 2555                 auio.uio_td = td;
 2556                 auio.uio_resid = count;
 2557                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2558                 td->td_retval[0] = count - auio.uio_resid;
 2559         }
 2560         vput(vp);
 2561         return (error);
 2562 }
 2563 
 2564 /*
 2565  * Common implementation code for chflags() and fchflags().
 2566  */
 2567 static int
 2568 setfflags(td, vp, flags)
 2569         struct thread *td;
 2570         struct vnode *vp;
 2571         u_long flags;
 2572 {
 2573         struct mount *mp;
 2574         struct vattr vattr;
 2575         int error;
 2576 
 2577         /* We can't support the value matching VNOVAL. */
 2578         if (flags == VNOVAL)
 2579                 return (EOPNOTSUPP);
 2580 
 2581         /*
 2582          * Prevent non-root users from setting flags on devices.  When
 2583          * a device is reused, users can retain ownership of the device
 2584          * if they are allowed to set flags and programs assume that
 2585          * chown can't fail when done as root.
 2586          */
 2587         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2588                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2589                 if (error != 0)
 2590                         return (error);
 2591         }
 2592 
 2593         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2594                 return (error);
 2595         VATTR_NULL(&vattr);
 2596         vattr.va_flags = flags;
 2597         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2598 #ifdef MAC
 2599         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2600         if (error == 0)
 2601 #endif
 2602                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2603         VOP_UNLOCK(vp, 0);
 2604         vn_finished_write(mp);
 2605         return (error);
 2606 }
 2607 
 2608 /*
 2609  * Change flags of a file given a path name.
 2610  */
 2611 #ifndef _SYS_SYSPROTO_H_
 2612 struct chflags_args {
 2613         const char *path;
 2614         u_long  flags;
 2615 };
 2616 #endif
 2617 int
 2618 sys_chflags(td, uap)
 2619         struct thread *td;
 2620         register struct chflags_args /* {
 2621                 const char *path;
 2622                 u_long flags;
 2623         } */ *uap;
 2624 {
 2625 
 2626         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2627 }
 2628 
 2629 #ifndef _SYS_SYSPROTO_H_
 2630 struct chflagsat_args {
 2631         int     fd;
 2632         const char *path;
 2633         u_long  flags;
 2634         int     atflag;
 2635 }
 2636 #endif
 2637 int
 2638 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2639 {
 2640         int fd = uap->fd;
 2641         const char *path = uap->path;
 2642         u_long flags = uap->flags;
 2643         int atflag = uap->atflag;
 2644 
 2645         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2646                 return (EINVAL);
 2647 
 2648         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2649 }
 2650 
 2651 static int
 2652 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2653     u_long flags)
 2654 {
 2655 
 2656         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2657 }
 2658 
 2659 /*
 2660  * Same as chflags() but doesn't follow symlinks.
 2661  */
 2662 int
 2663 sys_lchflags(td, uap)
 2664         struct thread *td;
 2665         register struct lchflags_args /* {
 2666                 const char *path;
 2667                 u_long flags;
 2668         } */ *uap;
 2669 {
 2670 
 2671         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2672             uap->flags, AT_SYMLINK_NOFOLLOW));
 2673 }
 2674 
 2675 static int
 2676 kern_chflagsat(struct thread *td, int fd, const char *path,
 2677     enum uio_seg pathseg, u_long flags, int atflag)
 2678 {
 2679         struct nameidata nd;
 2680         cap_rights_t rights;
 2681         int error, follow;
 2682 
 2683         AUDIT_ARG_FFLAGS(flags);
 2684         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2685         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2686             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2687         if ((error = namei(&nd)) != 0)
 2688                 return (error);
 2689         NDFREE(&nd, NDF_ONLY_PNBUF);
 2690         error = setfflags(td, nd.ni_vp, flags);
 2691         vrele(nd.ni_vp);
 2692         return (error);
 2693 }
 2694 
 2695 /*
 2696  * Change flags of a file given a file descriptor.
 2697  */
 2698 #ifndef _SYS_SYSPROTO_H_
 2699 struct fchflags_args {
 2700         int     fd;
 2701         u_long  flags;
 2702 };
 2703 #endif
 2704 int
 2705 sys_fchflags(td, uap)
 2706         struct thread *td;
 2707         register struct fchflags_args /* {
 2708                 int fd;
 2709                 u_long flags;
 2710         } */ *uap;
 2711 {
 2712         struct file *fp;
 2713         cap_rights_t rights;
 2714         int error;
 2715 
 2716         AUDIT_ARG_FD(uap->fd);
 2717         AUDIT_ARG_FFLAGS(uap->flags);
 2718         error = getvnode(td->td_proc->p_fd, uap->fd,
 2719             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2720         if (error != 0)
 2721                 return (error);
 2722 #ifdef AUDIT
 2723         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2724         AUDIT_ARG_VNODE1(fp->f_vnode);
 2725         VOP_UNLOCK(fp->f_vnode, 0);
 2726 #endif
 2727         error = setfflags(td, fp->f_vnode, uap->flags);
 2728         fdrop(fp, td);
 2729         return (error);
 2730 }
 2731 
 2732 /*
 2733  * Common implementation code for chmod(), lchmod() and fchmod().
 2734  */
 2735 int
 2736 setfmode(td, cred, vp, mode)
 2737         struct thread *td;
 2738         struct ucred *cred;
 2739         struct vnode *vp;
 2740         int mode;
 2741 {
 2742         struct mount *mp;
 2743         struct vattr vattr;
 2744         int error;
 2745 
 2746         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2747                 return (error);
 2748         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2749         VATTR_NULL(&vattr);
 2750         vattr.va_mode = mode & ALLPERMS;
 2751 #ifdef MAC
 2752         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2753         if (error == 0)
 2754 #endif
 2755                 error = VOP_SETATTR(vp, &vattr, cred);
 2756         VOP_UNLOCK(vp, 0);
 2757         vn_finished_write(mp);
 2758         return (error);
 2759 }
 2760 
 2761 /*
 2762  * Change mode of a file given path name.
 2763  */
 2764 #ifndef _SYS_SYSPROTO_H_
 2765 struct chmod_args {
 2766         char    *path;
 2767         int     mode;
 2768 };
 2769 #endif
 2770 int
 2771 sys_chmod(td, uap)
 2772         struct thread *td;
 2773         register struct chmod_args /* {
 2774                 char *path;
 2775                 int mode;
 2776         } */ *uap;
 2777 {
 2778 
 2779         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2780 }
 2781 
 2782 #ifndef _SYS_SYSPROTO_H_
 2783 struct fchmodat_args {
 2784         int     dirfd;
 2785         char    *path;
 2786         mode_t  mode;
 2787         int     flag;
 2788 }
 2789 #endif
 2790 int
 2791 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2792 {
 2793         int flag = uap->flag;
 2794         int fd = uap->fd;
 2795         char *path = uap->path;
 2796         mode_t mode = uap->mode;
 2797 
 2798         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2799                 return (EINVAL);
 2800 
 2801         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2802 }
 2803 
 2804 int
 2805 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2806 {
 2807 
 2808         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2809 }
 2810 
 2811 /*
 2812  * Change mode of a file given path name (don't follow links.)
 2813  */
 2814 #ifndef _SYS_SYSPROTO_H_
 2815 struct lchmod_args {
 2816         char    *path;
 2817         int     mode;
 2818 };
 2819 #endif
 2820 int
 2821 sys_lchmod(td, uap)
 2822         struct thread *td;
 2823         register struct lchmod_args /* {
 2824                 char *path;
 2825                 int mode;
 2826         } */ *uap;
 2827 {
 2828 
 2829         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2830             uap->mode, AT_SYMLINK_NOFOLLOW));
 2831 }
 2832 
 2833 int
 2834 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2835     mode_t mode, int flag)
 2836 {
 2837         struct nameidata nd;
 2838         cap_rights_t rights;
 2839         int error, follow;
 2840 
 2841         AUDIT_ARG_MODE(mode);
 2842         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2843         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2844             cap_rights_init(&rights, CAP_FCHMOD), td);
 2845         if ((error = namei(&nd)) != 0)
 2846                 return (error);
 2847         NDFREE(&nd, NDF_ONLY_PNBUF);
 2848         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2849         vrele(nd.ni_vp);
 2850         return (error);
 2851 }
 2852 
 2853 /*
 2854  * Change mode of a file given a file descriptor.
 2855  */
 2856 #ifndef _SYS_SYSPROTO_H_
 2857 struct fchmod_args {
 2858         int     fd;
 2859         int     mode;
 2860 };
 2861 #endif
 2862 int
 2863 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2864 {
 2865         struct file *fp;
 2866         cap_rights_t rights;
 2867         int error;
 2868 
 2869         AUDIT_ARG_FD(uap->fd);
 2870         AUDIT_ARG_MODE(uap->mode);
 2871 
 2872         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2873         if (error != 0)
 2874                 return (error);
 2875         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2876         fdrop(fp, td);
 2877         return (error);
 2878 }
 2879 
 2880 /*
 2881  * Common implementation for chown(), lchown(), and fchown()
 2882  */
 2883 int
 2884 setfown(td, cred, vp, uid, gid)
 2885         struct thread *td;
 2886         struct ucred *cred;
 2887         struct vnode *vp;
 2888         uid_t uid;
 2889         gid_t gid;
 2890 {
 2891         struct mount *mp;
 2892         struct vattr vattr;
 2893         int error;
 2894 
 2895         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2896                 return (error);
 2897         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2898         VATTR_NULL(&vattr);
 2899         vattr.va_uid = uid;
 2900         vattr.va_gid = gid;
 2901 #ifdef MAC
 2902         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2903             vattr.va_gid);
 2904         if (error == 0)
 2905 #endif
 2906                 error = VOP_SETATTR(vp, &vattr, cred);
 2907         VOP_UNLOCK(vp, 0);
 2908         vn_finished_write(mp);
 2909         return (error);
 2910 }
 2911 
 2912 /*
 2913  * Set ownership given a path name.
 2914  */
 2915 #ifndef _SYS_SYSPROTO_H_
 2916 struct chown_args {
 2917         char    *path;
 2918         int     uid;
 2919         int     gid;
 2920 };
 2921 #endif
 2922 int
 2923 sys_chown(td, uap)
 2924         struct thread *td;
 2925         register struct chown_args /* {
 2926                 char *path;
 2927                 int uid;
 2928                 int gid;
 2929         } */ *uap;
 2930 {
 2931 
 2932         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2933 }
 2934 
 2935 #ifndef _SYS_SYSPROTO_H_
 2936 struct fchownat_args {
 2937         int fd;
 2938         const char * path;
 2939         uid_t uid;
 2940         gid_t gid;
 2941         int flag;
 2942 };
 2943 #endif
 2944 int
 2945 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2946 {
 2947         int flag;
 2948 
 2949         flag = uap->flag;
 2950         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2951                 return (EINVAL);
 2952 
 2953         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2954             uap->gid, uap->flag));
 2955 }
 2956 
 2957 int
 2958 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2959     int gid)
 2960 {
 2961 
 2962         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2963 }
 2964 
 2965 int
 2966 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2967     int uid, int gid, int flag)
 2968 {
 2969         struct nameidata nd;
 2970         cap_rights_t rights;
 2971         int error, follow;
 2972 
 2973         AUDIT_ARG_OWNER(uid, gid);
 2974         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2975         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2976             cap_rights_init(&rights, CAP_FCHOWN), td);
 2977 
 2978         if ((error = namei(&nd)) != 0)
 2979                 return (error);
 2980         NDFREE(&nd, NDF_ONLY_PNBUF);
 2981         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2982         vrele(nd.ni_vp);
 2983         return (error);
 2984 }
 2985 
 2986 /*
 2987  * Set ownership given a path name, do not cross symlinks.
 2988  */
 2989 #ifndef _SYS_SYSPROTO_H_
 2990 struct lchown_args {
 2991         char    *path;
 2992         int     uid;
 2993         int     gid;
 2994 };
 2995 #endif
 2996 int
 2997 sys_lchown(td, uap)
 2998         struct thread *td;
 2999         register struct lchown_args /* {
 3000                 char *path;
 3001                 int uid;
 3002                 int gid;
 3003         } */ *uap;
 3004 {
 3005 
 3006         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3007 }
 3008 
 3009 int
 3010 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3011     int gid)
 3012 {
 3013 
 3014         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3015             AT_SYMLINK_NOFOLLOW));
 3016 }
 3017 
 3018 /*
 3019  * Set ownership given a file descriptor.
 3020  */
 3021 #ifndef _SYS_SYSPROTO_H_
 3022 struct fchown_args {
 3023         int     fd;
 3024         int     uid;
 3025         int     gid;
 3026 };
 3027 #endif
 3028 int
 3029 sys_fchown(td, uap)
 3030         struct thread *td;
 3031         register struct fchown_args /* {
 3032                 int fd;
 3033                 int uid;
 3034                 int gid;
 3035         } */ *uap;
 3036 {
 3037         struct file *fp;
 3038         cap_rights_t rights;
 3039         int error;
 3040 
 3041         AUDIT_ARG_FD(uap->fd);
 3042         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3043         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3044         if (error != 0)
 3045                 return (error);
 3046         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3047         fdrop(fp, td);
 3048         return (error);
 3049 }
 3050 
 3051 /*
 3052  * Common implementation code for utimes(), lutimes(), and futimes().
 3053  */
 3054 static int
 3055 getutimes(usrtvp, tvpseg, tsp)
 3056         const struct timeval *usrtvp;
 3057         enum uio_seg tvpseg;
 3058         struct timespec *tsp;
 3059 {
 3060         struct timeval tv[2];
 3061         const struct timeval *tvp;
 3062         int error;
 3063 
 3064         if (usrtvp == NULL) {
 3065                 vfs_timestamp(&tsp[0]);
 3066                 tsp[1] = tsp[0];
 3067         } else {
 3068                 if (tvpseg == UIO_SYSSPACE) {
 3069                         tvp = usrtvp;
 3070                 } else {
 3071                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3072                                 return (error);
 3073                         tvp = tv;
 3074                 }
 3075 
 3076                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3077                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3078                         return (EINVAL);
 3079                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3080                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3081         }
 3082         return (0);
 3083 }
 3084 
 3085 /*
 3086  * Common implementation code for futimens(), utimensat().
 3087  */
 3088 #define UTIMENS_NULL    0x1
 3089 #define UTIMENS_EXIT    0x2
 3090 static int
 3091 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3092     struct timespec *tsp, int *retflags)
 3093 {
 3094         struct timespec tsnow;
 3095         int error;
 3096 
 3097         vfs_timestamp(&tsnow);
 3098         *retflags = 0;
 3099         if (usrtsp == NULL) {
 3100                 tsp[0] = tsnow;
 3101                 tsp[1] = tsnow;
 3102                 *retflags |= UTIMENS_NULL;
 3103                 return (0);
 3104         }
 3105         if (tspseg == UIO_SYSSPACE) {
 3106                 tsp[0] = usrtsp[0];
 3107                 tsp[1] = usrtsp[1];
 3108         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3109                 return (error);
 3110         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3111                 *retflags |= UTIMENS_EXIT;
 3112         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3113                 *retflags |= UTIMENS_NULL;
 3114         if (tsp[0].tv_nsec == UTIME_OMIT)
 3115                 tsp[0].tv_sec = VNOVAL;
 3116         else if (tsp[0].tv_nsec == UTIME_NOW)
 3117                 tsp[0] = tsnow;
 3118         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3119                 return (EINVAL);
 3120         if (tsp[1].tv_nsec == UTIME_OMIT)
 3121                 tsp[1].tv_sec = VNOVAL;
 3122         else if (tsp[1].tv_nsec == UTIME_NOW)
 3123                 tsp[1] = tsnow;
 3124         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3125                 return (EINVAL);
 3126 
 3127         return (0);
 3128 }
 3129 
 3130 /*
 3131  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3132  * and utimensat().
 3133  */
 3134 static int
 3135 setutimes(td, vp, ts, numtimes, nullflag)
 3136         struct thread *td;
 3137         struct vnode *vp;
 3138         const struct timespec *ts;
 3139         int numtimes;
 3140         int nullflag;
 3141 {
 3142         struct mount *mp;
 3143         struct vattr vattr;
 3144         int error, setbirthtime;
 3145 
 3146         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3147                 return (error);
 3148         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3149         setbirthtime = 0;
 3150         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3151             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3152                 setbirthtime = 1;
 3153         VATTR_NULL(&vattr);
 3154         vattr.va_atime = ts[0];
 3155         vattr.va_mtime = ts[1];
 3156         if (setbirthtime)
 3157                 vattr.va_birthtime = ts[1];
 3158         if (numtimes > 2)
 3159                 vattr.va_birthtime = ts[2];
 3160         if (nullflag)
 3161                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3162 #ifdef MAC
 3163         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3164             vattr.va_mtime);
 3165 #endif
 3166         if (error == 0)
 3167                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3168         VOP_UNLOCK(vp, 0);
 3169         vn_finished_write(mp);
 3170         return (error);
 3171 }
 3172 
 3173 /*
 3174  * Set the access and modification times of a file.
 3175  */
 3176 #ifndef _SYS_SYSPROTO_H_
 3177 struct utimes_args {
 3178         char    *path;
 3179         struct  timeval *tptr;
 3180 };
 3181 #endif
 3182 int
 3183 sys_utimes(td, uap)
 3184         struct thread *td;
 3185         register struct utimes_args /* {
 3186                 char *path;
 3187                 struct timeval *tptr;
 3188         } */ *uap;
 3189 {
 3190 
 3191         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3192             UIO_USERSPACE));
 3193 }
 3194 
 3195 #ifndef _SYS_SYSPROTO_H_
 3196 struct futimesat_args {
 3197         int fd;
 3198         const char * path;
 3199         const struct timeval * times;
 3200 };
 3201 #endif
 3202 int
 3203 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3204 {
 3205 
 3206         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3207             uap->times, UIO_USERSPACE));
 3208 }
 3209 
 3210 int
 3211 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3212     struct timeval *tptr, enum uio_seg tptrseg)
 3213 {
 3214 
 3215         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3216 }
 3217 
 3218 int
 3219 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3220     struct timeval *tptr, enum uio_seg tptrseg)
 3221 {
 3222         struct nameidata nd;
 3223         struct timespec ts[2];
 3224         cap_rights_t rights;
 3225         int error;
 3226 
 3227         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3228                 return (error);
 3229         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3230             cap_rights_init(&rights, CAP_FUTIMES), td);
 3231 
 3232         if ((error = namei(&nd)) != 0)
 3233                 return (error);
 3234         NDFREE(&nd, NDF_ONLY_PNBUF);
 3235         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3236         vrele(nd.ni_vp);
 3237         return (error);
 3238 }
 3239 
 3240 /*
 3241  * Set the access and modification times of a file.
 3242  */
 3243 #ifndef _SYS_SYSPROTO_H_
 3244 struct lutimes_args {
 3245         char    *path;
 3246         struct  timeval *tptr;
 3247 };
 3248 #endif
 3249 int
 3250 sys_lutimes(td, uap)
 3251         struct thread *td;
 3252         register struct lutimes_args /* {
 3253                 char *path;
 3254                 struct timeval *tptr;
 3255         } */ *uap;
 3256 {
 3257 
 3258         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3259             UIO_USERSPACE));
 3260 }
 3261 
 3262 int
 3263 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3264     struct timeval *tptr, enum uio_seg tptrseg)
 3265 {
 3266         struct timespec ts[2];
 3267         struct nameidata nd;
 3268         int error;
 3269 
 3270         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3271                 return (error);
 3272         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3273         if ((error = namei(&nd)) != 0)
 3274                 return (error);
 3275         NDFREE(&nd, NDF_ONLY_PNBUF);
 3276         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3277         vrele(nd.ni_vp);
 3278         return (error);
 3279 }
 3280 
 3281 /*
 3282  * Set the access and modification times of a file.
 3283  */
 3284 #ifndef _SYS_SYSPROTO_H_
 3285 struct futimes_args {
 3286         int     fd;
 3287         struct  timeval *tptr;
 3288 };
 3289 #endif
 3290 int
 3291 sys_futimes(td, uap)
 3292         struct thread *td;
 3293         register struct futimes_args /* {
 3294                 int  fd;
 3295                 struct timeval *tptr;
 3296         } */ *uap;
 3297 {
 3298 
 3299         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3300 }
 3301 
 3302 int
 3303 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3304     enum uio_seg tptrseg)
 3305 {
 3306         struct timespec ts[2];
 3307         struct file *fp;
 3308         cap_rights_t rights;
 3309         int error;
 3310 
 3311         AUDIT_ARG_FD(fd);
 3312         error = getutimes(tptr, tptrseg, ts);
 3313         if (error != 0)
 3314                 return (error);
 3315         error = getvnode(td->td_proc->p_fd, fd,
 3316             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3317         if (error != 0)
 3318                 return (error);
 3319 #ifdef AUDIT
 3320         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3321         AUDIT_ARG_VNODE1(fp->f_vnode);
 3322         VOP_UNLOCK(fp->f_vnode, 0);
 3323 #endif
 3324         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3325         fdrop(fp, td);
 3326         return (error);
 3327 }
 3328 
 3329 int
 3330 sys_futimens(struct thread *td, struct futimens_args *uap)
 3331 {
 3332 
 3333         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3334 }
 3335 
 3336 int
 3337 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3338     enum uio_seg tptrseg)
 3339 {
 3340         struct timespec ts[2];
 3341         struct file *fp;
 3342         cap_rights_t rights;
 3343         int error, flags;
 3344 
 3345         AUDIT_ARG_FD(fd);
 3346         error = getutimens(tptr, tptrseg, ts, &flags);
 3347         if (error != 0)
 3348                 return (error);
 3349         if (flags & UTIMENS_EXIT)
 3350                 return (0);
 3351         error = getvnode(td->td_proc->p_fd, fd,
 3352             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3353         if (error != 0)
 3354                 return (error);
 3355 #ifdef AUDIT
 3356         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3357         AUDIT_ARG_VNODE1(fp->f_vnode);
 3358         VOP_UNLOCK(fp->f_vnode, 0);
 3359 #endif
 3360         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3361         fdrop(fp, td);
 3362         return (error);
 3363 }
 3364 
 3365 int
 3366 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3367 {
 3368 
 3369         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3370             uap->times, UIO_USERSPACE, uap->flag));
 3371 }
 3372 
 3373 int
 3374 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3375     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3376 {
 3377         struct nameidata nd;
 3378         struct timespec ts[2];
 3379         cap_rights_t rights;
 3380         int error, flags;
 3381 
 3382         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3383                 return (EINVAL);
 3384 
 3385         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3386                 return (error);
 3387         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 3388             FOLLOW) | AUDITVNODE1, pathseg, path, fd,
 3389             cap_rights_init(&rights, CAP_FUTIMES), td);
 3390         if ((error = namei(&nd)) != 0)
 3391                 return (error);
 3392         /*
 3393          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3394          * POSIX states:
 3395          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3396          * "Search permission is denied by a component of the path prefix."
 3397          */
 3398         NDFREE(&nd, NDF_ONLY_PNBUF);
 3399         if ((flags & UTIMENS_EXIT) == 0)
 3400                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3401         vrele(nd.ni_vp);
 3402         return (error);
 3403 }
 3404 
 3405 /*
 3406  * Truncate a file given its path name.
 3407  */
 3408 #ifndef _SYS_SYSPROTO_H_
 3409 struct truncate_args {
 3410         char    *path;
 3411         int     pad;
 3412         off_t   length;
 3413 };
 3414 #endif
 3415 int
 3416 sys_truncate(td, uap)
 3417         struct thread *td;
 3418         register struct truncate_args /* {
 3419                 char *path;
 3420                 int pad;
 3421                 off_t length;
 3422         } */ *uap;
 3423 {
 3424 
 3425         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3426 }
 3427 
 3428 int
 3429 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3430 {
 3431         struct mount *mp;
 3432         struct vnode *vp;
 3433         void *rl_cookie;
 3434         struct vattr vattr;
 3435         struct nameidata nd;
 3436         int error;
 3437 
 3438         if (length < 0)
 3439                 return(EINVAL);
 3440         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3441         if ((error = namei(&nd)) != 0)
 3442                 return (error);
 3443         vp = nd.ni_vp;
 3444         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3445         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3446                 vn_rangelock_unlock(vp, rl_cookie);
 3447                 vrele(vp);
 3448                 return (error);
 3449         }
 3450         NDFREE(&nd, NDF_ONLY_PNBUF);
 3451         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3452         if (vp->v_type == VDIR)
 3453                 error = EISDIR;
 3454 #ifdef MAC
 3455         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3456         }
 3457 #endif
 3458         else if ((error = vn_writechk(vp)) == 0 &&
 3459             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3460                 VATTR_NULL(&vattr);
 3461                 vattr.va_size = length;
 3462                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3463         }
 3464         VOP_UNLOCK(vp, 0);
 3465         vn_finished_write(mp);
 3466         vn_rangelock_unlock(vp, rl_cookie);
 3467         vrele(vp);
 3468         return (error);
 3469 }
 3470 
 3471 #if defined(COMPAT_43)
 3472 /*
 3473  * Truncate a file given its path name.
 3474  */
 3475 #ifndef _SYS_SYSPROTO_H_
 3476 struct otruncate_args {
 3477         char    *path;
 3478         long    length;
 3479 };
 3480 #endif
 3481 int
 3482 otruncate(td, uap)
 3483         struct thread *td;
 3484         register struct otruncate_args /* {
 3485                 char *path;
 3486                 long length;
 3487         } */ *uap;
 3488 {
 3489         struct truncate_args /* {
 3490                 char *path;
 3491                 int pad;
 3492                 off_t length;
 3493         } */ nuap;
 3494 
 3495         nuap.path = uap->path;
 3496         nuap.length = uap->length;
 3497         return (sys_truncate(td, &nuap));
 3498 }
 3499 #endif /* COMPAT_43 */
 3500 
 3501 /* Versions with the pad argument */
 3502 int
 3503 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3504 {
 3505         struct truncate_args ouap;
 3506 
 3507         ouap.path = uap->path;
 3508         ouap.length = uap->length;
 3509         return (sys_truncate(td, &ouap));
 3510 }
 3511 
 3512 int
 3513 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3514 {
 3515         struct ftruncate_args ouap;
 3516 
 3517         ouap.fd = uap->fd;
 3518         ouap.length = uap->length;
 3519         return (sys_ftruncate(td, &ouap));
 3520 }
 3521 
 3522 /*
 3523  * Sync an open file.
 3524  */
 3525 #ifndef _SYS_SYSPROTO_H_
 3526 struct fsync_args {
 3527         int     fd;
 3528 };
 3529 #endif
 3530 int
 3531 sys_fsync(td, uap)
 3532         struct thread *td;
 3533         struct fsync_args /* {
 3534                 int fd;
 3535         } */ *uap;
 3536 {
 3537         struct vnode *vp;
 3538         struct mount *mp;
 3539         struct file *fp;
 3540         cap_rights_t rights;
 3541         int error, lock_flags;
 3542 
 3543         AUDIT_ARG_FD(uap->fd);
 3544         error = getvnode(td->td_proc->p_fd, uap->fd,
 3545             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3546         if (error != 0)
 3547                 return (error);
 3548         vp = fp->f_vnode;
 3549         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3550         if (error != 0)
 3551                 goto drop;
 3552         if (MNT_SHARED_WRITES(mp) ||
 3553             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3554                 lock_flags = LK_SHARED;
 3555         } else {
 3556                 lock_flags = LK_EXCLUSIVE;
 3557         }
 3558         vn_lock(vp, lock_flags | LK_RETRY);
 3559         AUDIT_ARG_VNODE1(vp);
 3560         if (vp->v_object != NULL) {
 3561                 VM_OBJECT_WLOCK(vp->v_object);
 3562                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3563                 VM_OBJECT_WUNLOCK(vp->v_object);
 3564         }
 3565         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3566 
 3567         VOP_UNLOCK(vp, 0);
 3568         vn_finished_write(mp);
 3569 drop:
 3570         fdrop(fp, td);
 3571         return (error);
 3572 }
 3573 
 3574 /*
 3575  * Rename files.  Source and destination must either both be directories, or
 3576  * both not be directories.  If target is a directory, it must be empty.
 3577  */
 3578 #ifndef _SYS_SYSPROTO_H_
 3579 struct rename_args {
 3580         char    *from;
 3581         char    *to;
 3582 };
 3583 #endif
 3584 int
 3585 sys_rename(td, uap)
 3586         struct thread *td;
 3587         register struct rename_args /* {
 3588                 char *from;
 3589                 char *to;
 3590         } */ *uap;
 3591 {
 3592 
 3593         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3594 }
 3595 
 3596 #ifndef _SYS_SYSPROTO_H_
 3597 struct renameat_args {
 3598         int     oldfd;
 3599         char    *old;
 3600         int     newfd;
 3601         char    *new;
 3602 };
 3603 #endif
 3604 int
 3605 sys_renameat(struct thread *td, struct renameat_args *uap)
 3606 {
 3607 
 3608         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3609             UIO_USERSPACE));
 3610 }
 3611 
 3612 int
 3613 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3614 {
 3615 
 3616         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3617 }
 3618 
/*
 * Rename the object named by old (looked up relative to oldfd) to the name
 * new (looked up relative to newfd).
 *
 * Both names are resolved with namei().  If a write cannot be started on
 * the source vnode without sleeping, everything obtained so far is
 * released, the thread waits, and the whole operation is retried from the
 * "again" label.  Internally an error value of -1 marks the case where
 * source and target are the same vnode; it is reported to the caller as 0.
 *
 * Returns 0 on success or an errno value.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	cap_rights_t rights;
	int error;

again:
	bwillwrite();
	/*
	 * Source lookup.  With MAC the parent and leaf are returned locked
	 * so the MAC check can run on them; they are unlocked again below.
	 */
#ifdef MAC
	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
	    AUDITVNODE1, pathseg, old, oldfd,
	    cap_rights_init(&rights, CAP_RENAMEAT), td);
#else
	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
	    pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
#endif

	if ((error = namei(&fromnd)) != 0)
		return (error);
#ifdef MAC
	/*
	 * NOTE(review): the result of this check is not examined before
	 * error is overwritten by the target namei() below -- confirm
	 * against upstream whether that is intended.
	 */
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
	    fromnd.ni_vp, &fromnd.ni_cnd);
	VOP_UNLOCK(fromnd.ni_dvp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
	fvp = fromnd.ni_vp;
	/* Target lookup; the target leaf may legitimately not exist. */
	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
	    cap_rights_init(&rights, CAP_LINKAT), td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	error = vn_start_write(fvp, &mp, V_NOWAIT);
	if (error != 0) {
		/*
		 * The write could not be started without sleeping.  Undo
		 * both lookups, wait (V_XSLEEP), and start over.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vrele(tond.ni_startdir);
		if (fromnd.ni_startdir != NULL)
			vrele(fromnd.ni_startdir);
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error != 0)
			return (error);
		goto again;
	}
	if (tvp != NULL) {
		/* Source and existing target must agree on being a directory. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
#ifdef CAPABILITIES
		if (newfd != AT_FDCWD) {
			/*
			 * If the target already exists we require CAP_UNLINKAT
			 * from 'newfd'.
			 */
			error = cap_check(&tond.ni_filecaps.fc_rights,
			    cap_rights_init(&rights, CAP_UNLINKAT));
			if (error != 0)
				goto out;
		}
#endif
	}
	/* The source may not be the directory that is to hold the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = -1;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	/*
	 * VOP_RENAME() is only called when no error (including the -1
	 * marker) is pending.  On that path no vnodes are released here --
	 * presumably VOP_RENAME() consumes them; confirm against its
	 * contract.  On the error path the lookup results are released
	 * explicitly.
	 */
	if (error == 0) {
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* -1 means "same vnode, nothing to do": report success. */
	if (error == -1)
		return (0);
	return (error);
}
 3749 
 3750 /*
 3751  * Make a directory file.
 3752  */
 3753 #ifndef _SYS_SYSPROTO_H_
 3754 struct mkdir_args {
 3755         char    *path;
 3756         int     mode;
 3757 };
 3758 #endif
 3759 int
 3760 sys_mkdir(td, uap)
 3761         struct thread *td;
 3762         register struct mkdir_args /* {
 3763                 char *path;
 3764                 int mode;
 3765         } */ *uap;
 3766 {
 3767 
 3768         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3769 }
 3770 
 3771 #ifndef _SYS_SYSPROTO_H_
 3772 struct mkdirat_args {
 3773         int     fd;
 3774         char    *path;
 3775         mode_t  mode;
 3776 };
 3777 #endif
 3778 int
 3779 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3780 {
 3781 
 3782         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3783 }
 3784 
 3785 int
 3786 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3787 {
 3788 
 3789         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3790 }
 3791 
/*
 * Create a directory at path, resolved relative to fd.
 *
 * The new directory's mode is (mode & ACCESSPERMS) with the bits in the
 * process's fd_cmask cleared.  Returns EEXIST if the name already resolves
 * to a vnode.  If a write cannot be started on the parent's mount without
 * sleeping, the parent is released, the thread waits, and the lookup is
 * retried from the "restart" label.
 *
 * Returns 0 on success or an errno value.
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
	    td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	/* A non-NULL leaf means the name is already taken. */
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The write could not be started without sleeping.  Release
		 * the parent, wait (V_XSLEEP), and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* The new directory's vnode is only released when VOP_MKDIR succeeded. */
	if (error == 0)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	return (error);
}
 3854 
 3855 /*
 3856  * Remove a directory file.
 3857  */
 3858 #ifndef _SYS_SYSPROTO_H_
 3859 struct rmdir_args {
 3860         char    *path;
 3861 };
 3862 #endif
 3863 int
 3864 sys_rmdir(td, uap)
 3865         struct thread *td;
 3866         struct rmdir_args /* {
 3867                 char *path;
 3868         } */ *uap;
 3869 {
 3870 
 3871         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3872 }
 3873 
 3874 int
 3875 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3876 {
 3877 
 3878         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3879 }
 3880 
 3881 int
 3882 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3883 {
 3884         struct mount *mp;
 3885         struct vnode *vp;
 3886         struct nameidata nd;
 3887         cap_rights_t rights;
 3888         int error;
 3889 
 3890 restart:
 3891         bwillwrite();
 3892         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3893             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3894         if ((error = namei(&nd)) != 0)
 3895                 return (error);
 3896         vp = nd.ni_vp;
 3897         if (vp->v_type != VDIR) {
 3898                 error = ENOTDIR;
 3899                 goto out;
 3900         }
 3901         /*
 3902          * No rmdir "." please.
 3903          */
 3904         if (nd.ni_dvp == vp) {
 3905                 error = EINVAL;
 3906                 goto out;
 3907         }
 3908         /*
 3909          * The root of a mounted filesystem cannot be deleted.
 3910          */
 3911         if (vp->v_vflag & VV_ROOT) {
 3912                 error = EBUSY;
 3913                 goto out;
 3914         }
 3915 #ifdef MAC
 3916         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3917             &nd.ni_cnd);
 3918         if (error != 0)
 3919                 goto out;
 3920 #endif
 3921         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3922                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3923                 vput(vp);
 3924                 if (nd.ni_dvp == vp)
 3925                         vrele(nd.ni_dvp);
 3926                 else
 3927                         vput(nd.ni_dvp);
 3928                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3929                         return (error);
 3930                 goto restart;
 3931         }
 3932         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3933         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3934         vn_finished_write(mp);
 3935 out:
 3936         NDFREE(&nd, NDF_ONLY_PNBUF);
 3937         vput(vp);
 3938         if (nd.ni_dvp == vp)
 3939                 vrele(nd.ni_dvp);
 3940         else
 3941                 vput(nd.ni_dvp);
 3942         return (error);
 3943 }
 3944 
 3945 #ifdef COMPAT_43
 3946 /*
 3947  * Read a block of directory entries in a filesystem independent format.
 3948  */
 3949 #ifndef _SYS_SYSPROTO_H_
 3950 struct ogetdirentries_args {
 3951         int     fd;
 3952         char    *buf;
 3953         u_int   count;
 3954         long    *basep;
 3955 };
 3956 #endif
 3957 int
 3958 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3959 {
 3960         long loff;
 3961         int error;
 3962 
 3963         error = kern_ogetdirentries(td, uap, &loff);
 3964         if (error == 0)
 3965                 error = copyout(&loff, uap->basep, sizeof(long));
 3966         return (error);
 3967 }
 3968 
 3969 int
 3970 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3971     long *ploff)
 3972 {
 3973         struct vnode *vp;
 3974         struct file *fp;
 3975         struct uio auio, kuio;
 3976         struct iovec aiov, kiov;
 3977         struct dirent *dp, *edp;
 3978         cap_rights_t rights;
 3979         caddr_t dirbuf;
 3980         int error, eofflag, readcnt;
 3981         long loff;
 3982         off_t foffset;
 3983 
 3984         /* XXX arbitrary sanity limit on `count'. */
 3985         if (uap->count > 64 * 1024)
 3986                 return (EINVAL);
 3987         error = getvnode(td->td_proc->p_fd, uap->fd,
 3988             cap_rights_init(&rights, CAP_READ), &fp);
 3989         if (error != 0)
 3990                 return (error);
 3991         if ((fp->f_flag & FREAD) == 0) {
 3992                 fdrop(fp, td);
 3993                 return (EBADF);
 3994         }
 3995         vp = fp->f_vnode;
 3996         foffset = foffset_lock(fp, 0);
 3997 unionread:
 3998         if (vp->v_type != VDIR) {
 3999                 foffset_unlock(fp, foffset, 0);
 4000                 fdrop(fp, td);
 4001                 return (EINVAL);
 4002         }
 4003         aiov.iov_base = uap->buf;
 4004         aiov.iov_len = uap->count;
 4005         auio.uio_iov = &aiov;
 4006         auio.uio_iovcnt = 1;
 4007         auio.uio_rw = UIO_READ;
 4008         auio.uio_segflg = UIO_USERSPACE;
 4009         auio.uio_td = td;
 4010         auio.uio_resid = uap->count;
 4011         vn_lock(vp, LK_SHARED | LK_RETRY);
 4012         loff = auio.uio_offset = foffset;
 4013 #ifdef MAC
 4014         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4015         if (error != 0) {
 4016                 VOP_UNLOCK(vp, 0);
 4017                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 4018                 fdrop(fp, td);
 4019                 return (error);
 4020         }
 4021 #endif
 4022 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4023                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4024                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4025                             NULL, NULL);
 4026                         foffset = auio.uio_offset;
 4027                 } else
 4028 #       endif
 4029         {
 4030                 kuio = auio;
 4031                 kuio.uio_iov = &kiov;
 4032                 kuio.uio_segflg = UIO_SYSSPACE;
 4033                 kiov.iov_len = uap->count;
 4034                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4035                 kiov.iov_base = dirbuf;
 4036                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4037                             NULL, NULL);
 4038                 foffset = kuio.uio_offset;
 4039                 if (error == 0) {
 4040                         readcnt = uap->count - kuio.uio_resid;
 4041                         edp = (struct dirent *)&dirbuf[readcnt];
 4042                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4043 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4044                                         /*
 4045                                          * The expected low byte of
 4046                                          * dp->d_namlen is our dp->d_type.
 4047                                          * The high MBZ byte of dp->d_namlen
 4048                                          * is our dp->d_namlen.
 4049                                          */
 4050                                         dp->d_type = dp->d_namlen;
 4051                                         dp->d_namlen = 0;
 4052 #                               else
 4053                                         /*
 4054                                          * The dp->d_type is the high byte
 4055                                          * of the expected dp->d_namlen,
 4056                                          * so must be zero'ed.
 4057                                          */
 4058                                         dp->d_type = 0;
 4059 #                               endif
 4060                                 if (dp->d_reclen > 0) {
 4061                                         dp = (struct dirent *)
 4062                                             ((char *)dp + dp->d_reclen);
 4063                                 } else {
 4064                                         error = EIO;
 4065                                         break;
 4066                                 }
 4067                         }
 4068                         if (dp >= edp)
 4069                                 error = uiomove(dirbuf, readcnt, &auio);
 4070                 }
 4071                 free(dirbuf, M_TEMP);
 4072         }
 4073         if (error != 0) {
 4074                 VOP_UNLOCK(vp, 0);
 4075                 foffset_unlock(fp, foffset, 0);
 4076                 fdrop(fp, td);
 4077                 return (error);
 4078         }
 4079         if (uap->count == auio.uio_resid &&
 4080             (vp->v_vflag & VV_ROOT) &&
 4081             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4082                 struct vnode *tvp = vp;
 4083                 vp = vp->v_mount->mnt_vnodecovered;
 4084                 VREF(vp);
 4085                 fp->f_vnode = vp;
 4086                 fp->f_data = vp;
 4087                 foffset = 0;
 4088                 vput(tvp);
 4089                 goto unionread;
 4090         }
 4091         VOP_UNLOCK(vp, 0);
 4092         foffset_unlock(fp, foffset, 0);
 4093         fdrop(fp, td);
 4094         td->td_retval[0] = uap->count - auio.uio_resid;
 4095         if (error == 0)
 4096                 *ploff = loff;
 4097         return (error);
 4098 }
 4099 #endif /* COMPAT_43 */
 4100 
 4101 /*
 4102  * Read a block of directory entries in a filesystem independent format.
 4103  */
 4104 #ifndef _SYS_SYSPROTO_H_
 4105 struct getdirentries_args {
 4106         int     fd;
 4107         char    *buf;
 4108         u_int   count;
 4109         long    *basep;
 4110 };
 4111 #endif
 4112 int
 4113 sys_getdirentries(td, uap)
 4114         struct thread *td;
 4115         register struct getdirentries_args /* {
 4116                 int fd;
 4117                 char *buf;
 4118                 u_int count;
 4119                 long *basep;
 4120         } */ *uap;
 4121 {
 4122         long base;
 4123         int error;
 4124 
 4125         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4126             NULL, UIO_USERSPACE);
 4127         if (error != 0)
 4128                 return (error);
 4129         if (uap->basep != NULL)
 4130                 error = copyout(&base, uap->basep, sizeof(long));
 4131         return (error);
 4132 }
 4133 
 4134 int
 4135 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4136     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4137 {
 4138         struct vnode *vp;
 4139         struct file *fp;
 4140         struct uio auio;
 4141         struct iovec aiov;
 4142         cap_rights_t rights;
 4143         long loff;
 4144         int error, eofflag;
 4145         off_t foffset;
 4146 
 4147         AUDIT_ARG_FD(fd);
 4148         if (count > IOSIZE_MAX)
 4149                 return (EINVAL);
 4150         auio.uio_resid = count;
 4151         error = getvnode(td->td_proc->p_fd, fd,
 4152             cap_rights_init(&rights, CAP_READ), &fp);
 4153         if (error != 0)
 4154                 return (error);
 4155         if ((fp->f_flag & FREAD) == 0) {
 4156                 fdrop(fp, td);
 4157                 return (EBADF);
 4158         }
 4159         vp = fp->f_vnode;
 4160         foffset = foffset_lock(fp, 0);
 4161 unionread:
 4162         if (vp->v_type != VDIR) {
 4163                 error = EINVAL;
 4164                 goto fail;
 4165         }
 4166         aiov.iov_base = buf;
 4167         aiov.iov_len = count;
 4168         auio.uio_iov = &aiov;
 4169         auio.uio_iovcnt = 1;
 4170         auio.uio_rw = UIO_READ;
 4171         auio.uio_segflg = bufseg;
 4172         auio.uio_td = td;
 4173         vn_lock(vp, LK_SHARED | LK_RETRY);
 4174         AUDIT_ARG_VNODE1(vp);
 4175         loff = auio.uio_offset = foffset;
 4176 #ifdef MAC
 4177         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4178         if (error == 0)
 4179 #endif
 4180                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4181                     NULL);
 4182         foffset = auio.uio_offset;
 4183         if (error != 0) {
 4184                 VOP_UNLOCK(vp, 0);
 4185                 goto fail;
 4186         }
 4187         if (count == auio.uio_resid &&
 4188             (vp->v_vflag & VV_ROOT) &&
 4189             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4190                 struct vnode *tvp = vp;
 4191 
 4192                 vp = vp->v_mount->mnt_vnodecovered;
 4193                 VREF(vp);
 4194                 fp->f_vnode = vp;
 4195                 fp->f_data = vp;
 4196                 foffset = 0;
 4197                 vput(tvp);
 4198                 goto unionread;
 4199         }
 4200         VOP_UNLOCK(vp, 0);
 4201         *basep = loff;
 4202         if (residp != NULL)
 4203                 *residp = auio.uio_resid;
 4204         td->td_retval[0] = count - auio.uio_resid;
 4205 fail:
 4206         foffset_unlock(fp, foffset, 0);
 4207         fdrop(fp, td);
 4208         return (error);
 4209 }
 4210 
 4211 #ifndef _SYS_SYSPROTO_H_
 4212 struct getdents_args {
 4213         int fd;
 4214         char *buf;
 4215         size_t count;
 4216 };
 4217 #endif
 4218 int
 4219 sys_getdents(td, uap)
 4220         struct thread *td;
 4221         register struct getdents_args /* {
 4222                 int fd;
 4223                 char *buf;
 4224                 u_int count;
 4225         } */ *uap;
 4226 {
 4227         struct getdirentries_args ap;
 4228 
 4229         ap.fd = uap->fd;
 4230         ap.buf = uap->buf;
 4231         ap.count = uap->count;
 4232         ap.basep = NULL;
 4233         return (sys_getdirentries(td, &ap));
 4234 }
 4235 
 4236 /*
 4237  * Set the mode mask for creation of filesystem nodes.
 4238  */
 4239 #ifndef _SYS_SYSPROTO_H_
 4240 struct umask_args {
 4241         int     newmask;
 4242 };
 4243 #endif
 4244 int
 4245 sys_umask(td, uap)
 4246         struct thread *td;
 4247         struct umask_args /* {
 4248                 int newmask;
 4249         } */ *uap;
 4250 {
 4251         register struct filedesc *fdp;
 4252 
 4253         FILEDESC_XLOCK(td->td_proc->p_fd);
 4254         fdp = td->td_proc->p_fd;
 4255         td->td_retval[0] = fdp->fd_cmask;
 4256         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4257         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4258         return (0);
 4259 }
 4260 
 4261 /*
 4262  * Void all references to file by ripping underlying filesystem away from
 4263  * vnode.
 4264  */
 4265 #ifndef _SYS_SYSPROTO_H_
 4266 struct revoke_args {
 4267         char    *path;
 4268 };
 4269 #endif
 4270 int
 4271 sys_revoke(td, uap)
 4272         struct thread *td;
 4273         register struct revoke_args /* {
 4274                 char *path;
 4275         } */ *uap;
 4276 {
 4277         struct vnode *vp;
 4278         struct vattr vattr;
 4279         struct nameidata nd;
 4280         int error;
 4281 
 4282         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4283             uap->path, td);
 4284         if ((error = namei(&nd)) != 0)
 4285                 return (error);
 4286         vp = nd.ni_vp;
 4287         NDFREE(&nd, NDF_ONLY_PNBUF);
 4288         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4289                 error = EINVAL;
 4290                 goto out;
 4291         }
 4292 #ifdef MAC
 4293         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4294         if (error != 0)
 4295                 goto out;
 4296 #endif
 4297         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4298         if (error != 0)
 4299                 goto out;
 4300         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4301                 error = priv_check(td, PRIV_VFS_ADMIN);
 4302                 if (error != 0)
 4303                         goto out;
 4304         }
 4305         if (vcount(vp) > 1)
 4306                 VOP_REVOKE(vp, REVOKEALL);
 4307 out:
 4308         vput(vp);
 4309         return (error);
 4310 }
 4311 
 4312 /*
 4313  * Convert a user file descriptor to a kernel file entry and check that, if it
 4314  * is a capability, the correct rights are present. A reference on the file
 4315  * entry is held upon returning.
 4316  */
 4317 int
 4318 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4319 {
 4320         struct file *fp;
 4321         int error;
 4322 
 4323         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4324         if (error != 0)
 4325                 return (error);
 4326 
 4327         /*
 4328          * The file could be not of the vnode type, or it may be not
 4329          * yet fully initialized, in which case the f_vnode pointer
 4330          * may be set, but f_ops is still badfileops.  E.g.,
 4331          * devfs_open() transiently create such situation to
 4332          * facilitate csw d_fdopen().
 4333          *
 4334          * Dupfdopen() handling in kern_openat() installs the
 4335          * half-baked file into the process descriptor table, allowing
 4336          * other thread to dereference it. Guard against the race by
 4337          * checking f_ops.
 4338          */
 4339         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4340                 fdrop(fp, curthread);
 4341                 return (EINVAL);
 4342         }
 4343         *fpp = fp;
 4344         return (0);
 4345 }
 4346 
 4347 
 4348 /*
 4349  * Get an (NFS) file handle.
 4350  */
 4351 #ifndef _SYS_SYSPROTO_H_
 4352 struct lgetfh_args {
 4353         char    *fname;
 4354         fhandle_t *fhp;
 4355 };
 4356 #endif
 4357 int
 4358 sys_lgetfh(td, uap)
 4359         struct thread *td;
 4360         register struct lgetfh_args *uap;
 4361 {
 4362         struct nameidata nd;
 4363         fhandle_t fh;
 4364         register struct vnode *vp;
 4365         int error;
 4366 
 4367         error = priv_check(td, PRIV_VFS_GETFH);
 4368         if (error != 0)
 4369                 return (error);
 4370         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4371             uap->fname, td);
 4372         error = namei(&nd);
 4373         if (error != 0)
 4374                 return (error);
 4375         NDFREE(&nd, NDF_ONLY_PNBUF);
 4376         vp = nd.ni_vp;
 4377         bzero(&fh, sizeof(fh));
 4378         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4379         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4380         vput(vp);
 4381         if (error == 0)
 4382                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4383         return (error);
 4384 }
 4385 
 4386 #ifndef _SYS_SYSPROTO_H_
 4387 struct getfh_args {
 4388         char    *fname;
 4389         fhandle_t *fhp;
 4390 };
 4391 #endif
 4392 int
 4393 sys_getfh(td, uap)
 4394         struct thread *td;
 4395         register struct getfh_args *uap;
 4396 {
 4397         struct nameidata nd;
 4398         fhandle_t fh;
 4399         register struct vnode *vp;
 4400         int error;
 4401 
 4402         error = priv_check(td, PRIV_VFS_GETFH);
 4403         if (error != 0)
 4404                 return (error);
 4405         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4406             uap->fname, td);
 4407         error = namei(&nd);
 4408         if (error != 0)
 4409                 return (error);
 4410         NDFREE(&nd, NDF_ONLY_PNBUF);
 4411         vp = nd.ni_vp;
 4412         bzero(&fh, sizeof(fh));
 4413         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4414         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4415         vput(vp);
 4416         if (error == 0)
 4417                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4418         return (error);
 4419 }
 4420 
 4421 /*
 4422  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4423  * open descriptor.
 4424  *
 4425  * warning: do not remove the priv_check() call or this becomes one giant
 4426  * security hole.
 4427  */
 4428 #ifndef _SYS_SYSPROTO_H_
 4429 struct fhopen_args {
 4430         const struct fhandle *u_fhp;
 4431         int flags;
 4432 };
 4433 #endif
 4434 int
 4435 sys_fhopen(td, uap)
 4436         struct thread *td;
 4437         struct fhopen_args /* {
 4438                 const struct fhandle *u_fhp;
 4439                 int flags;
 4440         } */ *uap;
 4441 {
 4442         struct mount *mp;
 4443         struct vnode *vp;
 4444         struct fhandle fhp;
 4445         struct file *fp;
 4446         int fmode, error;
 4447         int indx;
 4448 
 4449         error = priv_check(td, PRIV_VFS_FHOPEN);
 4450         if (error != 0)
 4451                 return (error);
 4452         indx = -1;
 4453         fmode = FFLAGS(uap->flags);
 4454         /* why not allow a non-read/write open for our lockd? */
 4455         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4456                 return (EINVAL);
 4457         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4458         if (error != 0)
 4459                 return(error);
 4460         /* find the mount point */
 4461         mp = vfs_busyfs(&fhp.fh_fsid);
 4462         if (mp == NULL)
 4463                 return (ESTALE);
 4464         /* now give me my vnode, it gets returned to me locked */
 4465         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4466         vfs_unbusy(mp);
 4467         if (error != 0)
 4468                 return (error);
 4469 
 4470         error = falloc_noinstall(td, &fp);
 4471         if (error != 0) {
 4472                 vput(vp);
 4473                 return (error);
 4474         }
 4475         /*
 4476          * An extra reference on `fp' has been held for us by
 4477          * falloc_noinstall().
 4478          */
 4479 
 4480 #ifdef INVARIANTS
 4481         td->td_dupfd = -1;
 4482 #endif
 4483         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4484         if (error != 0) {
 4485                 KASSERT(fp->f_ops == &badfileops,
 4486                     ("VOP_OPEN in fhopen() set f_ops"));
 4487                 KASSERT(td->td_dupfd < 0,
 4488                     ("fhopen() encountered fdopen()"));
 4489 
 4490                 vput(vp);
 4491                 goto bad;
 4492         }
 4493 #ifdef INVARIANTS
 4494         td->td_dupfd = 0;
 4495 #endif
 4496         fp->f_vnode = vp;
 4497         fp->f_seqcount = 1;
 4498         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4499             &vnops);
 4500         VOP_UNLOCK(vp, 0);
 4501         if ((fmode & O_TRUNC) != 0) {
 4502                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4503                 if (error != 0)
 4504                         goto bad;
 4505         }
 4506 
 4507         error = finstall(td, fp, &indx, fmode, NULL);
 4508 bad:
 4509         fdrop(fp, td);
 4510         td->td_retval[0] = indx;
 4511         return (error);
 4512 }
 4513 
 4514 /*
 4515  * Stat an (NFS) file handle.
 4516  */
 4517 #ifndef _SYS_SYSPROTO_H_
 4518 struct fhstat_args {
 4519         struct fhandle *u_fhp;
 4520         struct stat *sb;
 4521 };
 4522 #endif
 4523 int
 4524 sys_fhstat(td, uap)
 4525         struct thread *td;
 4526         register struct fhstat_args /* {
 4527                 struct fhandle *u_fhp;
 4528                 struct stat *sb;
 4529         } */ *uap;
 4530 {
 4531         struct stat sb;
 4532         struct fhandle fh;
 4533         int error;
 4534 
 4535         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4536         if (error != 0)
 4537                 return (error);
 4538         error = kern_fhstat(td, fh, &sb);
 4539         if (error == 0)
 4540                 error = copyout(&sb, uap->sb, sizeof(sb));
 4541         return (error);
 4542 }
 4543 
 4544 int
 4545 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4546 {
 4547         struct mount *mp;
 4548         struct vnode *vp;
 4549         int error;
 4550 
 4551         error = priv_check(td, PRIV_VFS_FHSTAT);
 4552         if (error != 0)
 4553                 return (error);
 4554         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4555                 return (ESTALE);
 4556         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4557         vfs_unbusy(mp);
 4558         if (error != 0)
 4559                 return (error);
 4560         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4561         vput(vp);
 4562         return (error);
 4563 }
 4564 
 4565 /*
 4566  * Implement fstatfs() for (NFS) file handles.
 4567  */
 4568 #ifndef _SYS_SYSPROTO_H_
 4569 struct fhstatfs_args {
 4570         struct fhandle *u_fhp;
 4571         struct statfs *buf;
 4572 };
 4573 #endif
 4574 int
 4575 sys_fhstatfs(td, uap)
 4576         struct thread *td;
 4577         struct fhstatfs_args /* {
 4578                 struct fhandle *u_fhp;
 4579                 struct statfs *buf;
 4580         } */ *uap;
 4581 {
 4582         struct statfs sf;
 4583         fhandle_t fh;
 4584         int error;
 4585 
 4586         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4587         if (error != 0)
 4588                 return (error);
 4589         error = kern_fhstatfs(td, fh, &sf);
 4590         if (error != 0)
 4591                 return (error);
 4592         return (copyout(&sf, uap->buf, sizeof(sf)));
 4593 }
 4594 
 4595 int
 4596 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4597 {
 4598         struct statfs *sp;
 4599         struct mount *mp;
 4600         struct vnode *vp;
 4601         int error;
 4602 
 4603         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4604         if (error != 0)
 4605                 return (error);
 4606         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4607                 return (ESTALE);
 4608         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4609         if (error != 0) {
 4610                 vfs_unbusy(mp);
 4611                 return (error);
 4612         }
 4613         vput(vp);
 4614         error = prison_canseemount(td->td_ucred, mp);
 4615         if (error != 0)
 4616                 goto out;
 4617 #ifdef MAC
 4618         error = mac_mount_check_stat(td->td_ucred, mp);
 4619         if (error != 0)
 4620                 goto out;
 4621 #endif
 4622         /*
 4623          * Set these in case the underlying filesystem fails to do so.
 4624          */
 4625         sp = &mp->mnt_stat;
 4626         sp->f_version = STATFS_VERSION;
 4627         sp->f_namemax = NAME_MAX;
 4628         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4629         error = VFS_STATFS(mp, sp);
 4630         if (error == 0)
 4631                 *buf = *sp;
 4632 out:
 4633         vfs_unbusy(mp);
 4634         return (error);
 4635 }
 4636 
 4637 int
 4638 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4639 {
 4640         struct file *fp;
 4641         struct mount *mp;
 4642         struct vnode *vp;
 4643         cap_rights_t rights;
 4644         off_t olen, ooffset;
 4645         int error;
 4646 
 4647         if (offset < 0 || len <= 0)
 4648                 return (EINVAL);
 4649         /* Check for wrap. */
 4650         if (offset > OFF_MAX - len)
 4651                 return (EFBIG);
 4652         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4653         if (error != 0)
 4654                 return (error);
 4655         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4656                 error = ESPIPE;
 4657                 goto out;
 4658         }
 4659         if ((fp->f_flag & FWRITE) == 0) {
 4660                 error = EBADF;
 4661                 goto out;
 4662         }
 4663         if (fp->f_type != DTYPE_VNODE) {
 4664                 error = ENODEV;
 4665                 goto out;
 4666         }
 4667         vp = fp->f_vnode;
 4668         if (vp->v_type != VREG) {
 4669                 error = ENODEV;
 4670                 goto out;
 4671         }
 4672 
 4673         /* Allocating blocks may take a long time, so iterate. */
 4674         for (;;) {
 4675                 olen = len;
 4676                 ooffset = offset;
 4677 
 4678                 bwillwrite();
 4679                 mp = NULL;
 4680                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4681                 if (error != 0)
 4682                         break;
 4683                 error = vn_lock(vp, LK_EXCLUSIVE);
 4684                 if (error != 0) {
 4685                         vn_finished_write(mp);
 4686                         break;
 4687                 }
 4688 #ifdef MAC
 4689                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4690                 if (error == 0)
 4691 #endif
 4692                         error = VOP_ALLOCATE(vp, &offset, &len);
 4693                 VOP_UNLOCK(vp, 0);
 4694                 vn_finished_write(mp);
 4695 
 4696                 if (olen + ooffset != offset + len) {
 4697                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4698                             ooffset, olen, offset, len);
 4699                 }
 4700                 if (error != 0 || len == 0)
 4701                         break;
 4702                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4703                 maybe_yield();
 4704         }
 4705  out:
 4706         fdrop(fp, td);
 4707         return (error);
 4708 }
 4709 
 4710 int
 4711 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4712 {
 4713 
 4714         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4715             uap->len);
 4716         return (0);
 4717 }
 4718 
 4719 /*
 4720  * Unlike madvise(2), we do not make a best effort to remember every
 4721  * possible caching hint.  Instead, we remember the last setting with
 4722  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4723  * region of any current setting.
 4724  */
 4725 int
 4726 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4727     int advice)
 4728 {
 4729         struct fadvise_info *fa, *new;
 4730         struct file *fp;
 4731         struct vnode *vp;
 4732         cap_rights_t rights;
 4733         off_t end;
 4734         int error;
 4735 
 4736         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4737                 return (EINVAL);
 4738         switch (advice) {
 4739         case POSIX_FADV_SEQUENTIAL:
 4740         case POSIX_FADV_RANDOM:
 4741         case POSIX_FADV_NOREUSE:
 4742                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4743                 break;
 4744         case POSIX_FADV_NORMAL:
 4745         case POSIX_FADV_WILLNEED:
 4746         case POSIX_FADV_DONTNEED:
 4747                 new = NULL;
 4748                 break;
 4749         default:
 4750                 return (EINVAL);
 4751         }
 4752         /* XXX: CAP_POSIX_FADVISE? */
 4753         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4754         if (error != 0)
 4755                 goto out;
 4756         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4757                 error = ESPIPE;
 4758                 goto out;
 4759         }
 4760         if (fp->f_type != DTYPE_VNODE) {
 4761                 error = ENODEV;
 4762                 goto out;
 4763         }
 4764         vp = fp->f_vnode;
 4765         if (vp->v_type != VREG) {
 4766                 error = ENODEV;
 4767                 goto out;
 4768         }
 4769         if (len == 0)
 4770                 end = OFF_MAX;
 4771         else
 4772                 end = offset + len - 1;
 4773         switch (advice) {
 4774         case POSIX_FADV_SEQUENTIAL:
 4775         case POSIX_FADV_RANDOM:
 4776         case POSIX_FADV_NOREUSE:
 4777                 /*
 4778                  * Try to merge any existing non-standard region with
 4779                  * this new region if possible, otherwise create a new
 4780                  * non-standard region for this request.
 4781                  */
 4782                 mtx_pool_lock(mtxpool_sleep, fp);
 4783                 fa = fp->f_advice;
 4784                 if (fa != NULL && fa->fa_advice == advice &&
 4785                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4786                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4787                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4788                         if (offset < fa->fa_start)
 4789                                 fa->fa_start = offset;
 4790                         if (end > fa->fa_end)
 4791                                 fa->fa_end = end;
 4792                 } else {
 4793                         new->fa_advice = advice;
 4794                         new->fa_start = offset;
 4795                         new->fa_end = end;
 4796                         new->fa_prevstart = 0;
 4797                         new->fa_prevend = 0;
 4798                         fp->f_advice = new;
 4799                         new = fa;
 4800                 }
 4801                 mtx_pool_unlock(mtxpool_sleep, fp);
 4802                 break;
 4803         case POSIX_FADV_NORMAL:
 4804                 /*
 4805                  * If a the "normal" region overlaps with an existing
 4806                  * non-standard region, trim or remove the
 4807                  * non-standard region.
 4808                  */
 4809                 mtx_pool_lock(mtxpool_sleep, fp);
 4810                 fa = fp->f_advice;
 4811                 if (fa != NULL) {
 4812                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4813                                 new = fa;
 4814                                 fp->f_advice = NULL;
 4815                         } else if (offset <= fa->fa_start &&
 4816                             end >= fa->fa_start)
 4817                                 fa->fa_start = end + 1;
 4818                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4819                                 fa->fa_end = offset - 1;
 4820                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4821                                 /*
 4822                                  * If the "normal" region is a middle
 4823                                  * portion of the existing
 4824                                  * non-standard region, just remove
 4825                                  * the whole thing rather than picking
 4826                                  * one side or the other to
 4827                                  * preserve.
 4828                                  */
 4829                                 new = fa;
 4830                                 fp->f_advice = NULL;
 4831                         }
 4832                 }
 4833                 mtx_pool_unlock(mtxpool_sleep, fp);
 4834                 break;
 4835         case POSIX_FADV_WILLNEED:
 4836         case POSIX_FADV_DONTNEED:
 4837                 error = VOP_ADVISE(vp, offset, end, advice);
 4838                 break;
 4839         }
 4840 out:
 4841         if (fp != NULL)
 4842                 fdrop(fp, td);
 4843         free(new, M_FADVISE);
 4844         return (error);
 4845 }
 4846 
 4847 int
 4848 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4849 {
 4850 
 4851         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 4852             uap->len, uap->advice);
 4853         return (0);
 4854 }

Cache object: 59ef13fbb09c0a852389663a2a13d62b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.