The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/11.1/sys/kern/vfs_syscalls.c 338979 2018-09-27 18:32:14Z gordon $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/bio.h>
   47 #include <sys/buf.h>
   48 #include <sys/capsicum.h>
   49 #include <sys/disk.h>
   50 #include <sys/sysent.h>
   51 #include <sys/malloc.h>
   52 #include <sys/mount.h>
   53 #include <sys/mutex.h>
   54 #include <sys/sysproto.h>
   55 #include <sys/namei.h>
   56 #include <sys/filedesc.h>
   57 #include <sys/kernel.h>
   58 #include <sys/fcntl.h>
   59 #include <sys/file.h>
   60 #include <sys/filio.h>
   61 #include <sys/limits.h>
   62 #include <sys/linker.h>
   63 #include <sys/rwlock.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 #include <ufs/ufs/quota.h>
   90 
      /* Malloc type tag; its description string ties it to posix_fadvise(2). */
   91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   92 
      /*
       * Statically defined tracing (SDT): declare the "vfs" provider and two
       * two-argument probes, stat:mode and stat:reg, whose arguments are
       * typed "char *" and "int".  Neither probe is fired in the part of the
       * file shown here.
       */
   93 SDT_PROVIDER_DEFINE(vfs);
   94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   96 
      /*
       * Prototypes for file-local (static) helpers.  Their definitions are
       * not in the part of the file shown here.
       */
   97 static int kern_chflagsat(struct thread *td, int fd, const char *path,
   98     enum uio_seg pathseg, u_long flags, int atflag);
   99 static int setfflags(struct thread *td, struct vnode *, u_long);
  100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  101 static int getutimens(const struct timespec *, enum uio_seg,
  102     struct timespec *, int *);
  103 static int setutimes(struct thread *td, struct vnode *,
  104     const struct timespec *, int, int);
  105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  106     struct thread *td);
  107 
  108 /*
  109  * Sync each mounted filesystem.
       *
       * Walks the global mount list.  Every mount that can be busied without
       * sleeping, is not read-only, and accepts vn_start_write() with
       * V_NOWAIT gets vfs_msync() and VFS_SYNC() with MNT_NOWAIT.  All other
       * mounts are skipped.  The return values of vfs_msync() and VFS_SYNC()
       * are ignored, and the system call always returns 0.
  110  */
  111 #ifndef _SYS_SYSPROTO_H_
  112 struct sync_args {
  113         int     dummy;
  114 };
  115 #endif
  116 /* ARGSUSED */
  117 int
  118 sys_sync(td, uap)
  119         struct thread *td;
  120         struct sync_args *uap;
  121 {
  122         struct mount *mp, *nmp;
  123         int save;
  124 
  125         mtx_lock(&mountlist_mtx);
  126         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * The code below relies on vfs_busy() with
                       * MBF_MNTLSTLOCK returning with mountlist_mtx still
                       * held on failure and released on success: the failure
                       * path advances right away, the success path re-locks
                       * before advancing.
                       */
  127                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  128                         nmp = TAILQ_NEXT(mp, mnt_list);
  129                         continue;
  130                 }
  131                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  132                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
                              /*
                               * Set TDP_SYNCIO in the thread's private flags
                               * for the duration of the sync and restore the
                               * saved state afterwards.
                               */
  133                         save = curthread_pflags_set(TDP_SYNCIO);
  134                         vfs_msync(mp, MNT_NOWAIT);
  135                         VFS_SYNC(mp, MNT_NOWAIT);
  136                         curthread_pflags_restore(save);
  137                         vn_finished_write(mp);
  138                 }
                      /*
                       * Pick the successor while the mount is still busied
                       * and the list lock is held, then drop the busy
                       * reference.
                       */
  139                 mtx_lock(&mountlist_mtx);
  140                 nmp = TAILQ_NEXT(mp, mnt_list);
  141                 vfs_unbusy(mp);
  142         }
  143         mtx_unlock(&mountlist_mtx);
  144         return (0);
  145 }
  146 
  147 /*
  148  * Change filesystem quotas.
       *
       * uap->path is only used to find the mount the operation applies to;
       * cmd, uid and arg are handed to VFS_QUOTACTL() unchanged.
       *
       * Returns EPERM when prison_allow(PR_ALLOW_QUOTAS) is false for the
       * caller's credentials, the namei() or vfs_busy() error if either
       * fails, and otherwise whatever VFS_QUOTACTL() returns.
  149  */
  150 #ifndef _SYS_SYSPROTO_H_
  151 struct quotactl_args {
  152         char *path;
  153         int cmd;
  154         int uid;
  155         caddr_t arg;
  156 };
  157 #endif
  158 int
  159 sys_quotactl(td, uap)
  160         struct thread *td;
  161         register struct quotactl_args /* {
  162                 char *path;
  163                 int cmd;
  164                 int uid;
  165                 caddr_t arg;
  166         } */ *uap;
  167 {
  168         struct mount *mp;
  169         struct nameidata nd;
  170         int error;
  171 
  172         AUDIT_ARG_CMD(uap->cmd);
  173         AUDIT_ARG_UID(uap->uid);
  174         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  175                 return (EPERM);
  176         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  177             uap->path, td);
  178         if ((error = namei(&nd)) != 0)
  179                 return (error);
  180         NDFREE(&nd, NDF_ONLY_PNBUF);
              /*
               * Take a reference on the mount before releasing the vnode it
               * was read from, then trade that reference for a busy hold.
               * The reference is dropped whether or not vfs_busy() succeeds.
               */
  181         mp = nd.ni_vp->v_mount;
  182         vfs_ref(mp);
  183         vput(nd.ni_vp);
  184         error = vfs_busy(mp, 0);
  185         vfs_rel(mp);
  186         if (error != 0)
  187                 return (error);
  188         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  189 
  190         /*
  191          * Since quota on operation typically needs to open quota
  192          * file, the Q_QUOTAON handler needs to unbusy the mount point
  193          * before calling into namei.  Otherwise, unmount might be
  194          * started between two vfs_busy() invocations (first is our,
  195          * second is from mount point cross-walk code in lookup()),
  196          * causing deadlock.
  197          *
  198          * Require that Q_QUOTAON handles the vfs_busy() reference on
  199          * its own, always returning with unbusied mount point.
  200          */
  201         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  202                 vfs_unbusy(mp);
  203         return (error);
  204 }
  205 
  206 /*
       * Helper for the statfs conversion routines.  When any block count in
       * 'sf' exceeds 'max_size', grow f_bsize by a power of two and shrink
       * f_blocks, f_bfree and f_bavail by the same power so that all of them
       * are <= 'max_size'.  'max_size' has to be a bitmask (2^n - 1, n > 0);
       * this is asserted.  'sf' is left untouched when everything already
       * fits.
       */
      void
      statfs_scale_blocks(struct statfs *sf, long max_size)
      {
              uint64_t largest, excess;
              int nbits;

              KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));

              /*
               * The largest magnitude among the three counts decides the
               * scale; f_bavail may be negative, so use its absolute value.
               */
              largest = (sf->f_bavail < 0) ? -sf->f_bavail : sf->f_bavail;
              largest = MAX(sf->f_bfree, largest);
              largest = MAX(sf->f_blocks, largest);
              if (largest <= max_size)
                      return;

              /*
               * Count how many bits of the largest value lie above the bits
               * covered by max_size; that is the shift to apply.
               */
              nbits = 0;
              for (excess = largest >> flsl(max_size); excess > 0; excess >>= 1)
                      nbits++;

              sf->f_bsize <<= nbits;
              sf->f_blocks >>= nbits;
              sf->f_bfree >>= nbits;
              sf->f_bavail >>= nbits;
      }
  246 
      /*
       * Common back end of kern_statfs() and kern_fstatfs(): refresh the
       * statistics cached in mp->mnt_stat and copy them into *buf.
       *
       * The callers in this file pass a mount they have vfs_ref()'d (or NULL);
       * that reference is dropped here with vfs_rel() whether or not
       * vfs_busy() succeeds.
       *
       * Returns EBADF for a NULL mp, otherwise the vfs_busy(),
       * mac_mount_check_stat() (MAC kernels only) or VFS_STATFS() error, or 0.
       * *buf is written only on success.
       */
  247 static int
  248 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  249 {
  250         struct statfs *sp;
  251         int error;
  252 
  253         if (mp == NULL)
  254                 return (EBADF);
  255         error = vfs_busy(mp, 0);
  256         vfs_rel(mp);
  257         if (error != 0)
  258                 return (error);
  259 #ifdef MAC
  260         error = mac_mount_check_stat(td->td_ucred, mp);
  261         if (error != 0)
  262                 goto out;
  263 #endif
  264         /*
  265          * Set these in case the underlying filesystem fails to do so.
  266          */
  267         sp = &mp->mnt_stat;
  268         sp->f_version = STATFS_VERSION;
  269         sp->f_namemax = NAME_MAX;
  270         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  271         error = VFS_STATFS(mp, sp);
  272         if (error != 0)
  273                 goto out;
  274         *buf = *sp;
              /*
               * When priv_check(PRIV_VFS_GENERATION) returns non-zero, hide
               * the filesystem id and let prison_enforce_statfs() adjust the
               * caller's copy.  The cached mnt_stat itself is not modified.
               */
  275         if (priv_check(td, PRIV_VFS_GENERATION)) {
  276                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  277                 prison_enforce_statfs(td->td_ucred, mp, buf);
  278         }
  279 out:
  280         vfs_unbusy(mp);
  281         return (error);
  282 }
  283 
  284 /*
       * statfs(2) entry point: get the statistics of the filesystem that
       * holds uap->path and copy them out to the user buffer uap->buf.
       * Returns the kern_statfs() error, else the copyout() result.
       */
  287 #ifndef _SYS_SYSPROTO_H_
  288 struct statfs_args {
  289         char *path;
  290         struct statfs *buf;
  291 };
  292 #endif
      int
      sys_statfs(struct thread *td, struct statfs_args *uap)
      {
              struct statfs *sb;
              int rv;

              /* Gather into a temporary M_STATFS buffer, copy out on success. */
              sb = malloc(sizeof(*sb), M_STATFS, M_WAITOK);
              rv = kern_statfs(td, uap->path, UIO_USERSPACE, sb);
              if (rv != 0)
                      goto done;
              rv = copyout(sb, uap->buf, sizeof(*sb));
      done:
              free(sb, M_STATFS);
              return (rv);
      }
  311 
      /*
       * Resolve 'path' (taken from the address space named by 'pathseg'),
       * following symbolic links, and fill *buf with the statistics of the
       * mount the resulting vnode belongs to.  Returns the namei() error or
       * the result of kern_do_statfs().
       */
      int
      kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
          struct statfs *buf)
      {
              struct nameidata ndata;
              struct mount *mnt;
              int rv;

              NDINIT(&ndata, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
                  pathseg, path, td);
              if ((rv = namei(&ndata)) != 0)
                      return (rv);

              /*
               * Reference the mount before the vnode is released; the
               * reference is handed to kern_do_statfs().
               */
              mnt = ndata.ni_vp->v_mount;
              vfs_ref(mnt);
              NDFREE(&ndata, NDF_ONLY_PNBUF);
              vput(ndata.ni_vp);

              return (kern_do_statfs(td, mnt, buf));
      }
  331 
  332 /*
       * fstatfs(2) entry point: get the statistics of the filesystem behind
       * the descriptor uap->fd and copy them out to the user buffer uap->buf.
       * Returns the kern_fstatfs() error, else the copyout() result.
       */
  335 #ifndef _SYS_SYSPROTO_H_
  336 struct fstatfs_args {
  337         int fd;
  338         struct statfs *buf;
  339 };
  340 #endif
      int
      sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
      {
              struct statfs *sb;
              int rv;

              /* Gather into a temporary M_STATFS buffer, copy out on success. */
              sb = malloc(sizeof(*sb), M_STATFS, M_WAITOK);
              rv = kern_fstatfs(td, uap->fd, sb);
              if (rv != 0)
                      goto done;
              rv = copyout(sb, uap->buf, sizeof(*sb));
      done:
              free(sb, M_STATFS);
              return (rv);
      }
  359 
      /*
       * Fill *buf with the statistics of the mount that the vnode behind
       * descriptor 'fd' belongs to.  The descriptor is looked up with the
       * CAP_FSTATFS right.  Returns the getvnode() error or the result of
       * kern_do_statfs(), which yields EBADF when the vnode has no mount.
       */
  360 int
  361 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  362 {
  363         struct file *fp;
  364         struct mount *mp;
  365         struct vnode *vp;
  366         cap_rights_t rights;
  367         int error;
  368 
  369         AUDIT_ARG_FD(fd);
  370         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp);
  371         if (error != 0)
  372                 return (error);
  373         vp = fp->f_vnode;
              /* v_mount is read, and the mount referenced, under the vnode lock. */
  374         vn_lock(vp, LK_SHARED | LK_RETRY);
  375 #ifdef AUDIT
  376         AUDIT_ARG_VNODE1(vp);
  377 #endif
  378         mp = vp->v_mount;
  379         if (mp != NULL)
  380                 vfs_ref(mp);
  381         VOP_UNLOCK(vp, 0);
  382         fdrop(fp, td);
              /* mp may be NULL here; kern_do_statfs() checks for that. */
  383         return (kern_do_statfs(td, mp, buf));
  384 }
  385 
  386 /*
       * getfsstat(2) entry point: report statistics on all mounted
       * filesystems into the user buffer uap->buf of uap->bufsize bytes.  On
       * success td_retval[0] receives the count computed by kern_getfsstat().
       * Returns EINVAL for a bufsize that is negative or above SIZE_MAX.
       */
  389 #ifndef _SYS_SYSPROTO_H_
  390 struct getfsstat_args {
  391         struct statfs *buf;
  392         long bufsize;
  393         int mode;
  394 };
  395 #endif
      int
      sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
      {
              size_t nmounts;
              int rv;

              if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
                      return (EINVAL);

              rv = kern_getfsstat(td, &uap->buf, uap->bufsize, &nmounts,
                  UIO_USERSPACE, uap->mode);
              if (rv != 0)
                      return (rv);

              td->td_retval[0] = nmounts;
              return (0);
      }
  416 
  417 /*
       * Collect statistics on the mounted filesystems that 'td' may see.
       *
       * 'mode' must be MNT_WAIT or MNT_NOWAIT; anything else returns EINVAL
       * before '*buf' or '*countp' is touched.
       *
       * If bufsize == 0 no statistics are stored and '*buf' is neither read
       * nor written; only the count is produced.
       *
       * If (bufsize > 0 && bufseg == UIO_USERSPACE)
       *      '*buf' is the user address the entries are copied out to.
       *
  418  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  419  *      The caller is responsible for freeing memory which will be allocated
  420  *      in '*buf'.
       *
       * On success '*countp' holds the number of mounts counted by the walk,
       * capped at the buffer capacity when a buffer was supplied.  A copyout()
       * failure is returned immediately and leaves '*countp' unset.
       *
       * NOTE(review): since '*buf' is left alone on the EINVAL and
       * bufsize == 0 paths, UIO_SYSSPACE callers should not inspect it there.
  421  */
  422 int
  423 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  424     size_t *countp, enum uio_seg bufseg, int mode)
  425 {
  426         struct mount *mp, *nmp;
  427         struct statfs *sfsp, *sp, *sptmp, *tofree;
  428         size_t count, maxcount;
  429         int error;
  430 
  431         switch (mode) {
  432         case MNT_WAIT:
  433         case MNT_NOWAIT:
  434                 break;
  435         default:
  436                 return (EINVAL);
  437         }
  438 restart:
              /* Capacity of the destination, in whole struct statfs entries. */
  439         maxcount = bufsize / sizeof(struct statfs);
  440         if (bufsize == 0) {
  441                 sfsp = NULL;
  442                 tofree = NULL;
  443         } else if (bufseg == UIO_USERSPACE) {
  444                 sfsp = *buf;
  445                 tofree = NULL;
  446         } else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Size the kernel buffer by the current number of
                       * mounts, but never above what the caller asked for.
                       * The list lock is dropped before the sleeping
                       * allocation.
                       */
  447                 count = 0;
  448                 mtx_lock(&mountlist_mtx);
  449                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  450                         count++;
  451                 }
  452                 mtx_unlock(&mountlist_mtx);
  453                 if (maxcount > count)
  454                         maxcount = count;
  455                 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
  456                     M_STATFS, M_WAITOK);
  457         }
  458         count = 0;
  459         mtx_lock(&mountlist_mtx);
  460         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /* Mounts hidden by the jail or MAC checks are not counted. */
  461                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  462                         nmp = TAILQ_NEXT(mp, mnt_list);
  463                         continue;
  464                 }
  465 #ifdef MAC
  466                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  467                         nmp = TAILQ_NEXT(mp, mnt_list);
  468                         continue;
  469                 }
  470 #endif
  471                 if (mode == MNT_WAIT) {
  472                         if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
  473                                 /*
  474                                  * If vfs_busy() failed, and MBF_NOWAIT
  475                                  * wasn't passed, then the mp is gone.
  476                                  * Furthermore, because of MBF_MNTLSTLOCK,
  477                                  * the mountlist_mtx was dropped.  We have
  478                                  * no other choice than to start over.
  479                                  */
                                      /*
                                       * The unlock below shows the mutex is
                                       * held again by this point; any kernel
                                       * buffer from this pass is freed and
                                       * reallocated after the restart.
                                       */
  480                                 mtx_unlock(&mountlist_mtx);
  481                                 free(tofree, M_STATFS);
  482                                 goto restart;
  483                         }
  484                 } else {
                              /* MNT_NOWAIT: skip mounts that cannot be busied now. */
  485                         if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
  486                                 nmp = TAILQ_NEXT(mp, mnt_list);
  487                                 continue;
  488                         }
  489                 }
                      /* Store an entry only while there is room; counting goes on. */
  490                 if (sfsp != NULL && count < maxcount) {
  491                         sp = &mp->mnt_stat;
  492                         /*
  493                          * Set these in case the underlying filesystem
  494                          * fails to do so.
  495                          */
  496                         sp->f_version = STATFS_VERSION;
  497                         sp->f_namemax = NAME_MAX;
  498                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  499                         /*
  500                          * If MNT_NOWAIT is specified, do not refresh
  501                          * the fsstat cache.
  502                          */
  503                         if (mode != MNT_NOWAIT) {
  504                                 error = VFS_STATFS(mp, sp);
  505                                 if (error != 0) {
                                              /*
                                               * A failing filesystem is
                                               * skipped, neither stored nor
                                               * counted; the error is not
                                               * returned.
                                               */
  506                                         mtx_lock(&mountlist_mtx);
  507                                         nmp = TAILQ_NEXT(mp, mnt_list);
  508                                         vfs_unbusy(mp);
  509                                         continue;
  510                                 }
  511                         }
                              /*
                               * When priv_check(PRIV_VFS_GENERATION) returns
                               * non-zero, export a scratch copy with the
                               * filesystem id cleared and
                               * prison_enforce_statfs() applied, so that the
                               * cached mnt_stat is not modified.
                               */
  512                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  513                                 sptmp = malloc(sizeof(struct statfs), M_STATFS,
  514                                     M_WAITOK);
  515                                 *sptmp = *sp;
  516                                 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
  517                                 prison_enforce_statfs(td->td_ucred, mp, sptmp);
  518                                 sp = sptmp;
  519                         } else
  520                                 sptmp = NULL;
  521                         if (bufseg == UIO_SYSSPACE) {
  522                                 bcopy(sp, sfsp, sizeof(*sp));
  523                                 free(sptmp, M_STATFS);
  524                         } else /* if (bufseg == UIO_USERSPACE) */ {
  525                                 error = copyout(sp, sfsp, sizeof(*sp));
  526                                 free(sptmp, M_STATFS);
  527                                 if (error != 0) {
  528                                         vfs_unbusy(mp);
  529                                         return (error);
  530                                 }
  531                         }
  532                         sfsp++;
  533                 }
  534                 count++;
                      /*
                       * Re-take the list lock and pick the successor while the
                       * mount is still busied, then drop the busy reference.
                       */
  535                 mtx_lock(&mountlist_mtx);
  536                 nmp = TAILQ_NEXT(mp, mnt_list);
  537                 vfs_unbusy(mp);
  538         }
  539         mtx_unlock(&mountlist_mtx);
              /* With a buffer, report no more entries than were stored. */
  540         if (sfsp != NULL && count > maxcount)
  541                 *countp = maxcount;
  542         else
  543                 *countp = count;
  544         return (0);
  545 }
  546 
  547 #ifdef COMPAT_FREEBSD4
  548 /*
       * FreeBSD 4 compatibility: statfs(2) returning the old `struct ostatfs'
       * layout.  Statistics are gathered in the current format by
       * kern_statfs(), converted by cvtstatfs() and copied out to uap->buf.
       */
  551 static void cvtstatfs(struct statfs *, struct ostatfs *);
  552 
  553 #ifndef _SYS_SYSPROTO_H_
  554 struct freebsd4_statfs_args {
  555         char *path;
  556         struct ostatfs *buf;
  557 };
  558 #endif
      int
      freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
      {
              struct ostatfs old;
              struct statfs *sb;
              int rv;

              sb = malloc(sizeof(*sb), M_STATFS, M_WAITOK);
              rv = kern_statfs(td, uap->path, UIO_USERSPACE, sb);
              if (rv != 0)
                      goto done;
              cvtstatfs(sb, &old);
              rv = copyout(&old, uap->buf, sizeof(old));
      done:
              free(sb, M_STATFS);
              return (rv);
      }
  580 
  581 /*
       * FreeBSD 4 compatibility: fstatfs(2) returning the old
       * `struct ostatfs' layout.  Statistics are gathered in the current
       * format by kern_fstatfs(), converted by cvtstatfs() and copied out to
       * uap->buf.
       */
  584 #ifndef _SYS_SYSPROTO_H_
  585 struct freebsd4_fstatfs_args {
  586         int fd;
  587         struct ostatfs *buf;
  588 };
  589 #endif
      int
      freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
      {
              struct ostatfs old;
              struct statfs *sb;
              int rv;

              sb = malloc(sizeof(*sb), M_STATFS, M_WAITOK);
              rv = kern_fstatfs(td, uap->fd, sb);
              if (rv != 0)
                      goto done;
              cvtstatfs(sb, &old);
              rv = copyout(&old, uap->buf, sizeof(old));
      done:
              free(sb, M_STATFS);
              return (rv);
      }
  611 
  612 /*
  613  * Get statistics on all filesystems.
       *
       * FreeBSD 4 compatibility version: uap->buf receives `struct ostatfs'
       * entries.  Up to bufsize / sizeof(struct ostatfs) entries are gathered
       * in the current format by kern_getfsstat() into a kernel buffer, then
       * converted one at a time by cvtstatfs() and copied out.  A bufsize of
       * 0 stores nothing and only reports the count.
       *
       * On success of kern_getfsstat(), td_retval[0] is set to the count it
       * reported.
       *
       * Returns EINVAL for a negative bufsize or one whose entry count would
       * overflow the kernel allocation size, the kern_getfsstat() error
       * (EINVAL for a bad mode), or the first copyout() error; 0 otherwise.
  614  */
  615 #ifndef _SYS_SYSPROTO_H_
  616 struct freebsd4_getfsstat_args {
  617         struct ostatfs *buf;
  618         long bufsize;
  619         int mode;
  620 };
  621 #endif
  622 int
  623 freebsd4_getfsstat(td, uap)
  624         struct thread *td;
  625         register struct freebsd4_getfsstat_args /* {
  626                 struct ostatfs *buf;
  627                 long bufsize;
  628                 int mode;
  629         } */ *uap;
  630 {
  631         struct statfs *buf, *sp;
              struct ostatfs *ubuf;
  632         struct ostatfs osb;
  633         size_t count, size;
  634         int error;
  635 
  636         if (uap->bufsize < 0)
  637                 return (EINVAL);
  638         count = uap->bufsize / sizeof(struct ostatfs);
  639         if (count > SIZE_MAX / sizeof(struct statfs))
  640                 return (EINVAL);
  641         size = count * sizeof(struct statfs);

              /*
               * kern_getfsstat() does not store to '*buf' when it rejects
               * 'mode' or when size is 0.  Start from NULL so that no
               * uninitialized pointer is ever tested or freed below.
               */
              buf = NULL;
  642         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  643             uap->mode);
              if (error != 0) {
                      /* 'count' is not valid here; free(9) accepts NULL. */
                      free(buf, M_STATFS);
                      return (error);
              }
  646         td->td_retval[0] = count;
  647         if (size != 0) {
                      /* Convert and copy out each gathered entry in turn. */
  648                 sp = buf;
                      ubuf = uap->buf;
  649                 while (count != 0 && error == 0) {
  650                         cvtstatfs(sp, &osb);
                              error = copyout(&osb, ubuf, sizeof(osb));
  652                         sp++;
                              ubuf++;
  654                         count--;
  655                 }
  656                 free(buf, M_STATFS);
  657         }
  658         return (error);
  659 }
  660 
  661 /*
       * FreeBSD 4 compatibility: fstatfs() for (NFS) file handles, returning
       * the old `struct ostatfs' layout.  The handle is copied in from
       * uap->u_fhp, resolved by kern_fhstatfs(), and the result is converted
       * by cvtstatfs() and copied out to uap->buf.
       */
  664 #ifndef _SYS_SYSPROTO_H_
  665 struct freebsd4_fhstatfs_args {
  666         struct fhandle *u_fhp;
  667         struct ostatfs *buf;
  668 };
  669 #endif
      int
      freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
      {
              struct ostatfs old;
              struct statfs *sb;
              fhandle_t handle;
              int rv;

              /* Nothing is allocated until the handle has been fetched. */
              rv = copyin(uap->u_fhp, &handle, sizeof(fhandle_t));
              if (rv != 0)
                      return (rv);

              sb = malloc(sizeof(*sb), M_STATFS, M_WAITOK);
              rv = kern_fhstatfs(td, handle, sb);
              if (rv != 0)
                      goto done;
              cvtstatfs(sb, &old);
              rv = copyout(&old, uap->buf, sizeof(old));
      done:
              free(sb, M_STATFS);
              return (rv);
      }
  695 
  696 /*
       * Fill the old format statfs structure at 'osp' from the new format
       * structure at 'nsp'.  Note that 'nsp' is modified as well: its block
       * size and block counts are first rescaled with
       * statfs_scale_blocks(nsp, LONG_MAX).  Counters are clamped to
       * LONG_MAX and the name strings are bounded by the smaller of the old
       * and new buffer sizes.
       */
      static void
      cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
      {

              /* Rescale the source in place first; the copies below read it. */
              statfs_scale_blocks(nsp, LONG_MAX);
              bzero(osp, sizeof(*osp));

              /* Identity and flags. */
              osp->f_fsid = nsp->f_fsid;
              osp->f_owner = nsp->f_owner;
              osp->f_type = nsp->f_type;
              osp->f_flags = nsp->f_flags;

              /* Block geometry and (already rescaled) block counts. */
              osp->f_bsize = nsp->f_bsize;
              osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
              osp->f_blocks = nsp->f_blocks;
              osp->f_bfree = nsp->f_bfree;
              osp->f_bavail = nsp->f_bavail;

              /* File node counts. */
              osp->f_files = MIN(nsp->f_files, LONG_MAX);
              osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);

              /* I/O counters. */
              osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
              osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
              osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
              osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);

              /* Names. */
              strlcpy(osp->f_fstypename, nsp->f_fstypename,
                  MIN(MFSNAMELEN, OMFSNAMELEN));
              strlcpy(osp->f_mntonname, nsp->f_mntonname,
                  MIN(MNAMELEN, OMNAMELEN));
              strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
                  MIN(MNAMELEN, OMNAMELEN));
      }
  729 #endif /* COMPAT_FREEBSD4 */
  730 
  731 /*
  732  * Change current working directory to a given file descriptor.
       *
       * The descriptor is looked up with the CAP_FCHDIR right.  If the
       * directory has a filesystem mounted on it, the root of that filesystem
       * is used instead; this is repeated for stacked mounts.  Returns the
       * getvnode(), change_dir() or VFS_ROOT() error, or 0.
  733  */
  734 #ifndef _SYS_SYSPROTO_H_
  735 struct fchdir_args {
  736         int     fd;
  737 };
  738 #endif
  739 int
  740 sys_fchdir(td, uap)
  741         struct thread *td;
  742         struct fchdir_args /* {
  743                 int fd;
  744         } */ *uap;
  745 {
  746         struct vnode *vp, *tdp;
  747         struct mount *mp;
  748         struct file *fp;
  749         cap_rights_t rights;
  750         int error;
  751 
  752         AUDIT_ARG_FD(uap->fd);
  753         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  754             &fp);
  755         if (error != 0)
  756                 return (error);
  757         vp = fp->f_vnode;
              /* Take our own vnode reference before the file one is dropped. */
  758         vrefact(vp);
  759         fdrop(fp, td);
  760         vn_lock(vp, LK_SHARED | LK_RETRY);
  761         AUDIT_ARG_VNODE1(vp);
  762         error = change_dir(vp, td);
              /*
               * While something is mounted on vp, replace vp with the root
               * vnode of that mount.  A vfs_busy() failure re-evaluates the
               * loop condition and so re-reads v_mountedhere.
               */
  763         while (!error && (mp = vp->v_mountedhere) != NULL) {
  764                 if (vfs_busy(mp, 0))
  765                         continue;
  766                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  767                 vfs_unbusy(mp);
  768                 if (error != 0)
  769                         break;
  770                 vput(vp);
  771                 vp = tdp;
  772         }
  773         if (error != 0) {
  774                 vput(vp);
  775                 return (error);
  776         }
              /*
               * The vnode is unlocked but not released here; presumably
               * pwd_chdir() takes over the reference -- TODO confirm.
               */
  777         VOP_UNLOCK(vp, 0);
  778         pwd_chdir(td, vp);
  779         return (0);
  780 }
  781 
  782 /*
       * chdir(2) entry point: change the current working directory (``.'')
       * to the user-space path uap->path.  All the work is done by
       * kern_chdir(), whose result is returned.
       */
  785 #ifndef _SYS_SYSPROTO_H_
  786 struct chdir_args {
  787         char    *path;
  788 };
  789 #endif
      int
      sys_chdir(struct thread *td, struct chdir_args *uap)
      {
              int rv;

              rv = kern_chdir(td, uap->path, UIO_USERSPACE);
              return (rv);
      }
  800 
      /*
       * Make the directory named by 'path' (taken from the address space
       * named by 'pathseg') the current working directory of 'td'.  Returns
       * the namei() or change_dir() error, or 0.
       */
      int
      kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
      {
              struct nameidata ndata;
              int rv;

              NDINIT(&ndata, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
                  pathseg, path, td);
              rv = namei(&ndata);
              if (rv != 0)
                      return (rv);

              rv = change_dir(ndata.ni_vp, td);
              if (rv == 0) {
                      /* Accepted: unlock the vnode and install it. */
                      VOP_UNLOCK(ndata.ni_vp, 0);
                      NDFREE(&ndata, NDF_ONLY_PNBUF);
                      pwd_chdir(td, ndata.ni_vp);
                      return (0);
              }

              /* Rejected: unlock and release the vnode. */
              vput(ndata.ni_vp);
              NDFREE(&ndata, NDF_ONLY_PNBUF);
              return (rv);
      }
  821 
  822 /*
  823  * Change notion of root (``/'') directory.
       *
       * Requires PRIV_VFS_CHROOT.  The target must pass change_dir() and, on
       * MAC kernels, mac_vnode_check_chroot().  Returns the priv_check(),
       * namei(), change_dir(), MAC or pwd_chroot() error, or 0.
  824  */
  825 #ifndef _SYS_SYSPROTO_H_
  826 struct chroot_args {
  827         char    *path;
  828 };
  829 #endif
  830 int
  831 sys_chroot(td, uap)
  832         struct thread *td;
  833         struct chroot_args /* {
  834                 char *path;
  835         } */ *uap;
  836 {
  837         struct nameidata nd;
  838         int error;
  839 
  840         error = priv_check(td, PRIV_VFS_CHROOT);
  841         if (error != 0)
  842                 return (error);
  843         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  844             UIO_USERSPACE, uap->path, td);
  845         error = namei(&nd);
  846         if (error != 0)
  847                 goto error;
  848         error = change_dir(nd.ni_vp, td);
  849         if (error != 0)
  850                 goto e_vunlock;
  851 #ifdef MAC
  852         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  853         if (error != 0)
  854                 goto e_vunlock;
  855 #endif
              /*
               * Unlike the chdir paths, the lookup reference is released here
               * after pwd_chroot() returns; presumably pwd_chroot() takes its
               * own reference -- TODO confirm.
               */
  856         VOP_UNLOCK(nd.ni_vp, 0);
  857         error = pwd_chroot(td, nd.ni_vp);
  858         vrele(nd.ni_vp);
  859         NDFREE(&nd, NDF_ONLY_PNBUF);
  860         return (error);
      /* Failure after a successful lookup: unlock and release the vnode. */
  861 e_vunlock:
  862         vput(nd.ni_vp);
      /* Common failure exit, also reached when namei() itself failed. */
  863 error:
  864         NDFREE(&nd, NDF_ONLY_PNBUF);
  865         return (error);
  866 }
  867 
  868 /*
  869  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  870  * instance.
  871  */
  872 int
  873 change_dir(vp, td)
  874         struct vnode *vp;
  875         struct thread *td;
  876 {
  877 #ifdef MAC
  878         int error;
  879 #endif
  880 
  881         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  882         if (vp->v_type != VDIR)
  883                 return (ENOTDIR);
  884 #ifdef MAC
  885         error = mac_vnode_check_chdir(td->td_ucred, vp);
  886         if (error != 0)
  887                 return (error);
  888 #endif
  889         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  890 }
  891 
  892 static __inline void
  893 flags_to_rights(int flags, cap_rights_t *rightsp)
  894 {
  895 
  896         if (flags & O_EXEC) {
  897                 cap_rights_set(rightsp, CAP_FEXECVE);
  898         } else {
  899                 switch ((flags & O_ACCMODE)) {
  900                 case O_RDONLY:
  901                         cap_rights_set(rightsp, CAP_READ);
  902                         break;
  903                 case O_RDWR:
  904                         cap_rights_set(rightsp, CAP_READ);
  905                         /* FALLTHROUGH */
  906                 case O_WRONLY:
  907                         cap_rights_set(rightsp, CAP_WRITE);
  908                         if (!(flags & (O_APPEND | O_TRUNC)))
  909                                 cap_rights_set(rightsp, CAP_SEEK);
  910                         break;
  911                 }
  912         }
  913 
  914         if (flags & O_CREAT)
  915                 cap_rights_set(rightsp, CAP_CREATE);
  916 
  917         if (flags & O_TRUNC)
  918                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  919 
  920         if (flags & (O_SYNC | O_FSYNC))
  921                 cap_rights_set(rightsp, CAP_FSYNC);
  922 
  923         if (flags & (O_EXLOCK | O_SHLOCK))
  924                 cap_rights_set(rightsp, CAP_FLOCK);
  925 }
  926 
  927 /*
  928  * Check permissions, allocate an open file structure, and call the device
  929  * open routine if any.
  930  */
  931 #ifndef _SYS_SYSPROTO_H_
  932 struct open_args {
  933         char    *path;
  934         int     flags;
  935         int     mode;
  936 };
  937 #endif
  938 int
  939 sys_open(td, uap)
  940         struct thread *td;
  941         register struct open_args /* {
  942                 char *path;
  943                 int flags;
  944                 int mode;
  945         } */ *uap;
  946 {
  947 
  948         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
  949             uap->flags, uap->mode));
  950 }
  951 
  952 #ifndef _SYS_SYSPROTO_H_
  953 struct openat_args {
  954         int     fd;
  955         char    *path;
  956         int     flag;
  957         int     mode;
  958 };
  959 #endif
  960 int
  961 sys_openat(struct thread *td, struct openat_args *uap)
  962 {
  963 
  964         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
  965             uap->mode));
  966 }
  967 
/*
 * Open (and, depending on flags, create) the file named by `path', looked
 * up relative to descriptor `fd', and install a descriptor for it.
 *
 * td       calling thread; the new descriptor index is stored in
 *          td->td_retval[0] on success.
 * pathseg  address space that `path' lives in.
 * flags    open(2) flags; O_EXEC may not be combined with any access-mode
 *          bit, and both access-mode bits may not be set together (EINVAL).
 * mode     creation mode; masked with the process fd_cmask and ALLPERMS,
 *          with S_ISTXT cleared, before being passed to vn_open().
 *
 * Returns 0 on success or an errno value.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
        struct proc *p = td->td_proc;
        struct filedesc *fdp = p->p_fd;
        struct file *fp;
        struct vnode *vp;
        struct nameidata nd;
        cap_rights_t rights;
        int cmode, error, indx;

        /* -1 means "no descriptor installed yet"; see the success label. */
        indx = -1;

        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* XXX: audit dirfd */
        /* Rights required on `fd' for this lookup, derived from flags. */
        cap_rights_init(&rights, CAP_LOOKUP);
        flags_to_rights(flags, &rights);
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.
         */
        if (flags & O_EXEC) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE) {
                return (EINVAL);
        } else {
                flags = FFLAGS(flags);
        }

        /*
         * Allocate a file structure. The descriptor to reference it
         * is allocated and set by finstall() below.
         */
        error = falloc_noinstall(td, &fp);
        if (error != 0)
                return (error);
        /*
         * An extra reference on `fp' has been held for us by
         * falloc_noinstall().
         */
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
            &rights, td);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open(&nd, &flags, cmode, fp);
        if (error != 0) {
                /*
                 * If the vn_open replaced the method vector, something
                 * wonderous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
                if (error == ENXIO && fp->f_ops != &badfileops)
                        goto success;

                /*
                 * Handle special fdopen() case. bleh.
                 *
                 * Don't do this for relative (capability) lookups; we don't
                 * understand exactly what would happen, and we don't think
                 * that it ever should.
                 */
                if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
                    (error == ENODEV || error == ENXIO) &&
                    td->td_dupfd >= 0) {
                        /* On success dupfdopen() sets indx itself. */
                        error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
                            &indx);
                        if (error == 0)
                                goto success;
                }

                goto bad;
        }
        td->td_dupfd = 0;
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;

        /*
         * Store the vnode, for any f_type. Typically, the vnode use
         * count is decremented by direct call to vn_closefile() for
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;
        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
                KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
                fp->f_seqcount = 1;
                finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
                    DTYPE_VNODE, vp, &vnops);
        }

        VOP_UNLOCK(vp, 0);
        /* Truncation goes through the file's own operations vector. */
        if (flags & O_TRUNC) {
                error = fo_truncate(fp, 0, td->td_ucred, td);
                if (error != 0)
                        goto bad;
        }
success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
        if (indx == -1) {
                struct filecaps *fcaps;

#ifdef CAPABILITIES
                if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0)
                        fcaps = &nd.ni_filecaps;
                else
#endif
                        fcaps = NULL;
                error = finstall(td, fp, &indx, flags, fcaps);
                /* On success finstall() consumes fcaps. */
                if (error != 0) {
                        filecaps_free(&nd.ni_filecaps);
                        goto bad;
                }
        } else {
                filecaps_free(&nd.ni_filecaps);
        }

        /*
         * Release our private reference, leaving the one associated with
         * the descriptor table intact.
         */
        fdrop(fp, td);
        td->td_retval[0] = indx;
        return (0);
bad:
        /* No descriptor may have been installed on any failure path. */
        KASSERT(indx == -1, ("indx=%d, should be -1", indx));
        fdrop(fp, td);
        return (error);
}
 1107 
 1108 #ifdef COMPAT_43
 1109 /*
 1110  * Create a file.
 1111  */
 1112 #ifndef _SYS_SYSPROTO_H_
 1113 struct ocreat_args {
 1114         char    *path;
 1115         int     mode;
 1116 };
 1117 #endif
 1118 int
 1119 ocreat(td, uap)
 1120         struct thread *td;
 1121         register struct ocreat_args /* {
 1122                 char *path;
 1123                 int mode;
 1124         } */ *uap;
 1125 {
 1126 
 1127         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1128             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1129 }
 1130 #endif /* COMPAT_43 */
 1131 
 1132 /*
 1133  * Create a special file.
 1134  */
 1135 #ifndef _SYS_SYSPROTO_H_
 1136 struct mknod_args {
 1137         char    *path;
 1138         int     mode;
 1139         int     dev;
 1140 };
 1141 #endif
 1142 int
 1143 sys_mknod(td, uap)
 1144         struct thread *td;
 1145         register struct mknod_args /* {
 1146                 char *path;
 1147                 int mode;
 1148                 int dev;
 1149         } */ *uap;
 1150 {
 1151 
 1152         return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1153             uap->mode, uap->dev));
 1154 }
 1155 
 1156 #ifndef _SYS_SYSPROTO_H_
 1157 struct mknodat_args {
 1158         int     fd;
 1159         char    *path;
 1160         mode_t  mode;
 1161         dev_t   dev;
 1162 };
 1163 #endif
 1164 int
 1165 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1166 {
 1167 
 1168         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1169             uap->dev));
 1170 }
 1171 
 1172 int
 1173 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1174     int mode, int dev)
 1175 {
 1176         struct vnode *vp;
 1177         struct mount *mp;
 1178         struct vattr vattr;
 1179         struct nameidata nd;
 1180         cap_rights_t rights;
 1181         int error, whiteout = 0;
 1182 
 1183         AUDIT_ARG_MODE(mode);
 1184         AUDIT_ARG_DEV(dev);
 1185         switch (mode & S_IFMT) {
 1186         case S_IFCHR:
 1187         case S_IFBLK:
 1188                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1189                 if (error == 0 && dev == VNOVAL)
 1190                         error = EINVAL;
 1191                 break;
 1192         case S_IFMT:
 1193                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1194                 break;
 1195         case S_IFWHT:
 1196                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1197                 break;
 1198         case S_IFIFO:
 1199                 if (dev == 0)
 1200                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1201                 /* FALLTHROUGH */
 1202         default:
 1203                 error = EINVAL;
 1204                 break;
 1205         }
 1206         if (error != 0)
 1207                 return (error);
 1208 restart:
 1209         bwillwrite();
 1210         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1211             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1212             td);
 1213         if ((error = namei(&nd)) != 0)
 1214                 return (error);
 1215         vp = nd.ni_vp;
 1216         if (vp != NULL) {
 1217                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1218                 if (vp == nd.ni_dvp)
 1219                         vrele(nd.ni_dvp);
 1220                 else
 1221                         vput(nd.ni_dvp);
 1222                 vrele(vp);
 1223                 return (EEXIST);
 1224         } else {
 1225                 VATTR_NULL(&vattr);
 1226                 vattr.va_mode = (mode & ALLPERMS) &
 1227                     ~td->td_proc->p_fd->fd_cmask;
 1228                 vattr.va_rdev = dev;
 1229                 whiteout = 0;
 1230 
 1231                 switch (mode & S_IFMT) {
 1232                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1233                         vattr.va_type = VBAD;
 1234                         break;
 1235                 case S_IFCHR:
 1236                         vattr.va_type = VCHR;
 1237                         break;
 1238                 case S_IFBLK:
 1239                         vattr.va_type = VBLK;
 1240                         break;
 1241                 case S_IFWHT:
 1242                         whiteout = 1;
 1243                         break;
 1244                 default:
 1245                         panic("kern_mknod: invalid mode");
 1246                 }
 1247         }
 1248         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1249                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1250                 vput(nd.ni_dvp);
 1251                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1252                         return (error);
 1253                 goto restart;
 1254         }
 1255 #ifdef MAC
 1256         if (error == 0 && !whiteout)
 1257                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1258                     &nd.ni_cnd, &vattr);
 1259 #endif
 1260         if (error == 0) {
 1261                 if (whiteout)
 1262                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1263                 else {
 1264                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1265                                                 &nd.ni_cnd, &vattr);
 1266                         if (error == 0)
 1267                                 vput(nd.ni_vp);
 1268                 }
 1269         }
 1270         NDFREE(&nd, NDF_ONLY_PNBUF);
 1271         vput(nd.ni_dvp);
 1272         vn_finished_write(mp);
 1273         return (error);
 1274 }
 1275 
 1276 /*
 1277  * Create a named pipe.
 1278  */
 1279 #ifndef _SYS_SYSPROTO_H_
 1280 struct mkfifo_args {
 1281         char    *path;
 1282         int     mode;
 1283 };
 1284 #endif
 1285 int
 1286 sys_mkfifo(td, uap)
 1287         struct thread *td;
 1288         register struct mkfifo_args /* {
 1289                 char *path;
 1290                 int mode;
 1291         } */ *uap;
 1292 {
 1293 
 1294         return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1295             uap->mode));
 1296 }
 1297 
 1298 #ifndef _SYS_SYSPROTO_H_
 1299 struct mkfifoat_args {
 1300         int     fd;
 1301         char    *path;
 1302         mode_t  mode;
 1303 };
 1304 #endif
 1305 int
 1306 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1307 {
 1308 
 1309         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1310             uap->mode));
 1311 }
 1312 
 1313 int
 1314 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1315     int mode)
 1316 {
 1317         struct mount *mp;
 1318         struct vattr vattr;
 1319         struct nameidata nd;
 1320         cap_rights_t rights;
 1321         int error;
 1322 
 1323         AUDIT_ARG_MODE(mode);
 1324 restart:
 1325         bwillwrite();
 1326         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1327             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1328             td);
 1329         if ((error = namei(&nd)) != 0)
 1330                 return (error);
 1331         if (nd.ni_vp != NULL) {
 1332                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1333                 if (nd.ni_vp == nd.ni_dvp)
 1334                         vrele(nd.ni_dvp);
 1335                 else
 1336                         vput(nd.ni_dvp);
 1337                 vrele(nd.ni_vp);
 1338                 return (EEXIST);
 1339         }
 1340         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1341                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1342                 vput(nd.ni_dvp);
 1343                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1344                         return (error);
 1345                 goto restart;
 1346         }
 1347         VATTR_NULL(&vattr);
 1348         vattr.va_type = VFIFO;
 1349         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1350 #ifdef MAC
 1351         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1352             &vattr);
 1353         if (error != 0)
 1354                 goto out;
 1355 #endif
 1356         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1357         if (error == 0)
 1358                 vput(nd.ni_vp);
 1359 #ifdef MAC
 1360 out:
 1361 #endif
 1362         vput(nd.ni_dvp);
 1363         vn_finished_write(mp);
 1364         NDFREE(&nd, NDF_ONLY_PNBUF);
 1365         return (error);
 1366 }
 1367 
 1368 /*
 1369  * Make a hard file link.
 1370  */
 1371 #ifndef _SYS_SYSPROTO_H_
 1372 struct link_args {
 1373         char    *path;
 1374         char    *link;
 1375 };
 1376 #endif
 1377 int
 1378 sys_link(td, uap)
 1379         struct thread *td;
 1380         register struct link_args /* {
 1381                 char *path;
 1382                 char *link;
 1383         } */ *uap;
 1384 {
 1385 
 1386         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 1387             UIO_USERSPACE, FOLLOW));
 1388 }
 1389 
 1390 #ifndef _SYS_SYSPROTO_H_
 1391 struct linkat_args {
 1392         int     fd1;
 1393         char    *path1;
 1394         int     fd2;
 1395         char    *path2;
 1396         int     flag;
 1397 };
 1398 #endif
 1399 int
 1400 sys_linkat(struct thread *td, struct linkat_args *uap)
 1401 {
 1402         int flag;
 1403 
 1404         flag = uap->flag;
 1405         if (flag & ~AT_SYMLINK_FOLLOW)
 1406                 return (EINVAL);
 1407 
 1408         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1409             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1410 }
 1411 
/*
 * Run-time knobs, exported under the _security_bsd sysctl node, that
 * can_hardlink() consults.  Both default to 0 (check disabled).
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
 1422 
 1423 static int
 1424 can_hardlink(struct vnode *vp, struct ucred *cred)
 1425 {
 1426         struct vattr va;
 1427         int error;
 1428 
 1429         if (!hardlink_check_uid && !hardlink_check_gid)
 1430                 return (0);
 1431 
 1432         error = VOP_GETATTR(vp, &va, cred);
 1433         if (error != 0)
 1434                 return (error);
 1435 
 1436         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1437                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1438                 if (error != 0)
 1439                         return (error);
 1440         }
 1441 
 1442         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1443                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1444                 if (error != 0)
 1445                         return (error);
 1446         }
 1447 
 1448         return (0);
 1449 }
 1450 
/*
 * Create a hard link named `path2' (looked up relative to `fd2') to the
 * existing object `path1' (looked up relative to `fd1').
 *
 * segflg  address space both paths live in.
 * follow  lookup flag OR-ed into the first namei(); callers in this file
 *         pass FOLLOW or NOFOLLOW.
 *
 * Returns EPERM if the source is a directory, EEXIST if the target name
 * already exists, EXDEV if source and target directory are on different
 * mounts, or the error from the permission checks or VOP_LINK().  If the
 * source vnode cannot be locked, or the non-blocking vn_start_write() fails
 * and the subsequent wait succeeds, the whole operation restarts at `again'.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
        struct vnode *vp;
        struct mount *mp;
        struct nameidata nd;
        cap_rights_t rights;
        int error;

again:
        bwillwrite();
        /* First lookup: the existing object to be linked to. */
        NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
            cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

        if ((error = namei(&nd)) != 0)
                return (error);
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;
        if (vp->v_type == VDIR) {
                vrele(vp);
                return (EPERM);         /* POSIX */
        }
        /* Second lookup reuses nd: the new name, with its parent locked. */
        NDINIT_ATRIGHTS(&nd, CREATE,
            LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
            cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
        if ((error = namei(&nd)) == 0) {
                if (nd.ni_vp != NULL) {
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                        if (nd.ni_dvp == nd.ni_vp)
                                vrele(nd.ni_dvp);
                        else
                                vput(nd.ni_dvp);
                        vrele(nd.ni_vp);
                        vrele(vp);
                        return (EEXIST);
                } else if (nd.ni_dvp->v_mount != vp->v_mount) {
                        /*
                         * Cross-device link.  No need to recheck
                         * vp->v_type, since it cannot change, except
                         * to VBAD.
                         */
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                        vput(nd.ni_dvp);
                        vrele(vp);
                        return (EXDEV);
                } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
                        error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
                        if (error == 0)
                                error = mac_vnode_check_link(td->td_ucred,
                                    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
                        if (error != 0) {
                                vput(vp);
                                vput(nd.ni_dvp);
                                NDFREE(&nd, NDF_ONLY_PNBUF);
                                return (error);
                        }
                        error = vn_start_write(vp, &mp, V_NOWAIT);
                        if (error != 0) {
                                /* Release everything, wait, then retry. */
                                vput(vp);
                                vput(nd.ni_dvp);
                                NDFREE(&nd, NDF_ONLY_PNBUF);
                                error = vn_start_write(NULL, &mp,
                                    V_XSLEEP | PCATCH);
                                if (error != 0)
                                        return (error);
                                goto again;
                        }
                        error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
                        /* Unlock only; the reference is dropped below. */
                        VOP_UNLOCK(vp, 0);
                        vput(nd.ni_dvp);
                        vn_finished_write(mp);
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                } else {
                        /* vn_lock() failed: drop everything and start over. */
                        vput(nd.ni_dvp);
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                        vrele(vp);
                        goto again;
                }
        }
        /* Reached after VOP_LINK() or when the second namei() failed. */
        vrele(vp);
        return (error);
}
 1536 
 1537 /*
 1538  * Make a symbolic link.
 1539  */
 1540 #ifndef _SYS_SYSPROTO_H_
 1541 struct symlink_args {
 1542         char    *path;
 1543         char    *link;
 1544 };
 1545 #endif
 1546 int
 1547 sys_symlink(td, uap)
 1548         struct thread *td;
 1549         register struct symlink_args /* {
 1550                 char *path;
 1551                 char *link;
 1552         } */ *uap;
 1553 {
 1554 
 1555         return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 1556             UIO_USERSPACE));
 1557 }
 1558 
 1559 #ifndef _SYS_SYSPROTO_H_
 1560 struct symlinkat_args {
 1561         char    *path;
 1562         int     fd;
 1563         char    *path2;
 1564 };
 1565 #endif
 1566 int
 1567 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1568 {
 1569 
 1570         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1571             UIO_USERSPACE));
 1572 }
 1573 
 1574 int
 1575 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1576     enum uio_seg segflg)
 1577 {
 1578         struct mount *mp;
 1579         struct vattr vattr;
 1580         char *syspath;
 1581         struct nameidata nd;
 1582         int error;
 1583         cap_rights_t rights;
 1584 
 1585         if (segflg == UIO_SYSSPACE) {
 1586                 syspath = path1;
 1587         } else {
 1588                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1589                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1590                         goto out;
 1591         }
 1592         AUDIT_ARG_TEXT(syspath);
 1593 restart:
 1594         bwillwrite();
 1595         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1596             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1597             td);
 1598         if ((error = namei(&nd)) != 0)
 1599                 goto out;
 1600         if (nd.ni_vp) {
 1601                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1602                 if (nd.ni_vp == nd.ni_dvp)
 1603                         vrele(nd.ni_dvp);
 1604                 else
 1605                         vput(nd.ni_dvp);
 1606                 vrele(nd.ni_vp);
 1607                 error = EEXIST;
 1608                 goto out;
 1609         }
 1610         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1611                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1612                 vput(nd.ni_dvp);
 1613                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1614                         goto out;
 1615                 goto restart;
 1616         }
 1617         VATTR_NULL(&vattr);
 1618         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1619 #ifdef MAC
 1620         vattr.va_type = VLNK;
 1621         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1622             &vattr);
 1623         if (error != 0)
 1624                 goto out2;
 1625 #endif
 1626         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1627         if (error == 0)
 1628                 vput(nd.ni_vp);
 1629 #ifdef MAC
 1630 out2:
 1631 #endif
 1632         NDFREE(&nd, NDF_ONLY_PNBUF);
 1633         vput(nd.ni_dvp);
 1634         vn_finished_write(mp);
 1635 out:
 1636         if (segflg != UIO_SYSSPACE)
 1637                 uma_zfree(namei_zone, syspath);
 1638         return (error);
 1639 }
 1640 
 1641 /*
 1642  * Delete a whiteout from the filesystem.
 1643  */
 1644 int
 1645 sys_undelete(td, uap)
 1646         struct thread *td;
 1647         register struct undelete_args /* {
 1648                 char *path;
 1649         } */ *uap;
 1650 {
 1651         struct mount *mp;
 1652         struct nameidata nd;
 1653         int error;
 1654 
 1655 restart:
 1656         bwillwrite();
 1657         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1658             UIO_USERSPACE, uap->path, td);
 1659         error = namei(&nd);
 1660         if (error != 0)
 1661                 return (error);
 1662 
 1663         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1664                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1665                 if (nd.ni_vp == nd.ni_dvp)
 1666                         vrele(nd.ni_dvp);
 1667                 else
 1668                         vput(nd.ni_dvp);
 1669                 if (nd.ni_vp)
 1670                         vrele(nd.ni_vp);
 1671                 return (EEXIST);
 1672         }
 1673         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1674                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1675                 vput(nd.ni_dvp);
 1676                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1677                         return (error);
 1678                 goto restart;
 1679         }
 1680         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1681         NDFREE(&nd, NDF_ONLY_PNBUF);
 1682         vput(nd.ni_dvp);
 1683         vn_finished_write(mp);
 1684         return (error);
 1685 }
 1686 
 1687 /*
 1688  * Delete a name from the filesystem.
 1689  */
 1690 #ifndef _SYS_SYSPROTO_H_
 1691 struct unlink_args {
 1692         char    *path;
 1693 };
 1694 #endif
 1695 int
 1696 sys_unlink(td, uap)
 1697         struct thread *td;
 1698         struct unlink_args /* {
 1699                 char *path;
 1700         } */ *uap;
 1701 {
 1702 
 1703         return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
 1704 }
 1705 
 1706 #ifndef _SYS_SYSPROTO_H_
 1707 struct unlinkat_args {
 1708         int     fd;
 1709         char    *path;
 1710         int     flag;
 1711 };
 1712 #endif
 1713 int
 1714 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1715 {
 1716         int flag = uap->flag;
 1717         int fd = uap->fd;
 1718         char *path = uap->path;
 1719 
 1720         if (flag & ~AT_REMOVEDIR)
 1721                 return (EINVAL);
 1722 
 1723         if (flag & AT_REMOVEDIR)
 1724                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1725         else
 1726                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1727 }
 1728 
/*
 * Remove the name `path', looked up relative to `fd'.
 *
 * pathseg  address space that `path' lives in.
 * oldinum  when nonzero, the inode number the target is expected to have;
 *          if vn_stat() succeeds and reports a different st_ino, EIDRM is
 *          returned.  When zero, a directory target is rejected with EPERM.
 *
 * An EINVAL from namei() is reported as EPERM.  A vnode flagged VV_ROOT
 * yields EBUSY.  Otherwise returns the result of the MAC unlink check (when
 * compiled in) or of VOP_REMOVE().  If the non-blocking vn_start_write()
 * fails and the subsequent wait succeeds, the lookup is restarted.
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
        struct mount *mp;
        struct vnode *vp;
        struct nameidata nd;
        struct stat sb;
        cap_rights_t rights;
        int error;

restart:
        bwillwrite();
        NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
            pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
        if ((error = namei(&nd)) != 0)
                return (error == EINVAL ? EPERM : error);
        vp = nd.ni_vp;
        /* error is 0 here; the checks below may set it. */
        if (vp->v_type == VDIR && oldinum == 0) {
                error = EPERM;          /* POSIX */
        } else if (oldinum != 0 &&
                  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
                  sb.st_ino != oldinum) {
                        error = EIDRM;  /* Identifier removed */
        } else {
                /*
                 * The root of a mounted filesystem cannot be deleted.
                 *
                 * XXX: can this only be a VDIR case?
                 */
                if (vp->v_vflag & VV_ROOT)
                        error = EBUSY;
        }
        if (error == 0) {
                if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                        /* Drop the lookup state, wait, then redo the lookup. */
                        NDFREE(&nd, NDF_ONLY_PNBUF);
                        vput(nd.ni_dvp);
                        if (vp == nd.ni_dvp)
                                vrele(vp);
                        else
                                vput(vp);
                        if ((error = vn_start_write(NULL, &mp,
                            V_XSLEEP | PCATCH)) != 0)
                                return (error);
                        goto restart;
                }
#ifdef MAC
                error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
                    &nd.ni_cnd);
                if (error != 0)
                        goto out;
#endif
                vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
                error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
                vn_finished_write(mp);
        }
        /* Common exit: release the lookup state on success and failure. */
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        if (vp == nd.ni_dvp)
                vrele(vp);
        else
                vput(vp);
        return (error);
}
 1796 
 1797 /*
 1798  * Reposition read/write file offset.
 1799  */
 1800 #ifndef _SYS_SYSPROTO_H_
 1801 struct lseek_args {
 1802         int     fd;
 1803         int     pad;
 1804         off_t   offset;
 1805         int     whence;
 1806 };
 1807 #endif
 1808 int
 1809 sys_lseek(struct thread *td, struct lseek_args *uap)
 1810 {
 1811 
 1812         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1813 }
 1814 
 1815 int
 1816 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 1817 {
 1818         struct file *fp;
 1819         cap_rights_t rights;
 1820         int error;
 1821 
 1822         AUDIT_ARG_FD(fd);
 1823         error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1824         if (error != 0)
 1825                 return (error);
 1826         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1827             fo_seek(fp, offset, whence, td) : ESPIPE;
 1828         fdrop(fp, td);
 1829         return (error);
 1830 }
 1831 
 1832 #if defined(COMPAT_43)
 1833 /*
 1834  * Reposition read/write file offset.
 1835  */
 1836 #ifndef _SYS_SYSPROTO_H_
 1837 struct olseek_args {
 1838         int     fd;
 1839         long    offset;
 1840         int     whence;
 1841 };
 1842 #endif
 1843 int
 1844 olseek(struct thread *td, struct olseek_args *uap)
 1845 {
 1846 
 1847         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1848 }
 1849 #endif /* COMPAT_43 */
 1850 
 1851 #if defined(COMPAT_FREEBSD6)
 1852 /* Version with the 'pad' argument */
 1853 int
 1854 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 1855 {
 1856 
 1857         return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 1858 }
 1859 #endif
 1860 
 1861 /*
 1862  * Check access permissions using passed credentials.
 1863  */
 1864 static int
 1865 vn_access(vp, user_flags, cred, td)
 1866         struct vnode    *vp;
 1867         int             user_flags;
 1868         struct ucred    *cred;
 1869         struct thread   *td;
 1870 {
 1871         accmode_t accmode;
 1872         int error;
 1873 
 1874         /* Flags == 0 means only check for existence. */
 1875         if (user_flags == 0)
 1876                 return (0);
 1877 
 1878         accmode = 0;
 1879         if (user_flags & R_OK)
 1880                 accmode |= VREAD;
 1881         if (user_flags & W_OK)
 1882                 accmode |= VWRITE;
 1883         if (user_flags & X_OK)
 1884                 accmode |= VEXEC;
 1885 #ifdef MAC
 1886         error = mac_vnode_check_access(cred, vp, accmode);
 1887         if (error != 0)
 1888                 return (error);
 1889 #endif
 1890         if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1891                 error = VOP_ACCESS(vp, accmode, cred, td);
 1892         return (error);
 1893 }
 1894 
 1895 /*
 1896  * Check access permissions using "real" credentials.
 1897  */
 1898 #ifndef _SYS_SYSPROTO_H_
 1899 struct access_args {
 1900         char    *path;
 1901         int     amode;
 1902 };
 1903 #endif
 1904 int
 1905 sys_access(td, uap)
 1906         struct thread *td;
 1907         register struct access_args /* {
 1908                 char *path;
 1909                 int amode;
 1910         } */ *uap;
 1911 {
 1912 
 1913         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1914             0, uap->amode));
 1915 }
 1916 
 1917 #ifndef _SYS_SYSPROTO_H_
 1918 struct faccessat_args {
 1919         int     dirfd;
 1920         char    *path;
 1921         int     amode;
 1922         int     flag;
 1923 }
 1924 #endif
 1925 int
 1926 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 1927 {
 1928 
 1929         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1930             uap->amode));
 1931 }
 1932 
 1933 int
 1934 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1935     int flag, int amode)
 1936 {
 1937         struct ucred *cred, *usecred;
 1938         struct vnode *vp;
 1939         struct nameidata nd;
 1940         cap_rights_t rights;
 1941         int error;
 1942 
 1943         if (flag & ~AT_EACCESS)
 1944                 return (EINVAL);
 1945         if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 1946                 return (EINVAL);
 1947 
 1948         /*
 1949          * Create and modify a temporary credential instead of one that
 1950          * is potentially shared (if we need one).
 1951          */
 1952         cred = td->td_ucred;
 1953         if ((flag & AT_EACCESS) == 0 &&
 1954             ((cred->cr_uid != cred->cr_ruid ||
 1955             cred->cr_rgid != cred->cr_groups[0]))) {
 1956                 usecred = crdup(cred);
 1957                 usecred->cr_uid = cred->cr_ruid;
 1958                 usecred->cr_groups[0] = cred->cr_rgid;
 1959                 td->td_ucred = usecred;
 1960         } else
 1961                 usecred = cred;
 1962         AUDIT_ARG_VALUE(amode);
 1963         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 1964             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 1965             td);
 1966         if ((error = namei(&nd)) != 0)
 1967                 goto out;
 1968         vp = nd.ni_vp;
 1969 
 1970         error = vn_access(vp, amode, usecred, td);
 1971         NDFREE(&nd, NDF_ONLY_PNBUF);
 1972         vput(vp);
 1973 out:
 1974         if (usecred != cred) {
 1975                 td->td_ucred = cred;
 1976                 crfree(usecred);
 1977         }
 1978         return (error);
 1979 }
 1980 
 1981 /*
 1982  * Check access permissions using "effective" credentials.
 1983  */
 1984 #ifndef _SYS_SYSPROTO_H_
 1985 struct eaccess_args {
 1986         char    *path;
 1987         int     amode;
 1988 };
 1989 #endif
 1990 int
 1991 sys_eaccess(td, uap)
 1992         struct thread *td;
 1993         register struct eaccess_args /* {
 1994                 char *path;
 1995                 int amode;
 1996         } */ *uap;
 1997 {
 1998 
 1999         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2000             AT_EACCESS, uap->amode));
 2001 }
 2002 
 2003 #if defined(COMPAT_43)
 2004 /*
 2005  * Get file status; this version follows links.
 2006  */
 2007 #ifndef _SYS_SYSPROTO_H_
 2008 struct ostat_args {
 2009         char    *path;
 2010         struct ostat *ub;
 2011 };
 2012 #endif
 2013 int
 2014 ostat(td, uap)
 2015         struct thread *td;
 2016         register struct ostat_args /* {
 2017                 char *path;
 2018                 struct ostat *ub;
 2019         } */ *uap;
 2020 {
 2021         struct stat sb;
 2022         struct ostat osb;
 2023         int error;
 2024 
 2025         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2026             &sb, NULL);
 2027         if (error != 0)
 2028                 return (error);
 2029         cvtstat(&sb, &osb);
 2030         return (copyout(&osb, uap->ub, sizeof (osb)));
 2031 }
 2032 
 2033 /*
 2034  * Get file status; this version does not follow links.
 2035  */
 2036 #ifndef _SYS_SYSPROTO_H_
 2037 struct olstat_args {
 2038         char    *path;
 2039         struct ostat *ub;
 2040 };
 2041 #endif
 2042 int
 2043 olstat(td, uap)
 2044         struct thread *td;
 2045         register struct olstat_args /* {
 2046                 char *path;
 2047                 struct ostat *ub;
 2048         } */ *uap;
 2049 {
 2050         struct stat sb;
 2051         struct ostat osb;
 2052         int error;
 2053 
 2054         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2055             UIO_USERSPACE, &sb, NULL);
 2056         if (error != 0)
 2057                 return (error);
 2058         cvtstat(&sb, &osb);
 2059         return (copyout(&osb, uap->ub, sizeof (osb)));
 2060 }
 2061 
 2062 /*
 2063  * Convert from an old to a new stat structure.
 2064  */
 2065 void
 2066 cvtstat(st, ost)
 2067         struct stat *st;
 2068         struct ostat *ost;
 2069 {
 2070 
 2071         bzero(ost, sizeof(*ost));
 2072         ost->st_dev = st->st_dev;
 2073         ost->st_ino = st->st_ino;
 2074         ost->st_mode = st->st_mode;
 2075         ost->st_nlink = st->st_nlink;
 2076         ost->st_uid = st->st_uid;
 2077         ost->st_gid = st->st_gid;
 2078         ost->st_rdev = st->st_rdev;
 2079         if (st->st_size < (quad_t)1 << 32)
 2080                 ost->st_size = st->st_size;
 2081         else
 2082                 ost->st_size = -2;
 2083         ost->st_atim = st->st_atim;
 2084         ost->st_mtim = st->st_mtim;
 2085         ost->st_ctim = st->st_ctim;
 2086         ost->st_blksize = st->st_blksize;
 2087         ost->st_blocks = st->st_blocks;
 2088         ost->st_flags = st->st_flags;
 2089         ost->st_gen = st->st_gen;
 2090 }
 2091 #endif /* COMPAT_43 */
 2092 
 2093 /*
 2094  * Get file status; this version follows links.
 2095  */
 2096 #ifndef _SYS_SYSPROTO_H_
 2097 struct stat_args {
 2098         char    *path;
 2099         struct stat *ub;
 2100 };
 2101 #endif
 2102 int
 2103 sys_stat(td, uap)
 2104         struct thread *td;
 2105         register struct stat_args /* {
 2106                 char *path;
 2107                 struct stat *ub;
 2108         } */ *uap;
 2109 {
 2110         struct stat sb;
 2111         int error;
 2112 
 2113         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2114             &sb, NULL);
 2115         if (error == 0)
 2116                 error = copyout(&sb, uap->ub, sizeof (sb));
 2117         return (error);
 2118 }
 2119 
 2120 #ifndef _SYS_SYSPROTO_H_
 2121 struct fstatat_args {
 2122         int     fd;
 2123         char    *path;
 2124         struct stat     *buf;
 2125         int     flag;
 2126 }
 2127 #endif
 2128 int
 2129 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2130 {
 2131         struct stat sb;
 2132         int error;
 2133 
 2134         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2135             UIO_USERSPACE, &sb, NULL);
 2136         if (error == 0)
 2137                 error = copyout(&sb, uap->buf, sizeof (sb));
 2138         return (error);
 2139 }
 2140 
 2141 int
 2142 kern_statat(struct thread *td, int flag, int fd, char *path,
 2143     enum uio_seg pathseg, struct stat *sbp,
 2144     void (*hook)(struct vnode *vp, struct stat *sbp))
 2145 {
 2146         struct nameidata nd;
 2147         struct stat sb;
 2148         cap_rights_t rights;
 2149         int error;
 2150 
 2151         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2152                 return (EINVAL);
 2153 
 2154         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2155             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2156             cap_rights_init(&rights, CAP_FSTAT), td);
 2157 
 2158         if ((error = namei(&nd)) != 0)
 2159                 return (error);
 2160         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2161         if (error == 0) {
 2162                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2163                 if (S_ISREG(sb.st_mode))
 2164                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2165                 if (__predict_false(hook != NULL))
 2166                         hook(nd.ni_vp, &sb);
 2167         }
 2168         NDFREE(&nd, NDF_ONLY_PNBUF);
 2169         vput(nd.ni_vp);
 2170         if (error != 0)
 2171                 return (error);
 2172         *sbp = sb;
 2173 #ifdef KTRACE
 2174         if (KTRPOINT(td, KTR_STRUCT))
 2175                 ktrstat(&sb);
 2176 #endif
 2177         return (0);
 2178 }
 2179 
 2180 /*
 2181  * Get file status; this version does not follow links.
 2182  */
 2183 #ifndef _SYS_SYSPROTO_H_
 2184 struct lstat_args {
 2185         char    *path;
 2186         struct stat *ub;
 2187 };
 2188 #endif
 2189 int
 2190 sys_lstat(td, uap)
 2191         struct thread *td;
 2192         register struct lstat_args /* {
 2193                 char *path;
 2194                 struct stat *ub;
 2195         } */ *uap;
 2196 {
 2197         struct stat sb;
 2198         int error;
 2199 
 2200         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2201             UIO_USERSPACE, &sb, NULL);
 2202         if (error == 0)
 2203                 error = copyout(&sb, uap->ub, sizeof (sb));
 2204         return (error);
 2205 }
 2206 
 2207 /*
 2208  * Implementation of the NetBSD [l]stat() functions.
 2209  */
 2210 void
 2211 cvtnstat(sb, nsb)
 2212         struct stat *sb;
 2213         struct nstat *nsb;
 2214 {
 2215 
 2216         bzero(nsb, sizeof *nsb);
 2217         nsb->st_dev = sb->st_dev;
 2218         nsb->st_ino = sb->st_ino;
 2219         nsb->st_mode = sb->st_mode;
 2220         nsb->st_nlink = sb->st_nlink;
 2221         nsb->st_uid = sb->st_uid;
 2222         nsb->st_gid = sb->st_gid;
 2223         nsb->st_rdev = sb->st_rdev;
 2224         nsb->st_atim = sb->st_atim;
 2225         nsb->st_mtim = sb->st_mtim;
 2226         nsb->st_ctim = sb->st_ctim;
 2227         nsb->st_size = sb->st_size;
 2228         nsb->st_blocks = sb->st_blocks;
 2229         nsb->st_blksize = sb->st_blksize;
 2230         nsb->st_flags = sb->st_flags;
 2231         nsb->st_gen = sb->st_gen;
 2232         nsb->st_birthtim = sb->st_birthtim;
 2233 }
 2234 
 2235 #ifndef _SYS_SYSPROTO_H_
 2236 struct nstat_args {
 2237         char    *path;
 2238         struct nstat *ub;
 2239 };
 2240 #endif
 2241 int
 2242 sys_nstat(td, uap)
 2243         struct thread *td;
 2244         register struct nstat_args /* {
 2245                 char *path;
 2246                 struct nstat *ub;
 2247         } */ *uap;
 2248 {
 2249         struct stat sb;
 2250         struct nstat nsb;
 2251         int error;
 2252 
 2253         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2254             &sb, NULL);
 2255         if (error != 0)
 2256                 return (error);
 2257         cvtnstat(&sb, &nsb);
 2258         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2259 }
 2260 
 2261 /*
 2262  * NetBSD lstat.  Get file status; this version does not follow links.
 2263  */
 2264 #ifndef _SYS_SYSPROTO_H_
 2265 struct lstat_args {
 2266         char    *path;
 2267         struct stat *ub;
 2268 };
 2269 #endif
 2270 int
 2271 sys_nlstat(td, uap)
 2272         struct thread *td;
 2273         register struct nlstat_args /* {
 2274                 char *path;
 2275                 struct nstat *ub;
 2276         } */ *uap;
 2277 {
 2278         struct stat sb;
 2279         struct nstat nsb;
 2280         int error;
 2281 
 2282         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2283             UIO_USERSPACE, &sb, NULL);
 2284         if (error != 0)
 2285                 return (error);
 2286         cvtnstat(&sb, &nsb);
 2287         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2288 }
 2289 
 2290 /*
 2291  * Get configurable pathname variables.
 2292  */
 2293 #ifndef _SYS_SYSPROTO_H_
 2294 struct pathconf_args {
 2295         char    *path;
 2296         int     name;
 2297 };
 2298 #endif
 2299 int
 2300 sys_pathconf(td, uap)
 2301         struct thread *td;
 2302         register struct pathconf_args /* {
 2303                 char *path;
 2304                 int name;
 2305         } */ *uap;
 2306 {
 2307 
 2308         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2309 }
 2310 
 2311 #ifndef _SYS_SYSPROTO_H_
 2312 struct lpathconf_args {
 2313         char    *path;
 2314         int     name;
 2315 };
 2316 #endif
 2317 int
 2318 sys_lpathconf(td, uap)
 2319         struct thread *td;
 2320         register struct lpathconf_args /* {
 2321                 char *path;
 2322                 int name;
 2323         } */ *uap;
 2324 {
 2325 
 2326         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2327             NOFOLLOW));
 2328 }
 2329 
 2330 int
 2331 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2332     u_long flags)
 2333 {
 2334         struct nameidata nd;
 2335         int error;
 2336 
 2337         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2338             pathseg, path, td);
 2339         if ((error = namei(&nd)) != 0)
 2340                 return (error);
 2341         NDFREE(&nd, NDF_ONLY_PNBUF);
 2342 
 2343         error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2344         vput(nd.ni_vp);
 2345         return (error);
 2346 }
 2347 
 2348 /*
 2349  * Return target name of a symbolic link.
 2350  */
 2351 #ifndef _SYS_SYSPROTO_H_
 2352 struct readlink_args {
 2353         char    *path;
 2354         char    *buf;
 2355         size_t  count;
 2356 };
 2357 #endif
 2358 int
 2359 sys_readlink(td, uap)
 2360         struct thread *td;
 2361         register struct readlink_args /* {
 2362                 char *path;
 2363                 char *buf;
 2364                 size_t count;
 2365         } */ *uap;
 2366 {
 2367 
 2368         return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2369             uap->buf, UIO_USERSPACE, uap->count));
 2370 }
 2371 #ifndef _SYS_SYSPROTO_H_
 2372 struct readlinkat_args {
 2373         int     fd;
 2374         char    *path;
 2375         char    *buf;
 2376         size_t  bufsize;
 2377 };
 2378 #endif
 2379 int
 2380 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2381 {
 2382 
 2383         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2384             uap->buf, UIO_USERSPACE, uap->bufsize));
 2385 }
 2386 
 2387 int
 2388 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2389     char *buf, enum uio_seg bufseg, size_t count)
 2390 {
 2391         struct vnode *vp;
 2392         struct iovec aiov;
 2393         struct uio auio;
 2394         struct nameidata nd;
 2395         int error;
 2396 
 2397         if (count > IOSIZE_MAX)
 2398                 return (EINVAL);
 2399 
 2400         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2401             pathseg, path, fd, td);
 2402 
 2403         if ((error = namei(&nd)) != 0)
 2404                 return (error);
 2405         NDFREE(&nd, NDF_ONLY_PNBUF);
 2406         vp = nd.ni_vp;
 2407 #ifdef MAC
 2408         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2409         if (error != 0) {
 2410                 vput(vp);
 2411                 return (error);
 2412         }
 2413 #endif
 2414         if (vp->v_type != VLNK)
 2415                 error = EINVAL;
 2416         else {
 2417                 aiov.iov_base = buf;
 2418                 aiov.iov_len = count;
 2419                 auio.uio_iov = &aiov;
 2420                 auio.uio_iovcnt = 1;
 2421                 auio.uio_offset = 0;
 2422                 auio.uio_rw = UIO_READ;
 2423                 auio.uio_segflg = bufseg;
 2424                 auio.uio_td = td;
 2425                 auio.uio_resid = count;
 2426                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2427                 td->td_retval[0] = count - auio.uio_resid;
 2428         }
 2429         vput(vp);
 2430         return (error);
 2431 }
 2432 
 2433 /*
 2434  * Common implementation code for chflags() and fchflags().
 2435  */
 2436 static int
 2437 setfflags(td, vp, flags)
 2438         struct thread *td;
 2439         struct vnode *vp;
 2440         u_long flags;
 2441 {
 2442         struct mount *mp;
 2443         struct vattr vattr;
 2444         int error;
 2445 
 2446         /* We can't support the value matching VNOVAL. */
 2447         if (flags == VNOVAL)
 2448                 return (EOPNOTSUPP);
 2449 
 2450         /*
 2451          * Prevent non-root users from setting flags on devices.  When
 2452          * a device is reused, users can retain ownership of the device
 2453          * if they are allowed to set flags and programs assume that
 2454          * chown can't fail when done as root.
 2455          */
 2456         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2457                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2458                 if (error != 0)
 2459                         return (error);
 2460         }
 2461 
 2462         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2463                 return (error);
 2464         VATTR_NULL(&vattr);
 2465         vattr.va_flags = flags;
 2466         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2467 #ifdef MAC
 2468         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2469         if (error == 0)
 2470 #endif
 2471                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2472         VOP_UNLOCK(vp, 0);
 2473         vn_finished_write(mp);
 2474         return (error);
 2475 }
 2476 
 2477 /*
 2478  * Change flags of a file given a path name.
 2479  */
 2480 #ifndef _SYS_SYSPROTO_H_
 2481 struct chflags_args {
 2482         const char *path;
 2483         u_long  flags;
 2484 };
 2485 #endif
 2486 int
 2487 sys_chflags(td, uap)
 2488         struct thread *td;
 2489         register struct chflags_args /* {
 2490                 const char *path;
 2491                 u_long flags;
 2492         } */ *uap;
 2493 {
 2494 
 2495         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2496             uap->flags, 0));
 2497 }
 2498 
 2499 #ifndef _SYS_SYSPROTO_H_
 2500 struct chflagsat_args {
 2501         int     fd;
 2502         const char *path;
 2503         u_long  flags;
 2504         int     atflag;
 2505 }
 2506 #endif
 2507 int
 2508 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2509 {
 2510         int fd = uap->fd;
 2511         const char *path = uap->path;
 2512         u_long flags = uap->flags;
 2513         int atflag = uap->atflag;
 2514 
 2515         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2516                 return (EINVAL);
 2517 
 2518         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2519 }
 2520 
 2521 /*
 2522  * Same as chflags() but doesn't follow symlinks.
 2523  */
 2524 int
 2525 sys_lchflags(td, uap)
 2526         struct thread *td;
 2527         register struct lchflags_args /* {
 2528                 const char *path;
 2529                 u_long flags;
 2530         } */ *uap;
 2531 {
 2532 
 2533         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2534             uap->flags, AT_SYMLINK_NOFOLLOW));
 2535 }
 2536 
 2537 static int
 2538 kern_chflagsat(struct thread *td, int fd, const char *path,
 2539     enum uio_seg pathseg, u_long flags, int atflag)
 2540 {
 2541         struct nameidata nd;
 2542         cap_rights_t rights;
 2543         int error, follow;
 2544 
 2545         AUDIT_ARG_FFLAGS(flags);
 2546         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2547         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2548             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2549         if ((error = namei(&nd)) != 0)
 2550                 return (error);
 2551         NDFREE(&nd, NDF_ONLY_PNBUF);
 2552         error = setfflags(td, nd.ni_vp, flags);
 2553         vrele(nd.ni_vp);
 2554         return (error);
 2555 }
 2556 
 2557 /*
 2558  * Change flags of a file given a file descriptor.
 2559  */
 2560 #ifndef _SYS_SYSPROTO_H_
 2561 struct fchflags_args {
 2562         int     fd;
 2563         u_long  flags;
 2564 };
 2565 #endif
 2566 int
 2567 sys_fchflags(td, uap)
 2568         struct thread *td;
 2569         register struct fchflags_args /* {
 2570                 int fd;
 2571                 u_long flags;
 2572         } */ *uap;
 2573 {
 2574         struct file *fp;
 2575         cap_rights_t rights;
 2576         int error;
 2577 
 2578         AUDIT_ARG_FD(uap->fd);
 2579         AUDIT_ARG_FFLAGS(uap->flags);
 2580         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
 2581             &fp);
 2582         if (error != 0)
 2583                 return (error);
 2584 #ifdef AUDIT
 2585         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2586         AUDIT_ARG_VNODE1(fp->f_vnode);
 2587         VOP_UNLOCK(fp->f_vnode, 0);
 2588 #endif
 2589         error = setfflags(td, fp->f_vnode, uap->flags);
 2590         fdrop(fp, td);
 2591         return (error);
 2592 }
 2593 
 2594 /*
 2595  * Common implementation code for chmod(), lchmod() and fchmod().
 2596  */
 2597 int
 2598 setfmode(td, cred, vp, mode)
 2599         struct thread *td;
 2600         struct ucred *cred;
 2601         struct vnode *vp;
 2602         int mode;
 2603 {
 2604         struct mount *mp;
 2605         struct vattr vattr;
 2606         int error;
 2607 
 2608         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2609                 return (error);
 2610         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2611         VATTR_NULL(&vattr);
 2612         vattr.va_mode = mode & ALLPERMS;
 2613 #ifdef MAC
 2614         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2615         if (error == 0)
 2616 #endif
 2617                 error = VOP_SETATTR(vp, &vattr, cred);
 2618         VOP_UNLOCK(vp, 0);
 2619         vn_finished_write(mp);
 2620         return (error);
 2621 }
 2622 
 2623 /*
 2624  * Change mode of a file given path name.
 2625  */
 2626 #ifndef _SYS_SYSPROTO_H_
 2627 struct chmod_args {
 2628         char    *path;
 2629         int     mode;
 2630 };
 2631 #endif
 2632 int
 2633 sys_chmod(td, uap)
 2634         struct thread *td;
 2635         register struct chmod_args /* {
 2636                 char *path;
 2637                 int mode;
 2638         } */ *uap;
 2639 {
 2640 
 2641         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2642             uap->mode, 0));
 2643 }
 2644 
 2645 #ifndef _SYS_SYSPROTO_H_
 2646 struct fchmodat_args {
 2647         int     dirfd;
 2648         char    *path;
 2649         mode_t  mode;
 2650         int     flag;
 2651 }
 2652 #endif
 2653 int
 2654 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2655 {
 2656         int flag = uap->flag;
 2657         int fd = uap->fd;
 2658         char *path = uap->path;
 2659         mode_t mode = uap->mode;
 2660 
 2661         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2662                 return (EINVAL);
 2663 
 2664         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2665 }
 2666 
 2667 /*
 2668  * Change mode of a file given path name (don't follow links.)
 2669  */
 2670 #ifndef _SYS_SYSPROTO_H_
 2671 struct lchmod_args {
 2672         char    *path;
 2673         int     mode;
 2674 };
 2675 #endif
 2676 int
 2677 sys_lchmod(td, uap)
 2678         struct thread *td;
 2679         register struct lchmod_args /* {
 2680                 char *path;
 2681                 int mode;
 2682         } */ *uap;
 2683 {
 2684 
 2685         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2686             uap->mode, AT_SYMLINK_NOFOLLOW));
 2687 }
 2688 
 2689 int
 2690 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2691     mode_t mode, int flag)
 2692 {
 2693         struct nameidata nd;
 2694         cap_rights_t rights;
 2695         int error, follow;
 2696 
 2697         AUDIT_ARG_MODE(mode);
 2698         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2699         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2700             cap_rights_init(&rights, CAP_FCHMOD), td);
 2701         if ((error = namei(&nd)) != 0)
 2702                 return (error);
 2703         NDFREE(&nd, NDF_ONLY_PNBUF);
 2704         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2705         vrele(nd.ni_vp);
 2706         return (error);
 2707 }
 2708 
 2709 /*
 2710  * Change mode of a file given a file descriptor.
 2711  */
 2712 #ifndef _SYS_SYSPROTO_H_
 2713 struct fchmod_args {
 2714         int     fd;
 2715         int     mode;
 2716 };
 2717 #endif
 2718 int
 2719 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2720 {
 2721         struct file *fp;
 2722         cap_rights_t rights;
 2723         int error;
 2724 
 2725         AUDIT_ARG_FD(uap->fd);
 2726         AUDIT_ARG_MODE(uap->mode);
 2727 
 2728         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2729         if (error != 0)
 2730                 return (error);
 2731         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2732         fdrop(fp, td);
 2733         return (error);
 2734 }
 2735 
 2736 /*
 2737  * Common implementation for chown(), lchown(), and fchown()
 2738  */
 2739 int
 2740 setfown(td, cred, vp, uid, gid)
 2741         struct thread *td;
 2742         struct ucred *cred;
 2743         struct vnode *vp;
 2744         uid_t uid;
 2745         gid_t gid;
 2746 {
 2747         struct mount *mp;
 2748         struct vattr vattr;
 2749         int error;
 2750 
 2751         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2752                 return (error);
 2753         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2754         VATTR_NULL(&vattr);
 2755         vattr.va_uid = uid;
 2756         vattr.va_gid = gid;
 2757 #ifdef MAC
 2758         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2759             vattr.va_gid);
 2760         if (error == 0)
 2761 #endif
 2762                 error = VOP_SETATTR(vp, &vattr, cred);
 2763         VOP_UNLOCK(vp, 0);
 2764         vn_finished_write(mp);
 2765         return (error);
 2766 }
 2767 
 2768 /*
 2769  * Set ownership given a path name.
 2770  */
 2771 #ifndef _SYS_SYSPROTO_H_
 2772 struct chown_args {
 2773         char    *path;
 2774         int     uid;
 2775         int     gid;
 2776 };
 2777 #endif
 2778 int
 2779 sys_chown(td, uap)
 2780         struct thread *td;
 2781         register struct chown_args /* {
 2782                 char *path;
 2783                 int uid;
 2784                 int gid;
 2785         } */ *uap;
 2786 {
 2787 
 2788         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 2789             uap->gid, 0));
 2790 }
 2791 
 2792 #ifndef _SYS_SYSPROTO_H_
 2793 struct fchownat_args {
 2794         int fd;
 2795         const char * path;
 2796         uid_t uid;
 2797         gid_t gid;
 2798         int flag;
 2799 };
 2800 #endif
 2801 int
 2802 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2803 {
 2804         int flag;
 2805 
 2806         flag = uap->flag;
 2807         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2808                 return (EINVAL);
 2809 
 2810         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2811             uap->gid, uap->flag));
 2812 }
 2813 
 2814 int
 2815 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2816     int uid, int gid, int flag)
 2817 {
 2818         struct nameidata nd;
 2819         cap_rights_t rights;
 2820         int error, follow;
 2821 
 2822         AUDIT_ARG_OWNER(uid, gid);
 2823         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2824         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2825             cap_rights_init(&rights, CAP_FCHOWN), td);
 2826 
 2827         if ((error = namei(&nd)) != 0)
 2828                 return (error);
 2829         NDFREE(&nd, NDF_ONLY_PNBUF);
 2830         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2831         vrele(nd.ni_vp);
 2832         return (error);
 2833 }
 2834 
 2835 /*
 2836  * Set ownership given a path name, do not cross symlinks.
 2837  */
 2838 #ifndef _SYS_SYSPROTO_H_
 2839 struct lchown_args {
 2840         char    *path;
 2841         int     uid;
 2842         int     gid;
 2843 };
 2844 #endif
 2845 int
 2846 sys_lchown(td, uap)
 2847         struct thread *td;
 2848         register struct lchown_args /* {
 2849                 char *path;
 2850                 int uid;
 2851                 int gid;
 2852         } */ *uap;
 2853 {
 2854 
 2855         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2856             uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 2857 }
 2858 
 2859 /*
 2860  * Set ownership given a file descriptor.
 2861  */
 2862 #ifndef _SYS_SYSPROTO_H_
 2863 struct fchown_args {
 2864         int     fd;
 2865         int     uid;
 2866         int     gid;
 2867 };
 2868 #endif
 2869 int
 2870 sys_fchown(td, uap)
 2871         struct thread *td;
 2872         register struct fchown_args /* {
 2873                 int fd;
 2874                 int uid;
 2875                 int gid;
 2876         } */ *uap;
 2877 {
 2878         struct file *fp;
 2879         cap_rights_t rights;
 2880         int error;
 2881 
 2882         AUDIT_ARG_FD(uap->fd);
 2883         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 2884         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 2885         if (error != 0)
 2886                 return (error);
 2887         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 2888         fdrop(fp, td);
 2889         return (error);
 2890 }
 2891 
 2892 /*
 2893  * Common implementation code for utimes(), lutimes(), and futimes().
 2894  */
 2895 static int
 2896 getutimes(usrtvp, tvpseg, tsp)
 2897         const struct timeval *usrtvp;
 2898         enum uio_seg tvpseg;
 2899         struct timespec *tsp;
 2900 {
 2901         struct timeval tv[2];
 2902         const struct timeval *tvp;
 2903         int error;
 2904 
 2905         if (usrtvp == NULL) {
 2906                 vfs_timestamp(&tsp[0]);
 2907                 tsp[1] = tsp[0];
 2908         } else {
 2909                 if (tvpseg == UIO_SYSSPACE) {
 2910                         tvp = usrtvp;
 2911                 } else {
 2912                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 2913                                 return (error);
 2914                         tvp = tv;
 2915                 }
 2916 
 2917                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 2918                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 2919                         return (EINVAL);
 2920                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 2921                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 2922         }
 2923         return (0);
 2924 }
 2925 
 2926 /*
 2927  * Common implementation code for futimens(), utimensat().
 2928  */
 2929 #define UTIMENS_NULL    0x1
 2930 #define UTIMENS_EXIT    0x2
 2931 static int
 2932 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 2933     struct timespec *tsp, int *retflags)
 2934 {
 2935         struct timespec tsnow;
 2936         int error;
 2937 
 2938         vfs_timestamp(&tsnow);
 2939         *retflags = 0;
 2940         if (usrtsp == NULL) {
 2941                 tsp[0] = tsnow;
 2942                 tsp[1] = tsnow;
 2943                 *retflags |= UTIMENS_NULL;
 2944                 return (0);
 2945         }
 2946         if (tspseg == UIO_SYSSPACE) {
 2947                 tsp[0] = usrtsp[0];
 2948                 tsp[1] = usrtsp[1];
 2949         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 2950                 return (error);
 2951         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 2952                 *retflags |= UTIMENS_EXIT;
 2953         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 2954                 *retflags |= UTIMENS_NULL;
 2955         if (tsp[0].tv_nsec == UTIME_OMIT)
 2956                 tsp[0].tv_sec = VNOVAL;
 2957         else if (tsp[0].tv_nsec == UTIME_NOW)
 2958                 tsp[0] = tsnow;
 2959         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 2960                 return (EINVAL);
 2961         if (tsp[1].tv_nsec == UTIME_OMIT)
 2962                 tsp[1].tv_sec = VNOVAL;
 2963         else if (tsp[1].tv_nsec == UTIME_NOW)
 2964                 tsp[1] = tsnow;
 2965         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 2966                 return (EINVAL);
 2967 
 2968         return (0);
 2969 }
 2970 
 2971 /*
 2972  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 2973  * and utimensat().
 2974  */
 2975 static int
 2976 setutimes(td, vp, ts, numtimes, nullflag)
 2977         struct thread *td;
 2978         struct vnode *vp;
 2979         const struct timespec *ts;
 2980         int numtimes;
 2981         int nullflag;
 2982 {
 2983         struct mount *mp;
 2984         struct vattr vattr;
 2985         int error, setbirthtime;
 2986 
 2987         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2988                 return (error);
 2989         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2990         setbirthtime = 0;
 2991         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 2992             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 2993                 setbirthtime = 1;
 2994         VATTR_NULL(&vattr);
 2995         vattr.va_atime = ts[0];
 2996         vattr.va_mtime = ts[1];
 2997         if (setbirthtime)
 2998                 vattr.va_birthtime = ts[1];
 2999         if (numtimes > 2)
 3000                 vattr.va_birthtime = ts[2];
 3001         if (nullflag)
 3002                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3003 #ifdef MAC
 3004         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3005             vattr.va_mtime);
 3006 #endif
 3007         if (error == 0)
 3008                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3009         VOP_UNLOCK(vp, 0);
 3010         vn_finished_write(mp);
 3011         return (error);
 3012 }
 3013 
 3014 /*
 3015  * Set the access and modification times of a file.
 3016  */
 3017 #ifndef _SYS_SYSPROTO_H_
 3018 struct utimes_args {
 3019         char    *path;
 3020         struct  timeval *tptr;
 3021 };
 3022 #endif
 3023 int
 3024 sys_utimes(td, uap)
 3025         struct thread *td;
 3026         register struct utimes_args /* {
 3027                 char *path;
 3028                 struct timeval *tptr;
 3029         } */ *uap;
 3030 {
 3031 
 3032         return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3033             uap->tptr, UIO_USERSPACE));
 3034 }
 3035 
 3036 #ifndef _SYS_SYSPROTO_H_
 3037 struct futimesat_args {
 3038         int fd;
 3039         const char * path;
 3040         const struct timeval * times;
 3041 };
 3042 #endif
 3043 int
 3044 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3045 {
 3046 
 3047         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3048             uap->times, UIO_USERSPACE));
 3049 }
 3050 
 3051 int
 3052 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3053     struct timeval *tptr, enum uio_seg tptrseg)
 3054 {
 3055         struct nameidata nd;
 3056         struct timespec ts[2];
 3057         cap_rights_t rights;
 3058         int error;
 3059 
 3060         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3061                 return (error);
 3062         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3063             cap_rights_init(&rights, CAP_FUTIMES), td);
 3064 
 3065         if ((error = namei(&nd)) != 0)
 3066                 return (error);
 3067         NDFREE(&nd, NDF_ONLY_PNBUF);
 3068         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3069         vrele(nd.ni_vp);
 3070         return (error);
 3071 }
 3072 
 3073 /*
 3074  * Set the access and modification times of a file.
 3075  */
 3076 #ifndef _SYS_SYSPROTO_H_
 3077 struct lutimes_args {
 3078         char    *path;
 3079         struct  timeval *tptr;
 3080 };
 3081 #endif
 3082 int
 3083 sys_lutimes(td, uap)
 3084         struct thread *td;
 3085         register struct lutimes_args /* {
 3086                 char *path;
 3087                 struct timeval *tptr;
 3088         } */ *uap;
 3089 {
 3090 
 3091         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3092             UIO_USERSPACE));
 3093 }
 3094 
 3095 int
 3096 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3097     struct timeval *tptr, enum uio_seg tptrseg)
 3098 {
 3099         struct timespec ts[2];
 3100         struct nameidata nd;
 3101         int error;
 3102 
 3103         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3104                 return (error);
 3105         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3106         if ((error = namei(&nd)) != 0)
 3107                 return (error);
 3108         NDFREE(&nd, NDF_ONLY_PNBUF);
 3109         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3110         vrele(nd.ni_vp);
 3111         return (error);
 3112 }
 3113 
 3114 /*
 3115  * Set the access and modification times of a file.
 3116  */
 3117 #ifndef _SYS_SYSPROTO_H_
 3118 struct futimes_args {
 3119         int     fd;
 3120         struct  timeval *tptr;
 3121 };
 3122 #endif
 3123 int
 3124 sys_futimes(td, uap)
 3125         struct thread *td;
 3126         register struct futimes_args /* {
 3127                 int  fd;
 3128                 struct timeval *tptr;
 3129         } */ *uap;
 3130 {
 3131 
 3132         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3133 }
 3134 
 3135 int
 3136 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3137     enum uio_seg tptrseg)
 3138 {
 3139         struct timespec ts[2];
 3140         struct file *fp;
 3141         cap_rights_t rights;
 3142         int error;
 3143 
 3144         AUDIT_ARG_FD(fd);
 3145         error = getutimes(tptr, tptrseg, ts);
 3146         if (error != 0)
 3147                 return (error);
 3148         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3149         if (error != 0)
 3150                 return (error);
 3151 #ifdef AUDIT
 3152         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3153         AUDIT_ARG_VNODE1(fp->f_vnode);
 3154         VOP_UNLOCK(fp->f_vnode, 0);
 3155 #endif
 3156         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3157         fdrop(fp, td);
 3158         return (error);
 3159 }
 3160 
 3161 int
 3162 sys_futimens(struct thread *td, struct futimens_args *uap)
 3163 {
 3164 
 3165         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3166 }
 3167 
 3168 int
 3169 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3170     enum uio_seg tptrseg)
 3171 {
 3172         struct timespec ts[2];
 3173         struct file *fp;
 3174         cap_rights_t rights;
 3175         int error, flags;
 3176 
 3177         AUDIT_ARG_FD(fd);
 3178         error = getutimens(tptr, tptrseg, ts, &flags);
 3179         if (error != 0)
 3180                 return (error);
 3181         if (flags & UTIMENS_EXIT)
 3182                 return (0);
 3183         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3184         if (error != 0)
 3185                 return (error);
 3186 #ifdef AUDIT
 3187         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3188         AUDIT_ARG_VNODE1(fp->f_vnode);
 3189         VOP_UNLOCK(fp->f_vnode, 0);
 3190 #endif
 3191         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3192         fdrop(fp, td);
 3193         return (error);
 3194 }
 3195 
 3196 int
 3197 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3198 {
 3199 
 3200         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3201             uap->times, UIO_USERSPACE, uap->flag));
 3202 }
 3203 
 3204 int
 3205 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3206     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3207 {
 3208         struct nameidata nd;
 3209         struct timespec ts[2];
 3210         cap_rights_t rights;
 3211         int error, flags;
 3212 
 3213         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3214                 return (EINVAL);
 3215 
 3216         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3217                 return (error);
 3218         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 3219             FOLLOW) | AUDITVNODE1, pathseg, path, fd,
 3220             cap_rights_init(&rights, CAP_FUTIMES), td);
 3221         if ((error = namei(&nd)) != 0)
 3222                 return (error);
 3223         /*
 3224          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3225          * POSIX states:
 3226          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3227          * "Search permission is denied by a component of the path prefix."
 3228          */
 3229         NDFREE(&nd, NDF_ONLY_PNBUF);
 3230         if ((flags & UTIMENS_EXIT) == 0)
 3231                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3232         vrele(nd.ni_vp);
 3233         return (error);
 3234 }
 3235 
 3236 /*
 3237  * Truncate a file given its path name.
 3238  */
 3239 #ifndef _SYS_SYSPROTO_H_
 3240 struct truncate_args {
 3241         char    *path;
 3242         int     pad;
 3243         off_t   length;
 3244 };
 3245 #endif
 3246 int
 3247 sys_truncate(td, uap)
 3248         struct thread *td;
 3249         register struct truncate_args /* {
 3250                 char *path;
 3251                 int pad;
 3252                 off_t length;
 3253         } */ *uap;
 3254 {
 3255 
 3256         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3257 }
 3258 
 3259 int
 3260 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3261 {
 3262         struct mount *mp;
 3263         struct vnode *vp;
 3264         void *rl_cookie;
 3265         struct vattr vattr;
 3266         struct nameidata nd;
 3267         int error;
 3268 
 3269         if (length < 0)
 3270                 return(EINVAL);
 3271         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3272         if ((error = namei(&nd)) != 0)
 3273                 return (error);
 3274         vp = nd.ni_vp;
 3275         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3276         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3277                 vn_rangelock_unlock(vp, rl_cookie);
 3278                 vrele(vp);
 3279                 return (error);
 3280         }
 3281         NDFREE(&nd, NDF_ONLY_PNBUF);
 3282         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3283         if (vp->v_type == VDIR)
 3284                 error = EISDIR;
 3285 #ifdef MAC
 3286         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3287         }
 3288 #endif
 3289         else if ((error = vn_writechk(vp)) == 0 &&
 3290             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3291                 VATTR_NULL(&vattr);
 3292                 vattr.va_size = length;
 3293                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3294         }
 3295         VOP_UNLOCK(vp, 0);
 3296         vn_finished_write(mp);
 3297         vn_rangelock_unlock(vp, rl_cookie);
 3298         vrele(vp);
 3299         return (error);
 3300 }
 3301 
 3302 #if defined(COMPAT_43)
 3303 /*
 3304  * Truncate a file given its path name.
 3305  */
 3306 #ifndef _SYS_SYSPROTO_H_
 3307 struct otruncate_args {
 3308         char    *path;
 3309         long    length;
 3310 };
 3311 #endif
 3312 int
 3313 otruncate(struct thread *td, struct otruncate_args *uap)
 3314 {
 3315 
 3316         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3317 }
 3318 #endif /* COMPAT_43 */
 3319 
 3320 #if defined(COMPAT_FREEBSD6)
 3321 /* Versions with the pad argument */
 3322 int
 3323 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3324 {
 3325 
 3326         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3327 }
 3328 
 3329 int
 3330 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3331 {
 3332 
 3333         return (kern_ftruncate(td, uap->fd, uap->length));
 3334 }
 3335 #endif
 3336 
 3337 int
 3338 kern_fsync(struct thread *td, int fd, bool fullsync)
 3339 {
 3340         struct vnode *vp;
 3341         struct mount *mp;
 3342         struct file *fp;
 3343         cap_rights_t rights;
 3344         int error, lock_flags;
 3345 
 3346         AUDIT_ARG_FD(fd);
 3347         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
 3348         if (error != 0)
 3349                 return (error);
 3350         vp = fp->f_vnode;
 3351 #if 0
 3352         if (!fullsync)
 3353                 /* XXXKIB: compete outstanding aio writes */;
 3354 #endif
 3355         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3356         if (error != 0)
 3357                 goto drop;
 3358         if (MNT_SHARED_WRITES(mp) ||
 3359             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3360                 lock_flags = LK_SHARED;
 3361         } else {
 3362                 lock_flags = LK_EXCLUSIVE;
 3363         }
 3364         vn_lock(vp, lock_flags | LK_RETRY);
 3365         AUDIT_ARG_VNODE1(vp);
 3366         if (vp->v_object != NULL) {
 3367                 VM_OBJECT_WLOCK(vp->v_object);
 3368                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3369                 VM_OBJECT_WUNLOCK(vp->v_object);
 3370         }
 3371         error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 3372         VOP_UNLOCK(vp, 0);
 3373         vn_finished_write(mp);
 3374 drop:
 3375         fdrop(fp, td);
 3376         return (error);
 3377 }
 3378 
 3379 /*
 3380  * Sync an open file.
 3381  */
 3382 #ifndef _SYS_SYSPROTO_H_
 3383 struct fsync_args {
 3384         int     fd;
 3385 };
 3386 #endif
 3387 int
 3388 sys_fsync(struct thread *td, struct fsync_args *uap)
 3389 {
 3390 
 3391         return (kern_fsync(td, uap->fd, true));
 3392 }
 3393 
 3394 int
 3395 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 3396 {
 3397 
 3398         return (kern_fsync(td, uap->fd, false));
 3399 }
 3400 
 3401 /*
 3402  * Rename files.  Source and destination must either both be directories, or
 3403  * both not be directories.  If target is a directory, it must be empty.
 3404  */
 3405 #ifndef _SYS_SYSPROTO_H_
 3406 struct rename_args {
 3407         char    *from;
 3408         char    *to;
 3409 };
 3410 #endif
 3411 int
 3412 sys_rename(td, uap)
 3413         struct thread *td;
 3414         register struct rename_args /* {
 3415                 char *from;
 3416                 char *to;
 3417         } */ *uap;
 3418 {
 3419 
 3420         return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 3421             uap->to, UIO_USERSPACE));
 3422 }
 3423 
 3424 #ifndef _SYS_SYSPROTO_H_
 3425 struct renameat_args {
 3426         int     oldfd;
 3427         char    *old;
 3428         int     newfd;
 3429         char    *new;
 3430 };
 3431 #endif
 3432 int
 3433 sys_renameat(struct thread *td, struct renameat_args *uap)
 3434 {
 3435 
 3436         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3437             UIO_USERSPACE));
 3438 }
 3439 
 3440 int
 3441 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3442     enum uio_seg pathseg)
 3443 {
 3444         struct mount *mp = NULL;
 3445         struct vnode *tvp, *fvp, *tdvp;
 3446         struct nameidata fromnd, tond;
 3447         cap_rights_t rights;
 3448         int error;
 3449 
 3450 again:
 3451         bwillwrite();
 3452 #ifdef MAC
 3453         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3454             AUDITVNODE1, pathseg, old, oldfd,
 3455             cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 3456 #else
 3457         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3458             pathseg, old, oldfd,
 3459             cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 3460 #endif
 3461 
 3462         if ((error = namei(&fromnd)) != 0)
 3463                 return (error);
 3464 #ifdef MAC
 3465         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3466             fromnd.ni_vp, &fromnd.ni_cnd);
 3467         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3468         if (fromnd.ni_dvp != fromnd.ni_vp)
 3469                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3470 #endif
 3471         fvp = fromnd.ni_vp;
 3472         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3473             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3474             cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td);
 3475         if (fromnd.ni_vp->v_type == VDIR)
 3476                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3477         if ((error = namei(&tond)) != 0) {
 3478                 /* Translate error code for rename("dir1", "dir2/."). */
 3479                 if (error == EISDIR && fvp->v_type == VDIR)
 3480                         error = EINVAL;
 3481                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3482                 vrele(fromnd.ni_dvp);
 3483                 vrele(fvp);
 3484                 goto out1;
 3485         }
 3486         tdvp = tond.ni_dvp;
 3487         tvp = tond.ni_vp;
 3488         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3489         if (error != 0) {
 3490                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3491                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3492                 if (tvp != NULL)
 3493                         vput(tvp);
 3494                 if (tdvp == tvp)
 3495                         vrele(tdvp);
 3496                 else
 3497                         vput(tdvp);
 3498                 vrele(fromnd.ni_dvp);
 3499                 vrele(fvp);
 3500                 vrele(tond.ni_startdir);
 3501                 if (fromnd.ni_startdir != NULL)
 3502                         vrele(fromnd.ni_startdir);
 3503                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3504                 if (error != 0)
 3505                         return (error);
 3506                 goto again;
 3507         }
 3508         if (tvp != NULL) {
 3509                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3510                         error = ENOTDIR;
 3511                         goto out;
 3512                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3513                         error = EISDIR;
 3514                         goto out;
 3515                 }
 3516 #ifdef CAPABILITIES
 3517                 if (newfd != AT_FDCWD) {
 3518                         /*
 3519                          * If the target already exists we require CAP_UNLINKAT
 3520                          * from 'newfd'.
 3521                          */
 3522                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3523                             cap_rights_init(&rights, CAP_UNLINKAT));
 3524                         if (error != 0)
 3525                                 goto out;
 3526                 }
 3527 #endif
 3528         }
 3529         if (fvp == tdvp) {
 3530                 error = EINVAL;
 3531                 goto out;
 3532         }
 3533         /*
 3534          * If the source is the same as the destination (that is, if they
 3535          * are links to the same vnode), then there is nothing to do.
 3536          */
 3537         if (fvp == tvp)
 3538                 error = -1;
 3539 #ifdef MAC
 3540         else
 3541                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3542                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3543 #endif
 3544 out:
 3545         if (error == 0) {
 3546                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3547                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3548                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3549                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3550         } else {
 3551                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3552                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3553                 if (tvp != NULL)
 3554                         vput(tvp);
 3555                 if (tdvp == tvp)
 3556                         vrele(tdvp);
 3557                 else
 3558                         vput(tdvp);
 3559                 vrele(fromnd.ni_dvp);
 3560                 vrele(fvp);
 3561         }
 3562         vrele(tond.ni_startdir);
 3563         vn_finished_write(mp);
 3564 out1:
 3565         if (fromnd.ni_startdir)
 3566                 vrele(fromnd.ni_startdir);
 3567         if (error == -1)
 3568                 return (0);
 3569         return (error);
 3570 }
 3571 
 3572 /*
 3573  * Make a directory file.
 3574  */
 3575 #ifndef _SYS_SYSPROTO_H_
 3576 struct mkdir_args {
 3577         char    *path;
 3578         int     mode;
 3579 };
 3580 #endif
 3581 int
 3582 sys_mkdir(td, uap)
 3583         struct thread *td;
 3584         register struct mkdir_args /* {
 3585                 char *path;
 3586                 int mode;
 3587         } */ *uap;
 3588 {
 3589 
 3590         return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3591             uap->mode));
 3592 }
 3593 
 3594 #ifndef _SYS_SYSPROTO_H_
 3595 struct mkdirat_args {
 3596         int     fd;
 3597         char    *path;
 3598         mode_t  mode;
 3599 };
 3600 #endif
 3601 int
 3602 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3603 {
 3604 
 3605         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3606 }
 3607 
 3608 int
 3609 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3610     int mode)
 3611 {
 3612         struct mount *mp;
 3613         struct vnode *vp;
 3614         struct vattr vattr;
 3615         struct nameidata nd;
 3616         cap_rights_t rights;
 3617         int error;
 3618 
 3619         AUDIT_ARG_MODE(mode);
 3620 restart:
 3621         bwillwrite();
 3622         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3623             NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 3624             td);
 3625         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3626         if ((error = namei(&nd)) != 0)
 3627                 return (error);
 3628         vp = nd.ni_vp;
 3629         if (vp != NULL) {
 3630                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3631                 /*
 3632                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3633                  * the strange behaviour of leaving the vnode unlocked
 3634                  * if the target is the same vnode as the parent.
 3635                  */
 3636                 if (vp == nd.ni_dvp)
 3637                         vrele(nd.ni_dvp);
 3638                 else
 3639                         vput(nd.ni_dvp);
 3640                 vrele(vp);
 3641                 return (EEXIST);
 3642         }
 3643         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3644                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3645                 vput(nd.ni_dvp);
 3646                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3647                         return (error);
 3648                 goto restart;
 3649         }
 3650         VATTR_NULL(&vattr);
 3651         vattr.va_type = VDIR;
 3652         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3653 #ifdef MAC
 3654         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3655             &vattr);
 3656         if (error != 0)
 3657                 goto out;
 3658 #endif
 3659         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3660 #ifdef MAC
 3661 out:
 3662 #endif
 3663         NDFREE(&nd, NDF_ONLY_PNBUF);
 3664         vput(nd.ni_dvp);
 3665         if (error == 0)
 3666                 vput(nd.ni_vp);
 3667         vn_finished_write(mp);
 3668         return (error);
 3669 }
 3670 
 3671 /*
 3672  * Remove a directory file.
 3673  */
 3674 #ifndef _SYS_SYSPROTO_H_
 3675 struct rmdir_args {
 3676         char    *path;
 3677 };
 3678 #endif
 3679 int
 3680 sys_rmdir(td, uap)
 3681         struct thread *td;
 3682         struct rmdir_args /* {
 3683                 char *path;
 3684         } */ *uap;
 3685 {
 3686 
 3687         return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE));
 3688 }
 3689 
 3690 int
 3691 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3692 {
 3693         struct mount *mp;
 3694         struct vnode *vp;
 3695         struct nameidata nd;
 3696         cap_rights_t rights;
 3697         int error;
 3698 
 3699 restart:
 3700         bwillwrite();
 3701         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3702             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3703         if ((error = namei(&nd)) != 0)
 3704                 return (error);
 3705         vp = nd.ni_vp;
 3706         if (vp->v_type != VDIR) {
 3707                 error = ENOTDIR;
 3708                 goto out;
 3709         }
 3710         /*
 3711          * No rmdir "." please.
 3712          */
 3713         if (nd.ni_dvp == vp) {
 3714                 error = EINVAL;
 3715                 goto out;
 3716         }
 3717         /*
 3718          * The root of a mounted filesystem cannot be deleted.
 3719          */
 3720         if (vp->v_vflag & VV_ROOT) {
 3721                 error = EBUSY;
 3722                 goto out;
 3723         }
 3724 #ifdef MAC
 3725         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3726             &nd.ni_cnd);
 3727         if (error != 0)
 3728                 goto out;
 3729 #endif
 3730         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3731                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3732                 vput(vp);
 3733                 if (nd.ni_dvp == vp)
 3734                         vrele(nd.ni_dvp);
 3735                 else
 3736                         vput(nd.ni_dvp);
 3737                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3738                         return (error);
 3739                 goto restart;
 3740         }
 3741         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3742         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3743         vn_finished_write(mp);
 3744 out:
 3745         NDFREE(&nd, NDF_ONLY_PNBUF);
 3746         vput(vp);
 3747         if (nd.ni_dvp == vp)
 3748                 vrele(nd.ni_dvp);
 3749         else
 3750                 vput(nd.ni_dvp);
 3751         return (error);
 3752 }
 3753 
 3754 #ifdef COMPAT_43
 3755 /*
 3756  * Read a block of directory entries in a filesystem independent format.
 3757  */
 3758 #ifndef _SYS_SYSPROTO_H_
 3759 struct ogetdirentries_args {
 3760         int     fd;
 3761         char    *buf;
 3762         u_int   count;
 3763         long    *basep;
 3764 };
 3765 #endif
 3766 int
 3767 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3768 {
 3769         long loff;
 3770         int error;
 3771 
 3772         error = kern_ogetdirentries(td, uap, &loff);
 3773         if (error == 0)
 3774                 error = copyout(&loff, uap->basep, sizeof(long));
 3775         return (error);
 3776 }
 3777 
 3778 int
 3779 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3780     long *ploff)
 3781 {
 3782         struct vnode *vp;
 3783         struct file *fp;
 3784         struct uio auio, kuio;
 3785         struct iovec aiov, kiov;
 3786         struct dirent *dp, *edp;
 3787         cap_rights_t rights;
 3788         caddr_t dirbuf;
 3789         int error, eofflag, readcnt;
 3790         long loff;
 3791         off_t foffset;
 3792 
 3793         /* XXX arbitrary sanity limit on `count'. */
 3794         if (uap->count > 64 * 1024)
 3795                 return (EINVAL);
 3796         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
 3797         if (error != 0)
 3798                 return (error);
 3799         if ((fp->f_flag & FREAD) == 0) {
 3800                 fdrop(fp, td);
 3801                 return (EBADF);
 3802         }
 3803         vp = fp->f_vnode;
 3804         foffset = foffset_lock(fp, 0);
 3805 unionread:
 3806         if (vp->v_type != VDIR) {
 3807                 foffset_unlock(fp, foffset, 0);
 3808                 fdrop(fp, td);
 3809                 return (EINVAL);
 3810         }
 3811         aiov.iov_base = uap->buf;
 3812         aiov.iov_len = uap->count;
 3813         auio.uio_iov = &aiov;
 3814         auio.uio_iovcnt = 1;
 3815         auio.uio_rw = UIO_READ;
 3816         auio.uio_segflg = UIO_USERSPACE;
 3817         auio.uio_td = td;
 3818         auio.uio_resid = uap->count;
 3819         vn_lock(vp, LK_SHARED | LK_RETRY);
 3820         loff = auio.uio_offset = foffset;
 3821 #ifdef MAC
 3822         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3823         if (error != 0) {
 3824                 VOP_UNLOCK(vp, 0);
 3825                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 3826                 fdrop(fp, td);
 3827                 return (error);
 3828         }
 3829 #endif
 3830 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3831                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3832                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3833                             NULL, NULL);
 3834                         foffset = auio.uio_offset;
 3835                 } else
 3836 #       endif
 3837         {
 3838                 kuio = auio;
 3839                 kuio.uio_iov = &kiov;
 3840                 kuio.uio_segflg = UIO_SYSSPACE;
 3841                 kiov.iov_len = uap->count;
 3842                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3843                 kiov.iov_base = dirbuf;
 3844                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3845                             NULL, NULL);
 3846                 foffset = kuio.uio_offset;
 3847                 if (error == 0) {
 3848                         readcnt = uap->count - kuio.uio_resid;
 3849                         edp = (struct dirent *)&dirbuf[readcnt];
 3850                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3851 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3852                                         /*
 3853                                          * The expected low byte of
 3854                                          * dp->d_namlen is our dp->d_type.
 3855                                          * The high MBZ byte of dp->d_namlen
 3856                                          * is our dp->d_namlen.
 3857                                          */
 3858                                         dp->d_type = dp->d_namlen;
 3859                                         dp->d_namlen = 0;
 3860 #                               else
 3861                                         /*
 3862                                          * The dp->d_type is the high byte
 3863                                          * of the expected dp->d_namlen,
 3864                                          * so must be zero'ed.
 3865                                          */
 3866                                         dp->d_type = 0;
 3867 #                               endif
 3868                                 if (dp->d_reclen > 0) {
 3869                                         dp = (struct dirent *)
 3870                                             ((char *)dp + dp->d_reclen);
 3871                                 } else {
 3872                                         error = EIO;
 3873                                         break;
 3874                                 }
 3875                         }
 3876                         if (dp >= edp)
 3877                                 error = uiomove(dirbuf, readcnt, &auio);
 3878                 }
 3879                 free(dirbuf, M_TEMP);
 3880         }
 3881         if (error != 0) {
 3882                 VOP_UNLOCK(vp, 0);
 3883                 foffset_unlock(fp, foffset, 0);
 3884                 fdrop(fp, td);
 3885                 return (error);
 3886         }
 3887         if (uap->count == auio.uio_resid &&
 3888             (vp->v_vflag & VV_ROOT) &&
 3889             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3890                 struct vnode *tvp = vp;
 3891                 vp = vp->v_mount->mnt_vnodecovered;
 3892                 VREF(vp);
 3893                 fp->f_vnode = vp;
 3894                 fp->f_data = vp;
 3895                 foffset = 0;
 3896                 vput(tvp);
 3897                 goto unionread;
 3898         }
 3899         VOP_UNLOCK(vp, 0);
 3900         foffset_unlock(fp, foffset, 0);
 3901         fdrop(fp, td);
 3902         td->td_retval[0] = uap->count - auio.uio_resid;
 3903         if (error == 0)
 3904                 *ploff = loff;
 3905         return (error);
 3906 }
 3907 #endif /* COMPAT_43 */
 3908 
 3909 /*
 3910  * Read a block of directory entries in a filesystem independent format.
 3911  */
 3912 #ifndef _SYS_SYSPROTO_H_
 3913 struct getdirentries_args {
 3914         int     fd;
 3915         char    *buf;
 3916         u_int   count;
 3917         long    *basep;
 3918 };
 3919 #endif
 3920 int
 3921 sys_getdirentries(td, uap)
 3922         struct thread *td;
 3923         register struct getdirentries_args /* {
 3924                 int fd;
 3925                 char *buf;
 3926                 u_int count;
 3927                 long *basep;
 3928         } */ *uap;
 3929 {
 3930         long base;
 3931         int error;
 3932 
 3933         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 3934             NULL, UIO_USERSPACE);
 3935         if (error != 0)
 3936                 return (error);
 3937         if (uap->basep != NULL)
 3938                 error = copyout(&base, uap->basep, sizeof(long));
 3939         return (error);
 3940 }
 3941 
 3942 int
 3943 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 3944     long *basep, ssize_t *residp, enum uio_seg bufseg)
 3945 {
 3946         struct vnode *vp;
 3947         struct file *fp;
 3948         struct uio auio;
 3949         struct iovec aiov;
 3950         cap_rights_t rights;
 3951         long loff;
 3952         int error, eofflag;
 3953         off_t foffset;
 3954 
 3955         AUDIT_ARG_FD(fd);
 3956         if (count > IOSIZE_MAX)
 3957                 return (EINVAL);
 3958         auio.uio_resid = count;
 3959         error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
 3960         if (error != 0)
 3961                 return (error);
 3962         if ((fp->f_flag & FREAD) == 0) {
 3963                 fdrop(fp, td);
 3964                 return (EBADF);
 3965         }
 3966         vp = fp->f_vnode;
 3967         foffset = foffset_lock(fp, 0);
 3968 unionread:
 3969         if (vp->v_type != VDIR) {
 3970                 error = EINVAL;
 3971                 goto fail;
 3972         }
 3973         aiov.iov_base = buf;
 3974         aiov.iov_len = count;
 3975         auio.uio_iov = &aiov;
 3976         auio.uio_iovcnt = 1;
 3977         auio.uio_rw = UIO_READ;
 3978         auio.uio_segflg = bufseg;
 3979         auio.uio_td = td;
 3980         vn_lock(vp, LK_SHARED | LK_RETRY);
 3981         AUDIT_ARG_VNODE1(vp);
 3982         loff = auio.uio_offset = foffset;
 3983 #ifdef MAC
 3984         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3985         if (error == 0)
 3986 #endif
 3987                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 3988                     NULL);
 3989         foffset = auio.uio_offset;
 3990         if (error != 0) {
 3991                 VOP_UNLOCK(vp, 0);
 3992                 goto fail;
 3993         }
 3994         if (count == auio.uio_resid &&
 3995             (vp->v_vflag & VV_ROOT) &&
 3996             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3997                 struct vnode *tvp = vp;
 3998 
 3999                 vp = vp->v_mount->mnt_vnodecovered;
 4000                 VREF(vp);
 4001                 fp->f_vnode = vp;
 4002                 fp->f_data = vp;
 4003                 foffset = 0;
 4004                 vput(tvp);
 4005                 goto unionread;
 4006         }
 4007         VOP_UNLOCK(vp, 0);
 4008         *basep = loff;
 4009         if (residp != NULL)
 4010                 *residp = auio.uio_resid;
 4011         td->td_retval[0] = count - auio.uio_resid;
 4012 fail:
 4013         foffset_unlock(fp, foffset, 0);
 4014         fdrop(fp, td);
 4015         return (error);
 4016 }
 4017 
 4018 #ifndef _SYS_SYSPROTO_H_
 4019 struct getdents_args {
 4020         int fd;
 4021         char *buf;
 4022         size_t count;
 4023 };
 4024 #endif
 4025 int
 4026 sys_getdents(td, uap)
 4027         struct thread *td;
 4028         register struct getdents_args /* {
 4029                 int fd;
 4030                 char *buf;
 4031                 u_int count;
 4032         } */ *uap;
 4033 {
 4034         struct getdirentries_args ap;
 4035 
 4036         ap.fd = uap->fd;
 4037         ap.buf = uap->buf;
 4038         ap.count = uap->count;
 4039         ap.basep = NULL;
 4040         return (sys_getdirentries(td, &ap));
 4041 }
 4042 
 4043 /*
 4044  * Set the mode mask for creation of filesystem nodes.
 4045  */
 4046 #ifndef _SYS_SYSPROTO_H_
 4047 struct umask_args {
 4048         int     newmask;
 4049 };
 4050 #endif
 4051 int
 4052 sys_umask(td, uap)
 4053         struct thread *td;
 4054         struct umask_args /* {
 4055                 int newmask;
 4056         } */ *uap;
 4057 {
 4058         struct filedesc *fdp;
 4059 
 4060         fdp = td->td_proc->p_fd;
 4061         FILEDESC_XLOCK(fdp);
 4062         td->td_retval[0] = fdp->fd_cmask;
 4063         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4064         FILEDESC_XUNLOCK(fdp);
 4065         return (0);
 4066 }
 4067 
 4068 /*
 4069  * Void all references to file by ripping underlying filesystem away from
 4070  * vnode.
 4071  */
 4072 #ifndef _SYS_SYSPROTO_H_
 4073 struct revoke_args {
 4074         char    *path;
 4075 };
 4076 #endif
 4077 int
 4078 sys_revoke(td, uap)
 4079         struct thread *td;
 4080         register struct revoke_args /* {
 4081                 char *path;
 4082         } */ *uap;
 4083 {
 4084         struct vnode *vp;
 4085         struct vattr vattr;
 4086         struct nameidata nd;
 4087         int error;
 4088 
 4089         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4090             uap->path, td);
 4091         if ((error = namei(&nd)) != 0)
 4092                 return (error);
 4093         vp = nd.ni_vp;
 4094         NDFREE(&nd, NDF_ONLY_PNBUF);
 4095         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4096                 error = EINVAL;
 4097                 goto out;
 4098         }
 4099 #ifdef MAC
 4100         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4101         if (error != 0)
 4102                 goto out;
 4103 #endif
 4104         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4105         if (error != 0)
 4106                 goto out;
 4107         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4108                 error = priv_check(td, PRIV_VFS_ADMIN);
 4109                 if (error != 0)
 4110                         goto out;
 4111         }
 4112         if (vcount(vp) > 1)
 4113                 VOP_REVOKE(vp, REVOKEALL);
 4114 out:
 4115         vput(vp);
 4116         return (error);
 4117 }
 4118 
 4119 /*
 4120  * Convert a user file descriptor to a kernel file entry and check that, if it
 4121  * is a capability, the correct rights are present. A reference on the file
 4122  * entry is held upon returning.
 4123  */
 4124 int
 4125 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 4126 {
 4127         struct file *fp;
 4128         int error;
 4129 
 4130         error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
 4131         if (error != 0)
 4132                 return (error);
 4133 
 4134         /*
 4135          * The file could be not of the vnode type, or it may be not
 4136          * yet fully initialized, in which case the f_vnode pointer
 4137          * may be set, but f_ops is still badfileops.  E.g.,
 4138          * devfs_open() transiently create such situation to
 4139          * facilitate csw d_fdopen().
 4140          *
 4141          * Dupfdopen() handling in kern_openat() installs the
 4142          * half-baked file into the process descriptor table, allowing
 4143          * other thread to dereference it. Guard against the race by
 4144          * checking f_ops.
 4145          */
 4146         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4147                 fdrop(fp, td);
 4148                 return (EINVAL);
 4149         }
 4150         *fpp = fp;
 4151         return (0);
 4152 }
 4153 
 4154 
 4155 /*
 4156  * Get an (NFS) file handle.
 4157  */
 4158 #ifndef _SYS_SYSPROTO_H_
 4159 struct lgetfh_args {
 4160         char    *fname;
 4161         fhandle_t *fhp;
 4162 };
 4163 #endif
 4164 int
 4165 sys_lgetfh(td, uap)
 4166         struct thread *td;
 4167         register struct lgetfh_args *uap;
 4168 {
 4169         struct nameidata nd;
 4170         fhandle_t fh;
 4171         register struct vnode *vp;
 4172         int error;
 4173 
 4174         error = priv_check(td, PRIV_VFS_GETFH);
 4175         if (error != 0)
 4176                 return (error);
 4177         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4178             uap->fname, td);
 4179         error = namei(&nd);
 4180         if (error != 0)
 4181                 return (error);
 4182         NDFREE(&nd, NDF_ONLY_PNBUF);
 4183         vp = nd.ni_vp;
 4184         bzero(&fh, sizeof(fh));
 4185         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4186         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4187         vput(vp);
 4188         if (error == 0)
 4189                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4190         return (error);
 4191 }
 4192 
 4193 #ifndef _SYS_SYSPROTO_H_
 4194 struct getfh_args {
 4195         char    *fname;
 4196         fhandle_t *fhp;
 4197 };
 4198 #endif
 4199 int
 4200 sys_getfh(td, uap)
 4201         struct thread *td;
 4202         register struct getfh_args *uap;
 4203 {
 4204         struct nameidata nd;
 4205         fhandle_t fh;
 4206         register struct vnode *vp;
 4207         int error;
 4208 
 4209         error = priv_check(td, PRIV_VFS_GETFH);
 4210         if (error != 0)
 4211                 return (error);
 4212         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4213             uap->fname, td);
 4214         error = namei(&nd);
 4215         if (error != 0)
 4216                 return (error);
 4217         NDFREE(&nd, NDF_ONLY_PNBUF);
 4218         vp = nd.ni_vp;
 4219         bzero(&fh, sizeof(fh));
 4220         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4221         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4222         vput(vp);
 4223         if (error == 0)
 4224                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4225         return (error);
 4226 }
 4227 
 4228 /*
 4229  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4230  * open descriptor.
 4231  *
 4232  * warning: do not remove the priv_check() call or this becomes one giant
 4233  * security hole.
 4234  */
 4235 #ifndef _SYS_SYSPROTO_H_
 4236 struct fhopen_args {
 4237         const struct fhandle *u_fhp;
 4238         int flags;
 4239 };
 4240 #endif
 4241 int
 4242 sys_fhopen(td, uap)
 4243         struct thread *td;
 4244         struct fhopen_args /* {
 4245                 const struct fhandle *u_fhp;
 4246                 int flags;
 4247         } */ *uap;
 4248 {
 4249         struct mount *mp;
 4250         struct vnode *vp;
 4251         struct fhandle fhp;
 4252         struct file *fp;
 4253         int fmode, error;
 4254         int indx;
 4255 
 4256         error = priv_check(td, PRIV_VFS_FHOPEN);
 4257         if (error != 0)
 4258                 return (error);
 4259         indx = -1;
 4260         fmode = FFLAGS(uap->flags);
 4261         /* why not allow a non-read/write open for our lockd? */
 4262         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4263                 return (EINVAL);
 4264         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4265         if (error != 0)
 4266                 return(error);
 4267         /* find the mount point */
 4268         mp = vfs_busyfs(&fhp.fh_fsid);
 4269         if (mp == NULL)
 4270                 return (ESTALE);
 4271         /* now give me my vnode, it gets returned to me locked */
 4272         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4273         vfs_unbusy(mp);
 4274         if (error != 0)
 4275                 return (error);
 4276 
 4277         error = falloc_noinstall(td, &fp);
 4278         if (error != 0) {
 4279                 vput(vp);
 4280                 return (error);
 4281         }
 4282         /*
 4283          * An extra reference on `fp' has been held for us by
 4284          * falloc_noinstall().
 4285          */
 4286 
 4287 #ifdef INVARIANTS
 4288         td->td_dupfd = -1;
 4289 #endif
 4290         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4291         if (error != 0) {
 4292                 KASSERT(fp->f_ops == &badfileops,
 4293                     ("VOP_OPEN in fhopen() set f_ops"));
 4294                 KASSERT(td->td_dupfd < 0,
 4295                     ("fhopen() encountered fdopen()"));
 4296 
 4297                 vput(vp);
 4298                 goto bad;
 4299         }
 4300 #ifdef INVARIANTS
 4301         td->td_dupfd = 0;
 4302 #endif
 4303         fp->f_vnode = vp;
 4304         fp->f_seqcount = 1;
 4305         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4306             &vnops);
 4307         VOP_UNLOCK(vp, 0);
 4308         if ((fmode & O_TRUNC) != 0) {
 4309                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4310                 if (error != 0)
 4311                         goto bad;
 4312         }
 4313 
 4314         error = finstall(td, fp, &indx, fmode, NULL);
 4315 bad:
 4316         fdrop(fp, td);
 4317         td->td_retval[0] = indx;
 4318         return (error);
 4319 }
 4320 
 4321 /*
 4322  * Stat an (NFS) file handle.
 4323  */
 4324 #ifndef _SYS_SYSPROTO_H_
 4325 struct fhstat_args {
 4326         struct fhandle *u_fhp;
 4327         struct stat *sb;
 4328 };
 4329 #endif
 4330 int
 4331 sys_fhstat(td, uap)
 4332         struct thread *td;
 4333         register struct fhstat_args /* {
 4334                 struct fhandle *u_fhp;
 4335                 struct stat *sb;
 4336         } */ *uap;
 4337 {
 4338         struct stat sb;
 4339         struct fhandle fh;
 4340         int error;
 4341 
 4342         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4343         if (error != 0)
 4344                 return (error);
 4345         error = kern_fhstat(td, fh, &sb);
 4346         if (error == 0)
 4347                 error = copyout(&sb, uap->sb, sizeof(sb));
 4348         return (error);
 4349 }
 4350 
 4351 int
 4352 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4353 {
 4354         struct mount *mp;
 4355         struct vnode *vp;
 4356         int error;
 4357 
 4358         error = priv_check(td, PRIV_VFS_FHSTAT);
 4359         if (error != 0)
 4360                 return (error);
 4361         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4362                 return (ESTALE);
 4363         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4364         vfs_unbusy(mp);
 4365         if (error != 0)
 4366                 return (error);
 4367         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4368         vput(vp);
 4369         return (error);
 4370 }
 4371 
 4372 /*
 4373  * Implement fstatfs() for (NFS) file handles.
 4374  */
 4375 #ifndef _SYS_SYSPROTO_H_
 4376 struct fhstatfs_args {
 4377         struct fhandle *u_fhp;
 4378         struct statfs *buf;
 4379 };
 4380 #endif
 4381 int
 4382 sys_fhstatfs(td, uap)
 4383         struct thread *td;
 4384         struct fhstatfs_args /* {
 4385                 struct fhandle *u_fhp;
 4386                 struct statfs *buf;
 4387         } */ *uap;
 4388 {
 4389         struct statfs *sfp;
 4390         fhandle_t fh;
 4391         int error;
 4392 
 4393         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4394         if (error != 0)
 4395                 return (error);
 4396         sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 4397         error = kern_fhstatfs(td, fh, sfp);
 4398         if (error == 0)
 4399                 error = copyout(sfp, uap->buf, sizeof(*sfp));
 4400         free(sfp, M_STATFS);
 4401         return (error);
 4402 }
 4403 
 4404 int
 4405 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4406 {
 4407         struct statfs *sp;
 4408         struct mount *mp;
 4409         struct vnode *vp;
 4410         int error;
 4411 
 4412         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4413         if (error != 0)
 4414                 return (error);
 4415         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4416                 return (ESTALE);
 4417         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4418         if (error != 0) {
 4419                 vfs_unbusy(mp);
 4420                 return (error);
 4421         }
 4422         vput(vp);
 4423         error = prison_canseemount(td->td_ucred, mp);
 4424         if (error != 0)
 4425                 goto out;
 4426 #ifdef MAC
 4427         error = mac_mount_check_stat(td->td_ucred, mp);
 4428         if (error != 0)
 4429                 goto out;
 4430 #endif
 4431         /*
 4432          * Set these in case the underlying filesystem fails to do so.
 4433          */
 4434         sp = &mp->mnt_stat;
 4435         sp->f_version = STATFS_VERSION;
 4436         sp->f_namemax = NAME_MAX;
 4437         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4438         error = VFS_STATFS(mp, sp);
 4439         if (error == 0)
 4440                 *buf = *sp;
 4441 out:
 4442         vfs_unbusy(mp);
 4443         return (error);
 4444 }
 4445 
 4446 int
 4447 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4448 {
 4449         struct file *fp;
 4450         struct mount *mp;
 4451         struct vnode *vp;
 4452         cap_rights_t rights;
 4453         off_t olen, ooffset;
 4454         int error;
 4455 
 4456         if (offset < 0 || len <= 0)
 4457                 return (EINVAL);
 4458         /* Check for wrap. */
 4459         if (offset > OFF_MAX - len)
 4460                 return (EFBIG);
 4461         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4462         if (error != 0)
 4463                 return (error);
 4464         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4465                 error = ESPIPE;
 4466                 goto out;
 4467         }
 4468         if ((fp->f_flag & FWRITE) == 0) {
 4469                 error = EBADF;
 4470                 goto out;
 4471         }
 4472         if (fp->f_type != DTYPE_VNODE) {
 4473                 error = ENODEV;
 4474                 goto out;
 4475         }
 4476         vp = fp->f_vnode;
 4477         if (vp->v_type != VREG) {
 4478                 error = ENODEV;
 4479                 goto out;
 4480         }
 4481 
 4482         /* Allocating blocks may take a long time, so iterate. */
 4483         for (;;) {
 4484                 olen = len;
 4485                 ooffset = offset;
 4486 
 4487                 bwillwrite();
 4488                 mp = NULL;
 4489                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4490                 if (error != 0)
 4491                         break;
 4492                 error = vn_lock(vp, LK_EXCLUSIVE);
 4493                 if (error != 0) {
 4494                         vn_finished_write(mp);
 4495                         break;
 4496                 }
 4497 #ifdef MAC
 4498                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4499                 if (error == 0)
 4500 #endif
 4501                         error = VOP_ALLOCATE(vp, &offset, &len);
 4502                 VOP_UNLOCK(vp, 0);
 4503                 vn_finished_write(mp);
 4504 
 4505                 if (olen + ooffset != offset + len) {
 4506                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4507                             ooffset, olen, offset, len);
 4508                 }
 4509                 if (error != 0 || len == 0)
 4510                         break;
 4511                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4512                 maybe_yield();
 4513         }
 4514  out:
 4515         fdrop(fp, td);
 4516         return (error);
 4517 }
 4518 
 4519 int
 4520 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4521 {
 4522         int error;
 4523 
 4524         error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
 4525         return (kern_posix_error(td, error));
 4526 }
 4527 
 4528 /*
 4529  * Unlike madvise(2), we do not make a best effort to remember every
 4530  * possible caching hint.  Instead, we remember the last setting with
 4531  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4532  * region of any current setting.
 4533  */
 4534 int
 4535 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4536     int advice)
 4537 {
 4538         struct fadvise_info *fa, *new;
 4539         struct file *fp;
 4540         struct vnode *vp;
 4541         cap_rights_t rights;
 4542         off_t end;
 4543         int error;
 4544 
 4545         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4546                 return (EINVAL);
 4547         switch (advice) {
 4548         case POSIX_FADV_SEQUENTIAL:
 4549         case POSIX_FADV_RANDOM:
 4550         case POSIX_FADV_NOREUSE:
 4551                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4552                 break;
 4553         case POSIX_FADV_NORMAL:
 4554         case POSIX_FADV_WILLNEED:
 4555         case POSIX_FADV_DONTNEED:
 4556                 new = NULL;
 4557                 break;
 4558         default:
 4559                 return (EINVAL);
 4560         }
 4561         /* XXX: CAP_POSIX_FADVISE? */
 4562         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4563         if (error != 0)
 4564                 goto out;
 4565         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4566                 error = ESPIPE;
 4567                 goto out;
 4568         }
 4569         if (fp->f_type != DTYPE_VNODE) {
 4570                 error = ENODEV;
 4571                 goto out;
 4572         }
 4573         vp = fp->f_vnode;
 4574         if (vp->v_type != VREG) {
 4575                 error = ENODEV;
 4576                 goto out;
 4577         }
 4578         if (len == 0)
 4579                 end = OFF_MAX;
 4580         else
 4581                 end = offset + len - 1;
 4582         switch (advice) {
 4583         case POSIX_FADV_SEQUENTIAL:
 4584         case POSIX_FADV_RANDOM:
 4585         case POSIX_FADV_NOREUSE:
 4586                 /*
 4587                  * Try to merge any existing non-standard region with
 4588                  * this new region if possible, otherwise create a new
 4589                  * non-standard region for this request.
 4590                  */
 4591                 mtx_pool_lock(mtxpool_sleep, fp);
 4592                 fa = fp->f_advice;
 4593                 if (fa != NULL && fa->fa_advice == advice &&
 4594                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4595                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4596                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4597                         if (offset < fa->fa_start)
 4598                                 fa->fa_start = offset;
 4599                         if (end > fa->fa_end)
 4600                                 fa->fa_end = end;
 4601                 } else {
 4602                         new->fa_advice = advice;
 4603                         new->fa_start = offset;
 4604                         new->fa_end = end;
 4605                         fp->f_advice = new;
 4606                         new = fa;
 4607                 }
 4608                 mtx_pool_unlock(mtxpool_sleep, fp);
 4609                 break;
 4610         case POSIX_FADV_NORMAL:
 4611                 /*
 4612                  * If a the "normal" region overlaps with an existing
 4613                  * non-standard region, trim or remove the
 4614                  * non-standard region.
 4615                  */
 4616                 mtx_pool_lock(mtxpool_sleep, fp);
 4617                 fa = fp->f_advice;
 4618                 if (fa != NULL) {
 4619                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4620                                 new = fa;
 4621                                 fp->f_advice = NULL;
 4622                         } else if (offset <= fa->fa_start &&
 4623                             end >= fa->fa_start)
 4624                                 fa->fa_start = end + 1;
 4625                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4626                                 fa->fa_end = offset - 1;
 4627                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4628                                 /*
 4629                                  * If the "normal" region is a middle
 4630                                  * portion of the existing
 4631                                  * non-standard region, just remove
 4632                                  * the whole thing rather than picking
 4633                                  * one side or the other to
 4634                                  * preserve.
 4635                                  */
 4636                                 new = fa;
 4637                                 fp->f_advice = NULL;
 4638                         }
 4639                 }
 4640                 mtx_pool_unlock(mtxpool_sleep, fp);
 4641                 break;
 4642         case POSIX_FADV_WILLNEED:
 4643         case POSIX_FADV_DONTNEED:
 4644                 error = VOP_ADVISE(vp, offset, end, advice);
 4645                 break;
 4646         }
 4647 out:
 4648         if (fp != NULL)
 4649                 fdrop(fp, td);
 4650         free(new, M_FADVISE);
 4651         return (error);
 4652 }
 4653 
 4654 int
 4655 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4656 {
 4657         int error;
 4658 
 4659         error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4660             uap->advice);
 4661         return (kern_posix_error(td, error));
 4662 }

Cache object: 2f8567a884294915ae61a556a3d7ce75


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.