The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.4/sys/kern/vfs_syscalls.c 311959 2017-01-12 01:20:51Z kib $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int kern_chflags(struct thread *td, const char *path,
  100     enum uio_seg pathseg, u_long flags);
  101 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  102     enum uio_seg pathseg, u_long flags, int atflag);
  103 static int setfflags(struct thread *td, struct vnode *, u_long);
  104 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  105 static int getutimens(const struct timespec *, enum uio_seg,
  106     struct timespec *, int *);
  107 static int setutimes(struct thread *td, struct vnode *,
  108     const struct timespec *, int, int);
  109 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  110     struct thread *td);
  111 
  112 /*
  113  * The module initialization routine for POSIX asynchronous I/O will
  114  * set this to the version of AIO that it implements.  (Zero means
  115  * that it is not implemented.)  This value is used here by pathconf()
  116  * and in kern_descrip.c by fpathconf().
  117  */
  118 int async_io_version;
  119 
  120 /*
  121  * Sync each mounted filesystem.
  122  */
  123 #ifndef _SYS_SYSPROTO_H_
  124 struct sync_args {
  125         int     dummy;
  126 };
  127 #endif
  128 /* ARGSUSED */
  129 int
  130 sys_sync(td, uap)
  131         struct thread *td;
  132         struct sync_args *uap;
  133 {
  134         struct mount *mp, *nmp;
  135         int save;
  136 
  137         mtx_lock(&mountlist_mtx);
  138         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  139                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  140                         nmp = TAILQ_NEXT(mp, mnt_list);
  141                         continue;
  142                 }
  143                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  144                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  145                         save = curthread_pflags_set(TDP_SYNCIO);
  146                         vfs_msync(mp, MNT_NOWAIT);
  147                         VFS_SYNC(mp, MNT_NOWAIT);
  148                         curthread_pflags_restore(save);
  149                         vn_finished_write(mp);
  150                 }
  151                 mtx_lock(&mountlist_mtx);
  152                 nmp = TAILQ_NEXT(mp, mnt_list);
  153                 vfs_unbusy(mp);
  154         }
  155         mtx_unlock(&mountlist_mtx);
  156         return (0);
  157 }
  158 
  159 /*
  160  * Change filesystem quotas.
  161  */
  162 #ifndef _SYS_SYSPROTO_H_
  163 struct quotactl_args {
  164         char *path;
  165         int cmd;
  166         int uid;
  167         caddr_t arg;
  168 };
  169 #endif
  170 int
  171 sys_quotactl(td, uap)
  172         struct thread *td;
  173         register struct quotactl_args /* {
  174                 char *path;
  175                 int cmd;
  176                 int uid;
  177                 caddr_t arg;
  178         } */ *uap;
  179 {
  180         struct mount *mp;
  181         struct nameidata nd;
  182         int error;
  183 
  184         AUDIT_ARG_CMD(uap->cmd);
  185         AUDIT_ARG_UID(uap->uid);
  186         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  187                 return (EPERM);
  188         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  189             uap->path, td);
  190         if ((error = namei(&nd)) != 0)
  191                 return (error);
  192         NDFREE(&nd, NDF_ONLY_PNBUF);
  193         mp = nd.ni_vp->v_mount;
  194         vfs_ref(mp);
  195         vput(nd.ni_vp);
  196         error = vfs_busy(mp, 0);
  197         vfs_rel(mp);
  198         if (error != 0)
  199                 return (error);
  200         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  201 
  202         /*
  203          * Since quota on operation typically needs to open quota
  204          * file, the Q_QUOTAON handler needs to unbusy the mount point
  205          * before calling into namei.  Otherwise, unmount might be
  206          * started between two vfs_busy() invocations (first is our,
  207          * second is from mount point cross-walk code in lookup()),
  208          * causing deadlock.
  209          *
  210          * Require that Q_QUOTAON handles the vfs_busy() reference on
  211          * its own, always returning with ubusied mount point.
  212          */
  213         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  214                 vfs_unbusy(mp);
  215         return (error);
  216 }
  217 
  218 /*
  219  * Used by statfs conversion routines to scale the block size up if
  220  * necessary so that all of the block counts are <= 'max_size'.  Note
  221  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  222  * value of 'n'.
  223  */
  224 void
  225 statfs_scale_blocks(struct statfs *sf, long max_size)
  226 {
  227         uint64_t count;
  228         int shift;
  229 
  230         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  231 
  232         /*
  233          * Attempt to scale the block counts to give a more accurate
  234          * overview to userland of the ratio of free space to used
  235          * space.  To do this, find the largest block count and compute
  236          * a divisor that lets it fit into a signed integer <= max_size.
  237          */
  238         if (sf->f_bavail < 0)
  239                 count = -sf->f_bavail;
  240         else
  241                 count = sf->f_bavail;
  242         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  243         if (count <= max_size)
  244                 return;
  245 
  246         count >>= flsl(max_size);
  247         shift = 0;
  248         while (count > 0) {
  249                 shift++;
  250                 count >>=1;
  251         }
  252 
  253         sf->f_bsize <<= shift;
  254         sf->f_blocks >>= shift;
  255         sf->f_bfree >>= shift;
  256         sf->f_bavail >>= shift;
  257 }
  258 
  259 static int
  260 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
  261 {
  262         struct statfs *sp;
  263         int error;
  264 
  265         if (mp == NULL)
  266                 return (EBADF);
  267         error = vfs_busy(mp, 0);
  268         vfs_rel(mp);
  269         if (error != 0)
  270                 return (error);
  271 #ifdef MAC
  272         error = mac_mount_check_stat(td->td_ucred, mp);
  273         if (error != 0)
  274                 goto out;
  275 #endif
  276         /*
  277          * Set these in case the underlying filesystem fails to do so.
  278          */
  279         sp = &mp->mnt_stat;
  280         sp->f_version = STATFS_VERSION;
  281         sp->f_namemax = NAME_MAX;
  282         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  283         error = VFS_STATFS(mp, sp);
  284         if (error != 0)
  285                 goto out;
  286         *buf = *sp;
  287         if (priv_check(td, PRIV_VFS_GENERATION)) {
  288                 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
  289                 prison_enforce_statfs(td->td_ucred, mp, buf);
  290         }
  291 out:
  292         vfs_unbusy(mp);
  293         return (error);
  294 }
  295 
  296 /*
  297  * Get filesystem statistics.
  298  */
  299 #ifndef _SYS_SYSPROTO_H_
  300 struct statfs_args {
  301         char *path;
  302         struct statfs *buf;
  303 };
  304 #endif
  305 int
  306 sys_statfs(td, uap)
  307         struct thread *td;
  308         register struct statfs_args /* {
  309                 char *path;
  310                 struct statfs *buf;
  311         } */ *uap;
  312 {
  313         struct statfs sf;
  314         int error;
  315 
  316         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  317         if (error == 0)
  318                 error = copyout(&sf, uap->buf, sizeof(sf));
  319         return (error);
  320 }
  321 
  322 int
  323 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  324     struct statfs *buf)
  325 {
  326         struct mount *mp;
  327         struct nameidata nd;
  328         int error;
  329 
  330         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  331             pathseg, path, td);
  332         error = namei(&nd);
  333         if (error != 0)
  334                 return (error);
  335         mp = nd.ni_vp->v_mount;
  336         vfs_ref(mp);
  337         NDFREE(&nd, NDF_ONLY_PNBUF);
  338         vput(nd.ni_vp);
  339         return (kern_do_statfs(td, mp, buf));
  340 }
  341 
  342 /*
  343  * Get filesystem statistics.
  344  */
  345 #ifndef _SYS_SYSPROTO_H_
  346 struct fstatfs_args {
  347         int fd;
  348         struct statfs *buf;
  349 };
  350 #endif
  351 int
  352 sys_fstatfs(td, uap)
  353         struct thread *td;
  354         register struct fstatfs_args /* {
  355                 int fd;
  356                 struct statfs *buf;
  357         } */ *uap;
  358 {
  359         struct statfs sf;
  360         int error;
  361 
  362         error = kern_fstatfs(td, uap->fd, &sf);
  363         if (error == 0)
  364                 error = copyout(&sf, uap->buf, sizeof(sf));
  365         return (error);
  366 }
  367 
  368 int
  369 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  370 {
  371         struct file *fp;
  372         struct mount *mp;
  373         struct vnode *vp;
  374         cap_rights_t rights;
  375         int error;
  376 
  377         AUDIT_ARG_FD(fd);
  378         error = getvnode(td->td_proc->p_fd, fd,
  379             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  380         if (error != 0)
  381                 return (error);
  382         vp = fp->f_vnode;
  383         vn_lock(vp, LK_SHARED | LK_RETRY);
  384 #ifdef AUDIT
  385         AUDIT_ARG_VNODE1(vp);
  386 #endif
  387         mp = vp->v_mount;
  388         if (mp != NULL)
  389                 vfs_ref(mp);
  390         VOP_UNLOCK(vp, 0);
  391         fdrop(fp, td);
  392         return (kern_do_statfs(td, mp, buf));
  393 }
  394 
  395 /*
  396  * Get statistics on all filesystems.
  397  */
  398 #ifndef _SYS_SYSPROTO_H_
  399 struct getfsstat_args {
  400         struct statfs *buf;
  401         long bufsize;
  402         int flags;
  403 };
  404 #endif
  405 int
  406 sys_getfsstat(td, uap)
  407         struct thread *td;
  408         register struct getfsstat_args /* {
  409                 struct statfs *buf;
  410                 long bufsize;
  411                 int flags;
  412         } */ *uap;
  413 {
  414 
  415         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  416             uap->flags));
  417 }
  418 
  419 /*
  420  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  421  *      The caller is responsible for freeing memory which will be allocated
  422  *      in '*buf'.
  423  */
  424 int
  425 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  426     enum uio_seg bufseg, int flags)
  427 {
  428         struct mount *mp, *nmp;
  429         struct statfs *sfsp, *sp, sb;
  430         size_t count, maxcount;
  431         int error;
  432 
  433         maxcount = bufsize / sizeof(struct statfs);
  434         if (bufsize == 0)
  435                 sfsp = NULL;
  436         else if (bufseg == UIO_USERSPACE)
  437                 sfsp = *buf;
  438         else /* if (bufseg == UIO_SYSSPACE) */ {
  439                 count = 0;
  440                 mtx_lock(&mountlist_mtx);
  441                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  442                         count++;
  443                 }
  444                 mtx_unlock(&mountlist_mtx);
  445                 if (maxcount > count)
  446                         maxcount = count;
  447                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  448                     M_WAITOK);
  449         }
  450         count = 0;
  451         mtx_lock(&mountlist_mtx);
  452         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  453                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  454                         nmp = TAILQ_NEXT(mp, mnt_list);
  455                         continue;
  456                 }
  457 #ifdef MAC
  458                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  459                         nmp = TAILQ_NEXT(mp, mnt_list);
  460                         continue;
  461                 }
  462 #endif
  463                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  464                         nmp = TAILQ_NEXT(mp, mnt_list);
  465                         continue;
  466                 }
  467                 if (sfsp != NULL && count < maxcount) {
  468                         sp = &mp->mnt_stat;
  469                         /*
  470                          * Set these in case the underlying filesystem
  471                          * fails to do so.
  472                          */
  473                         sp->f_version = STATFS_VERSION;
  474                         sp->f_namemax = NAME_MAX;
  475                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  476                         /*
  477                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  478                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  479                          * overrides MNT_WAIT.
  480                          */
  481                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  482                             (flags & MNT_WAIT)) &&
  483                             (error = VFS_STATFS(mp, sp))) {
  484                                 mtx_lock(&mountlist_mtx);
  485                                 nmp = TAILQ_NEXT(mp, mnt_list);
  486                                 vfs_unbusy(mp);
  487                                 continue;
  488                         }
  489                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  490                                 bcopy(sp, &sb, sizeof(sb));
  491                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  492                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  493                                 sp = &sb;
  494                         }
  495                         if (bufseg == UIO_SYSSPACE)
  496                                 bcopy(sp, sfsp, sizeof(*sp));
  497                         else /* if (bufseg == UIO_USERSPACE) */ {
  498                                 error = copyout(sp, sfsp, sizeof(*sp));
  499                                 if (error != 0) {
  500                                         vfs_unbusy(mp);
  501                                         return (error);
  502                                 }
  503                         }
  504                         sfsp++;
  505                 }
  506                 count++;
  507                 mtx_lock(&mountlist_mtx);
  508                 nmp = TAILQ_NEXT(mp, mnt_list);
  509                 vfs_unbusy(mp);
  510         }
  511         mtx_unlock(&mountlist_mtx);
  512         if (sfsp != NULL && count > maxcount)
  513                 td->td_retval[0] = maxcount;
  514         else
  515                 td->td_retval[0] = count;
  516         return (0);
  517 }
  518 
  519 #ifdef COMPAT_FREEBSD4
  520 /*
  521  * Get old format filesystem statistics.
  522  */
  523 static void cvtstatfs(struct statfs *, struct ostatfs *);
  524 
  525 #ifndef _SYS_SYSPROTO_H_
  526 struct freebsd4_statfs_args {
  527         char *path;
  528         struct ostatfs *buf;
  529 };
  530 #endif
  531 int
  532 freebsd4_statfs(td, uap)
  533         struct thread *td;
  534         struct freebsd4_statfs_args /* {
  535                 char *path;
  536                 struct ostatfs *buf;
  537         } */ *uap;
  538 {
  539         struct ostatfs osb;
  540         struct statfs sf;
  541         int error;
  542 
  543         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  544         if (error != 0)
  545                 return (error);
  546         cvtstatfs(&sf, &osb);
  547         return (copyout(&osb, uap->buf, sizeof(osb)));
  548 }
  549 
  550 /*
  551  * Get filesystem statistics.
  552  */
  553 #ifndef _SYS_SYSPROTO_H_
  554 struct freebsd4_fstatfs_args {
  555         int fd;
  556         struct ostatfs *buf;
  557 };
  558 #endif
  559 int
  560 freebsd4_fstatfs(td, uap)
  561         struct thread *td;
  562         struct freebsd4_fstatfs_args /* {
  563                 int fd;
  564                 struct ostatfs *buf;
  565         } */ *uap;
  566 {
  567         struct ostatfs osb;
  568         struct statfs sf;
  569         int error;
  570 
  571         error = kern_fstatfs(td, uap->fd, &sf);
  572         if (error != 0)
  573                 return (error);
  574         cvtstatfs(&sf, &osb);
  575         return (copyout(&osb, uap->buf, sizeof(osb)));
  576 }
  577 
  578 /*
  579  * Get statistics on all filesystems.
  580  */
  581 #ifndef _SYS_SYSPROTO_H_
  582 struct freebsd4_getfsstat_args {
  583         struct ostatfs *buf;
  584         long bufsize;
  585         int flags;
  586 };
  587 #endif
  588 int
  589 freebsd4_getfsstat(td, uap)
  590         struct thread *td;
  591         register struct freebsd4_getfsstat_args /* {
  592                 struct ostatfs *buf;
  593                 long bufsize;
  594                 int flags;
  595         } */ *uap;
  596 {
  597         struct statfs *buf, *sp;
  598         struct ostatfs osb;
  599         size_t count, size;
  600         int error;
  601 
  602         count = uap->bufsize / sizeof(struct ostatfs);
  603         size = count * sizeof(struct statfs);
  604         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  605         if (size > 0) {
  606                 count = td->td_retval[0];
  607                 sp = buf;
  608                 while (count > 0 && error == 0) {
  609                         cvtstatfs(sp, &osb);
  610                         error = copyout(&osb, uap->buf, sizeof(osb));
  611                         sp++;
  612                         uap->buf++;
  613                         count--;
  614                 }
  615                 free(buf, M_TEMP);
  616         }
  617         return (error);
  618 }
  619 
  620 /*
  621  * Implement fstatfs() for (NFS) file handles.
  622  */
  623 #ifndef _SYS_SYSPROTO_H_
  624 struct freebsd4_fhstatfs_args {
  625         struct fhandle *u_fhp;
  626         struct ostatfs *buf;
  627 };
  628 #endif
  629 int
  630 freebsd4_fhstatfs(td, uap)
  631         struct thread *td;
  632         struct freebsd4_fhstatfs_args /* {
  633                 struct fhandle *u_fhp;
  634                 struct ostatfs *buf;
  635         } */ *uap;
  636 {
  637         struct ostatfs osb;
  638         struct statfs sf;
  639         fhandle_t fh;
  640         int error;
  641 
  642         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  643         if (error != 0)
  644                 return (error);
  645         error = kern_fhstatfs(td, fh, &sf);
  646         if (error != 0)
  647                 return (error);
  648         cvtstatfs(&sf, &osb);
  649         return (copyout(&osb, uap->buf, sizeof(osb)));
  650 }
  651 
  652 /*
  653  * Convert a new format statfs structure to an old format statfs structure.
  654  */
  655 static void
  656 cvtstatfs(nsp, osp)
  657         struct statfs *nsp;
  658         struct ostatfs *osp;
  659 {
  660 
  661         statfs_scale_blocks(nsp, LONG_MAX);
  662         bzero(osp, sizeof(*osp));
  663         osp->f_bsize = nsp->f_bsize;
  664         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  665         osp->f_blocks = nsp->f_blocks;
  666         osp->f_bfree = nsp->f_bfree;
  667         osp->f_bavail = nsp->f_bavail;
  668         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  669         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  670         osp->f_owner = nsp->f_owner;
  671         osp->f_type = nsp->f_type;
  672         osp->f_flags = nsp->f_flags;
  673         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  674         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  675         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  676         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  677         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  678             MIN(MFSNAMELEN, OMFSNAMELEN));
  679         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  680             MIN(MNAMELEN, OMNAMELEN));
  681         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  682             MIN(MNAMELEN, OMNAMELEN));
  683         osp->f_fsid = nsp->f_fsid;
  684 }
  685 #endif /* COMPAT_FREEBSD4 */
  686 
  687 /*
  688  * Change current working directory to a given file descriptor.
  689  */
  690 #ifndef _SYS_SYSPROTO_H_
  691 struct fchdir_args {
  692         int     fd;
  693 };
  694 #endif
  695 int
  696 sys_fchdir(td, uap)
  697         struct thread *td;
  698         struct fchdir_args /* {
  699                 int fd;
  700         } */ *uap;
  701 {
  702         register struct filedesc *fdp = td->td_proc->p_fd;
  703         struct vnode *vp, *tdp, *vpold;
  704         struct mount *mp;
  705         struct file *fp;
  706         cap_rights_t rights;
  707         int error;
  708 
  709         AUDIT_ARG_FD(uap->fd);
  710         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  711             &fp);
  712         if (error != 0)
  713                 return (error);
  714         vp = fp->f_vnode;
  715         VREF(vp);
  716         fdrop(fp, td);
  717         vn_lock(vp, LK_SHARED | LK_RETRY);
  718         AUDIT_ARG_VNODE1(vp);
  719         error = change_dir(vp, td);
  720         while (!error && (mp = vp->v_mountedhere) != NULL) {
  721                 if (vfs_busy(mp, 0))
  722                         continue;
  723                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  724                 vfs_unbusy(mp);
  725                 if (error != 0)
  726                         break;
  727                 vput(vp);
  728                 vp = tdp;
  729         }
  730         if (error != 0) {
  731                 vput(vp);
  732                 return (error);
  733         }
  734         VOP_UNLOCK(vp, 0);
  735         FILEDESC_XLOCK(fdp);
  736         vpold = fdp->fd_cdir;
  737         fdp->fd_cdir = vp;
  738         FILEDESC_XUNLOCK(fdp);
  739         vrele(vpold);
  740         return (0);
  741 }
  742 
  743 /*
  744  * Change current working directory (``.'').
  745  */
  746 #ifndef _SYS_SYSPROTO_H_
  747 struct chdir_args {
  748         char    *path;
  749 };
  750 #endif
  751 int
  752 sys_chdir(td, uap)
  753         struct thread *td;
  754         struct chdir_args /* {
  755                 char *path;
  756         } */ *uap;
  757 {
  758 
  759         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  760 }
  761 
  762 int
  763 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  764 {
  765         register struct filedesc *fdp = td->td_proc->p_fd;
  766         struct nameidata nd;
  767         struct vnode *vp;
  768         int error;
  769 
  770         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  771             pathseg, path, td);
  772         if ((error = namei(&nd)) != 0)
  773                 return (error);
  774         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  775                 vput(nd.ni_vp);
  776                 NDFREE(&nd, NDF_ONLY_PNBUF);
  777                 return (error);
  778         }
  779         VOP_UNLOCK(nd.ni_vp, 0);
  780         NDFREE(&nd, NDF_ONLY_PNBUF);
  781         FILEDESC_XLOCK(fdp);
  782         vp = fdp->fd_cdir;
  783         fdp->fd_cdir = nd.ni_vp;
  784         FILEDESC_XUNLOCK(fdp);
  785         vrele(vp);
  786         return (0);
  787 }
  788 
  789 /*
  790  * Helper for the stricter chroot(2) security policy:  Refuse if any
  791  * open file descriptor refers to a directory.
  792  */
  793 static int
  794 chroot_refuse_vdir_fds(fdp)
  795         struct filedesc *fdp;
  796 {
  797         struct vnode *vp;
  798         struct file *fp;
  799         int fd;
  800 
  801         FILEDESC_LOCK_ASSERT(fdp);
  802 
  803         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
  804                 fp = fget_locked(fdp, fd);
  805                 if (fp == NULL)
  806                         continue;
  807                 if (fp->f_type == DTYPE_VNODE) {
  808                         vp = fp->f_vnode;
  809                         if (vp->v_type == VDIR)
  810                                 return (EPERM);
  811                 }
  812         }
  813         return (0);
  814 }
  815 
  816 /*
  817  * This sysctl determines if we will allow a process to chroot(2) if it
  818  * has a directory open:
  819  *      0: disallowed for all processes.
  820  *      1: allowed for processes that were not already chroot(2)'ed.
  821  *      2: allowed for all processes.
  822  */
  823 
  824 static int chroot_allow_open_directories = 1;
  825 
  826 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  827      &chroot_allow_open_directories, 0,
  828      "Allow a process to chroot(2) if it has a directory open");
  829 
  830 /*
  831  * Change notion of root (``/'') directory.
  832  */
  833 #ifndef _SYS_SYSPROTO_H_
  834 struct chroot_args {
  835         char    *path;
  836 };
  837 #endif
  838 int
  839 sys_chroot(td, uap)
  840         struct thread *td;
  841         struct chroot_args /* {
  842                 char *path;
  843         } */ *uap;
  844 {
  845         struct nameidata nd;
  846         int error;
  847 
  848         error = priv_check(td, PRIV_VFS_CHROOT);
  849         if (error != 0)
  850                 return (error);
  851         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  852             UIO_USERSPACE, uap->path, td);
  853         error = namei(&nd);
  854         if (error != 0)
  855                 goto error;
  856         error = change_dir(nd.ni_vp, td);
  857         if (error != 0)
  858                 goto e_vunlock;
  859 #ifdef MAC
  860         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  861         if (error != 0)
  862                 goto e_vunlock;
  863 #endif
  864         VOP_UNLOCK(nd.ni_vp, 0);
  865         error = change_root(nd.ni_vp, td);
  866         vrele(nd.ni_vp);
  867         NDFREE(&nd, NDF_ONLY_PNBUF);
  868         return (error);
  869 e_vunlock:
  870         vput(nd.ni_vp);
  871 error:
  872         NDFREE(&nd, NDF_ONLY_PNBUF);
  873         return (error);
  874 }
  875 
  876 /*
  877  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  878  * instance.
  879  */
  880 int
  881 change_dir(vp, td)
  882         struct vnode *vp;
  883         struct thread *td;
  884 {
  885 #ifdef MAC
  886         int error;
  887 #endif
  888 
  889         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  890         if (vp->v_type != VDIR)
  891                 return (ENOTDIR);
  892 #ifdef MAC
  893         error = mac_vnode_check_chdir(td->td_ucred, vp);
  894         if (error != 0)
  895                 return (error);
  896 #endif
  897         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  898 }
  899 
  900 /*
  901  * Common routine for kern_chroot() and jail_attach().  The caller is
  902  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  903  * authorize this operation.
  904  */
  905 int
  906 change_root(vp, td)
  907         struct vnode *vp;
  908         struct thread *td;
  909 {
  910         struct filedesc *fdp;
  911         struct vnode *oldvp;
  912         int error;
  913 
  914         fdp = td->td_proc->p_fd;
  915         FILEDESC_XLOCK(fdp);
  916         if (chroot_allow_open_directories == 0 ||
  917             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  918                 error = chroot_refuse_vdir_fds(fdp);
  919                 if (error != 0) {
  920                         FILEDESC_XUNLOCK(fdp);
  921                         return (error);
  922                 }
  923         }
  924         oldvp = fdp->fd_rdir;
  925         fdp->fd_rdir = vp;
  926         VREF(fdp->fd_rdir);
  927         if (!fdp->fd_jdir) {
  928                 fdp->fd_jdir = vp;
  929                 VREF(fdp->fd_jdir);
  930         }
  931         FILEDESC_XUNLOCK(fdp);
  932         vrele(oldvp);
  933         return (0);
  934 }
  935 
  936 static __inline void
  937 flags_to_rights(int flags, cap_rights_t *rightsp)
  938 {
  939 
  940         if (flags & O_EXEC) {
  941                 cap_rights_set(rightsp, CAP_FEXECVE);
  942         } else {
  943                 switch ((flags & O_ACCMODE)) {
  944                 case O_RDONLY:
  945                         cap_rights_set(rightsp, CAP_READ);
  946                         break;
  947                 case O_RDWR:
  948                         cap_rights_set(rightsp, CAP_READ);
  949                         /* FALLTHROUGH */
  950                 case O_WRONLY:
  951                         cap_rights_set(rightsp, CAP_WRITE);
  952                         if (!(flags & (O_APPEND | O_TRUNC)))
  953                                 cap_rights_set(rightsp, CAP_SEEK);
  954                         break;
  955                 }
  956         }
  957 
  958         if (flags & O_CREAT)
  959                 cap_rights_set(rightsp, CAP_CREATE);
  960 
  961         if (flags & O_TRUNC)
  962                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  963 
  964         if (flags & (O_SYNC | O_FSYNC))
  965                 cap_rights_set(rightsp, CAP_FSYNC);
  966 
  967         if (flags & (O_EXLOCK | O_SHLOCK))
  968                 cap_rights_set(rightsp, CAP_FLOCK);
  969 }
  970 
  971 /*
  972  * Check permissions, allocate an open file structure, and call the device
  973  * open routine if any.
  974  */
  975 #ifndef _SYS_SYSPROTO_H_
  976 struct open_args {
  977         char    *path;
  978         int     flags;
  979         int     mode;
  980 };
  981 #endif
  982 int
  983 sys_open(td, uap)
  984         struct thread *td;
  985         register struct open_args /* {
  986                 char *path;
  987                 int flags;
  988                 int mode;
  989         } */ *uap;
  990 {
  991 
  992         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
  993 }
  994 
  995 #ifndef _SYS_SYSPROTO_H_
  996 struct openat_args {
  997         int     fd;
  998         char    *path;
  999         int     flag;
 1000         int     mode;
 1001 };
 1002 #endif
 1003 int
 1004 sys_openat(struct thread *td, struct openat_args *uap)
 1005 {
 1006 
 1007         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1008             uap->mode));
 1009 }
 1010 
 1011 int
 1012 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1013     int mode)
 1014 {
 1015 
 1016         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1017 }
 1018 
/*
 * Open (and possibly create) the file named by `path', resolved relative
 * to descriptor `fd', and install a new file descriptor for it.
 *
 * td      - calling thread; on success td->td_retval[0] receives the new
 *           descriptor number.
 * fd      - directory descriptor for the lookup (kern_open() passes
 *           AT_FDCWD).
 * path    - pathname, located in the address space named by pathseg.
 * flags   - open(2) flags; O_EXEC may not be combined with any access
 *           mode bit, and an access mode equal to O_ACCMODE is rejected.
 * mode    - creation mode; masked with the process umask and ALLPERMS,
 *           with S_ISTXT cleared.
 *
 * Returns 0 on success, EINVAL for an invalid flag combination, or the
 * error from falloc_noinstall(), vn_open(), fo_truncate() or finstall().
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
        struct proc *p = td->td_proc;
        struct filedesc *fdp = p->p_fd;
        struct file *fp;
        struct vnode *vp;
        struct nameidata nd;
        cap_rights_t rights;
        int cmode, error, indx;

        /* -1 means "no descriptor installed yet"; see success: and bad:. */
        indx = -1;

        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* XXX: audit dirfd */
        /* Rights are derived from the caller's flags before FFLAGS(). */
        cap_rights_init(&rights, CAP_LOOKUP);
        flags_to_rights(flags, &rights);
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.
         */
        if (flags & O_EXEC) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE) {
                return (EINVAL);
        } else {
                flags = FFLAGS(flags);
        }

        /*
         * Allocate the file descriptor, but don't install a descriptor yet.
         */
        error = falloc_noinstall(td, &fp);
        if (error != 0)
                return (error);
        /*
         * An extra reference on `fp' has been held for us by
         * falloc_noinstall().
         */
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
            &rights, td);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open(&nd, &flags, cmode, fp);
        if (error != 0) {
                /*
                 * If the vn_open replaced the method vector, something
                 * wondrous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
                if (error == ENXIO && fp->f_ops != &badfileops)
                        goto success;

                /*
                 * Handle special fdopen() case. bleh.
                 *
                 * Don't do this for relative (capability) lookups; we don't
                 * understand exactly what would happen, and we don't think
                 * that it ever should.
                 */
                if (nd.ni_strictrelative == 0 &&
                    (error == ENODEV || error == ENXIO) &&
                    td->td_dupfd >= 0) {
                        /* On success dupfdopen() stores the fd in indx. */
                        error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
                            &indx);
                        if (error == 0)
                                goto success;
                }

                goto bad;
        }
        td->td_dupfd = 0;
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;

        /*
         * Store the vnode, for any f_type. Typically, the vnode use
         * count is decremented by direct call to vn_closefile() for
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;
        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
                KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
                fp->f_seqcount = 1;
                finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
                    DTYPE_VNODE, vp, &vnops);
        }

        /* The vnode is unlocked before any truncation is attempted. */
        VOP_UNLOCK(vp, 0);
        if (flags & O_TRUNC) {
                error = fo_truncate(fp, 0, td->td_ucred, td);
                if (error != 0)
                        goto bad;
        }
success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
        if (indx == -1) {
                struct filecaps *fcaps;

                /*
                 * Lookup-derived capabilities are passed on only for
                 * strictly relative lookups in CAPABILITIES kernels.
                 */
#ifdef CAPABILITIES
                if (nd.ni_strictrelative == 1)
                        fcaps = &nd.ni_filecaps;
                else
#endif
                        fcaps = NULL;
                error = finstall(td, fp, &indx, flags, fcaps);
                /* On success finstall() consumes fcaps. */
                if (error != 0) {
                        filecaps_free(&nd.ni_filecaps);
                        goto bad;
                }
        } else {
                filecaps_free(&nd.ni_filecaps);
        }

        /*
         * Release our private reference, leaving the one associated with
         * the descriptor table intact.
         */
        fdrop(fp, td);
        td->td_retval[0] = indx;
        return (0);
bad:
        /* No descriptor was installed, so dropping fp is all that is left. */
        KASSERT(indx == -1, ("indx=%d, should be -1", indx));
        fdrop(fp, td);
        return (error);
}
 1157 
 1158 #ifdef COMPAT_43
 1159 /*
 1160  * Create a file.
 1161  */
 1162 #ifndef _SYS_SYSPROTO_H_
 1163 struct ocreat_args {
 1164         char    *path;
 1165         int     mode;
 1166 };
 1167 #endif
 1168 int
 1169 ocreat(td, uap)
 1170         struct thread *td;
 1171         register struct ocreat_args /* {
 1172                 char *path;
 1173                 int mode;
 1174         } */ *uap;
 1175 {
 1176 
 1177         return (kern_open(td, uap->path, UIO_USERSPACE,
 1178             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1179 }
 1180 #endif /* COMPAT_43 */
 1181 
 1182 /*
 1183  * Create a special file.
 1184  */
 1185 #ifndef _SYS_SYSPROTO_H_
 1186 struct mknod_args {
 1187         char    *path;
 1188         int     mode;
 1189         int     dev;
 1190 };
 1191 #endif
 1192 int
 1193 sys_mknod(td, uap)
 1194         struct thread *td;
 1195         register struct mknod_args /* {
 1196                 char *path;
 1197                 int mode;
 1198                 int dev;
 1199         } */ *uap;
 1200 {
 1201 
 1202         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1203 }
 1204 
 1205 #ifndef _SYS_SYSPROTO_H_
 1206 struct mknodat_args {
 1207         int     fd;
 1208         char    *path;
 1209         mode_t  mode;
 1210         dev_t   dev;
 1211 };
 1212 #endif
 1213 int
 1214 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1215 {
 1216 
 1217         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1218             uap->dev));
 1219 }
 1220 
 1221 int
 1222 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1223     int dev)
 1224 {
 1225 
 1226         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1227 }
 1228 
 1229 int
 1230 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1231     int mode, int dev)
 1232 {
 1233         struct vnode *vp;
 1234         struct mount *mp;
 1235         struct vattr vattr;
 1236         struct nameidata nd;
 1237         cap_rights_t rights;
 1238         int error, whiteout = 0;
 1239 
 1240         AUDIT_ARG_MODE(mode);
 1241         AUDIT_ARG_DEV(dev);
 1242         switch (mode & S_IFMT) {
 1243         case S_IFCHR:
 1244         case S_IFBLK:
 1245                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1246                 if (error == 0 && dev == VNOVAL)
 1247                         error = EINVAL;
 1248                 break;
 1249         case S_IFMT:
 1250                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1251                 break;
 1252         case S_IFWHT:
 1253                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1254                 break;
 1255         case S_IFIFO:
 1256                 if (dev == 0)
 1257                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1258                 /* FALLTHROUGH */
 1259         default:
 1260                 error = EINVAL;
 1261                 break;
 1262         }
 1263         if (error != 0)
 1264                 return (error);
 1265 restart:
 1266         bwillwrite();
 1267         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1268             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1269             td);
 1270         if ((error = namei(&nd)) != 0)
 1271                 return (error);
 1272         vp = nd.ni_vp;
 1273         if (vp != NULL) {
 1274                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1275                 if (vp == nd.ni_dvp)
 1276                         vrele(nd.ni_dvp);
 1277                 else
 1278                         vput(nd.ni_dvp);
 1279                 vrele(vp);
 1280                 return (EEXIST);
 1281         } else {
 1282                 VATTR_NULL(&vattr);
 1283                 vattr.va_mode = (mode & ALLPERMS) &
 1284                     ~td->td_proc->p_fd->fd_cmask;
 1285                 vattr.va_rdev = dev;
 1286                 whiteout = 0;
 1287 
 1288                 switch (mode & S_IFMT) {
 1289                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1290                         vattr.va_type = VBAD;
 1291                         break;
 1292                 case S_IFCHR:
 1293                         vattr.va_type = VCHR;
 1294                         break;
 1295                 case S_IFBLK:
 1296                         vattr.va_type = VBLK;
 1297                         break;
 1298                 case S_IFWHT:
 1299                         whiteout = 1;
 1300                         break;
 1301                 default:
 1302                         panic("kern_mknod: invalid mode");
 1303                 }
 1304         }
 1305         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1306                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1307                 vput(nd.ni_dvp);
 1308                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1309                         return (error);
 1310                 goto restart;
 1311         }
 1312 #ifdef MAC
 1313         if (error == 0 && !whiteout)
 1314                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1315                     &nd.ni_cnd, &vattr);
 1316 #endif
 1317         if (error == 0) {
 1318                 if (whiteout)
 1319                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1320                 else {
 1321                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1322                                                 &nd.ni_cnd, &vattr);
 1323                         if (error == 0)
 1324                                 vput(nd.ni_vp);
 1325                 }
 1326         }
 1327         NDFREE(&nd, NDF_ONLY_PNBUF);
 1328         vput(nd.ni_dvp);
 1329         vn_finished_write(mp);
 1330         return (error);
 1331 }
 1332 
 1333 /*
 1334  * Create a named pipe.
 1335  */
 1336 #ifndef _SYS_SYSPROTO_H_
 1337 struct mkfifo_args {
 1338         char    *path;
 1339         int     mode;
 1340 };
 1341 #endif
 1342 int
 1343 sys_mkfifo(td, uap)
 1344         struct thread *td;
 1345         register struct mkfifo_args /* {
 1346                 char *path;
 1347                 int mode;
 1348         } */ *uap;
 1349 {
 1350 
 1351         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1352 }
 1353 
 1354 #ifndef _SYS_SYSPROTO_H_
 1355 struct mkfifoat_args {
 1356         int     fd;
 1357         char    *path;
 1358         mode_t  mode;
 1359 };
 1360 #endif
 1361 int
 1362 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1363 {
 1364 
 1365         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1366             uap->mode));
 1367 }
 1368 
 1369 int
 1370 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1371 {
 1372 
 1373         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1374 }
 1375 
 1376 int
 1377 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1378     int mode)
 1379 {
 1380         struct mount *mp;
 1381         struct vattr vattr;
 1382         struct nameidata nd;
 1383         cap_rights_t rights;
 1384         int error;
 1385 
 1386         AUDIT_ARG_MODE(mode);
 1387 restart:
 1388         bwillwrite();
 1389         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1390             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1391             td);
 1392         if ((error = namei(&nd)) != 0)
 1393                 return (error);
 1394         if (nd.ni_vp != NULL) {
 1395                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1396                 if (nd.ni_vp == nd.ni_dvp)
 1397                         vrele(nd.ni_dvp);
 1398                 else
 1399                         vput(nd.ni_dvp);
 1400                 vrele(nd.ni_vp);
 1401                 return (EEXIST);
 1402         }
 1403         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1404                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1405                 vput(nd.ni_dvp);
 1406                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1407                         return (error);
 1408                 goto restart;
 1409         }
 1410         VATTR_NULL(&vattr);
 1411         vattr.va_type = VFIFO;
 1412         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1413 #ifdef MAC
 1414         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1415             &vattr);
 1416         if (error != 0)
 1417                 goto out;
 1418 #endif
 1419         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1420         if (error == 0)
 1421                 vput(nd.ni_vp);
 1422 #ifdef MAC
 1423 out:
 1424 #endif
 1425         vput(nd.ni_dvp);
 1426         vn_finished_write(mp);
 1427         NDFREE(&nd, NDF_ONLY_PNBUF);
 1428         return (error);
 1429 }
 1430 
 1431 /*
 1432  * Make a hard file link.
 1433  */
 1434 #ifndef _SYS_SYSPROTO_H_
 1435 struct link_args {
 1436         char    *path;
 1437         char    *link;
 1438 };
 1439 #endif
 1440 int
 1441 sys_link(td, uap)
 1442         struct thread *td;
 1443         register struct link_args /* {
 1444                 char *path;
 1445                 char *link;
 1446         } */ *uap;
 1447 {
 1448 
 1449         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1450 }
 1451 
 1452 #ifndef _SYS_SYSPROTO_H_
 1453 struct linkat_args {
 1454         int     fd1;
 1455         char    *path1;
 1456         int     fd2;
 1457         char    *path2;
 1458         int     flag;
 1459 };
 1460 #endif
 1461 int
 1462 sys_linkat(struct thread *td, struct linkat_args *uap)
 1463 {
 1464         int flag;
 1465 
 1466         flag = uap->flag;
 1467         if (flag & ~AT_SYMLINK_FOLLOW)
 1468                 return (EINVAL);
 1469 
 1470         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1471             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1472 }
 1473 
/*
 * Hard-link policy knobs, registered under the _security_bsd sysctl node
 * and read by can_hardlink().  Both start out as 0 (check disabled) and
 * are writable at run time (CTLFLAG_RW).
 *
 * NOTE(review): hardlink_check_uid has external linkage while
 * hardlink_check_gid is static -- confirm whether another file references
 * the former before changing either.
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
 1484 
 1485 static int
 1486 can_hardlink(struct vnode *vp, struct ucred *cred)
 1487 {
 1488         struct vattr va;
 1489         int error;
 1490 
 1491         if (!hardlink_check_uid && !hardlink_check_gid)
 1492                 return (0);
 1493 
 1494         error = VOP_GETATTR(vp, &va, cred);
 1495         if (error != 0)
 1496                 return (error);
 1497 
 1498         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1499                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1500                 if (error != 0)
 1501                         return (error);
 1502         }
 1503 
 1504         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1505                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1506                 if (error != 0)
 1507                         return (error);
 1508         }
 1509 
 1510         return (0);
 1511 }
 1512 
 1513 int
 1514 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1515 {
 1516 
 1517         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1518 }
 1519 
 1520 int
 1521 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1522     enum uio_seg segflg, int follow)
 1523 {
 1524         struct vnode *vp;
 1525         struct mount *mp;
 1526         struct nameidata nd;
 1527         cap_rights_t rights;
 1528         int error;
 1529 
 1530 again:
 1531         bwillwrite();
 1532         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1533 
 1534         if ((error = namei(&nd)) != 0)
 1535                 return (error);
 1536         NDFREE(&nd, NDF_ONLY_PNBUF);
 1537         vp = nd.ni_vp;
 1538         if (vp->v_type == VDIR) {
 1539                 vrele(vp);
 1540                 return (EPERM);         /* POSIX */
 1541         }
 1542         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 |
 1543             NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT),
 1544             td);
 1545         if ((error = namei(&nd)) == 0) {
 1546                 if (nd.ni_vp != NULL) {
 1547                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1548                         if (nd.ni_dvp == nd.ni_vp)
 1549                                 vrele(nd.ni_dvp);
 1550                         else
 1551                                 vput(nd.ni_dvp);
 1552                         vrele(nd.ni_vp);
 1553                         vrele(vp);
 1554                         return (EEXIST);
 1555                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1556                         /*
 1557                          * Cross-device link.  No need to recheck
 1558                          * vp->v_type, since it cannot change, except
 1559                          * to VBAD.
 1560                          */
 1561                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1562                         vput(nd.ni_dvp);
 1563                         vrele(vp);
 1564                         return (EXDEV);
 1565                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1566                         error = can_hardlink(vp, td->td_ucred);
 1567 #ifdef MAC
 1568                         if (error == 0)
 1569                                 error = mac_vnode_check_link(td->td_ucred,
 1570                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1571 #endif
 1572                         if (error != 0) {
 1573                                 vput(vp);
 1574                                 vput(nd.ni_dvp);
 1575                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1576                                 return (error);
 1577                         }
 1578                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1579                         if (error != 0) {
 1580                                 vput(vp);
 1581                                 vput(nd.ni_dvp);
 1582                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1583                                 error = vn_start_write(NULL, &mp,
 1584                                     V_XSLEEP | PCATCH);
 1585                                 if (error != 0)
 1586                                         return (error);
 1587                                 goto again;
 1588                         }
 1589                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1590                         VOP_UNLOCK(vp, 0);
 1591                         vput(nd.ni_dvp);
 1592                         vn_finished_write(mp);
 1593                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1594                 } else {
 1595                         vput(nd.ni_dvp);
 1596                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1597                         vrele(vp);
 1598                         goto again;
 1599                 }
 1600         }
 1601         vrele(vp);
 1602         return (error);
 1603 }
 1604 
 1605 /*
 1606  * Make a symbolic link.
 1607  */
 1608 #ifndef _SYS_SYSPROTO_H_
 1609 struct symlink_args {
 1610         char    *path;
 1611         char    *link;
 1612 };
 1613 #endif
 1614 int
 1615 sys_symlink(td, uap)
 1616         struct thread *td;
 1617         register struct symlink_args /* {
 1618                 char *path;
 1619                 char *link;
 1620         } */ *uap;
 1621 {
 1622 
 1623         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1624 }
 1625 
 1626 #ifndef _SYS_SYSPROTO_H_
 1627 struct symlinkat_args {
 1628         char    *path;
 1629         int     fd;
 1630         char    *path2;
 1631 };
 1632 #endif
 1633 int
 1634 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1635 {
 1636 
 1637         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1638             UIO_USERSPACE));
 1639 }
 1640 
 1641 int
 1642 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1643 {
 1644 
 1645         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1646 }
 1647 
 1648 int
 1649 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1650     enum uio_seg segflg)
 1651 {
 1652         struct mount *mp;
 1653         struct vattr vattr;
 1654         char *syspath;
 1655         struct nameidata nd;
 1656         int error;
 1657         cap_rights_t rights;
 1658 
 1659         if (segflg == UIO_SYSSPACE) {
 1660                 syspath = path1;
 1661         } else {
 1662                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1663                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1664                         goto out;
 1665         }
 1666         AUDIT_ARG_TEXT(syspath);
 1667 restart:
 1668         bwillwrite();
 1669         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1670             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1671             td);
 1672         if ((error = namei(&nd)) != 0)
 1673                 goto out;
 1674         if (nd.ni_vp) {
 1675                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1676                 if (nd.ni_vp == nd.ni_dvp)
 1677                         vrele(nd.ni_dvp);
 1678                 else
 1679                         vput(nd.ni_dvp);
 1680                 vrele(nd.ni_vp);
 1681                 error = EEXIST;
 1682                 goto out;
 1683         }
 1684         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1685                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1686                 vput(nd.ni_dvp);
 1687                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1688                         goto out;
 1689                 goto restart;
 1690         }
 1691         VATTR_NULL(&vattr);
 1692         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1693 #ifdef MAC
 1694         vattr.va_type = VLNK;
 1695         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1696             &vattr);
 1697         if (error != 0)
 1698                 goto out2;
 1699 #endif
 1700         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1701         if (error == 0)
 1702                 vput(nd.ni_vp);
 1703 #ifdef MAC
 1704 out2:
 1705 #endif
 1706         NDFREE(&nd, NDF_ONLY_PNBUF);
 1707         vput(nd.ni_dvp);
 1708         vn_finished_write(mp);
 1709 out:
 1710         if (segflg != UIO_SYSSPACE)
 1711                 uma_zfree(namei_zone, syspath);
 1712         return (error);
 1713 }
 1714 
 1715 /*
 1716  * Delete a whiteout from the filesystem.
 1717  */
 1718 int
 1719 sys_undelete(td, uap)
 1720         struct thread *td;
 1721         register struct undelete_args /* {
 1722                 char *path;
 1723         } */ *uap;
 1724 {
 1725         struct mount *mp;
 1726         struct nameidata nd;
 1727         int error;
 1728 
 1729 restart:
 1730         bwillwrite();
 1731         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1732             UIO_USERSPACE, uap->path, td);
 1733         error = namei(&nd);
 1734         if (error != 0)
 1735                 return (error);
 1736 
 1737         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1738                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1739                 if (nd.ni_vp == nd.ni_dvp)
 1740                         vrele(nd.ni_dvp);
 1741                 else
 1742                         vput(nd.ni_dvp);
 1743                 if (nd.ni_vp)
 1744                         vrele(nd.ni_vp);
 1745                 return (EEXIST);
 1746         }
 1747         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1748                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1749                 vput(nd.ni_dvp);
 1750                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1751                         return (error);
 1752                 goto restart;
 1753         }
 1754         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1755         NDFREE(&nd, NDF_ONLY_PNBUF);
 1756         vput(nd.ni_dvp);
 1757         vn_finished_write(mp);
 1758         return (error);
 1759 }
 1760 
 1761 /*
 1762  * Delete a name from the filesystem.
 1763  */
 1764 #ifndef _SYS_SYSPROTO_H_
 1765 struct unlink_args {
 1766         char    *path;
 1767 };
 1768 #endif
 1769 int
 1770 sys_unlink(td, uap)
 1771         struct thread *td;
 1772         struct unlink_args /* {
 1773                 char *path;
 1774         } */ *uap;
 1775 {
 1776 
 1777         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1778 }
 1779 
 1780 #ifndef _SYS_SYSPROTO_H_
 1781 struct unlinkat_args {
 1782         int     fd;
 1783         char    *path;
 1784         int     flag;
 1785 };
 1786 #endif
 1787 int
 1788 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1789 {
 1790         int flag = uap->flag;
 1791         int fd = uap->fd;
 1792         char *path = uap->path;
 1793 
 1794         if (flag & ~AT_REMOVEDIR)
 1795                 return (EINVAL);
 1796 
 1797         if (flag & AT_REMOVEDIR)
 1798                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1799         else
 1800                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1801 }
 1802 
 1803 int
 1804 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1805 {
 1806 
 1807         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1808 }
 1809 
 1810 int
 1811 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1812     ino_t oldinum)
 1813 {
 1814         struct mount *mp;
 1815         struct vnode *vp;
 1816         struct nameidata nd;
 1817         struct stat sb;
 1818         cap_rights_t rights;
 1819         int error;
 1820 
 1821 restart:
 1822         bwillwrite();
 1823         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1824             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1825         if ((error = namei(&nd)) != 0)
 1826                 return (error == EINVAL ? EPERM : error);
 1827         vp = nd.ni_vp;
 1828         if (vp->v_type == VDIR && oldinum == 0) {
 1829                 error = EPERM;          /* POSIX */
 1830         } else if (oldinum != 0 &&
 1831                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1832                   sb.st_ino != oldinum) {
 1833                         error = EIDRM;  /* Identifier removed */
 1834         } else {
 1835                 /*
 1836                  * The root of a mounted filesystem cannot be deleted.
 1837                  *
 1838                  * XXX: can this only be a VDIR case?
 1839                  */
 1840                 if (vp->v_vflag & VV_ROOT)
 1841                         error = EBUSY;
 1842         }
 1843         if (error == 0) {
 1844                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1845                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1846                         vput(nd.ni_dvp);
 1847                         if (vp == nd.ni_dvp)
 1848                                 vrele(vp);
 1849                         else
 1850                                 vput(vp);
 1851                         if ((error = vn_start_write(NULL, &mp,
 1852                             V_XSLEEP | PCATCH)) != 0)
 1853                                 return (error);
 1854                         goto restart;
 1855                 }
 1856 #ifdef MAC
 1857                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1858                     &nd.ni_cnd);
 1859                 if (error != 0)
 1860                         goto out;
 1861 #endif
 1862                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1863                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1864 #ifdef MAC
 1865 out:
 1866 #endif
 1867                 vn_finished_write(mp);
 1868         }
 1869         NDFREE(&nd, NDF_ONLY_PNBUF);
 1870         vput(nd.ni_dvp);
 1871         if (vp == nd.ni_dvp)
 1872                 vrele(vp);
 1873         else
 1874                 vput(vp);
 1875         return (error);
 1876 }
 1877 
 1878 /*
 1879  * Reposition read/write file offset.
 1880  */
 1881 #ifndef _SYS_SYSPROTO_H_
 1882 struct lseek_args {
 1883         int     fd;
 1884         int     pad;
 1885         off_t   offset;
 1886         int     whence;
 1887 };
 1888 #endif
 1889 int
 1890 sys_lseek(td, uap)
 1891         struct thread *td;
 1892         register struct lseek_args /* {
 1893                 int fd;
 1894                 int pad;
 1895                 off_t offset;
 1896                 int whence;
 1897         } */ *uap;
 1898 {
 1899         struct file *fp;
 1900         cap_rights_t rights;
 1901         int error;
 1902 
 1903         AUDIT_ARG_FD(uap->fd);
 1904         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1905         if (error != 0)
 1906                 return (error);
 1907         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1908             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1909         fdrop(fp, td);
 1910         return (error);
 1911 }
 1912 
 1913 #if defined(COMPAT_43)
 1914 /*
 1915  * Reposition read/write file offset.
 1916  */
 1917 #ifndef _SYS_SYSPROTO_H_
 1918 struct olseek_args {
 1919         int     fd;
 1920         long    offset;
 1921         int     whence;
 1922 };
 1923 #endif
 1924 int
 1925 olseek(td, uap)
 1926         struct thread *td;
 1927         register struct olseek_args /* {
 1928                 int fd;
 1929                 long offset;
 1930                 int whence;
 1931         } */ *uap;
 1932 {
 1933         struct lseek_args /* {
 1934                 int fd;
 1935                 int pad;
 1936                 off_t offset;
 1937                 int whence;
 1938         } */ nuap;
 1939 
 1940         nuap.fd = uap->fd;
 1941         nuap.offset = uap->offset;
 1942         nuap.whence = uap->whence;
 1943         return (sys_lseek(td, &nuap));
 1944 }
 1945 #endif /* COMPAT_43 */
 1946 
 1947 /* Version with the 'pad' argument */
 1948 int
 1949 freebsd6_lseek(td, uap)
 1950         struct thread *td;
 1951         register struct freebsd6_lseek_args *uap;
 1952 {
 1953         struct lseek_args ouap;
 1954 
 1955         ouap.fd = uap->fd;
 1956         ouap.offset = uap->offset;
 1957         ouap.whence = uap->whence;
 1958         return (sys_lseek(td, &ouap));
 1959 }
 1960 
 1961 /*
 1962  * Check access permissions using passed credentials.
 1963  */
 1964 static int
 1965 vn_access(vp, user_flags, cred, td)
 1966         struct vnode    *vp;
 1967         int             user_flags;
 1968         struct ucred    *cred;
 1969         struct thread   *td;
 1970 {
 1971         accmode_t accmode;
 1972         int error;
 1973 
 1974         /* Flags == 0 means only check for existence. */
 1975         error = 0;
 1976         if (user_flags) {
 1977                 accmode = 0;
 1978                 if (user_flags & R_OK)
 1979                         accmode |= VREAD;
 1980                 if (user_flags & W_OK)
 1981                         accmode |= VWRITE;
 1982                 if (user_flags & X_OK)
 1983                         accmode |= VEXEC;
 1984 #ifdef MAC
 1985                 error = mac_vnode_check_access(cred, vp, accmode);
 1986                 if (error != 0)
 1987                         return (error);
 1988 #endif
 1989                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1990                         error = VOP_ACCESS(vp, accmode, cred, td);
 1991         }
 1992         return (error);
 1993 }
 1994 
 1995 /*
 1996  * Check access permissions using "real" credentials.
 1997  */
 1998 #ifndef _SYS_SYSPROTO_H_
 1999 struct access_args {
 2000         char    *path;
 2001         int     amode;
 2002 };
 2003 #endif
 2004 int
 2005 sys_access(td, uap)
 2006         struct thread *td;
 2007         register struct access_args /* {
 2008                 char *path;
 2009                 int amode;
 2010         } */ *uap;
 2011 {
 2012 
 2013         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2014 }
 2015 
 2016 #ifndef _SYS_SYSPROTO_H_
 2017 struct faccessat_args {
 2018         int     dirfd;
 2019         char    *path;
 2020         int     amode;
 2021         int     flag;
 2022 }
 2023 #endif
 2024 int
 2025 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2026 {
 2027 
 2028         if (uap->flag & ~AT_EACCESS)
 2029                 return (EINVAL);
 2030         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2031             uap->amode));
 2032 }
 2033 
 2034 int
 2035 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2036 {
 2037 
 2038         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2039 }
 2040 
 2041 int
 2042 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2043     int flag, int amode)
 2044 {
 2045         struct ucred *cred, *tmpcred;
 2046         struct vnode *vp;
 2047         struct nameidata nd;
 2048         cap_rights_t rights;
 2049         int error;
 2050 
 2051         /*
 2052          * Create and modify a temporary credential instead of one that
 2053          * is potentially shared.
 2054          */
 2055         if (!(flag & AT_EACCESS)) {
 2056                 cred = td->td_ucred;
 2057                 tmpcred = crdup(cred);
 2058                 tmpcred->cr_uid = cred->cr_ruid;
 2059                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2060                 td->td_ucred = tmpcred;
 2061         } else
 2062                 cred = tmpcred = td->td_ucred;
 2063         AUDIT_ARG_VALUE(amode);
 2064         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2065             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2066             td);
 2067         if ((error = namei(&nd)) != 0)
 2068                 goto out1;
 2069         vp = nd.ni_vp;
 2070 
 2071         error = vn_access(vp, amode, tmpcred, td);
 2072         NDFREE(&nd, NDF_ONLY_PNBUF);
 2073         vput(vp);
 2074 out1:
 2075         if (!(flag & AT_EACCESS)) {
 2076                 td->td_ucred = cred;
 2077                 crfree(tmpcred);
 2078         }
 2079         return (error);
 2080 }
 2081 
 2082 /*
 2083  * Check access permissions using "effective" credentials.
 2084  */
 2085 #ifndef _SYS_SYSPROTO_H_
 2086 struct eaccess_args {
 2087         char    *path;
 2088         int     amode;
 2089 };
 2090 #endif
 2091 int
 2092 sys_eaccess(td, uap)
 2093         struct thread *td;
 2094         register struct eaccess_args /* {
 2095                 char *path;
 2096                 int amode;
 2097         } */ *uap;
 2098 {
 2099 
 2100         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2101 }
 2102 
 2103 int
 2104 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2105 {
 2106 
 2107         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2108 }
 2109 
 2110 #if defined(COMPAT_43)
 2111 /*
 2112  * Get file status; this version follows links.
 2113  */
 2114 #ifndef _SYS_SYSPROTO_H_
 2115 struct ostat_args {
 2116         char    *path;
 2117         struct ostat *ub;
 2118 };
 2119 #endif
 2120 int
 2121 ostat(td, uap)
 2122         struct thread *td;
 2123         register struct ostat_args /* {
 2124                 char *path;
 2125                 struct ostat *ub;
 2126         } */ *uap;
 2127 {
 2128         struct stat sb;
 2129         struct ostat osb;
 2130         int error;
 2131 
 2132         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2133         if (error != 0)
 2134                 return (error);
 2135         cvtstat(&sb, &osb);
 2136         return (copyout(&osb, uap->ub, sizeof (osb)));
 2137 }
 2138 
 2139 /*
 2140  * Get file status; this version does not follow links.
 2141  */
 2142 #ifndef _SYS_SYSPROTO_H_
 2143 struct olstat_args {
 2144         char    *path;
 2145         struct ostat *ub;
 2146 };
 2147 #endif
 2148 int
 2149 olstat(td, uap)
 2150         struct thread *td;
 2151         register struct olstat_args /* {
 2152                 char *path;
 2153                 struct ostat *ub;
 2154         } */ *uap;
 2155 {
 2156         struct stat sb;
 2157         struct ostat osb;
 2158         int error;
 2159 
 2160         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2161         if (error != 0)
 2162                 return (error);
 2163         cvtstat(&sb, &osb);
 2164         return (copyout(&osb, uap->ub, sizeof (osb)));
 2165 }
 2166 
 2167 /*
 2168  * Convert from an old to a new stat structure.
 2169  */
 2170 void
 2171 cvtstat(st, ost)
 2172         struct stat *st;
 2173         struct ostat *ost;
 2174 {
 2175 
 2176         bzero(ost, sizeof(*ost));
 2177         ost->st_dev = st->st_dev;
 2178         ost->st_ino = st->st_ino;
 2179         ost->st_mode = st->st_mode;
 2180         ost->st_nlink = st->st_nlink;
 2181         ost->st_uid = st->st_uid;
 2182         ost->st_gid = st->st_gid;
 2183         ost->st_rdev = st->st_rdev;
 2184         if (st->st_size < (quad_t)1 << 32)
 2185                 ost->st_size = st->st_size;
 2186         else
 2187                 ost->st_size = -2;
 2188         ost->st_atim = st->st_atim;
 2189         ost->st_mtim = st->st_mtim;
 2190         ost->st_ctim = st->st_ctim;
 2191         ost->st_blksize = st->st_blksize;
 2192         ost->st_blocks = st->st_blocks;
 2193         ost->st_flags = st->st_flags;
 2194         ost->st_gen = st->st_gen;
 2195 }
 2196 #endif /* COMPAT_43 */
 2197 
 2198 /*
 2199  * Get file status; this version follows links.
 2200  */
 2201 #ifndef _SYS_SYSPROTO_H_
 2202 struct stat_args {
 2203         char    *path;
 2204         struct stat *ub;
 2205 };
 2206 #endif
 2207 int
 2208 sys_stat(td, uap)
 2209         struct thread *td;
 2210         register struct stat_args /* {
 2211                 char *path;
 2212                 struct stat *ub;
 2213         } */ *uap;
 2214 {
 2215         struct stat sb;
 2216         int error;
 2217 
 2218         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2219         if (error == 0)
 2220                 error = copyout(&sb, uap->ub, sizeof (sb));
 2221         return (error);
 2222 }
 2223 
 2224 #ifndef _SYS_SYSPROTO_H_
 2225 struct fstatat_args {
 2226         int     fd;
 2227         char    *path;
 2228         struct stat     *buf;
 2229         int     flag;
 2230 }
 2231 #endif
 2232 int
 2233 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2234 {
 2235         struct stat sb;
 2236         int error;
 2237 
 2238         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2239             UIO_USERSPACE, &sb);
 2240         if (error == 0)
 2241                 error = copyout(&sb, uap->buf, sizeof (sb));
 2242         return (error);
 2243 }
 2244 
 2245 int
 2246 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2247 {
 2248 
 2249         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2250 }
 2251 
 2252 int
 2253 kern_statat(struct thread *td, int flag, int fd, char *path,
 2254     enum uio_seg pathseg, struct stat *sbp)
 2255 {
 2256 
 2257         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2258 }
 2259 
 2260 int
 2261 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2262     enum uio_seg pathseg, struct stat *sbp,
 2263     void (*hook)(struct vnode *vp, struct stat *sbp))
 2264 {
 2265         struct nameidata nd;
 2266         struct stat sb;
 2267         cap_rights_t rights;
 2268         int error;
 2269 
 2270         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2271                 return (EINVAL);
 2272 
 2273         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2274             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2275             cap_rights_init(&rights, CAP_FSTAT), td);
 2276 
 2277         if ((error = namei(&nd)) != 0)
 2278                 return (error);
 2279         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2280         if (error == 0) {
 2281                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2282                 if (S_ISREG(sb.st_mode))
 2283                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2284                 if (__predict_false(hook != NULL))
 2285                         hook(nd.ni_vp, &sb);
 2286         }
 2287         NDFREE(&nd, NDF_ONLY_PNBUF);
 2288         vput(nd.ni_vp);
 2289         if (error != 0)
 2290                 return (error);
 2291         *sbp = sb;
 2292 #ifdef KTRACE
 2293         if (KTRPOINT(td, KTR_STRUCT))
 2294                 ktrstat(&sb);
 2295 #endif
 2296         return (0);
 2297 }
 2298 
 2299 /*
 2300  * Get file status; this version does not follow links.
 2301  */
 2302 #ifndef _SYS_SYSPROTO_H_
 2303 struct lstat_args {
 2304         char    *path;
 2305         struct stat *ub;
 2306 };
 2307 #endif
 2308 int
 2309 sys_lstat(td, uap)
 2310         struct thread *td;
 2311         register struct lstat_args /* {
 2312                 char *path;
 2313                 struct stat *ub;
 2314         } */ *uap;
 2315 {
 2316         struct stat sb;
 2317         int error;
 2318 
 2319         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2320         if (error == 0)
 2321                 error = copyout(&sb, uap->ub, sizeof (sb));
 2322         return (error);
 2323 }
 2324 
 2325 int
 2326 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2327 {
 2328 
 2329         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2330             sbp));
 2331 }
 2332 
 2333 /*
 2334  * Implementation of the NetBSD [l]stat() functions.
 2335  */
 2336 void
 2337 cvtnstat(sb, nsb)
 2338         struct stat *sb;
 2339         struct nstat *nsb;
 2340 {
 2341 
 2342         bzero(nsb, sizeof *nsb);
 2343         nsb->st_dev = sb->st_dev;
 2344         nsb->st_ino = sb->st_ino;
 2345         nsb->st_mode = sb->st_mode;
 2346         nsb->st_nlink = sb->st_nlink;
 2347         nsb->st_uid = sb->st_uid;
 2348         nsb->st_gid = sb->st_gid;
 2349         nsb->st_rdev = sb->st_rdev;
 2350         nsb->st_atim = sb->st_atim;
 2351         nsb->st_mtim = sb->st_mtim;
 2352         nsb->st_ctim = sb->st_ctim;
 2353         nsb->st_size = sb->st_size;
 2354         nsb->st_blocks = sb->st_blocks;
 2355         nsb->st_blksize = sb->st_blksize;
 2356         nsb->st_flags = sb->st_flags;
 2357         nsb->st_gen = sb->st_gen;
 2358         nsb->st_birthtim = sb->st_birthtim;
 2359 }
 2360 
 2361 #ifndef _SYS_SYSPROTO_H_
 2362 struct nstat_args {
 2363         char    *path;
 2364         struct nstat *ub;
 2365 };
 2366 #endif
 2367 int
 2368 sys_nstat(td, uap)
 2369         struct thread *td;
 2370         register struct nstat_args /* {
 2371                 char *path;
 2372                 struct nstat *ub;
 2373         } */ *uap;
 2374 {
 2375         struct stat sb;
 2376         struct nstat nsb;
 2377         int error;
 2378 
 2379         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2380         if (error != 0)
 2381                 return (error);
 2382         cvtnstat(&sb, &nsb);
 2383         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2384 }
 2385 
 2386 /*
 2387  * NetBSD lstat.  Get file status; this version does not follow links.
 2388  */
 2389 #ifndef _SYS_SYSPROTO_H_
 2390 struct lstat_args {
 2391         char    *path;
 2392         struct stat *ub;
 2393 };
 2394 #endif
 2395 int
 2396 sys_nlstat(td, uap)
 2397         struct thread *td;
 2398         register struct nlstat_args /* {
 2399                 char *path;
 2400                 struct nstat *ub;
 2401         } */ *uap;
 2402 {
 2403         struct stat sb;
 2404         struct nstat nsb;
 2405         int error;
 2406 
 2407         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2408         if (error != 0)
 2409                 return (error);
 2410         cvtnstat(&sb, &nsb);
 2411         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2412 }
 2413 
 2414 /*
 2415  * Get configurable pathname variables.
 2416  */
 2417 #ifndef _SYS_SYSPROTO_H_
 2418 struct pathconf_args {
 2419         char    *path;
 2420         int     name;
 2421 };
 2422 #endif
 2423 int
 2424 sys_pathconf(td, uap)
 2425         struct thread *td;
 2426         register struct pathconf_args /* {
 2427                 char *path;
 2428                 int name;
 2429         } */ *uap;
 2430 {
 2431 
 2432         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2433 }
 2434 
 2435 #ifndef _SYS_SYSPROTO_H_
 2436 struct lpathconf_args {
 2437         char    *path;
 2438         int     name;
 2439 };
 2440 #endif
 2441 int
 2442 sys_lpathconf(td, uap)
 2443         struct thread *td;
 2444         register struct lpathconf_args /* {
 2445                 char *path;
 2446                 int name;
 2447         } */ *uap;
 2448 {
 2449 
 2450         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2451             NOFOLLOW));
 2452 }
 2453 
 2454 int
 2455 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2456     u_long flags)
 2457 {
 2458         struct nameidata nd;
 2459         int error;
 2460 
 2461         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2462             pathseg, path, td);
 2463         if ((error = namei(&nd)) != 0)
 2464                 return (error);
 2465         NDFREE(&nd, NDF_ONLY_PNBUF);
 2466 
 2467         /* If asynchronous I/O is available, it works for all files. */
 2468         if (name == _PC_ASYNC_IO)
 2469                 td->td_retval[0] = async_io_version;
 2470         else
 2471                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2472         vput(nd.ni_vp);
 2473         return (error);
 2474 }
 2475 
 2476 /*
 2477  * Return target name of a symbolic link.
 2478  */
 2479 #ifndef _SYS_SYSPROTO_H_
 2480 struct readlink_args {
 2481         char    *path;
 2482         char    *buf;
 2483         size_t  count;
 2484 };
 2485 #endif
 2486 int
 2487 sys_readlink(td, uap)
 2488         struct thread *td;
 2489         register struct readlink_args /* {
 2490                 char *path;
 2491                 char *buf;
 2492                 size_t count;
 2493         } */ *uap;
 2494 {
 2495 
 2496         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2497             UIO_USERSPACE, uap->count));
 2498 }
 2499 #ifndef _SYS_SYSPROTO_H_
 2500 struct readlinkat_args {
 2501         int     fd;
 2502         char    *path;
 2503         char    *buf;
 2504         size_t  bufsize;
 2505 };
 2506 #endif
 2507 int
 2508 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2509 {
 2510 
 2511         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2512             uap->buf, UIO_USERSPACE, uap->bufsize));
 2513 }
 2514 
 2515 int
 2516 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2517     enum uio_seg bufseg, size_t count)
 2518 {
 2519 
 2520         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2521             count));
 2522 }
 2523 
 2524 int
 2525 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2526     char *buf, enum uio_seg bufseg, size_t count)
 2527 {
 2528         struct vnode *vp;
 2529         struct iovec aiov;
 2530         struct uio auio;
 2531         struct nameidata nd;
 2532         int error;
 2533 
 2534         if (count > IOSIZE_MAX)
 2535                 return (EINVAL);
 2536 
 2537         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2538             pathseg, path, fd, td);
 2539 
 2540         if ((error = namei(&nd)) != 0)
 2541                 return (error);
 2542         NDFREE(&nd, NDF_ONLY_PNBUF);
 2543         vp = nd.ni_vp;
 2544 #ifdef MAC
 2545         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2546         if (error != 0) {
 2547                 vput(vp);
 2548                 return (error);
 2549         }
 2550 #endif
 2551         if (vp->v_type != VLNK)
 2552                 error = EINVAL;
 2553         else {
 2554                 aiov.iov_base = buf;
 2555                 aiov.iov_len = count;
 2556                 auio.uio_iov = &aiov;
 2557                 auio.uio_iovcnt = 1;
 2558                 auio.uio_offset = 0;
 2559                 auio.uio_rw = UIO_READ;
 2560                 auio.uio_segflg = bufseg;
 2561                 auio.uio_td = td;
 2562                 auio.uio_resid = count;
 2563                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2564                 td->td_retval[0] = count - auio.uio_resid;
 2565         }
 2566         vput(vp);
 2567         return (error);
 2568 }
 2569 
 2570 /*
 2571  * Common implementation code for chflags() and fchflags().
 2572  */
 2573 static int
 2574 setfflags(td, vp, flags)
 2575         struct thread *td;
 2576         struct vnode *vp;
 2577         u_long flags;
 2578 {
 2579         struct mount *mp;
 2580         struct vattr vattr;
 2581         int error;
 2582 
 2583         /* We can't support the value matching VNOVAL. */
 2584         if (flags == VNOVAL)
 2585                 return (EOPNOTSUPP);
 2586 
 2587         /*
 2588          * Prevent non-root users from setting flags on devices.  When
 2589          * a device is reused, users can retain ownership of the device
 2590          * if they are allowed to set flags and programs assume that
 2591          * chown can't fail when done as root.
 2592          */
 2593         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2594                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2595                 if (error != 0)
 2596                         return (error);
 2597         }
 2598 
 2599         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2600                 return (error);
 2601         VATTR_NULL(&vattr);
 2602         vattr.va_flags = flags;
 2603         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2604 #ifdef MAC
 2605         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2606         if (error == 0)
 2607 #endif
 2608                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2609         VOP_UNLOCK(vp, 0);
 2610         vn_finished_write(mp);
 2611         return (error);
 2612 }
 2613 
 2614 /*
 2615  * Change flags of a file given a path name.
 2616  */
 2617 #ifndef _SYS_SYSPROTO_H_
 2618 struct chflags_args {
 2619         const char *path;
 2620         u_long  flags;
 2621 };
 2622 #endif
 2623 int
 2624 sys_chflags(td, uap)
 2625         struct thread *td;
 2626         register struct chflags_args /* {
 2627                 const char *path;
 2628                 u_long flags;
 2629         } */ *uap;
 2630 {
 2631 
 2632         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2633 }
 2634 
 2635 #ifndef _SYS_SYSPROTO_H_
 2636 struct chflagsat_args {
 2637         int     fd;
 2638         const char *path;
 2639         u_long  flags;
 2640         int     atflag;
 2641 }
 2642 #endif
 2643 int
 2644 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2645 {
 2646         int fd = uap->fd;
 2647         const char *path = uap->path;
 2648         u_long flags = uap->flags;
 2649         int atflag = uap->atflag;
 2650 
 2651         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2652                 return (EINVAL);
 2653 
 2654         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2655 }
 2656 
 2657 static int
 2658 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2659     u_long flags)
 2660 {
 2661 
 2662         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2663 }
 2664 
 2665 /*
 2666  * Same as chflags() but doesn't follow symlinks.
 2667  */
 2668 int
 2669 sys_lchflags(td, uap)
 2670         struct thread *td;
 2671         register struct lchflags_args /* {
 2672                 const char *path;
 2673                 u_long flags;
 2674         } */ *uap;
 2675 {
 2676 
 2677         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2678             uap->flags, AT_SYMLINK_NOFOLLOW));
 2679 }
 2680 
 2681 static int
 2682 kern_chflagsat(struct thread *td, int fd, const char *path,
 2683     enum uio_seg pathseg, u_long flags, int atflag)
 2684 {
 2685         struct nameidata nd;
 2686         cap_rights_t rights;
 2687         int error, follow;
 2688 
 2689         AUDIT_ARG_FFLAGS(flags);
 2690         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2691         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2692             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2693         if ((error = namei(&nd)) != 0)
 2694                 return (error);
 2695         NDFREE(&nd, NDF_ONLY_PNBUF);
 2696         error = setfflags(td, nd.ni_vp, flags);
 2697         vrele(nd.ni_vp);
 2698         return (error);
 2699 }
 2700 
 2701 /*
 2702  * Change flags of a file given a file descriptor.
 2703  */
 2704 #ifndef _SYS_SYSPROTO_H_
 2705 struct fchflags_args {
 2706         int     fd;
 2707         u_long  flags;
 2708 };
 2709 #endif
 2710 int
 2711 sys_fchflags(td, uap)
 2712         struct thread *td;
 2713         register struct fchflags_args /* {
 2714                 int fd;
 2715                 u_long flags;
 2716         } */ *uap;
 2717 {
 2718         struct file *fp;
 2719         cap_rights_t rights;
 2720         int error;
 2721 
 2722         AUDIT_ARG_FD(uap->fd);
 2723         AUDIT_ARG_FFLAGS(uap->flags);
 2724         error = getvnode(td->td_proc->p_fd, uap->fd,
 2725             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2726         if (error != 0)
 2727                 return (error);
 2728 #ifdef AUDIT
 2729         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2730         AUDIT_ARG_VNODE1(fp->f_vnode);
 2731         VOP_UNLOCK(fp->f_vnode, 0);
 2732 #endif
 2733         error = setfflags(td, fp->f_vnode, uap->flags);
 2734         fdrop(fp, td);
 2735         return (error);
 2736 }
 2737 
 2738 /*
 2739  * Common implementation code for chmod(), lchmod() and fchmod().
 2740  */
 2741 int
 2742 setfmode(td, cred, vp, mode)
 2743         struct thread *td;
 2744         struct ucred *cred;
 2745         struct vnode *vp;
 2746         int mode;
 2747 {
 2748         struct mount *mp;
 2749         struct vattr vattr;
 2750         int error;
 2751 
 2752         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2753                 return (error);
 2754         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2755         VATTR_NULL(&vattr);
 2756         vattr.va_mode = mode & ALLPERMS;
 2757 #ifdef MAC
 2758         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2759         if (error == 0)
 2760 #endif
 2761                 error = VOP_SETATTR(vp, &vattr, cred);
 2762         VOP_UNLOCK(vp, 0);
 2763         vn_finished_write(mp);
 2764         return (error);
 2765 }
 2766 
 2767 /*
 2768  * Change mode of a file given path name.
 2769  */
 2770 #ifndef _SYS_SYSPROTO_H_
 2771 struct chmod_args {
 2772         char    *path;
 2773         int     mode;
 2774 };
 2775 #endif
 2776 int
 2777 sys_chmod(td, uap)
 2778         struct thread *td;
 2779         register struct chmod_args /* {
 2780                 char *path;
 2781                 int mode;
 2782         } */ *uap;
 2783 {
 2784 
 2785         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2786 }
 2787 
 2788 #ifndef _SYS_SYSPROTO_H_
 2789 struct fchmodat_args {
 2790         int     dirfd;
 2791         char    *path;
 2792         mode_t  mode;
 2793         int     flag;
 2794 }
 2795 #endif
 2796 int
 2797 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2798 {
 2799         int flag = uap->flag;
 2800         int fd = uap->fd;
 2801         char *path = uap->path;
 2802         mode_t mode = uap->mode;
 2803 
 2804         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2805                 return (EINVAL);
 2806 
 2807         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2808 }
 2809 
 2810 int
 2811 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2812 {
 2813 
 2814         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2815 }
 2816 
 2817 /*
 2818  * Change mode of a file given path name (don't follow links.)
 2819  */
 2820 #ifndef _SYS_SYSPROTO_H_
 2821 struct lchmod_args {
 2822         char    *path;
 2823         int     mode;
 2824 };
 2825 #endif
 2826 int
 2827 sys_lchmod(td, uap)
 2828         struct thread *td;
 2829         register struct lchmod_args /* {
 2830                 char *path;
 2831                 int mode;
 2832         } */ *uap;
 2833 {
 2834 
 2835         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2836             uap->mode, AT_SYMLINK_NOFOLLOW));
 2837 }
 2838 
 2839 int
 2840 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2841     mode_t mode, int flag)
 2842 {
 2843         struct nameidata nd;
 2844         cap_rights_t rights;
 2845         int error, follow;
 2846 
 2847         AUDIT_ARG_MODE(mode);
 2848         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2849         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2850             cap_rights_init(&rights, CAP_FCHMOD), td);
 2851         if ((error = namei(&nd)) != 0)
 2852                 return (error);
 2853         NDFREE(&nd, NDF_ONLY_PNBUF);
 2854         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2855         vrele(nd.ni_vp);
 2856         return (error);
 2857 }
 2858 
 2859 /*
 2860  * Change mode of a file given a file descriptor.
 2861  */
 2862 #ifndef _SYS_SYSPROTO_H_
 2863 struct fchmod_args {
 2864         int     fd;
 2865         int     mode;
 2866 };
 2867 #endif
 2868 int
 2869 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2870 {
 2871         struct file *fp;
 2872         cap_rights_t rights;
 2873         int error;
 2874 
 2875         AUDIT_ARG_FD(uap->fd);
 2876         AUDIT_ARG_MODE(uap->mode);
 2877 
 2878         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2879         if (error != 0)
 2880                 return (error);
 2881         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2882         fdrop(fp, td);
 2883         return (error);
 2884 }
 2885 
 2886 /*
 2887  * Common implementation for chown(), lchown(), and fchown()
 2888  */
 2889 int
 2890 setfown(td, cred, vp, uid, gid)
 2891         struct thread *td;
 2892         struct ucred *cred;
 2893         struct vnode *vp;
 2894         uid_t uid;
 2895         gid_t gid;
 2896 {
 2897         struct mount *mp;
 2898         struct vattr vattr;
 2899         int error;
 2900 
 2901         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2902                 return (error);
 2903         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2904         VATTR_NULL(&vattr);
 2905         vattr.va_uid = uid;
 2906         vattr.va_gid = gid;
 2907 #ifdef MAC
 2908         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2909             vattr.va_gid);
 2910         if (error == 0)
 2911 #endif
 2912                 error = VOP_SETATTR(vp, &vattr, cred);
 2913         VOP_UNLOCK(vp, 0);
 2914         vn_finished_write(mp);
 2915         return (error);
 2916 }
 2917 
 2918 /*
 2919  * Set ownership given a path name.
 2920  */
 2921 #ifndef _SYS_SYSPROTO_H_
 2922 struct chown_args {
 2923         char    *path;
 2924         int     uid;
 2925         int     gid;
 2926 };
 2927 #endif
 2928 int
 2929 sys_chown(td, uap)
 2930         struct thread *td;
 2931         register struct chown_args /* {
 2932                 char *path;
 2933                 int uid;
 2934                 int gid;
 2935         } */ *uap;
 2936 {
 2937 
 2938         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2939 }
 2940 
 2941 #ifndef _SYS_SYSPROTO_H_
 2942 struct fchownat_args {
 2943         int fd;
 2944         const char * path;
 2945         uid_t uid;
 2946         gid_t gid;
 2947         int flag;
 2948 };
 2949 #endif
 2950 int
 2951 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2952 {
 2953         int flag;
 2954 
 2955         flag = uap->flag;
 2956         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2957                 return (EINVAL);
 2958 
 2959         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2960             uap->gid, uap->flag));
 2961 }
 2962 
 2963 int
 2964 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2965     int gid)
 2966 {
 2967 
 2968         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2969 }
 2970 
 2971 int
 2972 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2973     int uid, int gid, int flag)
 2974 {
 2975         struct nameidata nd;
 2976         cap_rights_t rights;
 2977         int error, follow;
 2978 
 2979         AUDIT_ARG_OWNER(uid, gid);
 2980         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2981         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2982             cap_rights_init(&rights, CAP_FCHOWN), td);
 2983 
 2984         if ((error = namei(&nd)) != 0)
 2985                 return (error);
 2986         NDFREE(&nd, NDF_ONLY_PNBUF);
 2987         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2988         vrele(nd.ni_vp);
 2989         return (error);
 2990 }
 2991 
 2992 /*
 2993  * Set ownership given a path name, do not cross symlinks.
 2994  */
 2995 #ifndef _SYS_SYSPROTO_H_
 2996 struct lchown_args {
 2997         char    *path;
 2998         int     uid;
 2999         int     gid;
 3000 };
 3001 #endif
 3002 int
 3003 sys_lchown(td, uap)
 3004         struct thread *td;
 3005         register struct lchown_args /* {
 3006                 char *path;
 3007                 int uid;
 3008                 int gid;
 3009         } */ *uap;
 3010 {
 3011 
 3012         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3013 }
 3014 
 3015 int
 3016 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3017     int gid)
 3018 {
 3019 
 3020         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3021             AT_SYMLINK_NOFOLLOW));
 3022 }
 3023 
 3024 /*
 3025  * Set ownership given a file descriptor.
 3026  */
 3027 #ifndef _SYS_SYSPROTO_H_
 3028 struct fchown_args {
 3029         int     fd;
 3030         int     uid;
 3031         int     gid;
 3032 };
 3033 #endif
 3034 int
 3035 sys_fchown(td, uap)
 3036         struct thread *td;
 3037         register struct fchown_args /* {
 3038                 int fd;
 3039                 int uid;
 3040                 int gid;
 3041         } */ *uap;
 3042 {
 3043         struct file *fp;
 3044         cap_rights_t rights;
 3045         int error;
 3046 
 3047         AUDIT_ARG_FD(uap->fd);
 3048         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3049         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3050         if (error != 0)
 3051                 return (error);
 3052         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3053         fdrop(fp, td);
 3054         return (error);
 3055 }
 3056 
 3057 /*
 3058  * Common implementation code for utimes(), lutimes(), and futimes().
 3059  */
 3060 static int
 3061 getutimes(usrtvp, tvpseg, tsp)
 3062         const struct timeval *usrtvp;
 3063         enum uio_seg tvpseg;
 3064         struct timespec *tsp;
 3065 {
 3066         struct timeval tv[2];
 3067         const struct timeval *tvp;
 3068         int error;
 3069 
 3070         if (usrtvp == NULL) {
 3071                 vfs_timestamp(&tsp[0]);
 3072                 tsp[1] = tsp[0];
 3073         } else {
 3074                 if (tvpseg == UIO_SYSSPACE) {
 3075                         tvp = usrtvp;
 3076                 } else {
 3077                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3078                                 return (error);
 3079                         tvp = tv;
 3080                 }
 3081 
 3082                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3083                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3084                         return (EINVAL);
 3085                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3086                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3087         }
 3088         return (0);
 3089 }
 3090 
 3091 /*
 3092  * Common implementation code for futimens(), utimensat().
 3093  */
 3094 #define UTIMENS_NULL    0x1
 3095 #define UTIMENS_EXIT    0x2
 3096 static int
 3097 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3098     struct timespec *tsp, int *retflags)
 3099 {
 3100         struct timespec tsnow;
 3101         int error;
 3102 
 3103         vfs_timestamp(&tsnow);
 3104         *retflags = 0;
 3105         if (usrtsp == NULL) {
 3106                 tsp[0] = tsnow;
 3107                 tsp[1] = tsnow;
 3108                 *retflags |= UTIMENS_NULL;
 3109                 return (0);
 3110         }
 3111         if (tspseg == UIO_SYSSPACE) {
 3112                 tsp[0] = usrtsp[0];
 3113                 tsp[1] = usrtsp[1];
 3114         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3115                 return (error);
 3116         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3117                 *retflags |= UTIMENS_EXIT;
 3118         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3119                 *retflags |= UTIMENS_NULL;
 3120         if (tsp[0].tv_nsec == UTIME_OMIT)
 3121                 tsp[0].tv_sec = VNOVAL;
 3122         else if (tsp[0].tv_nsec == UTIME_NOW)
 3123                 tsp[0] = tsnow;
 3124         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3125                 return (EINVAL);
 3126         if (tsp[1].tv_nsec == UTIME_OMIT)
 3127                 tsp[1].tv_sec = VNOVAL;
 3128         else if (tsp[1].tv_nsec == UTIME_NOW)
 3129                 tsp[1] = tsnow;
 3130         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3131                 return (EINVAL);
 3132 
 3133         return (0);
 3134 }
 3135 
 3136 /*
 3137  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3138  * and utimensat().
 3139  */
 3140 static int
 3141 setutimes(td, vp, ts, numtimes, nullflag)
 3142         struct thread *td;
 3143         struct vnode *vp;
 3144         const struct timespec *ts;
 3145         int numtimes;
 3146         int nullflag;
 3147 {
 3148         struct mount *mp;
 3149         struct vattr vattr;
 3150         int error, setbirthtime;
 3151 
 3152         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3153                 return (error);
 3154         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3155         setbirthtime = 0;
 3156         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3157             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3158                 setbirthtime = 1;
 3159         VATTR_NULL(&vattr);
 3160         vattr.va_atime = ts[0];
 3161         vattr.va_mtime = ts[1];
 3162         if (setbirthtime)
 3163                 vattr.va_birthtime = ts[1];
 3164         if (numtimes > 2)
 3165                 vattr.va_birthtime = ts[2];
 3166         if (nullflag)
 3167                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3168 #ifdef MAC
 3169         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3170             vattr.va_mtime);
 3171 #endif
 3172         if (error == 0)
 3173                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3174         VOP_UNLOCK(vp, 0);
 3175         vn_finished_write(mp);
 3176         return (error);
 3177 }
 3178 
 3179 /*
 3180  * Set the access and modification times of a file.
 3181  */
 3182 #ifndef _SYS_SYSPROTO_H_
 3183 struct utimes_args {
 3184         char    *path;
 3185         struct  timeval *tptr;
 3186 };
 3187 #endif
 3188 int
 3189 sys_utimes(td, uap)
 3190         struct thread *td;
 3191         register struct utimes_args /* {
 3192                 char *path;
 3193                 struct timeval *tptr;
 3194         } */ *uap;
 3195 {
 3196 
 3197         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3198             UIO_USERSPACE));
 3199 }
 3200 
 3201 #ifndef _SYS_SYSPROTO_H_
 3202 struct futimesat_args {
 3203         int fd;
 3204         const char * path;
 3205         const struct timeval * times;
 3206 };
 3207 #endif
 3208 int
 3209 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3210 {
 3211 
 3212         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3213             uap->times, UIO_USERSPACE));
 3214 }
 3215 
 3216 int
 3217 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3218     struct timeval *tptr, enum uio_seg tptrseg)
 3219 {
 3220 
 3221         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3222 }
 3223 
 3224 int
 3225 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3226     struct timeval *tptr, enum uio_seg tptrseg)
 3227 {
 3228         struct nameidata nd;
 3229         struct timespec ts[2];
 3230         cap_rights_t rights;
 3231         int error;
 3232 
 3233         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3234                 return (error);
 3235         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3236             cap_rights_init(&rights, CAP_FUTIMES), td);
 3237 
 3238         if ((error = namei(&nd)) != 0)
 3239                 return (error);
 3240         NDFREE(&nd, NDF_ONLY_PNBUF);
 3241         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3242         vrele(nd.ni_vp);
 3243         return (error);
 3244 }
 3245 
 3246 /*
 3247  * Set the access and modification times of a file.
 3248  */
 3249 #ifndef _SYS_SYSPROTO_H_
 3250 struct lutimes_args {
 3251         char    *path;
 3252         struct  timeval *tptr;
 3253 };
 3254 #endif
 3255 int
 3256 sys_lutimes(td, uap)
 3257         struct thread *td;
 3258         register struct lutimes_args /* {
 3259                 char *path;
 3260                 struct timeval *tptr;
 3261         } */ *uap;
 3262 {
 3263 
 3264         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3265             UIO_USERSPACE));
 3266 }
 3267 
 3268 int
 3269 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3270     struct timeval *tptr, enum uio_seg tptrseg)
 3271 {
 3272         struct timespec ts[2];
 3273         struct nameidata nd;
 3274         int error;
 3275 
 3276         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3277                 return (error);
 3278         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3279         if ((error = namei(&nd)) != 0)
 3280                 return (error);
 3281         NDFREE(&nd, NDF_ONLY_PNBUF);
 3282         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3283         vrele(nd.ni_vp);
 3284         return (error);
 3285 }
 3286 
 3287 /*
 3288  * Set the access and modification times of a file.
 3289  */
 3290 #ifndef _SYS_SYSPROTO_H_
 3291 struct futimes_args {
 3292         int     fd;
 3293         struct  timeval *tptr;
 3294 };
 3295 #endif
 3296 int
 3297 sys_futimes(td, uap)
 3298         struct thread *td;
 3299         register struct futimes_args /* {
 3300                 int  fd;
 3301                 struct timeval *tptr;
 3302         } */ *uap;
 3303 {
 3304 
 3305         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3306 }
 3307 
 3308 int
 3309 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3310     enum uio_seg tptrseg)
 3311 {
 3312         struct timespec ts[2];
 3313         struct file *fp;
 3314         cap_rights_t rights;
 3315         int error;
 3316 
 3317         AUDIT_ARG_FD(fd);
 3318         error = getutimes(tptr, tptrseg, ts);
 3319         if (error != 0)
 3320                 return (error);
 3321         error = getvnode(td->td_proc->p_fd, fd,
 3322             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3323         if (error != 0)
 3324                 return (error);
 3325 #ifdef AUDIT
 3326         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3327         AUDIT_ARG_VNODE1(fp->f_vnode);
 3328         VOP_UNLOCK(fp->f_vnode, 0);
 3329 #endif
 3330         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3331         fdrop(fp, td);
 3332         return (error);
 3333 }
 3334 
 3335 int
 3336 sys_futimens(struct thread *td, struct futimens_args *uap)
 3337 {
 3338 
 3339         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3340 }
 3341 
 3342 int
 3343 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3344     enum uio_seg tptrseg)
 3345 {
 3346         struct timespec ts[2];
 3347         struct file *fp;
 3348         cap_rights_t rights;
 3349         int error, flags;
 3350 
 3351         AUDIT_ARG_FD(fd);
 3352         error = getutimens(tptr, tptrseg, ts, &flags);
 3353         if (error != 0)
 3354                 return (error);
 3355         if (flags & UTIMENS_EXIT)
 3356                 return (0);
 3357         error = getvnode(td->td_proc->p_fd, fd,
 3358             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3359         if (error != 0)
 3360                 return (error);
 3361 #ifdef AUDIT
 3362         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3363         AUDIT_ARG_VNODE1(fp->f_vnode);
 3364         VOP_UNLOCK(fp->f_vnode, 0);
 3365 #endif
 3366         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3367         fdrop(fp, td);
 3368         return (error);
 3369 }
 3370 
 3371 int
 3372 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3373 {
 3374 
 3375         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3376             uap->times, UIO_USERSPACE, uap->flag));
 3377 }
 3378 
 3379 int
 3380 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3381     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3382 {
 3383         struct nameidata nd;
 3384         struct timespec ts[2];
 3385         cap_rights_t rights;
 3386         int error, flags;
 3387 
 3388         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3389                 return (EINVAL);
 3390 
 3391         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3392                 return (error);
 3393         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 3394             FOLLOW) | AUDITVNODE1, pathseg, path, fd,
 3395             cap_rights_init(&rights, CAP_FUTIMES), td);
 3396         if ((error = namei(&nd)) != 0)
 3397                 return (error);
 3398         /*
 3399          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3400          * POSIX states:
 3401          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3402          * "Search permission is denied by a component of the path prefix."
 3403          */
 3404         NDFREE(&nd, NDF_ONLY_PNBUF);
 3405         if ((flags & UTIMENS_EXIT) == 0)
 3406                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3407         vrele(nd.ni_vp);
 3408         return (error);
 3409 }
 3410 
 3411 /*
 3412  * Truncate a file given its path name.
 3413  */
 3414 #ifndef _SYS_SYSPROTO_H_
 3415 struct truncate_args {
 3416         char    *path;
 3417         int     pad;
 3418         off_t   length;
 3419 };
 3420 #endif
 3421 int
 3422 sys_truncate(td, uap)
 3423         struct thread *td;
 3424         register struct truncate_args /* {
 3425                 char *path;
 3426                 int pad;
 3427                 off_t length;
 3428         } */ *uap;
 3429 {
 3430 
 3431         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3432 }
 3433 
 3434 int
 3435 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3436 {
 3437         struct mount *mp;
 3438         struct vnode *vp;
 3439         void *rl_cookie;
 3440         struct vattr vattr;
 3441         struct nameidata nd;
 3442         int error;
 3443 
 3444         if (length < 0)
 3445                 return(EINVAL);
 3446         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3447         if ((error = namei(&nd)) != 0)
 3448                 return (error);
 3449         vp = nd.ni_vp;
 3450         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3451         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3452                 vn_rangelock_unlock(vp, rl_cookie);
 3453                 vrele(vp);
 3454                 return (error);
 3455         }
 3456         NDFREE(&nd, NDF_ONLY_PNBUF);
 3457         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3458         if (vp->v_type == VDIR)
 3459                 error = EISDIR;
 3460 #ifdef MAC
 3461         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3462         }
 3463 #endif
 3464         else if ((error = vn_writechk(vp)) == 0 &&
 3465             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3466                 VATTR_NULL(&vattr);
 3467                 vattr.va_size = length;
 3468                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3469         }
 3470         VOP_UNLOCK(vp, 0);
 3471         vn_finished_write(mp);
 3472         vn_rangelock_unlock(vp, rl_cookie);
 3473         vrele(vp);
 3474         return (error);
 3475 }
 3476 
 3477 #if defined(COMPAT_43)
 3478 /*
 3479  * Truncate a file given its path name.
 3480  */
 3481 #ifndef _SYS_SYSPROTO_H_
 3482 struct otruncate_args {
 3483         char    *path;
 3484         long    length;
 3485 };
 3486 #endif
 3487 int
 3488 otruncate(td, uap)
 3489         struct thread *td;
 3490         register struct otruncate_args /* {
 3491                 char *path;
 3492                 long length;
 3493         } */ *uap;
 3494 {
 3495         struct truncate_args /* {
 3496                 char *path;
 3497                 int pad;
 3498                 off_t length;
 3499         } */ nuap;
 3500 
 3501         nuap.path = uap->path;
 3502         nuap.length = uap->length;
 3503         return (sys_truncate(td, &nuap));
 3504 }
 3505 #endif /* COMPAT_43 */
 3506 
 3507 /* Versions with the pad argument */
 3508 int
 3509 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3510 {
 3511         struct truncate_args ouap;
 3512 
 3513         ouap.path = uap->path;
 3514         ouap.length = uap->length;
 3515         return (sys_truncate(td, &ouap));
 3516 }
 3517 
 3518 int
 3519 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3520 {
 3521         struct ftruncate_args ouap;
 3522 
 3523         ouap.fd = uap->fd;
 3524         ouap.length = uap->length;
 3525         return (sys_ftruncate(td, &ouap));
 3526 }
 3527 
 3528 /*
 3529  * Sync an open file.
 3530  */
 3531 #ifndef _SYS_SYSPROTO_H_
 3532 struct fsync_args {
 3533         int     fd;
 3534 };
 3535 #endif
 3536 int
 3537 sys_fsync(td, uap)
 3538         struct thread *td;
 3539         struct fsync_args /* {
 3540                 int fd;
 3541         } */ *uap;
 3542 {
 3543         struct vnode *vp;
 3544         struct mount *mp;
 3545         struct file *fp;
 3546         cap_rights_t rights;
 3547         int error, lock_flags;
 3548 
 3549         AUDIT_ARG_FD(uap->fd);
 3550         error = getvnode(td->td_proc->p_fd, uap->fd,
 3551             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3552         if (error != 0)
 3553                 return (error);
 3554         vp = fp->f_vnode;
 3555         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3556         if (error != 0)
 3557                 goto drop;
 3558         if (MNT_SHARED_WRITES(mp) ||
 3559             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3560                 lock_flags = LK_SHARED;
 3561         } else {
 3562                 lock_flags = LK_EXCLUSIVE;
 3563         }
 3564         vn_lock(vp, lock_flags | LK_RETRY);
 3565         AUDIT_ARG_VNODE1(vp);
 3566         if (vp->v_object != NULL) {
 3567                 VM_OBJECT_WLOCK(vp->v_object);
 3568                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3569                 VM_OBJECT_WUNLOCK(vp->v_object);
 3570         }
 3571         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3572 
 3573         VOP_UNLOCK(vp, 0);
 3574         vn_finished_write(mp);
 3575 drop:
 3576         fdrop(fp, td);
 3577         return (error);
 3578 }
 3579 
 3580 /*
 3581  * Rename files.  Source and destination must either both be directories, or
 3582  * both not be directories.  If target is a directory, it must be empty.
 3583  */
 3584 #ifndef _SYS_SYSPROTO_H_
 3585 struct rename_args {
 3586         char    *from;
 3587         char    *to;
 3588 };
 3589 #endif
 3590 int
 3591 sys_rename(td, uap)
 3592         struct thread *td;
 3593         register struct rename_args /* {
 3594                 char *from;
 3595                 char *to;
 3596         } */ *uap;
 3597 {
 3598 
 3599         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3600 }
 3601 
 3602 #ifndef _SYS_SYSPROTO_H_
 3603 struct renameat_args {
 3604         int     oldfd;
 3605         char    *old;
 3606         int     newfd;
 3607         char    *new;
 3608 };
 3609 #endif
 3610 int
 3611 sys_renameat(struct thread *td, struct renameat_args *uap)
 3612 {
 3613 
 3614         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3615             UIO_USERSPACE));
 3616 }
 3617 
 3618 int
 3619 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3620 {
 3621 
 3622         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3623 }
 3624 
 3625 int
 3626 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3627     enum uio_seg pathseg)
 3628 {
 3629         struct mount *mp = NULL;
 3630         struct vnode *tvp, *fvp, *tdvp;
 3631         struct nameidata fromnd, tond;
 3632         cap_rights_t rights;
 3633         int error;
 3634 
 3635 again:
 3636         bwillwrite();
 3637 #ifdef MAC
 3638         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3639             AUDITVNODE1, pathseg, old, oldfd,
 3640             cap_rights_init(&rights, CAP_RENAMEAT), td);
 3641 #else
 3642         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3643             pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
 3644 #endif
 3645 
 3646         if ((error = namei(&fromnd)) != 0)
 3647                 return (error);
 3648 #ifdef MAC
 3649         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3650             fromnd.ni_vp, &fromnd.ni_cnd);
 3651         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3652         if (fromnd.ni_dvp != fromnd.ni_vp)
 3653                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3654 #endif
 3655         fvp = fromnd.ni_vp;
 3656         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3657             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3658             cap_rights_init(&rights, CAP_LINKAT), td);
 3659         if (fromnd.ni_vp->v_type == VDIR)
 3660                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3661         if ((error = namei(&tond)) != 0) {
 3662                 /* Translate error code for rename("dir1", "dir2/."). */
 3663                 if (error == EISDIR && fvp->v_type == VDIR)
 3664                         error = EINVAL;
 3665                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3666                 vrele(fromnd.ni_dvp);
 3667                 vrele(fvp);
 3668                 goto out1;
 3669         }
 3670         tdvp = tond.ni_dvp;
 3671         tvp = tond.ni_vp;
 3672         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3673         if (error != 0) {
 3674                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3675                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3676                 if (tvp != NULL)
 3677                         vput(tvp);
 3678                 if (tdvp == tvp)
 3679                         vrele(tdvp);
 3680                 else
 3681                         vput(tdvp);
 3682                 vrele(fromnd.ni_dvp);
 3683                 vrele(fvp);
 3684                 vrele(tond.ni_startdir);
 3685                 if (fromnd.ni_startdir != NULL)
 3686                         vrele(fromnd.ni_startdir);
 3687                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3688                 if (error != 0)
 3689                         return (error);
 3690                 goto again;
 3691         }
 3692         if (tvp != NULL) {
 3693                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3694                         error = ENOTDIR;
 3695                         goto out;
 3696                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3697                         error = EISDIR;
 3698                         goto out;
 3699                 }
 3700 #ifdef CAPABILITIES
 3701                 if (newfd != AT_FDCWD) {
 3702                         /*
 3703                          * If the target already exists we require CAP_UNLINKAT
 3704                          * from 'newfd'.
 3705                          */
 3706                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3707                             cap_rights_init(&rights, CAP_UNLINKAT));
 3708                         if (error != 0)
 3709                                 goto out;
 3710                 }
 3711 #endif
 3712         }
 3713         if (fvp == tdvp) {
 3714                 error = EINVAL;
 3715                 goto out;
 3716         }
 3717         /*
 3718          * If the source is the same as the destination (that is, if they
 3719          * are links to the same vnode), then there is nothing to do.
 3720          */
 3721         if (fvp == tvp)
 3722                 error = -1;
 3723 #ifdef MAC
 3724         else
 3725                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3726                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3727 #endif
 3728 out:
 3729         if (error == 0) {
 3730                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3731                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3732                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3733                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3734         } else {
 3735                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3736                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3737                 if (tvp != NULL)
 3738                         vput(tvp);
 3739                 if (tdvp == tvp)
 3740                         vrele(tdvp);
 3741                 else
 3742                         vput(tdvp);
 3743                 vrele(fromnd.ni_dvp);
 3744                 vrele(fvp);
 3745         }
 3746         vrele(tond.ni_startdir);
 3747         vn_finished_write(mp);
 3748 out1:
 3749         if (fromnd.ni_startdir)
 3750                 vrele(fromnd.ni_startdir);
 3751         if (error == -1)
 3752                 return (0);
 3753         return (error);
 3754 }
 3755 
 3756 /*
 3757  * Make a directory file.
 3758  */
 3759 #ifndef _SYS_SYSPROTO_H_
 3760 struct mkdir_args {
 3761         char    *path;
 3762         int     mode;
 3763 };
 3764 #endif
 3765 int
 3766 sys_mkdir(td, uap)
 3767         struct thread *td;
 3768         register struct mkdir_args /* {
 3769                 char *path;
 3770                 int mode;
 3771         } */ *uap;
 3772 {
 3773 
 3774         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3775 }
 3776 
 3777 #ifndef _SYS_SYSPROTO_H_
 3778 struct mkdirat_args {
 3779         int     fd;
 3780         char    *path;
 3781         mode_t  mode;
 3782 };
 3783 #endif
 3784 int
 3785 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3786 {
 3787 
 3788         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3789 }
 3790 
 3791 int
 3792 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3793 {
 3794 
 3795         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3796 }
 3797 
 3798 int
 3799 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3800     int mode)
 3801 {
 3802         struct mount *mp;
 3803         struct vnode *vp;
 3804         struct vattr vattr;
 3805         struct nameidata nd;
 3806         cap_rights_t rights;
 3807         int error;
 3808 
 3809         AUDIT_ARG_MODE(mode);
 3810 restart:
 3811         bwillwrite();
 3812         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3813             NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 3814             td);
 3815         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3816         if ((error = namei(&nd)) != 0)
 3817                 return (error);
 3818         vp = nd.ni_vp;
 3819         if (vp != NULL) {
 3820                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3821                 /*
 3822                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3823                  * the strange behaviour of leaving the vnode unlocked
 3824                  * if the target is the same vnode as the parent.
 3825                  */
 3826                 if (vp == nd.ni_dvp)
 3827                         vrele(nd.ni_dvp);
 3828                 else
 3829                         vput(nd.ni_dvp);
 3830                 vrele(vp);
 3831                 return (EEXIST);
 3832         }
 3833         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3834                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3835                 vput(nd.ni_dvp);
 3836                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3837                         return (error);
 3838                 goto restart;
 3839         }
 3840         VATTR_NULL(&vattr);
 3841         vattr.va_type = VDIR;
 3842         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3843 #ifdef MAC
 3844         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3845             &vattr);
 3846         if (error != 0)
 3847                 goto out;
 3848 #endif
 3849         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3850 #ifdef MAC
 3851 out:
 3852 #endif
 3853         NDFREE(&nd, NDF_ONLY_PNBUF);
 3854         vput(nd.ni_dvp);
 3855         if (error == 0)
 3856                 vput(nd.ni_vp);
 3857         vn_finished_write(mp);
 3858         return (error);
 3859 }
 3860 
 3861 /*
 3862  * Remove a directory file.
 3863  */
 3864 #ifndef _SYS_SYSPROTO_H_
 3865 struct rmdir_args {
 3866         char    *path;
 3867 };
 3868 #endif
 3869 int
 3870 sys_rmdir(td, uap)
 3871         struct thread *td;
 3872         struct rmdir_args /* {
 3873                 char *path;
 3874         } */ *uap;
 3875 {
 3876 
 3877         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3878 }
 3879 
 3880 int
 3881 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3882 {
 3883 
 3884         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3885 }
 3886 
 3887 int
 3888 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3889 {
 3890         struct mount *mp;
 3891         struct vnode *vp;
 3892         struct nameidata nd;
 3893         cap_rights_t rights;
 3894         int error;
 3895 
 3896 restart:
 3897         bwillwrite();
 3898         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3899             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3900         if ((error = namei(&nd)) != 0)
 3901                 return (error);
 3902         vp = nd.ni_vp;
 3903         if (vp->v_type != VDIR) {
 3904                 error = ENOTDIR;
 3905                 goto out;
 3906         }
 3907         /*
 3908          * No rmdir "." please.
 3909          */
 3910         if (nd.ni_dvp == vp) {
 3911                 error = EINVAL;
 3912                 goto out;
 3913         }
 3914         /*
 3915          * The root of a mounted filesystem cannot be deleted.
 3916          */
 3917         if (vp->v_vflag & VV_ROOT) {
 3918                 error = EBUSY;
 3919                 goto out;
 3920         }
 3921 #ifdef MAC
 3922         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3923             &nd.ni_cnd);
 3924         if (error != 0)
 3925                 goto out;
 3926 #endif
 3927         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3928                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3929                 vput(vp);
 3930                 if (nd.ni_dvp == vp)
 3931                         vrele(nd.ni_dvp);
 3932                 else
 3933                         vput(nd.ni_dvp);
 3934                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3935                         return (error);
 3936                 goto restart;
 3937         }
 3938         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3939         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3940         vn_finished_write(mp);
 3941 out:
 3942         NDFREE(&nd, NDF_ONLY_PNBUF);
 3943         vput(vp);
 3944         if (nd.ni_dvp == vp)
 3945                 vrele(nd.ni_dvp);
 3946         else
 3947                 vput(nd.ni_dvp);
 3948         return (error);
 3949 }
 3950 
 3951 #ifdef COMPAT_43
 3952 /*
 3953  * Read a block of directory entries in a filesystem independent format.
 3954  */
 3955 #ifndef _SYS_SYSPROTO_H_
 3956 struct ogetdirentries_args {
 3957         int     fd;
 3958         char    *buf;
 3959         u_int   count;
 3960         long    *basep;
 3961 };
 3962 #endif
 3963 int
 3964 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3965 {
 3966         long loff;
 3967         int error;
 3968 
 3969         error = kern_ogetdirentries(td, uap, &loff);
 3970         if (error == 0)
 3971                 error = copyout(&loff, uap->basep, sizeof(long));
 3972         return (error);
 3973 }
 3974 
 3975 int
 3976 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3977     long *ploff)
 3978 {
 3979         struct vnode *vp;
 3980         struct file *fp;
 3981         struct uio auio, kuio;
 3982         struct iovec aiov, kiov;
 3983         struct dirent *dp, *edp;
 3984         cap_rights_t rights;
 3985         caddr_t dirbuf;
 3986         int error, eofflag, readcnt;
 3987         long loff;
 3988         off_t foffset;
 3989 
 3990         /* XXX arbitrary sanity limit on `count'. */
 3991         if (uap->count > 64 * 1024)
 3992                 return (EINVAL);
 3993         error = getvnode(td->td_proc->p_fd, uap->fd,
 3994             cap_rights_init(&rights, CAP_READ), &fp);
 3995         if (error != 0)
 3996                 return (error);
 3997         if ((fp->f_flag & FREAD) == 0) {
 3998                 fdrop(fp, td);
 3999                 return (EBADF);
 4000         }
 4001         vp = fp->f_vnode;
 4002         foffset = foffset_lock(fp, 0);
 4003 unionread:
 4004         if (vp->v_type != VDIR) {
 4005                 foffset_unlock(fp, foffset, 0);
 4006                 fdrop(fp, td);
 4007                 return (EINVAL);
 4008         }
 4009         aiov.iov_base = uap->buf;
 4010         aiov.iov_len = uap->count;
 4011         auio.uio_iov = &aiov;
 4012         auio.uio_iovcnt = 1;
 4013         auio.uio_rw = UIO_READ;
 4014         auio.uio_segflg = UIO_USERSPACE;
 4015         auio.uio_td = td;
 4016         auio.uio_resid = uap->count;
 4017         vn_lock(vp, LK_SHARED | LK_RETRY);
 4018         loff = auio.uio_offset = foffset;
 4019 #ifdef MAC
 4020         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4021         if (error != 0) {
 4022                 VOP_UNLOCK(vp, 0);
 4023                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 4024                 fdrop(fp, td);
 4025                 return (error);
 4026         }
 4027 #endif
 4028 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4029                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4030                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4031                             NULL, NULL);
 4032                         foffset = auio.uio_offset;
 4033                 } else
 4034 #       endif
 4035         {
 4036                 kuio = auio;
 4037                 kuio.uio_iov = &kiov;
 4038                 kuio.uio_segflg = UIO_SYSSPACE;
 4039                 kiov.iov_len = uap->count;
 4040                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4041                 kiov.iov_base = dirbuf;
 4042                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4043                             NULL, NULL);
 4044                 foffset = kuio.uio_offset;
 4045                 if (error == 0) {
 4046                         readcnt = uap->count - kuio.uio_resid;
 4047                         edp = (struct dirent *)&dirbuf[readcnt];
 4048                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4049 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4050                                         /*
 4051                                          * The expected low byte of
 4052                                          * dp->d_namlen is our dp->d_type.
 4053                                          * The high MBZ byte of dp->d_namlen
 4054                                          * is our dp->d_namlen.
 4055                                          */
 4056                                         dp->d_type = dp->d_namlen;
 4057                                         dp->d_namlen = 0;
 4058 #                               else
 4059                                         /*
 4060                                          * The dp->d_type is the high byte
 4061                                          * of the expected dp->d_namlen,
 4062                                          * so must be zero'ed.
 4063                                          */
 4064                                         dp->d_type = 0;
 4065 #                               endif
 4066                                 if (dp->d_reclen > 0) {
 4067                                         dp = (struct dirent *)
 4068                                             ((char *)dp + dp->d_reclen);
 4069                                 } else {
 4070                                         error = EIO;
 4071                                         break;
 4072                                 }
 4073                         }
 4074                         if (dp >= edp)
 4075                                 error = uiomove(dirbuf, readcnt, &auio);
 4076                 }
 4077                 free(dirbuf, M_TEMP);
 4078         }
 4079         if (error != 0) {
 4080                 VOP_UNLOCK(vp, 0);
 4081                 foffset_unlock(fp, foffset, 0);
 4082                 fdrop(fp, td);
 4083                 return (error);
 4084         }
 4085         if (uap->count == auio.uio_resid &&
 4086             (vp->v_vflag & VV_ROOT) &&
 4087             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4088                 struct vnode *tvp = vp;
 4089                 vp = vp->v_mount->mnt_vnodecovered;
 4090                 VREF(vp);
 4091                 fp->f_vnode = vp;
 4092                 fp->f_data = vp;
 4093                 foffset = 0;
 4094                 vput(tvp);
 4095                 goto unionread;
 4096         }
 4097         VOP_UNLOCK(vp, 0);
 4098         foffset_unlock(fp, foffset, 0);
 4099         fdrop(fp, td);
 4100         td->td_retval[0] = uap->count - auio.uio_resid;
 4101         if (error == 0)
 4102                 *ploff = loff;
 4103         return (error);
 4104 }
 4105 #endif /* COMPAT_43 */
 4106 
 4107 /*
 4108  * Read a block of directory entries in a filesystem independent format.
 4109  */
 4110 #ifndef _SYS_SYSPROTO_H_
 4111 struct getdirentries_args {
 4112         int     fd;
 4113         char    *buf;
 4114         u_int   count;
 4115         long    *basep;
 4116 };
 4117 #endif
 4118 int
 4119 sys_getdirentries(td, uap)
 4120         struct thread *td;
 4121         register struct getdirentries_args /* {
 4122                 int fd;
 4123                 char *buf;
 4124                 u_int count;
 4125                 long *basep;
 4126         } */ *uap;
 4127 {
 4128         long base;
 4129         int error;
 4130 
 4131         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4132             NULL, UIO_USERSPACE);
 4133         if (error != 0)
 4134                 return (error);
 4135         if (uap->basep != NULL)
 4136                 error = copyout(&base, uap->basep, sizeof(long));
 4137         return (error);
 4138 }
 4139 
 4140 int
 4141 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4142     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4143 {
 4144         struct vnode *vp;
 4145         struct file *fp;
 4146         struct uio auio;
 4147         struct iovec aiov;
 4148         cap_rights_t rights;
 4149         long loff;
 4150         int error, eofflag;
 4151         off_t foffset;
 4152 
 4153         AUDIT_ARG_FD(fd);
 4154         if (count > IOSIZE_MAX)
 4155                 return (EINVAL);
 4156         auio.uio_resid = count;
 4157         error = getvnode(td->td_proc->p_fd, fd,
 4158             cap_rights_init(&rights, CAP_READ), &fp);
 4159         if (error != 0)
 4160                 return (error);
 4161         if ((fp->f_flag & FREAD) == 0) {
 4162                 fdrop(fp, td);
 4163                 return (EBADF);
 4164         }
 4165         vp = fp->f_vnode;
 4166         foffset = foffset_lock(fp, 0);
 4167 unionread:
 4168         if (vp->v_type != VDIR) {
 4169                 error = EINVAL;
 4170                 goto fail;
 4171         }
 4172         aiov.iov_base = buf;
 4173         aiov.iov_len = count;
 4174         auio.uio_iov = &aiov;
 4175         auio.uio_iovcnt = 1;
 4176         auio.uio_rw = UIO_READ;
 4177         auio.uio_segflg = bufseg;
 4178         auio.uio_td = td;
 4179         vn_lock(vp, LK_SHARED | LK_RETRY);
 4180         AUDIT_ARG_VNODE1(vp);
 4181         loff = auio.uio_offset = foffset;
 4182 #ifdef MAC
 4183         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4184         if (error == 0)
 4185 #endif
 4186                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4187                     NULL);
 4188         foffset = auio.uio_offset;
 4189         if (error != 0) {
 4190                 VOP_UNLOCK(vp, 0);
 4191                 goto fail;
 4192         }
 4193         if (count == auio.uio_resid &&
 4194             (vp->v_vflag & VV_ROOT) &&
 4195             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4196                 struct vnode *tvp = vp;
 4197 
 4198                 vp = vp->v_mount->mnt_vnodecovered;
 4199                 VREF(vp);
 4200                 fp->f_vnode = vp;
 4201                 fp->f_data = vp;
 4202                 foffset = 0;
 4203                 vput(tvp);
 4204                 goto unionread;
 4205         }
 4206         VOP_UNLOCK(vp, 0);
 4207         *basep = loff;
 4208         if (residp != NULL)
 4209                 *residp = auio.uio_resid;
 4210         td->td_retval[0] = count - auio.uio_resid;
 4211 fail:
 4212         foffset_unlock(fp, foffset, 0);
 4213         fdrop(fp, td);
 4214         return (error);
 4215 }
 4216 
 4217 #ifndef _SYS_SYSPROTO_H_
 4218 struct getdents_args {
 4219         int fd;
 4220         char *buf;
 4221         size_t count;
 4222 };
 4223 #endif
 4224 int
 4225 sys_getdents(td, uap)
 4226         struct thread *td;
 4227         register struct getdents_args /* {
 4228                 int fd;
 4229                 char *buf;
 4230                 u_int count;
 4231         } */ *uap;
 4232 {
 4233         struct getdirentries_args ap;
 4234 
 4235         ap.fd = uap->fd;
 4236         ap.buf = uap->buf;
 4237         ap.count = uap->count;
 4238         ap.basep = NULL;
 4239         return (sys_getdirentries(td, &ap));
 4240 }
 4241 
 4242 /*
 4243  * Set the mode mask for creation of filesystem nodes.
 4244  */
 4245 #ifndef _SYS_SYSPROTO_H_
 4246 struct umask_args {
 4247         int     newmask;
 4248 };
 4249 #endif
 4250 int
 4251 sys_umask(td, uap)
 4252         struct thread *td;
 4253         struct umask_args /* {
 4254                 int newmask;
 4255         } */ *uap;
 4256 {
 4257         register struct filedesc *fdp;
 4258 
 4259         FILEDESC_XLOCK(td->td_proc->p_fd);
 4260         fdp = td->td_proc->p_fd;
 4261         td->td_retval[0] = fdp->fd_cmask;
 4262         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4263         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4264         return (0);
 4265 }
 4266 
 4267 /*
 4268  * Void all references to file by ripping underlying filesystem away from
 4269  * vnode.
 4270  */
 4271 #ifndef _SYS_SYSPROTO_H_
 4272 struct revoke_args {
 4273         char    *path;
 4274 };
 4275 #endif
 4276 int
 4277 sys_revoke(td, uap)
 4278         struct thread *td;
 4279         register struct revoke_args /* {
 4280                 char *path;
 4281         } */ *uap;
 4282 {
 4283         struct vnode *vp;
 4284         struct vattr vattr;
 4285         struct nameidata nd;
 4286         int error;
 4287 
 4288         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4289             uap->path, td);
 4290         if ((error = namei(&nd)) != 0)
 4291                 return (error);
 4292         vp = nd.ni_vp;
 4293         NDFREE(&nd, NDF_ONLY_PNBUF);
 4294         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4295                 error = EINVAL;
 4296                 goto out;
 4297         }
 4298 #ifdef MAC
 4299         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4300         if (error != 0)
 4301                 goto out;
 4302 #endif
 4303         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4304         if (error != 0)
 4305                 goto out;
 4306         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4307                 error = priv_check(td, PRIV_VFS_ADMIN);
 4308                 if (error != 0)
 4309                         goto out;
 4310         }
 4311         if (vcount(vp) > 1)
 4312                 VOP_REVOKE(vp, REVOKEALL);
 4313 out:
 4314         vput(vp);
 4315         return (error);
 4316 }
 4317 
 4318 /*
 4319  * Convert a user file descriptor to a kernel file entry and check that, if it
 4320  * is a capability, the correct rights are present. A reference on the file
 4321  * entry is held upon returning.
 4322  */
 4323 int
 4324 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4325 {
 4326         struct file *fp;
 4327         int error;
 4328 
 4329         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4330         if (error != 0)
 4331                 return (error);
 4332 
 4333         /*
 4334          * The file could be not of the vnode type, or it may be not
 4335          * yet fully initialized, in which case the f_vnode pointer
 4336          * may be set, but f_ops is still badfileops.  E.g.,
 4337          * devfs_open() transiently create such situation to
 4338          * facilitate csw d_fdopen().
 4339          *
 4340          * Dupfdopen() handling in kern_openat() installs the
 4341          * half-baked file into the process descriptor table, allowing
 4342          * other thread to dereference it. Guard against the race by
 4343          * checking f_ops.
 4344          */
 4345         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4346                 fdrop(fp, curthread);
 4347                 return (EINVAL);
 4348         }
 4349         *fpp = fp;
 4350         return (0);
 4351 }
 4352 
 4353 
 4354 /*
 4355  * Get an (NFS) file handle.
 4356  */
 4357 #ifndef _SYS_SYSPROTO_H_
 4358 struct lgetfh_args {
 4359         char    *fname;
 4360         fhandle_t *fhp;
 4361 };
 4362 #endif
 4363 int
 4364 sys_lgetfh(td, uap)
 4365         struct thread *td;
 4366         register struct lgetfh_args *uap;
 4367 {
 4368         struct nameidata nd;
 4369         fhandle_t fh;
 4370         register struct vnode *vp;
 4371         int error;
 4372 
 4373         error = priv_check(td, PRIV_VFS_GETFH);
 4374         if (error != 0)
 4375                 return (error);
 4376         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4377             uap->fname, td);
 4378         error = namei(&nd);
 4379         if (error != 0)
 4380                 return (error);
 4381         NDFREE(&nd, NDF_ONLY_PNBUF);
 4382         vp = nd.ni_vp;
 4383         bzero(&fh, sizeof(fh));
 4384         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4385         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4386         vput(vp);
 4387         if (error == 0)
 4388                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4389         return (error);
 4390 }
 4391 
 4392 #ifndef _SYS_SYSPROTO_H_
 4393 struct getfh_args {
 4394         char    *fname;
 4395         fhandle_t *fhp;
 4396 };
 4397 #endif
 4398 int
 4399 sys_getfh(td, uap)
 4400         struct thread *td;
 4401         register struct getfh_args *uap;
 4402 {
 4403         struct nameidata nd;
 4404         fhandle_t fh;
 4405         register struct vnode *vp;
 4406         int error;
 4407 
 4408         error = priv_check(td, PRIV_VFS_GETFH);
 4409         if (error != 0)
 4410                 return (error);
 4411         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4412             uap->fname, td);
 4413         error = namei(&nd);
 4414         if (error != 0)
 4415                 return (error);
 4416         NDFREE(&nd, NDF_ONLY_PNBUF);
 4417         vp = nd.ni_vp;
 4418         bzero(&fh, sizeof(fh));
 4419         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4420         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4421         vput(vp);
 4422         if (error == 0)
 4423                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4424         return (error);
 4425 }
 4426 
 4427 /*
 4428  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4429  * open descriptor.
 4430  *
 4431  * warning: do not remove the priv_check() call or this becomes one giant
 4432  * security hole.
 4433  */
 4434 #ifndef _SYS_SYSPROTO_H_
 4435 struct fhopen_args {
 4436         const struct fhandle *u_fhp;
 4437         int flags;
 4438 };
 4439 #endif
 4440 int
 4441 sys_fhopen(td, uap)
 4442         struct thread *td;
 4443         struct fhopen_args /* {
 4444                 const struct fhandle *u_fhp;
 4445                 int flags;
 4446         } */ *uap;
 4447 {
 4448         struct mount *mp;
 4449         struct vnode *vp;
 4450         struct fhandle fhp;
 4451         struct file *fp;
 4452         int fmode, error;
 4453         int indx;
 4454 
 4455         error = priv_check(td, PRIV_VFS_FHOPEN);
 4456         if (error != 0)
 4457                 return (error);
 4458         indx = -1;
 4459         fmode = FFLAGS(uap->flags);
 4460         /* why not allow a non-read/write open for our lockd? */
 4461         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4462                 return (EINVAL);
 4463         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4464         if (error != 0)
 4465                 return(error);
 4466         /* find the mount point */
 4467         mp = vfs_busyfs(&fhp.fh_fsid);
 4468         if (mp == NULL)
 4469                 return (ESTALE);
 4470         /* now give me my vnode, it gets returned to me locked */
 4471         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4472         vfs_unbusy(mp);
 4473         if (error != 0)
 4474                 return (error);
 4475 
 4476         error = falloc_noinstall(td, &fp);
 4477         if (error != 0) {
 4478                 vput(vp);
 4479                 return (error);
 4480         }
 4481         /*
 4482          * An extra reference on `fp' has been held for us by
 4483          * falloc_noinstall().
 4484          */
 4485 
 4486 #ifdef INVARIANTS
 4487         td->td_dupfd = -1;
 4488 #endif
 4489         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4490         if (error != 0) {
 4491                 KASSERT(fp->f_ops == &badfileops,
 4492                     ("VOP_OPEN in fhopen() set f_ops"));
 4493                 KASSERT(td->td_dupfd < 0,
 4494                     ("fhopen() encountered fdopen()"));
 4495 
 4496                 vput(vp);
 4497                 goto bad;
 4498         }
 4499 #ifdef INVARIANTS
 4500         td->td_dupfd = 0;
 4501 #endif
 4502         fp->f_vnode = vp;
 4503         fp->f_seqcount = 1;
 4504         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4505             &vnops);
 4506         VOP_UNLOCK(vp, 0);
 4507         if ((fmode & O_TRUNC) != 0) {
 4508                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4509                 if (error != 0)
 4510                         goto bad;
 4511         }
 4512 
 4513         error = finstall(td, fp, &indx, fmode, NULL);
 4514 bad:
 4515         fdrop(fp, td);
 4516         td->td_retval[0] = indx;
 4517         return (error);
 4518 }
 4519 
 4520 /*
 4521  * Stat an (NFS) file handle.
 4522  */
 4523 #ifndef _SYS_SYSPROTO_H_
 4524 struct fhstat_args {
 4525         struct fhandle *u_fhp;
 4526         struct stat *sb;
 4527 };
 4528 #endif
 4529 int
 4530 sys_fhstat(td, uap)
 4531         struct thread *td;
 4532         register struct fhstat_args /* {
 4533                 struct fhandle *u_fhp;
 4534                 struct stat *sb;
 4535         } */ *uap;
 4536 {
 4537         struct stat sb;
 4538         struct fhandle fh;
 4539         int error;
 4540 
 4541         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4542         if (error != 0)
 4543                 return (error);
 4544         error = kern_fhstat(td, fh, &sb);
 4545         if (error == 0)
 4546                 error = copyout(&sb, uap->sb, sizeof(sb));
 4547         return (error);
 4548 }
 4549 
 4550 int
 4551 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4552 {
 4553         struct mount *mp;
 4554         struct vnode *vp;
 4555         int error;
 4556 
 4557         error = priv_check(td, PRIV_VFS_FHSTAT);
 4558         if (error != 0)
 4559                 return (error);
 4560         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4561                 return (ESTALE);
 4562         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4563         vfs_unbusy(mp);
 4564         if (error != 0)
 4565                 return (error);
 4566         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4567         vput(vp);
 4568         return (error);
 4569 }
 4570 
 4571 /*
 4572  * Implement fstatfs() for (NFS) file handles.
 4573  */
 4574 #ifndef _SYS_SYSPROTO_H_
 4575 struct fhstatfs_args {
 4576         struct fhandle *u_fhp;
 4577         struct statfs *buf;
 4578 };
 4579 #endif
 4580 int
 4581 sys_fhstatfs(td, uap)
 4582         struct thread *td;
 4583         struct fhstatfs_args /* {
 4584                 struct fhandle *u_fhp;
 4585                 struct statfs *buf;
 4586         } */ *uap;
 4587 {
 4588         struct statfs sf;
 4589         fhandle_t fh;
 4590         int error;
 4591 
 4592         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4593         if (error != 0)
 4594                 return (error);
 4595         error = kern_fhstatfs(td, fh, &sf);
 4596         if (error != 0)
 4597                 return (error);
 4598         return (copyout(&sf, uap->buf, sizeof(sf)));
 4599 }
 4600 
 4601 int
 4602 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4603 {
 4604         struct statfs *sp;
 4605         struct mount *mp;
 4606         struct vnode *vp;
 4607         int error;
 4608 
 4609         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4610         if (error != 0)
 4611                 return (error);
 4612         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4613                 return (ESTALE);
 4614         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4615         if (error != 0) {
 4616                 vfs_unbusy(mp);
 4617                 return (error);
 4618         }
 4619         vput(vp);
 4620         error = prison_canseemount(td->td_ucred, mp);
 4621         if (error != 0)
 4622                 goto out;
 4623 #ifdef MAC
 4624         error = mac_mount_check_stat(td->td_ucred, mp);
 4625         if (error != 0)
 4626                 goto out;
 4627 #endif
 4628         /*
 4629          * Set these in case the underlying filesystem fails to do so.
 4630          */
 4631         sp = &mp->mnt_stat;
 4632         sp->f_version = STATFS_VERSION;
 4633         sp->f_namemax = NAME_MAX;
 4634         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4635         error = VFS_STATFS(mp, sp);
 4636         if (error == 0)
 4637                 *buf = *sp;
 4638 out:
 4639         vfs_unbusy(mp);
 4640         return (error);
 4641 }
 4642 
 4643 int
 4644 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4645 {
 4646         struct file *fp;
 4647         struct mount *mp;
 4648         struct vnode *vp;
 4649         cap_rights_t rights;
 4650         off_t olen, ooffset;
 4651         int error;
 4652 
 4653         if (offset < 0 || len <= 0)
 4654                 return (EINVAL);
 4655         /* Check for wrap. */
 4656         if (offset > OFF_MAX - len)
 4657                 return (EFBIG);
 4658         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4659         if (error != 0)
 4660                 return (error);
 4661         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4662                 error = ESPIPE;
 4663                 goto out;
 4664         }
 4665         if ((fp->f_flag & FWRITE) == 0) {
 4666                 error = EBADF;
 4667                 goto out;
 4668         }
 4669         if (fp->f_type != DTYPE_VNODE) {
 4670                 error = ENODEV;
 4671                 goto out;
 4672         }
 4673         vp = fp->f_vnode;
 4674         if (vp->v_type != VREG) {
 4675                 error = ENODEV;
 4676                 goto out;
 4677         }
 4678 
 4679         /* Allocating blocks may take a long time, so iterate. */
 4680         for (;;) {
 4681                 olen = len;
 4682                 ooffset = offset;
 4683 
 4684                 bwillwrite();
 4685                 mp = NULL;
 4686                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4687                 if (error != 0)
 4688                         break;
 4689                 error = vn_lock(vp, LK_EXCLUSIVE);
 4690                 if (error != 0) {
 4691                         vn_finished_write(mp);
 4692                         break;
 4693                 }
 4694 #ifdef MAC
 4695                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4696                 if (error == 0)
 4697 #endif
 4698                         error = VOP_ALLOCATE(vp, &offset, &len);
 4699                 VOP_UNLOCK(vp, 0);
 4700                 vn_finished_write(mp);
 4701 
 4702                 if (olen + ooffset != offset + len) {
 4703                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4704                             ooffset, olen, offset, len);
 4705                 }
 4706                 if (error != 0 || len == 0)
 4707                         break;
 4708                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4709                 maybe_yield();
 4710         }
 4711  out:
 4712         fdrop(fp, td);
 4713         return (error);
 4714 }
 4715 
 4716 int
 4717 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4718 {
 4719 
 4720         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4721             uap->len);
 4722         return (0);
 4723 }
 4724 
 4725 /*
 4726  * Unlike madvise(2), we do not make a best effort to remember every
 4727  * possible caching hint.  Instead, we remember the last setting with
 4728  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4729  * region of any current setting.
 4730  */
 4731 int
 4732 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4733     int advice)
 4734 {
 4735         struct fadvise_info *fa, *new;
 4736         struct file *fp;
 4737         struct vnode *vp;
 4738         cap_rights_t rights;
 4739         off_t end;
 4740         int error;
 4741 
 4742         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4743                 return (EINVAL);
 4744         switch (advice) {
 4745         case POSIX_FADV_SEQUENTIAL:
 4746         case POSIX_FADV_RANDOM:
 4747         case POSIX_FADV_NOREUSE:
 4748                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4749                 break;
 4750         case POSIX_FADV_NORMAL:
 4751         case POSIX_FADV_WILLNEED:
 4752         case POSIX_FADV_DONTNEED:
 4753                 new = NULL;
 4754                 break;
 4755         default:
 4756                 return (EINVAL);
 4757         }
 4758         /* XXX: CAP_POSIX_FADVISE? */
 4759         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4760         if (error != 0)
 4761                 goto out;
 4762         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4763                 error = ESPIPE;
 4764                 goto out;
 4765         }
 4766         if (fp->f_type != DTYPE_VNODE) {
 4767                 error = ENODEV;
 4768                 goto out;
 4769         }
 4770         vp = fp->f_vnode;
 4771         if (vp->v_type != VREG) {
 4772                 error = ENODEV;
 4773                 goto out;
 4774         }
 4775         if (len == 0)
 4776                 end = OFF_MAX;
 4777         else
 4778                 end = offset + len - 1;
 4779         switch (advice) {
 4780         case POSIX_FADV_SEQUENTIAL:
 4781         case POSIX_FADV_RANDOM:
 4782         case POSIX_FADV_NOREUSE:
 4783                 /*
 4784                  * Try to merge any existing non-standard region with
 4785                  * this new region if possible, otherwise create a new
 4786                  * non-standard region for this request.
 4787                  */
 4788                 mtx_pool_lock(mtxpool_sleep, fp);
 4789                 fa = fp->f_advice;
 4790                 if (fa != NULL && fa->fa_advice == advice &&
 4791                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4792                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4793                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4794                         if (offset < fa->fa_start)
 4795                                 fa->fa_start = offset;
 4796                         if (end > fa->fa_end)
 4797                                 fa->fa_end = end;
 4798                 } else {
 4799                         new->fa_advice = advice;
 4800                         new->fa_start = offset;
 4801                         new->fa_end = end;
 4802                         new->fa_prevstart = 0;
 4803                         new->fa_prevend = 0;
 4804                         fp->f_advice = new;
 4805                         new = fa;
 4806                 }
 4807                 mtx_pool_unlock(mtxpool_sleep, fp);
 4808                 break;
 4809         case POSIX_FADV_NORMAL:
 4810                 /*
 4811                  * If a the "normal" region overlaps with an existing
 4812                  * non-standard region, trim or remove the
 4813                  * non-standard region.
 4814                  */
 4815                 mtx_pool_lock(mtxpool_sleep, fp);
 4816                 fa = fp->f_advice;
 4817                 if (fa != NULL) {
 4818                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4819                                 new = fa;
 4820                                 fp->f_advice = NULL;
 4821                         } else if (offset <= fa->fa_start &&
 4822                             end >= fa->fa_start)
 4823                                 fa->fa_start = end + 1;
 4824                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4825                                 fa->fa_end = offset - 1;
 4826                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4827                                 /*
 4828                                  * If the "normal" region is a middle
 4829                                  * portion of the existing
 4830                                  * non-standard region, just remove
 4831                                  * the whole thing rather than picking
 4832                                  * one side or the other to
 4833                                  * preserve.
 4834                                  */
 4835                                 new = fa;
 4836                                 fp->f_advice = NULL;
 4837                         }
 4838                 }
 4839                 mtx_pool_unlock(mtxpool_sleep, fp);
 4840                 break;
 4841         case POSIX_FADV_WILLNEED:
 4842         case POSIX_FADV_DONTNEED:
 4843                 error = VOP_ADVISE(vp, offset, end, advice);
 4844                 break;
 4845         }
 4846 out:
 4847         if (fp != NULL)
 4848                 fdrop(fp, td);
 4849         free(new, M_FADVISE);
 4850         return (error);
 4851 }
 4852 
 4853 int
 4854 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4855 {
 4856 
 4857         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 4858             uap->len, uap->advice);
 4859         return (0);
 4860 }

Cache object: 27e5b943879dd2354923fc5a6aba63bc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.