The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.0/sys/kern/vfs_syscalls.c 255236 2013-09-05 09:36:19Z pjd $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
      /*
       * Statically defined tracing: provider "vfs" with two stat probes
       * ("mode" and "reg"), each declared with two arguments typed
       * "char *" and "int".
       */
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, reg, "char *", "int");
   97 
      /* Forward declarations of helpers that are private to this file. */
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  100 static int kern_chflags(struct thread *td, const char *path,
  101     enum uio_seg pathseg, u_long flags);
  102 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  103     enum uio_seg pathseg, u_long flags, int atflag);
  104 static int setfflags(struct thread *td, struct vnode *, u_long);
  105 static int setutimes(struct thread *td, struct vnode *,
  106     const struct timespec *, int, int);
  107 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  108     struct thread *td);
  109 
  110 /*
  111  * The module initialization routine for POSIX asynchronous I/O will
  112  * set this to the version of AIO that it implements.  (Zero means
  113  * that it is not implemented.)  This value is used here by pathconf()
  114  * and in kern_descrip.c by fpathconf().
  115  */
  116 int async_io_version;
  117 
      /*
       * Only built with DEBUG: a read-write integer sysctl named "syncprt"
       * under the _debug node, initially 0.  It has no description string.
       */
  118 #ifdef DEBUG
  119 static int syncprt = 0;
  120 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  121 #endif
  122 
  123 /*
  124  * Sync each mounted filesystem.
  125  */
  126 #ifndef _SYS_SYSPROTO_H_
  127 struct sync_args {
  128         int     dummy;
  129 };
  130 #endif
  131 /* ARGSUSED */
  132 int
  133 sys_sync(td, uap)
  134         struct thread *td;
  135         struct sync_args *uap;
  136 {
  137         struct mount *mp, *nmp;
  138         int save;
  139 
  140         mtx_lock(&mountlist_mtx);
  141         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  142                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  143                         nmp = TAILQ_NEXT(mp, mnt_list);
  144                         continue;
  145                 }
  146                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  147                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  148                         save = curthread_pflags_set(TDP_SYNCIO);
  149                         vfs_msync(mp, MNT_NOWAIT);
  150                         VFS_SYNC(mp, MNT_NOWAIT);
  151                         curthread_pflags_restore(save);
  152                         vn_finished_write(mp);
  153                 }
  154                 mtx_lock(&mountlist_mtx);
  155                 nmp = TAILQ_NEXT(mp, mnt_list);
  156                 vfs_unbusy(mp);
  157         }
  158         mtx_unlock(&mountlist_mtx);
  159         return (0);
  160 }
  161 
  162 /*
  163  * Change filesystem quotas.
  164  */
  165 #ifndef _SYS_SYSPROTO_H_
  166 struct quotactl_args {
  167         char *path;
  168         int cmd;
  169         int uid;
  170         caddr_t arg;
  171 };
  172 #endif
  173 int
  174 sys_quotactl(td, uap)
  175         struct thread *td;
  176         register struct quotactl_args /* {
  177                 char *path;
  178                 int cmd;
  179                 int uid;
  180                 caddr_t arg;
  181         } */ *uap;
  182 {
  183         struct mount *mp;
  184         struct nameidata nd;
  185         int error;
  186 
  187         AUDIT_ARG_CMD(uap->cmd);
  188         AUDIT_ARG_UID(uap->uid);
  189         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  190                 return (EPERM);
  191         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  192             uap->path, td);
  193         if ((error = namei(&nd)) != 0)
  194                 return (error);
  195         NDFREE(&nd, NDF_ONLY_PNBUF);
  196         mp = nd.ni_vp->v_mount;
  197         vfs_ref(mp);
  198         vput(nd.ni_vp);
  199         error = vfs_busy(mp, 0);
  200         vfs_rel(mp);
  201         if (error != 0)
  202                 return (error);
  203         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  204 
  205         /*
  206          * Since quota on operation typically needs to open quota
  207          * file, the Q_QUOTAON handler needs to unbusy the mount point
  208          * before calling into namei.  Otherwise, unmount might be
  209          * started between two vfs_busy() invocations (first is our,
  210          * second is from mount point cross-walk code in lookup()),
  211          * causing deadlock.
  212          *
  213          * Require that Q_QUOTAON handles the vfs_busy() reference on
  214          * its own, always returning with ubusied mount point.
  215          */
  216         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  217                 vfs_unbusy(mp);
  218         return (error);
  219 }
  220 
  221 /*
  222  * Used by statfs conversion routines to scale the block size up if
  223  * necessary so that all of the block counts are <= 'max_size'.  Note
  224  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  225  * value of 'n'.
  226  */
  227 void
  228 statfs_scale_blocks(struct statfs *sf, long max_size)
  229 {
  230         uint64_t count;
  231         int shift;
  232 
  233         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  234 
  235         /*
  236          * Attempt to scale the block counts to give a more accurate
  237          * overview to userland of the ratio of free space to used
  238          * space.  To do this, find the largest block count and compute
  239          * a divisor that lets it fit into a signed integer <= max_size.
  240          */
  241         if (sf->f_bavail < 0)
  242                 count = -sf->f_bavail;
  243         else
  244                 count = sf->f_bavail;
  245         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  246         if (count <= max_size)
  247                 return;
  248 
  249         count >>= flsl(max_size);
  250         shift = 0;
  251         while (count > 0) {
  252                 shift++;
  253                 count >>=1;
  254         }
  255 
  256         sf->f_bsize <<= shift;
  257         sf->f_blocks >>= shift;
  258         sf->f_bfree >>= shift;
  259         sf->f_bavail >>= shift;
  260 }
  261 
  262 /*
  263  * Get filesystem statistics.
  264  */
  265 #ifndef _SYS_SYSPROTO_H_
  266 struct statfs_args {
  267         char *path;
  268         struct statfs *buf;
  269 };
  270 #endif
  271 int
  272 sys_statfs(td, uap)
  273         struct thread *td;
  274         register struct statfs_args /* {
  275                 char *path;
  276                 struct statfs *buf;
  277         } */ *uap;
  278 {
  279         struct statfs sf;
  280         int error;
  281 
  282         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  283         if (error == 0)
  284                 error = copyout(&sf, uap->buf, sizeof(sf));
  285         return (error);
  286 }
  287 
  288 int
  289 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  290     struct statfs *buf)
  291 {
  292         struct mount *mp;
  293         struct statfs *sp, sb;
  294         struct nameidata nd;
  295         int error;
  296 
  297         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  298             pathseg, path, td);
  299         error = namei(&nd);
  300         if (error != 0)
  301                 return (error);
  302         mp = nd.ni_vp->v_mount;
  303         vfs_ref(mp);
  304         NDFREE(&nd, NDF_ONLY_PNBUF);
  305         vput(nd.ni_vp);
  306         error = vfs_busy(mp, 0);
  307         vfs_rel(mp);
  308         if (error != 0)
  309                 return (error);
  310 #ifdef MAC
  311         error = mac_mount_check_stat(td->td_ucred, mp);
  312         if (error != 0)
  313                 goto out;
  314 #endif
  315         /*
  316          * Set these in case the underlying filesystem fails to do so.
  317          */
  318         sp = &mp->mnt_stat;
  319         sp->f_version = STATFS_VERSION;
  320         sp->f_namemax = NAME_MAX;
  321         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  322         error = VFS_STATFS(mp, sp);
  323         if (error != 0)
  324                 goto out;
  325         if (priv_check(td, PRIV_VFS_GENERATION)) {
  326                 bcopy(sp, &sb, sizeof(sb));
  327                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  328                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  329                 sp = &sb;
  330         }
  331         *buf = *sp;
  332 out:
  333         vfs_unbusy(mp);
  334         return (error);
  335 }
  336 
  337 /*
  338  * Get filesystem statistics.
  339  */
  340 #ifndef _SYS_SYSPROTO_H_
  341 struct fstatfs_args {
  342         int fd;
  343         struct statfs *buf;
  344 };
  345 #endif
  346 int
  347 sys_fstatfs(td, uap)
  348         struct thread *td;
  349         register struct fstatfs_args /* {
  350                 int fd;
  351                 struct statfs *buf;
  352         } */ *uap;
  353 {
  354         struct statfs sf;
  355         int error;
  356 
  357         error = kern_fstatfs(td, uap->fd, &sf);
  358         if (error == 0)
  359                 error = copyout(&sf, uap->buf, sizeof(sf));
  360         return (error);
  361 }
  362 
  363 int
  364 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  365 {
  366         struct file *fp;
  367         struct mount *mp;
  368         struct statfs *sp, sb;
  369         struct vnode *vp;
  370         cap_rights_t rights;
  371         int error;
  372 
  373         AUDIT_ARG_FD(fd);
  374         error = getvnode(td->td_proc->p_fd, fd,
  375             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  376         if (error != 0)
  377                 return (error);
  378         vp = fp->f_vnode;
  379         vn_lock(vp, LK_SHARED | LK_RETRY);
  380 #ifdef AUDIT
  381         AUDIT_ARG_VNODE1(vp);
  382 #endif
  383         mp = vp->v_mount;
  384         if (mp)
  385                 vfs_ref(mp);
  386         VOP_UNLOCK(vp, 0);
  387         fdrop(fp, td);
  388         if (mp == NULL) {
  389                 error = EBADF;
  390                 goto out;
  391         }
  392         error = vfs_busy(mp, 0);
  393         vfs_rel(mp);
  394         if (error != 0)
  395                 return (error);
  396 #ifdef MAC
  397         error = mac_mount_check_stat(td->td_ucred, mp);
  398         if (error != 0)
  399                 goto out;
  400 #endif
  401         /*
  402          * Set these in case the underlying filesystem fails to do so.
  403          */
  404         sp = &mp->mnt_stat;
  405         sp->f_version = STATFS_VERSION;
  406         sp->f_namemax = NAME_MAX;
  407         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  408         error = VFS_STATFS(mp, sp);
  409         if (error != 0)
  410                 goto out;
  411         if (priv_check(td, PRIV_VFS_GENERATION)) {
  412                 bcopy(sp, &sb, sizeof(sb));
  413                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  414                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  415                 sp = &sb;
  416         }
  417         *buf = *sp;
  418 out:
  419         if (mp)
  420                 vfs_unbusy(mp);
  421         return (error);
  422 }
  423 
  424 /*
  425  * Get statistics on all filesystems.
  426  */
  427 #ifndef _SYS_SYSPROTO_H_
  428 struct getfsstat_args {
  429         struct statfs *buf;
  430         long bufsize;
  431         int flags;
  432 };
  433 #endif
  434 int
  435 sys_getfsstat(td, uap)
  436         struct thread *td;
  437         register struct getfsstat_args /* {
  438                 struct statfs *buf;
  439                 long bufsize;
  440                 int flags;
  441         } */ *uap;
  442 {
  443 
  444         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  445             uap->flags));
  446 }
  447 
  448 /*
       * Collect statistics for the mounted filesystems the caller may see.
       *
       * At most bufsize / sizeof(struct statfs) records are stored.  With
       * bufsize == 0 nothing is stored and only the mounts are counted.
       * Mounts rejected by prison_canseemount(), by the MAC check, or that
       * cannot be busied without waiting are skipped and not counted.
       *
       * On success td->td_retval[0] holds the number of mounts counted,
       * limited to the buffer capacity when a buffer was supplied.  The
       * return value is 0, or the copyout() error for a user-space buffer.
       *
  449  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  450  *      The caller is responsible for freeing memory which will be allocated
  451  *      in '*buf'.
       *      (The allocation below uses malloc type M_TEMP.)
  452  */
  453 int
  454 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  455     enum uio_seg bufseg, int flags)
  456 {
  457         struct mount *mp, *nmp;
  458         struct statfs *sfsp, *sp, sb;
  459         size_t count, maxcount;
  460         int error;
  461 
  462         maxcount = bufsize / sizeof(struct statfs);
  463         if (bufsize == 0)
  464                 sfsp = NULL;
  465         else if (bufseg == UIO_USERSPACE)
  466                 sfsp = *buf;
  467         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Count the mounts first so the kernel buffer is no
                       * larger than the list was at this moment; maxcount
                       * is lowered to match.
                       */
  468                 count = 0;
  469                 mtx_lock(&mountlist_mtx);
  470                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  471                         count++;
  472                 }
  473                 mtx_unlock(&mountlist_mtx);
  474                 if (maxcount > count)
  475                         maxcount = count;
  476                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  477                     M_WAITOK);
  478         }
  479         count = 0;
  480         mtx_lock(&mountlist_mtx);
  481         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  482                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  483                         nmp = TAILQ_NEXT(mp, mnt_list);
  484                         continue;
  485                 }
  486 #ifdef MAC
  487                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  488                         nmp = TAILQ_NEXT(mp, mnt_list);
  489                         continue;
  490                 }
  491 #endif
                      /*
                       * NOTE(review): the paths after a successful
                       * vfs_busy() re-take mountlist_mtx before touching
                       * the list, so MBF_MNTLSTLOCK presumably makes
                       * vfs_busy() drop it on success -- confirm against
                       * vfs_busy(9).
                       */
  492                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  493                         nmp = TAILQ_NEXT(mp, mnt_list);
  494                         continue;
  495                 }
  496                 if (sfsp && count < maxcount) {
  497                         sp = &mp->mnt_stat;
  498                         /*
  499                          * Set these in case the underlying filesystem
  500                          * fails to do so.
  501                          */
  502                         sp->f_version = STATFS_VERSION;
  503                         sp->f_namemax = NAME_MAX;
  504                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  505                         /*
  506                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  507                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  508                          * overrides MNT_WAIT.
  509                          */
  510                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  511                             (flags & MNT_WAIT)) &&
  512                             (error = VFS_STATFS(mp, sp))) {
                                      /*
                                       * Refresh failed: skip this mount
                                       * without storing or counting it.
                                       */
  513                                 mtx_lock(&mountlist_mtx);
  514                                 nmp = TAILQ_NEXT(mp, mnt_list);
  515                                 vfs_unbusy(mp);
  516                                 continue;
  517                         }
                              /*
                               * Callers failing the PRIV_VFS_GENERATION
                               * check get a private copy with the fsid
                               * zeroed, passed through
                               * prison_enforce_statfs().
                               */
  518                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  519                                 bcopy(sp, &sb, sizeof(sb));
  520                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  521                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  522                                 sp = &sb;
  523                         }
  524                         if (bufseg == UIO_SYSSPACE)
  525                                 bcopy(sp, sfsp, sizeof(*sp));
  526                         else /* if (bufseg == UIO_USERSPACE) */ {
  527                                 error = copyout(sp, sfsp, sizeof(*sp));
  528                                 if (error != 0) {
                                              /*
                                               * Early exit: only the busy
                                               * reference is dropped;
                                               * mountlist_mtx is not
                                               * taken on this path.
                                               */
  529                                         vfs_unbusy(mp);
  530                                         return (error);
  531                                 }
  532                         }
  533                         sfsp++;
  534                 }
                      /*
                       * Counted even when no record was stored (no buffer,
                       * or the buffer is already full).
                       */
  535                 count++;
  536                 mtx_lock(&mountlist_mtx);
  537                 nmp = TAILQ_NEXT(mp, mnt_list);
  538                 vfs_unbusy(mp);
  539         }
  540         mtx_unlock(&mountlist_mtx);
              /* Never report more entries than the buffer could hold. */
  541         if (sfsp && count > maxcount)
  542                 td->td_retval[0] = maxcount;
  543         else
  544                 td->td_retval[0] = count;
  545         return (0);
  546 }
  547 
  548 #ifdef COMPAT_FREEBSD4
  549 /*
  550  * Get old format filesystem statistics.
  551  */
  552 static void cvtstatfs(struct statfs *, struct ostatfs *);
  553 
  554 #ifndef _SYS_SYSPROTO_H_
  555 struct freebsd4_statfs_args {
  556         char *path;
  557         struct ostatfs *buf;
  558 };
  559 #endif
  560 int
  561 freebsd4_statfs(td, uap)
  562         struct thread *td;
  563         struct freebsd4_statfs_args /* {
  564                 char *path;
  565                 struct ostatfs *buf;
  566         } */ *uap;
  567 {
  568         struct ostatfs osb;
  569         struct statfs sf;
  570         int error;
  571 
  572         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  573         if (error != 0)
  574                 return (error);
  575         cvtstatfs(&sf, &osb);
  576         return (copyout(&osb, uap->buf, sizeof(osb)));
  577 }
  578 
  579 /*
  580  * Get filesystem statistics.
  581  */
  582 #ifndef _SYS_SYSPROTO_H_
  583 struct freebsd4_fstatfs_args {
  584         int fd;
  585         struct ostatfs *buf;
  586 };
  587 #endif
  588 int
  589 freebsd4_fstatfs(td, uap)
  590         struct thread *td;
  591         struct freebsd4_fstatfs_args /* {
  592                 int fd;
  593                 struct ostatfs *buf;
  594         } */ *uap;
  595 {
  596         struct ostatfs osb;
  597         struct statfs sf;
  598         int error;
  599 
  600         error = kern_fstatfs(td, uap->fd, &sf);
  601         if (error != 0)
  602                 return (error);
  603         cvtstatfs(&sf, &osb);
  604         return (copyout(&osb, uap->buf, sizeof(osb)));
  605 }
  606 
  607 /*
  608  * Get statistics on all filesystems.
  609  */
  610 #ifndef _SYS_SYSPROTO_H_
  611 struct freebsd4_getfsstat_args {
  612         struct ostatfs *buf;
  613         long bufsize;
  614         int flags;
  615 };
  616 #endif
  617 int
  618 freebsd4_getfsstat(td, uap)
  619         struct thread *td;
  620         register struct freebsd4_getfsstat_args /* {
  621                 struct ostatfs *buf;
  622                 long bufsize;
  623                 int flags;
  624         } */ *uap;
  625 {
  626         struct statfs *buf, *sp;
  627         struct ostatfs osb;
  628         size_t count, size;
  629         int error;
  630 
  631         count = uap->bufsize / sizeof(struct ostatfs);
  632         size = count * sizeof(struct statfs);
  633         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  634         if (size > 0) {
  635                 count = td->td_retval[0];
  636                 sp = buf;
  637                 while (count > 0 && error == 0) {
  638                         cvtstatfs(sp, &osb);
  639                         error = copyout(&osb, uap->buf, sizeof(osb));
  640                         sp++;
  641                         uap->buf++;
  642                         count--;
  643                 }
  644                 free(buf, M_TEMP);
  645         }
  646         return (error);
  647 }
  648 
  649 /*
  650  * Implement fstatfs() for (NFS) file handles.
  651  */
  652 #ifndef _SYS_SYSPROTO_H_
  653 struct freebsd4_fhstatfs_args {
  654         struct fhandle *u_fhp;
  655         struct ostatfs *buf;
  656 };
  657 #endif
  658 int
  659 freebsd4_fhstatfs(td, uap)
  660         struct thread *td;
  661         struct freebsd4_fhstatfs_args /* {
  662                 struct fhandle *u_fhp;
  663                 struct ostatfs *buf;
  664         } */ *uap;
  665 {
  666         struct ostatfs osb;
  667         struct statfs sf;
  668         fhandle_t fh;
  669         int error;
  670 
  671         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  672         if (error != 0)
  673                 return (error);
  674         error = kern_fhstatfs(td, fh, &sf);
  675         if (error != 0)
  676                 return (error);
  677         cvtstatfs(&sf, &osb);
  678         return (copyout(&osb, uap->buf, sizeof(osb)));
  679 }
  680 
  681 /*
  682  * Convert a new format statfs structure to an old format statfs structure.
  683  */
  684 static void
  685 cvtstatfs(nsp, osp)
  686         struct statfs *nsp;
  687         struct ostatfs *osp;
  688 {
  689 
  690         statfs_scale_blocks(nsp, LONG_MAX);
  691         bzero(osp, sizeof(*osp));
  692         osp->f_bsize = nsp->f_bsize;
  693         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  694         osp->f_blocks = nsp->f_blocks;
  695         osp->f_bfree = nsp->f_bfree;
  696         osp->f_bavail = nsp->f_bavail;
  697         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  698         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  699         osp->f_owner = nsp->f_owner;
  700         osp->f_type = nsp->f_type;
  701         osp->f_flags = nsp->f_flags;
  702         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  703         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  704         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  705         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  706         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  707             MIN(MFSNAMELEN, OMFSNAMELEN));
  708         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  709             MIN(MNAMELEN, OMNAMELEN));
  710         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  711             MIN(MNAMELEN, OMNAMELEN));
  712         osp->f_fsid = nsp->f_fsid;
  713 }
  714 #endif /* COMPAT_FREEBSD4 */
  715 
  716 /*
  717  * Change current working directory to a given file descriptor.
  718  */
  719 #ifndef _SYS_SYSPROTO_H_
  720 struct fchdir_args {
  721         int     fd;
  722 };
  723 #endif
  724 int
  725 sys_fchdir(td, uap)
  726         struct thread *td;
  727         struct fchdir_args /* {
  728                 int fd;
  729         } */ *uap;
  730 {
  731         register struct filedesc *fdp = td->td_proc->p_fd;
  732         struct vnode *vp, *tdp, *vpold;
  733         struct mount *mp;
  734         struct file *fp;
  735         cap_rights_t rights;
  736         int error;
  737 
  738         AUDIT_ARG_FD(uap->fd);
  739         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  740             &fp);
  741         if (error != 0)
  742                 return (error);
  743         vp = fp->f_vnode;
  744         VREF(vp);
  745         fdrop(fp, td);
  746         vn_lock(vp, LK_SHARED | LK_RETRY);
  747         AUDIT_ARG_VNODE1(vp);
  748         error = change_dir(vp, td);
  749         while (!error && (mp = vp->v_mountedhere) != NULL) {
  750                 if (vfs_busy(mp, 0))
  751                         continue;
  752                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  753                 vfs_unbusy(mp);
  754                 if (error != 0)
  755                         break;
  756                 vput(vp);
  757                 vp = tdp;
  758         }
  759         if (error != 0) {
  760                 vput(vp);
  761                 return (error);
  762         }
  763         VOP_UNLOCK(vp, 0);
  764         FILEDESC_XLOCK(fdp);
  765         vpold = fdp->fd_cdir;
  766         fdp->fd_cdir = vp;
  767         FILEDESC_XUNLOCK(fdp);
  768         vrele(vpold);
  769         return (0);
  770 }
  771 
  772 /*
  773  * Change current working directory (``.'').
  774  */
  775 #ifndef _SYS_SYSPROTO_H_
  776 struct chdir_args {
  777         char    *path;
  778 };
  779 #endif
  780 int
  781 sys_chdir(td, uap)
  782         struct thread *td;
  783         struct chdir_args /* {
  784                 char *path;
  785         } */ *uap;
  786 {
  787 
  788         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  789 }
  790 
  791 int
  792 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  793 {
  794         register struct filedesc *fdp = td->td_proc->p_fd;
  795         struct nameidata nd;
  796         struct vnode *vp;
  797         int error;
  798 
  799         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  800             pathseg, path, td);
  801         if ((error = namei(&nd)) != 0)
  802                 return (error);
  803         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  804                 vput(nd.ni_vp);
  805                 NDFREE(&nd, NDF_ONLY_PNBUF);
  806                 return (error);
  807         }
  808         VOP_UNLOCK(nd.ni_vp, 0);
  809         NDFREE(&nd, NDF_ONLY_PNBUF);
  810         FILEDESC_XLOCK(fdp);
  811         vp = fdp->fd_cdir;
  812         fdp->fd_cdir = nd.ni_vp;
  813         FILEDESC_XUNLOCK(fdp);
  814         vrele(vp);
  815         return (0);
  816 }
  817 
  818 /*
  819  * Helper function for raised chroot(2) security function:  Refuse if
  820  * any filedescriptors are open directories.
  821  */
  822 static int
  823 chroot_refuse_vdir_fds(fdp)
  824         struct filedesc *fdp;
  825 {
  826         struct vnode *vp;
  827         struct file *fp;
  828         int fd;
  829 
  830         FILEDESC_LOCK_ASSERT(fdp);
  831 
  832         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  833                 fp = fget_locked(fdp, fd);
  834                 if (fp == NULL)
  835                         continue;
  836                 if (fp->f_type == DTYPE_VNODE) {
  837                         vp = fp->f_vnode;
  838                         if (vp->v_type == VDIR)
  839                                 return (EPERM);
  840                 }
  841         }
  842         return (0);
  843 }
  844 
  845 /*
  846  * This sysctl determines if we will allow a process to chroot(2) if it
  847  * has a directory open:
  848  *      0: disallowed for all processes.
  849  *      1: allowed for processes that were not already chroot(2)'ed.
  850  *      2: allowed for all processes.
       *
       * The initial value is 1.  change_root() reads this variable.
  851  */
  852 
  853 static int chroot_allow_open_directories = 1;
  854 
      /* Exported as a read-write integer under the _kern sysctl node. */
  855 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  856      &chroot_allow_open_directories, 0,
  857      "Allow a process to chroot(2) if it has a directory open");
  858 
  859 /*
  860  * Change notion of root (``/'') directory.
  861  */
  862 #ifndef _SYS_SYSPROTO_H_
  863 struct chroot_args {
  864         char    *path;
  865 };
  866 #endif
  867 int
  868 sys_chroot(td, uap)
  869         struct thread *td;
  870         struct chroot_args /* {
  871                 char *path;
  872         } */ *uap;
  873 {
  874         struct nameidata nd;
  875         int error;
  876 
  877         error = priv_check(td, PRIV_VFS_CHROOT);
  878         if (error != 0)
  879                 return (error);
  880         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  881             UIO_USERSPACE, uap->path, td);
  882         error = namei(&nd);
  883         if (error != 0)
  884                 goto error;
  885         error = change_dir(nd.ni_vp, td);
  886         if (error != 0)
  887                 goto e_vunlock;
  888 #ifdef MAC
  889         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  890         if (error != 0)
  891                 goto e_vunlock;
  892 #endif
  893         VOP_UNLOCK(nd.ni_vp, 0);
  894         error = change_root(nd.ni_vp, td);
  895         vrele(nd.ni_vp);
  896         NDFREE(&nd, NDF_ONLY_PNBUF);
  897         return (error);
  898 e_vunlock:
  899         vput(nd.ni_vp);
  900 error:
  901         NDFREE(&nd, NDF_ONLY_PNBUF);
  902         return (error);
  903 }
  904 
  905 /*
  906  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  907  * instance.
  908  */
  909 int
  910 change_dir(vp, td)
  911         struct vnode *vp;
  912         struct thread *td;
  913 {
  914 #ifdef MAC
  915         int error;
  916 #endif
  917 
  918         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  919         if (vp->v_type != VDIR)
  920                 return (ENOTDIR);
  921 #ifdef MAC
  922         error = mac_vnode_check_chdir(td->td_ucred, vp);
  923         if (error != 0)
  924                 return (error);
  925 #endif
  926         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  927 }
  928 
  929 /*
  930  * Common routine for kern_chroot() and jail_attach().  The caller is
  931  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  932  * authorize this operation.
  933  */
  934 int
  935 change_root(vp, td)
  936         struct vnode *vp;
  937         struct thread *td;
  938 {
  939         struct filedesc *fdp;
  940         struct vnode *oldvp;
  941         int error;
  942 
  943         fdp = td->td_proc->p_fd;
  944         FILEDESC_XLOCK(fdp);
  945         if (chroot_allow_open_directories == 0 ||
  946             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  947                 error = chroot_refuse_vdir_fds(fdp);
  948                 if (error != 0) {
  949                         FILEDESC_XUNLOCK(fdp);
  950                         return (error);
  951                 }
  952         }
  953         oldvp = fdp->fd_rdir;
  954         fdp->fd_rdir = vp;
  955         VREF(fdp->fd_rdir);
  956         if (!fdp->fd_jdir) {
  957                 fdp->fd_jdir = vp;
  958                 VREF(fdp->fd_jdir);
  959         }
  960         FILEDESC_XUNLOCK(fdp);
  961         vrele(oldvp);
  962         return (0);
  963 }
  964 
  965 static __inline void
  966 flags_to_rights(int flags, cap_rights_t *rightsp)
  967 {
  968 
  969         if (flags & O_EXEC) {
  970                 cap_rights_set(rightsp, CAP_FEXECVE);
  971         } else {
  972                 switch ((flags & O_ACCMODE)) {
  973                 case O_RDONLY:
  974                         cap_rights_set(rightsp, CAP_READ);
  975                         break;
  976                 case O_RDWR:
  977                         cap_rights_set(rightsp, CAP_READ);
  978                         /* FALLTHROUGH */
  979                 case O_WRONLY:
  980                         cap_rights_set(rightsp, CAP_WRITE);
  981                         if (!(flags & (O_APPEND | O_TRUNC)))
  982                                 cap_rights_set(rightsp, CAP_SEEK);
  983                         break;
  984                 }
  985         }
  986 
  987         if (flags & O_CREAT)
  988                 cap_rights_set(rightsp, CAP_CREATE);
  989 
  990         if (flags & O_TRUNC)
  991                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  992 
  993         if (flags & (O_SYNC | O_FSYNC))
  994                 cap_rights_set(rightsp, CAP_FSYNC);
  995 
  996         if (flags & (O_EXLOCK | O_SHLOCK))
  997                 cap_rights_set(rightsp, CAP_FLOCK);
  998 }
  999 
 1000 /*
 1001  * Check permissions, allocate an open file structure, and call the device
 1002  * open routine if any.
 1003  */
 1004 #ifndef _SYS_SYSPROTO_H_
 1005 struct open_args {
 1006         char    *path;
 1007         int     flags;
 1008         int     mode;
 1009 };
 1010 #endif
 1011 int
 1012 sys_open(td, uap)
 1013         struct thread *td;
 1014         register struct open_args /* {
 1015                 char *path;
 1016                 int flags;
 1017                 int mode;
 1018         } */ *uap;
 1019 {
 1020 
 1021         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1022 }
 1023 
 1024 #ifndef _SYS_SYSPROTO_H_
 1025 struct openat_args {
 1026         int     fd;
 1027         char    *path;
 1028         int     flag;
 1029         int     mode;
 1030 };
 1031 #endif
      /*
       * openat(2) system call entry point: forwards the directory descriptor,
       * the user-space path, the open flags and the creation mode to
       * kern_openat().
       */
 1032 int
 1033 sys_openat(struct thread *td, struct openat_args *uap)
 1034 {
 1035 
 1036         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1037             uap->mode));
 1038 }
 1039 
      /*
       * Open path (located in address space pathseg) by calling kern_openat()
       * with AT_FDCWD as the directory descriptor.
       */
 1040 int
 1041 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1042     int mode)
 1043 {
 1044 
 1045         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1046 }
 1047 
      /*
       * Open path, looked up relative to directory descriptor fd, and install the
       * resulting file in the descriptor table of the calling process.
       *
       * On success the new descriptor index is stored in td->td_retval[0] and 0
       * is returned; otherwise an errno value is returned and the file obtained
       * from falloc_noinstall() is dropped.  EINVAL is returned when O_EXEC is
       * combined with any access-mode bit, or when all O_ACCMODE bits are set.
       */
 1048 int
 1049 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1050     int flags, int mode)
 1051 {
 1052         struct proc *p = td->td_proc;
 1053         struct filedesc *fdp = p->p_fd;
 1054         struct file *fp;
 1055         struct vnode *vp;
 1056         struct nameidata nd;
 1057         cap_rights_t rights;
 1058         int cmode, error, indx;
 1059 
              /* -1 means "no descriptor installed yet"; see the success path. */
 1060         indx = -1;
 1061 
 1062         AUDIT_ARG_FFLAGS(flags);
 1063         AUDIT_ARG_MODE(mode);
 1064         /* XXX: audit dirfd */
 1065         cap_rights_init(&rights, CAP_LOOKUP);
 1066         flags_to_rights(flags, &rights);
 1067         /*
 1068          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1069          * may be specified.
 1070          */
 1071         if (flags & O_EXEC) {
 1072                 if (flags & O_ACCMODE)
 1073                         return (EINVAL);
 1074         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1075                 return (EINVAL);
 1076         } else {
 1077                 flags = FFLAGS(flags);
 1078         }
 1079 
 1080         /*
 1081          * Allocate the file descriptor, but don't install a descriptor yet.
 1082          */
 1083         error = falloc_noinstall(td, &fp);
 1084         if (error != 0)
 1085                 return (error);
 1086         /*
 1087          * An extra reference on `fp' has been held for us by
 1088          * falloc_noinstall().
 1089          */
 1090         /* Set the flags early so the finit in devfs can pick them up. */
 1091         fp->f_flag = flags & FMASK;
              /*
               * Creation mode: mask with the process umask (fd_cmask), keep only
               * the ALLPERMS bits and clear S_ISTXT.
               */
 1092         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1093         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1094             &rights, td);
 1095         td->td_dupfd = -1;              /* XXX check for fdopen */
 1096         error = vn_open(&nd, &flags, cmode, fp);
 1097         if (error != 0) {
 1098                 /*
 1099                  * If the vn_open replaced the method vector, something
 1100                  * wonderous happened deep below and we just pass it up
 1101                  * pretending we know what we do.
 1102                  */
 1103                 if (error == ENXIO && fp->f_ops != &badfileops)
 1104                         goto success;
 1105 
 1106                 /*
 1107                  * Handle special fdopen() case. bleh.
 1108                  *
 1109                  * Don't do this for relative (capability) lookups; we don't
 1110                  * understand exactly what would happen, and we don't think
 1111                  * that it ever should.
 1112                  */
 1113                 if (nd.ni_strictrelative == 0 &&
 1114                     (error == ENODEV || error == ENXIO) &&
 1115                     td->td_dupfd >= 0) {
 1116                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1117                             &indx);
 1118                         if (error == 0)
 1119                                 goto success;
 1120                 }
 1121 
 1122                 goto bad;
 1123         }
 1124         td->td_dupfd = 0;
 1125         NDFREE(&nd, NDF_ONLY_PNBUF);
 1126         vp = nd.ni_vp;
 1127 
 1128         /*
 1129          * Store the vnode, for any f_type. Typically, the vnode use
 1130          * count is decremented by direct call to vn_closefile() for
 1131          * files that switched type in the cdevsw fdopen() method.
 1132          */
 1133         fp->f_vnode = vp;
 1134         /*
 1135          * If the file wasn't claimed by devfs bind it to the normal
 1136          * vnode operations here.
 1137          */
 1138         if (fp->f_ops == &badfileops) {
 1139                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1140                 fp->f_seqcount = 1;
 1141                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1142                     DTYPE_VNODE, vp, &vnops);
 1143         }
 1144 
 1145         VOP_UNLOCK(vp, 0);
 1146         if (flags & O_TRUNC) {
 1147                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1148                 if (error != 0)
 1149                         goto bad;
 1150         }
 1151 success:
 1152         /*
 1153          * If we haven't already installed the FD (for dupfdopen), do so now.
 1154          */
 1155         if (indx == -1) {
 1156                 struct filecaps *fcaps;
 1157 
 1158 #ifdef CAPABILITIES
 1159                 if (nd.ni_strictrelative == 1)
 1160                         fcaps = &nd.ni_filecaps;
 1161                 else
 1162 #endif
 1163                         fcaps = NULL;
 1164                 error = finstall(td, fp, &indx, flags, fcaps);
 1165                 /* On success finstall() consumes fcaps. */
 1166                 if (error != 0) {
 1167                         filecaps_free(&nd.ni_filecaps);
 1168                         goto bad;
 1169                 }
 1170         } else {
 1171                 filecaps_free(&nd.ni_filecaps);
 1172         }
 1173 
 1174         /*
 1175          * Release our private reference, leaving the one associated with
 1176          * the descriptor table intact.
 1177          */
 1178         fdrop(fp, td);
 1179         td->td_retval[0] = indx;
 1180         return (0);
 1181 bad:
              /*
               * Error exit: no descriptor may have been installed at this point
               * (asserted below), so dropping our reference on fp is sufficient.
               * NOTE(review): the jumps to `bad' taken before `success' (vn_open()
               * or fo_truncate() failure) do not call filecaps_free(); confirm
               * nd.ni_filecaps holds nothing that needs freeing on those paths.
               */
 1182         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1183         fdrop(fp, td);
 1184         return (error);
 1185 }
 1186 
 1187 #ifdef COMPAT_43
 1188 /*
 1189  * Create a file.
 1190  */
 1191 #ifndef _SYS_SYSPROTO_H_
 1192 struct ocreat_args {
 1193         char    *path;
 1194         int     mode;
 1195 };
 1196 #endif
 1197 int
 1198 ocreat(td, uap)
 1199         struct thread *td;
 1200         register struct ocreat_args /* {
 1201                 char *path;
 1202                 int mode;
 1203         } */ *uap;
 1204 {
 1205 
 1206         return (kern_open(td, uap->path, UIO_USERSPACE,
 1207             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1208 }
 1209 #endif /* COMPAT_43 */
 1210 
 1211 /*
 1212  * Create a special file.
 1213  */
 1214 #ifndef _SYS_SYSPROTO_H_
 1215 struct mknod_args {
 1216         char    *path;
 1217         int     mode;
 1218         int     dev;
 1219 };
 1220 #endif
 1221 int
 1222 sys_mknod(td, uap)
 1223         struct thread *td;
 1224         register struct mknod_args /* {
 1225                 char *path;
 1226                 int mode;
 1227                 int dev;
 1228         } */ *uap;
 1229 {
 1230 
 1231         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1232 }
 1233 
 1234 #ifndef _SYS_SYSPROTO_H_
 1235 struct mknodat_args {
 1236         int     fd;
 1237         char    *path;
 1238         mode_t  mode;
 1239         dev_t   dev;
 1240 };
 1241 #endif
      /*
       * mknodat(2) system call entry point: forwards the directory descriptor,
       * user-space path, mode and device number to kern_mknodat().
       */
 1242 int
 1243 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1244 {
 1245 
 1246         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1247             uap->dev));
 1248 }
 1249 
      /*
       * Create a special file at path by calling kern_mknodat() with AT_FDCWD
       * as the directory descriptor.
       */
 1250 int
 1251 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1252     int dev)
 1253 {
 1254 
 1255         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1256 }
 1257 
      /*
       * Create a special file named path, looked up relative to directory
       * descriptor fd.
       *
       * The file type bits of mode select both the required privilege and the
       * node type: character/block devices, the S_IFMT "bad sector" type and
       * whiteouts are handled here; a FIFO with dev == 0 is handed to
       * kern_mkfifoat(); everything else yields EINVAL.  Returns EEXIST if the
       * name already exists, otherwise 0 or an errno value.
       */
 1258 int
 1259 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1260     int mode, int dev)
 1261 {
 1262         struct vnode *vp;
 1263         struct mount *mp;
 1264         struct vattr vattr;
 1265         struct nameidata nd;
 1266         cap_rights_t rights;
 1267         int error, whiteout = 0;
 1268 
 1269         AUDIT_ARG_MODE(mode);
 1270         AUDIT_ARG_DEV(dev);
              /* Authorize the request according to the requested node type. */
 1271         switch (mode & S_IFMT) {
 1272         case S_IFCHR:
 1273         case S_IFBLK:
 1274                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1275                 break;
 1276         case S_IFMT:
 1277                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1278                 break;
 1279         case S_IFWHT:
 1280                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1281                 break;
 1282         case S_IFIFO:
 1283                 if (dev == 0)
 1284                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1285                 /* FALLTHROUGH */
 1286         default:
 1287                 error = EINVAL;
 1288                 break;
 1289         }
 1290         if (error != 0)
 1291                 return (error);
 1292 restart:
 1293         bwillwrite();
 1294         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1295             pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td);
 1296         if ((error = namei(&nd)) != 0)
 1297                 return (error);
 1298         vp = nd.ni_vp;
 1299         if (vp != NULL) {
                      /* The name already exists: release the lookup results. */
 1300                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1301                 if (vp == nd.ni_dvp)
 1302                         vrele(nd.ni_dvp);
 1303                 else
 1304                         vput(nd.ni_dvp);
 1305                 vrele(vp);
 1306                 return (EEXIST);
 1307         } else {
 1308                 VATTR_NULL(&vattr);
 1309                 vattr.va_mode = (mode & ALLPERMS) &
 1310                     ~td->td_proc->p_fd->fd_cmask;
 1311                 vattr.va_rdev = dev;
 1312                 whiteout = 0;
 1313 
 1314                 switch (mode & S_IFMT) {
 1315                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1316                         vattr.va_type = VBAD;
 1317                         break;
 1318                 case S_IFCHR:
 1319                         vattr.va_type = VCHR;
 1320                         break;
 1321                 case S_IFBLK:
 1322                         vattr.va_type = VBLK;
 1323                         break;
 1324                 case S_IFWHT:
 1325                         whiteout = 1;
 1326                         break;
 1327                 default:
                              /*
                               * Not expected: the first switch returned for
                               * every other file type.
                               */
 1328                         panic("kern_mknod: invalid mode");
 1329                 }
 1330         }
              /*
               * If the write cannot be started without waiting, release the
               * lookup state, call vn_start_write() again with V_XSLEEP and redo
               * the lookup from the top.
               */
 1331         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1332                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1333                 vput(nd.ni_dvp);
 1334                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1335                         return (error);
 1336                 goto restart;
 1337         }
 1338 #ifdef MAC
              /* error is 0 here; the MAC create check is skipped for whiteouts. */
 1339         if (error == 0 && !whiteout)
 1340                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1341                     &nd.ni_cnd, &vattr);
 1342 #endif
 1343         if (error == 0) {
 1344                 if (whiteout)
 1345                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1346                 else {
 1347                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1348                                                 &nd.ni_cnd, &vattr);
 1349                         if (error == 0)
 1350                                 vput(nd.ni_vp);
 1351                 }
 1352         }
 1353         NDFREE(&nd, NDF_ONLY_PNBUF);
 1354         vput(nd.ni_dvp);
 1355         vn_finished_write(mp);
 1356         return (error);
 1357 }
 1358 
 1359 /*
 1360  * Create a named pipe.
 1361  */
 1362 #ifndef _SYS_SYSPROTO_H_
 1363 struct mkfifo_args {
 1364         char    *path;
 1365         int     mode;
 1366 };
 1367 #endif
 1368 int
 1369 sys_mkfifo(td, uap)
 1370         struct thread *td;
 1371         register struct mkfifo_args /* {
 1372                 char *path;
 1373                 int mode;
 1374         } */ *uap;
 1375 {
 1376 
 1377         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1378 }
 1379 
 1380 #ifndef _SYS_SYSPROTO_H_
 1381 struct mkfifoat_args {
 1382         int     fd;
 1383         char    *path;
 1384         mode_t  mode;
 1385 };
 1386 #endif
      /*
       * mkfifoat(2) system call entry point: forwards the directory descriptor,
       * user-space path and mode to kern_mkfifoat().
       */
 1387 int
 1388 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1389 {
 1390 
 1391         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1392             uap->mode));
 1393 }
 1394 
      /*
       * Create a named pipe at path by calling kern_mkfifoat() with AT_FDCWD as
       * the directory descriptor.
       */
 1395 int
 1396 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1397 {
 1398 
 1399         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1400 }
 1401 
      /*
       * Create a named pipe called path, looked up relative to directory
       * descriptor fd.  The node is created with type VFIFO and the permission
       * bits of mode masked by the process umask (fd_cmask).
       *
       * Returns EEXIST if the name already exists, otherwise 0 or an errno value
       * from the lookup, the MAC create check (when configured) or VOP_MKNOD().
       */
 1402 int
 1403 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1404     int mode)
 1405 {
 1406         struct mount *mp;
 1407         struct vattr vattr;
 1408         struct nameidata nd;
 1409         cap_rights_t rights;
 1410         int error;
 1411 
 1412         AUDIT_ARG_MODE(mode);
 1413 restart:
 1414         bwillwrite();
 1415         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1416             pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td);
 1417         if ((error = namei(&nd)) != 0)
 1418                 return (error);
 1419         if (nd.ni_vp != NULL) {
                      /* The name already exists: release the lookup results. */
 1420                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1421                 if (nd.ni_vp == nd.ni_dvp)
 1422                         vrele(nd.ni_dvp);
 1423                 else
 1424                         vput(nd.ni_dvp);
 1425                 vrele(nd.ni_vp);
 1426                 return (EEXIST);
 1427         }
              /*
               * If the write cannot be started without waiting, release the
               * lookup state, call vn_start_write() again with V_XSLEEP and redo
               * the lookup from the top.
               */
 1428         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1429                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1430                 vput(nd.ni_dvp);
 1431                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1432                         return (error);
 1433                 goto restart;
 1434         }
 1435         VATTR_NULL(&vattr);
 1436         vattr.va_type = VFIFO;
 1437         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1438 #ifdef MAC
 1439         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1440             &vattr);
 1441         if (error != 0)
 1442                 goto out;
 1443 #endif
 1444         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1445         if (error == 0)
 1446                 vput(nd.ni_vp);
 1447 #ifdef MAC
 1448 out:
 1449 #endif
              /* Common exit once the write was started: undo it and the lookup. */
 1450         vput(nd.ni_dvp);
 1451         vn_finished_write(mp);
 1452         NDFREE(&nd, NDF_ONLY_PNBUF);
 1453         return (error);
 1454 }
 1455 
 1456 /*
 1457  * Make a hard file link.
 1458  */
 1459 #ifndef _SYS_SYSPROTO_H_
 1460 struct link_args {
 1461         char    *path;
 1462         char    *link;
 1463 };
 1464 #endif
 1465 int
 1466 sys_link(td, uap)
 1467         struct thread *td;
 1468         register struct link_args /* {
 1469                 char *path;
 1470                 char *link;
 1471         } */ *uap;
 1472 {
 1473 
 1474         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1475 }
 1476 
 1477 #ifndef _SYS_SYSPROTO_H_
 1478 struct linkat_args {
 1479         int     fd1;
 1480         char    *path1;
 1481         int     fd2;
 1482         char    *path2;
 1483         int     flag;
 1484 };
 1485 #endif
 1486 int
 1487 sys_linkat(struct thread *td, struct linkat_args *uap)
 1488 {
 1489         int flag;
 1490 
 1491         flag = uap->flag;
 1492         if (flag & ~AT_SYMLINK_FOLLOW)
 1493                 return (EINVAL);
 1494 
 1495         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1496             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1497 }
 1498 
      /*
       * Optional hard-link restrictions consulted by can_hardlink().  Both
       * default to 0 (no restriction) and are exported read/write under the
       * _security_bsd sysctl node.
       * NOTE(review): hardlink_check_uid has external linkage while
       * hardlink_check_gid is static; confirm whether another file needs it.
       */
 1499 int hardlink_check_uid = 0;
 1500 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1501     &hardlink_check_uid, 0,
 1502     "Unprivileged processes cannot create hard links to files owned by other "
 1503     "users");
 1504 static int hardlink_check_gid = 0;
 1505 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1506     &hardlink_check_gid, 0,
 1507     "Unprivileged processes cannot create hard links to files owned by other "
 1508     "groups");
 1509 
 1510 static int
 1511 can_hardlink(struct vnode *vp, struct ucred *cred)
 1512 {
 1513         struct vattr va;
 1514         int error;
 1515 
 1516         if (!hardlink_check_uid && !hardlink_check_gid)
 1517                 return (0);
 1518 
 1519         error = VOP_GETATTR(vp, &va, cred);
 1520         if (error != 0)
 1521                 return (error);
 1522 
 1523         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1524                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1525                 if (error != 0)
 1526                         return (error);
 1527         }
 1528 
 1529         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1530                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1531                 if (error != 0)
 1532                         return (error);
 1533         }
 1534 
 1535         return (0);
 1536 }
 1537 
      /*
       * Create hard link `link' to `path' by calling kern_linkat() with AT_FDCWD
       * for both directory descriptors and FOLLOW for the source lookup.
       */
 1538 int
 1539 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1540 {
 1541 
 1542         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1543 }
 1544 
      /*
       * Create a hard link: path2 (relative to fd2) becomes a new name for the
       * file found at path1 (relative to fd1).  `follow' is passed as a lookup
       * flag for path1.
       *
       * Returns EPERM if the source is a directory, EEXIST if the target name
       * already exists, otherwise 0 or an errno value from the lookups,
       * can_hardlink(), the MAC link check (when configured) or VOP_LINK().
       */
 1545 int
 1546 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1547     enum uio_seg segflg, int follow)
 1548 {
 1549         struct vnode *vp;
 1550         struct mount *mp;
 1551         struct nameidata nd;
 1552         cap_rights_t rights;
 1553         int error;
 1554 
 1555         bwillwrite();
 1556         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1557 
 1558         if ((error = namei(&nd)) != 0)
 1559                 return (error);
 1560         NDFREE(&nd, NDF_ONLY_PNBUF);
              /* vp is the link source; it is released with vrele() on every exit. */
 1561         vp = nd.ni_vp;
 1562         if (vp->v_type == VDIR) {
 1563                 vrele(vp);
 1564                 return (EPERM);         /* POSIX */
 1565         }
 1566         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1567                 vrele(vp);
 1568                 return (error);
 1569         }
              /* nd is reused for the second lookup, of the new name's parent. */
 1570         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2,
 1571             segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td);
 1572         if ((error = namei(&nd)) == 0) {
 1573                 if (nd.ni_vp != NULL) {
 1574                         if (nd.ni_dvp == nd.ni_vp)
 1575                                 vrele(nd.ni_dvp);
 1576                         else
 1577                                 vput(nd.ni_dvp);
 1578                         vrele(nd.ni_vp);
 1579                         error = EEXIST;
                      /*
                       * NOTE(review): if vn_lock() returned an error here,
                       * nd.ni_dvp would not be released on this path; confirm
                       * that LK_RETRY rules that out.
                       */
 1580                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1581                     == 0) {
 1582                         error = can_hardlink(vp, td->td_ucred);
                              /*
                               * The two `if (error == 0)' lines chain across the
                               * #ifdef: with MAC, VOP_LINK() runs only if both
                               * checks pass; without MAC, the first `if' guards
                               * VOP_LINK() directly.
                               */
 1583                         if (error == 0)
 1584 #ifdef MAC
 1585                                 error = mac_vnode_check_link(td->td_ucred,
 1586                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1587                         if (error == 0)
 1588 #endif
 1589                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1590                         VOP_UNLOCK(vp, 0);
 1591                         vput(nd.ni_dvp);
 1592                 }
 1593                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1594         }
 1595         vrele(vp);
 1596         vn_finished_write(mp);
 1597         return (error);
 1598 }
 1599 
 1600 /*
 1601  * Make a symbolic link.
 1602  */
 1603 #ifndef _SYS_SYSPROTO_H_
 1604 struct symlink_args {
 1605         char    *path;
 1606         char    *link;
 1607 };
 1608 #endif
 1609 int
 1610 sys_symlink(td, uap)
 1611         struct thread *td;
 1612         register struct symlink_args /* {
 1613                 char *path;
 1614                 char *link;
 1615         } */ *uap;
 1616 {
 1617 
 1618         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1619 }
 1620 
 1621 #ifndef _SYS_SYSPROTO_H_
 1622 struct symlinkat_args {
 1623         char    *path;
 1624         int     fd;
 1625         char    *path2;
 1626 };
 1627 #endif
 1628 int
 1629 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1630 {
 1631 
 1632         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1633             UIO_USERSPACE));
 1634 }
 1635 
      /*
       * Create symbolic link `link' with contents `path' by calling
       * kern_symlinkat() with AT_FDCWD as the directory descriptor.
       */
 1636 int
 1637 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1638 {
 1639 
 1640         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1641 }
 1642 
      /*
       * Create a symbolic link named path2 (looked up relative to directory
       * descriptor fd) whose contents are the string path1.
       *
       * When segflg is not UIO_SYSSPACE, path1 is first copied into a buffer
       * from namei_zone (at most MAXPATHLEN bytes); that buffer is freed on
       * every exit through `out'.  Returns EEXIST if path2 already exists,
       * otherwise 0 or an errno value.
       */
 1643 int
 1644 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1645     enum uio_seg segflg)
 1646 {
 1647         struct mount *mp;
 1648         struct vattr vattr;
 1649         char *syspath;
 1650         struct nameidata nd;
 1651         int error;
 1652         cap_rights_t rights;
 1653 
 1654         if (segflg == UIO_SYSSPACE) {
 1655                 syspath = path1;
 1656         } else {
 1657                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1658                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1659                         goto out;
 1660         }
 1661         AUDIT_ARG_TEXT(syspath);
 1662 restart:
 1663         bwillwrite();
 1664         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1665             segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td);
 1666         if ((error = namei(&nd)) != 0)
 1667                 goto out;
 1668         if (nd.ni_vp) {
                      /* The name already exists: release the lookup results. */
 1669                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1670                 if (nd.ni_vp == nd.ni_dvp)
 1671                         vrele(nd.ni_dvp);
 1672                 else
 1673                         vput(nd.ni_dvp);
 1674                 vrele(nd.ni_vp);
 1675                 error = EEXIST;
 1676                 goto out;
 1677         }
              /*
               * If the write cannot be started without waiting, release the
               * lookup state, call vn_start_write() again with V_XSLEEP and redo
               * the lookup from `restart'.
               */
 1678         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1679                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1680                 vput(nd.ni_dvp);
 1681                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1682                         goto out;
 1683                 goto restart;
 1684         }
 1685         VATTR_NULL(&vattr);
 1686         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1687 #ifdef MAC
              /* va_type is filled in only here, for the MAC create check. */
 1688         vattr.va_type = VLNK;
 1689         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1690             &vattr);
 1691         if (error != 0)
 1692                 goto out2;
 1693 #endif
 1694         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1695         if (error == 0)
 1696                 vput(nd.ni_vp);
 1697 #ifdef MAC
 1698 out2:
 1699 #endif
 1700         NDFREE(&nd, NDF_ONLY_PNBUF);
 1701         vput(nd.ni_dvp);
 1702         vn_finished_write(mp);
 1703 out:
              /* Free the copy of path1 if one was allocated above. */
 1704         if (segflg != UIO_SYSSPACE)
 1705                 uma_zfree(namei_zone, syspath);
 1706         return (error);
 1707 }
 1708 
 1709 /*
 1710  * Delete a whiteout from the filesystem.
 1711  */
 1712 int
 1713 sys_undelete(td, uap)
 1714         struct thread *td;
 1715         register struct undelete_args /* {
 1716                 char *path;
 1717         } */ *uap;
 1718 {
 1719         struct mount *mp;
 1720         struct nameidata nd;
 1721         int error;
 1722 
 1723 restart:
 1724         bwillwrite();
 1725         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1726             UIO_USERSPACE, uap->path, td);
 1727         error = namei(&nd);
 1728         if (error != 0)
 1729                 return (error);
 1730 
 1731         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1732                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1733                 if (nd.ni_vp == nd.ni_dvp)
 1734                         vrele(nd.ni_dvp);
 1735                 else
 1736                         vput(nd.ni_dvp);
 1737                 if (nd.ni_vp)
 1738                         vrele(nd.ni_vp);
 1739                 return (EEXIST);
 1740         }
 1741         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1742                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1743                 vput(nd.ni_dvp);
 1744                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1745                         return (error);
 1746                 goto restart;
 1747         }
 1748         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1749         NDFREE(&nd, NDF_ONLY_PNBUF);
 1750         vput(nd.ni_dvp);
 1751         vn_finished_write(mp);
 1752         return (error);
 1753 }
 1754 
 1755 /*
 1756  * Delete a name from the filesystem.
 1757  */
 1758 #ifndef _SYS_SYSPROTO_H_
 1759 struct unlink_args {
 1760         char    *path;
 1761 };
 1762 #endif
 1763 int
 1764 sys_unlink(td, uap)
 1765         struct thread *td;
 1766         struct unlink_args /* {
 1767                 char *path;
 1768         } */ *uap;
 1769 {
 1770 
 1771         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1772 }
 1773 
 1774 #ifndef _SYS_SYSPROTO_H_
 1775 struct unlinkat_args {
 1776         int     fd;
 1777         char    *path;
 1778         int     flag;
 1779 };
 1780 #endif
 1781 int
 1782 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1783 {
 1784         int flag = uap->flag;
 1785         int fd = uap->fd;
 1786         char *path = uap->path;
 1787 
 1788         if (flag & ~AT_REMOVEDIR)
 1789                 return (EINVAL);
 1790 
 1791         if (flag & AT_REMOVEDIR)
 1792                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1793         else
 1794                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1795 }
 1796 
      /*
       * Remove the name path by calling kern_unlinkat() with AT_FDCWD as the
       * directory descriptor and 0 as the expected inode number.
       */
 1797 int
 1798 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1799 {
 1800 
 1801         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1802 }
 1803 
      /*
       * Remove the name path, looked up relative to directory descriptor fd.
       *
       * With oldinum == 0, directories are refused with EPERM.  With a non-zero
       * oldinum the directory test is skipped and the inode number reported by
       * vn_stat() must equal oldinum, otherwise EIDRM is returned.  A vnode
       * flagged VV_ROOT yields EBUSY.  An EINVAL from the lookup is reported as
       * EPERM.  Returns 0 on success or an errno value.
       */
 1804 int
 1805 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1806     ino_t oldinum)
 1807 {
 1808         struct mount *mp;
 1809         struct vnode *vp;
 1810         struct nameidata nd;
 1811         struct stat sb;
 1812         cap_rights_t rights;
 1813         int error;
 1814 
 1815 restart:
 1816         bwillwrite();
 1817         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1818             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1819         if ((error = namei(&nd)) != 0)
 1820                 return (error == EINVAL ? EPERM : error);
 1821         vp = nd.ni_vp;
              /* error is 0 on entry to this chain; each arm may set it. */
 1822         if (vp->v_type == VDIR && oldinum == 0) {
 1823                 error = EPERM;          /* POSIX */
 1824         } else if (oldinum != 0 &&
 1825                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1826                   sb.st_ino != oldinum) {
 1827                         error = EIDRM;  /* Identifier removed */
 1828         } else {
 1829                 /*
 1830                  * The root of a mounted filesystem cannot be deleted.
 1831                  *
 1832                  * XXX: can this only be a VDIR case?
 1833                  */
 1834                 if (vp->v_vflag & VV_ROOT)
 1835                         error = EBUSY;
 1836         }
 1837         if (error == 0) {
                      /*
                       * If the write cannot be started without waiting, release
                       * both vnodes, call vn_start_write() again with V_XSLEEP
                       * and redo the lookup from `restart'.
                       */
 1838                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1839                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1840                         vput(nd.ni_dvp);
 1841                         if (vp == nd.ni_dvp)
 1842                                 vrele(vp);
 1843                         else
 1844                                 vput(vp);
 1845                         if ((error = vn_start_write(NULL, &mp,
 1846                             V_XSLEEP | PCATCH)) != 0)
 1847                                 return (error);
 1848                         goto restart;
 1849                 }
 1850 #ifdef MAC
 1851                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1852                     &nd.ni_cnd);
 1853                 if (error != 0)
 1854                         goto out;
 1855 #endif
 1856                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1857                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1858 #ifdef MAC
 1859 out:
 1860 #endif
 1861                 vn_finished_write(mp);
 1862         }
              /*
               * Common exit: release the parent, then the target.  When both are
               * the same vnode the parent's vput() has already unlocked it, so
               * only the remaining reference is dropped.
               */
 1863         NDFREE(&nd, NDF_ONLY_PNBUF);
 1864         vput(nd.ni_dvp);
 1865         if (vp == nd.ni_dvp)
 1866                 vrele(vp);
 1867         else
 1868                 vput(vp);
 1869         return (error);
 1870 }
 1871 
 1872 /*
 1873  * Reposition read/write file offset.
 1874  */
 1875 #ifndef _SYS_SYSPROTO_H_
 1876 struct lseek_args {
 1877         int     fd;
 1878         int     pad;
 1879         off_t   offset;
 1880         int     whence;
 1881 };
 1882 #endif
 1883 int
 1884 sys_lseek(td, uap)
 1885         struct thread *td;
 1886         register struct lseek_args /* {
 1887                 int fd;
 1888                 int pad;
 1889                 off_t offset;
 1890                 int whence;
 1891         } */ *uap;
 1892 {
 1893         struct file *fp;
 1894         cap_rights_t rights;
 1895         int error;
 1896 
 1897         AUDIT_ARG_FD(uap->fd);
 1898         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1899         if (error != 0)
 1900                 return (error);
 1901         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1902             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1903         fdrop(fp, td);
 1904         return (error);
 1905 }
 1906 
 1907 #if defined(COMPAT_43)
 1908 /*
 1909  * Reposition read/write file offset.
 1910  */
 1911 #ifndef _SYS_SYSPROTO_H_
 1912 struct olseek_args {
 1913         int     fd;
 1914         long    offset;
 1915         int     whence;
 1916 };
 1917 #endif
 1918 int
 1919 olseek(td, uap)
 1920         struct thread *td;
 1921         register struct olseek_args /* {
 1922                 int fd;
 1923                 long offset;
 1924                 int whence;
 1925         } */ *uap;
 1926 {
 1927         struct lseek_args /* {
 1928                 int fd;
 1929                 int pad;
 1930                 off_t offset;
 1931                 int whence;
 1932         } */ nuap;
 1933 
 1934         nuap.fd = uap->fd;
 1935         nuap.offset = uap->offset;
 1936         nuap.whence = uap->whence;
 1937         return (sys_lseek(td, &nuap));
 1938 }
 1939 #endif /* COMPAT_43 */
 1940 
 1941 /* Version with the 'pad' argument */
 1942 int
 1943 freebsd6_lseek(td, uap)
 1944         struct thread *td;
 1945         register struct freebsd6_lseek_args *uap;
 1946 {
 1947         struct lseek_args ouap;
 1948 
 1949         ouap.fd = uap->fd;
 1950         ouap.offset = uap->offset;
 1951         ouap.whence = uap->whence;
 1952         return (sys_lseek(td, &ouap));
 1953 }
 1954 
 1955 /*
 1956  * Check access permissions using passed credentials.
 1957  */
 1958 static int
 1959 vn_access(vp, user_flags, cred, td)
 1960         struct vnode    *vp;
 1961         int             user_flags;
 1962         struct ucred    *cred;
 1963         struct thread   *td;
 1964 {
 1965         accmode_t accmode;
 1966         int error;
 1967 
 1968         /* Flags == 0 means only check for existence. */
 1969         error = 0;
 1970         if (user_flags) {
 1971                 accmode = 0;
 1972                 if (user_flags & R_OK)
 1973                         accmode |= VREAD;
 1974                 if (user_flags & W_OK)
 1975                         accmode |= VWRITE;
 1976                 if (user_flags & X_OK)
 1977                         accmode |= VEXEC;
 1978 #ifdef MAC
 1979                 error = mac_vnode_check_access(cred, vp, accmode);
 1980                 if (error != 0)
 1981                         return (error);
 1982 #endif
 1983                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1984                         error = VOP_ACCESS(vp, accmode, cred, td);
 1985         }
 1986         return (error);
 1987 }
 1988 
 1989 /*
 1990  * Check access permissions using "real" credentials.
 1991  */
 1992 #ifndef _SYS_SYSPROTO_H_
 1993 struct access_args {
 1994         char    *path;
 1995         int     amode;
 1996 };
 1997 #endif
 1998 int
 1999 sys_access(td, uap)
 2000         struct thread *td;
 2001         register struct access_args /* {
 2002                 char *path;
 2003                 int amode;
 2004         } */ *uap;
 2005 {
 2006 
 2007         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2008 }
 2009 
 2010 #ifndef _SYS_SYSPROTO_H_
 2011 struct faccessat_args {
 2012         int     dirfd;
 2013         char    *path;
 2014         int     amode;
 2015         int     flag;
 2016 }
 2017 #endif
 2018 int
 2019 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2020 {
 2021 
 2022         if (uap->flag & ~AT_EACCESS)
 2023                 return (EINVAL);
 2024         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2025             uap->amode));
 2026 }
 2027 
 2028 int
 2029 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2030 {
 2031 
 2032         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2033 }
 2034 
 2035 int
 2036 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2037     int flag, int amode)
 2038 {
 2039         struct ucred *cred, *tmpcred;
 2040         struct vnode *vp;
 2041         struct nameidata nd;
 2042         cap_rights_t rights;
 2043         int error;
 2044 
 2045         /*
 2046          * Create and modify a temporary credential instead of one that
 2047          * is potentially shared.
 2048          */
 2049         if (!(flag & AT_EACCESS)) {
 2050                 cred = td->td_ucred;
 2051                 tmpcred = crdup(cred);
 2052                 tmpcred->cr_uid = cred->cr_ruid;
 2053                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2054                 td->td_ucred = tmpcred;
 2055         } else
 2056                 cred = tmpcred = td->td_ucred;
 2057         AUDIT_ARG_VALUE(amode);
 2058         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2059             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2060             td);
 2061         if ((error = namei(&nd)) != 0)
 2062                 goto out1;
 2063         vp = nd.ni_vp;
 2064 
 2065         error = vn_access(vp, amode, tmpcred, td);
 2066         NDFREE(&nd, NDF_ONLY_PNBUF);
 2067         vput(vp);
 2068 out1:
 2069         if (!(flag & AT_EACCESS)) {
 2070                 td->td_ucred = cred;
 2071                 crfree(tmpcred);
 2072         }
 2073         return (error);
 2074 }
 2075 
 2076 /*
 2077  * Check access permissions using "effective" credentials.
 2078  */
 2079 #ifndef _SYS_SYSPROTO_H_
 2080 struct eaccess_args {
 2081         char    *path;
 2082         int     amode;
 2083 };
 2084 #endif
 2085 int
 2086 sys_eaccess(td, uap)
 2087         struct thread *td;
 2088         register struct eaccess_args /* {
 2089                 char *path;
 2090                 int amode;
 2091         } */ *uap;
 2092 {
 2093 
 2094         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2095 }
 2096 
 2097 int
 2098 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2099 {
 2100 
 2101         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2102 }
 2103 
 2104 #if defined(COMPAT_43)
 2105 /*
 2106  * Get file status; this version follows links.
 2107  */
 2108 #ifndef _SYS_SYSPROTO_H_
 2109 struct ostat_args {
 2110         char    *path;
 2111         struct ostat *ub;
 2112 };
 2113 #endif
 2114 int
 2115 ostat(td, uap)
 2116         struct thread *td;
 2117         register struct ostat_args /* {
 2118                 char *path;
 2119                 struct ostat *ub;
 2120         } */ *uap;
 2121 {
 2122         struct stat sb;
 2123         struct ostat osb;
 2124         int error;
 2125 
 2126         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2127         if (error != 0)
 2128                 return (error);
 2129         cvtstat(&sb, &osb);
 2130         return (copyout(&osb, uap->ub, sizeof (osb)));
 2131 }
 2132 
 2133 /*
 2134  * Get file status; this version does not follow links.
 2135  */
 2136 #ifndef _SYS_SYSPROTO_H_
 2137 struct olstat_args {
 2138         char    *path;
 2139         struct ostat *ub;
 2140 };
 2141 #endif
 2142 int
 2143 olstat(td, uap)
 2144         struct thread *td;
 2145         register struct olstat_args /* {
 2146                 char *path;
 2147                 struct ostat *ub;
 2148         } */ *uap;
 2149 {
 2150         struct stat sb;
 2151         struct ostat osb;
 2152         int error;
 2153 
 2154         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2155         if (error != 0)
 2156                 return (error);
 2157         cvtstat(&sb, &osb);
 2158         return (copyout(&osb, uap->ub, sizeof (osb)));
 2159 }
 2160 
 2161 /*
 2162  * Convert from an old to a new stat structure.
 2163  */
 2164 void
 2165 cvtstat(st, ost)
 2166         struct stat *st;
 2167         struct ostat *ost;
 2168 {
 2169 
 2170         ost->st_dev = st->st_dev;
 2171         ost->st_ino = st->st_ino;
 2172         ost->st_mode = st->st_mode;
 2173         ost->st_nlink = st->st_nlink;
 2174         ost->st_uid = st->st_uid;
 2175         ost->st_gid = st->st_gid;
 2176         ost->st_rdev = st->st_rdev;
 2177         if (st->st_size < (quad_t)1 << 32)
 2178                 ost->st_size = st->st_size;
 2179         else
 2180                 ost->st_size = -2;
 2181         ost->st_atim = st->st_atim;
 2182         ost->st_mtim = st->st_mtim;
 2183         ost->st_ctim = st->st_ctim;
 2184         ost->st_blksize = st->st_blksize;
 2185         ost->st_blocks = st->st_blocks;
 2186         ost->st_flags = st->st_flags;
 2187         ost->st_gen = st->st_gen;
 2188 }
 2189 #endif /* COMPAT_43 */
 2190 
 2191 /*
 2192  * Get file status; this version follows links.
 2193  */
 2194 #ifndef _SYS_SYSPROTO_H_
 2195 struct stat_args {
 2196         char    *path;
 2197         struct stat *ub;
 2198 };
 2199 #endif
 2200 int
 2201 sys_stat(td, uap)
 2202         struct thread *td;
 2203         register struct stat_args /* {
 2204                 char *path;
 2205                 struct stat *ub;
 2206         } */ *uap;
 2207 {
 2208         struct stat sb;
 2209         int error;
 2210 
 2211         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2212         if (error == 0)
 2213                 error = copyout(&sb, uap->ub, sizeof (sb));
 2214         return (error);
 2215 }
 2216 
 2217 #ifndef _SYS_SYSPROTO_H_
 2218 struct fstatat_args {
 2219         int     fd;
 2220         char    *path;
 2221         struct stat     *buf;
 2222         int     flag;
 2223 }
 2224 #endif
 2225 int
 2226 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2227 {
 2228         struct stat sb;
 2229         int error;
 2230 
 2231         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2232             UIO_USERSPACE, &sb);
 2233         if (error == 0)
 2234                 error = copyout(&sb, uap->buf, sizeof (sb));
 2235         return (error);
 2236 }
 2237 
 2238 int
 2239 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2240 {
 2241 
 2242         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2243 }
 2244 
 2245 int
 2246 kern_statat(struct thread *td, int flag, int fd, char *path,
 2247     enum uio_seg pathseg, struct stat *sbp)
 2248 {
 2249 
 2250         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2251 }
 2252 
 2253 int
 2254 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2255     enum uio_seg pathseg, struct stat *sbp,
 2256     void (*hook)(struct vnode *vp, struct stat *sbp))
 2257 {
 2258         struct nameidata nd;
 2259         struct stat sb;
 2260         cap_rights_t rights;
 2261         int error;
 2262 
 2263         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2264                 return (EINVAL);
 2265 
 2266         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2267             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2268             cap_rights_init(&rights, CAP_FSTAT), td);
 2269 
 2270         if ((error = namei(&nd)) != 0)
 2271                 return (error);
 2272         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2273         if (error == 0) {
 2274                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2275                 if (S_ISREG(sb.st_mode))
 2276                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2277                 if (__predict_false(hook != NULL))
 2278                         hook(nd.ni_vp, &sb);
 2279         }
 2280         NDFREE(&nd, NDF_ONLY_PNBUF);
 2281         vput(nd.ni_vp);
 2282         if (error != 0)
 2283                 return (error);
 2284         *sbp = sb;
 2285 #ifdef KTRACE
 2286         if (KTRPOINT(td, KTR_STRUCT))
 2287                 ktrstat(&sb);
 2288 #endif
 2289         return (0);
 2290 }
 2291 
 2292 /*
 2293  * Get file status; this version does not follow links.
 2294  */
 2295 #ifndef _SYS_SYSPROTO_H_
 2296 struct lstat_args {
 2297         char    *path;
 2298         struct stat *ub;
 2299 };
 2300 #endif
 2301 int
 2302 sys_lstat(td, uap)
 2303         struct thread *td;
 2304         register struct lstat_args /* {
 2305                 char *path;
 2306                 struct stat *ub;
 2307         } */ *uap;
 2308 {
 2309         struct stat sb;
 2310         int error;
 2311 
 2312         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2313         if (error == 0)
 2314                 error = copyout(&sb, uap->ub, sizeof (sb));
 2315         return (error);
 2316 }
 2317 
 2318 int
 2319 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2320 {
 2321 
 2322         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2323             sbp));
 2324 }
 2325 
 2326 /*
 2327  * Implementation of the NetBSD [l]stat() functions.
 2328  */
 2329 void
 2330 cvtnstat(sb, nsb)
 2331         struct stat *sb;
 2332         struct nstat *nsb;
 2333 {
 2334 
 2335         bzero(nsb, sizeof *nsb);
 2336         nsb->st_dev = sb->st_dev;
 2337         nsb->st_ino = sb->st_ino;
 2338         nsb->st_mode = sb->st_mode;
 2339         nsb->st_nlink = sb->st_nlink;
 2340         nsb->st_uid = sb->st_uid;
 2341         nsb->st_gid = sb->st_gid;
 2342         nsb->st_rdev = sb->st_rdev;
 2343         nsb->st_atim = sb->st_atim;
 2344         nsb->st_mtim = sb->st_mtim;
 2345         nsb->st_ctim = sb->st_ctim;
 2346         nsb->st_size = sb->st_size;
 2347         nsb->st_blocks = sb->st_blocks;
 2348         nsb->st_blksize = sb->st_blksize;
 2349         nsb->st_flags = sb->st_flags;
 2350         nsb->st_gen = sb->st_gen;
 2351         nsb->st_birthtim = sb->st_birthtim;
 2352 }
 2353 
 2354 #ifndef _SYS_SYSPROTO_H_
 2355 struct nstat_args {
 2356         char    *path;
 2357         struct nstat *ub;
 2358 };
 2359 #endif
 2360 int
 2361 sys_nstat(td, uap)
 2362         struct thread *td;
 2363         register struct nstat_args /* {
 2364                 char *path;
 2365                 struct nstat *ub;
 2366         } */ *uap;
 2367 {
 2368         struct stat sb;
 2369         struct nstat nsb;
 2370         int error;
 2371 
 2372         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2373         if (error != 0)
 2374                 return (error);
 2375         cvtnstat(&sb, &nsb);
 2376         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2377 }
 2378 
 2379 /*
 2380  * NetBSD lstat.  Get file status; this version does not follow links.
 2381  */
 2382 #ifndef _SYS_SYSPROTO_H_
 2383 struct lstat_args {
 2384         char    *path;
 2385         struct stat *ub;
 2386 };
 2387 #endif
 2388 int
 2389 sys_nlstat(td, uap)
 2390         struct thread *td;
 2391         register struct nlstat_args /* {
 2392                 char *path;
 2393                 struct nstat *ub;
 2394         } */ *uap;
 2395 {
 2396         struct stat sb;
 2397         struct nstat nsb;
 2398         int error;
 2399 
 2400         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2401         if (error != 0)
 2402                 return (error);
 2403         cvtnstat(&sb, &nsb);
 2404         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2405 }
 2406 
 2407 /*
 2408  * Get configurable pathname variables.
 2409  */
 2410 #ifndef _SYS_SYSPROTO_H_
 2411 struct pathconf_args {
 2412         char    *path;
 2413         int     name;
 2414 };
 2415 #endif
 2416 int
 2417 sys_pathconf(td, uap)
 2418         struct thread *td;
 2419         register struct pathconf_args /* {
 2420                 char *path;
 2421                 int name;
 2422         } */ *uap;
 2423 {
 2424 
 2425         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2426 }
 2427 
 2428 #ifndef _SYS_SYSPROTO_H_
 2429 struct lpathconf_args {
 2430         char    *path;
 2431         int     name;
 2432 };
 2433 #endif
 2434 int
 2435 sys_lpathconf(td, uap)
 2436         struct thread *td;
 2437         register struct lpathconf_args /* {
 2438                 char *path;
 2439                 int name;
 2440         } */ *uap;
 2441 {
 2442 
 2443         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2444             NOFOLLOW));
 2445 }
 2446 
 2447 int
 2448 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2449     u_long flags)
 2450 {
 2451         struct nameidata nd;
 2452         int error;
 2453 
 2454         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2455             pathseg, path, td);
 2456         if ((error = namei(&nd)) != 0)
 2457                 return (error);
 2458         NDFREE(&nd, NDF_ONLY_PNBUF);
 2459 
 2460         /* If asynchronous I/O is available, it works for all files. */
 2461         if (name == _PC_ASYNC_IO)
 2462                 td->td_retval[0] = async_io_version;
 2463         else
 2464                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2465         vput(nd.ni_vp);
 2466         return (error);
 2467 }
 2468 
 2469 /*
 2470  * Return target name of a symbolic link.
 2471  */
 2472 #ifndef _SYS_SYSPROTO_H_
 2473 struct readlink_args {
 2474         char    *path;
 2475         char    *buf;
 2476         size_t  count;
 2477 };
 2478 #endif
 2479 int
 2480 sys_readlink(td, uap)
 2481         struct thread *td;
 2482         register struct readlink_args /* {
 2483                 char *path;
 2484                 char *buf;
 2485                 size_t count;
 2486         } */ *uap;
 2487 {
 2488 
 2489         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2490             UIO_USERSPACE, uap->count));
 2491 }
 2492 #ifndef _SYS_SYSPROTO_H_
 2493 struct readlinkat_args {
 2494         int     fd;
 2495         char    *path;
 2496         char    *buf;
 2497         size_t  bufsize;
 2498 };
 2499 #endif
 2500 int
 2501 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2502 {
 2503 
 2504         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2505             uap->buf, UIO_USERSPACE, uap->bufsize));
 2506 }
 2507 
 2508 int
 2509 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2510     enum uio_seg bufseg, size_t count)
 2511 {
 2512 
 2513         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2514             count));
 2515 }
 2516 
 2517 int
 2518 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2519     char *buf, enum uio_seg bufseg, size_t count)
 2520 {
 2521         struct vnode *vp;
 2522         struct iovec aiov;
 2523         struct uio auio;
 2524         struct nameidata nd;
 2525         int error;
 2526 
 2527         if (count > IOSIZE_MAX)
 2528                 return (EINVAL);
 2529 
 2530         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2531             pathseg, path, fd, td);
 2532 
 2533         if ((error = namei(&nd)) != 0)
 2534                 return (error);
 2535         NDFREE(&nd, NDF_ONLY_PNBUF);
 2536         vp = nd.ni_vp;
 2537 #ifdef MAC
 2538         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2539         if (error != 0) {
 2540                 vput(vp);
 2541                 return (error);
 2542         }
 2543 #endif
 2544         if (vp->v_type != VLNK)
 2545                 error = EINVAL;
 2546         else {
 2547                 aiov.iov_base = buf;
 2548                 aiov.iov_len = count;
 2549                 auio.uio_iov = &aiov;
 2550                 auio.uio_iovcnt = 1;
 2551                 auio.uio_offset = 0;
 2552                 auio.uio_rw = UIO_READ;
 2553                 auio.uio_segflg = bufseg;
 2554                 auio.uio_td = td;
 2555                 auio.uio_resid = count;
 2556                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2557         }
 2558         vput(vp);
 2559         td->td_retval[0] = count - auio.uio_resid;
 2560         return (error);
 2561 }
 2562 
 2563 /*
 2564  * Common implementation code for chflags() and fchflags().
 2565  */
 2566 static int
 2567 setfflags(td, vp, flags)
 2568         struct thread *td;
 2569         struct vnode *vp;
 2570         u_long flags;
 2571 {
 2572         struct mount *mp;
 2573         struct vattr vattr;
 2574         int error;
 2575 
 2576         /* We can't support the value matching VNOVAL. */
 2577         if (flags == VNOVAL)
 2578                 return (EOPNOTSUPP);
 2579 
 2580         /*
 2581          * Prevent non-root users from setting flags on devices.  When
 2582          * a device is reused, users can retain ownership of the device
 2583          * if they are allowed to set flags and programs assume that
 2584          * chown can't fail when done as root.
 2585          */
 2586         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2587                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2588                 if (error != 0)
 2589                         return (error);
 2590         }
 2591 
 2592         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2593                 return (error);
 2594         VATTR_NULL(&vattr);
 2595         vattr.va_flags = flags;
 2596         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2597 #ifdef MAC
 2598         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2599         if (error == 0)
 2600 #endif
 2601                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2602         VOP_UNLOCK(vp, 0);
 2603         vn_finished_write(mp);
 2604         return (error);
 2605 }
 2606 
 2607 /*
 2608  * Change flags of a file given a path name.
 2609  */
 2610 #ifndef _SYS_SYSPROTO_H_
 2611 struct chflags_args {
 2612         const char *path;
 2613         u_long  flags;
 2614 };
 2615 #endif
 2616 int
 2617 sys_chflags(td, uap)
 2618         struct thread *td;
 2619         register struct chflags_args /* {
 2620                 const char *path;
 2621                 u_long flags;
 2622         } */ *uap;
 2623 {
 2624 
 2625         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2626 }
 2627 
 2628 #ifndef _SYS_SYSPROTO_H_
 2629 struct chflagsat_args {
 2630         int     fd;
 2631         const char *path;
 2632         u_long  flags;
 2633         int     atflag;
 2634 }
 2635 #endif
 2636 int
 2637 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2638 {
 2639         int fd = uap->fd;
 2640         const char *path = uap->path;
 2641         u_long flags = uap->flags;
 2642         int atflag = uap->atflag;
 2643 
 2644         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2645                 return (EINVAL);
 2646 
 2647         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2648 }
 2649 
 2650 static int
 2651 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2652     u_long flags)
 2653 {
 2654 
 2655         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2656 }
 2657 
 2658 /*
 2659  * Same as chflags() but doesn't follow symlinks.
 2660  */
 2661 int
 2662 sys_lchflags(td, uap)
 2663         struct thread *td;
 2664         register struct lchflags_args /* {
 2665                 const char *path;
 2666                 u_long flags;
 2667         } */ *uap;
 2668 {
 2669 
 2670         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2671             uap->flags, AT_SYMLINK_NOFOLLOW));
 2672 }
 2673 
 2674 static int
 2675 kern_chflagsat(struct thread *td, int fd, const char *path,
 2676     enum uio_seg pathseg, u_long flags, int atflag)
 2677 {
 2678         struct nameidata nd;
 2679         cap_rights_t rights;
 2680         int error, follow;
 2681 
 2682         AUDIT_ARG_FFLAGS(flags);
 2683         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2684         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2685             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2686         if ((error = namei(&nd)) != 0)
 2687                 return (error);
 2688         NDFREE(&nd, NDF_ONLY_PNBUF);
 2689         error = setfflags(td, nd.ni_vp, flags);
 2690         vrele(nd.ni_vp);
 2691         return (error);
 2692 }
 2693 
 2694 /*
 2695  * Change flags of a file given a file descriptor.
 2696  */
 2697 #ifndef _SYS_SYSPROTO_H_
 2698 struct fchflags_args {
 2699         int     fd;
 2700         u_long  flags;
 2701 };
 2702 #endif
 2703 int
 2704 sys_fchflags(td, uap)
 2705         struct thread *td;
 2706         register struct fchflags_args /* {
 2707                 int fd;
 2708                 u_long flags;
 2709         } */ *uap;
 2710 {
 2711         struct file *fp;
 2712         cap_rights_t rights;
 2713         int error;
 2714 
 2715         AUDIT_ARG_FD(uap->fd);
 2716         AUDIT_ARG_FFLAGS(uap->flags);
 2717         error = getvnode(td->td_proc->p_fd, uap->fd,
 2718             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2719         if (error != 0)
 2720                 return (error);
 2721 #ifdef AUDIT
 2722         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2723         AUDIT_ARG_VNODE1(fp->f_vnode);
 2724         VOP_UNLOCK(fp->f_vnode, 0);
 2725 #endif
 2726         error = setfflags(td, fp->f_vnode, uap->flags);
 2727         fdrop(fp, td);
 2728         return (error);
 2729 }
 2730 
 2731 /*
 2732  * Common implementation code for chmod(), lchmod() and fchmod().
 2733  */
 2734 int
 2735 setfmode(td, cred, vp, mode)
 2736         struct thread *td;
 2737         struct ucred *cred;
 2738         struct vnode *vp;
 2739         int mode;
 2740 {
 2741         struct mount *mp;
 2742         struct vattr vattr;
 2743         int error;
 2744 
 2745         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2746                 return (error);
 2747         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2748         VATTR_NULL(&vattr);
 2749         vattr.va_mode = mode & ALLPERMS;
 2750 #ifdef MAC
 2751         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2752         if (error == 0)
 2753 #endif
 2754                 error = VOP_SETATTR(vp, &vattr, cred);
 2755         VOP_UNLOCK(vp, 0);
 2756         vn_finished_write(mp);
 2757         return (error);
 2758 }
 2759 
 2760 /*
 2761  * Change mode of a file given path name.
 2762  */
 2763 #ifndef _SYS_SYSPROTO_H_
 2764 struct chmod_args {
 2765         char    *path;
 2766         int     mode;
 2767 };
 2768 #endif
 2769 int
 2770 sys_chmod(td, uap)
 2771         struct thread *td;
 2772         register struct chmod_args /* {
 2773                 char *path;
 2774                 int mode;
 2775         } */ *uap;
 2776 {
 2777 
 2778         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2779 }
 2780 
 2781 #ifndef _SYS_SYSPROTO_H_
 2782 struct fchmodat_args {
 2783         int     dirfd;
 2784         char    *path;
 2785         mode_t  mode;
 2786         int     flag;
 2787 }
 2788 #endif
 2789 int
 2790 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2791 {
 2792         int flag = uap->flag;
 2793         int fd = uap->fd;
 2794         char *path = uap->path;
 2795         mode_t mode = uap->mode;
 2796 
 2797         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2798                 return (EINVAL);
 2799 
 2800         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2801 }
 2802 
 2803 int
 2804 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2805 {
 2806 
 2807         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2808 }
 2809 
 2810 /*
 2811  * Change mode of a file given path name (don't follow links.)
 2812  */
 2813 #ifndef _SYS_SYSPROTO_H_
 2814 struct lchmod_args {
 2815         char    *path;
 2816         int     mode;
 2817 };
 2818 #endif
 2819 int
 2820 sys_lchmod(td, uap)
 2821         struct thread *td;
 2822         register struct lchmod_args /* {
 2823                 char *path;
 2824                 int mode;
 2825         } */ *uap;
 2826 {
 2827 
 2828         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2829             uap->mode, AT_SYMLINK_NOFOLLOW));
 2830 }
 2831 
 2832 int
 2833 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2834     mode_t mode, int flag)
 2835 {
 2836         struct nameidata nd;
 2837         cap_rights_t rights;
 2838         int error, follow;
 2839 
 2840         AUDIT_ARG_MODE(mode);
 2841         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2842         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2843             cap_rights_init(&rights, CAP_FCHMOD), td);
 2844         if ((error = namei(&nd)) != 0)
 2845                 return (error);
 2846         NDFREE(&nd, NDF_ONLY_PNBUF);
 2847         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2848         vrele(nd.ni_vp);
 2849         return (error);
 2850 }
 2851 
 2852 /*
 2853  * Change mode of a file given a file descriptor.
 2854  */
 2855 #ifndef _SYS_SYSPROTO_H_
 2856 struct fchmod_args {
 2857         int     fd;
 2858         int     mode;
 2859 };
 2860 #endif
 2861 int
 2862 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2863 {
 2864         struct file *fp;
 2865         cap_rights_t rights;
 2866         int error;
 2867 
 2868         AUDIT_ARG_FD(uap->fd);
 2869         AUDIT_ARG_MODE(uap->mode);
 2870 
 2871         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2872         if (error != 0)
 2873                 return (error);
 2874         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2875         fdrop(fp, td);
 2876         return (error);
 2877 }
 2878 
 2879 /*
 2880  * Common implementation for chown(), lchown(), and fchown()
 2881  */
 2882 int
 2883 setfown(td, cred, vp, uid, gid)
 2884         struct thread *td;
 2885         struct ucred *cred;
 2886         struct vnode *vp;
 2887         uid_t uid;
 2888         gid_t gid;
 2889 {
 2890         struct mount *mp;
 2891         struct vattr vattr;
 2892         int error;
 2893 
 2894         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2895                 return (error);
 2896         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2897         VATTR_NULL(&vattr);
 2898         vattr.va_uid = uid;
 2899         vattr.va_gid = gid;
 2900 #ifdef MAC
 2901         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2902             vattr.va_gid);
 2903         if (error == 0)
 2904 #endif
 2905                 error = VOP_SETATTR(vp, &vattr, cred);
 2906         VOP_UNLOCK(vp, 0);
 2907         vn_finished_write(mp);
 2908         return (error);
 2909 }
 2910 
 2911 /*
 2912  * Set ownership given a path name.
 2913  */
 2914 #ifndef _SYS_SYSPROTO_H_
 2915 struct chown_args {
 2916         char    *path;
 2917         int     uid;
 2918         int     gid;
 2919 };
 2920 #endif
 2921 int
 2922 sys_chown(td, uap)
 2923         struct thread *td;
 2924         register struct chown_args /* {
 2925                 char *path;
 2926                 int uid;
 2927                 int gid;
 2928         } */ *uap;
 2929 {
 2930 
 2931         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2932 }
 2933 
 2934 #ifndef _SYS_SYSPROTO_H_
 2935 struct fchownat_args {
 2936         int fd;
 2937         const char * path;
 2938         uid_t uid;
 2939         gid_t gid;
 2940         int flag;
 2941 };
 2942 #endif
 2943 int
 2944 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2945 {
 2946         int flag;
 2947 
 2948         flag = uap->flag;
 2949         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2950                 return (EINVAL);
 2951 
 2952         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2953             uap->gid, uap->flag));
 2954 }
 2955 
 2956 int
 2957 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2958     int gid)
 2959 {
 2960 
 2961         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2962 }
 2963 
 2964 int
 2965 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2966     int uid, int gid, int flag)
 2967 {
 2968         struct nameidata nd;
 2969         cap_rights_t rights;
 2970         int error, follow;
 2971 
 2972         AUDIT_ARG_OWNER(uid, gid);
 2973         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2974         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2975             cap_rights_init(&rights, CAP_FCHOWN), td);
 2976 
 2977         if ((error = namei(&nd)) != 0)
 2978                 return (error);
 2979         NDFREE(&nd, NDF_ONLY_PNBUF);
 2980         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2981         vrele(nd.ni_vp);
 2982         return (error);
 2983 }
 2984 
 2985 /*
 2986  * Set ownership given a path name, do not cross symlinks.
 2987  */
 2988 #ifndef _SYS_SYSPROTO_H_
 2989 struct lchown_args {
 2990         char    *path;
 2991         int     uid;
 2992         int     gid;
 2993 };
 2994 #endif
 2995 int
 2996 sys_lchown(td, uap)
 2997         struct thread *td;
 2998         register struct lchown_args /* {
 2999                 char *path;
 3000                 int uid;
 3001                 int gid;
 3002         } */ *uap;
 3003 {
 3004 
 3005         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3006 }
 3007 
 3008 int
 3009 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3010     int gid)
 3011 {
 3012 
 3013         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3014             AT_SYMLINK_NOFOLLOW));
 3015 }
 3016 
 3017 /*
 3018  * Set ownership given a file descriptor.
 3019  */
 3020 #ifndef _SYS_SYSPROTO_H_
 3021 struct fchown_args {
 3022         int     fd;
 3023         int     uid;
 3024         int     gid;
 3025 };
 3026 #endif
 3027 int
 3028 sys_fchown(td, uap)
 3029         struct thread *td;
 3030         register struct fchown_args /* {
 3031                 int fd;
 3032                 int uid;
 3033                 int gid;
 3034         } */ *uap;
 3035 {
 3036         struct file *fp;
 3037         cap_rights_t rights;
 3038         int error;
 3039 
 3040         AUDIT_ARG_FD(uap->fd);
 3041         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3042         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3043         if (error != 0)
 3044                 return (error);
 3045         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3046         fdrop(fp, td);
 3047         return (error);
 3048 }
 3049 
 3050 /*
 3051  * Common implementation code for utimes(), lutimes(), and futimes().
 3052  */
 3053 static int
 3054 getutimes(usrtvp, tvpseg, tsp)
 3055         const struct timeval *usrtvp;
 3056         enum uio_seg tvpseg;
 3057         struct timespec *tsp;
 3058 {
 3059         struct timeval tv[2];
 3060         const struct timeval *tvp;
 3061         int error;
 3062 
 3063         if (usrtvp == NULL) {
 3064                 vfs_timestamp(&tsp[0]);
 3065                 tsp[1] = tsp[0];
 3066         } else {
 3067                 if (tvpseg == UIO_SYSSPACE) {
 3068                         tvp = usrtvp;
 3069                 } else {
 3070                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3071                                 return (error);
 3072                         tvp = tv;
 3073                 }
 3074 
 3075                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3076                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3077                         return (EINVAL);
 3078                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3079                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3080         }
 3081         return (0);
 3082 }
 3083 
 3084 /*
 3085  * Common implementation code for utimes(), lutimes(), and futimes().
 3086  */
 3087 static int
 3088 setutimes(td, vp, ts, numtimes, nullflag)
 3089         struct thread *td;
 3090         struct vnode *vp;
 3091         const struct timespec *ts;
 3092         int numtimes;
 3093         int nullflag;
 3094 {
 3095         struct mount *mp;
 3096         struct vattr vattr;
 3097         int error, setbirthtime;
 3098 
 3099         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3100                 return (error);
 3101         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3102         setbirthtime = 0;
 3103         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3104             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3105                 setbirthtime = 1;
 3106         VATTR_NULL(&vattr);
 3107         vattr.va_atime = ts[0];
 3108         vattr.va_mtime = ts[1];
 3109         if (setbirthtime)
 3110                 vattr.va_birthtime = ts[1];
 3111         if (numtimes > 2)
 3112                 vattr.va_birthtime = ts[2];
 3113         if (nullflag)
 3114                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3115 #ifdef MAC
 3116         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3117             vattr.va_mtime);
 3118 #endif
 3119         if (error == 0)
 3120                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3121         VOP_UNLOCK(vp, 0);
 3122         vn_finished_write(mp);
 3123         return (error);
 3124 }
 3125 
 3126 /*
 3127  * Set the access and modification times of a file.
 3128  */
 3129 #ifndef _SYS_SYSPROTO_H_
 3130 struct utimes_args {
 3131         char    *path;
 3132         struct  timeval *tptr;
 3133 };
 3134 #endif
 3135 int
 3136 sys_utimes(td, uap)
 3137         struct thread *td;
 3138         register struct utimes_args /* {
 3139                 char *path;
 3140                 struct timeval *tptr;
 3141         } */ *uap;
 3142 {
 3143 
 3144         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3145             UIO_USERSPACE));
 3146 }
 3147 
 3148 #ifndef _SYS_SYSPROTO_H_
 3149 struct futimesat_args {
 3150         int fd;
 3151         const char * path;
 3152         const struct timeval * times;
 3153 };
 3154 #endif
 3155 int
 3156 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3157 {
 3158 
 3159         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3160             uap->times, UIO_USERSPACE));
 3161 }
 3162 
 3163 int
 3164 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3165     struct timeval *tptr, enum uio_seg tptrseg)
 3166 {
 3167 
 3168         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3169 }
 3170 
 3171 int
 3172 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3173     struct timeval *tptr, enum uio_seg tptrseg)
 3174 {
 3175         struct nameidata nd;
 3176         struct timespec ts[2];
 3177         cap_rights_t rights;
 3178         int error;
 3179 
 3180         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3181                 return (error);
 3182         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3183             cap_rights_init(&rights, CAP_FUTIMES), td);
 3184 
 3185         if ((error = namei(&nd)) != 0)
 3186                 return (error);
 3187         NDFREE(&nd, NDF_ONLY_PNBUF);
 3188         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3189         vrele(nd.ni_vp);
 3190         return (error);
 3191 }
 3192 
 3193 /*
 3194  * Set the access and modification times of a file.
 3195  */
 3196 #ifndef _SYS_SYSPROTO_H_
 3197 struct lutimes_args {
 3198         char    *path;
 3199         struct  timeval *tptr;
 3200 };
 3201 #endif
 3202 int
 3203 sys_lutimes(td, uap)
 3204         struct thread *td;
 3205         register struct lutimes_args /* {
 3206                 char *path;
 3207                 struct timeval *tptr;
 3208         } */ *uap;
 3209 {
 3210 
 3211         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3212             UIO_USERSPACE));
 3213 }
 3214 
 3215 int
 3216 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3217     struct timeval *tptr, enum uio_seg tptrseg)
 3218 {
 3219         struct timespec ts[2];
 3220         struct nameidata nd;
 3221         int error;
 3222 
 3223         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3224                 return (error);
 3225         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3226         if ((error = namei(&nd)) != 0)
 3227                 return (error);
 3228         NDFREE(&nd, NDF_ONLY_PNBUF);
 3229         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3230         vrele(nd.ni_vp);
 3231         return (error);
 3232 }
 3233 
 3234 /*
 3235  * Set the access and modification times of a file.
 3236  */
 3237 #ifndef _SYS_SYSPROTO_H_
 3238 struct futimes_args {
 3239         int     fd;
 3240         struct  timeval *tptr;
 3241 };
 3242 #endif
 3243 int
 3244 sys_futimes(td, uap)
 3245         struct thread *td;
 3246         register struct futimes_args /* {
 3247                 int  fd;
 3248                 struct timeval *tptr;
 3249         } */ *uap;
 3250 {
 3251 
 3252         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3253 }
 3254 
 3255 int
 3256 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3257     enum uio_seg tptrseg)
 3258 {
 3259         struct timespec ts[2];
 3260         struct file *fp;
 3261         cap_rights_t rights;
 3262         int error;
 3263 
 3264         AUDIT_ARG_FD(fd);
 3265         error = getutimes(tptr, tptrseg, ts);
 3266         if (error != 0)
 3267                 return (error);
 3268         error = getvnode(td->td_proc->p_fd, fd,
 3269             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3270         if (error != 0)
 3271                 return (error);
 3272 #ifdef AUDIT
 3273         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3274         AUDIT_ARG_VNODE1(fp->f_vnode);
 3275         VOP_UNLOCK(fp->f_vnode, 0);
 3276 #endif
 3277         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3278         fdrop(fp, td);
 3279         return (error);
 3280 }
 3281 
 3282 /*
 3283  * Truncate a file given its path name.
 3284  */
 3285 #ifndef _SYS_SYSPROTO_H_
 3286 struct truncate_args {
 3287         char    *path;
 3288         int     pad;
 3289         off_t   length;
 3290 };
 3291 #endif
 3292 int
 3293 sys_truncate(td, uap)
 3294         struct thread *td;
 3295         register struct truncate_args /* {
 3296                 char *path;
 3297                 int pad;
 3298                 off_t length;
 3299         } */ *uap;
 3300 {
 3301 
 3302         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3303 }
 3304 
 3305 int
 3306 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3307 {
 3308         struct mount *mp;
 3309         struct vnode *vp;
 3310         void *rl_cookie;
 3311         struct vattr vattr;
 3312         struct nameidata nd;
 3313         int error;
 3314 
 3315         if (length < 0)
 3316                 return(EINVAL);
 3317         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3318         if ((error = namei(&nd)) != 0)
 3319                 return (error);
 3320         vp = nd.ni_vp;
 3321         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3322         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3323                 vn_rangelock_unlock(vp, rl_cookie);
 3324                 vrele(vp);
 3325                 return (error);
 3326         }
 3327         NDFREE(&nd, NDF_ONLY_PNBUF);
 3328         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3329         if (vp->v_type == VDIR)
 3330                 error = EISDIR;
 3331 #ifdef MAC
 3332         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3333         }
 3334 #endif
 3335         else if ((error = vn_writechk(vp)) == 0 &&
 3336             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3337                 VATTR_NULL(&vattr);
 3338                 vattr.va_size = length;
 3339                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3340         }
 3341         VOP_UNLOCK(vp, 0);
 3342         vn_finished_write(mp);
 3343         vn_rangelock_unlock(vp, rl_cookie);
 3344         vrele(vp);
 3345         return (error);
 3346 }
 3347 
 3348 #if defined(COMPAT_43)
 3349 /*
 3350  * Truncate a file given its path name.
 3351  */
 3352 #ifndef _SYS_SYSPROTO_H_
 3353 struct otruncate_args {
 3354         char    *path;
 3355         long    length;
 3356 };
 3357 #endif
 3358 int
 3359 otruncate(td, uap)
 3360         struct thread *td;
 3361         register struct otruncate_args /* {
 3362                 char *path;
 3363                 long length;
 3364         } */ *uap;
 3365 {
 3366         struct truncate_args /* {
 3367                 char *path;
 3368                 int pad;
 3369                 off_t length;
 3370         } */ nuap;
 3371 
 3372         nuap.path = uap->path;
 3373         nuap.length = uap->length;
 3374         return (sys_truncate(td, &nuap));
 3375 }
 3376 #endif /* COMPAT_43 */
 3377 
 3378 /* Versions with the pad argument */
 3379 int
 3380 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3381 {
 3382         struct truncate_args ouap;
 3383 
 3384         ouap.path = uap->path;
 3385         ouap.length = uap->length;
 3386         return (sys_truncate(td, &ouap));
 3387 }
 3388 
 3389 int
 3390 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3391 {
 3392         struct ftruncate_args ouap;
 3393 
 3394         ouap.fd = uap->fd;
 3395         ouap.length = uap->length;
 3396         return (sys_ftruncate(td, &ouap));
 3397 }
 3398 
 3399 /*
 3400  * Sync an open file.
 3401  */
 3402 #ifndef _SYS_SYSPROTO_H_
 3403 struct fsync_args {
 3404         int     fd;
 3405 };
 3406 #endif
 3407 int
 3408 sys_fsync(td, uap)
 3409         struct thread *td;
 3410         struct fsync_args /* {
 3411                 int fd;
 3412         } */ *uap;
 3413 {
 3414         struct vnode *vp;
 3415         struct mount *mp;
 3416         struct file *fp;
 3417         cap_rights_t rights;
 3418         int error, lock_flags;
 3419 
 3420         AUDIT_ARG_FD(uap->fd);
 3421         error = getvnode(td->td_proc->p_fd, uap->fd,
 3422             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3423         if (error != 0)
 3424                 return (error);
 3425         vp = fp->f_vnode;
 3426         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3427         if (error != 0)
 3428                 goto drop;
 3429         if (MNT_SHARED_WRITES(mp) ||
 3430             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3431                 lock_flags = LK_SHARED;
 3432         } else {
 3433                 lock_flags = LK_EXCLUSIVE;
 3434         }
 3435         vn_lock(vp, lock_flags | LK_RETRY);
 3436         AUDIT_ARG_VNODE1(vp);
 3437         if (vp->v_object != NULL) {
 3438                 VM_OBJECT_WLOCK(vp->v_object);
 3439                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3440                 VM_OBJECT_WUNLOCK(vp->v_object);
 3441         }
 3442         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3443 
 3444         VOP_UNLOCK(vp, 0);
 3445         vn_finished_write(mp);
 3446 drop:
 3447         fdrop(fp, td);
 3448         return (error);
 3449 }
 3450 
 3451 /*
 3452  * Rename files.  Source and destination must either both be directories, or
 3453  * both not be directories.  If target is a directory, it must be empty.
 3454  */
 3455 #ifndef _SYS_SYSPROTO_H_
 3456 struct rename_args {
 3457         char    *from;
 3458         char    *to;
 3459 };
 3460 #endif
 3461 int
 3462 sys_rename(td, uap)
 3463         struct thread *td;
 3464         register struct rename_args /* {
 3465                 char *from;
 3466                 char *to;
 3467         } */ *uap;
 3468 {
 3469 
 3470         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3471 }
 3472 
 3473 #ifndef _SYS_SYSPROTO_H_
 3474 struct renameat_args {
 3475         int     oldfd;
 3476         char    *old;
 3477         int     newfd;
 3478         char    *new;
 3479 };
 3480 #endif
 3481 int
 3482 sys_renameat(struct thread *td, struct renameat_args *uap)
 3483 {
 3484 
 3485         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3486             UIO_USERSPACE));
 3487 }
 3488 
 3489 int
 3490 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3491 {
 3492 
 3493         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3494 }
 3495 
/*
 * Rename the object named by old (looked up relative to oldfd) to new
 * (looked up relative to newfd).
 *
 * Returns 0 on success or an errno value.  Internally error is set to -1
 * when the source and the target resolve to the same vnode; that value
 * skips VOP_RENAME() and is turned into a 0 return at the end.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
        struct mount *mp = NULL;
        struct vnode *tvp, *fvp, *tdvp;
        struct nameidata fromnd, tond;
        cap_rights_t rights;
        int error;

        bwillwrite();
        /*
         * Source lookup.  With MAC the parent and leaf are returned locked
         * so the rename-from check can run; they are unlocked again right
         * after it.
         */
#ifdef MAC
        NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
            AUDITVNODE1, pathseg, old, oldfd,
            cap_rights_init(&rights, CAP_RENAMEAT), td);
#else
        NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
            pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
#endif

        if ((error = namei(&fromnd)) != 0)
                return (error);
#ifdef MAC
        error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
            fromnd.ni_vp, &fromnd.ni_cnd);
        VOP_UNLOCK(fromnd.ni_dvp, 0);
        if (fromnd.ni_dvp != fromnd.ni_vp)
                VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
        fvp = fromnd.ni_vp;
        if (error == 0)
                error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
        if (error != 0) {
                /* Either the MAC check or vn_start_write() failed. */
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                goto out1;
        }
        /* Target lookup: parent and leaf (if any) come back locked. */
        NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
            SAVESTART | AUDITVNODE2, pathseg, new, newfd,
            cap_rights_init(&rights, CAP_LINKAT), td);
        if (fromnd.ni_vp->v_type == VDIR)
                tond.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&tond)) != 0) {
                /* Translate error code for rename("dir1", "dir2/."). */
                if (error == EISDIR && fvp->v_type == VDIR)
                        error = EINVAL;
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                vn_finished_write(mp);
                goto out1;
        }
        tdvp = tond.ni_dvp;
        tvp = tond.ni_vp;
        if (tvp != NULL) {
                /* An existing target must match the source's kind. */
                if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
                        error = ENOTDIR;
                        goto out;
                } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
                        error = EISDIR;
                        goto out;
                }
#ifdef CAPABILITIES
                if (newfd != AT_FDCWD) {
                        /*
                         * If the target already exists we require CAP_UNLINKAT
                         * from 'newfd'.
                         */
                        error = cap_check(&tond.ni_filecaps.fc_rights,
                            cap_rights_init(&rights, CAP_UNLINKAT));
                        if (error != 0)
                                goto out;
                }
#endif
        }
        /* The source may not be the directory that will hold the target. */
        if (fvp == tdvp) {
                error = EINVAL;
                goto out;
        }
        /*
         * If the source is the same as the destination (that is, if they
         * are links to the same vnode), then there is nothing to do.
         */
        if (fvp == tvp)
                error = -1;
#ifdef MAC
        else
                error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
                    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
        if (error == 0) {
                /*
                 * NOTE(review): no vnode is released here after
                 * VOP_RENAME(), so it presumably consumes the references
                 * and locks on all four vnodes -- confirm against
                 * VOP_RENAME(9).
                 */
                error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
        } else {
                /* No rename was attempted: release everything by hand. */
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
                if (tvp != NULL)
                        vput(tvp);
                if (tdvp == tvp)
                        vrele(tdvp);
                else
                        vput(tdvp);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
        }
        vrele(tond.ni_startdir);
        vn_finished_write(mp);
out1:
        if (fromnd.ni_startdir)
                vrele(fromnd.ni_startdir);
        /* -1 is the internal "same vnode, nothing to do" marker. */
        if (error == -1)
                return (0);
        return (error);
}
 3614 
 3615 /*
 3616  * Make a directory file.
 3617  */
 3618 #ifndef _SYS_SYSPROTO_H_
 3619 struct mkdir_args {
 3620         char    *path;
 3621         int     mode;
 3622 };
 3623 #endif
 3624 int
 3625 sys_mkdir(td, uap)
 3626         struct thread *td;
 3627         register struct mkdir_args /* {
 3628                 char *path;
 3629                 int mode;
 3630         } */ *uap;
 3631 {
 3632 
 3633         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3634 }
 3635 
 3636 #ifndef _SYS_SYSPROTO_H_
 3637 struct mkdirat_args {
 3638         int     fd;
 3639         char    *path;
 3640         mode_t  mode;
 3641 };
 3642 #endif
 3643 int
 3644 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3645 {
 3646 
 3647         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3648 }
 3649 
 3650 int
 3651 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3652 {
 3653 
 3654         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3655 }
 3656 
/*
 * Create a directory named by path, looked up relative to fd.
 *
 * Returns EEXIST when the lookup finds an existing vnode.  The new
 * directory's mode is the ACCESSPERMS bits of mode with the process's
 * fd_cmask bits cleared.  If write access to the parent's filesystem
 * cannot be started without waiting, everything is released, the wait is
 * done with no vnode held, and the lookup is redone from the top.
 * Returns 0 or the first error encountered.
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
        struct mount *mp;
        struct vnode *vp;
        struct vattr vattr;
        struct nameidata nd;
        cap_rights_t rights;
        int error;

        AUDIT_ARG_MODE(mode);
restart:
        bwillwrite();
        NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
            segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td);
        nd.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&nd)) != 0)
                return (error);
        vp = nd.ni_vp;
        if (vp != NULL) {
                /* The name already exists: release and report EEXIST. */
                NDFREE(&nd, NDF_ONLY_PNBUF);
                /*
                 * XXX namei called with LOCKPARENT but not LOCKLEAF has
                 * the strange behaviour of leaving the vnode unlocked
                 * if the target is the same vnode as the parent.
                 */
                if (vp == nd.ni_dvp)
                        vrele(nd.ni_dvp);
                else
                        vput(nd.ni_dvp);
                vrele(vp);
                return (EEXIST);
        }
        if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                /*
                 * Could not start the write without waiting: let go of
                 * the parent, wait with no vnode held, then retry.
                 */
                NDFREE(&nd, NDF_ONLY_PNBUF);
                vput(nd.ni_dvp);
                if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
                        return (error);
                goto restart;
        }
        VATTR_NULL(&vattr);
        vattr.va_type = VDIR;
        vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
        error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
            &vattr);
        if (error != 0)
                goto out;
#endif
        error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        /* On success the new directory vnode is released as well. */
        if (error == 0)
                vput(nd.ni_vp);
        vn_finished_write(mp);
        return (error);
}
 3718 
 3719 /*
 3720  * Remove a directory file.
 3721  */
 3722 #ifndef _SYS_SYSPROTO_H_
 3723 struct rmdir_args {
 3724         char    *path;
 3725 };
 3726 #endif
 3727 int
 3728 sys_rmdir(td, uap)
 3729         struct thread *td;
 3730         struct rmdir_args /* {
 3731                 char *path;
 3732         } */ *uap;
 3733 {
 3734 
 3735         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3736 }
 3737 
 3738 int
 3739 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3740 {
 3741 
 3742         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3743 }
 3744 
/*
 * Remove the directory named by path, looked up relative to fd.
 *
 * Fails with ENOTDIR when the target is not a directory, EINVAL when the
 * target is the same vnode as its parent, and EBUSY when the target has
 * VV_ROOT set.  If write access to the parent's filesystem cannot be
 * started without waiting, both vnodes are released, the wait is done
 * with no vnode held, and the lookup is redone from the top.  Returns 0
 * or the first error encountered.
 */
int
kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
{
        struct mount *mp;
        struct vnode *vp;
        struct nameidata nd;
        cap_rights_t rights;
        int error;

restart:
        bwillwrite();
        NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
            pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
        if ((error = namei(&nd)) != 0)
                return (error);
        vp = nd.ni_vp;
        if (vp->v_type != VDIR) {
                error = ENOTDIR;
                goto out;
        }
        /*
         * No rmdir "." please.
         */
        if (nd.ni_dvp == vp) {
                error = EINVAL;
                goto out;
        }
        /*
         * The root of a mounted filesystem cannot be deleted.
         */
        if (vp->v_vflag & VV_ROOT) {
                error = EBUSY;
                goto out;
        }
#ifdef MAC
        error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
            &nd.ni_cnd);
        if (error != 0)
                goto out;
#endif
        if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                /*
                 * Could not start the write without waiting: let go of
                 * both vnodes, wait with no vnode held, then retry.
                 */
                NDFREE(&nd, NDF_ONLY_PNBUF);
                vput(vp);
                if (nd.ni_dvp == vp)
                        vrele(nd.ni_dvp);
                else
                        vput(nd.ni_dvp);
                if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
                        return (error);
                goto restart;
        }
        vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
        error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
        vn_finished_write(mp);
out:
        /* Common exit: release the target, then its parent. */
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(vp);
        if (nd.ni_dvp == vp)
                vrele(nd.ni_dvp);
        else
                vput(nd.ni_dvp);
        return (error);
}
 3808 
 3809 #ifdef COMPAT_43
 3810 /*
 3811  * Read a block of directory entries in a filesystem independent format.
 3812  */
 3813 #ifndef _SYS_SYSPROTO_H_
 3814 struct ogetdirentries_args {
 3815         int     fd;
 3816         char    *buf;
 3817         u_int   count;
 3818         long    *basep;
 3819 };
 3820 #endif
 3821 int
 3822 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3823 {
 3824         long loff;
 3825         int error;
 3826 
 3827         error = kern_ogetdirentries(td, uap, &loff);
 3828         if (error == 0)
 3829                 error = copyout(&loff, uap->basep, sizeof(long));
 3830         return (error);
 3831 }
 3832 
 3833 int
 3834 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3835     long *ploff)
 3836 {
 3837         struct vnode *vp;
 3838         struct file *fp;
 3839         struct uio auio, kuio;
 3840         struct iovec aiov, kiov;
 3841         struct dirent *dp, *edp;
 3842         cap_rights_t rights;
 3843         caddr_t dirbuf;
 3844         int error, eofflag, readcnt;
 3845         long loff;
 3846         off_t foffset;
 3847 
 3848         /* XXX arbitrary sanity limit on `count'. */
 3849         if (uap->count > 64 * 1024)
 3850                 return (EINVAL);
 3851         error = getvnode(td->td_proc->p_fd, uap->fd,
 3852             cap_rights_init(&rights, CAP_READ), &fp);
 3853         if (error != 0)
 3854                 return (error);
 3855         if ((fp->f_flag & FREAD) == 0) {
 3856                 fdrop(fp, td);
 3857                 return (EBADF);
 3858         }
 3859         vp = fp->f_vnode;
 3860         foffset = foffset_lock(fp, 0);
 3861 unionread:
 3862         if (vp->v_type != VDIR) {
 3863                 foffset_unlock(fp, foffset, 0);
 3864                 fdrop(fp, td);
 3865                 return (EINVAL);
 3866         }
 3867         aiov.iov_base = uap->buf;
 3868         aiov.iov_len = uap->count;
 3869         auio.uio_iov = &aiov;
 3870         auio.uio_iovcnt = 1;
 3871         auio.uio_rw = UIO_READ;
 3872         auio.uio_segflg = UIO_USERSPACE;
 3873         auio.uio_td = td;
 3874         auio.uio_resid = uap->count;
 3875         vn_lock(vp, LK_SHARED | LK_RETRY);
 3876         loff = auio.uio_offset = foffset;
 3877 #ifdef MAC
 3878         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3879         if (error != 0) {
 3880                 VOP_UNLOCK(vp, 0);
 3881                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 3882                 fdrop(fp, td);
 3883                 return (error);
 3884         }
 3885 #endif
 3886 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3887                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3888                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3889                             NULL, NULL);
 3890                         foffset = auio.uio_offset;
 3891                 } else
 3892 #       endif
 3893         {
 3894                 kuio = auio;
 3895                 kuio.uio_iov = &kiov;
 3896                 kuio.uio_segflg = UIO_SYSSPACE;
 3897                 kiov.iov_len = uap->count;
 3898                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3899                 kiov.iov_base = dirbuf;
 3900                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3901                             NULL, NULL);
 3902                 foffset = kuio.uio_offset;
 3903                 if (error == 0) {
 3904                         readcnt = uap->count - kuio.uio_resid;
 3905                         edp = (struct dirent *)&dirbuf[readcnt];
 3906                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3907 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3908                                         /*
 3909                                          * The expected low byte of
 3910                                          * dp->d_namlen is our dp->d_type.
 3911                                          * The high MBZ byte of dp->d_namlen
 3912                                          * is our dp->d_namlen.
 3913                                          */
 3914                                         dp->d_type = dp->d_namlen;
 3915                                         dp->d_namlen = 0;
 3916 #                               else
 3917                                         /*
 3918                                          * The dp->d_type is the high byte
 3919                                          * of the expected dp->d_namlen,
 3920                                          * so must be zero'ed.
 3921                                          */
 3922                                         dp->d_type = 0;
 3923 #                               endif
 3924                                 if (dp->d_reclen > 0) {
 3925                                         dp = (struct dirent *)
 3926                                             ((char *)dp + dp->d_reclen);
 3927                                 } else {
 3928                                         error = EIO;
 3929                                         break;
 3930                                 }
 3931                         }
 3932                         if (dp >= edp)
 3933                                 error = uiomove(dirbuf, readcnt, &auio);
 3934                 }
 3935                 free(dirbuf, M_TEMP);
 3936         }
 3937         if (error != 0) {
 3938                 VOP_UNLOCK(vp, 0);
 3939                 foffset_unlock(fp, foffset, 0);
 3940                 fdrop(fp, td);
 3941                 return (error);
 3942         }
 3943         if (uap->count == auio.uio_resid &&
 3944             (vp->v_vflag & VV_ROOT) &&
 3945             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3946                 struct vnode *tvp = vp;
 3947                 vp = vp->v_mount->mnt_vnodecovered;
 3948                 VREF(vp);
 3949                 fp->f_vnode = vp;
 3950                 fp->f_data = vp;
 3951                 foffset = 0;
 3952                 vput(tvp);
 3953                 goto unionread;
 3954         }
 3955         VOP_UNLOCK(vp, 0);
 3956         foffset_unlock(fp, foffset, 0);
 3957         fdrop(fp, td);
 3958         td->td_retval[0] = uap->count - auio.uio_resid;
 3959         if (error == 0)
 3960                 *ploff = loff;
 3961         return (error);
 3962 }
 3963 #endif /* COMPAT_43 */
 3964 
 3965 /*
 3966  * Read a block of directory entries in a filesystem independent format.
 3967  */
 3968 #ifndef _SYS_SYSPROTO_H_
 3969 struct getdirentries_args {
 3970         int     fd;
 3971         char    *buf;
 3972         u_int   count;
 3973         long    *basep;
 3974 };
 3975 #endif
 3976 int
 3977 sys_getdirentries(td, uap)
 3978         struct thread *td;
 3979         register struct getdirentries_args /* {
 3980                 int fd;
 3981                 char *buf;
 3982                 u_int count;
 3983                 long *basep;
 3984         } */ *uap;
 3985 {
 3986         long base;
 3987         int error;
 3988 
 3989         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 3990             NULL, UIO_USERSPACE);
 3991         if (error != 0)
 3992                 return (error);
 3993         if (uap->basep != NULL)
 3994                 error = copyout(&base, uap->basep, sizeof(long));
 3995         return (error);
 3996 }
 3997 
 3998 int
 3999 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4000     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4001 {
 4002         struct vnode *vp;
 4003         struct file *fp;
 4004         struct uio auio;
 4005         struct iovec aiov;
 4006         cap_rights_t rights;
 4007         long loff;
 4008         int error, eofflag;
 4009         off_t foffset;
 4010 
 4011         AUDIT_ARG_FD(fd);
 4012         if (count > IOSIZE_MAX)
 4013                 return (EINVAL);
 4014         auio.uio_resid = count;
 4015         error = getvnode(td->td_proc->p_fd, fd,
 4016             cap_rights_init(&rights, CAP_READ), &fp);
 4017         if (error != 0)
 4018                 return (error);
 4019         if ((fp->f_flag & FREAD) == 0) {
 4020                 fdrop(fp, td);
 4021                 return (EBADF);
 4022         }
 4023         vp = fp->f_vnode;
 4024         foffset = foffset_lock(fp, 0);
 4025 unionread:
 4026         if (vp->v_type != VDIR) {
 4027                 error = EINVAL;
 4028                 goto fail;
 4029         }
 4030         aiov.iov_base = buf;
 4031         aiov.iov_len = count;
 4032         auio.uio_iov = &aiov;
 4033         auio.uio_iovcnt = 1;
 4034         auio.uio_rw = UIO_READ;
 4035         auio.uio_segflg = bufseg;
 4036         auio.uio_td = td;
 4037         vn_lock(vp, LK_SHARED | LK_RETRY);
 4038         AUDIT_ARG_VNODE1(vp);
 4039         loff = auio.uio_offset = foffset;
 4040 #ifdef MAC
 4041         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4042         if (error == 0)
 4043 #endif
 4044                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4045                     NULL);
 4046         foffset = auio.uio_offset;
 4047         if (error != 0) {
 4048                 VOP_UNLOCK(vp, 0);
 4049                 goto fail;
 4050         }
 4051         if (count == auio.uio_resid &&
 4052             (vp->v_vflag & VV_ROOT) &&
 4053             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4054                 struct vnode *tvp = vp;
 4055 
 4056                 vp = vp->v_mount->mnt_vnodecovered;
 4057                 VREF(vp);
 4058                 fp->f_vnode = vp;
 4059                 fp->f_data = vp;
 4060                 foffset = 0;
 4061                 vput(tvp);
 4062                 goto unionread;
 4063         }
 4064         VOP_UNLOCK(vp, 0);
 4065         *basep = loff;
 4066         if (residp != NULL)
 4067                 *residp = auio.uio_resid;
 4068         td->td_retval[0] = count - auio.uio_resid;
 4069 fail:
 4070         foffset_unlock(fp, foffset, 0);
 4071         fdrop(fp, td);
 4072         return (error);
 4073 }
 4074 
 4075 #ifndef _SYS_SYSPROTO_H_
 4076 struct getdents_args {
 4077         int fd;
 4078         char *buf;
 4079         size_t count;
 4080 };
 4081 #endif
 4082 int
 4083 sys_getdents(td, uap)
 4084         struct thread *td;
 4085         register struct getdents_args /* {
 4086                 int fd;
 4087                 char *buf;
 4088                 u_int count;
 4089         } */ *uap;
 4090 {
 4091         struct getdirentries_args ap;
 4092 
 4093         ap.fd = uap->fd;
 4094         ap.buf = uap->buf;
 4095         ap.count = uap->count;
 4096         ap.basep = NULL;
 4097         return (sys_getdirentries(td, &ap));
 4098 }
 4099 
 4100 /*
 4101  * Set the mode mask for creation of filesystem nodes.
 4102  */
 4103 #ifndef _SYS_SYSPROTO_H_
 4104 struct umask_args {
 4105         int     newmask;
 4106 };
 4107 #endif
 4108 int
 4109 sys_umask(td, uap)
 4110         struct thread *td;
 4111         struct umask_args /* {
 4112                 int newmask;
 4113         } */ *uap;
 4114 {
 4115         register struct filedesc *fdp;
 4116 
 4117         FILEDESC_XLOCK(td->td_proc->p_fd);
 4118         fdp = td->td_proc->p_fd;
 4119         td->td_retval[0] = fdp->fd_cmask;
 4120         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4121         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4122         return (0);
 4123 }
 4124 
 4125 /*
 4126  * Void all references to file by ripping underlying filesystem away from
 4127  * vnode.
 4128  */
 4129 #ifndef _SYS_SYSPROTO_H_
 4130 struct revoke_args {
 4131         char    *path;
 4132 };
 4133 #endif
 4134 int
 4135 sys_revoke(td, uap)
 4136         struct thread *td;
 4137         register struct revoke_args /* {
 4138                 char *path;
 4139         } */ *uap;
 4140 {
 4141         struct vnode *vp;
 4142         struct vattr vattr;
 4143         struct nameidata nd;
 4144         int error;
 4145 
 4146         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4147             uap->path, td);
 4148         if ((error = namei(&nd)) != 0)
 4149                 return (error);
 4150         vp = nd.ni_vp;
 4151         NDFREE(&nd, NDF_ONLY_PNBUF);
 4152         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4153                 error = EINVAL;
 4154                 goto out;
 4155         }
 4156 #ifdef MAC
 4157         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4158         if (error != 0)
 4159                 goto out;
 4160 #endif
 4161         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4162         if (error != 0)
 4163                 goto out;
 4164         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4165                 error = priv_check(td, PRIV_VFS_ADMIN);
 4166                 if (error != 0)
 4167                         goto out;
 4168         }
 4169         if (vcount(vp) > 1)
 4170                 VOP_REVOKE(vp, REVOKEALL);
 4171 out:
 4172         vput(vp);
 4173         return (error);
 4174 }
 4175 
 4176 /*
 4177  * Convert a user file descriptor to a kernel file entry and check that, if it
 4178  * is a capability, the correct rights are present. A reference on the file
 4179  * entry is held upon returning.
 4180  */
 4181 int
 4182 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4183 {
 4184         struct file *fp;
 4185         int error;
 4186 
 4187         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4188         if (error != 0)
 4189                 return (error);
 4190 
 4191         /*
 4192          * The file could be not of the vnode type, or it may be not
 4193          * yet fully initialized, in which case the f_vnode pointer
 4194          * may be set, but f_ops is still badfileops.  E.g.,
 4195          * devfs_open() transiently create such situation to
 4196          * facilitate csw d_fdopen().
 4197          *
 4198          * Dupfdopen() handling in kern_openat() installs the
 4199          * half-baked file into the process descriptor table, allowing
 4200          * other thread to dereference it. Guard against the race by
 4201          * checking f_ops.
 4202          */
 4203         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4204                 fdrop(fp, curthread);
 4205                 return (EINVAL);
 4206         }
 4207         *fpp = fp;
 4208         return (0);
 4209 }
 4210 
 4211 
 4212 /*
 4213  * Get an (NFS) file handle.
 4214  */
 4215 #ifndef _SYS_SYSPROTO_H_
 4216 struct lgetfh_args {
 4217         char    *fname;
 4218         fhandle_t *fhp;
 4219 };
 4220 #endif
 4221 int
 4222 sys_lgetfh(td, uap)
 4223         struct thread *td;
 4224         register struct lgetfh_args *uap;
 4225 {
 4226         struct nameidata nd;
 4227         fhandle_t fh;
 4228         register struct vnode *vp;
 4229         int error;
 4230 
 4231         error = priv_check(td, PRIV_VFS_GETFH);
 4232         if (error != 0)
 4233                 return (error);
 4234         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4235             uap->fname, td);
 4236         error = namei(&nd);
 4237         if (error != 0)
 4238                 return (error);
 4239         NDFREE(&nd, NDF_ONLY_PNBUF);
 4240         vp = nd.ni_vp;
 4241         bzero(&fh, sizeof(fh));
 4242         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4243         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4244         vput(vp);
 4245         if (error == 0)
 4246                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4247         return (error);
 4248 }
 4249 
 4250 #ifndef _SYS_SYSPROTO_H_
 4251 struct getfh_args {
 4252         char    *fname;
 4253         fhandle_t *fhp;
 4254 };
 4255 #endif
 4256 int
 4257 sys_getfh(td, uap)
 4258         struct thread *td;
 4259         register struct getfh_args *uap;
 4260 {
 4261         struct nameidata nd;
 4262         fhandle_t fh;
 4263         register struct vnode *vp;
 4264         int error;
 4265 
 4266         error = priv_check(td, PRIV_VFS_GETFH);
 4267         if (error != 0)
 4268                 return (error);
 4269         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4270             uap->fname, td);
 4271         error = namei(&nd);
 4272         if (error != 0)
 4273                 return (error);
 4274         NDFREE(&nd, NDF_ONLY_PNBUF);
 4275         vp = nd.ni_vp;
 4276         bzero(&fh, sizeof(fh));
 4277         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4278         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4279         vput(vp);
 4280         if (error == 0)
 4281                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4282         return (error);
 4283 }
 4284 
 4285 /*
 4286  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4287  * open descriptor.
 4288  *
 4289  * warning: do not remove the priv_check() call or this becomes one giant
 4290  * security hole.
 4291  */
 4292 #ifndef _SYS_SYSPROTO_H_
 4293 struct fhopen_args {
 4294         const struct fhandle *u_fhp;
 4295         int flags;
 4296 };
 4297 #endif
 4298 int
 4299 sys_fhopen(td, uap)
 4300         struct thread *td;
 4301         struct fhopen_args /* {
 4302                 const struct fhandle *u_fhp;
 4303                 int flags;
 4304         } */ *uap;
 4305 {
 4306         struct mount *mp;
 4307         struct vnode *vp;
 4308         struct fhandle fhp;
 4309         struct file *fp;
 4310         int fmode, error;
 4311         int indx;
 4312 
 4313         error = priv_check(td, PRIV_VFS_FHOPEN);
 4314         if (error != 0)
 4315                 return (error);
 4316         indx = -1;
 4317         fmode = FFLAGS(uap->flags);
 4318         /* why not allow a non-read/write open for our lockd? */
 4319         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4320                 return (EINVAL);
 4321         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4322         if (error != 0)
 4323                 return(error);
 4324         /* find the mount point */
 4325         mp = vfs_busyfs(&fhp.fh_fsid);
 4326         if (mp == NULL)
 4327                 return (ESTALE);
 4328         /* now give me my vnode, it gets returned to me locked */
 4329         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4330         vfs_unbusy(mp);
 4331         if (error != 0)
 4332                 return (error);
 4333 
 4334         error = falloc_noinstall(td, &fp);
 4335         if (error != 0) {
 4336                 vput(vp);
 4337                 return (error);
 4338         }
 4339         /*
 4340          * An extra reference on `fp' has been held for us by
 4341          * falloc_noinstall().
 4342          */
 4343 
 4344 #ifdef INVARIANTS
 4345         td->td_dupfd = -1;
 4346 #endif
 4347         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4348         if (error != 0) {
 4349                 KASSERT(fp->f_ops == &badfileops,
 4350                     ("VOP_OPEN in fhopen() set f_ops"));
 4351                 KASSERT(td->td_dupfd < 0,
 4352                     ("fhopen() encountered fdopen()"));
 4353 
 4354                 vput(vp);
 4355                 goto bad;
 4356         }
 4357 #ifdef INVARIANTS
 4358         td->td_dupfd = 0;
 4359 #endif
 4360         fp->f_vnode = vp;
 4361         fp->f_seqcount = 1;
 4362         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4363             &vnops);
 4364         VOP_UNLOCK(vp, 0);
 4365         if ((fmode & O_TRUNC) != 0) {
 4366                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4367                 if (error != 0)
 4368                         goto bad;
 4369         }
 4370 
 4371         error = finstall(td, fp, &indx, fmode, NULL);
 4372 bad:
 4373         fdrop(fp, td);
 4374         td->td_retval[0] = indx;
 4375         return (error);
 4376 }
 4377 
 4378 /*
 4379  * Stat an (NFS) file handle.
 4380  */
 4381 #ifndef _SYS_SYSPROTO_H_
 4382 struct fhstat_args {
 4383         struct fhandle *u_fhp;
 4384         struct stat *sb;
 4385 };
 4386 #endif
 4387 int
 4388 sys_fhstat(td, uap)
 4389         struct thread *td;
 4390         register struct fhstat_args /* {
 4391                 struct fhandle *u_fhp;
 4392                 struct stat *sb;
 4393         } */ *uap;
 4394 {
 4395         struct stat sb;
 4396         struct fhandle fh;
 4397         int error;
 4398 
 4399         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4400         if (error != 0)
 4401                 return (error);
 4402         error = kern_fhstat(td, fh, &sb);
 4403         if (error == 0)
 4404                 error = copyout(&sb, uap->sb, sizeof(sb));
 4405         return (error);
 4406 }
 4407 
 4408 int
 4409 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4410 {
 4411         struct mount *mp;
 4412         struct vnode *vp;
 4413         int error;
 4414 
 4415         error = priv_check(td, PRIV_VFS_FHSTAT);
 4416         if (error != 0)
 4417                 return (error);
 4418         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4419                 return (ESTALE);
 4420         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4421         vfs_unbusy(mp);
 4422         if (error != 0)
 4423                 return (error);
 4424         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4425         vput(vp);
 4426         return (error);
 4427 }
 4428 
 4429 /*
 4430  * Implement fstatfs() for (NFS) file handles.
 4431  */
 4432 #ifndef _SYS_SYSPROTO_H_
 4433 struct fhstatfs_args {
 4434         struct fhandle *u_fhp;
 4435         struct statfs *buf;
 4436 };
 4437 #endif
 4438 int
 4439 sys_fhstatfs(td, uap)
 4440         struct thread *td;
 4441         struct fhstatfs_args /* {
 4442                 struct fhandle *u_fhp;
 4443                 struct statfs *buf;
 4444         } */ *uap;
 4445 {
 4446         struct statfs sf;
 4447         fhandle_t fh;
 4448         int error;
 4449 
 4450         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4451         if (error != 0)
 4452                 return (error);
 4453         error = kern_fhstatfs(td, fh, &sf);
 4454         if (error != 0)
 4455                 return (error);
 4456         return (copyout(&sf, uap->buf, sizeof(sf)));
 4457 }
 4458 
 4459 int
 4460 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4461 {
 4462         struct statfs *sp;
 4463         struct mount *mp;
 4464         struct vnode *vp;
 4465         int error;
 4466 
 4467         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4468         if (error != 0)
 4469                 return (error);
 4470         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4471                 return (ESTALE);
 4472         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4473         if (error != 0) {
 4474                 vfs_unbusy(mp);
 4475                 return (error);
 4476         }
 4477         vput(vp);
 4478         error = prison_canseemount(td->td_ucred, mp);
 4479         if (error != 0)
 4480                 goto out;
 4481 #ifdef MAC
 4482         error = mac_mount_check_stat(td->td_ucred, mp);
 4483         if (error != 0)
 4484                 goto out;
 4485 #endif
 4486         /*
 4487          * Set these in case the underlying filesystem fails to do so.
 4488          */
 4489         sp = &mp->mnt_stat;
 4490         sp->f_version = STATFS_VERSION;
 4491         sp->f_namemax = NAME_MAX;
 4492         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4493         error = VFS_STATFS(mp, sp);
 4494         if (error == 0)
 4495                 *buf = *sp;
 4496 out:
 4497         vfs_unbusy(mp);
 4498         return (error);
 4499 }
 4500 
 4501 int
 4502 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4503 {
 4504         struct file *fp;
 4505         struct mount *mp;
 4506         struct vnode *vp;
 4507         cap_rights_t rights;
 4508         off_t olen, ooffset;
 4509         int error;
 4510 
 4511         fp = NULL;
 4512         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4513         if (error != 0)
 4514                 goto out;
 4515 
 4516         switch (fp->f_type) {
 4517         case DTYPE_VNODE:
 4518                 break;
 4519         case DTYPE_PIPE:
 4520         case DTYPE_FIFO:
 4521                 error = ESPIPE;
 4522                 goto out;
 4523         default:
 4524                 error = ENODEV;
 4525                 goto out;
 4526         }
 4527         if ((fp->f_flag & FWRITE) == 0) {
 4528                 error = EBADF;
 4529                 goto out;
 4530         }
 4531         vp = fp->f_vnode;
 4532         if (vp->v_type != VREG) {
 4533                 error = ENODEV;
 4534                 goto out;
 4535         }
 4536         if (offset < 0 || len <= 0) {
 4537                 error = EINVAL;
 4538                 goto out;
 4539         }
 4540         /* Check for wrap. */
 4541         if (offset > OFF_MAX - len) {
 4542                 error = EFBIG;
 4543                 goto out;
 4544         }
 4545 
 4546         /* Allocating blocks may take a long time, so iterate. */
 4547         for (;;) {
 4548                 olen = len;
 4549                 ooffset = offset;
 4550 
 4551                 bwillwrite();
 4552                 mp = NULL;
 4553                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4554                 if (error != 0)
 4555                         break;
 4556                 error = vn_lock(vp, LK_EXCLUSIVE);
 4557                 if (error != 0) {
 4558                         vn_finished_write(mp);
 4559                         break;
 4560                 }
 4561 #ifdef MAC
 4562                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4563                 if (error == 0)
 4564 #endif
 4565                         error = VOP_ALLOCATE(vp, &offset, &len);
 4566                 VOP_UNLOCK(vp, 0);
 4567                 vn_finished_write(mp);
 4568 
 4569                 if (olen + ooffset != offset + len) {
 4570                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4571                             ooffset, olen, offset, len);
 4572                 }
 4573                 if (error != 0 || len == 0)
 4574                         break;
 4575                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4576                 maybe_yield();
 4577         }
 4578  out:
 4579         if (fp != NULL)
 4580                 fdrop(fp, td);
 4581         return (error);
 4582 }
 4583 
 4584 int
 4585 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4586 {
 4587 
 4588         return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 4589 }
 4590 
 4591 /*
 4592  * Unlike madvise(2), we do not make a best effort to remember every
 4593  * possible caching hint.  Instead, we remember the last setting with
 4594  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4595  * region of any current setting.
 4596  */
 4597 int
 4598 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4599     int advice)
 4600 {
 4601         struct fadvise_info *fa, *new;
 4602         struct file *fp;
 4603         struct vnode *vp;
 4604         cap_rights_t rights;
 4605         off_t end;
 4606         int error;
 4607 
 4608         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4609                 return (EINVAL);
 4610         switch (advice) {
 4611         case POSIX_FADV_SEQUENTIAL:
 4612         case POSIX_FADV_RANDOM:
 4613         case POSIX_FADV_NOREUSE:
 4614                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4615                 break;
 4616         case POSIX_FADV_NORMAL:
 4617         case POSIX_FADV_WILLNEED:
 4618         case POSIX_FADV_DONTNEED:
 4619                 new = NULL;
 4620                 break;
 4621         default:
 4622                 return (EINVAL);
 4623         }
 4624         /* XXX: CAP_POSIX_FADVISE? */
 4625         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4626         if (error != 0)
 4627                 goto out;
 4628 
 4629         switch (fp->f_type) {
 4630         case DTYPE_VNODE:
 4631                 break;
 4632         case DTYPE_PIPE:
 4633         case DTYPE_FIFO:
 4634                 error = ESPIPE;
 4635                 goto out;
 4636         default:
 4637                 error = ENODEV;
 4638                 goto out;
 4639         }
 4640         vp = fp->f_vnode;
 4641         if (vp->v_type != VREG) {
 4642                 error = ENODEV;
 4643                 goto out;
 4644         }
 4645         if (len == 0)
 4646                 end = OFF_MAX;
 4647         else
 4648                 end = offset + len - 1;
 4649         switch (advice) {
 4650         case POSIX_FADV_SEQUENTIAL:
 4651         case POSIX_FADV_RANDOM:
 4652         case POSIX_FADV_NOREUSE:
 4653                 /*
 4654                  * Try to merge any existing non-standard region with
 4655                  * this new region if possible, otherwise create a new
 4656                  * non-standard region for this request.
 4657                  */
 4658                 mtx_pool_lock(mtxpool_sleep, fp);
 4659                 fa = fp->f_advice;
 4660                 if (fa != NULL && fa->fa_advice == advice &&
 4661                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4662                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4663                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4664                         if (offset < fa->fa_start)
 4665                                 fa->fa_start = offset;
 4666                         if (end > fa->fa_end)
 4667                                 fa->fa_end = end;
 4668                 } else {
 4669                         new->fa_advice = advice;
 4670                         new->fa_start = offset;
 4671                         new->fa_end = end;
 4672                         new->fa_prevstart = 0;
 4673                         new->fa_prevend = 0;
 4674                         fp->f_advice = new;
 4675                         new = fa;
 4676                 }
 4677                 mtx_pool_unlock(mtxpool_sleep, fp);
 4678                 break;
 4679         case POSIX_FADV_NORMAL:
 4680                 /*
 4681                  * If a the "normal" region overlaps with an existing
 4682                  * non-standard region, trim or remove the
 4683                  * non-standard region.
 4684                  */
 4685                 mtx_pool_lock(mtxpool_sleep, fp);
 4686                 fa = fp->f_advice;
 4687                 if (fa != NULL) {
 4688                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4689                                 new = fa;
 4690                                 fp->f_advice = NULL;
 4691                         } else if (offset <= fa->fa_start &&
 4692                             end >= fa->fa_start)
 4693                                 fa->fa_start = end + 1;
 4694                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4695                                 fa->fa_end = offset - 1;
 4696                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4697                                 /*
 4698                                  * If the "normal" region is a middle
 4699                                  * portion of the existing
 4700                                  * non-standard region, just remove
 4701                                  * the whole thing rather than picking
 4702                                  * one side or the other to
 4703                                  * preserve.
 4704                                  */
 4705                                 new = fa;
 4706                                 fp->f_advice = NULL;
 4707                         }
 4708                 }
 4709                 mtx_pool_unlock(mtxpool_sleep, fp);
 4710                 break;
 4711         case POSIX_FADV_WILLNEED:
 4712         case POSIX_FADV_DONTNEED:
 4713                 error = VOP_ADVISE(vp, offset, end, advice);
 4714                 break;
 4715         }
 4716 out:
 4717         if (fp != NULL)
 4718                 fdrop(fp, td);
 4719         free(new, M_FADVISE);
 4720         return (error);
 4721 }
 4722 
 4723 int
 4724 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4725 {
 4726 
 4727         return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4728             uap->advice));
 4729 }

Cache object: f3ffaa0281a2ba1e3bd5764c7f58ef3c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.