vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.2/sys/kern/vfs_syscalls.c 301051 2016-05-31 16:55:45Z glebius $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   /*
    * Malloc type M_FADVISE (short name "fadvise"); per its description
    * string it accounts for posix_fadvise(2) information.
    */
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
   /*
    * Statically defined tracing: a "vfs" provider plus two probes,
    * stat:mode and stat:reg, each declared with a "char *" and an "int"
    * argument.
    * NOTE(review): the sites that fire these probes are not in this part of
    * the file -- confirm what the two arguments carry.
    */
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
   /*
    * Forward declarations of file-local (static) helpers.
    * chroot_refuse_vdir_fds() is defined below next to chroot(2);
    * NOTE(review): the remaining helpers are presumably defined further
    * down in this file -- their bodies are not visible from here.
    */
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  100 static int kern_chflags(struct thread *td, const char *path,
  101     enum uio_seg pathseg, u_long flags);
  102 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  103     enum uio_seg pathseg, u_long flags, int atflag);
  104 static int setfflags(struct thread *td, struct vnode *, u_long);
  105 static int setutimes(struct thread *td, struct vnode *,
  106     const struct timespec *, int, int);
  107 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  108     struct thread *td);
  109 
  110 /*
  111  * The module initialization routine for POSIX asynchronous I/O will
  112  * set this to the version of AIO that it implements.  (Zero means
  113  * that it is not implemented.)  This value is used here by pathconf()
  114  * and in kern_descrip.c by fpathconf().
  115  */
      /*
       * Deliberately non-static so other files can see it.  It has no
       * initializer, so as a file-scope object it starts out as zero, i.e.
       * "not implemented" until something stores a version here.
       */
  116 int async_io_version;
  117 
  118 /*
  119  * Sync each mounted filesystem.
  120  */
  121 #ifndef _SYS_SYSPROTO_H_
  122 struct sync_args {
  123         int     dummy;
  124 };
  125 #endif
  126 /* ARGSUSED */
  127 int
  128 sys_sync(td, uap)
  129         struct thread *td;
  130         struct sync_args *uap;
  131 {
  132         struct mount *mp, *nmp;
  133         int save;
  134 
  135         mtx_lock(&mountlist_mtx);
  136         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  137                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  138                         nmp = TAILQ_NEXT(mp, mnt_list);
  139                         continue;
  140                 }
  141                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  142                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  143                         save = curthread_pflags_set(TDP_SYNCIO);
  144                         vfs_msync(mp, MNT_NOWAIT);
  145                         VFS_SYNC(mp, MNT_NOWAIT);
  146                         curthread_pflags_restore(save);
  147                         vn_finished_write(mp);
  148                 }
  149                 mtx_lock(&mountlist_mtx);
  150                 nmp = TAILQ_NEXT(mp, mnt_list);
  151                 vfs_unbusy(mp);
  152         }
  153         mtx_unlock(&mountlist_mtx);
  154         return (0);
  155 }
  156 
  157 /*
  158  * Change filesystem quotas.
  159  */
  160 #ifndef _SYS_SYSPROTO_H_
  161 struct quotactl_args {
  162         char *path;
  163         int cmd;
  164         int uid;
  165         caddr_t arg;
  166 };
  167 #endif
  168 int
  169 sys_quotactl(td, uap)
  170         struct thread *td;
  171         register struct quotactl_args /* {
  172                 char *path;
  173                 int cmd;
  174                 int uid;
  175                 caddr_t arg;
  176         } */ *uap;
  177 {
  178         struct mount *mp;
  179         struct nameidata nd;
  180         int error;
  181 
  182         AUDIT_ARG_CMD(uap->cmd);
  183         AUDIT_ARG_UID(uap->uid);
  184         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  185                 return (EPERM);
  186         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  187             uap->path, td);
  188         if ((error = namei(&nd)) != 0)
  189                 return (error);
  190         NDFREE(&nd, NDF_ONLY_PNBUF);
  191         mp = nd.ni_vp->v_mount;
  192         vfs_ref(mp);
  193         vput(nd.ni_vp);
  194         error = vfs_busy(mp, 0);
  195         vfs_rel(mp);
  196         if (error != 0)
  197                 return (error);
  198         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  199 
  200         /*
  201          * Since quota on operation typically needs to open quota
  202          * file, the Q_QUOTAON handler needs to unbusy the mount point
  203          * before calling into namei.  Otherwise, unmount might be
  204          * started between two vfs_busy() invocations (first is our,
  205          * second is from mount point cross-walk code in lookup()),
  206          * causing deadlock.
  207          *
  208          * Require that Q_QUOTAON handles the vfs_busy() reference on
  209          * its own, always returning with ubusied mount point.
  210          */
  211         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  212                 vfs_unbusy(mp);
  213         return (error);
  214 }
  215 
  216 /*
  217  * Used by statfs conversion routines to scale the block size up if
  218  * necessary so that all of the block counts are <= 'max_size'.  Note
  219  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  220  * value of 'n'.
  221  */
  222 void
  223 statfs_scale_blocks(struct statfs *sf, long max_size)
  224 {
  225         uint64_t count;
  226         int shift;
  227 
  228         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  229 
  230         /*
  231          * Attempt to scale the block counts to give a more accurate
  232          * overview to userland of the ratio of free space to used
  233          * space.  To do this, find the largest block count and compute
  234          * a divisor that lets it fit into a signed integer <= max_size.
  235          */
  236         if (sf->f_bavail < 0)
  237                 count = -sf->f_bavail;
  238         else
  239                 count = sf->f_bavail;
  240         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  241         if (count <= max_size)
  242                 return;
  243 
  244         count >>= flsl(max_size);
  245         shift = 0;
  246         while (count > 0) {
  247                 shift++;
  248                 count >>=1;
  249         }
  250 
  251         sf->f_bsize <<= shift;
  252         sf->f_blocks >>= shift;
  253         sf->f_bfree >>= shift;
  254         sf->f_bavail >>= shift;
  255 }
  256 
  257 /*
  258  * Get filesystem statistics.
  259  */
  260 #ifndef _SYS_SYSPROTO_H_
  261 struct statfs_args {
  262         char *path;
  263         struct statfs *buf;
  264 };
  265 #endif
  266 int
  267 sys_statfs(td, uap)
  268         struct thread *td;
  269         register struct statfs_args /* {
  270                 char *path;
  271                 struct statfs *buf;
  272         } */ *uap;
  273 {
  274         struct statfs sf;
  275         int error;
  276 
  277         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  278         if (error == 0)
  279                 error = copyout(&sf, uap->buf, sizeof(sf));
  280         return (error);
  281 }
  282 
  283 int
  284 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  285     struct statfs *buf)
  286 {
  287         struct mount *mp;
  288         struct statfs *sp, sb;
  289         struct nameidata nd;
  290         int error;
  291 
  292         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  293             pathseg, path, td);
  294         error = namei(&nd);
  295         if (error != 0)
  296                 return (error);
  297         mp = nd.ni_vp->v_mount;
  298         vfs_ref(mp);
  299         NDFREE(&nd, NDF_ONLY_PNBUF);
  300         vput(nd.ni_vp);
  301         error = vfs_busy(mp, 0);
  302         vfs_rel(mp);
  303         if (error != 0)
  304                 return (error);
  305 #ifdef MAC
  306         error = mac_mount_check_stat(td->td_ucred, mp);
  307         if (error != 0)
  308                 goto out;
  309 #endif
  310         /*
  311          * Set these in case the underlying filesystem fails to do so.
  312          */
  313         sp = &mp->mnt_stat;
  314         sp->f_version = STATFS_VERSION;
  315         sp->f_namemax = NAME_MAX;
  316         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  317         error = VFS_STATFS(mp, sp);
  318         if (error != 0)
  319                 goto out;
  320         if (priv_check(td, PRIV_VFS_GENERATION)) {
  321                 bcopy(sp, &sb, sizeof(sb));
  322                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  323                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  324                 sp = &sb;
  325         }
  326         *buf = *sp;
  327 out:
  328         vfs_unbusy(mp);
  329         return (error);
  330 }
  331 
  332 /*
  333  * Get filesystem statistics.
  334  */
  335 #ifndef _SYS_SYSPROTO_H_
  336 struct fstatfs_args {
  337         int fd;
  338         struct statfs *buf;
  339 };
  340 #endif
  341 int
  342 sys_fstatfs(td, uap)
  343         struct thread *td;
  344         register struct fstatfs_args /* {
  345                 int fd;
  346                 struct statfs *buf;
  347         } */ *uap;
  348 {
  349         struct statfs sf;
  350         int error;
  351 
  352         error = kern_fstatfs(td, uap->fd, &sf);
  353         if (error == 0)
  354                 error = copyout(&sf, uap->buf, sizeof(sf));
  355         return (error);
  356 }
  357 
  358 int
  359 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  360 {
  361         struct file *fp;
  362         struct mount *mp;
  363         struct statfs *sp, sb;
  364         struct vnode *vp;
  365         cap_rights_t rights;
  366         int error;
  367 
  368         AUDIT_ARG_FD(fd);
  369         error = getvnode(td->td_proc->p_fd, fd,
  370             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  371         if (error != 0)
  372                 return (error);
  373         vp = fp->f_vnode;
  374         vn_lock(vp, LK_SHARED | LK_RETRY);
  375 #ifdef AUDIT
  376         AUDIT_ARG_VNODE1(vp);
  377 #endif
  378         mp = vp->v_mount;
  379         if (mp)
  380                 vfs_ref(mp);
  381         VOP_UNLOCK(vp, 0);
  382         fdrop(fp, td);
  383         if (mp == NULL) {
  384                 error = EBADF;
  385                 goto out;
  386         }
  387         error = vfs_busy(mp, 0);
  388         vfs_rel(mp);
  389         if (error != 0)
  390                 return (error);
  391 #ifdef MAC
  392         error = mac_mount_check_stat(td->td_ucred, mp);
  393         if (error != 0)
  394                 goto out;
  395 #endif
  396         /*
  397          * Set these in case the underlying filesystem fails to do so.
  398          */
  399         sp = &mp->mnt_stat;
  400         sp->f_version = STATFS_VERSION;
  401         sp->f_namemax = NAME_MAX;
  402         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  403         error = VFS_STATFS(mp, sp);
  404         if (error != 0)
  405                 goto out;
  406         if (priv_check(td, PRIV_VFS_GENERATION)) {
  407                 bcopy(sp, &sb, sizeof(sb));
  408                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  409                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  410                 sp = &sb;
  411         }
  412         *buf = *sp;
  413 out:
  414         if (mp)
  415                 vfs_unbusy(mp);
  416         return (error);
  417 }
  418 
  419 /*
  420  * Get statistics on all filesystems.
  421  */
  422 #ifndef _SYS_SYSPROTO_H_
  423 struct getfsstat_args {
  424         struct statfs *buf;
  425         long bufsize;
  426         int flags;
  427 };
  428 #endif
  429 int
  430 sys_getfsstat(td, uap)
  431         struct thread *td;
  432         register struct getfsstat_args /* {
  433                 struct statfs *buf;
  434                 long bufsize;
  435                 int flags;
  436         } */ *uap;
  437 {
  438 
  439         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  440             uap->flags));
  441 }
  442 
  443 /*
       * Collect statistics for the mounted filesystems into a buffer of
       * 'bufsize' bytes and report how many entries there are.
       *
       *   bufsize == 0            nothing is stored; mounts are only counted.
       *   bufseg == UIO_USERSPACE '*buf' is the destination and entries are
       *                           written with copyout().
       *   otherwise               a buffer is allocated here (M_TEMP) for at
       *                           most as many entries as there were mounts on
       *                           the list when it was sized, and '*buf' is set
       *                           to it.
       *
       * 'flags' (MNT_WAIT, MNT_NOWAIT, MNT_LAZY) decides whether VFS_STATFS()
       * is called to refresh each mount's cached statistics.
       *
       * On success returns 0 and sets td->td_retval[0]: when a buffer is in
       * use and more mounts were counted than fit, the buffer capacity in
       * entries; otherwise the number of mounts counted.  A copyout() failure
       * is returned immediately.
       *
  444  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  445  *      The caller is responsible for freeing memory which will be allocated
  446  *      in '*buf'.
  447  */
  448 int
  449 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  450     enum uio_seg bufseg, int flags)
  451 {
  452         struct mount *mp, *nmp;
  453         struct statfs *sfsp, *sp, sb;
  454         size_t count, maxcount;
  455         int error;
  456 
              /* Capacity of the caller's buffer, in whole entries. */
  457         maxcount = bufsize / sizeof(struct statfs);
  458         if (bufsize == 0)
  459                 sfsp = NULL;
  460         else if (bufseg == UIO_USERSPACE)
  461                 sfsp = *buf;
  462         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Count the mounts currently on the list so the kernel
                       * buffer is never larger than needed.  The list lock is
                       * dropped before the M_WAITOK allocation.
                       */
  463                 count = 0;
  464                 mtx_lock(&mountlist_mtx);
  465                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  466                         count++;
  467                 }
  468                 mtx_unlock(&mountlist_mtx);
  469                 if (maxcount > count)
  470                         maxcount = count;
  471                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  472                     M_WAITOK);
  473         }
  474         count = 0;
  475         mtx_lock(&mountlist_mtx);
  476         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * Mounts rejected by the prison check, the MAC check or a
                       * non-sleeping vfs_busy() are skipped and not counted.
                       */
  477                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  478                         nmp = TAILQ_NEXT(mp, mnt_list);
  479                         continue;
  480                 }
  481 #ifdef MAC
  482                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  483                         nmp = TAILQ_NEXT(mp, mnt_list);
  484                         continue;
  485                 }
  486 #endif
  487                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  488                         nmp = TAILQ_NEXT(mp, mnt_list);
  489                         continue;
  490                 }
                      /* Store an entry only while the buffer still has room. */
  491                 if (sfsp && count < maxcount) {
  492                         sp = &mp->mnt_stat;
  493                         /*
  494                          * Set these in case the underlying filesystem
  495                          * fails to do so.
  496                          */
  497                         sp->f_version = STATFS_VERSION;
  498                         sp->f_namemax = NAME_MAX;
  499                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  500                         /*
  501                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  502                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  503                          * overrides MNT_WAIT.
  504                          */
  505                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  506                             (flags & MNT_WAIT)) &&
  507                             (error = VFS_STATFS(mp, sp))) {
                                      /*
                                       * Refresh failed: this mount is neither
                                       * stored nor counted.
                                       */
  508                                 mtx_lock(&mountlist_mtx);
  509                                 nmp = TAILQ_NEXT(mp, mnt_list);
  510                                 vfs_unbusy(mp);
  511                                 continue;
  512                         }
                              /*
                               * Without PRIV_VFS_GENERATION the entry is taken
                               * from a local copy with f_fsid zeroed that has
                               * been passed through prison_enforce_statfs().
                               */
  513                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  514                                 bcopy(sp, &sb, sizeof(sb));
  515                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  516                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  517                                 sp = &sb;
  518                         }
  519                         if (bufseg == UIO_SYSSPACE)
  520                                 bcopy(sp, sfsp, sizeof(*sp));
  521                         else /* if (bufseg == UIO_USERSPACE) */ {
  522                                 error = copyout(sp, sfsp, sizeof(*sp));
  523                                 if (error != 0) {
                                              /*
                                               * Early exit; td_retval[0] is not
                                               * set on this path.
                                               */
  524                                         vfs_unbusy(mp);
  525                                         return (error);
  526                                 }
  527                         }
  528                         sfsp++;
  529                 }
                      /* Counted even when the buffer was already full. */
  530                 count++;
                      /*
                       * Re-take the list lock before reading the successor and
                       * only then release the busy hold on this mount.
                       */
  531                 mtx_lock(&mountlist_mtx);
  532                 nmp = TAILQ_NEXT(mp, mnt_list);
  533                 vfs_unbusy(mp);
  534         }
  535         mtx_unlock(&mountlist_mtx);
  536         if (sfsp && count > maxcount)
  537                 td->td_retval[0] = maxcount;
  538         else
  539                 td->td_retval[0] = count;
  540         return (0);
  541 }
  542 
  543 #ifdef COMPAT_FREEBSD4
  544 /*
  545  * Get old format filesystem statistics.
  546  */
  /*
   * Forward declaration: cvtstatfs() converts a new-format statfs into an
   * old-format ostatfs and is defined at the end of this COMPAT_FREEBSD4
   * section, after the system calls that use it.
   */
  547 static void cvtstatfs(struct statfs *, struct ostatfs *);
  548 
  549 #ifndef _SYS_SYSPROTO_H_
  550 struct freebsd4_statfs_args {
  551         char *path;
  552         struct ostatfs *buf;
  553 };
  554 #endif
  555 int
  556 freebsd4_statfs(td, uap)
  557         struct thread *td;
  558         struct freebsd4_statfs_args /* {
  559                 char *path;
  560                 struct ostatfs *buf;
  561         } */ *uap;
  562 {
  563         struct ostatfs osb;
  564         struct statfs sf;
  565         int error;
  566 
  567         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  568         if (error != 0)
  569                 return (error);
  570         cvtstatfs(&sf, &osb);
  571         return (copyout(&osb, uap->buf, sizeof(osb)));
  572 }
  573 
  574 /*
  575  * Get filesystem statistics.
  576  */
  577 #ifndef _SYS_SYSPROTO_H_
  578 struct freebsd4_fstatfs_args {
  579         int fd;
  580         struct ostatfs *buf;
  581 };
  582 #endif
  583 int
  584 freebsd4_fstatfs(td, uap)
  585         struct thread *td;
  586         struct freebsd4_fstatfs_args /* {
  587                 int fd;
  588                 struct ostatfs *buf;
  589         } */ *uap;
  590 {
  591         struct ostatfs osb;
  592         struct statfs sf;
  593         int error;
  594 
  595         error = kern_fstatfs(td, uap->fd, &sf);
  596         if (error != 0)
  597                 return (error);
  598         cvtstatfs(&sf, &osb);
  599         return (copyout(&osb, uap->buf, sizeof(osb)));
  600 }
  601 
  602 /*
  603  * Get statistics on all filesystems.
  604  */
  605 #ifndef _SYS_SYSPROTO_H_
  606 struct freebsd4_getfsstat_args {
  607         struct ostatfs *buf;
  608         long bufsize;
  609         int flags;
  610 };
  611 #endif
  612 int
  613 freebsd4_getfsstat(td, uap)
  614         struct thread *td;
  615         register struct freebsd4_getfsstat_args /* {
  616                 struct ostatfs *buf;
  617                 long bufsize;
  618                 int flags;
  619         } */ *uap;
  620 {
  621         struct statfs *buf, *sp;
  622         struct ostatfs osb;
  623         size_t count, size;
  624         int error;
  625 
  626         count = uap->bufsize / sizeof(struct ostatfs);
  627         size = count * sizeof(struct statfs);
  628         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  629         if (size > 0) {
  630                 count = td->td_retval[0];
  631                 sp = buf;
  632                 while (count > 0 && error == 0) {
  633                         cvtstatfs(sp, &osb);
  634                         error = copyout(&osb, uap->buf, sizeof(osb));
  635                         sp++;
  636                         uap->buf++;
  637                         count--;
  638                 }
  639                 free(buf, M_TEMP);
  640         }
  641         return (error);
  642 }
  643 
  644 /*
  645  * Implement fstatfs() for (NFS) file handles.
  646  */
  647 #ifndef _SYS_SYSPROTO_H_
  648 struct freebsd4_fhstatfs_args {
  649         struct fhandle *u_fhp;
  650         struct ostatfs *buf;
  651 };
  652 #endif
  653 int
  654 freebsd4_fhstatfs(td, uap)
  655         struct thread *td;
  656         struct freebsd4_fhstatfs_args /* {
  657                 struct fhandle *u_fhp;
  658                 struct ostatfs *buf;
  659         } */ *uap;
  660 {
  661         struct ostatfs osb;
  662         struct statfs sf;
  663         fhandle_t fh;
  664         int error;
  665 
  666         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  667         if (error != 0)
  668                 return (error);
  669         error = kern_fhstatfs(td, fh, &sf);
  670         if (error != 0)
  671                 return (error);
  672         cvtstatfs(&sf, &osb);
  673         return (copyout(&osb, uap->buf, sizeof(osb)));
  674 }
  675 
  676 /*
  677  * Convert a new format statfs structure to an old format statfs structure.
  678  */
  679 static void
  680 cvtstatfs(nsp, osp)
  681         struct statfs *nsp;
  682         struct ostatfs *osp;
  683 {
  684 
  685         statfs_scale_blocks(nsp, LONG_MAX);
  686         bzero(osp, sizeof(*osp));
  687         osp->f_bsize = nsp->f_bsize;
  688         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  689         osp->f_blocks = nsp->f_blocks;
  690         osp->f_bfree = nsp->f_bfree;
  691         osp->f_bavail = nsp->f_bavail;
  692         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  693         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  694         osp->f_owner = nsp->f_owner;
  695         osp->f_type = nsp->f_type;
  696         osp->f_flags = nsp->f_flags;
  697         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  698         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  699         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  700         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  701         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  702             MIN(MFSNAMELEN, OMFSNAMELEN));
  703         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  704             MIN(MNAMELEN, OMNAMELEN));
  705         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  706             MIN(MNAMELEN, OMNAMELEN));
  707         osp->f_fsid = nsp->f_fsid;
  708 }
  709 #endif /* COMPAT_FREEBSD4 */
  710 
  711 /*
  712  * Change current working directory to a given file descriptor.
  713  */
  714 #ifndef _SYS_SYSPROTO_H_
  715 struct fchdir_args {
  716         int     fd;
  717 };
  718 #endif
  719 int
  720 sys_fchdir(td, uap)
  721         struct thread *td;
  722         struct fchdir_args /* {
  723                 int fd;
  724         } */ *uap;
  725 {
  726         register struct filedesc *fdp = td->td_proc->p_fd;
  727         struct vnode *vp, *tdp, *vpold;
  728         struct mount *mp;
  729         struct file *fp;
  730         cap_rights_t rights;
  731         int error;
  732 
  733         AUDIT_ARG_FD(uap->fd);
  734         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  735             &fp);
  736         if (error != 0)
  737                 return (error);
  738         vp = fp->f_vnode;
  739         VREF(vp);
  740         fdrop(fp, td);
  741         vn_lock(vp, LK_SHARED | LK_RETRY);
  742         AUDIT_ARG_VNODE1(vp);
  743         error = change_dir(vp, td);
  744         while (!error && (mp = vp->v_mountedhere) != NULL) {
  745                 if (vfs_busy(mp, 0))
  746                         continue;
  747                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  748                 vfs_unbusy(mp);
  749                 if (error != 0)
  750                         break;
  751                 vput(vp);
  752                 vp = tdp;
  753         }
  754         if (error != 0) {
  755                 vput(vp);
  756                 return (error);
  757         }
  758         VOP_UNLOCK(vp, 0);
  759         FILEDESC_XLOCK(fdp);
  760         vpold = fdp->fd_cdir;
  761         fdp->fd_cdir = vp;
  762         FILEDESC_XUNLOCK(fdp);
  763         vrele(vpold);
  764         return (0);
  765 }
  766 
  767 /*
  768  * Change current working directory (``.'').
  769  */
  770 #ifndef _SYS_SYSPROTO_H_
  771 struct chdir_args {
  772         char    *path;
  773 };
  774 #endif
  775 int
  776 sys_chdir(td, uap)
  777         struct thread *td;
  778         struct chdir_args /* {
  779                 char *path;
  780         } */ *uap;
  781 {
  782 
  783         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  784 }
  785 
  786 int
  787 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  788 {
  789         register struct filedesc *fdp = td->td_proc->p_fd;
  790         struct nameidata nd;
  791         struct vnode *vp;
  792         int error;
  793 
  794         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  795             pathseg, path, td);
  796         if ((error = namei(&nd)) != 0)
  797                 return (error);
  798         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  799                 vput(nd.ni_vp);
  800                 NDFREE(&nd, NDF_ONLY_PNBUF);
  801                 return (error);
  802         }
  803         VOP_UNLOCK(nd.ni_vp, 0);
  804         NDFREE(&nd, NDF_ONLY_PNBUF);
  805         FILEDESC_XLOCK(fdp);
  806         vp = fdp->fd_cdir;
  807         fdp->fd_cdir = nd.ni_vp;
  808         FILEDESC_XUNLOCK(fdp);
  809         vrele(vp);
  810         return (0);
  811 }
  812 
  813 /*
  814  * Helper function for raised chroot(2) security function:  Refuse if
  815  * any filedescriptors are open directories.
  816  */
  817 static int
  818 chroot_refuse_vdir_fds(fdp)
  819         struct filedesc *fdp;
  820 {
  821         struct vnode *vp;
  822         struct file *fp;
  823         int fd;
  824 
  825         FILEDESC_LOCK_ASSERT(fdp);
  826 
  827         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
  828                 fp = fget_locked(fdp, fd);
  829                 if (fp == NULL)
  830                         continue;
  831                 if (fp->f_type == DTYPE_VNODE) {
  832                         vp = fp->f_vnode;
  833                         if (vp->v_type == VDIR)
  834                                 return (EPERM);
  835                 }
  836         }
  837         return (0);
  838 }
  839 
  840 /*
  841  * This sysctl determines if we will allow a process to chroot(2) if it
  842  * has a directory open:
  843  *      0: disallowed for all processes.
  844  *      1: allowed for processes that were not already chroot(2)'ed.
  845  *      2: allowed for all processes.
  846  */
  847 
      /*
       * Defaults to 1.  change_root() reads this to decide whether
       * chroot_refuse_vdir_fds() must be consulted before the root
       * directory is replaced.
       */
  848 static int chroot_allow_open_directories = 1;
  849 
      /*
       * Registered under the _kern parent with an automatically assigned
       * OID.  NOTE(review): presumably this shows up as
       * kern.chroot_allow_open_directories and CTLFLAG_RW makes it
       * changeable at run time -- confirm against sysctl(9).
       */
  850 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  851      &chroot_allow_open_directories, 0,
  852      "Allow a process to chroot(2) if it has a directory open");
  853 
  854 /*
  855  * Change notion of root (``/'') directory.
  856  */
  857 #ifndef _SYS_SYSPROTO_H_
  858 struct chroot_args {
  859         char    *path;
  860 };
  861 #endif
  862 int
  863 sys_chroot(td, uap)
  864         struct thread *td;
  865         struct chroot_args /* {
  866                 char *path;
  867         } */ *uap;
  868 {
  869         struct nameidata nd;
  870         int error;
  871 
  872         error = priv_check(td, PRIV_VFS_CHROOT);
  873         if (error != 0)
  874                 return (error);
  875         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  876             UIO_USERSPACE, uap->path, td);
  877         error = namei(&nd);
  878         if (error != 0)
  879                 goto error;
  880         error = change_dir(nd.ni_vp, td);
  881         if (error != 0)
  882                 goto e_vunlock;
  883 #ifdef MAC
  884         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  885         if (error != 0)
  886                 goto e_vunlock;
  887 #endif
  888         VOP_UNLOCK(nd.ni_vp, 0);
  889         error = change_root(nd.ni_vp, td);
  890         vrele(nd.ni_vp);
  891         NDFREE(&nd, NDF_ONLY_PNBUF);
  892         return (error);
  893 e_vunlock:
  894         vput(nd.ni_vp);
  895 error:
  896         NDFREE(&nd, NDF_ONLY_PNBUF);
  897         return (error);
  898 }
  899 
  900 /*
  901  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  902  * instance.
  903  */
  904 int
  905 change_dir(vp, td)
  906         struct vnode *vp;
  907         struct thread *td;
  908 {
  909 #ifdef MAC
  910         int error;
  911 #endif
  912 
  913         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  914         if (vp->v_type != VDIR)
  915                 return (ENOTDIR);
  916 #ifdef MAC
  917         error = mac_vnode_check_chdir(td->td_ucred, vp);
  918         if (error != 0)
  919                 return (error);
  920 #endif
  921         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  922 }
  923 
  924 /*
  925  * Common routine for kern_chroot() and jail_attach().  The caller is
  926  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  927  * authorize this operation.
  928  */
  929 int
  930 change_root(vp, td)
  931         struct vnode *vp;
  932         struct thread *td;
  933 {
  934         struct filedesc *fdp;
  935         struct vnode *oldvp;
  936         int error;
  937 
  938         fdp = td->td_proc->p_fd;
  939         FILEDESC_XLOCK(fdp);
  940         if (chroot_allow_open_directories == 0 ||
  941             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  942                 error = chroot_refuse_vdir_fds(fdp);
  943                 if (error != 0) {
  944                         FILEDESC_XUNLOCK(fdp);
  945                         return (error);
  946                 }
  947         }
  948         oldvp = fdp->fd_rdir;
  949         fdp->fd_rdir = vp;
  950         VREF(fdp->fd_rdir);
  951         if (!fdp->fd_jdir) {
  952                 fdp->fd_jdir = vp;
  953                 VREF(fdp->fd_jdir);
  954         }
  955         FILEDESC_XUNLOCK(fdp);
  956         vrele(oldvp);
  957         return (0);
  958 }
  959 
  960 static __inline void
  961 flags_to_rights(int flags, cap_rights_t *rightsp)
  962 {
  963 
  964         if (flags & O_EXEC) {
  965                 cap_rights_set(rightsp, CAP_FEXECVE);
  966         } else {
  967                 switch ((flags & O_ACCMODE)) {
  968                 case O_RDONLY:
  969                         cap_rights_set(rightsp, CAP_READ);
  970                         break;
  971                 case O_RDWR:
  972                         cap_rights_set(rightsp, CAP_READ);
  973                         /* FALLTHROUGH */
  974                 case O_WRONLY:
  975                         cap_rights_set(rightsp, CAP_WRITE);
  976                         if (!(flags & (O_APPEND | O_TRUNC)))
  977                                 cap_rights_set(rightsp, CAP_SEEK);
  978                         break;
  979                 }
  980         }
  981 
  982         if (flags & O_CREAT)
  983                 cap_rights_set(rightsp, CAP_CREATE);
  984 
  985         if (flags & O_TRUNC)
  986                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  987 
  988         if (flags & (O_SYNC | O_FSYNC))
  989                 cap_rights_set(rightsp, CAP_FSYNC);
  990 
  991         if (flags & (O_EXLOCK | O_SHLOCK))
  992                 cap_rights_set(rightsp, CAP_FLOCK);
  993 }
  994 
  995 /*
  996  * Check permissions, allocate an open file structure, and call the device
  997  * open routine if any.
  998  */
  999 #ifndef _SYS_SYSPROTO_H_
 1000 struct open_args {
 1001         char    *path;
 1002         int     flags;
 1003         int     mode;
 1004 };
 1005 #endif
 1006 int
 1007 sys_open(td, uap)
 1008         struct thread *td;
 1009         register struct open_args /* {
 1010                 char *path;
 1011                 int flags;
 1012                 int mode;
 1013         } */ *uap;
 1014 {
 1015 
 1016         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1017 }
 1018 
 1019 #ifndef _SYS_SYSPROTO_H_
 1020 struct openat_args {
 1021         int     fd;
 1022         char    *path;
 1023         int     flag;
 1024         int     mode;
 1025 };
 1026 #endif
 1027 int
 1028 sys_openat(struct thread *td, struct openat_args *uap)
 1029 {
 1030 
 1031         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1032             uap->mode));
 1033 }
 1034 
 1035 int
 1036 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1037     int mode)
 1038 {
 1039 
 1040         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1041 }
 1042 
 1043 int
 1044 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1045     int flags, int mode)
 1046 {
 1047         struct proc *p = td->td_proc;
 1048         struct filedesc *fdp = p->p_fd;
 1049         struct file *fp;
 1050         struct vnode *vp;
 1051         struct nameidata nd;
 1052         cap_rights_t rights;
 1053         int cmode, error, indx;
 1054 
 1055         indx = -1;
 1056 
 1057         AUDIT_ARG_FFLAGS(flags);
 1058         AUDIT_ARG_MODE(mode);
 1059         /* XXX: audit dirfd */
 1060         cap_rights_init(&rights, CAP_LOOKUP);
 1061         flags_to_rights(flags, &rights);
 1062         /*
 1063          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1064          * may be specified.
 1065          */
 1066         if (flags & O_EXEC) {
 1067                 if (flags & O_ACCMODE)
 1068                         return (EINVAL);
 1069         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1070                 return (EINVAL);
 1071         } else {
 1072                 flags = FFLAGS(flags);
 1073         }
 1074 
 1075         /*
 1076          * Allocate the file descriptor, but don't install a descriptor yet.
 1077          */
 1078         error = falloc_noinstall(td, &fp);
 1079         if (error != 0)
 1080                 return (error);
 1081         /*
 1082          * An extra reference on `fp' has been held for us by
 1083          * falloc_noinstall().
 1084          */
 1085         /* Set the flags early so the finit in devfs can pick them up. */
 1086         fp->f_flag = flags & FMASK;
 1087         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1088         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1089             &rights, td);
 1090         td->td_dupfd = -1;              /* XXX check for fdopen */
 1091         error = vn_open(&nd, &flags, cmode, fp);
 1092         if (error != 0) {
 1093                 /*
 1094                  * If the vn_open replaced the method vector, something
 1095                  * wonderous happened deep below and we just pass it up
 1096                  * pretending we know what we do.
 1097                  */
 1098                 if (error == ENXIO && fp->f_ops != &badfileops)
 1099                         goto success;
 1100 
 1101                 /*
 1102                  * Handle special fdopen() case. bleh.
 1103                  *
 1104                  * Don't do this for relative (capability) lookups; we don't
 1105                  * understand exactly what would happen, and we don't think
 1106                  * that it ever should.
 1107                  */
 1108                 if (nd.ni_strictrelative == 0 &&
 1109                     (error == ENODEV || error == ENXIO) &&
 1110                     td->td_dupfd >= 0) {
 1111                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1112                             &indx);
 1113                         if (error == 0)
 1114                                 goto success;
 1115                 }
 1116 
 1117                 goto bad;
 1118         }
 1119         td->td_dupfd = 0;
 1120         NDFREE(&nd, NDF_ONLY_PNBUF);
 1121         vp = nd.ni_vp;
 1122 
 1123         /*
 1124          * Store the vnode, for any f_type. Typically, the vnode use
 1125          * count is decremented by direct call to vn_closefile() for
 1126          * files that switched type in the cdevsw fdopen() method.
 1127          */
 1128         fp->f_vnode = vp;
 1129         /*
 1130          * If the file wasn't claimed by devfs bind it to the normal
 1131          * vnode operations here.
 1132          */
 1133         if (fp->f_ops == &badfileops) {
 1134                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1135                 fp->f_seqcount = 1;
 1136                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1137                     DTYPE_VNODE, vp, &vnops);
 1138         }
 1139 
 1140         VOP_UNLOCK(vp, 0);
 1141         if (flags & O_TRUNC) {
 1142                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1143                 if (error != 0)
 1144                         goto bad;
 1145         }
 1146 success:
 1147         /*
 1148          * If we haven't already installed the FD (for dupfdopen), do so now.
 1149          */
 1150         if (indx == -1) {
 1151                 struct filecaps *fcaps;
 1152 
 1153 #ifdef CAPABILITIES
 1154                 if (nd.ni_strictrelative == 1)
 1155                         fcaps = &nd.ni_filecaps;
 1156                 else
 1157 #endif
 1158                         fcaps = NULL;
 1159                 error = finstall(td, fp, &indx, flags, fcaps);
 1160                 /* On success finstall() consumes fcaps. */
 1161                 if (error != 0) {
 1162                         filecaps_free(&nd.ni_filecaps);
 1163                         goto bad;
 1164                 }
 1165         } else {
 1166                 filecaps_free(&nd.ni_filecaps);
 1167         }
 1168 
 1169         /*
 1170          * Release our private reference, leaving the one associated with
 1171          * the descriptor table intact.
 1172          */
 1173         fdrop(fp, td);
 1174         td->td_retval[0] = indx;
 1175         return (0);
 1176 bad:
 1177         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1178         fdrop(fp, td);
 1179         return (error);
 1180 }
 1181 
 1182 #ifdef COMPAT_43
 1183 /*
 1184  * Create a file.
 1185  */
 1186 #ifndef _SYS_SYSPROTO_H_
 1187 struct ocreat_args {
 1188         char    *path;
 1189         int     mode;
 1190 };
 1191 #endif
 1192 int
 1193 ocreat(td, uap)
 1194         struct thread *td;
 1195         register struct ocreat_args /* {
 1196                 char *path;
 1197                 int mode;
 1198         } */ *uap;
 1199 {
 1200 
 1201         return (kern_open(td, uap->path, UIO_USERSPACE,
 1202             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1203 }
 1204 #endif /* COMPAT_43 */
 1205 
 1206 /*
 1207  * Create a special file.
 1208  */
 1209 #ifndef _SYS_SYSPROTO_H_
 1210 struct mknod_args {
 1211         char    *path;
 1212         int     mode;
 1213         int     dev;
 1214 };
 1215 #endif
 1216 int
 1217 sys_mknod(td, uap)
 1218         struct thread *td;
 1219         register struct mknod_args /* {
 1220                 char *path;
 1221                 int mode;
 1222                 int dev;
 1223         } */ *uap;
 1224 {
 1225 
 1226         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1227 }
 1228 
 1229 #ifndef _SYS_SYSPROTO_H_
 1230 struct mknodat_args {
 1231         int     fd;
 1232         char    *path;
 1233         mode_t  mode;
 1234         dev_t   dev;
 1235 };
 1236 #endif
 1237 int
 1238 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1239 {
 1240 
 1241         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1242             uap->dev));
 1243 }
 1244 
 1245 int
 1246 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1247     int dev)
 1248 {
 1249 
 1250         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1251 }
 1252 
 1253 int
 1254 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1255     int mode, int dev)
 1256 {
 1257         struct vnode *vp;
 1258         struct mount *mp;
 1259         struct vattr vattr;
 1260         struct nameidata nd;
 1261         cap_rights_t rights;
 1262         int error, whiteout = 0;
 1263 
 1264         AUDIT_ARG_MODE(mode);
 1265         AUDIT_ARG_DEV(dev);
 1266         switch (mode & S_IFMT) {
 1267         case S_IFCHR:
 1268         case S_IFBLK:
 1269                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1270                 break;
 1271         case S_IFMT:
 1272                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1273                 break;
 1274         case S_IFWHT:
 1275                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1276                 break;
 1277         case S_IFIFO:
 1278                 if (dev == 0)
 1279                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1280                 /* FALLTHROUGH */
 1281         default:
 1282                 error = EINVAL;
 1283                 break;
 1284         }
 1285         if (error != 0)
 1286                 return (error);
 1287 restart:
 1288         bwillwrite();
 1289         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1290             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1291             td);
 1292         if ((error = namei(&nd)) != 0)
 1293                 return (error);
 1294         vp = nd.ni_vp;
 1295         if (vp != NULL) {
 1296                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1297                 if (vp == nd.ni_dvp)
 1298                         vrele(nd.ni_dvp);
 1299                 else
 1300                         vput(nd.ni_dvp);
 1301                 vrele(vp);
 1302                 return (EEXIST);
 1303         } else {
 1304                 VATTR_NULL(&vattr);
 1305                 vattr.va_mode = (mode & ALLPERMS) &
 1306                     ~td->td_proc->p_fd->fd_cmask;
 1307                 vattr.va_rdev = dev;
 1308                 whiteout = 0;
 1309 
 1310                 switch (mode & S_IFMT) {
 1311                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1312                         vattr.va_type = VBAD;
 1313                         break;
 1314                 case S_IFCHR:
 1315                         vattr.va_type = VCHR;
 1316                         break;
 1317                 case S_IFBLK:
 1318                         vattr.va_type = VBLK;
 1319                         break;
 1320                 case S_IFWHT:
 1321                         whiteout = 1;
 1322                         break;
 1323                 default:
 1324                         panic("kern_mknod: invalid mode");
 1325                 }
 1326         }
 1327         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1328                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1329                 vput(nd.ni_dvp);
 1330                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1331                         return (error);
 1332                 goto restart;
 1333         }
 1334 #ifdef MAC
 1335         if (error == 0 && !whiteout)
 1336                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1337                     &nd.ni_cnd, &vattr);
 1338 #endif
 1339         if (error == 0) {
 1340                 if (whiteout)
 1341                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1342                 else {
 1343                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1344                                                 &nd.ni_cnd, &vattr);
 1345                         if (error == 0)
 1346                                 vput(nd.ni_vp);
 1347                 }
 1348         }
 1349         NDFREE(&nd, NDF_ONLY_PNBUF);
 1350         vput(nd.ni_dvp);
 1351         vn_finished_write(mp);
 1352         return (error);
 1353 }
 1354 
 1355 /*
 1356  * Create a named pipe.
 1357  */
 1358 #ifndef _SYS_SYSPROTO_H_
 1359 struct mkfifo_args {
 1360         char    *path;
 1361         int     mode;
 1362 };
 1363 #endif
 1364 int
 1365 sys_mkfifo(td, uap)
 1366         struct thread *td;
 1367         register struct mkfifo_args /* {
 1368                 char *path;
 1369                 int mode;
 1370         } */ *uap;
 1371 {
 1372 
 1373         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1374 }
 1375 
 1376 #ifndef _SYS_SYSPROTO_H_
 1377 struct mkfifoat_args {
 1378         int     fd;
 1379         char    *path;
 1380         mode_t  mode;
 1381 };
 1382 #endif
 1383 int
 1384 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1385 {
 1386 
 1387         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1388             uap->mode));
 1389 }
 1390 
 1391 int
 1392 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1393 {
 1394 
 1395         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1396 }
 1397 
 1398 int
 1399 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1400     int mode)
 1401 {
 1402         struct mount *mp;
 1403         struct vattr vattr;
 1404         struct nameidata nd;
 1405         cap_rights_t rights;
 1406         int error;
 1407 
 1408         AUDIT_ARG_MODE(mode);
 1409 restart:
 1410         bwillwrite();
 1411         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1412             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1413             td);
 1414         if ((error = namei(&nd)) != 0)
 1415                 return (error);
 1416         if (nd.ni_vp != NULL) {
 1417                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1418                 if (nd.ni_vp == nd.ni_dvp)
 1419                         vrele(nd.ni_dvp);
 1420                 else
 1421                         vput(nd.ni_dvp);
 1422                 vrele(nd.ni_vp);
 1423                 return (EEXIST);
 1424         }
 1425         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1426                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1427                 vput(nd.ni_dvp);
 1428                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1429                         return (error);
 1430                 goto restart;
 1431         }
 1432         VATTR_NULL(&vattr);
 1433         vattr.va_type = VFIFO;
 1434         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1435 #ifdef MAC
 1436         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1437             &vattr);
 1438         if (error != 0)
 1439                 goto out;
 1440 #endif
 1441         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1442         if (error == 0)
 1443                 vput(nd.ni_vp);
 1444 #ifdef MAC
 1445 out:
 1446 #endif
 1447         vput(nd.ni_dvp);
 1448         vn_finished_write(mp);
 1449         NDFREE(&nd, NDF_ONLY_PNBUF);
 1450         return (error);
 1451 }
 1452 
 1453 /*
 1454  * Make a hard file link.
 1455  */
 1456 #ifndef _SYS_SYSPROTO_H_
 1457 struct link_args {
 1458         char    *path;
 1459         char    *link;
 1460 };
 1461 #endif
 1462 int
 1463 sys_link(td, uap)
 1464         struct thread *td;
 1465         register struct link_args /* {
 1466                 char *path;
 1467                 char *link;
 1468         } */ *uap;
 1469 {
 1470 
 1471         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1472 }
 1473 
 1474 #ifndef _SYS_SYSPROTO_H_
 1475 struct linkat_args {
 1476         int     fd1;
 1477         char    *path1;
 1478         int     fd2;
 1479         char    *path2;
 1480         int     flag;
 1481 };
 1482 #endif
 1483 int
 1484 sys_linkat(struct thread *td, struct linkat_args *uap)
 1485 {
 1486         int flag;
 1487 
 1488         flag = uap->flag;
 1489         if (flag & ~AT_SYMLINK_FOLLOW)
 1490                 return (EINVAL);
 1491 
 1492         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1493             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1494 }
 1495 
/*
 * Hard link policy knobs consulted by can_hardlink().  Both default to 0
 * (no extra check) and are registered as read/write integer sysctls under
 * the _security_bsd node.  Note that hardlink_check_uid has external
 * linkage while hardlink_check_gid is file-local.
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
 1506 
 1507 static int
 1508 can_hardlink(struct vnode *vp, struct ucred *cred)
 1509 {
 1510         struct vattr va;
 1511         int error;
 1512 
 1513         if (!hardlink_check_uid && !hardlink_check_gid)
 1514                 return (0);
 1515 
 1516         error = VOP_GETATTR(vp, &va, cred);
 1517         if (error != 0)
 1518                 return (error);
 1519 
 1520         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1521                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1522                 if (error != 0)
 1523                         return (error);
 1524         }
 1525 
 1526         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1527                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1528                 if (error != 0)
 1529                         return (error);
 1530         }
 1531 
 1532         return (0);
 1533 }
 1534 
 1535 int
 1536 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1537 {
 1538 
 1539         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1540 }
 1541 
 1542 int
 1543 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1544     enum uio_seg segflg, int follow)
 1545 {
 1546         struct vnode *vp;
 1547         struct mount *mp;
 1548         struct nameidata nd;
 1549         cap_rights_t rights;
 1550         int error;
 1551 
 1552 again:
 1553         bwillwrite();
 1554         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1555 
 1556         if ((error = namei(&nd)) != 0)
 1557                 return (error);
 1558         NDFREE(&nd, NDF_ONLY_PNBUF);
 1559         vp = nd.ni_vp;
 1560         if (vp->v_type == VDIR) {
 1561                 vrele(vp);
 1562                 return (EPERM);         /* POSIX */
 1563         }
 1564         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 |
 1565             NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT),
 1566             td);
 1567         if ((error = namei(&nd)) == 0) {
 1568                 if (nd.ni_vp != NULL) {
 1569                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1570                         if (nd.ni_dvp == nd.ni_vp)
 1571                                 vrele(nd.ni_dvp);
 1572                         else
 1573                                 vput(nd.ni_dvp);
 1574                         vrele(nd.ni_vp);
 1575                         vrele(vp);
 1576                         return (EEXIST);
 1577                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1578                         /*
 1579                          * Cross-device link.  No need to recheck
 1580                          * vp->v_type, since it cannot change, except
 1581                          * to VBAD.
 1582                          */
 1583                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1584                         vput(nd.ni_dvp);
 1585                         vrele(vp);
 1586                         return (EXDEV);
 1587                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1588                         error = can_hardlink(vp, td->td_ucred);
 1589 #ifdef MAC
 1590                         if (error == 0)
 1591                                 error = mac_vnode_check_link(td->td_ucred,
 1592                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1593 #endif
 1594                         if (error != 0) {
 1595                                 vput(vp);
 1596                                 vput(nd.ni_dvp);
 1597                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1598                                 return (error);
 1599                         }
 1600                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1601                         if (error != 0) {
 1602                                 vput(vp);
 1603                                 vput(nd.ni_dvp);
 1604                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1605                                 error = vn_start_write(NULL, &mp,
 1606                                     V_XSLEEP | PCATCH);
 1607                                 if (error != 0)
 1608                                         return (error);
 1609                                 goto again;
 1610                         }
 1611                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1612                         VOP_UNLOCK(vp, 0);
 1613                         vput(nd.ni_dvp);
 1614                         vn_finished_write(mp);
 1615                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1616                 } else {
 1617                         vput(nd.ni_dvp);
 1618                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1619                         vrele(vp);
 1620                         goto again;
 1621                 }
 1622         }
 1623         vrele(vp);
 1624         return (error);
 1625 }
 1626 
 1627 /*
 1628  * Make a symbolic link.
 1629  */
 1630 #ifndef _SYS_SYSPROTO_H_
 1631 struct symlink_args {
 1632         char    *path;
 1633         char    *link;
 1634 };
 1635 #endif
 1636 int
 1637 sys_symlink(td, uap)
 1638         struct thread *td;
 1639         register struct symlink_args /* {
 1640                 char *path;
 1641                 char *link;
 1642         } */ *uap;
 1643 {
 1644 
 1645         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1646 }
 1647 
 1648 #ifndef _SYS_SYSPROTO_H_
 1649 struct symlinkat_args {
 1650         char    *path;
 1651         int     fd;
 1652         char    *path2;
 1653 };
 1654 #endif
 1655 int
 1656 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1657 {
 1658 
 1659         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1660             UIO_USERSPACE));
 1661 }
 1662 
 1663 int
 1664 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1665 {
 1666 
 1667         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1668 }
 1669 
 1670 int
 1671 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1672     enum uio_seg segflg)
 1673 {
 1674         struct mount *mp;
 1675         struct vattr vattr;
 1676         char *syspath;
 1677         struct nameidata nd;
 1678         int error;
 1679         cap_rights_t rights;
 1680 
 1681         if (segflg == UIO_SYSSPACE) {
 1682                 syspath = path1;
 1683         } else {
 1684                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1685                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1686                         goto out;
 1687         }
 1688         AUDIT_ARG_TEXT(syspath);
 1689 restart:
 1690         bwillwrite();
 1691         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1692             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1693             td);
 1694         if ((error = namei(&nd)) != 0)
 1695                 goto out;
 1696         if (nd.ni_vp) {
 1697                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1698                 if (nd.ni_vp == nd.ni_dvp)
 1699                         vrele(nd.ni_dvp);
 1700                 else
 1701                         vput(nd.ni_dvp);
 1702                 vrele(nd.ni_vp);
 1703                 error = EEXIST;
 1704                 goto out;
 1705         }
 1706         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1707                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1708                 vput(nd.ni_dvp);
 1709                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1710                         goto out;
 1711                 goto restart;
 1712         }
 1713         VATTR_NULL(&vattr);
 1714         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1715 #ifdef MAC
 1716         vattr.va_type = VLNK;
 1717         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1718             &vattr);
 1719         if (error != 0)
 1720                 goto out2;
 1721 #endif
 1722         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1723         if (error == 0)
 1724                 vput(nd.ni_vp);
 1725 #ifdef MAC
 1726 out2:
 1727 #endif
 1728         NDFREE(&nd, NDF_ONLY_PNBUF);
 1729         vput(nd.ni_dvp);
 1730         vn_finished_write(mp);
 1731 out:
 1732         if (segflg != UIO_SYSSPACE)
 1733                 uma_zfree(namei_zone, syspath);
 1734         return (error);
 1735 }
 1736 
 1737 /*
 1738  * Delete a whiteout from the filesystem.
 1739  */
 1740 int
 1741 sys_undelete(td, uap)
 1742         struct thread *td;
 1743         register struct undelete_args /* {
 1744                 char *path;
 1745         } */ *uap;
 1746 {
 1747         struct mount *mp;
 1748         struct nameidata nd;
 1749         int error;
 1750 
 1751 restart:
 1752         bwillwrite();
 1753         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1754             UIO_USERSPACE, uap->path, td);
 1755         error = namei(&nd);
 1756         if (error != 0)
 1757                 return (error);
 1758 
 1759         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1760                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1761                 if (nd.ni_vp == nd.ni_dvp)
 1762                         vrele(nd.ni_dvp);
 1763                 else
 1764                         vput(nd.ni_dvp);
 1765                 if (nd.ni_vp)
 1766                         vrele(nd.ni_vp);
 1767                 return (EEXIST);
 1768         }
 1769         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1770                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1771                 vput(nd.ni_dvp);
 1772                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1773                         return (error);
 1774                 goto restart;
 1775         }
 1776         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1777         NDFREE(&nd, NDF_ONLY_PNBUF);
 1778         vput(nd.ni_dvp);
 1779         vn_finished_write(mp);
 1780         return (error);
 1781 }
 1782 
 1783 /*
 1784  * Delete a name from the filesystem.
 1785  */
 1786 #ifndef _SYS_SYSPROTO_H_
 1787 struct unlink_args {
 1788         char    *path;
 1789 };
 1790 #endif
 1791 int
 1792 sys_unlink(td, uap)
 1793         struct thread *td;
 1794         struct unlink_args /* {
 1795                 char *path;
 1796         } */ *uap;
 1797 {
 1798 
 1799         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1800 }
 1801 
 1802 #ifndef _SYS_SYSPROTO_H_
 1803 struct unlinkat_args {
 1804         int     fd;
 1805         char    *path;
 1806         int     flag;
 1807 };
 1808 #endif
 1809 int
 1810 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1811 {
 1812         int flag = uap->flag;
 1813         int fd = uap->fd;
 1814         char *path = uap->path;
 1815 
 1816         if (flag & ~AT_REMOVEDIR)
 1817                 return (EINVAL);
 1818 
 1819         if (flag & AT_REMOVEDIR)
 1820                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1821         else
 1822                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1823 }
 1824 
 1825 int
 1826 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1827 {
 1828 
 1829         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1830 }
 1831 
 1832 int
 1833 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1834     ino_t oldinum)
 1835 {
 1836         struct mount *mp;
 1837         struct vnode *vp;
 1838         struct nameidata nd;
 1839         struct stat sb;
 1840         cap_rights_t rights;
 1841         int error;
 1842 
 1843 restart:
 1844         bwillwrite();
 1845         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1846             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1847         if ((error = namei(&nd)) != 0)
 1848                 return (error == EINVAL ? EPERM : error);
 1849         vp = nd.ni_vp;
 1850         if (vp->v_type == VDIR && oldinum == 0) {
 1851                 error = EPERM;          /* POSIX */
 1852         } else if (oldinum != 0 &&
 1853                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1854                   sb.st_ino != oldinum) {
 1855                         error = EIDRM;  /* Identifier removed */
 1856         } else {
 1857                 /*
 1858                  * The root of a mounted filesystem cannot be deleted.
 1859                  *
 1860                  * XXX: can this only be a VDIR case?
 1861                  */
 1862                 if (vp->v_vflag & VV_ROOT)
 1863                         error = EBUSY;
 1864         }
 1865         if (error == 0) {
 1866                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1867                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1868                         vput(nd.ni_dvp);
 1869                         if (vp == nd.ni_dvp)
 1870                                 vrele(vp);
 1871                         else
 1872                                 vput(vp);
 1873                         if ((error = vn_start_write(NULL, &mp,
 1874                             V_XSLEEP | PCATCH)) != 0)
 1875                                 return (error);
 1876                         goto restart;
 1877                 }
 1878 #ifdef MAC
 1879                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1880                     &nd.ni_cnd);
 1881                 if (error != 0)
 1882                         goto out;
 1883 #endif
 1884                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1885                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1886 #ifdef MAC
 1887 out:
 1888 #endif
 1889                 vn_finished_write(mp);
 1890         }
 1891         NDFREE(&nd, NDF_ONLY_PNBUF);
 1892         vput(nd.ni_dvp);
 1893         if (vp == nd.ni_dvp)
 1894                 vrele(vp);
 1895         else
 1896                 vput(vp);
 1897         return (error);
 1898 }
 1899 
 1900 /*
 1901  * Reposition read/write file offset.
 1902  */
 1903 #ifndef _SYS_SYSPROTO_H_
 1904 struct lseek_args {
 1905         int     fd;
 1906         int     pad;
 1907         off_t   offset;
 1908         int     whence;
 1909 };
 1910 #endif
 1911 int
 1912 sys_lseek(td, uap)
 1913         struct thread *td;
 1914         register struct lseek_args /* {
 1915                 int fd;
 1916                 int pad;
 1917                 off_t offset;
 1918                 int whence;
 1919         } */ *uap;
 1920 {
 1921         struct file *fp;
 1922         cap_rights_t rights;
 1923         int error;
 1924 
 1925         AUDIT_ARG_FD(uap->fd);
 1926         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1927         if (error != 0)
 1928                 return (error);
 1929         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1930             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1931         fdrop(fp, td);
 1932         return (error);
 1933 }
 1934 
 1935 #if defined(COMPAT_43)
 1936 /*
 1937  * Reposition read/write file offset.
 1938  */
 1939 #ifndef _SYS_SYSPROTO_H_
 1940 struct olseek_args {
 1941         int     fd;
 1942         long    offset;
 1943         int     whence;
 1944 };
 1945 #endif
 1946 int
 1947 olseek(td, uap)
 1948         struct thread *td;
 1949         register struct olseek_args /* {
 1950                 int fd;
 1951                 long offset;
 1952                 int whence;
 1953         } */ *uap;
 1954 {
 1955         struct lseek_args /* {
 1956                 int fd;
 1957                 int pad;
 1958                 off_t offset;
 1959                 int whence;
 1960         } */ nuap;
 1961 
 1962         nuap.fd = uap->fd;
 1963         nuap.offset = uap->offset;
 1964         nuap.whence = uap->whence;
 1965         return (sys_lseek(td, &nuap));
 1966 }
 1967 #endif /* COMPAT_43 */
 1968 
 1969 /* Version with the 'pad' argument */
 1970 int
 1971 freebsd6_lseek(td, uap)
 1972         struct thread *td;
 1973         register struct freebsd6_lseek_args *uap;
 1974 {
 1975         struct lseek_args ouap;
 1976 
 1977         ouap.fd = uap->fd;
 1978         ouap.offset = uap->offset;
 1979         ouap.whence = uap->whence;
 1980         return (sys_lseek(td, &ouap));
 1981 }
 1982 
 1983 /*
 1984  * Check access permissions using passed credentials.
 1985  */
 1986 static int
 1987 vn_access(vp, user_flags, cred, td)
 1988         struct vnode    *vp;
 1989         int             user_flags;
 1990         struct ucred    *cred;
 1991         struct thread   *td;
 1992 {
 1993         accmode_t accmode;
 1994         int error;
 1995 
 1996         /* Flags == 0 means only check for existence. */
 1997         error = 0;
 1998         if (user_flags) {
 1999                 accmode = 0;
 2000                 if (user_flags & R_OK)
 2001                         accmode |= VREAD;
 2002                 if (user_flags & W_OK)
 2003                         accmode |= VWRITE;
 2004                 if (user_flags & X_OK)
 2005                         accmode |= VEXEC;
 2006 #ifdef MAC
 2007                 error = mac_vnode_check_access(cred, vp, accmode);
 2008                 if (error != 0)
 2009                         return (error);
 2010 #endif
 2011                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2012                         error = VOP_ACCESS(vp, accmode, cred, td);
 2013         }
 2014         return (error);
 2015 }
 2016 
 2017 /*
 2018  * Check access permissions using "real" credentials.
 2019  */
 2020 #ifndef _SYS_SYSPROTO_H_
 2021 struct access_args {
 2022         char    *path;
 2023         int     amode;
 2024 };
 2025 #endif
 2026 int
 2027 sys_access(td, uap)
 2028         struct thread *td;
 2029         register struct access_args /* {
 2030                 char *path;
 2031                 int amode;
 2032         } */ *uap;
 2033 {
 2034 
 2035         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2036 }
 2037 
 2038 #ifndef _SYS_SYSPROTO_H_
 2039 struct faccessat_args {
 2040         int     dirfd;
 2041         char    *path;
 2042         int     amode;
 2043         int     flag;
 2044 }
 2045 #endif
 2046 int
 2047 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2048 {
 2049 
 2050         if (uap->flag & ~AT_EACCESS)
 2051                 return (EINVAL);
 2052         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2053             uap->amode));
 2054 }
 2055 
 2056 int
 2057 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2058 {
 2059 
 2060         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2061 }
 2062 
 2063 int
 2064 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2065     int flag, int amode)
 2066 {
 2067         struct ucred *cred, *tmpcred;
 2068         struct vnode *vp;
 2069         struct nameidata nd;
 2070         cap_rights_t rights;
 2071         int error;
 2072 
 2073         /*
 2074          * Create and modify a temporary credential instead of one that
 2075          * is potentially shared.
 2076          */
 2077         if (!(flag & AT_EACCESS)) {
 2078                 cred = td->td_ucred;
 2079                 tmpcred = crdup(cred);
 2080                 tmpcred->cr_uid = cred->cr_ruid;
 2081                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2082                 td->td_ucred = tmpcred;
 2083         } else
 2084                 cred = tmpcred = td->td_ucred;
 2085         AUDIT_ARG_VALUE(amode);
 2086         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2087             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2088             td);
 2089         if ((error = namei(&nd)) != 0)
 2090                 goto out1;
 2091         vp = nd.ni_vp;
 2092 
 2093         error = vn_access(vp, amode, tmpcred, td);
 2094         NDFREE(&nd, NDF_ONLY_PNBUF);
 2095         vput(vp);
 2096 out1:
 2097         if (!(flag & AT_EACCESS)) {
 2098                 td->td_ucred = cred;
 2099                 crfree(tmpcred);
 2100         }
 2101         return (error);
 2102 }
 2103 
 2104 /*
 2105  * Check access permissions using "effective" credentials.
 2106  */
 2107 #ifndef _SYS_SYSPROTO_H_
 2108 struct eaccess_args {
 2109         char    *path;
 2110         int     amode;
 2111 };
 2112 #endif
 2113 int
 2114 sys_eaccess(td, uap)
 2115         struct thread *td;
 2116         register struct eaccess_args /* {
 2117                 char *path;
 2118                 int amode;
 2119         } */ *uap;
 2120 {
 2121 
 2122         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2123 }
 2124 
 2125 int
 2126 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2127 {
 2128 
 2129         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2130 }
 2131 
 2132 #if defined(COMPAT_43)
 2133 /*
 2134  * Get file status; this version follows links.
 2135  */
 2136 #ifndef _SYS_SYSPROTO_H_
 2137 struct ostat_args {
 2138         char    *path;
 2139         struct ostat *ub;
 2140 };
 2141 #endif
 2142 int
 2143 ostat(td, uap)
 2144         struct thread *td;
 2145         register struct ostat_args /* {
 2146                 char *path;
 2147                 struct ostat *ub;
 2148         } */ *uap;
 2149 {
 2150         struct stat sb;
 2151         struct ostat osb;
 2152         int error;
 2153 
 2154         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2155         if (error != 0)
 2156                 return (error);
 2157         cvtstat(&sb, &osb);
 2158         return (copyout(&osb, uap->ub, sizeof (osb)));
 2159 }
 2160 
 2161 /*
 2162  * Get file status; this version does not follow links.
 2163  */
 2164 #ifndef _SYS_SYSPROTO_H_
 2165 struct olstat_args {
 2166         char    *path;
 2167         struct ostat *ub;
 2168 };
 2169 #endif
 2170 int
 2171 olstat(td, uap)
 2172         struct thread *td;
 2173         register struct olstat_args /* {
 2174                 char *path;
 2175                 struct ostat *ub;
 2176         } */ *uap;
 2177 {
 2178         struct stat sb;
 2179         struct ostat osb;
 2180         int error;
 2181 
 2182         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2183         if (error != 0)
 2184                 return (error);
 2185         cvtstat(&sb, &osb);
 2186         return (copyout(&osb, uap->ub, sizeof (osb)));
 2187 }
 2188 
 2189 /*
 2190  * Convert from an old to a new stat structure.
 2191  */
 2192 void
 2193 cvtstat(st, ost)
 2194         struct stat *st;
 2195         struct ostat *ost;
 2196 {
 2197 
 2198         bzero(ost, sizeof(*ost));
 2199         ost->st_dev = st->st_dev;
 2200         ost->st_ino = st->st_ino;
 2201         ost->st_mode = st->st_mode;
 2202         ost->st_nlink = st->st_nlink;
 2203         ost->st_uid = st->st_uid;
 2204         ost->st_gid = st->st_gid;
 2205         ost->st_rdev = st->st_rdev;
 2206         if (st->st_size < (quad_t)1 << 32)
 2207                 ost->st_size = st->st_size;
 2208         else
 2209                 ost->st_size = -2;
 2210         ost->st_atim = st->st_atim;
 2211         ost->st_mtim = st->st_mtim;
 2212         ost->st_ctim = st->st_ctim;
 2213         ost->st_blksize = st->st_blksize;
 2214         ost->st_blocks = st->st_blocks;
 2215         ost->st_flags = st->st_flags;
 2216         ost->st_gen = st->st_gen;
 2217 }
 2218 #endif /* COMPAT_43 */
 2219 
 2220 /*
 2221  * Get file status; this version follows links.
 2222  */
 2223 #ifndef _SYS_SYSPROTO_H_
 2224 struct stat_args {
 2225         char    *path;
 2226         struct stat *ub;
 2227 };
 2228 #endif
 2229 int
 2230 sys_stat(td, uap)
 2231         struct thread *td;
 2232         register struct stat_args /* {
 2233                 char *path;
 2234                 struct stat *ub;
 2235         } */ *uap;
 2236 {
 2237         struct stat sb;
 2238         int error;
 2239 
 2240         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2241         if (error == 0)
 2242                 error = copyout(&sb, uap->ub, sizeof (sb));
 2243         return (error);
 2244 }
 2245 
 2246 #ifndef _SYS_SYSPROTO_H_
 2247 struct fstatat_args {
 2248         int     fd;
 2249         char    *path;
 2250         struct stat     *buf;
 2251         int     flag;
 2252 }
 2253 #endif
 2254 int
 2255 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2256 {
 2257         struct stat sb;
 2258         int error;
 2259 
 2260         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2261             UIO_USERSPACE, &sb);
 2262         if (error == 0)
 2263                 error = copyout(&sb, uap->buf, sizeof (sb));
 2264         return (error);
 2265 }
 2266 
 2267 int
 2268 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2269 {
 2270 
 2271         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2272 }
 2273 
 2274 int
 2275 kern_statat(struct thread *td, int flag, int fd, char *path,
 2276     enum uio_seg pathseg, struct stat *sbp)
 2277 {
 2278 
 2279         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2280 }
 2281 
 2282 int
 2283 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2284     enum uio_seg pathseg, struct stat *sbp,
 2285     void (*hook)(struct vnode *vp, struct stat *sbp))
 2286 {
 2287         struct nameidata nd;
 2288         struct stat sb;
 2289         cap_rights_t rights;
 2290         int error;
 2291 
 2292         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2293                 return (EINVAL);
 2294 
 2295         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2296             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2297             cap_rights_init(&rights, CAP_FSTAT), td);
 2298 
 2299         if ((error = namei(&nd)) != 0)
 2300                 return (error);
 2301         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2302         if (error == 0) {
 2303                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2304                 if (S_ISREG(sb.st_mode))
 2305                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2306                 if (__predict_false(hook != NULL))
 2307                         hook(nd.ni_vp, &sb);
 2308         }
 2309         NDFREE(&nd, NDF_ONLY_PNBUF);
 2310         vput(nd.ni_vp);
 2311         if (error != 0)
 2312                 return (error);
 2313         *sbp = sb;
 2314 #ifdef KTRACE
 2315         if (KTRPOINT(td, KTR_STRUCT))
 2316                 ktrstat(&sb);
 2317 #endif
 2318         return (0);
 2319 }
 2320 
 2321 /*
 2322  * Get file status; this version does not follow links.
 2323  */
 2324 #ifndef _SYS_SYSPROTO_H_
 2325 struct lstat_args {
 2326         char    *path;
 2327         struct stat *ub;
 2328 };
 2329 #endif
 2330 int
 2331 sys_lstat(td, uap)
 2332         struct thread *td;
 2333         register struct lstat_args /* {
 2334                 char *path;
 2335                 struct stat *ub;
 2336         } */ *uap;
 2337 {
 2338         struct stat sb;
 2339         int error;
 2340 
 2341         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2342         if (error == 0)
 2343                 error = copyout(&sb, uap->ub, sizeof (sb));
 2344         return (error);
 2345 }
 2346 
 2347 int
 2348 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2349 {
 2350 
 2351         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2352             sbp));
 2353 }
 2354 
 2355 /*
 2356  * Implementation of the NetBSD [l]stat() functions.
 2357  */
 2358 void
 2359 cvtnstat(sb, nsb)
 2360         struct stat *sb;
 2361         struct nstat *nsb;
 2362 {
 2363 
 2364         bzero(nsb, sizeof *nsb);
 2365         nsb->st_dev = sb->st_dev;
 2366         nsb->st_ino = sb->st_ino;
 2367         nsb->st_mode = sb->st_mode;
 2368         nsb->st_nlink = sb->st_nlink;
 2369         nsb->st_uid = sb->st_uid;
 2370         nsb->st_gid = sb->st_gid;
 2371         nsb->st_rdev = sb->st_rdev;
 2372         nsb->st_atim = sb->st_atim;
 2373         nsb->st_mtim = sb->st_mtim;
 2374         nsb->st_ctim = sb->st_ctim;
 2375         nsb->st_size = sb->st_size;
 2376         nsb->st_blocks = sb->st_blocks;
 2377         nsb->st_blksize = sb->st_blksize;
 2378         nsb->st_flags = sb->st_flags;
 2379         nsb->st_gen = sb->st_gen;
 2380         nsb->st_birthtim = sb->st_birthtim;
 2381 }
 2382 
 2383 #ifndef _SYS_SYSPROTO_H_
 2384 struct nstat_args {
 2385         char    *path;
 2386         struct nstat *ub;
 2387 };
 2388 #endif
 2389 int
 2390 sys_nstat(td, uap)
 2391         struct thread *td;
 2392         register struct nstat_args /* {
 2393                 char *path;
 2394                 struct nstat *ub;
 2395         } */ *uap;
 2396 {
 2397         struct stat sb;
 2398         struct nstat nsb;
 2399         int error;
 2400 
 2401         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2402         if (error != 0)
 2403                 return (error);
 2404         cvtnstat(&sb, &nsb);
 2405         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2406 }
 2407 
 2408 /*
 2409  * NetBSD lstat.  Get file status; this version does not follow links.
 2410  */
 2411 #ifndef _SYS_SYSPROTO_H_
 2412 struct lstat_args {
 2413         char    *path;
 2414         struct stat *ub;
 2415 };
 2416 #endif
 2417 int
 2418 sys_nlstat(td, uap)
 2419         struct thread *td;
 2420         register struct nlstat_args /* {
 2421                 char *path;
 2422                 struct nstat *ub;
 2423         } */ *uap;
 2424 {
 2425         struct stat sb;
 2426         struct nstat nsb;
 2427         int error;
 2428 
 2429         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2430         if (error != 0)
 2431                 return (error);
 2432         cvtnstat(&sb, &nsb);
 2433         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2434 }
 2435 
 2436 /*
 2437  * Get configurable pathname variables.
 2438  */
 2439 #ifndef _SYS_SYSPROTO_H_
 2440 struct pathconf_args {
 2441         char    *path;
 2442         int     name;
 2443 };
 2444 #endif
 2445 int
 2446 sys_pathconf(td, uap)
 2447         struct thread *td;
 2448         register struct pathconf_args /* {
 2449                 char *path;
 2450                 int name;
 2451         } */ *uap;
 2452 {
 2453 
 2454         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2455 }
 2456 
 2457 #ifndef _SYS_SYSPROTO_H_
 2458 struct lpathconf_args {
 2459         char    *path;
 2460         int     name;
 2461 };
 2462 #endif
 2463 int
 2464 sys_lpathconf(td, uap)
 2465         struct thread *td;
 2466         register struct lpathconf_args /* {
 2467                 char *path;
 2468                 int name;
 2469         } */ *uap;
 2470 {
 2471 
 2472         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2473             NOFOLLOW));
 2474 }
 2475 
 2476 int
 2477 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2478     u_long flags)
 2479 {
 2480         struct nameidata nd;
 2481         int error;
 2482 
 2483         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2484             pathseg, path, td);
 2485         if ((error = namei(&nd)) != 0)
 2486                 return (error);
 2487         NDFREE(&nd, NDF_ONLY_PNBUF);
 2488 
 2489         /* If asynchronous I/O is available, it works for all files. */
 2490         if (name == _PC_ASYNC_IO)
 2491                 td->td_retval[0] = async_io_version;
 2492         else
 2493                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2494         vput(nd.ni_vp);
 2495         return (error);
 2496 }
 2497 
 2498 /*
 2499  * Return target name of a symbolic link.
 2500  */
 2501 #ifndef _SYS_SYSPROTO_H_
 2502 struct readlink_args {
 2503         char    *path;
 2504         char    *buf;
 2505         size_t  count;
 2506 };
 2507 #endif
 2508 int
 2509 sys_readlink(td, uap)
 2510         struct thread *td;
 2511         register struct readlink_args /* {
 2512                 char *path;
 2513                 char *buf;
 2514                 size_t count;
 2515         } */ *uap;
 2516 {
 2517 
 2518         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2519             UIO_USERSPACE, uap->count));
 2520 }
 2521 #ifndef _SYS_SYSPROTO_H_
 2522 struct readlinkat_args {
 2523         int     fd;
 2524         char    *path;
 2525         char    *buf;
 2526         size_t  bufsize;
 2527 };
 2528 #endif
 2529 int
 2530 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2531 {
 2532 
 2533         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2534             uap->buf, UIO_USERSPACE, uap->bufsize));
 2535 }
 2536 
 2537 int
 2538 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2539     enum uio_seg bufseg, size_t count)
 2540 {
 2541 
 2542         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2543             count));
 2544 }
 2545 
 2546 int
 2547 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2548     char *buf, enum uio_seg bufseg, size_t count)
 2549 {
 2550         struct vnode *vp;
 2551         struct iovec aiov;
 2552         struct uio auio;
 2553         struct nameidata nd;
 2554         int error;
 2555 
 2556         if (count > IOSIZE_MAX)
 2557                 return (EINVAL);
 2558 
 2559         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2560             pathseg, path, fd, td);
 2561 
 2562         if ((error = namei(&nd)) != 0)
 2563                 return (error);
 2564         NDFREE(&nd, NDF_ONLY_PNBUF);
 2565         vp = nd.ni_vp;
 2566 #ifdef MAC
 2567         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2568         if (error != 0) {
 2569                 vput(vp);
 2570                 return (error);
 2571         }
 2572 #endif
 2573         if (vp->v_type != VLNK)
 2574                 error = EINVAL;
 2575         else {
 2576                 aiov.iov_base = buf;
 2577                 aiov.iov_len = count;
 2578                 auio.uio_iov = &aiov;
 2579                 auio.uio_iovcnt = 1;
 2580                 auio.uio_offset = 0;
 2581                 auio.uio_rw = UIO_READ;
 2582                 auio.uio_segflg = bufseg;
 2583                 auio.uio_td = td;
 2584                 auio.uio_resid = count;
 2585                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2586                 td->td_retval[0] = count - auio.uio_resid;
 2587         }
 2588         vput(vp);
 2589         return (error);
 2590 }
 2591 
 2592 /*
 2593  * Common implementation code for chflags() and fchflags().
 2594  */
 2595 static int
 2596 setfflags(td, vp, flags)
 2597         struct thread *td;
 2598         struct vnode *vp;
 2599         u_long flags;
 2600 {
 2601         struct mount *mp;
 2602         struct vattr vattr;
 2603         int error;
 2604 
 2605         /* We can't support the value matching VNOVAL. */
 2606         if (flags == VNOVAL)
 2607                 return (EOPNOTSUPP);
 2608 
 2609         /*
 2610          * Prevent non-root users from setting flags on devices.  When
 2611          * a device is reused, users can retain ownership of the device
 2612          * if they are allowed to set flags and programs assume that
 2613          * chown can't fail when done as root.
 2614          */
 2615         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2616                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2617                 if (error != 0)
 2618                         return (error);
 2619         }
 2620 
 2621         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2622                 return (error);
 2623         VATTR_NULL(&vattr);
 2624         vattr.va_flags = flags;
 2625         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2626 #ifdef MAC
 2627         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2628         if (error == 0)
 2629 #endif
 2630                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2631         VOP_UNLOCK(vp, 0);
 2632         vn_finished_write(mp);
 2633         return (error);
 2634 }
 2635 
 2636 /*
 2637  * Change flags of a file given a path name.
 2638  */
 2639 #ifndef _SYS_SYSPROTO_H_
 2640 struct chflags_args {
 2641         const char *path;
 2642         u_long  flags;
 2643 };
 2644 #endif
 2645 int
 2646 sys_chflags(td, uap)
 2647         struct thread *td;
 2648         register struct chflags_args /* {
 2649                 const char *path;
 2650                 u_long flags;
 2651         } */ *uap;
 2652 {
 2653 
 2654         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2655 }
 2656 
 2657 #ifndef _SYS_SYSPROTO_H_
 2658 struct chflagsat_args {
 2659         int     fd;
 2660         const char *path;
 2661         u_long  flags;
 2662         int     atflag;
 2663 }
 2664 #endif
 2665 int
 2666 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2667 {
 2668         int fd = uap->fd;
 2669         const char *path = uap->path;
 2670         u_long flags = uap->flags;
 2671         int atflag = uap->atflag;
 2672 
 2673         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2674                 return (EINVAL);
 2675 
 2676         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2677 }
 2678 
 2679 static int
 2680 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2681     u_long flags)
 2682 {
 2683 
 2684         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2685 }
 2686 
 2687 /*
 2688  * Same as chflags() but doesn't follow symlinks.
 2689  */
 2690 int
 2691 sys_lchflags(td, uap)
 2692         struct thread *td;
 2693         register struct lchflags_args /* {
 2694                 const char *path;
 2695                 u_long flags;
 2696         } */ *uap;
 2697 {
 2698 
 2699         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2700             uap->flags, AT_SYMLINK_NOFOLLOW));
 2701 }
 2702 
 2703 static int
 2704 kern_chflagsat(struct thread *td, int fd, const char *path,
 2705     enum uio_seg pathseg, u_long flags, int atflag)
 2706 {
 2707         struct nameidata nd;
 2708         cap_rights_t rights;
 2709         int error, follow;
 2710 
 2711         AUDIT_ARG_FFLAGS(flags);
 2712         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2713         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2714             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2715         if ((error = namei(&nd)) != 0)
 2716                 return (error);
 2717         NDFREE(&nd, NDF_ONLY_PNBUF);
 2718         error = setfflags(td, nd.ni_vp, flags);
 2719         vrele(nd.ni_vp);
 2720         return (error);
 2721 }
 2722 
 2723 /*
 2724  * Change flags of a file given a file descriptor.
 2725  */
 2726 #ifndef _SYS_SYSPROTO_H_
 2727 struct fchflags_args {
 2728         int     fd;
 2729         u_long  flags;
 2730 };
 2731 #endif
 2732 int
 2733 sys_fchflags(td, uap)
 2734         struct thread *td;
 2735         register struct fchflags_args /* {
 2736                 int fd;
 2737                 u_long flags;
 2738         } */ *uap;
 2739 {
 2740         struct file *fp;
 2741         cap_rights_t rights;
 2742         int error;
 2743 
 2744         AUDIT_ARG_FD(uap->fd);
 2745         AUDIT_ARG_FFLAGS(uap->flags);
 2746         error = getvnode(td->td_proc->p_fd, uap->fd,
 2747             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2748         if (error != 0)
 2749                 return (error);
 2750 #ifdef AUDIT
 2751         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2752         AUDIT_ARG_VNODE1(fp->f_vnode);
 2753         VOP_UNLOCK(fp->f_vnode, 0);
 2754 #endif
 2755         error = setfflags(td, fp->f_vnode, uap->flags);
 2756         fdrop(fp, td);
 2757         return (error);
 2758 }
 2759 
 2760 /*
 2761  * Common implementation code for chmod(), lchmod() and fchmod().
 2762  */
 2763 int
 2764 setfmode(td, cred, vp, mode)
 2765         struct thread *td;
 2766         struct ucred *cred;
 2767         struct vnode *vp;
 2768         int mode;
 2769 {
 2770         struct mount *mp;
 2771         struct vattr vattr;
 2772         int error;
 2773 
 2774         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2775                 return (error);
 2776         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2777         VATTR_NULL(&vattr);
 2778         vattr.va_mode = mode & ALLPERMS;
 2779 #ifdef MAC
 2780         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2781         if (error == 0)
 2782 #endif
 2783                 error = VOP_SETATTR(vp, &vattr, cred);
 2784         VOP_UNLOCK(vp, 0);
 2785         vn_finished_write(mp);
 2786         return (error);
 2787 }
 2788 
 2789 /*
 2790  * Change mode of a file given path name.
 2791  */
 2792 #ifndef _SYS_SYSPROTO_H_
 2793 struct chmod_args {
 2794         char    *path;
 2795         int     mode;
 2796 };
 2797 #endif
 2798 int
 2799 sys_chmod(td, uap)
 2800         struct thread *td;
 2801         register struct chmod_args /* {
 2802                 char *path;
 2803                 int mode;
 2804         } */ *uap;
 2805 {
 2806 
 2807         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2808 }
 2809 
 2810 #ifndef _SYS_SYSPROTO_H_
 2811 struct fchmodat_args {
 2812         int     dirfd;
 2813         char    *path;
 2814         mode_t  mode;
 2815         int     flag;
 2816 }
 2817 #endif
 2818 int
 2819 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2820 {
 2821         int flag = uap->flag;
 2822         int fd = uap->fd;
 2823         char *path = uap->path;
 2824         mode_t mode = uap->mode;
 2825 
 2826         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2827                 return (EINVAL);
 2828 
 2829         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2830 }
 2831 
 2832 int
 2833 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2834 {
 2835 
 2836         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2837 }
 2838 
 2839 /*
 2840  * Change mode of a file given path name (don't follow links.)
 2841  */
 2842 #ifndef _SYS_SYSPROTO_H_
 2843 struct lchmod_args {
 2844         char    *path;
 2845         int     mode;
 2846 };
 2847 #endif
 2848 int
 2849 sys_lchmod(td, uap)
 2850         struct thread *td;
 2851         register struct lchmod_args /* {
 2852                 char *path;
 2853                 int mode;
 2854         } */ *uap;
 2855 {
 2856 
 2857         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2858             uap->mode, AT_SYMLINK_NOFOLLOW));
 2859 }
 2860 
 2861 int
 2862 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2863     mode_t mode, int flag)
 2864 {
 2865         struct nameidata nd;
 2866         cap_rights_t rights;
 2867         int error, follow;
 2868 
 2869         AUDIT_ARG_MODE(mode);
 2870         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2871         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2872             cap_rights_init(&rights, CAP_FCHMOD), td);
 2873         if ((error = namei(&nd)) != 0)
 2874                 return (error);
 2875         NDFREE(&nd, NDF_ONLY_PNBUF);
 2876         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2877         vrele(nd.ni_vp);
 2878         return (error);
 2879 }
 2880 
 2881 /*
 2882  * Change mode of a file given a file descriptor.
 2883  */
 2884 #ifndef _SYS_SYSPROTO_H_
 2885 struct fchmod_args {
 2886         int     fd;
 2887         int     mode;
 2888 };
 2889 #endif
 2890 int
 2891 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2892 {
 2893         struct file *fp;
 2894         cap_rights_t rights;
 2895         int error;
 2896 
 2897         AUDIT_ARG_FD(uap->fd);
 2898         AUDIT_ARG_MODE(uap->mode);
 2899 
 2900         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2901         if (error != 0)
 2902                 return (error);
 2903         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2904         fdrop(fp, td);
 2905         return (error);
 2906 }
 2907 
 2908 /*
 2909  * Common implementation for chown(), lchown(), and fchown()
 2910  */
 2911 int
 2912 setfown(td, cred, vp, uid, gid)
 2913         struct thread *td;
 2914         struct ucred *cred;
 2915         struct vnode *vp;
 2916         uid_t uid;
 2917         gid_t gid;
 2918 {
 2919         struct mount *mp;
 2920         struct vattr vattr;
 2921         int error;
 2922 
 2923         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2924                 return (error);
 2925         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2926         VATTR_NULL(&vattr);
 2927         vattr.va_uid = uid;
 2928         vattr.va_gid = gid;
 2929 #ifdef MAC
 2930         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2931             vattr.va_gid);
 2932         if (error == 0)
 2933 #endif
 2934                 error = VOP_SETATTR(vp, &vattr, cred);
 2935         VOP_UNLOCK(vp, 0);
 2936         vn_finished_write(mp);
 2937         return (error);
 2938 }
 2939 
 2940 /*
 2941  * Set ownership given a path name.
 2942  */
 2943 #ifndef _SYS_SYSPROTO_H_
 2944 struct chown_args {
 2945         char    *path;
 2946         int     uid;
 2947         int     gid;
 2948 };
 2949 #endif
 2950 int
 2951 sys_chown(td, uap)
 2952         struct thread *td;
 2953         register struct chown_args /* {
 2954                 char *path;
 2955                 int uid;
 2956                 int gid;
 2957         } */ *uap;
 2958 {
 2959 
 2960         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2961 }
 2962 
 2963 #ifndef _SYS_SYSPROTO_H_
 2964 struct fchownat_args {
 2965         int fd;
 2966         const char * path;
 2967         uid_t uid;
 2968         gid_t gid;
 2969         int flag;
 2970 };
 2971 #endif
 2972 int
 2973 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2974 {
 2975         int flag;
 2976 
 2977         flag = uap->flag;
 2978         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2979                 return (EINVAL);
 2980 
 2981         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2982             uap->gid, uap->flag));
 2983 }
 2984 
 2985 int
 2986 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2987     int gid)
 2988 {
 2989 
 2990         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2991 }
 2992 
 2993 int
 2994 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2995     int uid, int gid, int flag)
 2996 {
 2997         struct nameidata nd;
 2998         cap_rights_t rights;
 2999         int error, follow;
 3000 
 3001         AUDIT_ARG_OWNER(uid, gid);
 3002         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3003         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 3004             cap_rights_init(&rights, CAP_FCHOWN), td);
 3005 
 3006         if ((error = namei(&nd)) != 0)
 3007                 return (error);
 3008         NDFREE(&nd, NDF_ONLY_PNBUF);
 3009         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3010         vrele(nd.ni_vp);
 3011         return (error);
 3012 }
 3013 
 3014 /*
 3015  * Set ownership given a path name, do not cross symlinks.
 3016  */
 3017 #ifndef _SYS_SYSPROTO_H_
 3018 struct lchown_args {
 3019         char    *path;
 3020         int     uid;
 3021         int     gid;
 3022 };
 3023 #endif
 3024 int
 3025 sys_lchown(td, uap)
 3026         struct thread *td;
 3027         register struct lchown_args /* {
 3028                 char *path;
 3029                 int uid;
 3030                 int gid;
 3031         } */ *uap;
 3032 {
 3033 
 3034         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3035 }
 3036 
 3037 int
 3038 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3039     int gid)
 3040 {
 3041 
 3042         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3043             AT_SYMLINK_NOFOLLOW));
 3044 }
 3045 
 3046 /*
 3047  * Set ownership given a file descriptor.
 3048  */
 3049 #ifndef _SYS_SYSPROTO_H_
 3050 struct fchown_args {
 3051         int     fd;
 3052         int     uid;
 3053         int     gid;
 3054 };
 3055 #endif
 3056 int
 3057 sys_fchown(td, uap)
 3058         struct thread *td;
 3059         register struct fchown_args /* {
 3060                 int fd;
 3061                 int uid;
 3062                 int gid;
 3063         } */ *uap;
 3064 {
 3065         struct file *fp;
 3066         cap_rights_t rights;
 3067         int error;
 3068 
 3069         AUDIT_ARG_FD(uap->fd);
 3070         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3071         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3072         if (error != 0)
 3073                 return (error);
 3074         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3075         fdrop(fp, td);
 3076         return (error);
 3077 }
 3078 
 3079 /*
 3080  * Common implementation code for utimes(), lutimes(), and futimes().
 3081  */
 3082 static int
 3083 getutimes(usrtvp, tvpseg, tsp)
 3084         const struct timeval *usrtvp;
 3085         enum uio_seg tvpseg;
 3086         struct timespec *tsp;
 3087 {
 3088         struct timeval tv[2];
 3089         const struct timeval *tvp;
 3090         int error;
 3091 
 3092         if (usrtvp == NULL) {
 3093                 vfs_timestamp(&tsp[0]);
 3094                 tsp[1] = tsp[0];
 3095         } else {
 3096                 if (tvpseg == UIO_SYSSPACE) {
 3097                         tvp = usrtvp;
 3098                 } else {
 3099                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3100                                 return (error);
 3101                         tvp = tv;
 3102                 }
 3103 
 3104                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3105                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3106                         return (EINVAL);
 3107                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3108                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3109         }
 3110         return (0);
 3111 }
 3112 
 3113 /*
 3114  * Common implementation code for utimes(), lutimes(), and futimes().
 3115  */
 3116 static int
 3117 setutimes(td, vp, ts, numtimes, nullflag)
 3118         struct thread *td;
 3119         struct vnode *vp;
 3120         const struct timespec *ts;
 3121         int numtimes;
 3122         int nullflag;
 3123 {
 3124         struct mount *mp;
 3125         struct vattr vattr;
 3126         int error, setbirthtime;
 3127 
 3128         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3129                 return (error);
 3130         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3131         setbirthtime = 0;
 3132         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3133             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3134                 setbirthtime = 1;
 3135         VATTR_NULL(&vattr);
 3136         vattr.va_atime = ts[0];
 3137         vattr.va_mtime = ts[1];
 3138         if (setbirthtime)
 3139                 vattr.va_birthtime = ts[1];
 3140         if (numtimes > 2)
 3141                 vattr.va_birthtime = ts[2];
 3142         if (nullflag)
 3143                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3144 #ifdef MAC
 3145         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3146             vattr.va_mtime);
 3147 #endif
 3148         if (error == 0)
 3149                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3150         VOP_UNLOCK(vp, 0);
 3151         vn_finished_write(mp);
 3152         return (error);
 3153 }
 3154 
 3155 /*
 3156  * Set the access and modification times of a file.
 3157  */
 3158 #ifndef _SYS_SYSPROTO_H_
 3159 struct utimes_args {
 3160         char    *path;
 3161         struct  timeval *tptr;
 3162 };
 3163 #endif
 3164 int
 3165 sys_utimes(td, uap)
 3166         struct thread *td;
 3167         register struct utimes_args /* {
 3168                 char *path;
 3169                 struct timeval *tptr;
 3170         } */ *uap;
 3171 {
 3172 
 3173         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3174             UIO_USERSPACE));
 3175 }
 3176 
 3177 #ifndef _SYS_SYSPROTO_H_
 3178 struct futimesat_args {
 3179         int fd;
 3180         const char * path;
 3181         const struct timeval * times;
 3182 };
 3183 #endif
 3184 int
 3185 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3186 {
 3187 
 3188         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3189             uap->times, UIO_USERSPACE));
 3190 }
 3191 
 3192 int
 3193 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3194     struct timeval *tptr, enum uio_seg tptrseg)
 3195 {
 3196 
 3197         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3198 }
 3199 
 3200 int
 3201 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3202     struct timeval *tptr, enum uio_seg tptrseg)
 3203 {
 3204         struct nameidata nd;
 3205         struct timespec ts[2];
 3206         cap_rights_t rights;
 3207         int error;
 3208 
 3209         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3210                 return (error);
 3211         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3212             cap_rights_init(&rights, CAP_FUTIMES), td);
 3213 
 3214         if ((error = namei(&nd)) != 0)
 3215                 return (error);
 3216         NDFREE(&nd, NDF_ONLY_PNBUF);
 3217         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3218         vrele(nd.ni_vp);
 3219         return (error);
 3220 }
 3221 
 3222 /*
 3223  * Set the access and modification times of a file.
 3224  */
 3225 #ifndef _SYS_SYSPROTO_H_
 3226 struct lutimes_args {
 3227         char    *path;
 3228         struct  timeval *tptr;
 3229 };
 3230 #endif
 3231 int
 3232 sys_lutimes(td, uap)
 3233         struct thread *td;
 3234         register struct lutimes_args /* {
 3235                 char *path;
 3236                 struct timeval *tptr;
 3237         } */ *uap;
 3238 {
 3239 
 3240         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3241             UIO_USERSPACE));
 3242 }
 3243 
 3244 int
 3245 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3246     struct timeval *tptr, enum uio_seg tptrseg)
 3247 {
 3248         struct timespec ts[2];
 3249         struct nameidata nd;
 3250         int error;
 3251 
 3252         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3253                 return (error);
 3254         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3255         if ((error = namei(&nd)) != 0)
 3256                 return (error);
 3257         NDFREE(&nd, NDF_ONLY_PNBUF);
 3258         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3259         vrele(nd.ni_vp);
 3260         return (error);
 3261 }
 3262 
 3263 /*
 3264  * Set the access and modification times of a file.
 3265  */
 3266 #ifndef _SYS_SYSPROTO_H_
 3267 struct futimes_args {
 3268         int     fd;
 3269         struct  timeval *tptr;
 3270 };
 3271 #endif
 3272 int
 3273 sys_futimes(td, uap)
 3274         struct thread *td;
 3275         register struct futimes_args /* {
 3276                 int  fd;
 3277                 struct timeval *tptr;
 3278         } */ *uap;
 3279 {
 3280 
 3281         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3282 }
 3283 
 3284 int
 3285 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3286     enum uio_seg tptrseg)
 3287 {
 3288         struct timespec ts[2];
 3289         struct file *fp;
 3290         cap_rights_t rights;
 3291         int error;
 3292 
 3293         AUDIT_ARG_FD(fd);
 3294         error = getutimes(tptr, tptrseg, ts);
 3295         if (error != 0)
 3296                 return (error);
 3297         error = getvnode(td->td_proc->p_fd, fd,
 3298             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3299         if (error != 0)
 3300                 return (error);
 3301 #ifdef AUDIT
 3302         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3303         AUDIT_ARG_VNODE1(fp->f_vnode);
 3304         VOP_UNLOCK(fp->f_vnode, 0);
 3305 #endif
 3306         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3307         fdrop(fp, td);
 3308         return (error);
 3309 }
 3310 
 3311 /*
 3312  * Truncate a file given its path name.
 3313  */
 3314 #ifndef _SYS_SYSPROTO_H_
 3315 struct truncate_args {
 3316         char    *path;
 3317         int     pad;
 3318         off_t   length;
 3319 };
 3320 #endif
 3321 int
 3322 sys_truncate(td, uap)
 3323         struct thread *td;
 3324         register struct truncate_args /* {
 3325                 char *path;
 3326                 int pad;
 3327                 off_t length;
 3328         } */ *uap;
 3329 {
 3330 
 3331         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3332 }
 3333 
 3334 int
 3335 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3336 {
 3337         struct mount *mp;
 3338         struct vnode *vp;
 3339         void *rl_cookie;
 3340         struct vattr vattr;
 3341         struct nameidata nd;
 3342         int error;
 3343 
 3344         if (length < 0)
 3345                 return(EINVAL);
 3346         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3347         if ((error = namei(&nd)) != 0)
 3348                 return (error);
 3349         vp = nd.ni_vp;
 3350         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3351         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3352                 vn_rangelock_unlock(vp, rl_cookie);
 3353                 vrele(vp);
 3354                 return (error);
 3355         }
 3356         NDFREE(&nd, NDF_ONLY_PNBUF);
 3357         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3358         if (vp->v_type == VDIR)
 3359                 error = EISDIR;
 3360 #ifdef MAC
 3361         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3362         }
 3363 #endif
 3364         else if ((error = vn_writechk(vp)) == 0 &&
 3365             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3366                 VATTR_NULL(&vattr);
 3367                 vattr.va_size = length;
 3368                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3369         }
 3370         VOP_UNLOCK(vp, 0);
 3371         vn_finished_write(mp);
 3372         vn_rangelock_unlock(vp, rl_cookie);
 3373         vrele(vp);
 3374         return (error);
 3375 }
 3376 
 3377 #if defined(COMPAT_43)
 3378 /*
 3379  * Truncate a file given its path name.
 3380  */
 3381 #ifndef _SYS_SYSPROTO_H_
 3382 struct otruncate_args {
 3383         char    *path;
 3384         long    length;
 3385 };
 3386 #endif
 3387 int
 3388 otruncate(td, uap)
 3389         struct thread *td;
 3390         register struct otruncate_args /* {
 3391                 char *path;
 3392                 long length;
 3393         } */ *uap;
 3394 {
 3395         struct truncate_args /* {
 3396                 char *path;
 3397                 int pad;
 3398                 off_t length;
 3399         } */ nuap;
 3400 
 3401         nuap.path = uap->path;
 3402         nuap.length = uap->length;
 3403         return (sys_truncate(td, &nuap));
 3404 }
 3405 #endif /* COMPAT_43 */
 3406 
 3407 /* Versions with the pad argument */
 3408 int
 3409 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3410 {
 3411         struct truncate_args ouap;
 3412 
 3413         ouap.path = uap->path;
 3414         ouap.length = uap->length;
 3415         return (sys_truncate(td, &ouap));
 3416 }
 3417 
 3418 int
 3419 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3420 {
 3421         struct ftruncate_args ouap;
 3422 
 3423         ouap.fd = uap->fd;
 3424         ouap.length = uap->length;
 3425         return (sys_ftruncate(td, &ouap));
 3426 }
 3427 
 3428 /*
 3429  * Sync an open file.
 3430  */
 3431 #ifndef _SYS_SYSPROTO_H_
 3432 struct fsync_args {
 3433         int     fd;
 3434 };
 3435 #endif
 3436 int
 3437 sys_fsync(td, uap)
 3438         struct thread *td;
 3439         struct fsync_args /* {
 3440                 int fd;
 3441         } */ *uap;
 3442 {
 3443         struct vnode *vp;
 3444         struct mount *mp;
 3445         struct file *fp;
 3446         cap_rights_t rights;
 3447         int error, lock_flags;
 3448 
 3449         AUDIT_ARG_FD(uap->fd);
 3450         error = getvnode(td->td_proc->p_fd, uap->fd,
 3451             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3452         if (error != 0)
 3453                 return (error);
 3454         vp = fp->f_vnode;
 3455         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3456         if (error != 0)
 3457                 goto drop;
 3458         if (MNT_SHARED_WRITES(mp) ||
 3459             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3460                 lock_flags = LK_SHARED;
 3461         } else {
 3462                 lock_flags = LK_EXCLUSIVE;
 3463         }
 3464         vn_lock(vp, lock_flags | LK_RETRY);
 3465         AUDIT_ARG_VNODE1(vp);
 3466         if (vp->v_object != NULL) {
 3467                 VM_OBJECT_WLOCK(vp->v_object);
 3468                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3469                 VM_OBJECT_WUNLOCK(vp->v_object);
 3470         }
 3471         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3472 
 3473         VOP_UNLOCK(vp, 0);
 3474         vn_finished_write(mp);
 3475 drop:
 3476         fdrop(fp, td);
 3477         return (error);
 3478 }
 3479 
 3480 /*
 3481  * Rename files.  Source and destination must either both be directories, or
 3482  * both not be directories.  If target is a directory, it must be empty.
 3483  */
 3484 #ifndef _SYS_SYSPROTO_H_
 3485 struct rename_args {
 3486         char    *from;
 3487         char    *to;
 3488 };
 3489 #endif
 3490 int
 3491 sys_rename(td, uap)
 3492         struct thread *td;
 3493         register struct rename_args /* {
 3494                 char *from;
 3495                 char *to;
 3496         } */ *uap;
 3497 {
 3498 
 3499         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3500 }
 3501 
 3502 #ifndef _SYS_SYSPROTO_H_
 3503 struct renameat_args {
 3504         int     oldfd;
 3505         char    *old;
 3506         int     newfd;
 3507         char    *new;
 3508 };
 3509 #endif
 3510 int
 3511 sys_renameat(struct thread *td, struct renameat_args *uap)
 3512 {
 3513 
 3514         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3515             UIO_USERSPACE));
 3516 }
 3517 
 3518 int
 3519 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3520 {
 3521 
 3522         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3523 }
 3524 
 3525 int
 3526 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3527     enum uio_seg pathseg)
 3528 {
      /*
       * Rename the object named by (oldfd, old) to (newfd, new).  Both
       * names are resolved with namei() and the rename itself is done by
       * VOP_RENAME().  Returns 0 or an errno value; the internal marker
       * -1 (source and target are the same vnode) is reported as 0.
       */
 3529         struct mount *mp = NULL;
 3530         struct vnode *tvp, *fvp, *tdvp;
 3531         struct nameidata fromnd, tond;
 3532         cap_rights_t rights;
 3533         int error;
 3534 
      /* Retry point after a failed non-blocking vn_start_write() below. */
 3535 again:
 3536         bwillwrite();
      /*
       * With MAC the source parent and leaf are looked up locked so the
       * check below can run on them; they are unlocked right after it.
       * Without MAC the lookup uses WANTPARENT instead.
       */
 3537 #ifdef MAC
 3538         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3539             AUDITVNODE1, pathseg, old, oldfd,
 3540             cap_rights_init(&rights, CAP_RENAMEAT), td);
 3541 #else
 3542         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3543             pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
 3544 #endif
 3545 
 3546         if ((error = namei(&fromnd)) != 0)
 3547                 return (error);
 3548 #ifdef MAC
      /*
       * NOTE(review): the result of this check is overwritten by the
       * namei(&tond) call below before it is tested - confirm intended.
       */
 3549         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3550             fromnd.ni_vp, &fromnd.ni_cnd);
 3551         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3552         if (fromnd.ni_dvp != fromnd.ni_vp)
 3553                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3554 #endif
 3555         fvp = fromnd.ni_vp;
      /* Look up the target with its parent and leaf locked. */
 3556         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3557             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3558             cap_rights_init(&rights, CAP_LINKAT), td);
 3559         if (fromnd.ni_vp->v_type == VDIR)
 3560                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3561         if ((error = namei(&tond)) != 0) {
 3562                 /* Translate error code for rename("dir1", "dir2/."). */
 3563                 if (error == EISDIR && fvp->v_type == VDIR)
 3564                         error = EINVAL;
 3565                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3566                 vrele(fromnd.ni_dvp);
 3567                 vrele(fvp);
 3568                 goto out1;
 3569         }
 3570         tdvp = tond.ni_dvp;
 3571         tvp = tond.ni_vp;
 3572         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3573         if (error != 0) {
      /*
       * The write could not be started without sleeping: release
       * everything both lookups returned, wait with V_XSLEEP and
       * redo the lookups from the top.
       */
 3574                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3575                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3576                 if (tvp != NULL)
 3577                         vput(tvp);
 3578                 if (tdvp == tvp)
 3579                         vrele(tdvp);
 3580                 else
 3581                         vput(tdvp);
 3582                 vrele(fromnd.ni_dvp);
 3583                 vrele(fvp);
 3584                 vrele(tond.ni_startdir);
 3585                 if (fromnd.ni_startdir != NULL)
 3586                         vrele(fromnd.ni_startdir);
 3587                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3588                 if (error != 0)
 3589                         return (error);
 3590                 goto again;
 3591         }
      /* An existing target must be of the same kind as the source. */
 3592         if (tvp != NULL) {
 3593                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3594                         error = ENOTDIR;
 3595                         goto out;
 3596                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3597                         error = EISDIR;
 3598                         goto out;
 3599                 }
 3600 #ifdef CAPABILITIES
 3601                 if (newfd != AT_FDCWD) {
 3602                         /*
 3603                          * If the target already exists we require CAP_UNLINKAT
 3604                          * from 'newfd'.
 3605                          */
 3606                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3607                             cap_rights_init(&rights, CAP_UNLINKAT));
 3608                         if (error != 0)
 3609                                 goto out;
 3610                 }
 3611 #endif
 3612         }
      /* The source vnode may not be the target's parent directory. */
 3613         if (fvp == tdvp) {
 3614                 error = EINVAL;
 3615                 goto out;
 3616         }
 3617         /*
 3618          * If the source is the same as the destination (that is, if they
 3619          * are links to the same vnode), then there is nothing to do.
 3620          */
      /* -1 skips VOP_RENAME() below and is turned into 0 on return. */
 3621         if (fvp == tvp)
 3622                 error = -1;
 3623 #ifdef MAC
 3624         else
 3625                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3626                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3627 #endif
 3628 out:
      /*
       * On the success path no vnode is released here after the
       * VOP_RENAME() call (presumably VOP_RENAME() consumes the
       * references - confirm against VOP_RENAME(9)); on the failure
       * path they are released explicitly.
       */
 3629         if (error == 0) {
 3630                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3631                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3632                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3633                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3634         } else {
 3635                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3636                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3637                 if (tvp != NULL)
 3638                         vput(tvp);
 3639                 if (tdvp == tvp)
 3640                         vrele(tdvp);
 3641                 else
 3642                         vput(tdvp);
 3643                 vrele(fromnd.ni_dvp);
 3644                 vrele(fvp);
 3645         }
 3646         vrele(tond.ni_startdir);
 3647         vn_finished_write(mp);
      /* Reached directly when the target lookup failed. */
 3648 out1:
 3649         if (fromnd.ni_startdir)
 3650                 vrele(fromnd.ni_startdir);
 3651         if (error == -1)
 3652                 return (0);
 3653         return (error);
 3654 }
 3655 
 3656 /*
 3657  * Make a directory file.
 3658  */
 3659 #ifndef _SYS_SYSPROTO_H_
 3660 struct mkdir_args {
 3661         char    *path;
 3662         int     mode;
 3663 };
 3664 #endif
 3665 int
 3666 sys_mkdir(td, uap)
 3667         struct thread *td;
 3668         register struct mkdir_args /* {
 3669                 char *path;
 3670                 int mode;
 3671         } */ *uap;
 3672 {
 3673 
 3674         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3675 }
 3676 
 3677 #ifndef _SYS_SYSPROTO_H_
 3678 struct mkdirat_args {
 3679         int     fd;
 3680         char    *path;
 3681         mode_t  mode;
 3682 };
 3683 #endif
 3684 int
 3685 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3686 {
 3687 
 3688         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3689 }
 3690 
 3691 int
 3692 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3693 {
 3694 
 3695         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3696 }
 3697 
 3698 int
 3699 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3700     int mode)
 3701 {
 3702         struct mount *mp;
 3703         struct vnode *vp;
 3704         struct vattr vattr;
 3705         struct nameidata nd;
 3706         cap_rights_t rights;
 3707         int error;
 3708 
 3709         AUDIT_ARG_MODE(mode);
 3710 restart:
 3711         bwillwrite();
 3712         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3713             NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 3714             td);
 3715         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3716         if ((error = namei(&nd)) != 0)
 3717                 return (error);
 3718         vp = nd.ni_vp;
 3719         if (vp != NULL) {
 3720                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3721                 /*
 3722                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3723                  * the strange behaviour of leaving the vnode unlocked
 3724                  * if the target is the same vnode as the parent.
 3725                  */
 3726                 if (vp == nd.ni_dvp)
 3727                         vrele(nd.ni_dvp);
 3728                 else
 3729                         vput(nd.ni_dvp);
 3730                 vrele(vp);
 3731                 return (EEXIST);
 3732         }
 3733         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3734                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3735                 vput(nd.ni_dvp);
 3736                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3737                         return (error);
 3738                 goto restart;
 3739         }
 3740         VATTR_NULL(&vattr);
 3741         vattr.va_type = VDIR;
 3742         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3743 #ifdef MAC
 3744         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3745             &vattr);
 3746         if (error != 0)
 3747                 goto out;
 3748 #endif
 3749         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3750 #ifdef MAC
 3751 out:
 3752 #endif
 3753         NDFREE(&nd, NDF_ONLY_PNBUF);
 3754         vput(nd.ni_dvp);
 3755         if (error == 0)
 3756                 vput(nd.ni_vp);
 3757         vn_finished_write(mp);
 3758         return (error);
 3759 }
 3760 
 3761 /*
 3762  * Remove a directory file.
 3763  */
 3764 #ifndef _SYS_SYSPROTO_H_
 3765 struct rmdir_args {
 3766         char    *path;
 3767 };
 3768 #endif
 3769 int
 3770 sys_rmdir(td, uap)
 3771         struct thread *td;
 3772         struct rmdir_args /* {
 3773                 char *path;
 3774         } */ *uap;
 3775 {
 3776 
 3777         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3778 }
 3779 
 3780 int
 3781 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3782 {
 3783 
 3784         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3785 }
 3786 
 3787 int
 3788 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3789 {
 3790         struct mount *mp;
 3791         struct vnode *vp;
 3792         struct nameidata nd;
 3793         cap_rights_t rights;
 3794         int error;
 3795 
 3796 restart:
 3797         bwillwrite();
 3798         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3799             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3800         if ((error = namei(&nd)) != 0)
 3801                 return (error);
 3802         vp = nd.ni_vp;
 3803         if (vp->v_type != VDIR) {
 3804                 error = ENOTDIR;
 3805                 goto out;
 3806         }
 3807         /*
 3808          * No rmdir "." please.
 3809          */
 3810         if (nd.ni_dvp == vp) {
 3811                 error = EINVAL;
 3812                 goto out;
 3813         }
 3814         /*
 3815          * The root of a mounted filesystem cannot be deleted.
 3816          */
 3817         if (vp->v_vflag & VV_ROOT) {
 3818                 error = EBUSY;
 3819                 goto out;
 3820         }
 3821 #ifdef MAC
 3822         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3823             &nd.ni_cnd);
 3824         if (error != 0)
 3825                 goto out;
 3826 #endif
 3827         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3828                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3829                 vput(vp);
 3830                 if (nd.ni_dvp == vp)
 3831                         vrele(nd.ni_dvp);
 3832                 else
 3833                         vput(nd.ni_dvp);
 3834                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3835                         return (error);
 3836                 goto restart;
 3837         }
 3838         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3839         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3840         vn_finished_write(mp);
 3841 out:
 3842         NDFREE(&nd, NDF_ONLY_PNBUF);
 3843         vput(vp);
 3844         if (nd.ni_dvp == vp)
 3845                 vrele(nd.ni_dvp);
 3846         else
 3847                 vput(nd.ni_dvp);
 3848         return (error);
 3849 }
 3850 
 3851 #ifdef COMPAT_43
 3852 /*
 3853  * Read a block of directory entries in a filesystem independent format.
 3854  */
 3855 #ifndef _SYS_SYSPROTO_H_
 3856 struct ogetdirentries_args {
 3857         int     fd;
 3858         char    *buf;
 3859         u_int   count;
 3860         long    *basep;
 3861 };
 3862 #endif
 3863 int
 3864 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3865 {
 3866         long loff;
 3867         int error;
 3868 
 3869         error = kern_ogetdirentries(td, uap, &loff);
 3870         if (error == 0)
 3871                 error = copyout(&loff, uap->basep, sizeof(long));
 3872         return (error);
 3873 }
 3874 
 3875 int
 3876 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3877     long *ploff)
 3878 {
 3879         struct vnode *vp;
 3880         struct file *fp;
 3881         struct uio auio, kuio;
 3882         struct iovec aiov, kiov;
 3883         struct dirent *dp, *edp;
 3884         cap_rights_t rights;
 3885         caddr_t dirbuf;
 3886         int error, eofflag, readcnt;
 3887         long loff;
 3888         off_t foffset;
 3889 
 3890         /* XXX arbitrary sanity limit on `count'. */
 3891         if (uap->count > 64 * 1024)
 3892                 return (EINVAL);
 3893         error = getvnode(td->td_proc->p_fd, uap->fd,
 3894             cap_rights_init(&rights, CAP_READ), &fp);
 3895         if (error != 0)
 3896                 return (error);
 3897         if ((fp->f_flag & FREAD) == 0) {
 3898                 fdrop(fp, td);
 3899                 return (EBADF);
 3900         }
 3901         vp = fp->f_vnode;
 3902         foffset = foffset_lock(fp, 0);
 3903 unionread:
 3904         if (vp->v_type != VDIR) {
 3905                 foffset_unlock(fp, foffset, 0);
 3906                 fdrop(fp, td);
 3907                 return (EINVAL);
 3908         }
 3909         aiov.iov_base = uap->buf;
 3910         aiov.iov_len = uap->count;
 3911         auio.uio_iov = &aiov;
 3912         auio.uio_iovcnt = 1;
 3913         auio.uio_rw = UIO_READ;
 3914         auio.uio_segflg = UIO_USERSPACE;
 3915         auio.uio_td = td;
 3916         auio.uio_resid = uap->count;
 3917         vn_lock(vp, LK_SHARED | LK_RETRY);
 3918         loff = auio.uio_offset = foffset;
 3919 #ifdef MAC
 3920         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3921         if (error != 0) {
 3922                 VOP_UNLOCK(vp, 0);
 3923                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 3924                 fdrop(fp, td);
 3925                 return (error);
 3926         }
 3927 #endif
 3928 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3929                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3930                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3931                             NULL, NULL);
 3932                         foffset = auio.uio_offset;
 3933                 } else
 3934 #       endif
 3935         {
 3936                 kuio = auio;
 3937                 kuio.uio_iov = &kiov;
 3938                 kuio.uio_segflg = UIO_SYSSPACE;
 3939                 kiov.iov_len = uap->count;
 3940                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3941                 kiov.iov_base = dirbuf;
 3942                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3943                             NULL, NULL);
 3944                 foffset = kuio.uio_offset;
 3945                 if (error == 0) {
 3946                         readcnt = uap->count - kuio.uio_resid;
 3947                         edp = (struct dirent *)&dirbuf[readcnt];
 3948                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3949 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3950                                         /*
 3951                                          * The expected low byte of
 3952                                          * dp->d_namlen is our dp->d_type.
 3953                                          * The high MBZ byte of dp->d_namlen
 3954                                          * is our dp->d_namlen.
 3955                                          */
 3956                                         dp->d_type = dp->d_namlen;
 3957                                         dp->d_namlen = 0;
 3958 #                               else
 3959                                         /*
 3960                                          * The dp->d_type is the high byte
 3961                                          * of the expected dp->d_namlen,
 3962                                          * so must be zero'ed.
 3963                                          */
 3964                                         dp->d_type = 0;
 3965 #                               endif
 3966                                 if (dp->d_reclen > 0) {
 3967                                         dp = (struct dirent *)
 3968                                             ((char *)dp + dp->d_reclen);
 3969                                 } else {
 3970                                         error = EIO;
 3971                                         break;
 3972                                 }
 3973                         }
 3974                         if (dp >= edp)
 3975                                 error = uiomove(dirbuf, readcnt, &auio);
 3976                 }
 3977                 free(dirbuf, M_TEMP);
 3978         }
 3979         if (error != 0) {
 3980                 VOP_UNLOCK(vp, 0);
 3981                 foffset_unlock(fp, foffset, 0);
 3982                 fdrop(fp, td);
 3983                 return (error);
 3984         }
 3985         if (uap->count == auio.uio_resid &&
 3986             (vp->v_vflag & VV_ROOT) &&
 3987             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3988                 struct vnode *tvp = vp;
 3989                 vp = vp->v_mount->mnt_vnodecovered;
 3990                 VREF(vp);
 3991                 fp->f_vnode = vp;
 3992                 fp->f_data = vp;
 3993                 foffset = 0;
 3994                 vput(tvp);
 3995                 goto unionread;
 3996         }
 3997         VOP_UNLOCK(vp, 0);
 3998         foffset_unlock(fp, foffset, 0);
 3999         fdrop(fp, td);
 4000         td->td_retval[0] = uap->count - auio.uio_resid;
 4001         if (error == 0)
 4002                 *ploff = loff;
 4003         return (error);
 4004 }
 4005 #endif /* COMPAT_43 */
 4006 
 4007 /*
 4008  * Read a block of directory entries in a filesystem independent format.
 4009  */
 4010 #ifndef _SYS_SYSPROTO_H_
 4011 struct getdirentries_args {
 4012         int     fd;
 4013         char    *buf;
 4014         u_int   count;
 4015         long    *basep;
 4016 };
 4017 #endif
 4018 int
 4019 sys_getdirentries(td, uap)
 4020         struct thread *td;
 4021         register struct getdirentries_args /* {
 4022                 int fd;
 4023                 char *buf;
 4024                 u_int count;
 4025                 long *basep;
 4026         } */ *uap;
 4027 {
 4028         long base;
 4029         int error;
 4030 
 4031         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4032             NULL, UIO_USERSPACE);
 4033         if (error != 0)
 4034                 return (error);
 4035         if (uap->basep != NULL)
 4036                 error = copyout(&base, uap->basep, sizeof(long));
 4037         return (error);
 4038 }
 4039 
 4040 int
 4041 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4042     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4043 {
 4044         struct vnode *vp;
 4045         struct file *fp;
 4046         struct uio auio;
 4047         struct iovec aiov;
 4048         cap_rights_t rights;
 4049         long loff;
 4050         int error, eofflag;
 4051         off_t foffset;
 4052 
 4053         AUDIT_ARG_FD(fd);
 4054         if (count > IOSIZE_MAX)
 4055                 return (EINVAL);
 4056         auio.uio_resid = count;
 4057         error = getvnode(td->td_proc->p_fd, fd,
 4058             cap_rights_init(&rights, CAP_READ), &fp);
 4059         if (error != 0)
 4060                 return (error);
 4061         if ((fp->f_flag & FREAD) == 0) {
 4062                 fdrop(fp, td);
 4063                 return (EBADF);
 4064         }
 4065         vp = fp->f_vnode;
 4066         foffset = foffset_lock(fp, 0);
 4067 unionread:
 4068         if (vp->v_type != VDIR) {
 4069                 error = EINVAL;
 4070                 goto fail;
 4071         }
 4072         aiov.iov_base = buf;
 4073         aiov.iov_len = count;
 4074         auio.uio_iov = &aiov;
 4075         auio.uio_iovcnt = 1;
 4076         auio.uio_rw = UIO_READ;
 4077         auio.uio_segflg = bufseg;
 4078         auio.uio_td = td;
 4079         vn_lock(vp, LK_SHARED | LK_RETRY);
 4080         AUDIT_ARG_VNODE1(vp);
 4081         loff = auio.uio_offset = foffset;
 4082 #ifdef MAC
 4083         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4084         if (error == 0)
 4085 #endif
 4086                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4087                     NULL);
 4088         foffset = auio.uio_offset;
 4089         if (error != 0) {
 4090                 VOP_UNLOCK(vp, 0);
 4091                 goto fail;
 4092         }
 4093         if (count == auio.uio_resid &&
 4094             (vp->v_vflag & VV_ROOT) &&
 4095             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4096                 struct vnode *tvp = vp;
 4097 
 4098                 vp = vp->v_mount->mnt_vnodecovered;
 4099                 VREF(vp);
 4100                 fp->f_vnode = vp;
 4101                 fp->f_data = vp;
 4102                 foffset = 0;
 4103                 vput(tvp);
 4104                 goto unionread;
 4105         }
 4106         VOP_UNLOCK(vp, 0);
 4107         *basep = loff;
 4108         if (residp != NULL)
 4109                 *residp = auio.uio_resid;
 4110         td->td_retval[0] = count - auio.uio_resid;
 4111 fail:
 4112         foffset_unlock(fp, foffset, 0);
 4113         fdrop(fp, td);
 4114         return (error);
 4115 }
 4116 
 4117 #ifndef _SYS_SYSPROTO_H_
 4118 struct getdents_args {
 4119         int fd;
 4120         char *buf;
 4121         size_t count;
 4122 };
 4123 #endif
 4124 int
 4125 sys_getdents(td, uap)
 4126         struct thread *td;
 4127         register struct getdents_args /* {
 4128                 int fd;
 4129                 char *buf;
 4130                 u_int count;
 4131         } */ *uap;
 4132 {
 4133         struct getdirentries_args ap;
 4134 
 4135         ap.fd = uap->fd;
 4136         ap.buf = uap->buf;
 4137         ap.count = uap->count;
 4138         ap.basep = NULL;
 4139         return (sys_getdirentries(td, &ap));
 4140 }
 4141 
 4142 /*
 4143  * Set the mode mask for creation of filesystem nodes.
 4144  */
 4145 #ifndef _SYS_SYSPROTO_H_
 4146 struct umask_args {
 4147         int     newmask;
 4148 };
 4149 #endif
 4150 int
 4151 sys_umask(td, uap)
 4152         struct thread *td;
 4153         struct umask_args /* {
 4154                 int newmask;
 4155         } */ *uap;
 4156 {
 4157         register struct filedesc *fdp;
 4158 
 4159         FILEDESC_XLOCK(td->td_proc->p_fd);
 4160         fdp = td->td_proc->p_fd;
 4161         td->td_retval[0] = fdp->fd_cmask;
 4162         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4163         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4164         return (0);
 4165 }
 4166 
 4167 /*
 4168  * Void all references to file by ripping underlying filesystem away from
 4169  * vnode.
 4170  */
 4171 #ifndef _SYS_SYSPROTO_H_
 4172 struct revoke_args {
 4173         char    *path;
 4174 };
 4175 #endif
 4176 int
 4177 sys_revoke(td, uap)
 4178         struct thread *td;
 4179         register struct revoke_args /* {
 4180                 char *path;
 4181         } */ *uap;
 4182 {
 4183         struct vnode *vp;
 4184         struct vattr vattr;
 4185         struct nameidata nd;
 4186         int error;
 4187 
 4188         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4189             uap->path, td);
 4190         if ((error = namei(&nd)) != 0)
 4191                 return (error);
 4192         vp = nd.ni_vp;
 4193         NDFREE(&nd, NDF_ONLY_PNBUF);
 4194         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4195                 error = EINVAL;
 4196                 goto out;
 4197         }
 4198 #ifdef MAC
 4199         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4200         if (error != 0)
 4201                 goto out;
 4202 #endif
 4203         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4204         if (error != 0)
 4205                 goto out;
 4206         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4207                 error = priv_check(td, PRIV_VFS_ADMIN);
 4208                 if (error != 0)
 4209                         goto out;
 4210         }
 4211         if (vcount(vp) > 1)
 4212                 VOP_REVOKE(vp, REVOKEALL);
 4213 out:
 4214         vput(vp);
 4215         return (error);
 4216 }
 4217 
 4218 /*
 4219  * Convert a user file descriptor to a kernel file entry and check that, if it
 4220  * is a capability, the correct rights are present. A reference on the file
 4221  * entry is held upon returning.
 4222  */
 4223 int
 4224 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4225 {
 4226         struct file *fp;
 4227         int error;
 4228 
 4229         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4230         if (error != 0)
 4231                 return (error);
 4232 
 4233         /*
 4234          * The file could be not of the vnode type, or it may be not
 4235          * yet fully initialized, in which case the f_vnode pointer
 4236          * may be set, but f_ops is still badfileops.  E.g.,
 4237          * devfs_open() transiently create such situation to
 4238          * facilitate csw d_fdopen().
 4239          *
 4240          * Dupfdopen() handling in kern_openat() installs the
 4241          * half-baked file into the process descriptor table, allowing
 4242          * other thread to dereference it. Guard against the race by
 4243          * checking f_ops.
 4244          */
 4245         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4246                 fdrop(fp, curthread);
 4247                 return (EINVAL);
 4248         }
 4249         *fpp = fp;
 4250         return (0);
 4251 }
 4252 
 4253 
 4254 /*
 4255  * Get an (NFS) file handle.
 4256  */
 4257 #ifndef _SYS_SYSPROTO_H_
 4258 struct lgetfh_args {
 4259         char    *fname;
 4260         fhandle_t *fhp;
 4261 };
 4262 #endif
 4263 int
 4264 sys_lgetfh(td, uap)
 4265         struct thread *td;
 4266         register struct lgetfh_args *uap;
 4267 {
 4268         struct nameidata nd;
 4269         fhandle_t fh;
 4270         register struct vnode *vp;
 4271         int error;
 4272 
 4273         error = priv_check(td, PRIV_VFS_GETFH);
 4274         if (error != 0)
 4275                 return (error);
 4276         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4277             uap->fname, td);
 4278         error = namei(&nd);
 4279         if (error != 0)
 4280                 return (error);
 4281         NDFREE(&nd, NDF_ONLY_PNBUF);
 4282         vp = nd.ni_vp;
 4283         bzero(&fh, sizeof(fh));
 4284         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4285         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4286         vput(vp);
 4287         if (error == 0)
 4288                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4289         return (error);
 4290 }
 4291 
 4292 #ifndef _SYS_SYSPROTO_H_
 4293 struct getfh_args {
 4294         char    *fname;
 4295         fhandle_t *fhp;
 4296 };
 4297 #endif
 4298 int
 4299 sys_getfh(td, uap)
 4300         struct thread *td;
 4301         register struct getfh_args *uap;
 4302 {
 4303         struct nameidata nd;
 4304         fhandle_t fh;
 4305         register struct vnode *vp;
 4306         int error;
 4307 
 4308         error = priv_check(td, PRIV_VFS_GETFH);
 4309         if (error != 0)
 4310                 return (error);
 4311         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4312             uap->fname, td);
 4313         error = namei(&nd);
 4314         if (error != 0)
 4315                 return (error);
 4316         NDFREE(&nd, NDF_ONLY_PNBUF);
 4317         vp = nd.ni_vp;
 4318         bzero(&fh, sizeof(fh));
 4319         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4320         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4321         vput(vp);
 4322         if (error == 0)
 4323                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4324         return (error);
 4325 }
 4326 
 4327 /*
 4328  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4329  * open descriptor.
 4330  *
 4331  * warning: do not remove the priv_check() call or this becomes one giant
 4332  * security hole.
 4333  */
 4334 #ifndef _SYS_SYSPROTO_H_
 4335 struct fhopen_args {
 4336         const struct fhandle *u_fhp;
 4337         int flags;
 4338 };
 4339 #endif
 4340 int
 4341 sys_fhopen(td, uap)
 4342         struct thread *td;
 4343         struct fhopen_args /* {
 4344                 const struct fhandle *u_fhp;
 4345                 int flags;
 4346         } */ *uap;
 4347 {
 4348         struct mount *mp;
 4349         struct vnode *vp;
 4350         struct fhandle fhp;
 4351         struct file *fp;
 4352         int fmode, error;
 4353         int indx;
 4354 
 4355         error = priv_check(td, PRIV_VFS_FHOPEN);
 4356         if (error != 0)
 4357                 return (error);
 4358         indx = -1;
 4359         fmode = FFLAGS(uap->flags);
 4360         /* why not allow a non-read/write open for our lockd? */
 4361         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4362                 return (EINVAL);
 4363         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4364         if (error != 0)
 4365                 return(error);
 4366         /* find the mount point */
 4367         mp = vfs_busyfs(&fhp.fh_fsid);
 4368         if (mp == NULL)
 4369                 return (ESTALE);
 4370         /* now give me my vnode, it gets returned to me locked */
 4371         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4372         vfs_unbusy(mp);
 4373         if (error != 0)
 4374                 return (error);
 4375 
 4376         error = falloc_noinstall(td, &fp);
 4377         if (error != 0) {
 4378                 vput(vp);
 4379                 return (error);
 4380         }
 4381         /*
 4382          * An extra reference on `fp' has been held for us by
 4383          * falloc_noinstall().
 4384          */
 4385 
 4386 #ifdef INVARIANTS
 4387         td->td_dupfd = -1;
 4388 #endif
 4389         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4390         if (error != 0) {
 4391                 KASSERT(fp->f_ops == &badfileops,
 4392                     ("VOP_OPEN in fhopen() set f_ops"));
 4393                 KASSERT(td->td_dupfd < 0,
 4394                     ("fhopen() encountered fdopen()"));
 4395 
 4396                 vput(vp);
 4397                 goto bad;
 4398         }
 4399 #ifdef INVARIANTS
 4400         td->td_dupfd = 0;
 4401 #endif
 4402         fp->f_vnode = vp;
 4403         fp->f_seqcount = 1;
 4404         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4405             &vnops);
 4406         VOP_UNLOCK(vp, 0);
 4407         if ((fmode & O_TRUNC) != 0) {
 4408                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4409                 if (error != 0)
 4410                         goto bad;
 4411         }
 4412 
 4413         error = finstall(td, fp, &indx, fmode, NULL);
 4414 bad:
 4415         fdrop(fp, td);
 4416         td->td_retval[0] = indx;
 4417         return (error);
 4418 }
 4419 
 4420 /*
 4421  * Stat an (NFS) file handle.
 4422  */
 4423 #ifndef _SYS_SYSPROTO_H_
 4424 struct fhstat_args {
 4425         struct fhandle *u_fhp;
 4426         struct stat *sb;
 4427 };
 4428 #endif
 4429 int
 4430 sys_fhstat(td, uap)
 4431         struct thread *td;
 4432         register struct fhstat_args /* {
 4433                 struct fhandle *u_fhp;
 4434                 struct stat *sb;
 4435         } */ *uap;
 4436 {
 4437         struct stat sb;
 4438         struct fhandle fh;
 4439         int error;
 4440 
 4441         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4442         if (error != 0)
 4443                 return (error);
 4444         error = kern_fhstat(td, fh, &sb);
 4445         if (error == 0)
 4446                 error = copyout(&sb, uap->sb, sizeof(sb));
 4447         return (error);
 4448 }
 4449 
 4450 int
 4451 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4452 {
 4453         struct mount *mp;
 4454         struct vnode *vp;
 4455         int error;
 4456 
 4457         error = priv_check(td, PRIV_VFS_FHSTAT);
 4458         if (error != 0)
 4459                 return (error);
 4460         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4461                 return (ESTALE);
 4462         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4463         vfs_unbusy(mp);
 4464         if (error != 0)
 4465                 return (error);
 4466         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4467         vput(vp);
 4468         return (error);
 4469 }
 4470 
 4471 /*
 4472  * Implement fstatfs() for (NFS) file handles.
 4473  */
 4474 #ifndef _SYS_SYSPROTO_H_
 4475 struct fhstatfs_args {
 4476         struct fhandle *u_fhp;
 4477         struct statfs *buf;
 4478 };
 4479 #endif
 4480 int
 4481 sys_fhstatfs(td, uap)
 4482         struct thread *td;
 4483         struct fhstatfs_args /* {
 4484                 struct fhandle *u_fhp;
 4485                 struct statfs *buf;
 4486         } */ *uap;
 4487 {
 4488         struct statfs sf;
 4489         fhandle_t fh;
 4490         int error;
 4491 
 4492         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4493         if (error != 0)
 4494                 return (error);
 4495         error = kern_fhstatfs(td, fh, &sf);
 4496         if (error != 0)
 4497                 return (error);
 4498         return (copyout(&sf, uap->buf, sizeof(sf)));
 4499 }
 4500 
 4501 int
 4502 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4503 {
 4504         struct statfs *sp;
 4505         struct mount *mp;
 4506         struct vnode *vp;
 4507         int error;
 4508 
 4509         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4510         if (error != 0)
 4511                 return (error);
 4512         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4513                 return (ESTALE);
 4514         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4515         if (error != 0) {
 4516                 vfs_unbusy(mp);
 4517                 return (error);
 4518         }
 4519         vput(vp);
 4520         error = prison_canseemount(td->td_ucred, mp);
 4521         if (error != 0)
 4522                 goto out;
 4523 #ifdef MAC
 4524         error = mac_mount_check_stat(td->td_ucred, mp);
 4525         if (error != 0)
 4526                 goto out;
 4527 #endif
 4528         /*
 4529          * Set these in case the underlying filesystem fails to do so.
 4530          */
 4531         sp = &mp->mnt_stat;
 4532         sp->f_version = STATFS_VERSION;
 4533         sp->f_namemax = NAME_MAX;
 4534         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4535         error = VFS_STATFS(mp, sp);
 4536         if (error == 0)
 4537                 *buf = *sp;
 4538 out:
 4539         vfs_unbusy(mp);
 4540         return (error);
 4541 }
 4542 
 4543 int
 4544 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4545 {
 4546         struct file *fp;
 4547         struct mount *mp;
 4548         struct vnode *vp;
 4549         cap_rights_t rights;
 4550         off_t olen, ooffset;
 4551         int error;
 4552 
 4553         if (offset < 0 || len <= 0)
 4554                 return (EINVAL);
 4555         /* Check for wrap. */
 4556         if (offset > OFF_MAX - len)
 4557                 return (EFBIG);
 4558         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4559         if (error != 0)
 4560                 return (error);
 4561         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4562                 error = ESPIPE;
 4563                 goto out;
 4564         }
 4565         if ((fp->f_flag & FWRITE) == 0) {
 4566                 error = EBADF;
 4567                 goto out;
 4568         }
 4569         if (fp->f_type != DTYPE_VNODE) {
 4570                 error = ENODEV;
 4571                 goto out;
 4572         }
 4573         vp = fp->f_vnode;
 4574         if (vp->v_type != VREG) {
 4575                 error = ENODEV;
 4576                 goto out;
 4577         }
 4578 
 4579         /* Allocating blocks may take a long time, so iterate. */
 4580         for (;;) {
 4581                 olen = len;
 4582                 ooffset = offset;
 4583 
 4584                 bwillwrite();
 4585                 mp = NULL;
 4586                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4587                 if (error != 0)
 4588                         break;
 4589                 error = vn_lock(vp, LK_EXCLUSIVE);
 4590                 if (error != 0) {
 4591                         vn_finished_write(mp);
 4592                         break;
 4593                 }
 4594 #ifdef MAC
 4595                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4596                 if (error == 0)
 4597 #endif
 4598                         error = VOP_ALLOCATE(vp, &offset, &len);
 4599                 VOP_UNLOCK(vp, 0);
 4600                 vn_finished_write(mp);
 4601 
 4602                 if (olen + ooffset != offset + len) {
 4603                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4604                             ooffset, olen, offset, len);
 4605                 }
 4606                 if (error != 0 || len == 0)
 4607                         break;
 4608                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4609                 maybe_yield();
 4610         }
 4611  out:
 4612         fdrop(fp, td);
 4613         return (error);
 4614 }
 4615 
 4616 int
 4617 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4618 {
 4619 
 4620         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4621             uap->len);
 4622         return (0);
 4623 }
 4624 
 4625 /*
 4626  * Unlike madvise(2), we do not make a best effort to remember every
 4627  * possible caching hint.  Instead, we remember the last setting with
 4628  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4629  * region of any current setting.
 4630  */
 4631 int
 4632 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4633     int advice)
 4634 {
 4635         struct fadvise_info *fa, *new;
 4636         struct file *fp;
 4637         struct vnode *vp;
 4638         cap_rights_t rights;
 4639         off_t end;
 4640         int error;
 4641 
 4642         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4643                 return (EINVAL);
 4644         switch (advice) {
 4645         case POSIX_FADV_SEQUENTIAL:
 4646         case POSIX_FADV_RANDOM:
 4647         case POSIX_FADV_NOREUSE:
 4648                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4649                 break;
 4650         case POSIX_FADV_NORMAL:
 4651         case POSIX_FADV_WILLNEED:
 4652         case POSIX_FADV_DONTNEED:
 4653                 new = NULL;
 4654                 break;
 4655         default:
 4656                 return (EINVAL);
 4657         }
 4658         /* XXX: CAP_POSIX_FADVISE? */
 4659         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4660         if (error != 0)
 4661                 goto out;
 4662         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4663                 error = ESPIPE;
 4664                 goto out;
 4665         }
 4666         if (fp->f_type != DTYPE_VNODE) {
 4667                 error = ENODEV;
 4668                 goto out;
 4669         }
 4670         vp = fp->f_vnode;
 4671         if (vp->v_type != VREG) {
 4672                 error = ENODEV;
 4673                 goto out;
 4674         }
 4675         if (len == 0)
 4676                 end = OFF_MAX;
 4677         else
 4678                 end = offset + len - 1;
 4679         switch (advice) {
 4680         case POSIX_FADV_SEQUENTIAL:
 4681         case POSIX_FADV_RANDOM:
 4682         case POSIX_FADV_NOREUSE:
 4683                 /*
 4684                  * Try to merge any existing non-standard region with
 4685                  * this new region if possible, otherwise create a new
 4686                  * non-standard region for this request.
 4687                  */
 4688                 mtx_pool_lock(mtxpool_sleep, fp);
 4689                 fa = fp->f_advice;
 4690                 if (fa != NULL && fa->fa_advice == advice &&
 4691                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4692                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4693                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4694                         if (offset < fa->fa_start)
 4695                                 fa->fa_start = offset;
 4696                         if (end > fa->fa_end)
 4697                                 fa->fa_end = end;
 4698                 } else {
 4699                         new->fa_advice = advice;
 4700                         new->fa_start = offset;
 4701                         new->fa_end = end;
 4702                         new->fa_prevstart = 0;
 4703                         new->fa_prevend = 0;
 4704                         fp->f_advice = new;
 4705                         new = fa;
 4706                 }
 4707                 mtx_pool_unlock(mtxpool_sleep, fp);
 4708                 break;
 4709         case POSIX_FADV_NORMAL:
 4710                 /*
 4711                  * If a the "normal" region overlaps with an existing
 4712                  * non-standard region, trim or remove the
 4713                  * non-standard region.
 4714                  */
 4715                 mtx_pool_lock(mtxpool_sleep, fp);
 4716                 fa = fp->f_advice;
 4717                 if (fa != NULL) {
 4718                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4719                                 new = fa;
 4720                                 fp->f_advice = NULL;
 4721                         } else if (offset <= fa->fa_start &&
 4722                             end >= fa->fa_start)
 4723                                 fa->fa_start = end + 1;
 4724                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4725                                 fa->fa_end = offset - 1;
 4726                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4727                                 /*
 4728                                  * If the "normal" region is a middle
 4729                                  * portion of the existing
 4730                                  * non-standard region, just remove
 4731                                  * the whole thing rather than picking
 4732                                  * one side or the other to
 4733                                  * preserve.
 4734                                  */
 4735                                 new = fa;
 4736                                 fp->f_advice = NULL;
 4737                         }
 4738                 }
 4739                 mtx_pool_unlock(mtxpool_sleep, fp);
 4740                 break;
 4741         case POSIX_FADV_WILLNEED:
 4742         case POSIX_FADV_DONTNEED:
 4743                 error = VOP_ADVISE(vp, offset, end, advice);
 4744                 break;
 4745         }
 4746 out:
 4747         if (fp != NULL)
 4748                 fdrop(fp, td);
 4749         free(new, M_FADVISE);
 4750         return (error);
 4751 }
 4752 
 4753 int
 4754 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4755 {
 4756 
 4757         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 4758             uap->len, uap->advice);
 4759         return (0);
 4760 }
Cache object: 9f2bf84de4cf7e0bcee97bec294601c8
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c