The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.3/sys/kern/vfs_syscalls.c 301052 2016-05-31 16:55:50Z glebius $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int kern_chflags(struct thread *td, const char *path,
  100     enum uio_seg pathseg, u_long flags);
  101 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  102     enum uio_seg pathseg, u_long flags, int atflag);
  103 static int setfflags(struct thread *td, struct vnode *, u_long);
  104 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  105 static int getutimens(const struct timespec *, enum uio_seg,
  106     struct timespec *, int *);
  107 static int setutimes(struct thread *td, struct vnode *,
  108     const struct timespec *, int, int);
  109 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  110     struct thread *td);
  111 
  112 /*
  113  * The module initialization routine for POSIX asynchronous I/O will
  114  * set this to the version of AIO that it implements.  (Zero means
  115  * that it is not implemented.)  This value is used here by pathconf()
  116  * and in kern_descrip.c by fpathconf().
  117  */
  118 int async_io_version;
  119 
  120 /*
  121  * Sync each mounted filesystem.
  122  */
  123 #ifndef _SYS_SYSPROTO_H_
  124 struct sync_args {
  125         int     dummy;
  126 };
  127 #endif
  128 /* ARGSUSED */
  129 int
  130 sys_sync(td, uap)
  131         struct thread *td;
  132         struct sync_args *uap;
  133 {
  134         struct mount *mp, *nmp;
  135         int save;
  136 
  137         mtx_lock(&mountlist_mtx);
  138         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  139                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  140                         nmp = TAILQ_NEXT(mp, mnt_list);
  141                         continue;
  142                 }
  143                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  144                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  145                         save = curthread_pflags_set(TDP_SYNCIO);
  146                         vfs_msync(mp, MNT_NOWAIT);
  147                         VFS_SYNC(mp, MNT_NOWAIT);
  148                         curthread_pflags_restore(save);
  149                         vn_finished_write(mp);
  150                 }
  151                 mtx_lock(&mountlist_mtx);
  152                 nmp = TAILQ_NEXT(mp, mnt_list);
  153                 vfs_unbusy(mp);
  154         }
  155         mtx_unlock(&mountlist_mtx);
  156         return (0);
  157 }
  158 
  159 /*
  160  * Change filesystem quotas.
  161  */
  162 #ifndef _SYS_SYSPROTO_H_
  163 struct quotactl_args {
  164         char *path;
  165         int cmd;
  166         int uid;
  167         caddr_t arg;
  168 };
  169 #endif
  170 int
  171 sys_quotactl(td, uap)
  172         struct thread *td;
  173         register struct quotactl_args /* {
  174                 char *path;
  175                 int cmd;
  176                 int uid;
  177                 caddr_t arg;
  178         } */ *uap;
  179 {
  180         struct mount *mp;
  181         struct nameidata nd;
  182         int error;
  183 
  184         AUDIT_ARG_CMD(uap->cmd);
  185         AUDIT_ARG_UID(uap->uid);
  186         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  187                 return (EPERM);
  188         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  189             uap->path, td);
  190         if ((error = namei(&nd)) != 0)
  191                 return (error);
  192         NDFREE(&nd, NDF_ONLY_PNBUF);
  193         mp = nd.ni_vp->v_mount;
  194         vfs_ref(mp);
  195         vput(nd.ni_vp);
  196         error = vfs_busy(mp, 0);
  197         vfs_rel(mp);
  198         if (error != 0)
  199                 return (error);
  200         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  201 
  202         /*
  203          * Since quota on operation typically needs to open quota
  204          * file, the Q_QUOTAON handler needs to unbusy the mount point
  205          * before calling into namei.  Otherwise, unmount might be
  206          * started between two vfs_busy() invocations (first is our,
  207          * second is from mount point cross-walk code in lookup()),
  208          * causing deadlock.
  209          *
  210          * Require that Q_QUOTAON handles the vfs_busy() reference on
  211          * its own, always returning with ubusied mount point.
  212          */
  213         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  214                 vfs_unbusy(mp);
  215         return (error);
  216 }
  217 
  218 /*
  219  * Used by statfs conversion routines to scale the block size up if
  220  * necessary so that all of the block counts are <= 'max_size'.  Note
  221  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  222  * value of 'n'.
  223  */
  224 void
  225 statfs_scale_blocks(struct statfs *sf, long max_size)
  226 {
  227         uint64_t count;
  228         int shift;
  229 
  230         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  231 
  232         /*
  233          * Attempt to scale the block counts to give a more accurate
  234          * overview to userland of the ratio of free space to used
  235          * space.  To do this, find the largest block count and compute
  236          * a divisor that lets it fit into a signed integer <= max_size.
  237          */
  238         if (sf->f_bavail < 0)
  239                 count = -sf->f_bavail;
  240         else
  241                 count = sf->f_bavail;
  242         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  243         if (count <= max_size)
  244                 return;
  245 
  246         count >>= flsl(max_size);
  247         shift = 0;
  248         while (count > 0) {
  249                 shift++;
  250                 count >>=1;
  251         }
  252 
  253         sf->f_bsize <<= shift;
  254         sf->f_blocks >>= shift;
  255         sf->f_bfree >>= shift;
  256         sf->f_bavail >>= shift;
  257 }
  258 
  259 /*
  260  * Get filesystem statistics.
  261  */
  262 #ifndef _SYS_SYSPROTO_H_
  263 struct statfs_args {
  264         char *path;
  265         struct statfs *buf;
  266 };
  267 #endif
  268 int
  269 sys_statfs(td, uap)
  270         struct thread *td;
  271         register struct statfs_args /* {
  272                 char *path;
  273                 struct statfs *buf;
  274         } */ *uap;
  275 {
  276         struct statfs sf;
  277         int error;
  278 
  279         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  280         if (error == 0)
  281                 error = copyout(&sf, uap->buf, sizeof(sf));
  282         return (error);
  283 }
  284 
  285 int
  286 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  287     struct statfs *buf)
  288 {
  289         struct mount *mp;
  290         struct statfs *sp, sb;
  291         struct nameidata nd;
  292         int error;
  293 
  294         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  295             pathseg, path, td);
  296         error = namei(&nd);
  297         if (error != 0)
  298                 return (error);
  299         mp = nd.ni_vp->v_mount;
  300         vfs_ref(mp);
  301         NDFREE(&nd, NDF_ONLY_PNBUF);
  302         vput(nd.ni_vp);
  303         error = vfs_busy(mp, 0);
  304         vfs_rel(mp);
  305         if (error != 0)
  306                 return (error);
  307 #ifdef MAC
  308         error = mac_mount_check_stat(td->td_ucred, mp);
  309         if (error != 0)
  310                 goto out;
  311 #endif
  312         /*
  313          * Set these in case the underlying filesystem fails to do so.
  314          */
  315         sp = &mp->mnt_stat;
  316         sp->f_version = STATFS_VERSION;
  317         sp->f_namemax = NAME_MAX;
  318         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  319         error = VFS_STATFS(mp, sp);
  320         if (error != 0)
  321                 goto out;
  322         if (priv_check(td, PRIV_VFS_GENERATION)) {
  323                 bcopy(sp, &sb, sizeof(sb));
  324                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  325                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  326                 sp = &sb;
  327         }
  328         *buf = *sp;
  329 out:
  330         vfs_unbusy(mp);
  331         return (error);
  332 }
  333 
  334 /*
  335  * Get filesystem statistics.
  336  */
  337 #ifndef _SYS_SYSPROTO_H_
  338 struct fstatfs_args {
  339         int fd;
  340         struct statfs *buf;
  341 };
  342 #endif
  343 int
  344 sys_fstatfs(td, uap)
  345         struct thread *td;
  346         register struct fstatfs_args /* {
  347                 int fd;
  348                 struct statfs *buf;
  349         } */ *uap;
  350 {
  351         struct statfs sf;
  352         int error;
  353 
  354         error = kern_fstatfs(td, uap->fd, &sf);
  355         if (error == 0)
  356                 error = copyout(&sf, uap->buf, sizeof(sf));
  357         return (error);
  358 }
  359 
  360 int
  361 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  362 {
  363         struct file *fp;
  364         struct mount *mp;
  365         struct statfs *sp, sb;
  366         struct vnode *vp;
  367         cap_rights_t rights;
  368         int error;
  369 
  370         AUDIT_ARG_FD(fd);
  371         error = getvnode(td->td_proc->p_fd, fd,
  372             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  373         if (error != 0)
  374                 return (error);
  375         vp = fp->f_vnode;
  376         vn_lock(vp, LK_SHARED | LK_RETRY);
  377 #ifdef AUDIT
  378         AUDIT_ARG_VNODE1(vp);
  379 #endif
  380         mp = vp->v_mount;
  381         if (mp)
  382                 vfs_ref(mp);
  383         VOP_UNLOCK(vp, 0);
  384         fdrop(fp, td);
  385         if (mp == NULL) {
  386                 error = EBADF;
  387                 goto out;
  388         }
  389         error = vfs_busy(mp, 0);
  390         vfs_rel(mp);
  391         if (error != 0)
  392                 return (error);
  393 #ifdef MAC
  394         error = mac_mount_check_stat(td->td_ucred, mp);
  395         if (error != 0)
  396                 goto out;
  397 #endif
  398         /*
  399          * Set these in case the underlying filesystem fails to do so.
  400          */
  401         sp = &mp->mnt_stat;
  402         sp->f_version = STATFS_VERSION;
  403         sp->f_namemax = NAME_MAX;
  404         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  405         error = VFS_STATFS(mp, sp);
  406         if (error != 0)
  407                 goto out;
  408         if (priv_check(td, PRIV_VFS_GENERATION)) {
  409                 bcopy(sp, &sb, sizeof(sb));
  410                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  411                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  412                 sp = &sb;
  413         }
  414         *buf = *sp;
  415 out:
  416         if (mp)
  417                 vfs_unbusy(mp);
  418         return (error);
  419 }
  420 
  421 /*
  422  * Get statistics on all filesystems.
  423  */
  424 #ifndef _SYS_SYSPROTO_H_
  425 struct getfsstat_args {
  426         struct statfs *buf;
  427         long bufsize;
  428         int flags;
  429 };
  430 #endif
  431 int
  432 sys_getfsstat(td, uap)
  433         struct thread *td;
  434         register struct getfsstat_args /* {
  435                 struct statfs *buf;
  436                 long bufsize;
  437                 int flags;
  438         } */ *uap;
  439 {
  440 
  441         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  442             uap->flags));
  443 }
  444 
  445 /*
  446  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  447  *      The caller is responsible for freeing memory which will be allocated
  448  *      in '*buf'.
  449  */
  450 int
  451 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  452     enum uio_seg bufseg, int flags)
  453 {
  454         struct mount *mp, *nmp;
  455         struct statfs *sfsp, *sp, sb;
  456         size_t count, maxcount;
  457         int error;
  458 
  459         maxcount = bufsize / sizeof(struct statfs);
  460         if (bufsize == 0)
  461                 sfsp = NULL;
  462         else if (bufseg == UIO_USERSPACE)
  463                 sfsp = *buf;
  464         else /* if (bufseg == UIO_SYSSPACE) */ {
  465                 count = 0;
  466                 mtx_lock(&mountlist_mtx);
  467                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  468                         count++;
  469                 }
  470                 mtx_unlock(&mountlist_mtx);
  471                 if (maxcount > count)
  472                         maxcount = count;
  473                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  474                     M_WAITOK);
  475         }
  476         count = 0;
  477         mtx_lock(&mountlist_mtx);
  478         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  479                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  480                         nmp = TAILQ_NEXT(mp, mnt_list);
  481                         continue;
  482                 }
  483 #ifdef MAC
  484                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  485                         nmp = TAILQ_NEXT(mp, mnt_list);
  486                         continue;
  487                 }
  488 #endif
  489                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  490                         nmp = TAILQ_NEXT(mp, mnt_list);
  491                         continue;
  492                 }
  493                 if (sfsp && count < maxcount) {
  494                         sp = &mp->mnt_stat;
  495                         /*
  496                          * Set these in case the underlying filesystem
  497                          * fails to do so.
  498                          */
  499                         sp->f_version = STATFS_VERSION;
  500                         sp->f_namemax = NAME_MAX;
  501                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  502                         /*
  503                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  504                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  505                          * overrides MNT_WAIT.
  506                          */
  507                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  508                             (flags & MNT_WAIT)) &&
  509                             (error = VFS_STATFS(mp, sp))) {
  510                                 mtx_lock(&mountlist_mtx);
  511                                 nmp = TAILQ_NEXT(mp, mnt_list);
  512                                 vfs_unbusy(mp);
  513                                 continue;
  514                         }
  515                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  516                                 bcopy(sp, &sb, sizeof(sb));
  517                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  518                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  519                                 sp = &sb;
  520                         }
  521                         if (bufseg == UIO_SYSSPACE)
  522                                 bcopy(sp, sfsp, sizeof(*sp));
  523                         else /* if (bufseg == UIO_USERSPACE) */ {
  524                                 error = copyout(sp, sfsp, sizeof(*sp));
  525                                 if (error != 0) {
  526                                         vfs_unbusy(mp);
  527                                         return (error);
  528                                 }
  529                         }
  530                         sfsp++;
  531                 }
  532                 count++;
  533                 mtx_lock(&mountlist_mtx);
  534                 nmp = TAILQ_NEXT(mp, mnt_list);
  535                 vfs_unbusy(mp);
  536         }
  537         mtx_unlock(&mountlist_mtx);
  538         if (sfsp && count > maxcount)
  539                 td->td_retval[0] = maxcount;
  540         else
  541                 td->td_retval[0] = count;
  542         return (0);
  543 }
  544 
  545 #ifdef COMPAT_FREEBSD4
  546 /*
  547  * Get old format filesystem statistics.
  548  */
  549 static void cvtstatfs(struct statfs *, struct ostatfs *);
  550 
  551 #ifndef _SYS_SYSPROTO_H_
  552 struct freebsd4_statfs_args {
  553         char *path;
  554         struct ostatfs *buf;
  555 };
  556 #endif
  557 int
  558 freebsd4_statfs(td, uap)
  559         struct thread *td;
  560         struct freebsd4_statfs_args /* {
  561                 char *path;
  562                 struct ostatfs *buf;
  563         } */ *uap;
  564 {
  565         struct ostatfs osb;
  566         struct statfs sf;
  567         int error;
  568 
  569         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  570         if (error != 0)
  571                 return (error);
  572         cvtstatfs(&sf, &osb);
  573         return (copyout(&osb, uap->buf, sizeof(osb)));
  574 }
  575 
  576 /*
  577  * Get filesystem statistics.
  578  */
  579 #ifndef _SYS_SYSPROTO_H_
  580 struct freebsd4_fstatfs_args {
  581         int fd;
  582         struct ostatfs *buf;
  583 };
  584 #endif
  585 int
  586 freebsd4_fstatfs(td, uap)
  587         struct thread *td;
  588         struct freebsd4_fstatfs_args /* {
  589                 int fd;
  590                 struct ostatfs *buf;
  591         } */ *uap;
  592 {
  593         struct ostatfs osb;
  594         struct statfs sf;
  595         int error;
  596 
  597         error = kern_fstatfs(td, uap->fd, &sf);
  598         if (error != 0)
  599                 return (error);
  600         cvtstatfs(&sf, &osb);
  601         return (copyout(&osb, uap->buf, sizeof(osb)));
  602 }
  603 
  604 /*
  605  * Get statistics on all filesystems.
  606  */
  607 #ifndef _SYS_SYSPROTO_H_
  608 struct freebsd4_getfsstat_args {
  609         struct ostatfs *buf;
  610         long bufsize;
  611         int flags;
  612 };
  613 #endif
  614 int
  615 freebsd4_getfsstat(td, uap)
  616         struct thread *td;
  617         register struct freebsd4_getfsstat_args /* {
  618                 struct ostatfs *buf;
  619                 long bufsize;
  620                 int flags;
  621         } */ *uap;
  622 {
  623         struct statfs *buf, *sp;
  624         struct ostatfs osb;
  625         size_t count, size;
  626         int error;
  627 
  628         count = uap->bufsize / sizeof(struct ostatfs);
  629         size = count * sizeof(struct statfs);
  630         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  631         if (size > 0) {
  632                 count = td->td_retval[0];
  633                 sp = buf;
  634                 while (count > 0 && error == 0) {
  635                         cvtstatfs(sp, &osb);
  636                         error = copyout(&osb, uap->buf, sizeof(osb));
  637                         sp++;
  638                         uap->buf++;
  639                         count--;
  640                 }
  641                 free(buf, M_TEMP);
  642         }
  643         return (error);
  644 }
  645 
  646 /*
  647  * Implement fstatfs() for (NFS) file handles.
  648  */
  649 #ifndef _SYS_SYSPROTO_H_
  650 struct freebsd4_fhstatfs_args {
  651         struct fhandle *u_fhp;
  652         struct ostatfs *buf;
  653 };
  654 #endif
  655 int
  656 freebsd4_fhstatfs(td, uap)
  657         struct thread *td;
  658         struct freebsd4_fhstatfs_args /* {
  659                 struct fhandle *u_fhp;
  660                 struct ostatfs *buf;
  661         } */ *uap;
  662 {
  663         struct ostatfs osb;
  664         struct statfs sf;
  665         fhandle_t fh;
  666         int error;
  667 
  668         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  669         if (error != 0)
  670                 return (error);
  671         error = kern_fhstatfs(td, fh, &sf);
  672         if (error != 0)
  673                 return (error);
  674         cvtstatfs(&sf, &osb);
  675         return (copyout(&osb, uap->buf, sizeof(osb)));
  676 }
  677 
  678 /*
  679  * Convert a new format statfs structure to an old format statfs structure.
  680  */
  681 static void
  682 cvtstatfs(nsp, osp)
  683         struct statfs *nsp;
  684         struct ostatfs *osp;
  685 {
  686 
  687         statfs_scale_blocks(nsp, LONG_MAX);
  688         bzero(osp, sizeof(*osp));
  689         osp->f_bsize = nsp->f_bsize;
  690         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  691         osp->f_blocks = nsp->f_blocks;
  692         osp->f_bfree = nsp->f_bfree;
  693         osp->f_bavail = nsp->f_bavail;
  694         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  695         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  696         osp->f_owner = nsp->f_owner;
  697         osp->f_type = nsp->f_type;
  698         osp->f_flags = nsp->f_flags;
  699         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  700         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  701         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  702         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  703         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  704             MIN(MFSNAMELEN, OMFSNAMELEN));
  705         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  706             MIN(MNAMELEN, OMNAMELEN));
  707         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  708             MIN(MNAMELEN, OMNAMELEN));
  709         osp->f_fsid = nsp->f_fsid;
  710 }
  711 #endif /* COMPAT_FREEBSD4 */
  712 
  713 /*
  714  * Change current working directory to a given file descriptor.
  715  */
  716 #ifndef _SYS_SYSPROTO_H_
  717 struct fchdir_args {
  718         int     fd;
  719 };
  720 #endif
  721 int
  722 sys_fchdir(td, uap)
  723         struct thread *td;
  724         struct fchdir_args /* {
  725                 int fd;
  726         } */ *uap;
  727 {
  728         register struct filedesc *fdp = td->td_proc->p_fd;
  729         struct vnode *vp, *tdp, *vpold;
  730         struct mount *mp;
  731         struct file *fp;
  732         cap_rights_t rights;
  733         int error;
  734 
  735         AUDIT_ARG_FD(uap->fd);
  736         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  737             &fp);
  738         if (error != 0)
  739                 return (error);
  740         vp = fp->f_vnode;
  741         VREF(vp);
  742         fdrop(fp, td);
  743         vn_lock(vp, LK_SHARED | LK_RETRY);
  744         AUDIT_ARG_VNODE1(vp);
  745         error = change_dir(vp, td);
  746         while (!error && (mp = vp->v_mountedhere) != NULL) {
  747                 if (vfs_busy(mp, 0))
  748                         continue;
  749                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  750                 vfs_unbusy(mp);
  751                 if (error != 0)
  752                         break;
  753                 vput(vp);
  754                 vp = tdp;
  755         }
  756         if (error != 0) {
  757                 vput(vp);
  758                 return (error);
  759         }
  760         VOP_UNLOCK(vp, 0);
  761         FILEDESC_XLOCK(fdp);
  762         vpold = fdp->fd_cdir;
  763         fdp->fd_cdir = vp;
  764         FILEDESC_XUNLOCK(fdp);
  765         vrele(vpold);
  766         return (0);
  767 }
  768 
  769 /*
  770  * Change current working directory (``.'').
  771  */
  772 #ifndef _SYS_SYSPROTO_H_
  773 struct chdir_args {
  774         char    *path;
  775 };
  776 #endif
  777 int
  778 sys_chdir(td, uap)
  779         struct thread *td;
  780         struct chdir_args /* {
  781                 char *path;
  782         } */ *uap;
  783 {
  784 
  785         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  786 }
  787 
  788 int
  789 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  790 {
  791         register struct filedesc *fdp = td->td_proc->p_fd;
  792         struct nameidata nd;
  793         struct vnode *vp;
  794         int error;
  795 
  796         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  797             pathseg, path, td);
  798         if ((error = namei(&nd)) != 0)
  799                 return (error);
  800         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  801                 vput(nd.ni_vp);
  802                 NDFREE(&nd, NDF_ONLY_PNBUF);
  803                 return (error);
  804         }
  805         VOP_UNLOCK(nd.ni_vp, 0);
  806         NDFREE(&nd, NDF_ONLY_PNBUF);
  807         FILEDESC_XLOCK(fdp);
  808         vp = fdp->fd_cdir;
  809         fdp->fd_cdir = nd.ni_vp;
  810         FILEDESC_XUNLOCK(fdp);
  811         vrele(vp);
  812         return (0);
  813 }
  814 
  815 /*
  816  * Helper function for raised chroot(2) security function:  Refuse if
  817  * any filedescriptors are open directories.
  818  */
  819 static int
  820 chroot_refuse_vdir_fds(fdp)
  821         struct filedesc *fdp;
  822 {
  823         struct vnode *vp;
  824         struct file *fp;
  825         int fd;
  826 
  827         FILEDESC_LOCK_ASSERT(fdp);
  828 
  829         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
  830                 fp = fget_locked(fdp, fd);
  831                 if (fp == NULL)
  832                         continue;
  833                 if (fp->f_type == DTYPE_VNODE) {
  834                         vp = fp->f_vnode;
  835                         if (vp->v_type == VDIR)
  836                                 return (EPERM);
  837                 }
  838         }
  839         return (0);
  840 }
  841 
  842 /*
  843  * This sysctl determines if we will allow a process to chroot(2) if it
  844  * has a directory open:
  845  *      0: disallowed for all processes.
  846  *      1: allowed for processes that were not already chroot(2)'ed.
  847  *      2: allowed for all processes.
  848  */
  849 
  850 static int chroot_allow_open_directories = 1;
  851 
  852 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  853      &chroot_allow_open_directories, 0,
  854      "Allow a process to chroot(2) if it has a directory open");
  855 
  856 /*
  857  * Change notion of root (``/'') directory.
  858  */
  859 #ifndef _SYS_SYSPROTO_H_
  860 struct chroot_args {
  861         char    *path;
  862 };
  863 #endif
  864 int
  865 sys_chroot(td, uap)
  866         struct thread *td;
  867         struct chroot_args /* {
  868                 char *path;
  869         } */ *uap;
  870 {
  871         struct nameidata nd;
  872         int error;
  873 
  874         error = priv_check(td, PRIV_VFS_CHROOT);
  875         if (error != 0)
  876                 return (error);
  877         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  878             UIO_USERSPACE, uap->path, td);
  879         error = namei(&nd);
  880         if (error != 0)
  881                 goto error;
  882         error = change_dir(nd.ni_vp, td);
  883         if (error != 0)
  884                 goto e_vunlock;
  885 #ifdef MAC
  886         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  887         if (error != 0)
  888                 goto e_vunlock;
  889 #endif
  890         VOP_UNLOCK(nd.ni_vp, 0);
  891         error = change_root(nd.ni_vp, td);
  892         vrele(nd.ni_vp);
  893         NDFREE(&nd, NDF_ONLY_PNBUF);
  894         return (error);
  895 e_vunlock:
  896         vput(nd.ni_vp);
  897 error:
  898         NDFREE(&nd, NDF_ONLY_PNBUF);
  899         return (error);
  900 }
  901 
  902 /*
  903  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  904  * instance.
  905  */
  906 int
  907 change_dir(vp, td)
  908         struct vnode *vp;
  909         struct thread *td;
  910 {
  911 #ifdef MAC
  912         int error;
  913 #endif
  914 
  915         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  916         if (vp->v_type != VDIR)
  917                 return (ENOTDIR);
  918 #ifdef MAC
  919         error = mac_vnode_check_chdir(td->td_ucred, vp);
  920         if (error != 0)
  921                 return (error);
  922 #endif
  923         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  924 }
  925 
  926 /*
  927  * Common routine for kern_chroot() and jail_attach().  The caller is
  928  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  929  * authorize this operation.
  930  */
  931 int
  932 change_root(vp, td)
  933         struct vnode *vp;
  934         struct thread *td;
  935 {
  936         struct filedesc *fdp;
  937         struct vnode *oldvp;
  938         int error;
  939 
  940         fdp = td->td_proc->p_fd;
  941         FILEDESC_XLOCK(fdp);
  942         if (chroot_allow_open_directories == 0 ||
  943             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  944                 error = chroot_refuse_vdir_fds(fdp);
  945                 if (error != 0) {
  946                         FILEDESC_XUNLOCK(fdp);
  947                         return (error);
  948                 }
  949         }
  950         oldvp = fdp->fd_rdir;
  951         fdp->fd_rdir = vp;
  952         VREF(fdp->fd_rdir);
  953         if (!fdp->fd_jdir) {
  954                 fdp->fd_jdir = vp;
  955                 VREF(fdp->fd_jdir);
  956         }
  957         FILEDESC_XUNLOCK(fdp);
  958         vrele(oldvp);
  959         return (0);
  960 }
  961 
  962 static __inline void
  963 flags_to_rights(int flags, cap_rights_t *rightsp)
  964 {
  965 
  966         if (flags & O_EXEC) {
  967                 cap_rights_set(rightsp, CAP_FEXECVE);
  968         } else {
  969                 switch ((flags & O_ACCMODE)) {
  970                 case O_RDONLY:
  971                         cap_rights_set(rightsp, CAP_READ);
  972                         break;
  973                 case O_RDWR:
  974                         cap_rights_set(rightsp, CAP_READ);
  975                         /* FALLTHROUGH */
  976                 case O_WRONLY:
  977                         cap_rights_set(rightsp, CAP_WRITE);
  978                         if (!(flags & (O_APPEND | O_TRUNC)))
  979                                 cap_rights_set(rightsp, CAP_SEEK);
  980                         break;
  981                 }
  982         }
  983 
  984         if (flags & O_CREAT)
  985                 cap_rights_set(rightsp, CAP_CREATE);
  986 
  987         if (flags & O_TRUNC)
  988                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  989 
  990         if (flags & (O_SYNC | O_FSYNC))
  991                 cap_rights_set(rightsp, CAP_FSYNC);
  992 
  993         if (flags & (O_EXLOCK | O_SHLOCK))
  994                 cap_rights_set(rightsp, CAP_FLOCK);
  995 }
  996 
  997 /*
  998  * Check permissions, allocate an open file structure, and call the device
  999  * open routine if any.
 1000  */
 1001 #ifndef _SYS_SYSPROTO_H_
 1002 struct open_args {
 1003         char    *path;
 1004         int     flags;
 1005         int     mode;
 1006 };
 1007 #endif
 1008 int
 1009 sys_open(td, uap)
 1010         struct thread *td;
 1011         register struct open_args /* {
 1012                 char *path;
 1013                 int flags;
 1014                 int mode;
 1015         } */ *uap;
 1016 {
 1017 
 1018         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1019 }
 1020 
 1021 #ifndef _SYS_SYSPROTO_H_
 1022 struct openat_args {
 1023         int     fd;
 1024         char    *path;
 1025         int     flag;
 1026         int     mode;
 1027 };
 1028 #endif
 1029 int
 1030 sys_openat(struct thread *td, struct openat_args *uap)
 1031 {
 1032 
 1033         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1034             uap->mode));
 1035 }
 1036 
 1037 int
 1038 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1039     int mode)
 1040 {
 1041 
 1042         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1043 }
 1044 
 1045 int
 1046 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1047     int flags, int mode)
 1048 {
 1049         struct proc *p = td->td_proc;
 1050         struct filedesc *fdp = p->p_fd;
 1051         struct file *fp;
 1052         struct vnode *vp;
 1053         struct nameidata nd;
 1054         cap_rights_t rights;
 1055         int cmode, error, indx;
 1056 
 1057         indx = -1;
 1058 
 1059         AUDIT_ARG_FFLAGS(flags);
 1060         AUDIT_ARG_MODE(mode);
 1061         /* XXX: audit dirfd */
 1062         cap_rights_init(&rights, CAP_LOOKUP);
 1063         flags_to_rights(flags, &rights);
 1064         /*
 1065          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1066          * may be specified.
 1067          */
 1068         if (flags & O_EXEC) {
 1069                 if (flags & O_ACCMODE)
 1070                         return (EINVAL);
 1071         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
 1072                 return (EINVAL);
 1073         } else {
 1074                 flags = FFLAGS(flags);
 1075         }
 1076 
 1077         /*
 1078          * Allocate the file descriptor, but don't install a descriptor yet.
 1079          */
 1080         error = falloc_noinstall(td, &fp);
 1081         if (error != 0)
 1082                 return (error);
 1083         /*
 1084          * An extra reference on `fp' has been held for us by
 1085          * falloc_noinstall().
 1086          */
 1087         /* Set the flags early so the finit in devfs can pick them up. */
 1088         fp->f_flag = flags & FMASK;
 1089         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 1090         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 1091             &rights, td);
 1092         td->td_dupfd = -1;              /* XXX check for fdopen */
 1093         error = vn_open(&nd, &flags, cmode, fp);
 1094         if (error != 0) {
 1095                 /*
 1096                  * If the vn_open replaced the method vector, something
 1097                  * wonderous happened deep below and we just pass it up
 1098                  * pretending we know what we do.
 1099                  */
 1100                 if (error == ENXIO && fp->f_ops != &badfileops)
 1101                         goto success;
 1102 
 1103                 /*
 1104                  * Handle special fdopen() case. bleh.
 1105                  *
 1106                  * Don't do this for relative (capability) lookups; we don't
 1107                  * understand exactly what would happen, and we don't think
 1108                  * that it ever should.
 1109                  */
 1110                 if (nd.ni_strictrelative == 0 &&
 1111                     (error == ENODEV || error == ENXIO) &&
 1112                     td->td_dupfd >= 0) {
 1113                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1114                             &indx);
 1115                         if (error == 0)
 1116                                 goto success;
 1117                 }
 1118 
 1119                 goto bad;
 1120         }
 1121         td->td_dupfd = 0;
 1122         NDFREE(&nd, NDF_ONLY_PNBUF);
 1123         vp = nd.ni_vp;
 1124 
 1125         /*
 1126          * Store the vnode, for any f_type. Typically, the vnode use
 1127          * count is decremented by direct call to vn_closefile() for
 1128          * files that switched type in the cdevsw fdopen() method.
 1129          */
 1130         fp->f_vnode = vp;
 1131         /*
 1132          * If the file wasn't claimed by devfs bind it to the normal
 1133          * vnode operations here.
 1134          */
 1135         if (fp->f_ops == &badfileops) {
 1136                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1137                 fp->f_seqcount = 1;
 1138                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1139                     DTYPE_VNODE, vp, &vnops);
 1140         }
 1141 
 1142         VOP_UNLOCK(vp, 0);
 1143         if (flags & O_TRUNC) {
 1144                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1145                 if (error != 0)
 1146                         goto bad;
 1147         }
 1148 success:
 1149         /*
 1150          * If we haven't already installed the FD (for dupfdopen), do so now.
 1151          */
 1152         if (indx == -1) {
 1153                 struct filecaps *fcaps;
 1154 
 1155 #ifdef CAPABILITIES
 1156                 if (nd.ni_strictrelative == 1)
 1157                         fcaps = &nd.ni_filecaps;
 1158                 else
 1159 #endif
 1160                         fcaps = NULL;
 1161                 error = finstall(td, fp, &indx, flags, fcaps);
 1162                 /* On success finstall() consumes fcaps. */
 1163                 if (error != 0) {
 1164                         filecaps_free(&nd.ni_filecaps);
 1165                         goto bad;
 1166                 }
 1167         } else {
 1168                 filecaps_free(&nd.ni_filecaps);
 1169         }
 1170 
 1171         /*
 1172          * Release our private reference, leaving the one associated with
 1173          * the descriptor table intact.
 1174          */
 1175         fdrop(fp, td);
 1176         td->td_retval[0] = indx;
 1177         return (0);
 1178 bad:
 1179         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1180         fdrop(fp, td);
 1181         return (error);
 1182 }
 1183 
 1184 #ifdef COMPAT_43
 1185 /*
 1186  * Create a file.
 1187  */
 1188 #ifndef _SYS_SYSPROTO_H_
 1189 struct ocreat_args {
 1190         char    *path;
 1191         int     mode;
 1192 };
 1193 #endif
 1194 int
 1195 ocreat(td, uap)
 1196         struct thread *td;
 1197         register struct ocreat_args /* {
 1198                 char *path;
 1199                 int mode;
 1200         } */ *uap;
 1201 {
 1202 
 1203         return (kern_open(td, uap->path, UIO_USERSPACE,
 1204             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1205 }
 1206 #endif /* COMPAT_43 */
 1207 
 1208 /*
 1209  * Create a special file.
 1210  */
 1211 #ifndef _SYS_SYSPROTO_H_
 1212 struct mknod_args {
 1213         char    *path;
 1214         int     mode;
 1215         int     dev;
 1216 };
 1217 #endif
 1218 int
 1219 sys_mknod(td, uap)
 1220         struct thread *td;
 1221         register struct mknod_args /* {
 1222                 char *path;
 1223                 int mode;
 1224                 int dev;
 1225         } */ *uap;
 1226 {
 1227 
 1228         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1229 }
 1230 
 1231 #ifndef _SYS_SYSPROTO_H_
 1232 struct mknodat_args {
 1233         int     fd;
 1234         char    *path;
 1235         mode_t  mode;
 1236         dev_t   dev;
 1237 };
 1238 #endif
 1239 int
 1240 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1241 {
 1242 
 1243         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1244             uap->dev));
 1245 }
 1246 
 1247 int
 1248 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1249     int dev)
 1250 {
 1251 
 1252         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1253 }
 1254 
 1255 int
 1256 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1257     int mode, int dev)
 1258 {
 1259         struct vnode *vp;
 1260         struct mount *mp;
 1261         struct vattr vattr;
 1262         struct nameidata nd;
 1263         cap_rights_t rights;
 1264         int error, whiteout = 0;
 1265 
 1266         AUDIT_ARG_MODE(mode);
 1267         AUDIT_ARG_DEV(dev);
 1268         switch (mode & S_IFMT) {
 1269         case S_IFCHR:
 1270         case S_IFBLK:
 1271                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1272                 break;
 1273         case S_IFMT:
 1274                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1275                 break;
 1276         case S_IFWHT:
 1277                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1278                 break;
 1279         case S_IFIFO:
 1280                 if (dev == 0)
 1281                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1282                 /* FALLTHROUGH */
 1283         default:
 1284                 error = EINVAL;
 1285                 break;
 1286         }
 1287         if (error != 0)
 1288                 return (error);
 1289 restart:
 1290         bwillwrite();
 1291         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1292             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1293             td);
 1294         if ((error = namei(&nd)) != 0)
 1295                 return (error);
 1296         vp = nd.ni_vp;
 1297         if (vp != NULL) {
 1298                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1299                 if (vp == nd.ni_dvp)
 1300                         vrele(nd.ni_dvp);
 1301                 else
 1302                         vput(nd.ni_dvp);
 1303                 vrele(vp);
 1304                 return (EEXIST);
 1305         } else {
 1306                 VATTR_NULL(&vattr);
 1307                 vattr.va_mode = (mode & ALLPERMS) &
 1308                     ~td->td_proc->p_fd->fd_cmask;
 1309                 vattr.va_rdev = dev;
 1310                 whiteout = 0;
 1311 
 1312                 switch (mode & S_IFMT) {
 1313                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1314                         vattr.va_type = VBAD;
 1315                         break;
 1316                 case S_IFCHR:
 1317                         vattr.va_type = VCHR;
 1318                         break;
 1319                 case S_IFBLK:
 1320                         vattr.va_type = VBLK;
 1321                         break;
 1322                 case S_IFWHT:
 1323                         whiteout = 1;
 1324                         break;
 1325                 default:
 1326                         panic("kern_mknod: invalid mode");
 1327                 }
 1328         }
 1329         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1330                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1331                 vput(nd.ni_dvp);
 1332                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1333                         return (error);
 1334                 goto restart;
 1335         }
 1336 #ifdef MAC
 1337         if (error == 0 && !whiteout)
 1338                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1339                     &nd.ni_cnd, &vattr);
 1340 #endif
 1341         if (error == 0) {
 1342                 if (whiteout)
 1343                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1344                 else {
 1345                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1346                                                 &nd.ni_cnd, &vattr);
 1347                         if (error == 0)
 1348                                 vput(nd.ni_vp);
 1349                 }
 1350         }
 1351         NDFREE(&nd, NDF_ONLY_PNBUF);
 1352         vput(nd.ni_dvp);
 1353         vn_finished_write(mp);
 1354         return (error);
 1355 }
 1356 
 1357 /*
 1358  * Create a named pipe.
 1359  */
 1360 #ifndef _SYS_SYSPROTO_H_
 1361 struct mkfifo_args {
 1362         char    *path;
 1363         int     mode;
 1364 };
 1365 #endif
 1366 int
 1367 sys_mkfifo(td, uap)
 1368         struct thread *td;
 1369         register struct mkfifo_args /* {
 1370                 char *path;
 1371                 int mode;
 1372         } */ *uap;
 1373 {
 1374 
 1375         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1376 }
 1377 
 1378 #ifndef _SYS_SYSPROTO_H_
 1379 struct mkfifoat_args {
 1380         int     fd;
 1381         char    *path;
 1382         mode_t  mode;
 1383 };
 1384 #endif
 1385 int
 1386 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1387 {
 1388 
 1389         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1390             uap->mode));
 1391 }
 1392 
 1393 int
 1394 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1395 {
 1396 
 1397         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1398 }
 1399 
 1400 int
 1401 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1402     int mode)
 1403 {
 1404         struct mount *mp;
 1405         struct vattr vattr;
 1406         struct nameidata nd;
 1407         cap_rights_t rights;
 1408         int error;
 1409 
 1410         AUDIT_ARG_MODE(mode);
 1411 restart:
 1412         bwillwrite();
 1413         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1414             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1415             td);
 1416         if ((error = namei(&nd)) != 0)
 1417                 return (error);
 1418         if (nd.ni_vp != NULL) {
 1419                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1420                 if (nd.ni_vp == nd.ni_dvp)
 1421                         vrele(nd.ni_dvp);
 1422                 else
 1423                         vput(nd.ni_dvp);
 1424                 vrele(nd.ni_vp);
 1425                 return (EEXIST);
 1426         }
 1427         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1428                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1429                 vput(nd.ni_dvp);
 1430                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1431                         return (error);
 1432                 goto restart;
 1433         }
 1434         VATTR_NULL(&vattr);
 1435         vattr.va_type = VFIFO;
 1436         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1437 #ifdef MAC
 1438         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1439             &vattr);
 1440         if (error != 0)
 1441                 goto out;
 1442 #endif
 1443         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1444         if (error == 0)
 1445                 vput(nd.ni_vp);
 1446 #ifdef MAC
 1447 out:
 1448 #endif
 1449         vput(nd.ni_dvp);
 1450         vn_finished_write(mp);
 1451         NDFREE(&nd, NDF_ONLY_PNBUF);
 1452         return (error);
 1453 }
 1454 
 1455 /*
 1456  * Make a hard file link.
 1457  */
 1458 #ifndef _SYS_SYSPROTO_H_
 1459 struct link_args {
 1460         char    *path;
 1461         char    *link;
 1462 };
 1463 #endif
 1464 int
 1465 sys_link(td, uap)
 1466         struct thread *td;
 1467         register struct link_args /* {
 1468                 char *path;
 1469                 char *link;
 1470         } */ *uap;
 1471 {
 1472 
 1473         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1474 }
 1475 
 1476 #ifndef _SYS_SYSPROTO_H_
 1477 struct linkat_args {
 1478         int     fd1;
 1479         char    *path1;
 1480         int     fd2;
 1481         char    *path2;
 1482         int     flag;
 1483 };
 1484 #endif
 1485 int
 1486 sys_linkat(struct thread *td, struct linkat_args *uap)
 1487 {
 1488         int flag;
 1489 
 1490         flag = uap->flag;
 1491         if (flag & ~AT_SYMLINK_FOLLOW)
 1492                 return (EINVAL);
 1493 
 1494         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1495             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1496 }
 1497 
 1498 int hardlink_check_uid = 0;
 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1500     &hardlink_check_uid, 0,
 1501     "Unprivileged processes cannot create hard links to files owned by other "
 1502     "users");
 1503 static int hardlink_check_gid = 0;
 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1505     &hardlink_check_gid, 0,
 1506     "Unprivileged processes cannot create hard links to files owned by other "
 1507     "groups");
 1508 
 1509 static int
 1510 can_hardlink(struct vnode *vp, struct ucred *cred)
 1511 {
 1512         struct vattr va;
 1513         int error;
 1514 
 1515         if (!hardlink_check_uid && !hardlink_check_gid)
 1516                 return (0);
 1517 
 1518         error = VOP_GETATTR(vp, &va, cred);
 1519         if (error != 0)
 1520                 return (error);
 1521 
 1522         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1523                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1524                 if (error != 0)
 1525                         return (error);
 1526         }
 1527 
 1528         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1529                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1530                 if (error != 0)
 1531                         return (error);
 1532         }
 1533 
 1534         return (0);
 1535 }
 1536 
 1537 int
 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1539 {
 1540 
 1541         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1542 }
 1543 
 1544 int
 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1546     enum uio_seg segflg, int follow)
 1547 {
 1548         struct vnode *vp;
 1549         struct mount *mp;
 1550         struct nameidata nd;
 1551         cap_rights_t rights;
 1552         int error;
 1553 
 1554 again:
 1555         bwillwrite();
 1556         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1557 
 1558         if ((error = namei(&nd)) != 0)
 1559                 return (error);
 1560         NDFREE(&nd, NDF_ONLY_PNBUF);
 1561         vp = nd.ni_vp;
 1562         if (vp->v_type == VDIR) {
 1563                 vrele(vp);
 1564                 return (EPERM);         /* POSIX */
 1565         }
 1566         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 |
 1567             NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT),
 1568             td);
 1569         if ((error = namei(&nd)) == 0) {
 1570                 if (nd.ni_vp != NULL) {
 1571                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1572                         if (nd.ni_dvp == nd.ni_vp)
 1573                                 vrele(nd.ni_dvp);
 1574                         else
 1575                                 vput(nd.ni_dvp);
 1576                         vrele(nd.ni_vp);
 1577                         vrele(vp);
 1578                         return (EEXIST);
 1579                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1580                         /*
 1581                          * Cross-device link.  No need to recheck
 1582                          * vp->v_type, since it cannot change, except
 1583                          * to VBAD.
 1584                          */
 1585                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1586                         vput(nd.ni_dvp);
 1587                         vrele(vp);
 1588                         return (EXDEV);
 1589                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1590                         error = can_hardlink(vp, td->td_ucred);
 1591 #ifdef MAC
 1592                         if (error == 0)
 1593                                 error = mac_vnode_check_link(td->td_ucred,
 1594                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1595 #endif
 1596                         if (error != 0) {
 1597                                 vput(vp);
 1598                                 vput(nd.ni_dvp);
 1599                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1600                                 return (error);
 1601                         }
 1602                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1603                         if (error != 0) {
 1604                                 vput(vp);
 1605                                 vput(nd.ni_dvp);
 1606                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1607                                 error = vn_start_write(NULL, &mp,
 1608                                     V_XSLEEP | PCATCH);
 1609                                 if (error != 0)
 1610                                         return (error);
 1611                                 goto again;
 1612                         }
 1613                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1614                         VOP_UNLOCK(vp, 0);
 1615                         vput(nd.ni_dvp);
 1616                         vn_finished_write(mp);
 1617                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1618                 } else {
 1619                         vput(nd.ni_dvp);
 1620                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1621                         vrele(vp);
 1622                         goto again;
 1623                 }
 1624         }
 1625         vrele(vp);
 1626         return (error);
 1627 }
 1628 
 1629 /*
 1630  * Make a symbolic link.
 1631  */
 1632 #ifndef _SYS_SYSPROTO_H_
 1633 struct symlink_args {
 1634         char    *path;
 1635         char    *link;
 1636 };
 1637 #endif
 1638 int
 1639 sys_symlink(td, uap)
 1640         struct thread *td;
 1641         register struct symlink_args /* {
 1642                 char *path;
 1643                 char *link;
 1644         } */ *uap;
 1645 {
 1646 
 1647         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1648 }
 1649 
 1650 #ifndef _SYS_SYSPROTO_H_
 1651 struct symlinkat_args {
 1652         char    *path;
 1653         int     fd;
 1654         char    *path2;
 1655 };
 1656 #endif
 1657 int
 1658 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1659 {
 1660 
 1661         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1662             UIO_USERSPACE));
 1663 }
 1664 
 1665 int
 1666 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1667 {
 1668 
 1669         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1670 }
 1671 
 1672 int
 1673 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1674     enum uio_seg segflg)
 1675 {
 1676         struct mount *mp;
 1677         struct vattr vattr;
 1678         char *syspath;
 1679         struct nameidata nd;
 1680         int error;
 1681         cap_rights_t rights;
 1682 
 1683         if (segflg == UIO_SYSSPACE) {
 1684                 syspath = path1;
 1685         } else {
 1686                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1687                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1688                         goto out;
 1689         }
 1690         AUDIT_ARG_TEXT(syspath);
 1691 restart:
 1692         bwillwrite();
 1693         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1694             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1695             td);
 1696         if ((error = namei(&nd)) != 0)
 1697                 goto out;
 1698         if (nd.ni_vp) {
 1699                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1700                 if (nd.ni_vp == nd.ni_dvp)
 1701                         vrele(nd.ni_dvp);
 1702                 else
 1703                         vput(nd.ni_dvp);
 1704                 vrele(nd.ni_vp);
 1705                 error = EEXIST;
 1706                 goto out;
 1707         }
 1708         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1709                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1710                 vput(nd.ni_dvp);
 1711                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1712                         goto out;
 1713                 goto restart;
 1714         }
 1715         VATTR_NULL(&vattr);
 1716         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1717 #ifdef MAC
 1718         vattr.va_type = VLNK;
 1719         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1720             &vattr);
 1721         if (error != 0)
 1722                 goto out2;
 1723 #endif
 1724         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1725         if (error == 0)
 1726                 vput(nd.ni_vp);
 1727 #ifdef MAC
 1728 out2:
 1729 #endif
 1730         NDFREE(&nd, NDF_ONLY_PNBUF);
 1731         vput(nd.ni_dvp);
 1732         vn_finished_write(mp);
 1733 out:
 1734         if (segflg != UIO_SYSSPACE)
 1735                 uma_zfree(namei_zone, syspath);
 1736         return (error);
 1737 }
 1738 
 1739 /*
 1740  * Delete a whiteout from the filesystem.
 1741  */
 1742 int
 1743 sys_undelete(td, uap)
 1744         struct thread *td;
 1745         register struct undelete_args /* {
 1746                 char *path;
 1747         } */ *uap;
 1748 {
 1749         struct mount *mp;
 1750         struct nameidata nd;
 1751         int error;
 1752 
 1753 restart:
 1754         bwillwrite();
 1755         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1756             UIO_USERSPACE, uap->path, td);
 1757         error = namei(&nd);
 1758         if (error != 0)
 1759                 return (error);
 1760 
 1761         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1762                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1763                 if (nd.ni_vp == nd.ni_dvp)
 1764                         vrele(nd.ni_dvp);
 1765                 else
 1766                         vput(nd.ni_dvp);
 1767                 if (nd.ni_vp)
 1768                         vrele(nd.ni_vp);
 1769                 return (EEXIST);
 1770         }
 1771         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1772                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1773                 vput(nd.ni_dvp);
 1774                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1775                         return (error);
 1776                 goto restart;
 1777         }
 1778         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1779         NDFREE(&nd, NDF_ONLY_PNBUF);
 1780         vput(nd.ni_dvp);
 1781         vn_finished_write(mp);
 1782         return (error);
 1783 }
 1784 
 1785 /*
 1786  * Delete a name from the filesystem.
 1787  */
 1788 #ifndef _SYS_SYSPROTO_H_
 1789 struct unlink_args {
 1790         char    *path;
 1791 };
 1792 #endif
 1793 int
 1794 sys_unlink(td, uap)
 1795         struct thread *td;
 1796         struct unlink_args /* {
 1797                 char *path;
 1798         } */ *uap;
 1799 {
 1800 
 1801         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1802 }
 1803 
 1804 #ifndef _SYS_SYSPROTO_H_
 1805 struct unlinkat_args {
 1806         int     fd;
 1807         char    *path;
 1808         int     flag;
 1809 };
 1810 #endif
 1811 int
 1812 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1813 {
 1814         int flag = uap->flag;
 1815         int fd = uap->fd;
 1816         char *path = uap->path;
 1817 
 1818         if (flag & ~AT_REMOVEDIR)
 1819                 return (EINVAL);
 1820 
 1821         if (flag & AT_REMOVEDIR)
 1822                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1823         else
 1824                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1825 }
 1826 
 1827 int
 1828 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1829 {
 1830 
 1831         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1832 }
 1833 
 1834 int
 1835 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1836     ino_t oldinum)
 1837 {
 1838         struct mount *mp;
 1839         struct vnode *vp;
 1840         struct nameidata nd;
 1841         struct stat sb;
 1842         cap_rights_t rights;
 1843         int error;
 1844 
 1845 restart:
 1846         bwillwrite();
 1847         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1848             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1849         if ((error = namei(&nd)) != 0)
 1850                 return (error == EINVAL ? EPERM : error);
 1851         vp = nd.ni_vp;
 1852         if (vp->v_type == VDIR && oldinum == 0) {
 1853                 error = EPERM;          /* POSIX */
 1854         } else if (oldinum != 0 &&
 1855                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1856                   sb.st_ino != oldinum) {
 1857                         error = EIDRM;  /* Identifier removed */
 1858         } else {
 1859                 /*
 1860                  * The root of a mounted filesystem cannot be deleted.
 1861                  *
 1862                  * XXX: can this only be a VDIR case?
 1863                  */
 1864                 if (vp->v_vflag & VV_ROOT)
 1865                         error = EBUSY;
 1866         }
 1867         if (error == 0) {
 1868                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1869                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1870                         vput(nd.ni_dvp);
 1871                         if (vp == nd.ni_dvp)
 1872                                 vrele(vp);
 1873                         else
 1874                                 vput(vp);
 1875                         if ((error = vn_start_write(NULL, &mp,
 1876                             V_XSLEEP | PCATCH)) != 0)
 1877                                 return (error);
 1878                         goto restart;
 1879                 }
 1880 #ifdef MAC
 1881                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1882                     &nd.ni_cnd);
 1883                 if (error != 0)
 1884                         goto out;
 1885 #endif
 1886                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1887                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1888 #ifdef MAC
 1889 out:
 1890 #endif
 1891                 vn_finished_write(mp);
 1892         }
 1893         NDFREE(&nd, NDF_ONLY_PNBUF);
 1894         vput(nd.ni_dvp);
 1895         if (vp == nd.ni_dvp)
 1896                 vrele(vp);
 1897         else
 1898                 vput(vp);
 1899         return (error);
 1900 }
 1901 
 1902 /*
 1903  * Reposition read/write file offset.
 1904  */
 1905 #ifndef _SYS_SYSPROTO_H_
 1906 struct lseek_args {
 1907         int     fd;
 1908         int     pad;
 1909         off_t   offset;
 1910         int     whence;
 1911 };
 1912 #endif
 1913 int
 1914 sys_lseek(td, uap)
 1915         struct thread *td;
 1916         register struct lseek_args /* {
 1917                 int fd;
 1918                 int pad;
 1919                 off_t offset;
 1920                 int whence;
 1921         } */ *uap;
 1922 {
 1923         struct file *fp;
 1924         cap_rights_t rights;
 1925         int error;
 1926 
 1927         AUDIT_ARG_FD(uap->fd);
 1928         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1929         if (error != 0)
 1930                 return (error);
 1931         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1932             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1933         fdrop(fp, td);
 1934         return (error);
 1935 }
 1936 
 1937 #if defined(COMPAT_43)
 1938 /*
 1939  * Reposition read/write file offset.
 1940  */
 1941 #ifndef _SYS_SYSPROTO_H_
 1942 struct olseek_args {
 1943         int     fd;
 1944         long    offset;
 1945         int     whence;
 1946 };
 1947 #endif
 1948 int
 1949 olseek(td, uap)
 1950         struct thread *td;
 1951         register struct olseek_args /* {
 1952                 int fd;
 1953                 long offset;
 1954                 int whence;
 1955         } */ *uap;
 1956 {
 1957         struct lseek_args /* {
 1958                 int fd;
 1959                 int pad;
 1960                 off_t offset;
 1961                 int whence;
 1962         } */ nuap;
 1963 
 1964         nuap.fd = uap->fd;
 1965         nuap.offset = uap->offset;
 1966         nuap.whence = uap->whence;
 1967         return (sys_lseek(td, &nuap));
 1968 }
 1969 #endif /* COMPAT_43 */
 1970 
 1971 /* Version with the 'pad' argument */
 1972 int
 1973 freebsd6_lseek(td, uap)
 1974         struct thread *td;
 1975         register struct freebsd6_lseek_args *uap;
 1976 {
 1977         struct lseek_args ouap;
 1978 
 1979         ouap.fd = uap->fd;
 1980         ouap.offset = uap->offset;
 1981         ouap.whence = uap->whence;
 1982         return (sys_lseek(td, &ouap));
 1983 }
 1984 
 1985 /*
 1986  * Check access permissions using passed credentials.
 1987  */
 1988 static int
 1989 vn_access(vp, user_flags, cred, td)
 1990         struct vnode    *vp;
 1991         int             user_flags;
 1992         struct ucred    *cred;
 1993         struct thread   *td;
 1994 {
 1995         accmode_t accmode;
 1996         int error;
 1997 
 1998         /* Flags == 0 means only check for existence. */
 1999         error = 0;
 2000         if (user_flags) {
 2001                 accmode = 0;
 2002                 if (user_flags & R_OK)
 2003                         accmode |= VREAD;
 2004                 if (user_flags & W_OK)
 2005                         accmode |= VWRITE;
 2006                 if (user_flags & X_OK)
 2007                         accmode |= VEXEC;
 2008 #ifdef MAC
 2009                 error = mac_vnode_check_access(cred, vp, accmode);
 2010                 if (error != 0)
 2011                         return (error);
 2012 #endif
 2013                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2014                         error = VOP_ACCESS(vp, accmode, cred, td);
 2015         }
 2016         return (error);
 2017 }
 2018 
 2019 /*
 2020  * Check access permissions using "real" credentials.
 2021  */
 2022 #ifndef _SYS_SYSPROTO_H_
 2023 struct access_args {
 2024         char    *path;
 2025         int     amode;
 2026 };
 2027 #endif
 2028 int
 2029 sys_access(td, uap)
 2030         struct thread *td;
 2031         register struct access_args /* {
 2032                 char *path;
 2033                 int amode;
 2034         } */ *uap;
 2035 {
 2036 
 2037         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2038 }
 2039 
 2040 #ifndef _SYS_SYSPROTO_H_
 2041 struct faccessat_args {
 2042         int     dirfd;
 2043         char    *path;
 2044         int     amode;
 2045         int     flag;
 2046 }
 2047 #endif
 2048 int
 2049 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2050 {
 2051 
 2052         if (uap->flag & ~AT_EACCESS)
 2053                 return (EINVAL);
 2054         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2055             uap->amode));
 2056 }
 2057 
 2058 int
 2059 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2060 {
 2061 
 2062         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2063 }
 2064 
 2065 int
 2066 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2067     int flag, int amode)
 2068 {
 2069         struct ucred *cred, *tmpcred;
 2070         struct vnode *vp;
 2071         struct nameidata nd;
 2072         cap_rights_t rights;
 2073         int error;
 2074 
 2075         /*
 2076          * Create and modify a temporary credential instead of one that
 2077          * is potentially shared.
 2078          */
 2079         if (!(flag & AT_EACCESS)) {
 2080                 cred = td->td_ucred;
 2081                 tmpcred = crdup(cred);
 2082                 tmpcred->cr_uid = cred->cr_ruid;
 2083                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2084                 td->td_ucred = tmpcred;
 2085         } else
 2086                 cred = tmpcred = td->td_ucred;
 2087         AUDIT_ARG_VALUE(amode);
 2088         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2089             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2090             td);
 2091         if ((error = namei(&nd)) != 0)
 2092                 goto out1;
 2093         vp = nd.ni_vp;
 2094 
 2095         error = vn_access(vp, amode, tmpcred, td);
 2096         NDFREE(&nd, NDF_ONLY_PNBUF);
 2097         vput(vp);
 2098 out1:
 2099         if (!(flag & AT_EACCESS)) {
 2100                 td->td_ucred = cred;
 2101                 crfree(tmpcred);
 2102         }
 2103         return (error);
 2104 }
 2105 
 2106 /*
 2107  * Check access permissions using "effective" credentials.
 2108  */
 2109 #ifndef _SYS_SYSPROTO_H_
 2110 struct eaccess_args {
 2111         char    *path;
 2112         int     amode;
 2113 };
 2114 #endif
 2115 int
 2116 sys_eaccess(td, uap)
 2117         struct thread *td;
 2118         register struct eaccess_args /* {
 2119                 char *path;
 2120                 int amode;
 2121         } */ *uap;
 2122 {
 2123 
 2124         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2125 }
 2126 
 2127 int
 2128 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2129 {
 2130 
 2131         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2132 }
 2133 
 2134 #if defined(COMPAT_43)
 2135 /*
 2136  * Get file status; this version follows links.
 2137  */
 2138 #ifndef _SYS_SYSPROTO_H_
 2139 struct ostat_args {
 2140         char    *path;
 2141         struct ostat *ub;
 2142 };
 2143 #endif
 2144 int
 2145 ostat(td, uap)
 2146         struct thread *td;
 2147         register struct ostat_args /* {
 2148                 char *path;
 2149                 struct ostat *ub;
 2150         } */ *uap;
 2151 {
 2152         struct stat sb;
 2153         struct ostat osb;
 2154         int error;
 2155 
 2156         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2157         if (error != 0)
 2158                 return (error);
 2159         cvtstat(&sb, &osb);
 2160         return (copyout(&osb, uap->ub, sizeof (osb)));
 2161 }
 2162 
 2163 /*
 2164  * Get file status; this version does not follow links.
 2165  */
 2166 #ifndef _SYS_SYSPROTO_H_
 2167 struct olstat_args {
 2168         char    *path;
 2169         struct ostat *ub;
 2170 };
 2171 #endif
 2172 int
 2173 olstat(td, uap)
 2174         struct thread *td;
 2175         register struct olstat_args /* {
 2176                 char *path;
 2177                 struct ostat *ub;
 2178         } */ *uap;
 2179 {
 2180         struct stat sb;
 2181         struct ostat osb;
 2182         int error;
 2183 
 2184         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2185         if (error != 0)
 2186                 return (error);
 2187         cvtstat(&sb, &osb);
 2188         return (copyout(&osb, uap->ub, sizeof (osb)));
 2189 }
 2190 
 2191 /*
 2192  * Convert from an old to a new stat structure.
 2193  */
 2194 void
 2195 cvtstat(st, ost)
 2196         struct stat *st;
 2197         struct ostat *ost;
 2198 {
 2199 
 2200         bzero(ost, sizeof(*ost));
 2201         ost->st_dev = st->st_dev;
 2202         ost->st_ino = st->st_ino;
 2203         ost->st_mode = st->st_mode;
 2204         ost->st_nlink = st->st_nlink;
 2205         ost->st_uid = st->st_uid;
 2206         ost->st_gid = st->st_gid;
 2207         ost->st_rdev = st->st_rdev;
 2208         if (st->st_size < (quad_t)1 << 32)
 2209                 ost->st_size = st->st_size;
 2210         else
 2211                 ost->st_size = -2;
 2212         ost->st_atim = st->st_atim;
 2213         ost->st_mtim = st->st_mtim;
 2214         ost->st_ctim = st->st_ctim;
 2215         ost->st_blksize = st->st_blksize;
 2216         ost->st_blocks = st->st_blocks;
 2217         ost->st_flags = st->st_flags;
 2218         ost->st_gen = st->st_gen;
 2219 }
 2220 #endif /* COMPAT_43 */
 2221 
 2222 /*
 2223  * Get file status; this version follows links.
 2224  */
 2225 #ifndef _SYS_SYSPROTO_H_
 2226 struct stat_args {
 2227         char    *path;
 2228         struct stat *ub;
 2229 };
 2230 #endif
 2231 int
 2232 sys_stat(td, uap)
 2233         struct thread *td;
 2234         register struct stat_args /* {
 2235                 char *path;
 2236                 struct stat *ub;
 2237         } */ *uap;
 2238 {
 2239         struct stat sb;
 2240         int error;
 2241 
 2242         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2243         if (error == 0)
 2244                 error = copyout(&sb, uap->ub, sizeof (sb));
 2245         return (error);
 2246 }
 2247 
 2248 #ifndef _SYS_SYSPROTO_H_
 2249 struct fstatat_args {
 2250         int     fd;
 2251         char    *path;
 2252         struct stat     *buf;
 2253         int     flag;
 2254 }
 2255 #endif
 2256 int
 2257 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2258 {
 2259         struct stat sb;
 2260         int error;
 2261 
 2262         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2263             UIO_USERSPACE, &sb);
 2264         if (error == 0)
 2265                 error = copyout(&sb, uap->buf, sizeof (sb));
 2266         return (error);
 2267 }
 2268 
 2269 int
 2270 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2271 {
 2272 
 2273         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2274 }
 2275 
 2276 int
 2277 kern_statat(struct thread *td, int flag, int fd, char *path,
 2278     enum uio_seg pathseg, struct stat *sbp)
 2279 {
 2280 
 2281         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2282 }
 2283 
 2284 int
 2285 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2286     enum uio_seg pathseg, struct stat *sbp,
 2287     void (*hook)(struct vnode *vp, struct stat *sbp))
 2288 {
 2289         struct nameidata nd;
 2290         struct stat sb;
 2291         cap_rights_t rights;
 2292         int error;
 2293 
 2294         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2295                 return (EINVAL);
 2296 
 2297         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2298             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2299             cap_rights_init(&rights, CAP_FSTAT), td);
 2300 
 2301         if ((error = namei(&nd)) != 0)
 2302                 return (error);
 2303         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2304         if (error == 0) {
 2305                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2306                 if (S_ISREG(sb.st_mode))
 2307                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2308                 if (__predict_false(hook != NULL))
 2309                         hook(nd.ni_vp, &sb);
 2310         }
 2311         NDFREE(&nd, NDF_ONLY_PNBUF);
 2312         vput(nd.ni_vp);
 2313         if (error != 0)
 2314                 return (error);
 2315         *sbp = sb;
 2316 #ifdef KTRACE
 2317         if (KTRPOINT(td, KTR_STRUCT))
 2318                 ktrstat(&sb);
 2319 #endif
 2320         return (0);
 2321 }
 2322 
 2323 /*
 2324  * Get file status; this version does not follow links.
 2325  */
 2326 #ifndef _SYS_SYSPROTO_H_
 2327 struct lstat_args {
 2328         char    *path;
 2329         struct stat *ub;
 2330 };
 2331 #endif
 2332 int
 2333 sys_lstat(td, uap)
 2334         struct thread *td;
 2335         register struct lstat_args /* {
 2336                 char *path;
 2337                 struct stat *ub;
 2338         } */ *uap;
 2339 {
 2340         struct stat sb;
 2341         int error;
 2342 
 2343         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2344         if (error == 0)
 2345                 error = copyout(&sb, uap->ub, sizeof (sb));
 2346         return (error);
 2347 }
 2348 
 2349 int
 2350 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2351 {
 2352 
 2353         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2354             sbp));
 2355 }
 2356 
 2357 /*
 2358  * Implementation of the NetBSD [l]stat() functions.
 2359  */
 2360 void
 2361 cvtnstat(sb, nsb)
 2362         struct stat *sb;
 2363         struct nstat *nsb;
 2364 {
 2365 
 2366         bzero(nsb, sizeof *nsb);
 2367         nsb->st_dev = sb->st_dev;
 2368         nsb->st_ino = sb->st_ino;
 2369         nsb->st_mode = sb->st_mode;
 2370         nsb->st_nlink = sb->st_nlink;
 2371         nsb->st_uid = sb->st_uid;
 2372         nsb->st_gid = sb->st_gid;
 2373         nsb->st_rdev = sb->st_rdev;
 2374         nsb->st_atim = sb->st_atim;
 2375         nsb->st_mtim = sb->st_mtim;
 2376         nsb->st_ctim = sb->st_ctim;
 2377         nsb->st_size = sb->st_size;
 2378         nsb->st_blocks = sb->st_blocks;
 2379         nsb->st_blksize = sb->st_blksize;
 2380         nsb->st_flags = sb->st_flags;
 2381         nsb->st_gen = sb->st_gen;
 2382         nsb->st_birthtim = sb->st_birthtim;
 2383 }
 2384 
 2385 #ifndef _SYS_SYSPROTO_H_
 2386 struct nstat_args {
 2387         char    *path;
 2388         struct nstat *ub;
 2389 };
 2390 #endif
 2391 int
 2392 sys_nstat(td, uap)
 2393         struct thread *td;
 2394         register struct nstat_args /* {
 2395                 char *path;
 2396                 struct nstat *ub;
 2397         } */ *uap;
 2398 {
 2399         struct stat sb;
 2400         struct nstat nsb;
 2401         int error;
 2402 
 2403         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2404         if (error != 0)
 2405                 return (error);
 2406         cvtnstat(&sb, &nsb);
 2407         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2408 }
 2409 
 2410 /*
 2411  * NetBSD lstat.  Get file status; this version does not follow links.
 2412  */
 2413 #ifndef _SYS_SYSPROTO_H_
 2414 struct lstat_args {
 2415         char    *path;
 2416         struct stat *ub;
 2417 };
 2418 #endif
 2419 int
 2420 sys_nlstat(td, uap)
 2421         struct thread *td;
 2422         register struct nlstat_args /* {
 2423                 char *path;
 2424                 struct nstat *ub;
 2425         } */ *uap;
 2426 {
 2427         struct stat sb;
 2428         struct nstat nsb;
 2429         int error;
 2430 
 2431         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2432         if (error != 0)
 2433                 return (error);
 2434         cvtnstat(&sb, &nsb);
 2435         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2436 }
 2437 
 2438 /*
 2439  * Get configurable pathname variables.
 2440  */
 2441 #ifndef _SYS_SYSPROTO_H_
 2442 struct pathconf_args {
 2443         char    *path;
 2444         int     name;
 2445 };
 2446 #endif
 2447 int
 2448 sys_pathconf(td, uap)
 2449         struct thread *td;
 2450         register struct pathconf_args /* {
 2451                 char *path;
 2452                 int name;
 2453         } */ *uap;
 2454 {
 2455 
 2456         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2457 }
 2458 
 2459 #ifndef _SYS_SYSPROTO_H_
 2460 struct lpathconf_args {
 2461         char    *path;
 2462         int     name;
 2463 };
 2464 #endif
 2465 int
 2466 sys_lpathconf(td, uap)
 2467         struct thread *td;
 2468         register struct lpathconf_args /* {
 2469                 char *path;
 2470                 int name;
 2471         } */ *uap;
 2472 {
 2473 
 2474         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2475             NOFOLLOW));
 2476 }
 2477 
 2478 int
 2479 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2480     u_long flags)
 2481 {
 2482         struct nameidata nd;
 2483         int error;
 2484 
 2485         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2486             pathseg, path, td);
 2487         if ((error = namei(&nd)) != 0)
 2488                 return (error);
 2489         NDFREE(&nd, NDF_ONLY_PNBUF);
 2490 
 2491         /* If asynchronous I/O is available, it works for all files. */
 2492         if (name == _PC_ASYNC_IO)
 2493                 td->td_retval[0] = async_io_version;
 2494         else
 2495                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2496         vput(nd.ni_vp);
 2497         return (error);
 2498 }
 2499 
 2500 /*
 2501  * Return target name of a symbolic link.
 2502  */
 2503 #ifndef _SYS_SYSPROTO_H_
 2504 struct readlink_args {
 2505         char    *path;
 2506         char    *buf;
 2507         size_t  count;
 2508 };
 2509 #endif
 2510 int
 2511 sys_readlink(td, uap)
 2512         struct thread *td;
 2513         register struct readlink_args /* {
 2514                 char *path;
 2515                 char *buf;
 2516                 size_t count;
 2517         } */ *uap;
 2518 {
 2519 
 2520         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2521             UIO_USERSPACE, uap->count));
 2522 }
 2523 #ifndef _SYS_SYSPROTO_H_
 2524 struct readlinkat_args {
 2525         int     fd;
 2526         char    *path;
 2527         char    *buf;
 2528         size_t  bufsize;
 2529 };
 2530 #endif
 2531 int
 2532 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2533 {
 2534 
 2535         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2536             uap->buf, UIO_USERSPACE, uap->bufsize));
 2537 }
 2538 
 2539 int
 2540 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2541     enum uio_seg bufseg, size_t count)
 2542 {
 2543 
 2544         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2545             count));
 2546 }
 2547 
 2548 int
 2549 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2550     char *buf, enum uio_seg bufseg, size_t count)
 2551 {
 2552         struct vnode *vp;
 2553         struct iovec aiov;
 2554         struct uio auio;
 2555         struct nameidata nd;
 2556         int error;
 2557 
 2558         if (count > IOSIZE_MAX)
 2559                 return (EINVAL);
 2560 
 2561         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2562             pathseg, path, fd, td);
 2563 
 2564         if ((error = namei(&nd)) != 0)
 2565                 return (error);
 2566         NDFREE(&nd, NDF_ONLY_PNBUF);
 2567         vp = nd.ni_vp;
 2568 #ifdef MAC
 2569         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2570         if (error != 0) {
 2571                 vput(vp);
 2572                 return (error);
 2573         }
 2574 #endif
 2575         if (vp->v_type != VLNK)
 2576                 error = EINVAL;
 2577         else {
 2578                 aiov.iov_base = buf;
 2579                 aiov.iov_len = count;
 2580                 auio.uio_iov = &aiov;
 2581                 auio.uio_iovcnt = 1;
 2582                 auio.uio_offset = 0;
 2583                 auio.uio_rw = UIO_READ;
 2584                 auio.uio_segflg = bufseg;
 2585                 auio.uio_td = td;
 2586                 auio.uio_resid = count;
 2587                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2588                 td->td_retval[0] = count - auio.uio_resid;
 2589         }
 2590         vput(vp);
 2591         return (error);
 2592 }
 2593 
 2594 /*
 2595  * Common implementation code for chflags() and fchflags().
 2596  */
 2597 static int
 2598 setfflags(td, vp, flags)
 2599         struct thread *td;
 2600         struct vnode *vp;
 2601         u_long flags;
 2602 {
 2603         struct mount *mp;
 2604         struct vattr vattr;
 2605         int error;
 2606 
 2607         /* We can't support the value matching VNOVAL. */
 2608         if (flags == VNOVAL)
 2609                 return (EOPNOTSUPP);
 2610 
 2611         /*
 2612          * Prevent non-root users from setting flags on devices.  When
 2613          * a device is reused, users can retain ownership of the device
 2614          * if they are allowed to set flags and programs assume that
 2615          * chown can't fail when done as root.
 2616          */
 2617         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2618                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2619                 if (error != 0)
 2620                         return (error);
 2621         }
 2622 
 2623         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2624                 return (error);
 2625         VATTR_NULL(&vattr);
 2626         vattr.va_flags = flags;
 2627         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2628 #ifdef MAC
 2629         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2630         if (error == 0)
 2631 #endif
 2632                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2633         VOP_UNLOCK(vp, 0);
 2634         vn_finished_write(mp);
 2635         return (error);
 2636 }
 2637 
 2638 /*
 2639  * Change flags of a file given a path name.
 2640  */
 2641 #ifndef _SYS_SYSPROTO_H_
 2642 struct chflags_args {
 2643         const char *path;
 2644         u_long  flags;
 2645 };
 2646 #endif
 2647 int
 2648 sys_chflags(td, uap)
 2649         struct thread *td;
 2650         register struct chflags_args /* {
 2651                 const char *path;
 2652                 u_long flags;
 2653         } */ *uap;
 2654 {
 2655 
 2656         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2657 }
 2658 
 2659 #ifndef _SYS_SYSPROTO_H_
 2660 struct chflagsat_args {
 2661         int     fd;
 2662         const char *path;
 2663         u_long  flags;
 2664         int     atflag;
 2665 }
 2666 #endif
 2667 int
 2668 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2669 {
 2670         int fd = uap->fd;
 2671         const char *path = uap->path;
 2672         u_long flags = uap->flags;
 2673         int atflag = uap->atflag;
 2674 
 2675         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2676                 return (EINVAL);
 2677 
 2678         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2679 }
 2680 
 2681 static int
 2682 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2683     u_long flags)
 2684 {
 2685 
 2686         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2687 }
 2688 
 2689 /*
 2690  * Same as chflags() but doesn't follow symlinks.
 2691  */
 2692 int
 2693 sys_lchflags(td, uap)
 2694         struct thread *td;
 2695         register struct lchflags_args /* {
 2696                 const char *path;
 2697                 u_long flags;
 2698         } */ *uap;
 2699 {
 2700 
 2701         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2702             uap->flags, AT_SYMLINK_NOFOLLOW));
 2703 }
 2704 
 2705 static int
 2706 kern_chflagsat(struct thread *td, int fd, const char *path,
 2707     enum uio_seg pathseg, u_long flags, int atflag)
 2708 {
 2709         struct nameidata nd;
 2710         cap_rights_t rights;
 2711         int error, follow;
 2712 
 2713         AUDIT_ARG_FFLAGS(flags);
 2714         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2715         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2716             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2717         if ((error = namei(&nd)) != 0)
 2718                 return (error);
 2719         NDFREE(&nd, NDF_ONLY_PNBUF);
 2720         error = setfflags(td, nd.ni_vp, flags);
 2721         vrele(nd.ni_vp);
 2722         return (error);
 2723 }
 2724 
 2725 /*
 2726  * Change flags of a file given a file descriptor.
 2727  */
 2728 #ifndef _SYS_SYSPROTO_H_
 2729 struct fchflags_args {
 2730         int     fd;
 2731         u_long  flags;
 2732 };
 2733 #endif
 2734 int
 2735 sys_fchflags(td, uap)
 2736         struct thread *td;
 2737         register struct fchflags_args /* {
 2738                 int fd;
 2739                 u_long flags;
 2740         } */ *uap;
 2741 {
 2742         struct file *fp;
 2743         cap_rights_t rights;
 2744         int error;
 2745 
 2746         AUDIT_ARG_FD(uap->fd);
 2747         AUDIT_ARG_FFLAGS(uap->flags);
 2748         error = getvnode(td->td_proc->p_fd, uap->fd,
 2749             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2750         if (error != 0)
 2751                 return (error);
 2752 #ifdef AUDIT
 2753         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2754         AUDIT_ARG_VNODE1(fp->f_vnode);
 2755         VOP_UNLOCK(fp->f_vnode, 0);
 2756 #endif
 2757         error = setfflags(td, fp->f_vnode, uap->flags);
 2758         fdrop(fp, td);
 2759         return (error);
 2760 }
 2761 
 2762 /*
 2763  * Common implementation code for chmod(), lchmod() and fchmod().
 2764  */
 2765 int
 2766 setfmode(td, cred, vp, mode)
 2767         struct thread *td;
 2768         struct ucred *cred;
 2769         struct vnode *vp;
 2770         int mode;
 2771 {
 2772         struct mount *mp;
 2773         struct vattr vattr;
 2774         int error;
 2775 
 2776         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2777                 return (error);
 2778         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2779         VATTR_NULL(&vattr);
 2780         vattr.va_mode = mode & ALLPERMS;
 2781 #ifdef MAC
 2782         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2783         if (error == 0)
 2784 #endif
 2785                 error = VOP_SETATTR(vp, &vattr, cred);
 2786         VOP_UNLOCK(vp, 0);
 2787         vn_finished_write(mp);
 2788         return (error);
 2789 }
 2790 
 2791 /*
 2792  * Change mode of a file given path name.
 2793  */
 2794 #ifndef _SYS_SYSPROTO_H_
 2795 struct chmod_args {
 2796         char    *path;
 2797         int     mode;
 2798 };
 2799 #endif
 2800 int
 2801 sys_chmod(td, uap)
 2802         struct thread *td;
 2803         register struct chmod_args /* {
 2804                 char *path;
 2805                 int mode;
 2806         } */ *uap;
 2807 {
 2808 
 2809         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2810 }
 2811 
 2812 #ifndef _SYS_SYSPROTO_H_
 2813 struct fchmodat_args {
 2814         int     dirfd;
 2815         char    *path;
 2816         mode_t  mode;
 2817         int     flag;
 2818 }
 2819 #endif
 2820 int
 2821 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2822 {
 2823         int flag = uap->flag;
 2824         int fd = uap->fd;
 2825         char *path = uap->path;
 2826         mode_t mode = uap->mode;
 2827 
 2828         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2829                 return (EINVAL);
 2830 
 2831         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2832 }
 2833 
 2834 int
 2835 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2836 {
 2837 
 2838         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2839 }
 2840 
 2841 /*
 2842  * Change mode of a file given path name (don't follow links.)
 2843  */
 2844 #ifndef _SYS_SYSPROTO_H_
 2845 struct lchmod_args {
 2846         char    *path;
 2847         int     mode;
 2848 };
 2849 #endif
 2850 int
 2851 sys_lchmod(td, uap)
 2852         struct thread *td;
 2853         register struct lchmod_args /* {
 2854                 char *path;
 2855                 int mode;
 2856         } */ *uap;
 2857 {
 2858 
 2859         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2860             uap->mode, AT_SYMLINK_NOFOLLOW));
 2861 }
 2862 
 2863 int
 2864 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2865     mode_t mode, int flag)
 2866 {
 2867         struct nameidata nd;
 2868         cap_rights_t rights;
 2869         int error, follow;
 2870 
 2871         AUDIT_ARG_MODE(mode);
 2872         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2873         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2874             cap_rights_init(&rights, CAP_FCHMOD), td);
 2875         if ((error = namei(&nd)) != 0)
 2876                 return (error);
 2877         NDFREE(&nd, NDF_ONLY_PNBUF);
 2878         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2879         vrele(nd.ni_vp);
 2880         return (error);
 2881 }
 2882 
 2883 /*
 2884  * Change mode of a file given a file descriptor.
 2885  */
 2886 #ifndef _SYS_SYSPROTO_H_
 2887 struct fchmod_args {
 2888         int     fd;
 2889         int     mode;
 2890 };
 2891 #endif
 2892 int
 2893 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2894 {
 2895         struct file *fp;
 2896         cap_rights_t rights;
 2897         int error;
 2898 
 2899         AUDIT_ARG_FD(uap->fd);
 2900         AUDIT_ARG_MODE(uap->mode);
 2901 
 2902         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2903         if (error != 0)
 2904                 return (error);
 2905         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2906         fdrop(fp, td);
 2907         return (error);
 2908 }
 2909 
 2910 /*
 2911  * Common implementation for chown(), lchown(), and fchown()
 2912  */
 2913 int
 2914 setfown(td, cred, vp, uid, gid)
 2915         struct thread *td;
 2916         struct ucred *cred;
 2917         struct vnode *vp;
 2918         uid_t uid;
 2919         gid_t gid;
 2920 {
 2921         struct mount *mp;
 2922         struct vattr vattr;
 2923         int error;
 2924 
 2925         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2926                 return (error);
 2927         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2928         VATTR_NULL(&vattr);
 2929         vattr.va_uid = uid;
 2930         vattr.va_gid = gid;
 2931 #ifdef MAC
 2932         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2933             vattr.va_gid);
 2934         if (error == 0)
 2935 #endif
 2936                 error = VOP_SETATTR(vp, &vattr, cred);
 2937         VOP_UNLOCK(vp, 0);
 2938         vn_finished_write(mp);
 2939         return (error);
 2940 }
 2941 
 2942 /*
 2943  * Set ownership given a path name.
 2944  */
 2945 #ifndef _SYS_SYSPROTO_H_
 2946 struct chown_args {
 2947         char    *path;
 2948         int     uid;
 2949         int     gid;
 2950 };
 2951 #endif
 2952 int
 2953 sys_chown(td, uap)
 2954         struct thread *td;
 2955         register struct chown_args /* {
 2956                 char *path;
 2957                 int uid;
 2958                 int gid;
 2959         } */ *uap;
 2960 {
 2961 
 2962         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2963 }
 2964 
 2965 #ifndef _SYS_SYSPROTO_H_
 2966 struct fchownat_args {
 2967         int fd;
 2968         const char * path;
 2969         uid_t uid;
 2970         gid_t gid;
 2971         int flag;
 2972 };
 2973 #endif
 2974 int
 2975 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2976 {
 2977         int flag;
 2978 
 2979         flag = uap->flag;
 2980         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2981                 return (EINVAL);
 2982 
 2983         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2984             uap->gid, uap->flag));
 2985 }
 2986 
 2987 int
 2988 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2989     int gid)
 2990 {
 2991 
 2992         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2993 }
 2994 
 2995 int
 2996 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2997     int uid, int gid, int flag)
 2998 {
 2999         struct nameidata nd;
 3000         cap_rights_t rights;
 3001         int error, follow;
 3002 
 3003         AUDIT_ARG_OWNER(uid, gid);
 3004         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3005         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 3006             cap_rights_init(&rights, CAP_FCHOWN), td);
 3007 
 3008         if ((error = namei(&nd)) != 0)
 3009                 return (error);
 3010         NDFREE(&nd, NDF_ONLY_PNBUF);
 3011         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3012         vrele(nd.ni_vp);
 3013         return (error);
 3014 }
 3015 
 3016 /*
 3017  * Set ownership given a path name, do not cross symlinks.
 3018  */
 3019 #ifndef _SYS_SYSPROTO_H_
 3020 struct lchown_args {
 3021         char    *path;
 3022         int     uid;
 3023         int     gid;
 3024 };
 3025 #endif
 3026 int
 3027 sys_lchown(td, uap)
 3028         struct thread *td;
 3029         register struct lchown_args /* {
 3030                 char *path;
 3031                 int uid;
 3032                 int gid;
 3033         } */ *uap;
 3034 {
 3035 
 3036         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3037 }
 3038 
 3039 int
 3040 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3041     int gid)
 3042 {
 3043 
 3044         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3045             AT_SYMLINK_NOFOLLOW));
 3046 }
 3047 
 3048 /*
 3049  * Set ownership given a file descriptor.
 3050  */
 3051 #ifndef _SYS_SYSPROTO_H_
 3052 struct fchown_args {
 3053         int     fd;
 3054         int     uid;
 3055         int     gid;
 3056 };
 3057 #endif
 3058 int
 3059 sys_fchown(td, uap)
 3060         struct thread *td;
 3061         register struct fchown_args /* {
 3062                 int fd;
 3063                 int uid;
 3064                 int gid;
 3065         } */ *uap;
 3066 {
 3067         struct file *fp;
 3068         cap_rights_t rights;
 3069         int error;
 3070 
 3071         AUDIT_ARG_FD(uap->fd);
 3072         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3073         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3074         if (error != 0)
 3075                 return (error);
 3076         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3077         fdrop(fp, td);
 3078         return (error);
 3079 }
 3080 
 3081 /*
 3082  * Common implementation code for utimes(), lutimes(), and futimes().
 3083  */
 3084 static int
 3085 getutimes(usrtvp, tvpseg, tsp)
 3086         const struct timeval *usrtvp;
 3087         enum uio_seg tvpseg;
 3088         struct timespec *tsp;
 3089 {
 3090         struct timeval tv[2];
 3091         const struct timeval *tvp;
 3092         int error;
 3093 
 3094         if (usrtvp == NULL) {
 3095                 vfs_timestamp(&tsp[0]);
 3096                 tsp[1] = tsp[0];
 3097         } else {
 3098                 if (tvpseg == UIO_SYSSPACE) {
 3099                         tvp = usrtvp;
 3100                 } else {
 3101                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3102                                 return (error);
 3103                         tvp = tv;
 3104                 }
 3105 
 3106                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3107                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3108                         return (EINVAL);
 3109                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3110                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3111         }
 3112         return (0);
 3113 }
 3114 
 3115 /*
 3116  * Common implementation code for futimens(), utimensat().
 3117  */
 3118 #define UTIMENS_NULL    0x1
 3119 #define UTIMENS_EXIT    0x2
 3120 static int
 3121 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 3122     struct timespec *tsp, int *retflags)
 3123 {
 3124         struct timespec tsnow;
 3125         int error;
 3126 
 3127         vfs_timestamp(&tsnow);
 3128         *retflags = 0;
 3129         if (usrtsp == NULL) {
 3130                 tsp[0] = tsnow;
 3131                 tsp[1] = tsnow;
 3132                 *retflags |= UTIMENS_NULL;
 3133                 return (0);
 3134         }
 3135         if (tspseg == UIO_SYSSPACE) {
 3136                 tsp[0] = usrtsp[0];
 3137                 tsp[1] = usrtsp[1];
 3138         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 3139                 return (error);
 3140         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 3141                 *retflags |= UTIMENS_EXIT;
 3142         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 3143                 *retflags |= UTIMENS_NULL;
 3144         if (tsp[0].tv_nsec == UTIME_OMIT)
 3145                 tsp[0].tv_sec = VNOVAL;
 3146         else if (tsp[0].tv_nsec == UTIME_NOW)
 3147                 tsp[0] = tsnow;
 3148         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 3149                 return (EINVAL);
 3150         if (tsp[1].tv_nsec == UTIME_OMIT)
 3151                 tsp[1].tv_sec = VNOVAL;
 3152         else if (tsp[1].tv_nsec == UTIME_NOW)
 3153                 tsp[1] = tsnow;
 3154         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 3155                 return (EINVAL);
 3156 
 3157         return (0);
 3158 }
 3159 
 3160 /*
 3161  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 3162  * and utimensat().
 3163  */
 3164 static int
 3165 setutimes(td, vp, ts, numtimes, nullflag)
 3166         struct thread *td;
 3167         struct vnode *vp;
 3168         const struct timespec *ts;
 3169         int numtimes;
 3170         int nullflag;
 3171 {
 3172         struct mount *mp;
 3173         struct vattr vattr;
 3174         int error, setbirthtime;
 3175 
 3176         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3177                 return (error);
 3178         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3179         setbirthtime = 0;
 3180         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3181             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3182                 setbirthtime = 1;
 3183         VATTR_NULL(&vattr);
 3184         vattr.va_atime = ts[0];
 3185         vattr.va_mtime = ts[1];
 3186         if (setbirthtime)
 3187                 vattr.va_birthtime = ts[1];
 3188         if (numtimes > 2)
 3189                 vattr.va_birthtime = ts[2];
 3190         if (nullflag)
 3191                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3192 #ifdef MAC
 3193         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3194             vattr.va_mtime);
 3195 #endif
 3196         if (error == 0)
 3197                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3198         VOP_UNLOCK(vp, 0);
 3199         vn_finished_write(mp);
 3200         return (error);
 3201 }
 3202 
 3203 /*
 3204  * Set the access and modification times of a file.
 3205  */
 3206 #ifndef _SYS_SYSPROTO_H_
 3207 struct utimes_args {
 3208         char    *path;
 3209         struct  timeval *tptr;
 3210 };
 3211 #endif
 3212 int
 3213 sys_utimes(td, uap)
 3214         struct thread *td;
 3215         register struct utimes_args /* {
 3216                 char *path;
 3217                 struct timeval *tptr;
 3218         } */ *uap;
 3219 {
 3220 
 3221         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3222             UIO_USERSPACE));
 3223 }
 3224 
 3225 #ifndef _SYS_SYSPROTO_H_
 3226 struct futimesat_args {
 3227         int fd;
 3228         const char * path;
 3229         const struct timeval * times;
 3230 };
 3231 #endif
 3232 int
 3233 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3234 {
 3235 
 3236         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3237             uap->times, UIO_USERSPACE));
 3238 }
 3239 
 3240 int
 3241 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3242     struct timeval *tptr, enum uio_seg tptrseg)
 3243 {
 3244 
 3245         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3246 }
 3247 
 3248 int
 3249 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3250     struct timeval *tptr, enum uio_seg tptrseg)
 3251 {
 3252         struct nameidata nd;
 3253         struct timespec ts[2];
 3254         cap_rights_t rights;
 3255         int error;
 3256 
 3257         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3258                 return (error);
 3259         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3260             cap_rights_init(&rights, CAP_FUTIMES), td);
 3261 
 3262         if ((error = namei(&nd)) != 0)
 3263                 return (error);
 3264         NDFREE(&nd, NDF_ONLY_PNBUF);
 3265         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3266         vrele(nd.ni_vp);
 3267         return (error);
 3268 }
 3269 
 3270 /*
 3271  * Set the access and modification times of a file.
 3272  */
 3273 #ifndef _SYS_SYSPROTO_H_
 3274 struct lutimes_args {
 3275         char    *path;
 3276         struct  timeval *tptr;
 3277 };
 3278 #endif
 3279 int
 3280 sys_lutimes(td, uap)
 3281         struct thread *td;
 3282         register struct lutimes_args /* {
 3283                 char *path;
 3284                 struct timeval *tptr;
 3285         } */ *uap;
 3286 {
 3287 
 3288         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3289             UIO_USERSPACE));
 3290 }
 3291 
 3292 int
 3293 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3294     struct timeval *tptr, enum uio_seg tptrseg)
 3295 {
 3296         struct timespec ts[2];
 3297         struct nameidata nd;
 3298         int error;
 3299 
 3300         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3301                 return (error);
 3302         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3303         if ((error = namei(&nd)) != 0)
 3304                 return (error);
 3305         NDFREE(&nd, NDF_ONLY_PNBUF);
 3306         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3307         vrele(nd.ni_vp);
 3308         return (error);
 3309 }
 3310 
 3311 /*
 3312  * Set the access and modification times of a file.
 3313  */
 3314 #ifndef _SYS_SYSPROTO_H_
 3315 struct futimes_args {
 3316         int     fd;
 3317         struct  timeval *tptr;
 3318 };
 3319 #endif
 3320 int
 3321 sys_futimes(td, uap)
 3322         struct thread *td;
 3323         register struct futimes_args /* {
 3324                 int  fd;
 3325                 struct timeval *tptr;
 3326         } */ *uap;
 3327 {
 3328 
 3329         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3330 }
 3331 
 3332 int
 3333 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3334     enum uio_seg tptrseg)
 3335 {
 3336         struct timespec ts[2];
 3337         struct file *fp;
 3338         cap_rights_t rights;
 3339         int error;
 3340 
 3341         AUDIT_ARG_FD(fd);
 3342         error = getutimes(tptr, tptrseg, ts);
 3343         if (error != 0)
 3344                 return (error);
 3345         error = getvnode(td->td_proc->p_fd, fd,
 3346             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3347         if (error != 0)
 3348                 return (error);
 3349 #ifdef AUDIT
 3350         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3351         AUDIT_ARG_VNODE1(fp->f_vnode);
 3352         VOP_UNLOCK(fp->f_vnode, 0);
 3353 #endif
 3354         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3355         fdrop(fp, td);
 3356         return (error);
 3357 }
 3358 
 3359 int
 3360 sys_futimens(struct thread *td, struct futimens_args *uap)
 3361 {
 3362 
 3363         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3364 }
 3365 
 3366 int
 3367 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3368     enum uio_seg tptrseg)
 3369 {
 3370         struct timespec ts[2];
 3371         struct file *fp;
 3372         cap_rights_t rights;
 3373         int error, flags;
 3374 
 3375         AUDIT_ARG_FD(fd);
 3376         error = getutimens(tptr, tptrseg, ts, &flags);
 3377         if (error != 0)
 3378                 return (error);
 3379         if (flags & UTIMENS_EXIT)
 3380                 return (0);
 3381         error = getvnode(td->td_proc->p_fd, fd,
 3382             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3383         if (error != 0)
 3384                 return (error);
 3385 #ifdef AUDIT
 3386         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3387         AUDIT_ARG_VNODE1(fp->f_vnode);
 3388         VOP_UNLOCK(fp->f_vnode, 0);
 3389 #endif
 3390         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3391         fdrop(fp, td);
 3392         return (error);
 3393 }
 3394 
 3395 int
 3396 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3397 {
 3398 
 3399         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3400             uap->times, UIO_USERSPACE, uap->flag));
 3401 }
 3402 
 3403 int
 3404 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3405     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3406 {
 3407         struct nameidata nd;
 3408         struct timespec ts[2];
 3409         int error, flags;
 3410 
 3411         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3412                 return (EINVAL);
 3413 
 3414         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3415                 return (error);
 3416         NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 3417             FOLLOW) | AUDITVNODE1, pathseg, path, fd, td);
 3418         if ((error = namei(&nd)) != 0)
 3419                 return (error);
 3420         /*
 3421          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3422          * POSIX states:
 3423          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3424          * "Search permission is denied by a component of the path prefix."
 3425          */
 3426         NDFREE(&nd, NDF_ONLY_PNBUF);
 3427         if ((flags & UTIMENS_EXIT) == 0)
 3428                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3429         vrele(nd.ni_vp);
 3430         return (error);
 3431 }
 3432 
 3433 /*
 3434  * Truncate a file given its path name.
 3435  */
 3436 #ifndef _SYS_SYSPROTO_H_
 3437 struct truncate_args {
 3438         char    *path;
 3439         int     pad;
 3440         off_t   length;
 3441 };
 3442 #endif
 3443 int
 3444 sys_truncate(td, uap)
 3445         struct thread *td;
 3446         register struct truncate_args /* {
 3447                 char *path;
 3448                 int pad;
 3449                 off_t length;
 3450         } */ *uap;
 3451 {
 3452 
 3453         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3454 }
 3455 
 3456 int
 3457 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3458 {
 3459         struct mount *mp;
 3460         struct vnode *vp;
 3461         void *rl_cookie;
 3462         struct vattr vattr;
 3463         struct nameidata nd;
 3464         int error;
 3465 
 3466         if (length < 0)
 3467                 return(EINVAL);
 3468         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3469         if ((error = namei(&nd)) != 0)
 3470                 return (error);
 3471         vp = nd.ni_vp;
 3472         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3473         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3474                 vn_rangelock_unlock(vp, rl_cookie);
 3475                 vrele(vp);
 3476                 return (error);
 3477         }
 3478         NDFREE(&nd, NDF_ONLY_PNBUF);
 3479         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3480         if (vp->v_type == VDIR)
 3481                 error = EISDIR;
 3482 #ifdef MAC
 3483         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3484         }
 3485 #endif
 3486         else if ((error = vn_writechk(vp)) == 0 &&
 3487             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3488                 VATTR_NULL(&vattr);
 3489                 vattr.va_size = length;
 3490                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3491         }
 3492         VOP_UNLOCK(vp, 0);
 3493         vn_finished_write(mp);
 3494         vn_rangelock_unlock(vp, rl_cookie);
 3495         vrele(vp);
 3496         return (error);
 3497 }
 3498 
 3499 #if defined(COMPAT_43)
 3500 /*
 3501  * Truncate a file given its path name.
 3502  */
 3503 #ifndef _SYS_SYSPROTO_H_
 3504 struct otruncate_args {
 3505         char    *path;
 3506         long    length;
 3507 };
 3508 #endif
 3509 int
 3510 otruncate(td, uap)
 3511         struct thread *td;
 3512         register struct otruncate_args /* {
 3513                 char *path;
 3514                 long length;
 3515         } */ *uap;
 3516 {
 3517         struct truncate_args /* {
 3518                 char *path;
 3519                 int pad;
 3520                 off_t length;
 3521         } */ nuap;
 3522 
 3523         nuap.path = uap->path;
 3524         nuap.length = uap->length;
 3525         return (sys_truncate(td, &nuap));
 3526 }
 3527 #endif /* COMPAT_43 */
 3528 
 3529 /* Versions with the pad argument */
 3530 int
 3531 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3532 {
 3533         struct truncate_args ouap;
 3534 
 3535         ouap.path = uap->path;
 3536         ouap.length = uap->length;
 3537         return (sys_truncate(td, &ouap));
 3538 }
 3539 
 3540 int
 3541 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3542 {
 3543         struct ftruncate_args ouap;
 3544 
 3545         ouap.fd = uap->fd;
 3546         ouap.length = uap->length;
 3547         return (sys_ftruncate(td, &ouap));
 3548 }
 3549 
 3550 /*
 3551  * Sync an open file.
 3552  */
 3553 #ifndef _SYS_SYSPROTO_H_
 3554 struct fsync_args {
 3555         int     fd;
 3556 };
 3557 #endif
 3558 int
 3559 sys_fsync(td, uap)
 3560         struct thread *td;
 3561         struct fsync_args /* {
 3562                 int fd;
 3563         } */ *uap;
 3564 {
 3565         struct vnode *vp;
 3566         struct mount *mp;
 3567         struct file *fp;
 3568         cap_rights_t rights;
 3569         int error, lock_flags;
 3570 
 3571         AUDIT_ARG_FD(uap->fd);
 3572         error = getvnode(td->td_proc->p_fd, uap->fd,
 3573             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3574         if (error != 0)
 3575                 return (error);
 3576         vp = fp->f_vnode;
 3577         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3578         if (error != 0)
 3579                 goto drop;
 3580         if (MNT_SHARED_WRITES(mp) ||
 3581             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3582                 lock_flags = LK_SHARED;
 3583         } else {
 3584                 lock_flags = LK_EXCLUSIVE;
 3585         }
 3586         vn_lock(vp, lock_flags | LK_RETRY);
 3587         AUDIT_ARG_VNODE1(vp);
 3588         if (vp->v_object != NULL) {
 3589                 VM_OBJECT_WLOCK(vp->v_object);
 3590                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3591                 VM_OBJECT_WUNLOCK(vp->v_object);
 3592         }
 3593         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3594 
 3595         VOP_UNLOCK(vp, 0);
 3596         vn_finished_write(mp);
 3597 drop:
 3598         fdrop(fp, td);
 3599         return (error);
 3600 }
 3601 
 3602 /*
 3603  * Rename files.  Source and destination must either both be directories, or
 3604  * both not be directories.  If target is a directory, it must be empty.
 3605  */
 3606 #ifndef _SYS_SYSPROTO_H_
 3607 struct rename_args {
 3608         char    *from;
 3609         char    *to;
 3610 };
 3611 #endif
 3612 int
 3613 sys_rename(td, uap)
 3614         struct thread *td;
 3615         register struct rename_args /* {
 3616                 char *from;
 3617                 char *to;
 3618         } */ *uap;
 3619 {
 3620 
 3621         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3622 }
 3623 
 3624 #ifndef _SYS_SYSPROTO_H_
 3625 struct renameat_args {
 3626         int     oldfd;
 3627         char    *old;
 3628         int     newfd;
 3629         char    *new;
 3630 };
 3631 #endif
 3632 int
 3633 sys_renameat(struct thread *td, struct renameat_args *uap)
 3634 {
 3635 
 3636         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3637             UIO_USERSPACE));
 3638 }
 3639 
 3640 int
 3641 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3642 {
 3643 
 3644         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3645 }
 3646 
 3647 int
 3648 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3649     enum uio_seg pathseg)
 3650 {
 3651         struct mount *mp = NULL;
 3652         struct vnode *tvp, *fvp, *tdvp;
 3653         struct nameidata fromnd, tond;
 3654         cap_rights_t rights;
 3655         int error;
 3656 
 3657 again:
 3658         bwillwrite();
 3659 #ifdef MAC
 3660         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3661             AUDITVNODE1, pathseg, old, oldfd,
 3662             cap_rights_init(&rights, CAP_RENAMEAT), td);
 3663 #else
 3664         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3665             pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
 3666 #endif
 3667 
 3668         if ((error = namei(&fromnd)) != 0)
 3669                 return (error);
 3670 #ifdef MAC
 3671         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3672             fromnd.ni_vp, &fromnd.ni_cnd);
 3673         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3674         if (fromnd.ni_dvp != fromnd.ni_vp)
 3675                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3676 #endif
 3677         fvp = fromnd.ni_vp;
 3678         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3679             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3680             cap_rights_init(&rights, CAP_LINKAT), td);
 3681         if (fromnd.ni_vp->v_type == VDIR)
 3682                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3683         if ((error = namei(&tond)) != 0) {
 3684                 /* Translate error code for rename("dir1", "dir2/."). */
 3685                 if (error == EISDIR && fvp->v_type == VDIR)
 3686                         error = EINVAL;
 3687                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3688                 vrele(fromnd.ni_dvp);
 3689                 vrele(fvp);
 3690                 goto out1;
 3691         }
 3692         tdvp = tond.ni_dvp;
 3693         tvp = tond.ni_vp;
 3694         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3695         if (error != 0) {
 3696                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3697                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3698                 if (tvp != NULL)
 3699                         vput(tvp);
 3700                 if (tdvp == tvp)
 3701                         vrele(tdvp);
 3702                 else
 3703                         vput(tdvp);
 3704                 vrele(fromnd.ni_dvp);
 3705                 vrele(fvp);
 3706                 vrele(tond.ni_startdir);
 3707                 if (fromnd.ni_startdir != NULL)
 3708                         vrele(fromnd.ni_startdir);
 3709                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3710                 if (error != 0)
 3711                         return (error);
 3712                 goto again;
 3713         }
 3714         if (tvp != NULL) {
 3715                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3716                         error = ENOTDIR;
 3717                         goto out;
 3718                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3719                         error = EISDIR;
 3720                         goto out;
 3721                 }
 3722 #ifdef CAPABILITIES
 3723                 if (newfd != AT_FDCWD) {
 3724                         /*
 3725                          * If the target already exists we require CAP_UNLINKAT
 3726                          * from 'newfd'.
 3727                          */
 3728                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3729                             cap_rights_init(&rights, CAP_UNLINKAT));
 3730                         if (error != 0)
 3731                                 goto out;
 3732                 }
 3733 #endif
 3734         }
 3735         if (fvp == tdvp) {
 3736                 error = EINVAL;
 3737                 goto out;
 3738         }
 3739         /*
 3740          * If the source is the same as the destination (that is, if they
 3741          * are links to the same vnode), then there is nothing to do.
 3742          */
 3743         if (fvp == tvp)
 3744                 error = -1;
 3745 #ifdef MAC
 3746         else
 3747                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3748                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3749 #endif
 3750 out:
 3751         if (error == 0) {
 3752                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3753                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3754                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3755                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3756         } else {
 3757                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3758                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3759                 if (tvp != NULL)
 3760                         vput(tvp);
 3761                 if (tdvp == tvp)
 3762                         vrele(tdvp);
 3763                 else
 3764                         vput(tdvp);
 3765                 vrele(fromnd.ni_dvp);
 3766                 vrele(fvp);
 3767         }
 3768         vrele(tond.ni_startdir);
 3769         vn_finished_write(mp);
 3770 out1:
 3771         if (fromnd.ni_startdir)
 3772                 vrele(fromnd.ni_startdir);
 3773         if (error == -1)
 3774                 return (0);
 3775         return (error);
 3776 }
 3777 
 3778 /*
 3779  * Make a directory file.
 3780  */
 3781 #ifndef _SYS_SYSPROTO_H_
 3782 struct mkdir_args {
 3783         char    *path;
 3784         int     mode;
 3785 };
 3786 #endif
 3787 int
 3788 sys_mkdir(td, uap)
 3789         struct thread *td;
 3790         register struct mkdir_args /* {
 3791                 char *path;
 3792                 int mode;
 3793         } */ *uap;
 3794 {
 3795 
 3796         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3797 }
 3798 
 3799 #ifndef _SYS_SYSPROTO_H_
 3800 struct mkdirat_args {
 3801         int     fd;
 3802         char    *path;
 3803         mode_t  mode;
 3804 };
 3805 #endif
 3806 int
 3807 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3808 {
 3809 
 3810         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3811 }
 3812 
 3813 int
 3814 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3815 {
 3816 
 3817         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3818 }
 3819 
 3820 int
 3821 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3822     int mode)
 3823 {
 3824         struct mount *mp;
 3825         struct vnode *vp;
 3826         struct vattr vattr;
 3827         struct nameidata nd;
 3828         cap_rights_t rights;
 3829         int error;
 3830 
 3831         AUDIT_ARG_MODE(mode);
 3832 restart:
 3833         bwillwrite();
 3834         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3835             NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 3836             td);
 3837         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3838         if ((error = namei(&nd)) != 0)
 3839                 return (error);
 3840         vp = nd.ni_vp;
 3841         if (vp != NULL) {
 3842                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3843                 /*
 3844                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3845                  * the strange behaviour of leaving the vnode unlocked
 3846                  * if the target is the same vnode as the parent.
 3847                  */
 3848                 if (vp == nd.ni_dvp)
 3849                         vrele(nd.ni_dvp);
 3850                 else
 3851                         vput(nd.ni_dvp);
 3852                 vrele(vp);
 3853                 return (EEXIST);
 3854         }
 3855         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3856                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3857                 vput(nd.ni_dvp);
 3858                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3859                         return (error);
 3860                 goto restart;
 3861         }
 3862         VATTR_NULL(&vattr);
 3863         vattr.va_type = VDIR;
 3864         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3865 #ifdef MAC
 3866         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3867             &vattr);
 3868         if (error != 0)
 3869                 goto out;
 3870 #endif
 3871         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3872 #ifdef MAC
 3873 out:
 3874 #endif
 3875         NDFREE(&nd, NDF_ONLY_PNBUF);
 3876         vput(nd.ni_dvp);
 3877         if (error == 0)
 3878                 vput(nd.ni_vp);
 3879         vn_finished_write(mp);
 3880         return (error);
 3881 }
 3882 
 3883 /*
 3884  * Remove a directory file.
 3885  */
 3886 #ifndef _SYS_SYSPROTO_H_
 3887 struct rmdir_args {
 3888         char    *path;
 3889 };
 3890 #endif
 3891 int
 3892 sys_rmdir(td, uap)
 3893         struct thread *td;
 3894         struct rmdir_args /* {
 3895                 char *path;
 3896         } */ *uap;
 3897 {
 3898 
 3899         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3900 }
 3901 
 3902 int
 3903 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3904 {
 3905 
 3906         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3907 }
 3908 
 3909 int
 3910 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3911 {
 3912         struct mount *mp;
 3913         struct vnode *vp;
 3914         struct nameidata nd;
 3915         cap_rights_t rights;
 3916         int error;
 3917 
 3918 restart:
 3919         bwillwrite();
 3920         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3921             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3922         if ((error = namei(&nd)) != 0)
 3923                 return (error);
 3924         vp = nd.ni_vp;
 3925         if (vp->v_type != VDIR) {
 3926                 error = ENOTDIR;
 3927                 goto out;
 3928         }
 3929         /*
 3930          * No rmdir "." please.
 3931          */
 3932         if (nd.ni_dvp == vp) {
 3933                 error = EINVAL;
 3934                 goto out;
 3935         }
 3936         /*
 3937          * The root of a mounted filesystem cannot be deleted.
 3938          */
 3939         if (vp->v_vflag & VV_ROOT) {
 3940                 error = EBUSY;
 3941                 goto out;
 3942         }
 3943 #ifdef MAC
 3944         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3945             &nd.ni_cnd);
 3946         if (error != 0)
 3947                 goto out;
 3948 #endif
 3949         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3950                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3951                 vput(vp);
 3952                 if (nd.ni_dvp == vp)
 3953                         vrele(nd.ni_dvp);
 3954                 else
 3955                         vput(nd.ni_dvp);
 3956                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3957                         return (error);
 3958                 goto restart;
 3959         }
 3960         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3961         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3962         vn_finished_write(mp);
 3963 out:
 3964         NDFREE(&nd, NDF_ONLY_PNBUF);
 3965         vput(vp);
 3966         if (nd.ni_dvp == vp)
 3967                 vrele(nd.ni_dvp);
 3968         else
 3969                 vput(nd.ni_dvp);
 3970         return (error);
 3971 }
 3972 
 3973 #ifdef COMPAT_43
 3974 /*
 3975  * Read a block of directory entries in a filesystem independent format.
 3976  */
 3977 #ifndef _SYS_SYSPROTO_H_
 3978 struct ogetdirentries_args {
 3979         int     fd;
 3980         char    *buf;
 3981         u_int   count;
 3982         long    *basep;
 3983 };
 3984 #endif
 3985 int
 3986 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3987 {
 3988         long loff;
 3989         int error;
 3990 
 3991         error = kern_ogetdirentries(td, uap, &loff);
 3992         if (error == 0)
 3993                 error = copyout(&loff, uap->basep, sizeof(long));
 3994         return (error);
 3995 }
 3996 
/*
 * Backend of ogetdirentries(): read up to uap->count bytes of directory
 * entries from the directory open on uap->fd into the user buffer
 * uap->buf, rewriting each entry's d_type/d_namlen pair on the way.
 *
 * On success td->td_retval[0] holds the number of bytes transferred and
 * *ploff the file offset the read started from.  Fails with EINVAL if
 * count exceeds 64KB or the descriptor is not a directory, EBADF if the
 * file is not open for reading, EIO if an entry with a zero d_reclen is
 * found, or with the error from getvnode(), the MAC check, VOP_READDIR()
 * or uiomove().
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
        struct vnode *vp;
        struct file *fp;
        struct uio auio, kuio;
        struct iovec aiov, kiov;
        struct dirent *dp, *edp;
        cap_rights_t rights;
        caddr_t dirbuf;
        int error, eofflag, readcnt;
        long loff;
        off_t foffset;

        /* XXX arbitrary sanity limit on `count'. */
        if (uap->count > 64 * 1024)
                return (EINVAL);
        error = getvnode(td->td_proc->p_fd, uap->fd,
            cap_rights_init(&rights, CAP_READ), &fp);
        if (error != 0)
                return (error);
        if ((fp->f_flag & FREAD) == 0) {
                fdrop(fp, td);
                return (EBADF);
        }
        vp = fp->f_vnode;
        foffset = foffset_lock(fp, 0);
unionread:
        /* Re-entered with vp switched to the covered vnode, see below. */
        if (vp->v_type != VDIR) {
                foffset_unlock(fp, foffset, 0);
                fdrop(fp, td);
                return (EINVAL);
        }
        /* auio describes the caller's user-space buffer. */
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->count;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
        auio.uio_resid = uap->count;
        vn_lock(vp, LK_SHARED | LK_RETRY);
        /* Remember the starting offset; it is what *ploff reports. */
        loff = auio.uio_offset = foffset;
#ifdef MAC
        error = mac_vnode_check_readdir(td->td_ucred, vp);
        if (error != 0) {
                VOP_UNLOCK(vp, 0);
                foffset_unlock(fp, foffset, FOF_NOUPDATE);
                fdrop(fp, td);
                return (error);
        }
#endif
        /*
         * On non-little-endian machines, a mount whose mnt_maxsymlinklen
         * is <= 0 is read straight into the user buffer with no rewriting.
         * Note that the `else' inside the conditional binds to the block
         * that follows the #endif.
         */
#       if (BYTE_ORDER != LITTLE_ENDIAN)
                if (vp->v_mount->mnt_maxsymlinklen <= 0) {
                        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
                            NULL, NULL);
                        foffset = auio.uio_offset;
                } else
#       endif
        {
                /*
                 * Read into a temporary kernel buffer (kuio is a copy of
                 * auio retargeted at dirbuf), fix up every entry, then
                 * move the result out to the user buffer through auio.
                 */
                kuio = auio;
                kuio.uio_iov = &kiov;
                kuio.uio_segflg = UIO_SYSSPACE;
                kiov.iov_len = uap->count;
                dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
                kiov.iov_base = dirbuf;
                error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
                            NULL, NULL);
                foffset = kuio.uio_offset;
                if (error == 0) {
                        readcnt = uap->count - kuio.uio_resid;
                        edp = (struct dirent *)&dirbuf[readcnt];
                        for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#                               if (BYTE_ORDER == LITTLE_ENDIAN)
                                        /*
                                         * The expected low byte of
                                         * dp->d_namlen is our dp->d_type.
                                         * The high MBZ byte of dp->d_namlen
                                         * is our dp->d_namlen.
                                         */
                                        dp->d_type = dp->d_namlen;
                                        dp->d_namlen = 0;
#                               else
                                        /*
                                         * The dp->d_type is the high byte
                                         * of the expected dp->d_namlen,
                                         * so must be zero'ed.
                                         */
                                        dp->d_type = 0;
#                               endif
                                /* A zero record length would loop forever. */
                                if (dp->d_reclen > 0) {
                                        dp = (struct dirent *)
                                            ((char *)dp + dp->d_reclen);
                                } else {
                                        error = EIO;
                                        break;
                                }
                        }
                        /* Copy out only if the whole buffer was walked. */
                        if (dp >= edp)
                                error = uiomove(dirbuf, readcnt, &auio);
                }
                free(dirbuf, M_TEMP);
        }
        if (error != 0) {
                VOP_UNLOCK(vp, 0);
                foffset_unlock(fp, foffset, 0);
                fdrop(fp, td);
                return (error);
        }
        /*
         * Nothing was transferred and vp is the root of a MNT_UNION mount:
         * point the open file at the vnode the mount covers and start over
         * from offset 0 on that directory.
         */
        if (uap->count == auio.uio_resid &&
            (vp->v_vflag & VV_ROOT) &&
            (vp->v_mount->mnt_flag & MNT_UNION)) {
                struct vnode *tvp = vp;
                vp = vp->v_mount->mnt_vnodecovered;
                VREF(vp);
                fp->f_vnode = vp;
                fp->f_data = vp;
                foffset = 0;
                vput(tvp);
                goto unionread;
        }
        VOP_UNLOCK(vp, 0);
        foffset_unlock(fp, foffset, 0);
        fdrop(fp, td);
        td->td_retval[0] = uap->count - auio.uio_resid;
        if (error == 0)
                *ploff = loff;
        return (error);
}
 4127 #endif /* COMPAT_43 */
 4128 
 4129 /*
 4130  * Read a block of directory entries in a filesystem independent format.
 4131  */
 4132 #ifndef _SYS_SYSPROTO_H_
 4133 struct getdirentries_args {
 4134         int     fd;
 4135         char    *buf;
 4136         u_int   count;
 4137         long    *basep;
 4138 };
 4139 #endif
 4140 int
 4141 sys_getdirentries(td, uap)
 4142         struct thread *td;
 4143         register struct getdirentries_args /* {
 4144                 int fd;
 4145                 char *buf;
 4146                 u_int count;
 4147                 long *basep;
 4148         } */ *uap;
 4149 {
 4150         long base;
 4151         int error;
 4152 
 4153         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4154             NULL, UIO_USERSPACE);
 4155         if (error != 0)
 4156                 return (error);
 4157         if (uap->basep != NULL)
 4158                 error = copyout(&base, uap->basep, sizeof(long));
 4159         return (error);
 4160 }
 4161 
 4162 int
 4163 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4164     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4165 {
 4166         struct vnode *vp;
 4167         struct file *fp;
 4168         struct uio auio;
 4169         struct iovec aiov;
 4170         cap_rights_t rights;
 4171         long loff;
 4172         int error, eofflag;
 4173         off_t foffset;
 4174 
 4175         AUDIT_ARG_FD(fd);
 4176         if (count > IOSIZE_MAX)
 4177                 return (EINVAL);
 4178         auio.uio_resid = count;
 4179         error = getvnode(td->td_proc->p_fd, fd,
 4180             cap_rights_init(&rights, CAP_READ), &fp);
 4181         if (error != 0)
 4182                 return (error);
 4183         if ((fp->f_flag & FREAD) == 0) {
 4184                 fdrop(fp, td);
 4185                 return (EBADF);
 4186         }
 4187         vp = fp->f_vnode;
 4188         foffset = foffset_lock(fp, 0);
 4189 unionread:
 4190         if (vp->v_type != VDIR) {
 4191                 error = EINVAL;
 4192                 goto fail;
 4193         }
 4194         aiov.iov_base = buf;
 4195         aiov.iov_len = count;
 4196         auio.uio_iov = &aiov;
 4197         auio.uio_iovcnt = 1;
 4198         auio.uio_rw = UIO_READ;
 4199         auio.uio_segflg = bufseg;
 4200         auio.uio_td = td;
 4201         vn_lock(vp, LK_SHARED | LK_RETRY);
 4202         AUDIT_ARG_VNODE1(vp);
 4203         loff = auio.uio_offset = foffset;
 4204 #ifdef MAC
 4205         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4206         if (error == 0)
 4207 #endif
 4208                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4209                     NULL);
 4210         foffset = auio.uio_offset;
 4211         if (error != 0) {
 4212                 VOP_UNLOCK(vp, 0);
 4213                 goto fail;
 4214         }
 4215         if (count == auio.uio_resid &&
 4216             (vp->v_vflag & VV_ROOT) &&
 4217             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4218                 struct vnode *tvp = vp;
 4219 
 4220                 vp = vp->v_mount->mnt_vnodecovered;
 4221                 VREF(vp);
 4222                 fp->f_vnode = vp;
 4223                 fp->f_data = vp;
 4224                 foffset = 0;
 4225                 vput(tvp);
 4226                 goto unionread;
 4227         }
 4228         VOP_UNLOCK(vp, 0);
 4229         *basep = loff;
 4230         if (residp != NULL)
 4231                 *residp = auio.uio_resid;
 4232         td->td_retval[0] = count - auio.uio_resid;
 4233 fail:
 4234         foffset_unlock(fp, foffset, 0);
 4235         fdrop(fp, td);
 4236         return (error);
 4237 }
 4238 
 4239 #ifndef _SYS_SYSPROTO_H_
 4240 struct getdents_args {
 4241         int fd;
 4242         char *buf;
 4243         size_t count;
 4244 };
 4245 #endif
 4246 int
 4247 sys_getdents(td, uap)
 4248         struct thread *td;
 4249         register struct getdents_args /* {
 4250                 int fd;
 4251                 char *buf;
 4252                 u_int count;
 4253         } */ *uap;
 4254 {
 4255         struct getdirentries_args ap;
 4256 
 4257         ap.fd = uap->fd;
 4258         ap.buf = uap->buf;
 4259         ap.count = uap->count;
 4260         ap.basep = NULL;
 4261         return (sys_getdirentries(td, &ap));
 4262 }
 4263 
 4264 /*
 4265  * Set the mode mask for creation of filesystem nodes.
 4266  */
 4267 #ifndef _SYS_SYSPROTO_H_
 4268 struct umask_args {
 4269         int     newmask;
 4270 };
 4271 #endif
 4272 int
 4273 sys_umask(td, uap)
 4274         struct thread *td;
 4275         struct umask_args /* {
 4276                 int newmask;
 4277         } */ *uap;
 4278 {
 4279         register struct filedesc *fdp;
 4280 
 4281         FILEDESC_XLOCK(td->td_proc->p_fd);
 4282         fdp = td->td_proc->p_fd;
 4283         td->td_retval[0] = fdp->fd_cmask;
 4284         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4285         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4286         return (0);
 4287 }
 4288 
 4289 /*
 4290  * Void all references to file by ripping underlying filesystem away from
 4291  * vnode.
 4292  */
 4293 #ifndef _SYS_SYSPROTO_H_
 4294 struct revoke_args {
 4295         char    *path;
 4296 };
 4297 #endif
 4298 int
 4299 sys_revoke(td, uap)
 4300         struct thread *td;
 4301         register struct revoke_args /* {
 4302                 char *path;
 4303         } */ *uap;
 4304 {
 4305         struct vnode *vp;
 4306         struct vattr vattr;
 4307         struct nameidata nd;
 4308         int error;
 4309 
 4310         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4311             uap->path, td);
 4312         if ((error = namei(&nd)) != 0)
 4313                 return (error);
 4314         vp = nd.ni_vp;
 4315         NDFREE(&nd, NDF_ONLY_PNBUF);
 4316         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4317                 error = EINVAL;
 4318                 goto out;
 4319         }
 4320 #ifdef MAC
 4321         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4322         if (error != 0)
 4323                 goto out;
 4324 #endif
 4325         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4326         if (error != 0)
 4327                 goto out;
 4328         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4329                 error = priv_check(td, PRIV_VFS_ADMIN);
 4330                 if (error != 0)
 4331                         goto out;
 4332         }
 4333         if (vcount(vp) > 1)
 4334                 VOP_REVOKE(vp, REVOKEALL);
 4335 out:
 4336         vput(vp);
 4337         return (error);
 4338 }
 4339 
 4340 /*
 4341  * Convert a user file descriptor to a kernel file entry and check that, if it
 4342  * is a capability, the correct rights are present. A reference on the file
 4343  * entry is held upon returning.
 4344  */
 4345 int
 4346 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4347 {
 4348         struct file *fp;
 4349         int error;
 4350 
 4351         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4352         if (error != 0)
 4353                 return (error);
 4354 
 4355         /*
 4356          * The file could be not of the vnode type, or it may be not
 4357          * yet fully initialized, in which case the f_vnode pointer
 4358          * may be set, but f_ops is still badfileops.  E.g.,
 4359          * devfs_open() transiently create such situation to
 4360          * facilitate csw d_fdopen().
 4361          *
 4362          * Dupfdopen() handling in kern_openat() installs the
 4363          * half-baked file into the process descriptor table, allowing
 4364          * other thread to dereference it. Guard against the race by
 4365          * checking f_ops.
 4366          */
 4367         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4368                 fdrop(fp, curthread);
 4369                 return (EINVAL);
 4370         }
 4371         *fpp = fp;
 4372         return (0);
 4373 }
 4374 
 4375 
 4376 /*
 4377  * Get an (NFS) file handle.
 4378  */
 4379 #ifndef _SYS_SYSPROTO_H_
 4380 struct lgetfh_args {
 4381         char    *fname;
 4382         fhandle_t *fhp;
 4383 };
 4384 #endif
 4385 int
 4386 sys_lgetfh(td, uap)
 4387         struct thread *td;
 4388         register struct lgetfh_args *uap;
 4389 {
 4390         struct nameidata nd;
 4391         fhandle_t fh;
 4392         register struct vnode *vp;
 4393         int error;
 4394 
 4395         error = priv_check(td, PRIV_VFS_GETFH);
 4396         if (error != 0)
 4397                 return (error);
 4398         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4399             uap->fname, td);
 4400         error = namei(&nd);
 4401         if (error != 0)
 4402                 return (error);
 4403         NDFREE(&nd, NDF_ONLY_PNBUF);
 4404         vp = nd.ni_vp;
 4405         bzero(&fh, sizeof(fh));
 4406         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4407         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4408         vput(vp);
 4409         if (error == 0)
 4410                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4411         return (error);
 4412 }
 4413 
 4414 #ifndef _SYS_SYSPROTO_H_
 4415 struct getfh_args {
 4416         char    *fname;
 4417         fhandle_t *fhp;
 4418 };
 4419 #endif
 4420 int
 4421 sys_getfh(td, uap)
 4422         struct thread *td;
 4423         register struct getfh_args *uap;
 4424 {
 4425         struct nameidata nd;
 4426         fhandle_t fh;
 4427         register struct vnode *vp;
 4428         int error;
 4429 
 4430         error = priv_check(td, PRIV_VFS_GETFH);
 4431         if (error != 0)
 4432                 return (error);
 4433         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4434             uap->fname, td);
 4435         error = namei(&nd);
 4436         if (error != 0)
 4437                 return (error);
 4438         NDFREE(&nd, NDF_ONLY_PNBUF);
 4439         vp = nd.ni_vp;
 4440         bzero(&fh, sizeof(fh));
 4441         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4442         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4443         vput(vp);
 4444         if (error == 0)
 4445                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4446         return (error);
 4447 }
 4448 
 4449 /*
 4450  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4451  * open descriptor.
 4452  *
 4453  * warning: do not remove the priv_check() call or this becomes one giant
 4454  * security hole.
 4455  */
 4456 #ifndef _SYS_SYSPROTO_H_
 4457 struct fhopen_args {
 4458         const struct fhandle *u_fhp;
 4459         int flags;
 4460 };
 4461 #endif
 4462 int
 4463 sys_fhopen(td, uap)
 4464         struct thread *td;
 4465         struct fhopen_args /* {
 4466                 const struct fhandle *u_fhp;
 4467                 int flags;
 4468         } */ *uap;
 4469 {
 4470         struct mount *mp;
 4471         struct vnode *vp;
 4472         struct fhandle fhp;
 4473         struct file *fp;
 4474         int fmode, error;
 4475         int indx;
 4476 
 4477         error = priv_check(td, PRIV_VFS_FHOPEN);
 4478         if (error != 0)
 4479                 return (error);
 4480         indx = -1;
 4481         fmode = FFLAGS(uap->flags);
 4482         /* why not allow a non-read/write open for our lockd? */
 4483         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4484                 return (EINVAL);
 4485         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4486         if (error != 0)
 4487                 return(error);
 4488         /* find the mount point */
 4489         mp = vfs_busyfs(&fhp.fh_fsid);
 4490         if (mp == NULL)
 4491                 return (ESTALE);
 4492         /* now give me my vnode, it gets returned to me locked */
 4493         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4494         vfs_unbusy(mp);
 4495         if (error != 0)
 4496                 return (error);
 4497 
 4498         error = falloc_noinstall(td, &fp);
 4499         if (error != 0) {
 4500                 vput(vp);
 4501                 return (error);
 4502         }
 4503         /*
 4504          * An extra reference on `fp' has been held for us by
 4505          * falloc_noinstall().
 4506          */
 4507 
 4508 #ifdef INVARIANTS
 4509         td->td_dupfd = -1;
 4510 #endif
 4511         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4512         if (error != 0) {
 4513                 KASSERT(fp->f_ops == &badfileops,
 4514                     ("VOP_OPEN in fhopen() set f_ops"));
 4515                 KASSERT(td->td_dupfd < 0,
 4516                     ("fhopen() encountered fdopen()"));
 4517 
 4518                 vput(vp);
 4519                 goto bad;
 4520         }
 4521 #ifdef INVARIANTS
 4522         td->td_dupfd = 0;
 4523 #endif
 4524         fp->f_vnode = vp;
 4525         fp->f_seqcount = 1;
 4526         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4527             &vnops);
 4528         VOP_UNLOCK(vp, 0);
 4529         if ((fmode & O_TRUNC) != 0) {
 4530                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4531                 if (error != 0)
 4532                         goto bad;
 4533         }
 4534 
 4535         error = finstall(td, fp, &indx, fmode, NULL);
 4536 bad:
 4537         fdrop(fp, td);
 4538         td->td_retval[0] = indx;
 4539         return (error);
 4540 }
 4541 
 4542 /*
 4543  * Stat an (NFS) file handle.
 4544  */
 4545 #ifndef _SYS_SYSPROTO_H_
 4546 struct fhstat_args {
 4547         struct fhandle *u_fhp;
 4548         struct stat *sb;
 4549 };
 4550 #endif
 4551 int
 4552 sys_fhstat(td, uap)
 4553         struct thread *td;
 4554         register struct fhstat_args /* {
 4555                 struct fhandle *u_fhp;
 4556                 struct stat *sb;
 4557         } */ *uap;
 4558 {
 4559         struct stat sb;
 4560         struct fhandle fh;
 4561         int error;
 4562 
 4563         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4564         if (error != 0)
 4565                 return (error);
 4566         error = kern_fhstat(td, fh, &sb);
 4567         if (error == 0)
 4568                 error = copyout(&sb, uap->sb, sizeof(sb));
 4569         return (error);
 4570 }
 4571 
 4572 int
 4573 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4574 {
 4575         struct mount *mp;
 4576         struct vnode *vp;
 4577         int error;
 4578 
 4579         error = priv_check(td, PRIV_VFS_FHSTAT);
 4580         if (error != 0)
 4581                 return (error);
 4582         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4583                 return (ESTALE);
 4584         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4585         vfs_unbusy(mp);
 4586         if (error != 0)
 4587                 return (error);
 4588         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4589         vput(vp);
 4590         return (error);
 4591 }
 4592 
 4593 /*
 4594  * Implement fstatfs() for (NFS) file handles.
 4595  */
 4596 #ifndef _SYS_SYSPROTO_H_
 4597 struct fhstatfs_args {
 4598         struct fhandle *u_fhp;
 4599         struct statfs *buf;
 4600 };
 4601 #endif
 4602 int
 4603 sys_fhstatfs(td, uap)
 4604         struct thread *td;
 4605         struct fhstatfs_args /* {
 4606                 struct fhandle *u_fhp;
 4607                 struct statfs *buf;
 4608         } */ *uap;
 4609 {
 4610         struct statfs sf;
 4611         fhandle_t fh;
 4612         int error;
 4613 
 4614         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4615         if (error != 0)
 4616                 return (error);
 4617         error = kern_fhstatfs(td, fh, &sf);
 4618         if (error != 0)
 4619                 return (error);
 4620         return (copyout(&sf, uap->buf, sizeof(sf)));
 4621 }
 4622 
 4623 int
 4624 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4625 {
 4626         struct statfs *sp;
 4627         struct mount *mp;
 4628         struct vnode *vp;
 4629         int error;
 4630 
 4631         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4632         if (error != 0)
 4633                 return (error);
 4634         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4635                 return (ESTALE);
 4636         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4637         if (error != 0) {
 4638                 vfs_unbusy(mp);
 4639                 return (error);
 4640         }
 4641         vput(vp);
 4642         error = prison_canseemount(td->td_ucred, mp);
 4643         if (error != 0)
 4644                 goto out;
 4645 #ifdef MAC
 4646         error = mac_mount_check_stat(td->td_ucred, mp);
 4647         if (error != 0)
 4648                 goto out;
 4649 #endif
 4650         /*
 4651          * Set these in case the underlying filesystem fails to do so.
 4652          */
 4653         sp = &mp->mnt_stat;
 4654         sp->f_version = STATFS_VERSION;
 4655         sp->f_namemax = NAME_MAX;
 4656         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4657         error = VFS_STATFS(mp, sp);
 4658         if (error == 0)
 4659                 *buf = *sp;
 4660 out:
 4661         vfs_unbusy(mp);
 4662         return (error);
 4663 }
 4664 
 4665 int
 4666 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4667 {
 4668         struct file *fp;
 4669         struct mount *mp;
 4670         struct vnode *vp;
 4671         cap_rights_t rights;
 4672         off_t olen, ooffset;
 4673         int error;
 4674 
 4675         if (offset < 0 || len <= 0)
 4676                 return (EINVAL);
 4677         /* Check for wrap. */
 4678         if (offset > OFF_MAX - len)
 4679                 return (EFBIG);
 4680         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4681         if (error != 0)
 4682                 return (error);
 4683         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4684                 error = ESPIPE;
 4685                 goto out;
 4686         }
 4687         if ((fp->f_flag & FWRITE) == 0) {
 4688                 error = EBADF;
 4689                 goto out;
 4690         }
 4691         if (fp->f_type != DTYPE_VNODE) {
 4692                 error = ENODEV;
 4693                 goto out;
 4694         }
 4695         vp = fp->f_vnode;
 4696         if (vp->v_type != VREG) {
 4697                 error = ENODEV;
 4698                 goto out;
 4699         }
 4700 
 4701         /* Allocating blocks may take a long time, so iterate. */
 4702         for (;;) {
 4703                 olen = len;
 4704                 ooffset = offset;
 4705 
 4706                 bwillwrite();
 4707                 mp = NULL;
 4708                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4709                 if (error != 0)
 4710                         break;
 4711                 error = vn_lock(vp, LK_EXCLUSIVE);
 4712                 if (error != 0) {
 4713                         vn_finished_write(mp);
 4714                         break;
 4715                 }
 4716 #ifdef MAC
 4717                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4718                 if (error == 0)
 4719 #endif
 4720                         error = VOP_ALLOCATE(vp, &offset, &len);
 4721                 VOP_UNLOCK(vp, 0);
 4722                 vn_finished_write(mp);
 4723 
 4724                 if (olen + ooffset != offset + len) {
 4725                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4726                             ooffset, olen, offset, len);
 4727                 }
 4728                 if (error != 0 || len == 0)
 4729                         break;
 4730                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4731                 maybe_yield();
 4732         }
 4733  out:
 4734         fdrop(fp, td);
 4735         return (error);
 4736 }
 4737 
      /*
       * System call wrapper around kern_posix_fallocate(), taking fd, offset
       * and len from "uap".  The helper's result is stored in td_retval[0]
       * and the wrapper itself always returns 0, so the value is delivered
       * as the call's result rather than as a failed system call --
       * presumably because posix_fallocate() reports errors through its
       * return value; confirm against posix_fallocate(2).
       *
       * NOTE(review): every value the helper returns is passed through
       * unfiltered, including anything vn_start_write() yields with PCATCH
       * set; confirm no kernel-internal code (e.g. ERESTART) can leak out.
       */
 4738 int
 4739 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4740 {
 4741 
 4742         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4743             uap->len);
 4744         return (0);
 4745 }
 4746 
 4747 /*
 4748  * Unlike madvise(2), we do not make a best effort to remember every
 4749  * possible caching hint.  Instead, we remember the last setting with
 4750  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4751  * region of any current setting.
       *
       * Apply the caching hint "advice" to the byte range of descriptor "fd"
       * that starts at "offset" and spans "len" bytes; a "len" of zero
       * extends the range to OFF_MAX.
       *
       * Returns 0 on success or an errno value: EINVAL for a negative or
       * overflowing range or an unknown advice value, ESPIPE when the file
       * ops lack DFLAG_SEEKABLE, ENODEV when the descriptor is not a
       * regular-file vnode, or whatever fget() or VOP_ADVISE() reports.
 4752  */
 4753 int
 4754 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4755     int advice)
 4756 {
 4757         struct fadvise_info *fa, *new;
 4758         struct file *fp;
 4759         struct vnode *vp;
 4760         cap_rights_t rights;
 4761         off_t end;
 4762         int error;
 4763 
              /* Reject negative values and ranges that would pass OFF_MAX. */
 4764         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4765                 return (EINVAL);
 4766         switch (advice) {
 4767         case POSIX_FADV_SEQUENTIAL:
 4768         case POSIX_FADV_RANDOM:
 4769         case POSIX_FADV_NOREUSE:
                      /*
                       * These hints may install a new record.  It is allocated
                       * here, ahead of the mtx_pool_lock() further down;
                       * whichever record ends up unused is released at "out".
                       */
 4770                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4771                 break;
 4772         case POSIX_FADV_NORMAL:
 4773         case POSIX_FADV_WILLNEED:
 4774         case POSIX_FADV_DONTNEED:
                      /*
                       * Nothing to allocate.  POSIX_FADV_NORMAL may later point
                       * "new" at a record it removes so that "out" frees it.
                       */
 4775                 new = NULL;
 4776                 break;
 4777         default:
 4778                 return (EINVAL);
 4779         }
 4780         /* XXX: CAP_POSIX_FADVISE? */
 4781         error = fget(td, fd, cap_rights_init(&rights), &fp);
              /*
               * NOTE(review): "fp" has no initializer, so the NULL test at
               * "out" depends on fget() storing NULL in it on failure --
               * confirm against fget().
               */
 4782         if (error != 0)
 4783                 goto out;
 4784         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4785                 error = ESPIPE;
 4786                 goto out;
 4787         }
 4788         if (fp->f_type != DTYPE_VNODE) {
 4789                 error = ENODEV;
 4790                 goto out;
 4791         }
 4792         vp = fp->f_vnode;
 4793         if (vp->v_type != VREG) {
 4794                 error = ENODEV;
 4795                 goto out;
 4796         }
              /*
               * "end" is the inclusive last offset of the range.  The
               * argument check at the top guarantees offset + len cannot
               * exceed OFF_MAX.
               */
 4797         if (len == 0)
 4798                 end = OFF_MAX;
 4799         else
 4800                 end = offset + len - 1;
 4801         switch (advice) {
 4802         case POSIX_FADV_SEQUENTIAL:
 4803         case POSIX_FADV_RANDOM:
 4804         case POSIX_FADV_NOREUSE:
 4805                 /*
 4806                  * Try to merge any existing non-standard region with
 4807                  * this new region if possible, otherwise create a new
 4808                  * non-standard region for this request.
 4809                  */
 4810                 mtx_pool_lock(mtxpool_sleep, fp);
 4811                 fa = fp->f_advice;
                      /*
                       * Merge only when the advice matches and the two ranges
                       * overlap or are directly adjacent.  The OFF_MAX tests
                       * keep the "+ 1" adjacency checks from overflowing.
                       */
 4812                 if (fa != NULL && fa->fa_advice == advice &&
 4813                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4814                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4815                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
                              /* Widen the existing record to the union. */
 4816                         if (offset < fa->fa_start)
 4817                                 fa->fa_start = offset;
 4818                         if (end > fa->fa_end)
 4819                                 fa->fa_end = end;
 4820                 } else {
 4821                         new->fa_advice = advice;
 4822                         new->fa_start = offset;
 4823                         new->fa_end = end;
 4824                         new->fa_prevstart = 0;
 4825                         new->fa_prevend = 0;
 4826                         fp->f_advice = new;
                              /*
                               * Hand the displaced record (possibly NULL) to
                               * "new" so it is freed at "out", after the
                               * mutex has been dropped.
                               */
 4827                         new = fa;
 4828                 }
 4829                 mtx_pool_unlock(mtxpool_sleep, fp);
 4830                 break;
 4831         case POSIX_FADV_NORMAL:
 4832                 /*
 4833                  * If the "normal" region overlaps with an existing
 4834                  * non-standard region, trim or remove the
 4835                  * non-standard region.
 4836                  */
 4837                 mtx_pool_lock(mtxpool_sleep, fp);
 4838                 fa = fp->f_advice;
 4839                 if (fa != NULL) {
                              /* Full cover: detach the record and free it at "out". */
 4840                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4841                                 new = fa;
 4842                                 fp->f_advice = NULL;
 4843                         } else if (offset <= fa->fa_start &&
 4844                             end >= fa->fa_start)
                                      /* Overlaps the front: move the start past it. */
 4845                                 fa->fa_start = end + 1;
 4846                         else if (offset <= fa->fa_end && end >= fa->fa_end)
                                      /* Overlaps the tail: pull the end back before it. */
 4847                                 fa->fa_end = offset - 1;
 4848                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4849                                 /*
 4850                                  * If the "normal" region is a middle
 4851                                  * portion of the existing
 4852                                  * non-standard region, just remove
 4853                                  * the whole thing rather than picking
 4854                                  * one side or the other to
 4855                                  * preserve.
 4856                                  */
 4857                                 new = fa;
 4858                                 fp->f_advice = NULL;
 4859                         }
 4860                 }
 4861                 mtx_pool_unlock(mtxpool_sleep, fp);
 4862                 break;
 4863         case POSIX_FADV_WILLNEED:
 4864         case POSIX_FADV_DONTNEED:
                      /* Not recorded in fp->f_advice; passed straight to the vnode. */
 4865                 error = VOP_ADVISE(vp, offset, end, advice);
 4866                 break;
 4867         }
      /*
       * Common exit for success and failure.  "new" is the unused
       * preallocation, a displaced or removed record, or NULL.
       */
 4868 out:
 4869         if (fp != NULL)
 4870                 fdrop(fp, td);
 4871         free(new, M_FADVISE);
 4872         return (error);
 4873 }
 4874 
      /*
       * System call wrapper around kern_posix_fadvise(), taking fd, offset,
       * len and advice from "uap".  The helper's result is stored in
       * td_retval[0] and the wrapper itself always returns 0, so the value
       * is delivered as the call's result rather than as a failed system
       * call -- presumably because posix_fadvise() reports errors through
       * its return value; confirm against posix_fadvise(2).
       */
 4875 int
 4876 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4877 {
 4878 
 4879         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 4880             uap->len, uap->advice);
 4881         return (0);
 4882 }

Cache object: dcbdc85b3d518ce6d63dc2304172b5f3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.