The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.0/sys/kern/vfs_syscalls.c 228035 2011-11-27 19:02:18Z kib $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 SDT_PROVIDER_DEFINE(vfs);
   90 SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
   91 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
   92 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
   93 SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
   94 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
   95 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
   96 
   97 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
   99 static int setfflags(struct thread *td, struct vnode *, int);
  100 static int setutimes(struct thread *td, struct vnode *,
  101     const struct timespec *, int, int);
  102 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  103     struct thread *td);
  104 
  /*
   * Version of POSIX asynchronous I/O that is implemented.  The AIO
   * module's initialization routine stores its version here; zero means
   * AIO is not implemented.  The value is consumed by pathconf() in this
   * file and by fpathconf() in kern_descrip.c.
   */
int async_io_version;

#ifdef DEBUG
/* Read/write integer sysctl "syncprt" under the debug node (DEBUG only). */
static int syncprt;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
  117 
  118 /*
  119  * Sync each mounted filesystem.
  120  */
  121 #ifndef _SYS_SYSPROTO_H_
  122 struct sync_args {
  123         int     dummy;
  124 };
  125 #endif
  126 /* ARGSUSED */
  127 int
  128 sys_sync(td, uap)
  129         struct thread *td;
  130         struct sync_args *uap;
  131 {
  132         struct mount *mp, *nmp;
  133         int vfslocked;
  134 
  135         mtx_lock(&mountlist_mtx);
  136         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  137                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  138                         nmp = TAILQ_NEXT(mp, mnt_list);
  139                         continue;
  140                 }
  141                 vfslocked = VFS_LOCK_GIANT(mp);
  142                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  143                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  144                         MNT_ILOCK(mp);
  145                         mp->mnt_noasync++;
  146                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
  147                         MNT_IUNLOCK(mp);
  148                         vfs_msync(mp, MNT_NOWAIT);
  149                         VFS_SYNC(mp, MNT_NOWAIT);
  150                         MNT_ILOCK(mp);
  151                         mp->mnt_noasync--;
  152                         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
  153                             mp->mnt_noasync == 0)
  154                                 mp->mnt_kern_flag |= MNTK_ASYNC;
  155                         MNT_IUNLOCK(mp);
  156                         vn_finished_write(mp);
  157                 }
  158                 VFS_UNLOCK_GIANT(vfslocked);
  159                 mtx_lock(&mountlist_mtx);
  160                 nmp = TAILQ_NEXT(mp, mnt_list);
  161                 vfs_unbusy(mp);
  162         }
  163         mtx_unlock(&mountlist_mtx);
  164         return (0);
  165 }
  166 
  167 /*
  168  * Change filesystem quotas.
  169  */
  170 #ifndef _SYS_SYSPROTO_H_
  171 struct quotactl_args {
  172         char *path;
  173         int cmd;
  174         int uid;
  175         caddr_t arg;
  176 };
  177 #endif
  178 int
  179 sys_quotactl(td, uap)
  180         struct thread *td;
  181         register struct quotactl_args /* {
  182                 char *path;
  183                 int cmd;
  184                 int uid;
  185                 caddr_t arg;
  186         } */ *uap;
  187 {
  188         struct mount *mp;
  189         int vfslocked;
  190         int error;
  191         struct nameidata nd;
  192 
  193         AUDIT_ARG_CMD(uap->cmd);
  194         AUDIT_ARG_UID(uap->uid);
  195         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  196                 return (EPERM);
  197         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  198            UIO_USERSPACE, uap->path, td);
  199         if ((error = namei(&nd)) != 0)
  200                 return (error);
  201         vfslocked = NDHASGIANT(&nd);
  202         NDFREE(&nd, NDF_ONLY_PNBUF);
  203         mp = nd.ni_vp->v_mount;
  204         vfs_ref(mp);
  205         vput(nd.ni_vp);
  206         error = vfs_busy(mp, 0);
  207         vfs_rel(mp);
  208         if (error) {
  209                 VFS_UNLOCK_GIANT(vfslocked);
  210                 return (error);
  211         }
  212         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  213         vfs_unbusy(mp);
  214         VFS_UNLOCK_GIANT(vfslocked);
  215         return (error);
  216 }
  217 
  218 /*
  219  * Used by statfs conversion routines to scale the block size up if
  220  * necessary so that all of the block counts are <= 'max_size'.  Note
  221  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  222  * value of 'n'.
  223  */
  224 void
  225 statfs_scale_blocks(struct statfs *sf, long max_size)
  226 {
  227         uint64_t count;
  228         int shift;
  229 
  230         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  231 
  232         /*
  233          * Attempt to scale the block counts to give a more accurate
  234          * overview to userland of the ratio of free space to used
  235          * space.  To do this, find the largest block count and compute
  236          * a divisor that lets it fit into a signed integer <= max_size.
  237          */
  238         if (sf->f_bavail < 0)
  239                 count = -sf->f_bavail;
  240         else
  241                 count = sf->f_bavail;
  242         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  243         if (count <= max_size)
  244                 return;
  245 
  246         count >>= flsl(max_size);
  247         shift = 0;
  248         while (count > 0) {
  249                 shift++;
  250                 count >>=1;
  251         }
  252 
  253         sf->f_bsize <<= shift;
  254         sf->f_blocks >>= shift;
  255         sf->f_bfree >>= shift;
  256         sf->f_bavail >>= shift;
  257 }
  258 
  259 /*
  260  * Get filesystem statistics.
  261  */
  262 #ifndef _SYS_SYSPROTO_H_
  263 struct statfs_args {
  264         char *path;
  265         struct statfs *buf;
  266 };
  267 #endif
  268 int
  269 sys_statfs(td, uap)
  270         struct thread *td;
  271         register struct statfs_args /* {
  272                 char *path;
  273                 struct statfs *buf;
  274         } */ *uap;
  275 {
  276         struct statfs sf;
  277         int error;
  278 
  279         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  280         if (error == 0)
  281                 error = copyout(&sf, uap->buf, sizeof(sf));
  282         return (error);
  283 }
  284 
  285 int
  286 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  287     struct statfs *buf)
  288 {
  289         struct mount *mp;
  290         struct statfs *sp, sb;
  291         int vfslocked;
  292         int error;
  293         struct nameidata nd;
  294 
  295         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  296             AUDITVNODE1, pathseg, path, td);
  297         error = namei(&nd);
  298         if (error)
  299                 return (error);
  300         vfslocked = NDHASGIANT(&nd);
  301         mp = nd.ni_vp->v_mount;
  302         vfs_ref(mp);
  303         NDFREE(&nd, NDF_ONLY_PNBUF);
  304         vput(nd.ni_vp);
  305         error = vfs_busy(mp, 0);
  306         vfs_rel(mp);
  307         if (error) {
  308                 VFS_UNLOCK_GIANT(vfslocked);
  309                 return (error);
  310         }
  311 #ifdef MAC
  312         error = mac_mount_check_stat(td->td_ucred, mp);
  313         if (error)
  314                 goto out;
  315 #endif
  316         /*
  317          * Set these in case the underlying filesystem fails to do so.
  318          */
  319         sp = &mp->mnt_stat;
  320         sp->f_version = STATFS_VERSION;
  321         sp->f_namemax = NAME_MAX;
  322         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  323         error = VFS_STATFS(mp, sp);
  324         if (error)
  325                 goto out;
  326         if (priv_check(td, PRIV_VFS_GENERATION)) {
  327                 bcopy(sp, &sb, sizeof(sb));
  328                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  329                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  330                 sp = &sb;
  331         }
  332         *buf = *sp;
  333 out:
  334         vfs_unbusy(mp);
  335         VFS_UNLOCK_GIANT(vfslocked);
  336         return (error);
  337 }
  338 
  339 /*
  340  * Get filesystem statistics.
  341  */
  342 #ifndef _SYS_SYSPROTO_H_
  343 struct fstatfs_args {
  344         int fd;
  345         struct statfs *buf;
  346 };
  347 #endif
  348 int
  349 sys_fstatfs(td, uap)
  350         struct thread *td;
  351         register struct fstatfs_args /* {
  352                 int fd;
  353                 struct statfs *buf;
  354         } */ *uap;
  355 {
  356         struct statfs sf;
  357         int error;
  358 
  359         error = kern_fstatfs(td, uap->fd, &sf);
  360         if (error == 0)
  361                 error = copyout(&sf, uap->buf, sizeof(sf));
  362         return (error);
  363 }
  364 
  365 int
  366 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  367 {
  368         struct file *fp;
  369         struct mount *mp;
  370         struct statfs *sp, sb;
  371         int vfslocked;
  372         struct vnode *vp;
  373         int error;
  374 
  375         AUDIT_ARG_FD(fd);
  376         error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
  377         if (error)
  378                 return (error);
  379         vp = fp->f_vnode;
  380         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  381         vn_lock(vp, LK_SHARED | LK_RETRY);
  382 #ifdef AUDIT
  383         AUDIT_ARG_VNODE1(vp);
  384 #endif
  385         mp = vp->v_mount;
  386         if (mp)
  387                 vfs_ref(mp);
  388         VOP_UNLOCK(vp, 0);
  389         fdrop(fp, td);
  390         if (mp == NULL) {
  391                 error = EBADF;
  392                 goto out;
  393         }
  394         error = vfs_busy(mp, 0);
  395         vfs_rel(mp);
  396         if (error) {
  397                 VFS_UNLOCK_GIANT(vfslocked);
  398                 return (error);
  399         }
  400 #ifdef MAC
  401         error = mac_mount_check_stat(td->td_ucred, mp);
  402         if (error)
  403                 goto out;
  404 #endif
  405         /*
  406          * Set these in case the underlying filesystem fails to do so.
  407          */
  408         sp = &mp->mnt_stat;
  409         sp->f_version = STATFS_VERSION;
  410         sp->f_namemax = NAME_MAX;
  411         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  412         error = VFS_STATFS(mp, sp);
  413         if (error)
  414                 goto out;
  415         if (priv_check(td, PRIV_VFS_GENERATION)) {
  416                 bcopy(sp, &sb, sizeof(sb));
  417                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  418                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  419                 sp = &sb;
  420         }
  421         *buf = *sp;
  422 out:
  423         if (mp)
  424                 vfs_unbusy(mp);
  425         VFS_UNLOCK_GIANT(vfslocked);
  426         return (error);
  427 }
  428 
  429 /*
  430  * Get statistics on all filesystems.
  431  */
  432 #ifndef _SYS_SYSPROTO_H_
  433 struct getfsstat_args {
  434         struct statfs *buf;
  435         long bufsize;
  436         int flags;
  437 };
  438 #endif
  439 int
  440 sys_getfsstat(td, uap)
  441         struct thread *td;
  442         register struct getfsstat_args /* {
  443                 struct statfs *buf;
  444                 long bufsize;
  445                 int flags;
  446         } */ *uap;
  447 {
  448 
  449         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  450             uap->flags));
  451 }
  452 
  453 /*
  454  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  455  *      The caller is responsible for freeing memory which will be allocated
  456  *      in '*buf'.
  457  */
  458 int
  459 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  460     enum uio_seg bufseg, int flags)
  461 {
  462         struct mount *mp, *nmp;
  463         struct statfs *sfsp, *sp, sb;
  464         size_t count, maxcount;
  465         int vfslocked;
  466         int error;
  467 
  468         maxcount = bufsize / sizeof(struct statfs);
  469         if (bufsize == 0)
  470                 sfsp = NULL;
  471         else if (bufseg == UIO_USERSPACE)
  472                 sfsp = *buf;
  473         else /* if (bufseg == UIO_SYSSPACE) */ {
  474                 count = 0;
  475                 mtx_lock(&mountlist_mtx);
  476                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  477                         count++;
  478                 }
  479                 mtx_unlock(&mountlist_mtx);
  480                 if (maxcount > count)
  481                         maxcount = count;
  482                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  483                     M_WAITOK);
  484         }
  485         count = 0;
  486         mtx_lock(&mountlist_mtx);
  487         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  488                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  489                         nmp = TAILQ_NEXT(mp, mnt_list);
  490                         continue;
  491                 }
  492 #ifdef MAC
  493                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  494                         nmp = TAILQ_NEXT(mp, mnt_list);
  495                         continue;
  496                 }
  497 #endif
  498                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  499                         nmp = TAILQ_NEXT(mp, mnt_list);
  500                         continue;
  501                 }
  502                 vfslocked = VFS_LOCK_GIANT(mp);
  503                 if (sfsp && count < maxcount) {
  504                         sp = &mp->mnt_stat;
  505                         /*
  506                          * Set these in case the underlying filesystem
  507                          * fails to do so.
  508                          */
  509                         sp->f_version = STATFS_VERSION;
  510                         sp->f_namemax = NAME_MAX;
  511                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  512                         /*
  513                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  514                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  515                          * overrides MNT_WAIT.
  516                          */
  517                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  518                             (flags & MNT_WAIT)) &&
  519                             (error = VFS_STATFS(mp, sp))) {
  520                                 VFS_UNLOCK_GIANT(vfslocked);
  521                                 mtx_lock(&mountlist_mtx);
  522                                 nmp = TAILQ_NEXT(mp, mnt_list);
  523                                 vfs_unbusy(mp);
  524                                 continue;
  525                         }
  526                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  527                                 bcopy(sp, &sb, sizeof(sb));
  528                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  529                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  530                                 sp = &sb;
  531                         }
  532                         if (bufseg == UIO_SYSSPACE)
  533                                 bcopy(sp, sfsp, sizeof(*sp));
  534                         else /* if (bufseg == UIO_USERSPACE) */ {
  535                                 error = copyout(sp, sfsp, sizeof(*sp));
  536                                 if (error) {
  537                                         vfs_unbusy(mp);
  538                                         VFS_UNLOCK_GIANT(vfslocked);
  539                                         return (error);
  540                                 }
  541                         }
  542                         sfsp++;
  543                 }
  544                 VFS_UNLOCK_GIANT(vfslocked);
  545                 count++;
  546                 mtx_lock(&mountlist_mtx);
  547                 nmp = TAILQ_NEXT(mp, mnt_list);
  548                 vfs_unbusy(mp);
  549         }
  550         mtx_unlock(&mountlist_mtx);
  551         if (sfsp && count > maxcount)
  552                 td->td_retval[0] = maxcount;
  553         else
  554                 td->td_retval[0] = count;
  555         return (0);
  556 }
  557 
  558 #ifdef COMPAT_FREEBSD4
  559 /*
  560  * Get old format filesystem statistics.
  561  */
  562 static void cvtstatfs(struct statfs *, struct ostatfs *);
  563 
  564 #ifndef _SYS_SYSPROTO_H_
  565 struct freebsd4_statfs_args {
  566         char *path;
  567         struct ostatfs *buf;
  568 };
  569 #endif
  570 int
  571 freebsd4_statfs(td, uap)
  572         struct thread *td;
  573         struct freebsd4_statfs_args /* {
  574                 char *path;
  575                 struct ostatfs *buf;
  576         } */ *uap;
  577 {
  578         struct ostatfs osb;
  579         struct statfs sf;
  580         int error;
  581 
  582         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  583         if (error)
  584                 return (error);
  585         cvtstatfs(&sf, &osb);
  586         return (copyout(&osb, uap->buf, sizeof(osb)));
  587 }
  588 
  589 /*
  590  * Get filesystem statistics.
  591  */
  592 #ifndef _SYS_SYSPROTO_H_
  593 struct freebsd4_fstatfs_args {
  594         int fd;
  595         struct ostatfs *buf;
  596 };
  597 #endif
  598 int
  599 freebsd4_fstatfs(td, uap)
  600         struct thread *td;
  601         struct freebsd4_fstatfs_args /* {
  602                 int fd;
  603                 struct ostatfs *buf;
  604         } */ *uap;
  605 {
  606         struct ostatfs osb;
  607         struct statfs sf;
  608         int error;
  609 
  610         error = kern_fstatfs(td, uap->fd, &sf);
  611         if (error)
  612                 return (error);
  613         cvtstatfs(&sf, &osb);
  614         return (copyout(&osb, uap->buf, sizeof(osb)));
  615 }
  616 
  617 /*
  618  * Get statistics on all filesystems.
  619  */
  620 #ifndef _SYS_SYSPROTO_H_
  621 struct freebsd4_getfsstat_args {
  622         struct ostatfs *buf;
  623         long bufsize;
  624         int flags;
  625 };
  626 #endif
  627 int
  628 freebsd4_getfsstat(td, uap)
  629         struct thread *td;
  630         register struct freebsd4_getfsstat_args /* {
  631                 struct ostatfs *buf;
  632                 long bufsize;
  633                 int flags;
  634         } */ *uap;
  635 {
  636         struct statfs *buf, *sp;
  637         struct ostatfs osb;
  638         size_t count, size;
  639         int error;
  640 
  641         count = uap->bufsize / sizeof(struct ostatfs);
  642         size = count * sizeof(struct statfs);
  643         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  644         if (size > 0) {
  645                 count = td->td_retval[0];
  646                 sp = buf;
  647                 while (count > 0 && error == 0) {
  648                         cvtstatfs(sp, &osb);
  649                         error = copyout(&osb, uap->buf, sizeof(osb));
  650                         sp++;
  651                         uap->buf++;
  652                         count--;
  653                 }
  654                 free(buf, M_TEMP);
  655         }
  656         return (error);
  657 }
  658 
  659 /*
  660  * Implement fstatfs() for (NFS) file handles.
  661  */
  662 #ifndef _SYS_SYSPROTO_H_
  663 struct freebsd4_fhstatfs_args {
  664         struct fhandle *u_fhp;
  665         struct ostatfs *buf;
  666 };
  667 #endif
  668 int
  669 freebsd4_fhstatfs(td, uap)
  670         struct thread *td;
  671         struct freebsd4_fhstatfs_args /* {
  672                 struct fhandle *u_fhp;
  673                 struct ostatfs *buf;
  674         } */ *uap;
  675 {
  676         struct ostatfs osb;
  677         struct statfs sf;
  678         fhandle_t fh;
  679         int error;
  680 
  681         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  682         if (error)
  683                 return (error);
  684         error = kern_fhstatfs(td, fh, &sf);
  685         if (error)
  686                 return (error);
  687         cvtstatfs(&sf, &osb);
  688         return (copyout(&osb, uap->buf, sizeof(osb)));
  689 }
  690 
  691 /*
  692  * Convert a new format statfs structure to an old format statfs structure.
  693  */
  694 static void
  695 cvtstatfs(nsp, osp)
  696         struct statfs *nsp;
  697         struct ostatfs *osp;
  698 {
  699 
  700         statfs_scale_blocks(nsp, LONG_MAX);
  701         bzero(osp, sizeof(*osp));
  702         osp->f_bsize = nsp->f_bsize;
  703         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  704         osp->f_blocks = nsp->f_blocks;
  705         osp->f_bfree = nsp->f_bfree;
  706         osp->f_bavail = nsp->f_bavail;
  707         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  708         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  709         osp->f_owner = nsp->f_owner;
  710         osp->f_type = nsp->f_type;
  711         osp->f_flags = nsp->f_flags;
  712         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  713         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  714         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  715         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  716         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  717             MIN(MFSNAMELEN, OMFSNAMELEN));
  718         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  719             MIN(MNAMELEN, OMNAMELEN));
  720         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  721             MIN(MNAMELEN, OMNAMELEN));
  722         osp->f_fsid = nsp->f_fsid;
  723 }
  724 #endif /* COMPAT_FREEBSD4 */
  725 
  726 /*
  727  * Change current working directory to a given file descriptor.
  728  */
  729 #ifndef _SYS_SYSPROTO_H_
  730 struct fchdir_args {
  731         int     fd;
  732 };
  733 #endif
  734 int
  735 sys_fchdir(td, uap)
  736         struct thread *td;
  737         struct fchdir_args /* {
  738                 int fd;
  739         } */ *uap;
  740 {
  741         register struct filedesc *fdp = td->td_proc->p_fd;
  742         struct vnode *vp, *tdp, *vpold;
  743         struct mount *mp;
  744         struct file *fp;
  745         int vfslocked;
  746         int error;
  747 
  748         AUDIT_ARG_FD(uap->fd);
  749         if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
  750                 return (error);
  751         vp = fp->f_vnode;
  752         VREF(vp);
  753         fdrop(fp, td);
  754         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  755         vn_lock(vp, LK_SHARED | LK_RETRY);
  756         AUDIT_ARG_VNODE1(vp);
  757         error = change_dir(vp, td);
  758         while (!error && (mp = vp->v_mountedhere) != NULL) {
  759                 int tvfslocked;
  760                 if (vfs_busy(mp, 0))
  761                         continue;
  762                 tvfslocked = VFS_LOCK_GIANT(mp);
  763                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  764                 vfs_unbusy(mp);
  765                 if (error) {
  766                         VFS_UNLOCK_GIANT(tvfslocked);
  767                         break;
  768                 }
  769                 vput(vp);
  770                 VFS_UNLOCK_GIANT(vfslocked);
  771                 vp = tdp;
  772                 vfslocked = tvfslocked;
  773         }
  774         if (error) {
  775                 vput(vp);
  776                 VFS_UNLOCK_GIANT(vfslocked);
  777                 return (error);
  778         }
  779         VOP_UNLOCK(vp, 0);
  780         VFS_UNLOCK_GIANT(vfslocked);
  781         FILEDESC_XLOCK(fdp);
  782         vpold = fdp->fd_cdir;
  783         fdp->fd_cdir = vp;
  784         FILEDESC_XUNLOCK(fdp);
  785         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  786         vrele(vpold);
  787         VFS_UNLOCK_GIANT(vfslocked);
  788         return (0);
  789 }
  790 
  791 /*
  792  * Change current working directory (``.'').
  793  */
  794 #ifndef _SYS_SYSPROTO_H_
  795 struct chdir_args {
  796         char    *path;
  797 };
  798 #endif
  799 int
  800 sys_chdir(td, uap)
  801         struct thread *td;
  802         struct chdir_args /* {
  803                 char *path;
  804         } */ *uap;
  805 {
  806 
  807         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  808 }
  809 
  810 int
  811 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  812 {
  813         register struct filedesc *fdp = td->td_proc->p_fd;
  814         int error;
  815         struct nameidata nd;
  816         struct vnode *vp;
  817         int vfslocked;
  818 
  819         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  820             MPSAFE, pathseg, path, td);
  821         if ((error = namei(&nd)) != 0)
  822                 return (error);
  823         vfslocked = NDHASGIANT(&nd);
  824         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  825                 vput(nd.ni_vp);
  826                 VFS_UNLOCK_GIANT(vfslocked);
  827                 NDFREE(&nd, NDF_ONLY_PNBUF);
  828                 return (error);
  829         }
  830         VOP_UNLOCK(nd.ni_vp, 0);
  831         VFS_UNLOCK_GIANT(vfslocked);
  832         NDFREE(&nd, NDF_ONLY_PNBUF);
  833         FILEDESC_XLOCK(fdp);
  834         vp = fdp->fd_cdir;
  835         fdp->fd_cdir = nd.ni_vp;
  836         FILEDESC_XUNLOCK(fdp);
  837         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  838         vrele(vp);
  839         VFS_UNLOCK_GIANT(vfslocked);
  840         return (0);
  841 }
  842 
  843 /*
  844  * Helper function for raised chroot(2) security function:  Refuse if
  845  * any filedescriptors are open directories.
  846  */
  847 static int
  848 chroot_refuse_vdir_fds(fdp)
  849         struct filedesc *fdp;
  850 {
  851         struct vnode *vp;
  852         struct file *fp;
  853         int fd;
  854 
  855         FILEDESC_LOCK_ASSERT(fdp);
  856 
  857         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  858                 fp = fget_locked(fdp, fd);
  859                 if (fp == NULL)
  860                         continue;
  861                 if (fp->f_type == DTYPE_VNODE) {
  862                         vp = fp->f_vnode;
  863                         if (vp->v_type == VDIR)
  864                                 return (EPERM);
  865                 }
  866         }
  867         return (0);
  868 }
  869 
  870 /*
  871  * This sysctl determines if we will allow a process to chroot(2) if it
  872  * has a directory open:
  873  *      0: disallowed for all processes.
  874  *      1: allowed for processes that were not already chroot(2)'ed.
  875  *      2: allowed for all processes.
  876  */
  877 
  878 static int chroot_allow_open_directories = 1;
  879 
  880 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  881      &chroot_allow_open_directories, 0, "");
  882 
  883 /*
  884  * Change notion of root (``/'') directory.
  885  */
  886 #ifndef _SYS_SYSPROTO_H_
  887 struct chroot_args {
  888         char    *path;
  889 };
  890 #endif
  891 int
  892 sys_chroot(td, uap)
  893         struct thread *td;
  894         struct chroot_args /* {
  895                 char *path;
  896         } */ *uap;
  897 {
  898         int error;
  899         struct nameidata nd;
  900         int vfslocked;
  901 
  902         error = priv_check(td, PRIV_VFS_CHROOT);
  903         if (error)
  904                 return (error);
  905         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  906             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  907         error = namei(&nd);
  908         if (error)
  909                 goto error;
  910         vfslocked = NDHASGIANT(&nd);
  911         if ((error = change_dir(nd.ni_vp, td)) != 0)
  912                 goto e_vunlock;
  913 #ifdef MAC
  914         if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
  915                 goto e_vunlock;
  916 #endif
  917         VOP_UNLOCK(nd.ni_vp, 0);
  918         error = change_root(nd.ni_vp, td);
  919         vrele(nd.ni_vp);
  920         VFS_UNLOCK_GIANT(vfslocked);
  921         NDFREE(&nd, NDF_ONLY_PNBUF);
  922         return (error);
  923 e_vunlock:
  924         vput(nd.ni_vp);
  925         VFS_UNLOCK_GIANT(vfslocked);
  926 error:
  927         NDFREE(&nd, NDF_ONLY_PNBUF);
  928         return (error);
  929 }
  930 
  931 /*
  932  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  933  * instance.
  934  */
  935 int
  936 change_dir(vp, td)
  937         struct vnode *vp;
  938         struct thread *td;
  939 {
  940         int error;
  941 
  942         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  943         if (vp->v_type != VDIR)
  944                 return (ENOTDIR);
  945 #ifdef MAC
  946         error = mac_vnode_check_chdir(td->td_ucred, vp);
  947         if (error)
  948                 return (error);
  949 #endif
  950         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  951         return (error);
  952 }
  953 
  954 /*
  955  * Common routine for kern_chroot() and jail_attach().  The caller is
  956  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  957  * authorize this operation.
  958  */
  959 int
  960 change_root(vp, td)
  961         struct vnode *vp;
  962         struct thread *td;
  963 {
  964         struct filedesc *fdp;
  965         struct vnode *oldvp;
  966         int vfslocked;
  967         int error;
  968 
  969         VFS_ASSERT_GIANT(vp->v_mount);
  970         fdp = td->td_proc->p_fd;
  971         FILEDESC_XLOCK(fdp);
  972         if (chroot_allow_open_directories == 0 ||
  973             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  974                 error = chroot_refuse_vdir_fds(fdp);
  975                 if (error) {
  976                         FILEDESC_XUNLOCK(fdp);
  977                         return (error);
  978                 }
  979         }
  980         oldvp = fdp->fd_rdir;
  981         fdp->fd_rdir = vp;
  982         VREF(fdp->fd_rdir);
  983         if (!fdp->fd_jdir) {
  984                 fdp->fd_jdir = vp;
  985                 VREF(fdp->fd_jdir);
  986         }
  987         FILEDESC_XUNLOCK(fdp);
  988         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  989         vrele(oldvp);
  990         VFS_UNLOCK_GIANT(vfslocked);
  991         return (0);
  992 }
  993 
  994 static __inline cap_rights_t
  995 flags_to_rights(int flags)
  996 {
  997         cap_rights_t rights = 0;
  998 
  999         switch ((flags & O_ACCMODE)) {
 1000         case O_RDONLY:
 1001                 rights |= CAP_READ;
 1002                 break;
 1003 
 1004         case O_RDWR:
 1005                 rights |= CAP_READ;
 1006                 /* fall through */
 1007 
 1008         case O_WRONLY:
 1009                 rights |= CAP_WRITE;
 1010                 break;
 1011 
 1012         case O_EXEC:
 1013                 rights |= CAP_FEXECVE;
 1014                 break;
 1015         }
 1016 
 1017         if (flags & O_CREAT)
 1018                 rights |= CAP_CREATE;
 1019 
 1020         if (flags & O_TRUNC)
 1021                 rights |= CAP_FTRUNCATE;
 1022 
 1023         if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
 1024                 rights |= CAP_FLOCK;
 1025 
 1026         return (rights);
 1027 }
 1028 
 1029 /*
 1030  * Check permissions, allocate an open file structure, and call the device
 1031  * open routine if any.
 1032  */
 1033 #ifndef _SYS_SYSPROTO_H_
 1034 struct open_args {
 1035         char    *path;
 1036         int     flags;
 1037         int     mode;
 1038 };
 1039 #endif
 1040 int
 1041 sys_open(td, uap)
 1042         struct thread *td;
 1043         register struct open_args /* {
 1044                 char *path;
 1045                 int flags;
 1046                 int mode;
 1047         } */ *uap;
 1048 {
 1049 
 1050         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1051 }
 1052 
 1053 #ifndef _SYS_SYSPROTO_H_
 1054 struct openat_args {
 1055         int     fd;
 1056         char    *path;
 1057         int     flag;
 1058         int     mode;
 1059 };
 1060 #endif
 1061 int
 1062 sys_openat(struct thread *td, struct openat_args *uap)
 1063 {
 1064 
 1065         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1066             uap->mode));
 1067 }
 1068 
 1069 int
 1070 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1071     int mode)
 1072 {
 1073 
 1074         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1075 }
 1076 
/*
 * Common implementation behind open(2) and openat(2).
 *
 * `path' (in address space `pathseg') is opened with vn_open(); `fd' is
 * handed to the name lookup (kern_open() passes AT_FDCWD).  `flags' are the
 * open(2) flags and `mode' the creation mode, which is masked with the
 * process's fd_cmask before use.
 *
 * On success the new descriptor index is stored in td->td_retval[0] and 0
 * is returned.  EINVAL is returned for an invalid access-mode combination;
 * otherwise errors from falloc_noinstall(), vn_open(), finstall(),
 * dupfdopen(), kern_capwrap(), VOP_ADVLOCK() or fo_truncate() are passed
 * back (ERESTART from the vn_open() failure path is mapped to EINTR).
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
        struct proc *p = td->td_proc;
        struct filedesc *fdp = p->p_fd;
        struct file *fp;
        struct vnode *vp;
        int cmode;
        struct file *nfp;
        /* indx stays -1 until a descriptor slot has actually been installed. */
        int type, indx = -1, error, error_open;
        struct flock lf;
        struct nameidata nd;
        int vfslocked;
        cap_rights_t rights_needed = CAP_LOOKUP;

        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* XXX: audit dirfd */
        /* CAP_LOOKUP plus whatever the open flags themselves demand. */
        rights_needed |= flags_to_rights(flags);
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.
         */
        if (flags & O_EXEC) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE)
                return (EINVAL);
        else
                flags = FFLAGS(flags);

        /*
         * allocate the file descriptor, but don't install a descriptor yet
         */
        error = falloc_noinstall(td, &nfp);
        if (error)
                return (error);
        /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
        fp = nfp;
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        /* Apply the umask, keep only permission bits, and strip S_ISTXT. */
        cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
            path, fd, rights_needed, td);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open(&nd, &flags, cmode, fp);
        if (error) {
                /*
                 * If the vn_open replaced the method vector, something
                 * wondrous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
                if (error == ENXIO && fp->f_ops != &badfileops)
                        goto success;

                /*
                 * handle special fdopen() case.  bleh.  dupfdopen() is
                 * responsible for dropping the old contents of ofiles[indx]
                 * if it succeeds.
                 *
                 * Don't do this for relative (capability) lookups; we don't
                 * understand exactly what would happen, and we don't think
                 * that it ever should.
                 */
                if ((nd.ni_strictrelative == 0) &&
                    (error == ENODEV || error == ENXIO) &&
                    (td->td_dupfd >= 0)) {
                        /* XXX from fdopen */
                        error_open = error;
                        if ((error = finstall(td, fp, &indx, flags)) != 0)
                                goto bad_unlocked;
                        if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
                            flags, error_open)) == 0)
                                goto success;
                }
                /*
                 * Clean up the descriptor, but only if another thread hadn't
                 * replaced or closed it.
                 */
                if (indx != -1)
                        fdclose(fdp, fp, indx, td);
                fdrop(fp, td);

                /* Report an interrupted open as EINTR rather than ERESTART. */
                if (error == ERESTART)
                        error = EINTR;
                return (error);
        }
        td->td_dupfd = 0;
        vfslocked = NDHASGIANT(&nd);
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;

        /*
         * Store the vnode, for any f_type. Typically, the vnode use
         * count is decremented by direct call to vn_closefile() for
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;
        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
                KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
                fp->f_seqcount = 1;
                finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
        }

        VOP_UNLOCK(vp, 0);
        /*
         * O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock (write
         * lock for O_EXLOCK, read lock otherwise), waiting for it unless
         * FNONBLOCK was requested.
         */
        if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
                lf.l_len = 0;
                if (flags & O_EXLOCK)
                        lf.l_type = F_WRLCK;
                else
                        lf.l_type = F_RDLCK;
                type = F_FLOCK;
                if ((flags & FNONBLOCK) == 0)
                        type |= F_WAIT;
                if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
                            type)) != 0)
                        goto bad;
                atomic_set_int(&fp->f_flag, FHASLOCK);
        }
        if (flags & O_TRUNC) {
                error = fo_truncate(fp, 0, td->td_ucred, td);
                if (error)
                        goto bad;
        }
        VFS_UNLOCK_GIANT(vfslocked);
success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
        if (indx == -1) {
#ifdef CAPABILITIES
                if (nd.ni_strictrelative == 1) {
                        /*
                         * We are doing a strict relative lookup; wrap the
                         * result in a capability.
                         */
                        if ((error = kern_capwrap(td, fp, nd.ni_baserights,
                            &indx)) != 0)
                                goto bad_unlocked;
                } else
#endif
                        if ((error = finstall(td, fp, &indx, flags)) != 0)
                                goto bad_unlocked;

        }

        /*
         * Release our private reference, leaving the one associated with
         * the descriptor table intact.
         */
        fdrop(fp, td);
        td->td_retval[0] = indx;
        return (0);
bad:
        /* Failure after a successful vn_open(): Giant state is still ours. */
        VFS_UNLOCK_GIANT(vfslocked);
bad_unlocked:
        /* Undo any descriptor installation and drop our file reference. */
        if (indx != -1)
                fdclose(fdp, fp, indx, td);
        fdrop(fp, td);
        td->td_retval[0] = -1;
        return (error);
}
 1246 
 1247 #ifdef COMPAT_43
 1248 /*
 1249  * Create a file.
 1250  */
 1251 #ifndef _SYS_SYSPROTO_H_
 1252 struct ocreat_args {
 1253         char    *path;
 1254         int     mode;
 1255 };
 1256 #endif
 1257 int
 1258 ocreat(td, uap)
 1259         struct thread *td;
 1260         register struct ocreat_args /* {
 1261                 char *path;
 1262                 int mode;
 1263         } */ *uap;
 1264 {
 1265 
 1266         return (kern_open(td, uap->path, UIO_USERSPACE,
 1267             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1268 }
 1269 #endif /* COMPAT_43 */
 1270 
 1271 /*
 1272  * Create a special file.
 1273  */
 1274 #ifndef _SYS_SYSPROTO_H_
 1275 struct mknod_args {
 1276         char    *path;
 1277         int     mode;
 1278         int     dev;
 1279 };
 1280 #endif
 1281 int
 1282 sys_mknod(td, uap)
 1283         struct thread *td;
 1284         register struct mknod_args /* {
 1285                 char *path;
 1286                 int mode;
 1287                 int dev;
 1288         } */ *uap;
 1289 {
 1290 
 1291         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1292 }
 1293 
 1294 #ifndef _SYS_SYSPROTO_H_
 1295 struct mknodat_args {
 1296         int     fd;
 1297         char    *path;
 1298         mode_t  mode;
 1299         dev_t   dev;
 1300 };
 1301 #endif
 1302 int
 1303 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1304 {
 1305 
 1306         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1307             uap->dev));
 1308 }
 1309 
 1310 int
 1311 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1312     int dev)
 1313 {
 1314 
 1315         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1316 }
 1317 
 1318 int
 1319 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1320     int mode, int dev)
 1321 {
 1322         struct vnode *vp;
 1323         struct mount *mp;
 1324         struct vattr vattr;
 1325         int error;
 1326         int whiteout = 0;
 1327         struct nameidata nd;
 1328         int vfslocked;
 1329 
 1330         AUDIT_ARG_MODE(mode);
 1331         AUDIT_ARG_DEV(dev);
 1332         switch (mode & S_IFMT) {
 1333         case S_IFCHR:
 1334         case S_IFBLK:
 1335                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1336                 break;
 1337         case S_IFMT:
 1338                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1339                 break;
 1340         case S_IFWHT:
 1341                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1342                 break;
 1343         case S_IFIFO:
 1344                 if (dev == 0)
 1345                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1346                 /* FALLTHROUGH */
 1347         default:
 1348                 error = EINVAL;
 1349                 break;
 1350         }
 1351         if (error)
 1352                 return (error);
 1353 restart:
 1354         bwillwrite();
 1355         NDINIT_ATRIGHTS(&nd, CREATE,
 1356             LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
 1357             CAP_MKFIFO, td);
 1358         if ((error = namei(&nd)) != 0)
 1359                 return (error);
 1360         vfslocked = NDHASGIANT(&nd);
 1361         vp = nd.ni_vp;
 1362         if (vp != NULL) {
 1363                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1364                 if (vp == nd.ni_dvp)
 1365                         vrele(nd.ni_dvp);
 1366                 else
 1367                         vput(nd.ni_dvp);
 1368                 vrele(vp);
 1369                 VFS_UNLOCK_GIANT(vfslocked);
 1370                 return (EEXIST);
 1371         } else {
 1372                 VATTR_NULL(&vattr);
 1373                 vattr.va_mode = (mode & ALLPERMS) &
 1374                     ~td->td_proc->p_fd->fd_cmask;
 1375                 vattr.va_rdev = dev;
 1376                 whiteout = 0;
 1377 
 1378                 switch (mode & S_IFMT) {
 1379                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1380                         vattr.va_type = VBAD;
 1381                         break;
 1382                 case S_IFCHR:
 1383                         vattr.va_type = VCHR;
 1384                         break;
 1385                 case S_IFBLK:
 1386                         vattr.va_type = VBLK;
 1387                         break;
 1388                 case S_IFWHT:
 1389                         whiteout = 1;
 1390                         break;
 1391                 default:
 1392                         panic("kern_mknod: invalid mode");
 1393                 }
 1394         }
 1395         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1396                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1397                 vput(nd.ni_dvp);
 1398                 VFS_UNLOCK_GIANT(vfslocked);
 1399                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1400                         return (error);
 1401                 goto restart;
 1402         }
 1403 #ifdef MAC
 1404         if (error == 0 && !whiteout)
 1405                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1406                     &nd.ni_cnd, &vattr);
 1407 #endif
 1408         if (!error) {
 1409                 if (whiteout)
 1410                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1411                 else {
 1412                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1413                                                 &nd.ni_cnd, &vattr);
 1414                         if (error == 0)
 1415                                 vput(nd.ni_vp);
 1416                 }
 1417         }
 1418         NDFREE(&nd, NDF_ONLY_PNBUF);
 1419         vput(nd.ni_dvp);
 1420         vn_finished_write(mp);
 1421         VFS_UNLOCK_GIANT(vfslocked);
 1422         return (error);
 1423 }
 1424 
 1425 /*
 1426  * Create a named pipe.
 1427  */
 1428 #ifndef _SYS_SYSPROTO_H_
 1429 struct mkfifo_args {
 1430         char    *path;
 1431         int     mode;
 1432 };
 1433 #endif
 1434 int
 1435 sys_mkfifo(td, uap)
 1436         struct thread *td;
 1437         register struct mkfifo_args /* {
 1438                 char *path;
 1439                 int mode;
 1440         } */ *uap;
 1441 {
 1442 
 1443         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1444 }
 1445 
 1446 #ifndef _SYS_SYSPROTO_H_
 1447 struct mkfifoat_args {
 1448         int     fd;
 1449         char    *path;
 1450         mode_t  mode;
 1451 };
 1452 #endif
 1453 int
 1454 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1455 {
 1456 
 1457         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1458             uap->mode));
 1459 }
 1460 
 1461 int
 1462 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1463 {
 1464 
 1465         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1466 }
 1467 
 1468 int
 1469 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1470     int mode)
 1471 {
 1472         struct mount *mp;
 1473         struct vattr vattr;
 1474         int error;
 1475         struct nameidata nd;
 1476         int vfslocked;
 1477 
 1478         AUDIT_ARG_MODE(mode);
 1479 restart:
 1480         bwillwrite();
 1481         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1482             pathseg, path, fd, td);
 1483         if ((error = namei(&nd)) != 0)
 1484                 return (error);
 1485         vfslocked = NDHASGIANT(&nd);
 1486         if (nd.ni_vp != NULL) {
 1487                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1488                 if (nd.ni_vp == nd.ni_dvp)
 1489                         vrele(nd.ni_dvp);
 1490                 else
 1491                         vput(nd.ni_dvp);
 1492                 vrele(nd.ni_vp);
 1493                 VFS_UNLOCK_GIANT(vfslocked);
 1494                 return (EEXIST);
 1495         }
 1496         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1497                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1498                 vput(nd.ni_dvp);
 1499                 VFS_UNLOCK_GIANT(vfslocked);
 1500                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1501                         return (error);
 1502                 goto restart;
 1503         }
 1504         VATTR_NULL(&vattr);
 1505         vattr.va_type = VFIFO;
 1506         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1507 #ifdef MAC
 1508         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1509             &vattr);
 1510         if (error)
 1511                 goto out;
 1512 #endif
 1513         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1514         if (error == 0)
 1515                 vput(nd.ni_vp);
 1516 #ifdef MAC
 1517 out:
 1518 #endif
 1519         vput(nd.ni_dvp);
 1520         vn_finished_write(mp);
 1521         VFS_UNLOCK_GIANT(vfslocked);
 1522         NDFREE(&nd, NDF_ONLY_PNBUF);
 1523         return (error);
 1524 }
 1525 
 1526 /*
 1527  * Make a hard file link.
 1528  */
 1529 #ifndef _SYS_SYSPROTO_H_
 1530 struct link_args {
 1531         char    *path;
 1532         char    *link;
 1533 };
 1534 #endif
 1535 int
 1536 sys_link(td, uap)
 1537         struct thread *td;
 1538         register struct link_args /* {
 1539                 char *path;
 1540                 char *link;
 1541         } */ *uap;
 1542 {
 1543 
 1544         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1545 }
 1546 
 1547 #ifndef _SYS_SYSPROTO_H_
 1548 struct linkat_args {
 1549         int     fd1;
 1550         char    *path1;
 1551         int     fd2;
 1552         char    *path2;
 1553         int     flag;
 1554 };
 1555 #endif
 1556 int
 1557 sys_linkat(struct thread *td, struct linkat_args *uap)
 1558 {
 1559         int flag;
 1560 
 1561         flag = uap->flag;
 1562         if (flag & ~AT_SYMLINK_FOLLOW)
 1563                 return (EINVAL);
 1564 
 1565         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1566             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1567 }
 1568 
/*
 * Hard link ownership policy knobs, declared under the _security_bsd sysctl
 * node and consulted by can_hardlink().  Both are initialized to 0, in which
 * case can_hardlink() imposes no ownership restriction.
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
 1579 
 1580 static int
 1581 can_hardlink(struct vnode *vp, struct ucred *cred)
 1582 {
 1583         struct vattr va;
 1584         int error;
 1585 
 1586         if (!hardlink_check_uid && !hardlink_check_gid)
 1587                 return (0);
 1588 
 1589         error = VOP_GETATTR(vp, &va, cred);
 1590         if (error != 0)
 1591                 return (error);
 1592 
 1593         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1594                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1595                 if (error)
 1596                         return (error);
 1597         }
 1598 
 1599         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1600                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1601                 if (error)
 1602                         return (error);
 1603         }
 1604 
 1605         return (0);
 1606 }
 1607 
 1608 int
 1609 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1610 {
 1611 
 1612         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1613 }
 1614 
 1615 int
 1616 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1617     enum uio_seg segflg, int follow)
 1618 {
 1619         struct vnode *vp;
 1620         struct mount *mp;
 1621         struct nameidata nd;
 1622         int vfslocked;
 1623         int lvfslocked;
 1624         int error;
 1625 
 1626         bwillwrite();
 1627         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
 1628             fd1, td);
 1629 
 1630         if ((error = namei(&nd)) != 0)
 1631                 return (error);
 1632         vfslocked = NDHASGIANT(&nd);
 1633         NDFREE(&nd, NDF_ONLY_PNBUF);
 1634         vp = nd.ni_vp;
 1635         if (vp->v_type == VDIR) {
 1636                 vrele(vp);
 1637                 VFS_UNLOCK_GIANT(vfslocked);
 1638                 return (EPERM);         /* POSIX */
 1639         }
 1640         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1641                 vrele(vp);
 1642                 VFS_UNLOCK_GIANT(vfslocked);
 1643                 return (error);
 1644         }
 1645         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1646             segflg, path2, fd2, td);
 1647         if ((error = namei(&nd)) == 0) {
 1648                 lvfslocked = NDHASGIANT(&nd);
 1649                 if (nd.ni_vp != NULL) {
 1650                         if (nd.ni_dvp == nd.ni_vp)
 1651                                 vrele(nd.ni_dvp);
 1652                         else
 1653                                 vput(nd.ni_dvp);
 1654                         vrele(nd.ni_vp);
 1655                         error = EEXIST;
 1656                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1657                     == 0) {
 1658                         error = can_hardlink(vp, td->td_ucred);
 1659                         if (error == 0)
 1660 #ifdef MAC
 1661                                 error = mac_vnode_check_link(td->td_ucred,
 1662                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1663                         if (error == 0)
 1664 #endif
 1665                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1666                         VOP_UNLOCK(vp, 0);
 1667                         vput(nd.ni_dvp);
 1668                 }
 1669                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1670                 VFS_UNLOCK_GIANT(lvfslocked);
 1671         }
 1672         vrele(vp);
 1673         vn_finished_write(mp);
 1674         VFS_UNLOCK_GIANT(vfslocked);
 1675         return (error);
 1676 }
 1677 
 1678 /*
 1679  * Make a symbolic link.
 1680  */
 1681 #ifndef _SYS_SYSPROTO_H_
 1682 struct symlink_args {
 1683         char    *path;
 1684         char    *link;
 1685 };
 1686 #endif
 1687 int
 1688 sys_symlink(td, uap)
 1689         struct thread *td;
 1690         register struct symlink_args /* {
 1691                 char *path;
 1692                 char *link;
 1693         } */ *uap;
 1694 {
 1695 
 1696         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1697 }
 1698 
 1699 #ifndef _SYS_SYSPROTO_H_
 1700 struct symlinkat_args {
 1701         char    *path;
 1702         int     fd;
 1703         char    *path2;
 1704 };
 1705 #endif
 1706 int
 1707 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1708 {
 1709 
 1710         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1711             UIO_USERSPACE));
 1712 }
 1713 
 1714 int
 1715 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1716 {
 1717 
 1718         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1719 }
 1720 
 1721 int
 1722 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1723     enum uio_seg segflg)
 1724 {
 1725         struct mount *mp;
 1726         struct vattr vattr;
 1727         char *syspath;
 1728         int error;
 1729         struct nameidata nd;
 1730         int vfslocked;
 1731 
 1732         if (segflg == UIO_SYSSPACE) {
 1733                 syspath = path1;
 1734         } else {
 1735                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1736                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1737                         goto out;
 1738         }
 1739         AUDIT_ARG_TEXT(syspath);
 1740 restart:
 1741         bwillwrite();
 1742         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1743             segflg, path2, fd, td);
 1744         if ((error = namei(&nd)) != 0)
 1745                 goto out;
 1746         vfslocked = NDHASGIANT(&nd);
 1747         if (nd.ni_vp) {
 1748                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1749                 if (nd.ni_vp == nd.ni_dvp)
 1750                         vrele(nd.ni_dvp);
 1751                 else
 1752                         vput(nd.ni_dvp);
 1753                 vrele(nd.ni_vp);
 1754                 VFS_UNLOCK_GIANT(vfslocked);
 1755                 error = EEXIST;
 1756                 goto out;
 1757         }
 1758         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1759                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1760                 vput(nd.ni_dvp);
 1761                 VFS_UNLOCK_GIANT(vfslocked);
 1762                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1763                         goto out;
 1764                 goto restart;
 1765         }
 1766         VATTR_NULL(&vattr);
 1767         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1768 #ifdef MAC
 1769         vattr.va_type = VLNK;
 1770         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1771             &vattr);
 1772         if (error)
 1773                 goto out2;
 1774 #endif
 1775         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1776         if (error == 0)
 1777                 vput(nd.ni_vp);
 1778 #ifdef MAC
 1779 out2:
 1780 #endif
 1781         NDFREE(&nd, NDF_ONLY_PNBUF);
 1782         vput(nd.ni_dvp);
 1783         vn_finished_write(mp);
 1784         VFS_UNLOCK_GIANT(vfslocked);
 1785 out:
 1786         if (segflg != UIO_SYSSPACE)
 1787                 uma_zfree(namei_zone, syspath);
 1788         return (error);
 1789 }
 1790 
 1791 /*
 1792  * Delete a whiteout from the filesystem.
 1793  */
 1794 int
 1795 sys_undelete(td, uap)
 1796         struct thread *td;
 1797         register struct undelete_args /* {
 1798                 char *path;
 1799         } */ *uap;
 1800 {
 1801         int error;
 1802         struct mount *mp;
 1803         struct nameidata nd;
 1804         int vfslocked;
 1805 
 1806 restart:
 1807         bwillwrite();
 1808         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1809             UIO_USERSPACE, uap->path, td);
 1810         error = namei(&nd);
 1811         if (error)
 1812                 return (error);
 1813         vfslocked = NDHASGIANT(&nd);
 1814 
 1815         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1816                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1817                 if (nd.ni_vp == nd.ni_dvp)
 1818                         vrele(nd.ni_dvp);
 1819                 else
 1820                         vput(nd.ni_dvp);
 1821                 if (nd.ni_vp)
 1822                         vrele(nd.ni_vp);
 1823                 VFS_UNLOCK_GIANT(vfslocked);
 1824                 return (EEXIST);
 1825         }
 1826         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1827                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1828                 vput(nd.ni_dvp);
 1829                 VFS_UNLOCK_GIANT(vfslocked);
 1830                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1831                         return (error);
 1832                 goto restart;
 1833         }
 1834         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1835         NDFREE(&nd, NDF_ONLY_PNBUF);
 1836         vput(nd.ni_dvp);
 1837         vn_finished_write(mp);
 1838         VFS_UNLOCK_GIANT(vfslocked);
 1839         return (error);
 1840 }
 1841 
 1842 /*
 1843  * Delete a name from the filesystem.
 1844  */
 1845 #ifndef _SYS_SYSPROTO_H_
 1846 struct unlink_args {
 1847         char    *path;
 1848 };
 1849 #endif
 1850 int
 1851 sys_unlink(td, uap)
 1852         struct thread *td;
 1853         struct unlink_args /* {
 1854                 char *path;
 1855         } */ *uap;
 1856 {
 1857 
 1858         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1859 }
 1860 
 1861 #ifndef _SYS_SYSPROTO_H_
 1862 struct unlinkat_args {
 1863         int     fd;
 1864         char    *path;
 1865         int     flag;
 1866 };
 1867 #endif
 1868 int
 1869 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1870 {
 1871         int flag = uap->flag;
 1872         int fd = uap->fd;
 1873         char *path = uap->path;
 1874 
 1875         if (flag & ~AT_REMOVEDIR)
 1876                 return (EINVAL);
 1877 
 1878         if (flag & AT_REMOVEDIR)
 1879                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1880         else
 1881                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1882 }
 1883 
 1884 int
 1885 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1886 {
 1887 
 1888         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1889 }
 1890 
 1891 int
 1892 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1893     ino_t oldinum)
 1894 {
 1895         struct mount *mp;
 1896         struct vnode *vp;
 1897         int error;
 1898         struct nameidata nd;
 1899         struct stat sb;
 1900         int vfslocked;
 1901 
 1902 restart:
 1903         bwillwrite();
 1904         NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1905             pathseg, path, fd, td);
 1906         if ((error = namei(&nd)) != 0)
 1907                 return (error == EINVAL ? EPERM : error);
 1908         vfslocked = NDHASGIANT(&nd);
 1909         vp = nd.ni_vp;
 1910         if (vp->v_type == VDIR && oldinum == 0) {
 1911                 error = EPERM;          /* POSIX */
 1912         } else if (oldinum != 0 &&
 1913                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1914                   sb.st_ino != oldinum) {
 1915                         error = EIDRM;  /* Identifier removed */
 1916         } else {
 1917                 /*
 1918                  * The root of a mounted filesystem cannot be deleted.
 1919                  *
 1920                  * XXX: can this only be a VDIR case?
 1921                  */
 1922                 if (vp->v_vflag & VV_ROOT)
 1923                         error = EBUSY;
 1924         }
 1925         if (error == 0) {
 1926                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1927                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1928                         vput(nd.ni_dvp);
 1929                         if (vp == nd.ni_dvp)
 1930                                 vrele(vp);
 1931                         else
 1932                                 vput(vp);
 1933                         VFS_UNLOCK_GIANT(vfslocked);
 1934                         if ((error = vn_start_write(NULL, &mp,
 1935                             V_XSLEEP | PCATCH)) != 0)
 1936                                 return (error);
 1937                         goto restart;
 1938                 }
 1939 #ifdef MAC
 1940                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1941                     &nd.ni_cnd);
 1942                 if (error)
 1943                         goto out;
 1944 #endif
 1945                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1946 #ifdef MAC
 1947 out:
 1948 #endif
 1949                 vn_finished_write(mp);
 1950         }
 1951         NDFREE(&nd, NDF_ONLY_PNBUF);
 1952         vput(nd.ni_dvp);
 1953         if (vp == nd.ni_dvp)
 1954                 vrele(vp);
 1955         else
 1956                 vput(vp);
 1957         VFS_UNLOCK_GIANT(vfslocked);
 1958         return (error);
 1959 }
 1960 
 1961 /*
 1962  * Reposition read/write file offset.
 1963  */
 1964 #ifndef _SYS_SYSPROTO_H_
 1965 struct lseek_args {
 1966         int     fd;
 1967         int     pad;
 1968         off_t   offset;
 1969         int     whence;
 1970 };
 1971 #endif
 1972 int
 1973 sys_lseek(td, uap)
 1974         struct thread *td;
 1975         register struct lseek_args /* {
 1976                 int fd;
 1977                 int pad;
 1978                 off_t offset;
 1979                 int whence;
 1980         } */ *uap;
 1981 {
 1982         struct ucred *cred = td->td_ucred;
 1983         struct file *fp;
 1984         struct vnode *vp;
 1985         struct vattr vattr;
 1986         off_t offset, size;
 1987         int error, noneg;
 1988         int vfslocked;
 1989 
 1990         AUDIT_ARG_FD(uap->fd);
 1991         if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
 1992                 return (error);
 1993         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 1994                 fdrop(fp, td);
 1995                 return (ESPIPE);
 1996         }
 1997         vp = fp->f_vnode;
 1998         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1999         noneg = (vp->v_type != VCHR);
 2000         offset = uap->offset;
 2001         switch (uap->whence) {
 2002         case L_INCR:
 2003                 if (noneg &&
 2004                     (fp->f_offset < 0 ||
 2005                     (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
 2006                         error = EOVERFLOW;
 2007                         break;
 2008                 }
 2009                 offset += fp->f_offset;
 2010                 break;
 2011         case L_XTND:
 2012                 vn_lock(vp, LK_SHARED | LK_RETRY);
 2013                 error = VOP_GETATTR(vp, &vattr, cred);
 2014                 VOP_UNLOCK(vp, 0);
 2015                 if (error)
 2016                         break;
 2017 
 2018                 /*
 2019                  * If the file references a disk device, then fetch
 2020                  * the media size and use that to determine the ending
 2021                  * offset.
 2022                  */
 2023                 if (vattr.va_size == 0 && vp->v_type == VCHR &&
 2024                     fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 2025                         vattr.va_size = size;
 2026                 if (noneg &&
 2027                     (vattr.va_size > OFF_MAX ||
 2028                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 2029                         error = EOVERFLOW;
 2030                         break;
 2031                 }
 2032                 offset += vattr.va_size;
 2033                 break;
 2034         case L_SET:
 2035                 break;
 2036         case SEEK_DATA:
 2037                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 2038                 break;
 2039         case SEEK_HOLE:
 2040                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 2041                 break;
 2042         default:
 2043                 error = EINVAL;
 2044         }
 2045         if (error == 0 && noneg && offset < 0)
 2046                 error = EINVAL;
 2047         if (error != 0)
 2048                 goto drop;
 2049         fp->f_offset = offset;
 2050         *(off_t *)(td->td_retval) = fp->f_offset;
 2051 drop:
 2052         fdrop(fp, td);
 2053         VFS_UNLOCK_GIANT(vfslocked);
 2054         return (error);
 2055 }
 2056 
 2057 #if defined(COMPAT_43)
 2058 /*
 2059  * Reposition read/write file offset.
 2060  */
 2061 #ifndef _SYS_SYSPROTO_H_
 2062 struct olseek_args {
 2063         int     fd;
 2064         long    offset;
 2065         int     whence;
 2066 };
 2067 #endif
 2068 int
 2069 olseek(td, uap)
 2070         struct thread *td;
 2071         register struct olseek_args /* {
 2072                 int fd;
 2073                 long offset;
 2074                 int whence;
 2075         } */ *uap;
 2076 {
 2077         struct lseek_args /* {
 2078                 int fd;
 2079                 int pad;
 2080                 off_t offset;
 2081                 int whence;
 2082         } */ nuap;
 2083 
 2084         nuap.fd = uap->fd;
 2085         nuap.offset = uap->offset;
 2086         nuap.whence = uap->whence;
 2087         return (sys_lseek(td, &nuap));
 2088 }
 2089 #endif /* COMPAT_43 */
 2090 
 2091 /* Version with the 'pad' argument */
 2092 int
 2093 freebsd6_lseek(td, uap)
 2094         struct thread *td;
 2095         register struct freebsd6_lseek_args *uap;
 2096 {
 2097         struct lseek_args ouap;
 2098 
 2099         ouap.fd = uap->fd;
 2100         ouap.offset = uap->offset;
 2101         ouap.whence = uap->whence;
 2102         return (sys_lseek(td, &ouap));
 2103 }
 2104 
 2105 /*
 2106  * Check access permissions using passed credentials.
 2107  */
 2108 static int
 2109 vn_access(vp, user_flags, cred, td)
 2110         struct vnode    *vp;
 2111         int             user_flags;
 2112         struct ucred    *cred;
 2113         struct thread   *td;
 2114 {
 2115         int error;
 2116         accmode_t accmode;
 2117 
 2118         /* Flags == 0 means only check for existence. */
 2119         error = 0;
 2120         if (user_flags) {
 2121                 accmode = 0;
 2122                 if (user_flags & R_OK)
 2123                         accmode |= VREAD;
 2124                 if (user_flags & W_OK)
 2125                         accmode |= VWRITE;
 2126                 if (user_flags & X_OK)
 2127                         accmode |= VEXEC;
 2128 #ifdef MAC
 2129                 error = mac_vnode_check_access(cred, vp, accmode);
 2130                 if (error)
 2131                         return (error);
 2132 #endif
 2133                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2134                         error = VOP_ACCESS(vp, accmode, cred, td);
 2135         }
 2136         return (error);
 2137 }
 2138 
 2139 /*
 2140  * Check access permissions using "real" credentials.
 2141  */
 2142 #ifndef _SYS_SYSPROTO_H_
 2143 struct access_args {
 2144         char    *path;
 2145         int     flags;
 2146 };
 2147 #endif
 2148 int
 2149 sys_access(td, uap)
 2150         struct thread *td;
 2151         register struct access_args /* {
 2152                 char *path;
 2153                 int flags;
 2154         } */ *uap;
 2155 {
 2156 
 2157         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 2158 }
 2159 
 2160 #ifndef _SYS_SYSPROTO_H_
 2161 struct faccessat_args {
 2162         int     dirfd;
 2163         char    *path;
 2164         int     mode;
 2165         int     flag;
 2166 }
 2167 #endif
 2168 int
 2169 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2170 {
 2171 
 2172         if (uap->flag & ~AT_EACCESS)
 2173                 return (EINVAL);
 2174         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2175             uap->mode));
 2176 }
 2177 
 2178 int
 2179 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2180 {
 2181 
 2182         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
 2183 }
 2184 
 2185 int
 2186 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2187     int flags, int mode)
 2188 {
 2189         struct ucred *cred, *tmpcred;
 2190         struct vnode *vp;
 2191         struct nameidata nd;
 2192         int vfslocked;
 2193         int error;
 2194 
 2195         /*
 2196          * Create and modify a temporary credential instead of one that
 2197          * is potentially shared.
 2198          */
 2199         if (!(flags & AT_EACCESS)) {
 2200                 cred = td->td_ucred;
 2201                 tmpcred = crdup(cred);
 2202                 tmpcred->cr_uid = cred->cr_ruid;
 2203                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2204                 td->td_ucred = tmpcred;
 2205         } else
 2206                 cred = tmpcred = td->td_ucred;
 2207         AUDIT_ARG_VALUE(mode);
 2208         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2209             AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
 2210         if ((error = namei(&nd)) != 0)
 2211                 goto out1;
 2212         vfslocked = NDHASGIANT(&nd);
 2213         vp = nd.ni_vp;
 2214 
 2215         error = vn_access(vp, mode, tmpcred, td);
 2216         NDFREE(&nd, NDF_ONLY_PNBUF);
 2217         vput(vp);
 2218         VFS_UNLOCK_GIANT(vfslocked);
 2219 out1:
 2220         if (!(flags & AT_EACCESS)) {
 2221                 td->td_ucred = cred;
 2222                 crfree(tmpcred);
 2223         }
 2224         return (error);
 2225 }
 2226 
 2227 /*
 2228  * Check access permissions using "effective" credentials.
 2229  */
 2230 #ifndef _SYS_SYSPROTO_H_
 2231 struct eaccess_args {
 2232         char    *path;
 2233         int     flags;
 2234 };
 2235 #endif
 2236 int
 2237 sys_eaccess(td, uap)
 2238         struct thread *td;
 2239         register struct eaccess_args /* {
 2240                 char *path;
 2241                 int flags;
 2242         } */ *uap;
 2243 {
 2244 
 2245         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 2246 }
 2247 
 2248 int
 2249 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 2250 {
 2251 
 2252         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
 2253 }
 2254 
 2255 #if defined(COMPAT_43)
 2256 /*
 2257  * Get file status; this version follows links.
 2258  */
 2259 #ifndef _SYS_SYSPROTO_H_
 2260 struct ostat_args {
 2261         char    *path;
 2262         struct ostat *ub;
 2263 };
 2264 #endif
 2265 int
 2266 ostat(td, uap)
 2267         struct thread *td;
 2268         register struct ostat_args /* {
 2269                 char *path;
 2270                 struct ostat *ub;
 2271         } */ *uap;
 2272 {
 2273         struct stat sb;
 2274         struct ostat osb;
 2275         int error;
 2276 
 2277         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2278         if (error)
 2279                 return (error);
 2280         cvtstat(&sb, &osb);
 2281         error = copyout(&osb, uap->ub, sizeof (osb));
 2282         return (error);
 2283 }
 2284 
 2285 /*
 2286  * Get file status; this version does not follow links.
 2287  */
 2288 #ifndef _SYS_SYSPROTO_H_
 2289 struct olstat_args {
 2290         char    *path;
 2291         struct ostat *ub;
 2292 };
 2293 #endif
 2294 int
 2295 olstat(td, uap)
 2296         struct thread *td;
 2297         register struct olstat_args /* {
 2298                 char *path;
 2299                 struct ostat *ub;
 2300         } */ *uap;
 2301 {
 2302         struct stat sb;
 2303         struct ostat osb;
 2304         int error;
 2305 
 2306         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2307         if (error)
 2308                 return (error);
 2309         cvtstat(&sb, &osb);
 2310         error = copyout(&osb, uap->ub, sizeof (osb));
 2311         return (error);
 2312 }
 2313 
 2314 /*
 2315  * Convert from an old to a new stat structure.
 2316  */
 2317 void
 2318 cvtstat(st, ost)
 2319         struct stat *st;
 2320         struct ostat *ost;
 2321 {
 2322 
 2323         ost->st_dev = st->st_dev;
 2324         ost->st_ino = st->st_ino;
 2325         ost->st_mode = st->st_mode;
 2326         ost->st_nlink = st->st_nlink;
 2327         ost->st_uid = st->st_uid;
 2328         ost->st_gid = st->st_gid;
 2329         ost->st_rdev = st->st_rdev;
 2330         if (st->st_size < (quad_t)1 << 32)
 2331                 ost->st_size = st->st_size;
 2332         else
 2333                 ost->st_size = -2;
 2334         ost->st_atim = st->st_atim;
 2335         ost->st_mtim = st->st_mtim;
 2336         ost->st_ctim = st->st_ctim;
 2337         ost->st_blksize = st->st_blksize;
 2338         ost->st_blocks = st->st_blocks;
 2339         ost->st_flags = st->st_flags;
 2340         ost->st_gen = st->st_gen;
 2341 }
 2342 #endif /* COMPAT_43 */
 2343 
 2344 /*
 2345  * Get file status; this version follows links.
 2346  */
 2347 #ifndef _SYS_SYSPROTO_H_
 2348 struct stat_args {
 2349         char    *path;
 2350         struct stat *ub;
 2351 };
 2352 #endif
 2353 int
 2354 sys_stat(td, uap)
 2355         struct thread *td;
 2356         register struct stat_args /* {
 2357                 char *path;
 2358                 struct stat *ub;
 2359         } */ *uap;
 2360 {
 2361         struct stat sb;
 2362         int error;
 2363 
 2364         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2365         if (error == 0)
 2366                 error = copyout(&sb, uap->ub, sizeof (sb));
 2367         return (error);
 2368 }
 2369 
 2370 #ifndef _SYS_SYSPROTO_H_
 2371 struct fstatat_args {
 2372         int     fd;
 2373         char    *path;
 2374         struct stat     *buf;
 2375         int     flag;
 2376 }
 2377 #endif
 2378 int
 2379 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2380 {
 2381         struct stat sb;
 2382         int error;
 2383 
 2384         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2385             UIO_USERSPACE, &sb);
 2386         if (error == 0)
 2387                 error = copyout(&sb, uap->buf, sizeof (sb));
 2388         return (error);
 2389 }
 2390 
 2391 int
 2392 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2393 {
 2394 
 2395         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2396 }
 2397 
 2398 int
 2399 kern_statat(struct thread *td, int flag, int fd, char *path,
 2400     enum uio_seg pathseg, struct stat *sbp)
 2401 {
 2402 
 2403         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2404 }
 2405 
 2406 int
 2407 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2408     enum uio_seg pathseg, struct stat *sbp,
 2409     void (*hook)(struct vnode *vp, struct stat *sbp))
 2410 {
 2411         struct nameidata nd;
 2412         struct stat sb;
 2413         int error, vfslocked;
 2414 
 2415         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2416                 return (EINVAL);
 2417 
 2418         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2419             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
 2420             path, fd, CAP_FSTAT, td);
 2421 
 2422         if ((error = namei(&nd)) != 0)
 2423                 return (error);
 2424         vfslocked = NDHASGIANT(&nd);
 2425         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2426         if (!error) {
 2427                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2428                 if (S_ISREG(sb.st_mode))
 2429                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2430                 if (__predict_false(hook != NULL))
 2431                         hook(nd.ni_vp, &sb);
 2432         }
 2433         NDFREE(&nd, NDF_ONLY_PNBUF);
 2434         vput(nd.ni_vp);
 2435         VFS_UNLOCK_GIANT(vfslocked);
 2436         if (error)
 2437                 return (error);
 2438         *sbp = sb;
 2439 #ifdef KTRACE
 2440         if (KTRPOINT(td, KTR_STRUCT))
 2441                 ktrstat(&sb);
 2442 #endif
 2443         return (0);
 2444 }
 2445 
 2446 /*
 2447  * Get file status; this version does not follow links.
 2448  */
 2449 #ifndef _SYS_SYSPROTO_H_
 2450 struct lstat_args {
 2451         char    *path;
 2452         struct stat *ub;
 2453 };
 2454 #endif
 2455 int
 2456 sys_lstat(td, uap)
 2457         struct thread *td;
 2458         register struct lstat_args /* {
 2459                 char *path;
 2460                 struct stat *ub;
 2461         } */ *uap;
 2462 {
 2463         struct stat sb;
 2464         int error;
 2465 
 2466         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2467         if (error == 0)
 2468                 error = copyout(&sb, uap->ub, sizeof (sb));
 2469         return (error);
 2470 }
 2471 
 2472 int
 2473 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2474 {
 2475 
 2476         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2477             sbp));
 2478 }
 2479 
 2480 /*
 2481  * Implementation of the NetBSD [l]stat() functions.
 2482  */
 2483 void
 2484 cvtnstat(sb, nsb)
 2485         struct stat *sb;
 2486         struct nstat *nsb;
 2487 {
 2488         bzero(nsb, sizeof *nsb);
 2489         nsb->st_dev = sb->st_dev;
 2490         nsb->st_ino = sb->st_ino;
 2491         nsb->st_mode = sb->st_mode;
 2492         nsb->st_nlink = sb->st_nlink;
 2493         nsb->st_uid = sb->st_uid;
 2494         nsb->st_gid = sb->st_gid;
 2495         nsb->st_rdev = sb->st_rdev;
 2496         nsb->st_atim = sb->st_atim;
 2497         nsb->st_mtim = sb->st_mtim;
 2498         nsb->st_ctim = sb->st_ctim;
 2499         nsb->st_size = sb->st_size;
 2500         nsb->st_blocks = sb->st_blocks;
 2501         nsb->st_blksize = sb->st_blksize;
 2502         nsb->st_flags = sb->st_flags;
 2503         nsb->st_gen = sb->st_gen;
 2504         nsb->st_birthtim = sb->st_birthtim;
 2505 }
 2506 
 2507 #ifndef _SYS_SYSPROTO_H_
 2508 struct nstat_args {
 2509         char    *path;
 2510         struct nstat *ub;
 2511 };
 2512 #endif
 2513 int
 2514 sys_nstat(td, uap)
 2515         struct thread *td;
 2516         register struct nstat_args /* {
 2517                 char *path;
 2518                 struct nstat *ub;
 2519         } */ *uap;
 2520 {
 2521         struct stat sb;
 2522         struct nstat nsb;
 2523         int error;
 2524 
 2525         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2526         if (error)
 2527                 return (error);
 2528         cvtnstat(&sb, &nsb);
 2529         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2530         return (error);
 2531 }
 2532 
 2533 /*
 2534  * NetBSD lstat.  Get file status; this version does not follow links.
 2535  */
 2536 #ifndef _SYS_SYSPROTO_H_
 2537 struct lstat_args {
 2538         char    *path;
 2539         struct stat *ub;
 2540 };
 2541 #endif
 2542 int
 2543 sys_nlstat(td, uap)
 2544         struct thread *td;
 2545         register struct nlstat_args /* {
 2546                 char *path;
 2547                 struct nstat *ub;
 2548         } */ *uap;
 2549 {
 2550         struct stat sb;
 2551         struct nstat nsb;
 2552         int error;
 2553 
 2554         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2555         if (error)
 2556                 return (error);
 2557         cvtnstat(&sb, &nsb);
 2558         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2559         return (error);
 2560 }
 2561 
 2562 /*
 2563  * Get configurable pathname variables.
 2564  */
 2565 #ifndef _SYS_SYSPROTO_H_
 2566 struct pathconf_args {
 2567         char    *path;
 2568         int     name;
 2569 };
 2570 #endif
 2571 int
 2572 sys_pathconf(td, uap)
 2573         struct thread *td;
 2574         register struct pathconf_args /* {
 2575                 char *path;
 2576                 int name;
 2577         } */ *uap;
 2578 {
 2579 
 2580         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2581 }
 2582 
 2583 #ifndef _SYS_SYSPROTO_H_
 2584 struct lpathconf_args {
 2585         char    *path;
 2586         int     name;
 2587 };
 2588 #endif
 2589 int
 2590 sys_lpathconf(td, uap)
 2591         struct thread *td;
 2592         register struct lpathconf_args /* {
 2593                 char *path;
 2594                 int name;
 2595         } */ *uap;
 2596 {
 2597 
 2598         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
 2599 }
 2600 
 2601 int
 2602 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2603     u_long flags)
 2604 {
 2605         struct nameidata nd;
 2606         int error, vfslocked;
 2607 
 2608         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
 2609             flags, pathseg, path, td);
 2610         if ((error = namei(&nd)) != 0)
 2611                 return (error);
 2612         vfslocked = NDHASGIANT(&nd);
 2613         NDFREE(&nd, NDF_ONLY_PNBUF);
 2614 
 2615         /* If asynchronous I/O is available, it works for all files. */
 2616         if (name == _PC_ASYNC_IO)
 2617                 td->td_retval[0] = async_io_version;
 2618         else
 2619                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2620         vput(nd.ni_vp);
 2621         VFS_UNLOCK_GIANT(vfslocked);
 2622         return (error);
 2623 }
 2624 
 2625 /*
 2626  * Return target name of a symbolic link.
 2627  */
 2628 #ifndef _SYS_SYSPROTO_H_
 2629 struct readlink_args {
 2630         char    *path;
 2631         char    *buf;
 2632         size_t  count;
 2633 };
 2634 #endif
 2635 int
 2636 sys_readlink(td, uap)
 2637         struct thread *td;
 2638         register struct readlink_args /* {
 2639                 char *path;
 2640                 char *buf;
 2641                 size_t count;
 2642         } */ *uap;
 2643 {
 2644 
 2645         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2646             UIO_USERSPACE, uap->count));
 2647 }
 2648 #ifndef _SYS_SYSPROTO_H_
 2649 struct readlinkat_args {
 2650         int     fd;
 2651         char    *path;
 2652         char    *buf;
 2653         size_t  bufsize;
 2654 };
 2655 #endif
 2656 int
 2657 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2658 {
 2659 
 2660         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2661             uap->buf, UIO_USERSPACE, uap->bufsize));
 2662 }
 2663 
 2664 int
 2665 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2666     enum uio_seg bufseg, size_t count)
 2667 {
 2668 
 2669         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2670             count));
 2671 }
 2672 
 2673 int
 2674 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2675     char *buf, enum uio_seg bufseg, size_t count)
 2676 {
 2677         struct vnode *vp;
 2678         struct iovec aiov;
 2679         struct uio auio;
 2680         int error;
 2681         struct nameidata nd;
 2682         int vfslocked;
 2683 
 2684         if (count > INT_MAX)
 2685                 return (EINVAL);
 2686 
 2687         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2688             AUDITVNODE1, pathseg, path, fd, td);
 2689 
 2690         if ((error = namei(&nd)) != 0)
 2691                 return (error);
 2692         NDFREE(&nd, NDF_ONLY_PNBUF);
 2693         vfslocked = NDHASGIANT(&nd);
 2694         vp = nd.ni_vp;
 2695 #ifdef MAC
 2696         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2697         if (error) {
 2698                 vput(vp);
 2699                 VFS_UNLOCK_GIANT(vfslocked);
 2700                 return (error);
 2701         }
 2702 #endif
 2703         if (vp->v_type != VLNK)
 2704                 error = EINVAL;
 2705         else {
 2706                 aiov.iov_base = buf;
 2707                 aiov.iov_len = count;
 2708                 auio.uio_iov = &aiov;
 2709                 auio.uio_iovcnt = 1;
 2710                 auio.uio_offset = 0;
 2711                 auio.uio_rw = UIO_READ;
 2712                 auio.uio_segflg = bufseg;
 2713                 auio.uio_td = td;
 2714                 auio.uio_resid = count;
 2715                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2716         }
 2717         vput(vp);
 2718         VFS_UNLOCK_GIANT(vfslocked);
 2719         td->td_retval[0] = count - auio.uio_resid;
 2720         return (error);
 2721 }
 2722 
 2723 /*
 2724  * Common implementation code for chflags() and fchflags().
 2725  */
 2726 static int
 2727 setfflags(td, vp, flags)
 2728         struct thread *td;
 2729         struct vnode *vp;
 2730         int flags;
 2731 {
 2732         int error;
 2733         struct mount *mp;
 2734         struct vattr vattr;
 2735 
 2736         /*
 2737          * Prevent non-root users from setting flags on devices.  When
 2738          * a device is reused, users can retain ownership of the device
 2739          * if they are allowed to set flags and programs assume that
 2740          * chown can't fail when done as root.
 2741          */
 2742         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2743                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2744                 if (error)
 2745                         return (error);
 2746         }
 2747 
 2748         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2749                 return (error);
 2750         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2751         VATTR_NULL(&vattr);
 2752         vattr.va_flags = flags;
 2753 #ifdef MAC
 2754         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2755         if (error == 0)
 2756 #endif
 2757                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2758         VOP_UNLOCK(vp, 0);
 2759         vn_finished_write(mp);
 2760         return (error);
 2761 }
 2762 
 2763 /*
 2764  * Change flags of a file given a path name.
 2765  */
 2766 #ifndef _SYS_SYSPROTO_H_
 2767 struct chflags_args {
 2768         char    *path;
 2769         int     flags;
 2770 };
 2771 #endif
 2772 int
 2773 sys_chflags(td, uap)
 2774         struct thread *td;
 2775         register struct chflags_args /* {
 2776                 char *path;
 2777                 int flags;
 2778         } */ *uap;
 2779 {
 2780         int error;
 2781         struct nameidata nd;
 2782         int vfslocked;
 2783 
 2784         AUDIT_ARG_FFLAGS(uap->flags);
 2785         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2786             uap->path, td);
 2787         if ((error = namei(&nd)) != 0)
 2788                 return (error);
 2789         NDFREE(&nd, NDF_ONLY_PNBUF);
 2790         vfslocked = NDHASGIANT(&nd);
 2791         error = setfflags(td, nd.ni_vp, uap->flags);
 2792         vrele(nd.ni_vp);
 2793         VFS_UNLOCK_GIANT(vfslocked);
 2794         return (error);
 2795 }
 2796 
 2797 /*
 2798  * Same as chflags() but doesn't follow symlinks.
 2799  */
 2800 int
 2801 sys_lchflags(td, uap)
 2802         struct thread *td;
 2803         register struct lchflags_args /* {
 2804                 char *path;
 2805                 int flags;
 2806         } */ *uap;
 2807 {
 2808         int error;
 2809         struct nameidata nd;
 2810         int vfslocked;
 2811 
 2812         AUDIT_ARG_FFLAGS(uap->flags);
 2813         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2814             uap->path, td);
 2815         if ((error = namei(&nd)) != 0)
 2816                 return (error);
 2817         vfslocked = NDHASGIANT(&nd);
 2818         NDFREE(&nd, NDF_ONLY_PNBUF);
 2819         error = setfflags(td, nd.ni_vp, uap->flags);
 2820         vrele(nd.ni_vp);
 2821         VFS_UNLOCK_GIANT(vfslocked);
 2822         return (error);
 2823 }
 2824 
 2825 /*
 2826  * Change flags of a file given a file descriptor.
 2827  */
 2828 #ifndef _SYS_SYSPROTO_H_
 2829 struct fchflags_args {
 2830         int     fd;
 2831         int     flags;
 2832 };
 2833 #endif
 2834 int
 2835 sys_fchflags(td, uap)
 2836         struct thread *td;
 2837         register struct fchflags_args /* {
 2838                 int fd;
 2839                 int flags;
 2840         } */ *uap;
 2841 {
 2842         struct file *fp;
 2843         int vfslocked;
 2844         int error;
 2845 
 2846         AUDIT_ARG_FD(uap->fd);
 2847         AUDIT_ARG_FFLAGS(uap->flags);
 2848         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
 2849             &fp)) != 0)
 2850                 return (error);
 2851         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2852 #ifdef AUDIT
 2853         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2854         AUDIT_ARG_VNODE1(fp->f_vnode);
 2855         VOP_UNLOCK(fp->f_vnode, 0);
 2856 #endif
 2857         error = setfflags(td, fp->f_vnode, uap->flags);
 2858         VFS_UNLOCK_GIANT(vfslocked);
 2859         fdrop(fp, td);
 2860         return (error);
 2861 }
 2862 
 2863 /*
 2864  * Common implementation code for chmod(), lchmod() and fchmod().
 2865  */
 2866 int
 2867 setfmode(td, cred, vp, mode)
 2868         struct thread *td;
 2869         struct ucred *cred;
 2870         struct vnode *vp;
 2871         int mode;
 2872 {
 2873         int error;
 2874         struct mount *mp;
 2875         struct vattr vattr;
 2876 
 2877         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2878                 return (error);
 2879         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2880         VATTR_NULL(&vattr);
 2881         vattr.va_mode = mode & ALLPERMS;
 2882 #ifdef MAC
 2883         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2884         if (error == 0)
 2885 #endif
 2886                 error = VOP_SETATTR(vp, &vattr, cred);
 2887         VOP_UNLOCK(vp, 0);
 2888         vn_finished_write(mp);
 2889         return (error);
 2890 }
 2891 
 2892 /*
 2893  * Change mode of a file given path name.
 2894  */
 2895 #ifndef _SYS_SYSPROTO_H_
 2896 struct chmod_args {
 2897         char    *path;
 2898         int     mode;
 2899 };
 2900 #endif
 2901 int
 2902 sys_chmod(td, uap)
 2903         struct thread *td;
 2904         register struct chmod_args /* {
 2905                 char *path;
 2906                 int mode;
 2907         } */ *uap;
 2908 {
 2909 
 2910         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2911 }
 2912 
 2913 #ifndef _SYS_SYSPROTO_H_
 2914 struct fchmodat_args {
 2915         int     dirfd;
 2916         char    *path;
 2917         mode_t  mode;
 2918         int     flag;
 2919 }
 2920 #endif
 2921 int
 2922 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2923 {
 2924         int flag = uap->flag;
 2925         int fd = uap->fd;
 2926         char *path = uap->path;
 2927         mode_t mode = uap->mode;
 2928 
 2929         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2930                 return (EINVAL);
 2931 
 2932         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2933 }
 2934 
 2935 int
 2936 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2937 {
 2938 
 2939         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2940 }
 2941 
 2942 /*
 2943  * Change mode of a file given path name (don't follow links.)
 2944  */
 2945 #ifndef _SYS_SYSPROTO_H_
 2946 struct lchmod_args {
 2947         char    *path;
 2948         int     mode;
 2949 };
 2950 #endif
 2951 int
 2952 sys_lchmod(td, uap)
 2953         struct thread *td;
 2954         register struct lchmod_args /* {
 2955                 char *path;
 2956                 int mode;
 2957         } */ *uap;
 2958 {
 2959 
 2960         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2961             uap->mode, AT_SYMLINK_NOFOLLOW));
 2962 }
 2963 
 2964 
 2965 int
 2966 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2967     mode_t mode, int flag)
 2968 {
 2969         int error;
 2970         struct nameidata nd;
 2971         int vfslocked;
 2972         int follow;
 2973 
 2974         AUDIT_ARG_MODE(mode);
 2975         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2976         NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
 2977             path, fd, CAP_FCHMOD, td);
 2978         if ((error = namei(&nd)) != 0)
 2979                 return (error);
 2980         vfslocked = NDHASGIANT(&nd);
 2981         NDFREE(&nd, NDF_ONLY_PNBUF);
 2982         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2983         vrele(nd.ni_vp);
 2984         VFS_UNLOCK_GIANT(vfslocked);
 2985         return (error);
 2986 }
 2987 
 2988 /*
 2989  * Change mode of a file given a file descriptor.
 2990  */
 2991 #ifndef _SYS_SYSPROTO_H_
 2992 struct fchmod_args {
 2993         int     fd;
 2994         int     mode;
 2995 };
 2996 #endif
 2997 int
 2998 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2999 {
 3000         struct file *fp;
 3001         int error;
 3002 
 3003         AUDIT_ARG_FD(uap->fd);
 3004         AUDIT_ARG_MODE(uap->mode);
 3005 
 3006         error = fget(td, uap->fd, CAP_FCHMOD, &fp);
 3007         if (error != 0)
 3008                 return (error);
 3009         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 3010         fdrop(fp, td);
 3011         return (error);
 3012 }
 3013 
 3014 /*
 3015  * Common implementation for chown(), lchown(), and fchown()
 3016  */
 3017 int
 3018 setfown(td, cred, vp, uid, gid)
 3019         struct thread *td;
 3020         struct ucred *cred;
 3021         struct vnode *vp;
 3022         uid_t uid;
 3023         gid_t gid;
 3024 {
 3025         int error;
 3026         struct mount *mp;
 3027         struct vattr vattr;
 3028 
 3029         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3030                 return (error);
 3031         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3032         VATTR_NULL(&vattr);
 3033         vattr.va_uid = uid;
 3034         vattr.va_gid = gid;
 3035 #ifdef MAC
 3036         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 3037             vattr.va_gid);
 3038         if (error == 0)
 3039 #endif
 3040                 error = VOP_SETATTR(vp, &vattr, cred);
 3041         VOP_UNLOCK(vp, 0);
 3042         vn_finished_write(mp);
 3043         return (error);
 3044 }
 3045 
 3046 /*
 3047  * Set ownership given a path name.
 3048  */
 3049 #ifndef _SYS_SYSPROTO_H_
 3050 struct chown_args {
 3051         char    *path;
 3052         int     uid;
 3053         int     gid;
 3054 };
 3055 #endif
 3056 int
 3057 sys_chown(td, uap)
 3058         struct thread *td;
 3059         register struct chown_args /* {
 3060                 char *path;
 3061                 int uid;
 3062                 int gid;
 3063         } */ *uap;
 3064 {
 3065 
 3066         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3067 }
 3068 
 3069 #ifndef _SYS_SYSPROTO_H_
 3070 struct fchownat_args {
 3071         int fd;
 3072         const char * path;
 3073         uid_t uid;
 3074         gid_t gid;
 3075         int flag;
 3076 };
 3077 #endif
 3078 int
 3079 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 3080 {
 3081         int flag;
 3082 
 3083         flag = uap->flag;
 3084         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3085                 return (EINVAL);
 3086 
 3087         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3088             uap->gid, uap->flag));
 3089 }
 3090 
 3091 int
 3092 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3093     int gid)
 3094 {
 3095 
 3096         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 3097 }
 3098 
 3099 int
 3100 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3101     int uid, int gid, int flag)
 3102 {
 3103         struct nameidata nd;
 3104         int error, vfslocked, follow;
 3105 
 3106         AUDIT_ARG_OWNER(uid, gid);
 3107         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3108         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
 3109             path, fd, CAP_FCHOWN, td);
 3110 
 3111         if ((error = namei(&nd)) != 0)
 3112                 return (error);
 3113         vfslocked = NDHASGIANT(&nd);
 3114         NDFREE(&nd, NDF_ONLY_PNBUF);
 3115         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3116         vrele(nd.ni_vp);
 3117         VFS_UNLOCK_GIANT(vfslocked);
 3118         return (error);
 3119 }
 3120 
 3121 /*
 3122  * Set ownership given a path name, do not cross symlinks.
 3123  */
 3124 #ifndef _SYS_SYSPROTO_H_
 3125 struct lchown_args {
 3126         char    *path;
 3127         int     uid;
 3128         int     gid;
 3129 };
 3130 #endif
 3131 int
 3132 sys_lchown(td, uap)
 3133         struct thread *td;
 3134         register struct lchown_args /* {
 3135                 char *path;
 3136                 int uid;
 3137                 int gid;
 3138         } */ *uap;
 3139 {
 3140 
 3141         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3142 }
 3143 
 3144 int
 3145 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3146     int gid)
 3147 {
 3148 
 3149         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3150             AT_SYMLINK_NOFOLLOW));
 3151 }
 3152 
 3153 /*
 3154  * Set ownership given a file descriptor.
 3155  */
 3156 #ifndef _SYS_SYSPROTO_H_
 3157 struct fchown_args {
 3158         int     fd;
 3159         int     uid;
 3160         int     gid;
 3161 };
 3162 #endif
 3163 int
 3164 sys_fchown(td, uap)
 3165         struct thread *td;
 3166         register struct fchown_args /* {
 3167                 int fd;
 3168                 int uid;
 3169                 int gid;
 3170         } */ *uap;
 3171 {
 3172         struct file *fp;
 3173         int error;
 3174 
 3175         AUDIT_ARG_FD(uap->fd);
 3176         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3177         error = fget(td, uap->fd, CAP_FCHOWN, &fp);
 3178         if (error != 0)
 3179                 return (error);
 3180         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3181         fdrop(fp, td);
 3182         return (error);
 3183 }
 3184 
 3185 /*
 3186  * Common implementation code for utimes(), lutimes(), and futimes().
 3187  */
 3188 static int
 3189 getutimes(usrtvp, tvpseg, tsp)
 3190         const struct timeval *usrtvp;
 3191         enum uio_seg tvpseg;
 3192         struct timespec *tsp;
 3193 {
 3194         struct timeval tv[2];
 3195         const struct timeval *tvp;
 3196         int error;
 3197 
 3198         if (usrtvp == NULL) {
 3199                 vfs_timestamp(&tsp[0]);
 3200                 tsp[1] = tsp[0];
 3201         } else {
 3202                 if (tvpseg == UIO_SYSSPACE) {
 3203                         tvp = usrtvp;
 3204                 } else {
 3205                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3206                                 return (error);
 3207                         tvp = tv;
 3208                 }
 3209 
 3210                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3211                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3212                         return (EINVAL);
 3213                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3214                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3215         }
 3216         return (0);
 3217 }
 3218 
 3219 /*
 3220  * Common implementation code for utimes(), lutimes(), and futimes().
 3221  */
 3222 static int
 3223 setutimes(td, vp, ts, numtimes, nullflag)
 3224         struct thread *td;
 3225         struct vnode *vp;
 3226         const struct timespec *ts;
 3227         int numtimes;
 3228         int nullflag;
 3229 {
 3230         int error, setbirthtime;
 3231         struct mount *mp;
 3232         struct vattr vattr;
 3233 
 3234         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3235                 return (error);
 3236         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3237         setbirthtime = 0;
 3238         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3239             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3240                 setbirthtime = 1;
 3241         VATTR_NULL(&vattr);
 3242         vattr.va_atime = ts[0];
 3243         vattr.va_mtime = ts[1];
 3244         if (setbirthtime)
 3245                 vattr.va_birthtime = ts[1];
 3246         if (numtimes > 2)
 3247                 vattr.va_birthtime = ts[2];
 3248         if (nullflag)
 3249                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3250 #ifdef MAC
 3251         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3252             vattr.va_mtime);
 3253 #endif
 3254         if (error == 0)
 3255                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3256         VOP_UNLOCK(vp, 0);
 3257         vn_finished_write(mp);
 3258         return (error);
 3259 }
 3260 
 3261 /*
 3262  * Set the access and modification times of a file.
 3263  */
 3264 #ifndef _SYS_SYSPROTO_H_
 3265 struct utimes_args {
 3266         char    *path;
 3267         struct  timeval *tptr;
 3268 };
 3269 #endif
 3270 int
 3271 sys_utimes(td, uap)
 3272         struct thread *td;
 3273         register struct utimes_args /* {
 3274                 char *path;
 3275                 struct timeval *tptr;
 3276         } */ *uap;
 3277 {
 3278 
 3279         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3280             UIO_USERSPACE));
 3281 }
 3282 
 3283 #ifndef _SYS_SYSPROTO_H_
 3284 struct futimesat_args {
 3285         int fd;
 3286         const char * path;
 3287         const struct timeval * times;
 3288 };
 3289 #endif
 3290 int
 3291 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3292 {
 3293 
 3294         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3295             uap->times, UIO_USERSPACE));
 3296 }
 3297 
 3298 int
 3299 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3300     struct timeval *tptr, enum uio_seg tptrseg)
 3301 {
 3302 
 3303         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3304 }
 3305 
 3306 int
 3307 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3308     struct timeval *tptr, enum uio_seg tptrseg)
 3309 {
 3310         struct nameidata nd;
 3311         struct timespec ts[2];
 3312         int error, vfslocked;
 3313 
 3314         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3315                 return (error);
 3316         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
 3317             path, fd, CAP_FUTIMES, td);
 3318 
 3319         if ((error = namei(&nd)) != 0)
 3320                 return (error);
 3321         vfslocked = NDHASGIANT(&nd);
 3322         NDFREE(&nd, NDF_ONLY_PNBUF);
 3323         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3324         vrele(nd.ni_vp);
 3325         VFS_UNLOCK_GIANT(vfslocked);
 3326         return (error);
 3327 }
 3328 
 3329 /*
 3330  * Set the access and modification times of a file.
 3331  */
 3332 #ifndef _SYS_SYSPROTO_H_
 3333 struct lutimes_args {
 3334         char    *path;
 3335         struct  timeval *tptr;
 3336 };
 3337 #endif
 3338 int
 3339 sys_lutimes(td, uap)
 3340         struct thread *td;
 3341         register struct lutimes_args /* {
 3342                 char *path;
 3343                 struct timeval *tptr;
 3344         } */ *uap;
 3345 {
 3346 
 3347         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3348             UIO_USERSPACE));
 3349 }
 3350 
 3351 int
 3352 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3353     struct timeval *tptr, enum uio_seg tptrseg)
 3354 {
 3355         struct timespec ts[2];
 3356         int error;
 3357         struct nameidata nd;
 3358         int vfslocked;
 3359 
 3360         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3361                 return (error);
 3362         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3363         if ((error = namei(&nd)) != 0)
 3364                 return (error);
 3365         vfslocked = NDHASGIANT(&nd);
 3366         NDFREE(&nd, NDF_ONLY_PNBUF);
 3367         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3368         vrele(nd.ni_vp);
 3369         VFS_UNLOCK_GIANT(vfslocked);
 3370         return (error);
 3371 }
 3372 
 3373 /*
 3374  * Set the access and modification times of a file.
 3375  */
 3376 #ifndef _SYS_SYSPROTO_H_
 3377 struct futimes_args {
 3378         int     fd;
 3379         struct  timeval *tptr;
 3380 };
 3381 #endif
 3382 int
 3383 sys_futimes(td, uap)
 3384         struct thread *td;
 3385         register struct futimes_args /* {
 3386                 int  fd;
 3387                 struct timeval *tptr;
 3388         } */ *uap;
 3389 {
 3390 
 3391         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3392 }
 3393 
 3394 int
 3395 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3396     enum uio_seg tptrseg)
 3397 {
 3398         struct timespec ts[2];
 3399         struct file *fp;
 3400         int vfslocked;
 3401         int error;
 3402 
 3403         AUDIT_ARG_FD(fd);
 3404         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3405                 return (error);
 3406         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
 3407             != 0)
 3408                 return (error);
 3409         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3410 #ifdef AUDIT
 3411         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3412         AUDIT_ARG_VNODE1(fp->f_vnode);
 3413         VOP_UNLOCK(fp->f_vnode, 0);
 3414 #endif
 3415         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3416         VFS_UNLOCK_GIANT(vfslocked);
 3417         fdrop(fp, td);
 3418         return (error);
 3419 }
 3420 
 3421 /*
 3422  * Truncate a file given its path name.
 3423  */
 3424 #ifndef _SYS_SYSPROTO_H_
 3425 struct truncate_args {
 3426         char    *path;
 3427         int     pad;
 3428         off_t   length;
 3429 };
 3430 #endif
 3431 int
 3432 sys_truncate(td, uap)
 3433         struct thread *td;
 3434         register struct truncate_args /* {
 3435                 char *path;
 3436                 int pad;
 3437                 off_t length;
 3438         } */ *uap;
 3439 {
 3440 
 3441         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3442 }
 3443 
 3444 int
 3445 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3446 {
 3447         struct mount *mp;
 3448         struct vnode *vp;
 3449         struct vattr vattr;
 3450         int error;
 3451         struct nameidata nd;
 3452         int vfslocked;
 3453 
 3454         if (length < 0)
 3455                 return(EINVAL);
 3456         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3457         if ((error = namei(&nd)) != 0)
 3458                 return (error);
 3459         vfslocked = NDHASGIANT(&nd);
 3460         vp = nd.ni_vp;
 3461         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3462                 vrele(vp);
 3463                 VFS_UNLOCK_GIANT(vfslocked);
 3464                 return (error);
 3465         }
 3466         NDFREE(&nd, NDF_ONLY_PNBUF);
 3467         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3468         if (vp->v_type == VDIR)
 3469                 error = EISDIR;
 3470 #ifdef MAC
 3471         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3472         }
 3473 #endif
 3474         else if ((error = vn_writechk(vp)) == 0 &&
 3475             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3476                 VATTR_NULL(&vattr);
 3477                 vattr.va_size = length;
 3478                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3479         }
 3480         vput(vp);
 3481         vn_finished_write(mp);
 3482         VFS_UNLOCK_GIANT(vfslocked);
 3483         return (error);
 3484 }
 3485 
 3486 #if defined(COMPAT_43)
 3487 /*
 3488  * Truncate a file given its path name.
 3489  */
 3490 #ifndef _SYS_SYSPROTO_H_
 3491 struct otruncate_args {
 3492         char    *path;
 3493         long    length;
 3494 };
 3495 #endif
 3496 int
 3497 otruncate(td, uap)
 3498         struct thread *td;
 3499         register struct otruncate_args /* {
 3500                 char *path;
 3501                 long length;
 3502         } */ *uap;
 3503 {
 3504         struct truncate_args /* {
 3505                 char *path;
 3506                 int pad;
 3507                 off_t length;
 3508         } */ nuap;
 3509 
 3510         nuap.path = uap->path;
 3511         nuap.length = uap->length;
 3512         return (sys_truncate(td, &nuap));
 3513 }
 3514 #endif /* COMPAT_43 */
 3515 
 3516 /* Versions with the pad argument */
 3517 int
 3518 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3519 {
 3520         struct truncate_args ouap;
 3521 
 3522         ouap.path = uap->path;
 3523         ouap.length = uap->length;
 3524         return (sys_truncate(td, &ouap));
 3525 }
 3526 
 3527 int
 3528 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3529 {
 3530         struct ftruncate_args ouap;
 3531 
 3532         ouap.fd = uap->fd;
 3533         ouap.length = uap->length;
 3534         return (sys_ftruncate(td, &ouap));
 3535 }
 3536 
 3537 /*
 3538  * Sync an open file.
 3539  */
 3540 #ifndef _SYS_SYSPROTO_H_
 3541 struct fsync_args {
 3542         int     fd;
 3543 };
 3544 #endif
 3545 int
 3546 sys_fsync(td, uap)
 3547         struct thread *td;
 3548         struct fsync_args /* {
 3549                 int fd;
 3550         } */ *uap;
 3551 {
 3552         struct vnode *vp;
 3553         struct mount *mp;
 3554         struct file *fp;
 3555         int vfslocked;
 3556         int error, lock_flags;
 3557 
 3558         AUDIT_ARG_FD(uap->fd);
 3559         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
 3560             &fp)) != 0)
 3561                 return (error);
 3562         vp = fp->f_vnode;
 3563         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3564         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3565                 goto drop;
 3566         if (MNT_SHARED_WRITES(mp) ||
 3567             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3568                 lock_flags = LK_SHARED;
 3569         } else {
 3570                 lock_flags = LK_EXCLUSIVE;
 3571         }
 3572         vn_lock(vp, lock_flags | LK_RETRY);
 3573         AUDIT_ARG_VNODE1(vp);
 3574         if (vp->v_object != NULL) {
 3575                 VM_OBJECT_LOCK(vp->v_object);
 3576                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3577                 VM_OBJECT_UNLOCK(vp->v_object);
 3578         }
 3579         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3580 
 3581         VOP_UNLOCK(vp, 0);
 3582         vn_finished_write(mp);
 3583 drop:
 3584         VFS_UNLOCK_GIANT(vfslocked);
 3585         fdrop(fp, td);
 3586         return (error);
 3587 }
 3588 
 3589 /*
 3590  * Rename files.  Source and destination must either both be directories, or
 3591  * both not be directories.  If target is a directory, it must be empty.
 3592  */
 3593 #ifndef _SYS_SYSPROTO_H_
 3594 struct rename_args {
 3595         char    *from;
 3596         char    *to;
 3597 };
 3598 #endif
 3599 int
 3600 sys_rename(td, uap)
 3601         struct thread *td;
 3602         register struct rename_args /* {
 3603                 char *from;
 3604                 char *to;
 3605         } */ *uap;
 3606 {
 3607 
 3608         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3609 }
 3610 
 3611 #ifndef _SYS_SYSPROTO_H_
 3612 struct renameat_args {
 3613         int     oldfd;
 3614         char    *old;
 3615         int     newfd;
 3616         char    *new;
 3617 };
 3618 #endif
 3619 int
 3620 sys_renameat(struct thread *td, struct renameat_args *uap)
 3621 {
 3622 
 3623         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3624             UIO_USERSPACE));
 3625 }
 3626 
 3627 int
 3628 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3629 {
 3630 
 3631         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3632 }
 3633 
 3634 int
 3635 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3636     enum uio_seg pathseg)
 3637 {
 3638         struct mount *mp = NULL;
 3639         struct vnode *tvp, *fvp, *tdvp;
 3640         struct nameidata fromnd, tond;
 3641         int tvfslocked;
 3642         int fvfslocked;
 3643         int error;
 3644 
 3645         bwillwrite();
 3646 #ifdef MAC
 3647         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3648             MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3649 #else
 3650         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
 3651             AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3652 #endif
 3653 
 3654         if ((error = namei(&fromnd)) != 0)
 3655                 return (error);
 3656         fvfslocked = NDHASGIANT(&fromnd);
 3657         tvfslocked = 0;
 3658 #ifdef MAC
 3659         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3660             fromnd.ni_vp, &fromnd.ni_cnd);
 3661         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3662         if (fromnd.ni_dvp != fromnd.ni_vp)
 3663                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3664 #endif
 3665         fvp = fromnd.ni_vp;
 3666         if (error == 0)
 3667                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
 3668         if (error != 0) {
 3669                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3670                 vrele(fromnd.ni_dvp);
 3671                 vrele(fvp);
 3672                 goto out1;
 3673         }
 3674         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3675             SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
 3676             td);
 3677         if (fromnd.ni_vp->v_type == VDIR)
 3678                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3679         if ((error = namei(&tond)) != 0) {
 3680                 /* Translate error code for rename("dir1", "dir2/."). */
 3681                 if (error == EISDIR && fvp->v_type == VDIR)
 3682                         error = EINVAL;
 3683                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3684                 vrele(fromnd.ni_dvp);
 3685                 vrele(fvp);
 3686                 vn_finished_write(mp);
 3687                 goto out1;
 3688         }
 3689         tvfslocked = NDHASGIANT(&tond);
 3690         tdvp = tond.ni_dvp;
 3691         tvp = tond.ni_vp;
 3692         if (tvp != NULL) {
 3693                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3694                         error = ENOTDIR;
 3695                         goto out;
 3696                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3697                         error = EISDIR;
 3698                         goto out;
 3699                 }
 3700         }
 3701         if (fvp == tdvp) {
 3702                 error = EINVAL;
 3703                 goto out;
 3704         }
 3705         /*
 3706          * If the source is the same as the destination (that is, if they
 3707          * are links to the same vnode), then there is nothing to do.
 3708          */
 3709         if (fvp == tvp)
 3710                 error = -1;
 3711 #ifdef MAC
 3712         else
 3713                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3714                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3715 #endif
 3716 out:
 3717         if (!error) {
 3718                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3719                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3720                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3721                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3722         } else {
 3723                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3724                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3725                 if (tvp)
 3726                         vput(tvp);
 3727                 if (tdvp == tvp)
 3728                         vrele(tdvp);
 3729                 else
 3730                         vput(tdvp);
 3731                 vrele(fromnd.ni_dvp);
 3732                 vrele(fvp);
 3733         }
 3734         vrele(tond.ni_startdir);
 3735         vn_finished_write(mp);
 3736 out1:
 3737         if (fromnd.ni_startdir)
 3738                 vrele(fromnd.ni_startdir);
 3739         VFS_UNLOCK_GIANT(fvfslocked);
 3740         VFS_UNLOCK_GIANT(tvfslocked);
 3741         if (error == -1)
 3742                 return (0);
 3743         return (error);
 3744 }
 3745 
 3746 /*
 3747  * Make a directory file.
 3748  */
 3749 #ifndef _SYS_SYSPROTO_H_
 3750 struct mkdir_args {
 3751         char    *path;
 3752         int     mode;
 3753 };
 3754 #endif
 3755 int
 3756 sys_mkdir(td, uap)
 3757         struct thread *td;
 3758         register struct mkdir_args /* {
 3759                 char *path;
 3760                 int mode;
 3761         } */ *uap;
 3762 {
 3763 
 3764         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3765 }
 3766 
 3767 #ifndef _SYS_SYSPROTO_H_
 3768 struct mkdirat_args {
 3769         int     fd;
 3770         char    *path;
 3771         mode_t  mode;
 3772 };
 3773 #endif
 3774 int
 3775 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3776 {
 3777 
 3778         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3779 }
 3780 
 3781 int
 3782 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3783 {
 3784 
 3785         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3786 }
 3787 
 3788 int
 3789 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3790     int mode)
 3791 {
 3792         struct mount *mp;
 3793         struct vnode *vp;
 3794         struct vattr vattr;
 3795         int error;
 3796         struct nameidata nd;
 3797         int vfslocked;
 3798 
 3799         AUDIT_ARG_MODE(mode);
 3800 restart:
 3801         bwillwrite();
 3802         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
 3803             AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
 3804         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3805         if ((error = namei(&nd)) != 0)
 3806                 return (error);
 3807         vfslocked = NDHASGIANT(&nd);
 3808         vp = nd.ni_vp;
 3809         if (vp != NULL) {
 3810                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3811                 /*
 3812                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3813                  * the strange behaviour of leaving the vnode unlocked
 3814                  * if the target is the same vnode as the parent.
 3815                  */
 3816                 if (vp == nd.ni_dvp)
 3817                         vrele(nd.ni_dvp);
 3818                 else
 3819                         vput(nd.ni_dvp);
 3820                 vrele(vp);
 3821                 VFS_UNLOCK_GIANT(vfslocked);
 3822                 return (EEXIST);
 3823         }
 3824         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3825                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3826                 vput(nd.ni_dvp);
 3827                 VFS_UNLOCK_GIANT(vfslocked);
 3828                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3829                         return (error);
 3830                 goto restart;
 3831         }
 3832         VATTR_NULL(&vattr);
 3833         vattr.va_type = VDIR;
 3834         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3835 #ifdef MAC
 3836         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3837             &vattr);
 3838         if (error)
 3839                 goto out;
 3840 #endif
 3841         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3842 #ifdef MAC
 3843 out:
 3844 #endif
 3845         NDFREE(&nd, NDF_ONLY_PNBUF);
 3846         vput(nd.ni_dvp);
 3847         if (!error)
 3848                 vput(nd.ni_vp);
 3849         vn_finished_write(mp);
 3850         VFS_UNLOCK_GIANT(vfslocked);
 3851         return (error);
 3852 }
 3853 
 3854 /*
 3855  * Remove a directory file.
 3856  */
 3857 #ifndef _SYS_SYSPROTO_H_
 3858 struct rmdir_args {
 3859         char    *path;
 3860 };
 3861 #endif
 3862 int
 3863 sys_rmdir(td, uap)
 3864         struct thread *td;
 3865         struct rmdir_args /* {
 3866                 char *path;
 3867         } */ *uap;
 3868 {
 3869 
 3870         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3871 }
 3872 
 3873 int
 3874 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3875 {
 3876 
 3877         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3878 }
 3879 
 3880 int
 3881 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3882 {
 3883         struct mount *mp;
 3884         struct vnode *vp;
 3885         int error;
 3886         struct nameidata nd;
 3887         int vfslocked;
 3888 
 3889 restart:
 3890         bwillwrite();
 3891         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
 3892             AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
 3893         if ((error = namei(&nd)) != 0)
 3894                 return (error);
 3895         vfslocked = NDHASGIANT(&nd);
 3896         vp = nd.ni_vp;
 3897         if (vp->v_type != VDIR) {
 3898                 error = ENOTDIR;
 3899                 goto out;
 3900         }
 3901         /*
 3902          * No rmdir "." please.
 3903          */
 3904         if (nd.ni_dvp == vp) {
 3905                 error = EINVAL;
 3906                 goto out;
 3907         }
 3908         /*
 3909          * The root of a mounted filesystem cannot be deleted.
 3910          */
 3911         if (vp->v_vflag & VV_ROOT) {
 3912                 error = EBUSY;
 3913                 goto out;
 3914         }
 3915 #ifdef MAC
 3916         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3917             &nd.ni_cnd);
 3918         if (error)
 3919                 goto out;
 3920 #endif
 3921         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3922                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3923                 vput(vp);
 3924                 if (nd.ni_dvp == vp)
 3925                         vrele(nd.ni_dvp);
 3926                 else
 3927                         vput(nd.ni_dvp);
 3928                 VFS_UNLOCK_GIANT(vfslocked);
 3929                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3930                         return (error);
 3931                 goto restart;
 3932         }
 3933         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3934         vn_finished_write(mp);
 3935 out:
 3936         NDFREE(&nd, NDF_ONLY_PNBUF);
 3937         vput(vp);
 3938         if (nd.ni_dvp == vp)
 3939                 vrele(nd.ni_dvp);
 3940         else
 3941                 vput(nd.ni_dvp);
 3942         VFS_UNLOCK_GIANT(vfslocked);
 3943         return (error);
 3944 }
 3945 
 3946 #ifdef COMPAT_43
 3947 /*
 3948  * Read a block of directory entries in a filesystem independent format.
 3949  */
 3950 #ifndef _SYS_SYSPROTO_H_
 3951 struct ogetdirentries_args {
 3952         int     fd;
 3953         char    *buf;
 3954         u_int   count;
 3955         long    *basep;
 3956 };
 3957 #endif
 3958 int
 3959 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3960 {
 3961         long loff;
 3962         int error;
 3963 
 3964         error = kern_ogetdirentries(td, uap, &loff);
 3965         if (error == 0)
 3966                 error = copyout(&loff, uap->basep, sizeof(long));
 3967         return (error);
 3968 }
 3969 
 3970 int
 3971 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3972     long *ploff)
 3973 {
 3974         struct vnode *vp;
 3975         struct file *fp;
 3976         struct uio auio, kuio;
 3977         struct iovec aiov, kiov;
 3978         struct dirent *dp, *edp;
 3979         caddr_t dirbuf;
 3980         int error, eofflag, readcnt, vfslocked;
 3981         long loff;
 3982 
 3983         /* XXX arbitrary sanity limit on `count'. */
 3984         if (uap->count > 64 * 1024)
 3985                 return (EINVAL);
 3986         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
 3987             &fp)) != 0)
 3988                 return (error);
 3989         if ((fp->f_flag & FREAD) == 0) {
 3990                 fdrop(fp, td);
 3991                 return (EBADF);
 3992         }
 3993         vp = fp->f_vnode;
 3994 unionread:
 3995         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3996         if (vp->v_type != VDIR) {
 3997                 VFS_UNLOCK_GIANT(vfslocked);
 3998                 fdrop(fp, td);
 3999                 return (EINVAL);
 4000         }
 4001         aiov.iov_base = uap->buf;
 4002         aiov.iov_len = uap->count;
 4003         auio.uio_iov = &aiov;
 4004         auio.uio_iovcnt = 1;
 4005         auio.uio_rw = UIO_READ;
 4006         auio.uio_segflg = UIO_USERSPACE;
 4007         auio.uio_td = td;
 4008         auio.uio_resid = uap->count;
 4009         vn_lock(vp, LK_SHARED | LK_RETRY);
 4010         loff = auio.uio_offset = fp->f_offset;
 4011 #ifdef MAC
 4012         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4013         if (error) {
 4014                 VOP_UNLOCK(vp, 0);
 4015                 VFS_UNLOCK_GIANT(vfslocked);
 4016                 fdrop(fp, td);
 4017                 return (error);
 4018         }
 4019 #endif
 4020 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4021                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4022                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4023                             NULL, NULL);
 4024                         fp->f_offset = auio.uio_offset;
 4025                 } else
 4026 #       endif
 4027         {
 4028                 kuio = auio;
 4029                 kuio.uio_iov = &kiov;
 4030                 kuio.uio_segflg = UIO_SYSSPACE;
 4031                 kiov.iov_len = uap->count;
 4032                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4033                 kiov.iov_base = dirbuf;
 4034                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4035                             NULL, NULL);
 4036                 fp->f_offset = kuio.uio_offset;
 4037                 if (error == 0) {
 4038                         readcnt = uap->count - kuio.uio_resid;
 4039                         edp = (struct dirent *)&dirbuf[readcnt];
 4040                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4041 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4042                                         /*
 4043                                          * The expected low byte of
 4044                                          * dp->d_namlen is our dp->d_type.
 4045                                          * The high MBZ byte of dp->d_namlen
 4046                                          * is our dp->d_namlen.
 4047                                          */
 4048                                         dp->d_type = dp->d_namlen;
 4049                                         dp->d_namlen = 0;
 4050 #                               else
 4051                                         /*
 4052                                          * The dp->d_type is the high byte
 4053                                          * of the expected dp->d_namlen,
 4054                                          * so must be zero'ed.
 4055                                          */
 4056                                         dp->d_type = 0;
 4057 #                               endif
 4058                                 if (dp->d_reclen > 0) {
 4059                                         dp = (struct dirent *)
 4060                                             ((char *)dp + dp->d_reclen);
 4061                                 } else {
 4062                                         error = EIO;
 4063                                         break;
 4064                                 }
 4065                         }
 4066                         if (dp >= edp)
 4067                                 error = uiomove(dirbuf, readcnt, &auio);
 4068                 }
 4069                 free(dirbuf, M_TEMP);
 4070         }
 4071         if (error) {
 4072                 VOP_UNLOCK(vp, 0);
 4073                 VFS_UNLOCK_GIANT(vfslocked);
 4074                 fdrop(fp, td);
 4075                 return (error);
 4076         }
 4077         if (uap->count == auio.uio_resid &&
 4078             (vp->v_vflag & VV_ROOT) &&
 4079             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4080                 struct vnode *tvp = vp;
 4081                 vp = vp->v_mount->mnt_vnodecovered;
 4082                 VREF(vp);
 4083                 fp->f_vnode = vp;
 4084                 fp->f_data = vp;
 4085                 fp->f_offset = 0;
 4086                 vput(tvp);
 4087                 VFS_UNLOCK_GIANT(vfslocked);
 4088                 goto unionread;
 4089         }
 4090         VOP_UNLOCK(vp, 0);
 4091         VFS_UNLOCK_GIANT(vfslocked);
 4092         fdrop(fp, td);
 4093         td->td_retval[0] = uap->count - auio.uio_resid;
 4094         if (error == 0)
 4095                 *ploff = loff;
 4096         return (error);
 4097 }
 4098 #endif /* COMPAT_43 */
 4099 
 4100 /*
 4101  * Read a block of directory entries in a filesystem independent format.
 4102  */
 4103 #ifndef _SYS_SYSPROTO_H_
 4104 struct getdirentries_args {
 4105         int     fd;
 4106         char    *buf;
 4107         u_int   count;
 4108         long    *basep;
 4109 };
 4110 #endif
 4111 int
 4112 sys_getdirentries(td, uap)
 4113         struct thread *td;
 4114         register struct getdirentries_args /* {
 4115                 int fd;
 4116                 char *buf;
 4117                 u_int count;
 4118                 long *basep;
 4119         } */ *uap;
 4120 {
 4121         long base;
 4122         int error;
 4123 
 4124         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 4125         if (error)
 4126                 return (error);
 4127         if (uap->basep != NULL)
 4128                 error = copyout(&base, uap->basep, sizeof(long));
 4129         return (error);
 4130 }
 4131 
 4132 int
 4133 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4134     long *basep)
 4135 {
 4136         struct vnode *vp;
 4137         struct file *fp;
 4138         struct uio auio;
 4139         struct iovec aiov;
 4140         int vfslocked;
 4141         long loff;
 4142         int error, eofflag;
 4143 
 4144         AUDIT_ARG_FD(fd);
 4145         if (count > INT_MAX)
 4146                 return (EINVAL);
 4147         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
 4148             &fp)) != 0)
 4149                 return (error);
 4150         if ((fp->f_flag & FREAD) == 0) {
 4151                 fdrop(fp, td);
 4152                 return (EBADF);
 4153         }
 4154         vp = fp->f_vnode;
 4155 unionread:
 4156         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4157         if (vp->v_type != VDIR) {
 4158                 VFS_UNLOCK_GIANT(vfslocked);
 4159                 error = EINVAL;
 4160                 goto fail;
 4161         }
 4162         aiov.iov_base = buf;
 4163         aiov.iov_len = count;
 4164         auio.uio_iov = &aiov;
 4165         auio.uio_iovcnt = 1;
 4166         auio.uio_rw = UIO_READ;
 4167         auio.uio_segflg = UIO_USERSPACE;
 4168         auio.uio_td = td;
 4169         auio.uio_resid = count;
 4170         vn_lock(vp, LK_SHARED | LK_RETRY);
 4171         AUDIT_ARG_VNODE1(vp);
 4172         loff = auio.uio_offset = fp->f_offset;
 4173 #ifdef MAC
 4174         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4175         if (error == 0)
 4176 #endif
 4177                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4178                     NULL);
 4179         fp->f_offset = auio.uio_offset;
 4180         if (error) {
 4181                 VOP_UNLOCK(vp, 0);
 4182                 VFS_UNLOCK_GIANT(vfslocked);
 4183                 goto fail;
 4184         }
 4185         if (count == auio.uio_resid &&
 4186             (vp->v_vflag & VV_ROOT) &&
 4187             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4188                 struct vnode *tvp = vp;
 4189                 vp = vp->v_mount->mnt_vnodecovered;
 4190                 VREF(vp);
 4191                 fp->f_vnode = vp;
 4192                 fp->f_data = vp;
 4193                 fp->f_offset = 0;
 4194                 vput(tvp);
 4195                 VFS_UNLOCK_GIANT(vfslocked);
 4196                 goto unionread;
 4197         }
 4198         VOP_UNLOCK(vp, 0);
 4199         VFS_UNLOCK_GIANT(vfslocked);
 4200         *basep = loff;
 4201         td->td_retval[0] = count - auio.uio_resid;
 4202 fail:
 4203         fdrop(fp, td);
 4204         return (error);
 4205 }
 4206 
 4207 #ifndef _SYS_SYSPROTO_H_
 4208 struct getdents_args {
 4209         int fd;
 4210         char *buf;
 4211         size_t count;
 4212 };
 4213 #endif
 4214 int
 4215 sys_getdents(td, uap)
 4216         struct thread *td;
 4217         register struct getdents_args /* {
 4218                 int fd;
 4219                 char *buf;
 4220                 u_int count;
 4221         } */ *uap;
 4222 {
 4223         struct getdirentries_args ap;
 4224         ap.fd = uap->fd;
 4225         ap.buf = uap->buf;
 4226         ap.count = uap->count;
 4227         ap.basep = NULL;
 4228         return (sys_getdirentries(td, &ap));
 4229 }
 4230 
 4231 /*
 4232  * Set the mode mask for creation of filesystem nodes.
 4233  */
 4234 #ifndef _SYS_SYSPROTO_H_
 4235 struct umask_args {
 4236         int     newmask;
 4237 };
 4238 #endif
 4239 int
 4240 sys_umask(td, uap)
 4241         struct thread *td;
 4242         struct umask_args /* {
 4243                 int newmask;
 4244         } */ *uap;
 4245 {
 4246         register struct filedesc *fdp;
 4247 
 4248         FILEDESC_XLOCK(td->td_proc->p_fd);
 4249         fdp = td->td_proc->p_fd;
 4250         td->td_retval[0] = fdp->fd_cmask;
 4251         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4252         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4253         return (0);
 4254 }
 4255 
 4256 /*
 4257  * Void all references to file by ripping underlying filesystem away from
 4258  * vnode.
 4259  */
 4260 #ifndef _SYS_SYSPROTO_H_
 4261 struct revoke_args {
 4262         char    *path;
 4263 };
 4264 #endif
 4265 int
 4266 sys_revoke(td, uap)
 4267         struct thread *td;
 4268         register struct revoke_args /* {
 4269                 char *path;
 4270         } */ *uap;
 4271 {
 4272         struct vnode *vp;
 4273         struct vattr vattr;
 4274         int error;
 4275         struct nameidata nd;
 4276         int vfslocked;
 4277 
 4278         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4279             UIO_USERSPACE, uap->path, td);
 4280         if ((error = namei(&nd)) != 0)
 4281                 return (error);
 4282         vfslocked = NDHASGIANT(&nd);
 4283         vp = nd.ni_vp;
 4284         NDFREE(&nd, NDF_ONLY_PNBUF);
 4285         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4286                 error = EINVAL;
 4287                 goto out;
 4288         }
 4289 #ifdef MAC
 4290         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4291         if (error)
 4292                 goto out;
 4293 #endif
 4294         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4295         if (error)
 4296                 goto out;
 4297         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4298                 error = priv_check(td, PRIV_VFS_ADMIN);
 4299                 if (error)
 4300                         goto out;
 4301         }
 4302         if (vcount(vp) > 1)
 4303                 VOP_REVOKE(vp, REVOKEALL);
 4304 out:
 4305         vput(vp);
 4306         VFS_UNLOCK_GIANT(vfslocked);
 4307         return (error);
 4308 }
 4309 
 4310 /*
 4311  * Convert a user file descriptor to a kernel file entry and check that, if it
 4312  * is a capability, the correct rights are present. A reference on the file
 4313  * entry is held upon returning.
 4314  */
 4315 int
 4316 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
 4317     struct file **fpp)
 4318 {
 4319         struct file *fp;
 4320 #ifdef CAPABILITIES
 4321         struct file *fp_fromcap;
 4322 #endif
 4323         int error;
 4324 
 4325         error = 0;
 4326         fp = NULL;
 4327         if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
 4328                 return (EBADF);
 4329 #ifdef CAPABILITIES
 4330         /*
 4331          * If the file descriptor is for a capability, test rights and use the
 4332          * file descriptor referenced by the capability.
 4333          */
 4334         error = cap_funwrap(fp, rights, &fp_fromcap);
 4335         if (error) {
 4336                 fdrop(fp, curthread);
 4337                 return (error);
 4338         }
 4339         if (fp != fp_fromcap) {
 4340                 fhold(fp_fromcap);
 4341                 fdrop(fp, curthread);
 4342                 fp = fp_fromcap;
 4343         }
 4344 #endif /* CAPABILITIES */
 4345 
 4346         /*
 4347          * The file could be not of the vnode type, or it may be not
 4348          * yet fully initialized, in which case the f_vnode pointer
 4349          * may be set, but f_ops is still badfileops.  E.g.,
 4350          * devfs_open() transiently create such situation to
 4351          * facilitate csw d_fdopen().
 4352          *
 4353          * Dupfdopen() handling in kern_openat() installs the
 4354          * half-baked file into the process descriptor table, allowing
 4355          * other thread to dereference it. Guard against the race by
 4356          * checking f_ops.
 4357          */
 4358         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4359                 fdrop(fp, curthread);
 4360                 return (EINVAL);
 4361         }
 4362         *fpp = fp;
 4363         return (0);
 4364 }
 4365 
 4366 
 4367 /*
 4368  * Get an (NFS) file handle.
 4369  */
 4370 #ifndef _SYS_SYSPROTO_H_
 4371 struct lgetfh_args {
 4372         char    *fname;
 4373         fhandle_t *fhp;
 4374 };
 4375 #endif
 4376 int
 4377 sys_lgetfh(td, uap)
 4378         struct thread *td;
 4379         register struct lgetfh_args *uap;
 4380 {
 4381         struct nameidata nd;
 4382         fhandle_t fh;
 4383         register struct vnode *vp;
 4384         int vfslocked;
 4385         int error;
 4386 
 4387         error = priv_check(td, PRIV_VFS_GETFH);
 4388         if (error)
 4389                 return (error);
 4390         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4391             UIO_USERSPACE, uap->fname, td);
 4392         error = namei(&nd);
 4393         if (error)
 4394                 return (error);
 4395         vfslocked = NDHASGIANT(&nd);
 4396         NDFREE(&nd, NDF_ONLY_PNBUF);
 4397         vp = nd.ni_vp;
 4398         bzero(&fh, sizeof(fh));
 4399         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4400         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4401         vput(vp);
 4402         VFS_UNLOCK_GIANT(vfslocked);
 4403         if (error)
 4404                 return (error);
 4405         error = copyout(&fh, uap->fhp, sizeof (fh));
 4406         return (error);
 4407 }
 4408 
 4409 #ifndef _SYS_SYSPROTO_H_
 4410 struct getfh_args {
 4411         char    *fname;
 4412         fhandle_t *fhp;
 4413 };
 4414 #endif
 4415 int
 4416 sys_getfh(td, uap)
 4417         struct thread *td;
 4418         register struct getfh_args *uap;
 4419 {
 4420         struct nameidata nd;
 4421         fhandle_t fh;
 4422         register struct vnode *vp;
 4423         int vfslocked;
 4424         int error;
 4425 
 4426         error = priv_check(td, PRIV_VFS_GETFH);
 4427         if (error)
 4428                 return (error);
 4429         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4430             UIO_USERSPACE, uap->fname, td);
 4431         error = namei(&nd);
 4432         if (error)
 4433                 return (error);
 4434         vfslocked = NDHASGIANT(&nd);
 4435         NDFREE(&nd, NDF_ONLY_PNBUF);
 4436         vp = nd.ni_vp;
 4437         bzero(&fh, sizeof(fh));
 4438         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4439         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4440         vput(vp);
 4441         VFS_UNLOCK_GIANT(vfslocked);
 4442         if (error)
 4443                 return (error);
 4444         error = copyout(&fh, uap->fhp, sizeof (fh));
 4445         return (error);
 4446 }
 4447 
 4448 /*
 4449  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4450  * open descriptor.
 4451  *
 4452  * warning: do not remove the priv_check() call or this becomes one giant
 4453  * security hole.
 4454  */
 4455 #ifndef _SYS_SYSPROTO_H_
 4456 struct fhopen_args {
 4457         const struct fhandle *u_fhp;
 4458         int flags;
 4459 };
 4460 #endif
 4461 int
 4462 sys_fhopen(td, uap)
 4463         struct thread *td;
 4464         struct fhopen_args /* {
 4465                 const struct fhandle *u_fhp;
 4466                 int flags;
 4467         } */ *uap;
 4468 {
 4469         struct proc *p = td->td_proc;
 4470         struct mount *mp;
 4471         struct vnode *vp;
 4472         struct fhandle fhp;
 4473         struct vattr vat;
 4474         struct vattr *vap = &vat;
 4475         struct flock lf;
 4476         struct file *fp;
 4477         register struct filedesc *fdp = p->p_fd;
 4478         int fmode, error, type;
 4479         accmode_t accmode;
 4480         struct file *nfp;
 4481         int vfslocked;
 4482         int indx;
 4483 
 4484         error = priv_check(td, PRIV_VFS_FHOPEN);
 4485         if (error)
 4486                 return (error);
 4487         fmode = FFLAGS(uap->flags);
 4488         /* why not allow a non-read/write open for our lockd? */
 4489         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4490                 return (EINVAL);
 4491         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4492         if (error)
 4493                 return(error);
 4494         /* find the mount point */
 4495         mp = vfs_busyfs(&fhp.fh_fsid);
 4496         if (mp == NULL)
 4497                 return (ESTALE);
 4498         vfslocked = VFS_LOCK_GIANT(mp);
 4499         /* now give me my vnode, it gets returned to me locked */
 4500         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4501         vfs_unbusy(mp);
 4502         if (error)
 4503                 goto out;
 4504         /*
 4505          * from now on we have to make sure not
 4506          * to forget about the vnode
 4507          * any error that causes an abort must vput(vp)
 4508          * just set error = err and 'goto bad;'.
 4509          */
 4510 
 4511         /*
 4512          * from vn_open
 4513          */
 4514         if (vp->v_type == VLNK) {
 4515                 error = EMLINK;
 4516                 goto bad;
 4517         }
 4518         if (vp->v_type == VSOCK) {
 4519                 error = EOPNOTSUPP;
 4520                 goto bad;
 4521         }
 4522         if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
 4523                 error = ENOTDIR;
 4524                 goto bad;
 4525         }
 4526         accmode = 0;
 4527         if (fmode & (FWRITE | O_TRUNC)) {
 4528                 if (vp->v_type == VDIR) {
 4529                         error = EISDIR;
 4530                         goto bad;
 4531                 }
 4532                 error = vn_writechk(vp);
 4533                 if (error)
 4534                         goto bad;
 4535                 accmode |= VWRITE;
 4536         }
 4537         if (fmode & FREAD)
 4538                 accmode |= VREAD;
 4539         if ((fmode & O_APPEND) && (fmode & FWRITE))
 4540                 accmode |= VAPPEND;
 4541 #ifdef MAC
 4542         error = mac_vnode_check_open(td->td_ucred, vp, accmode);
 4543         if (error)
 4544                 goto bad;
 4545 #endif
 4546         if (accmode) {
 4547                 error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
 4548                 if (error)
 4549                         goto bad;
 4550         }
 4551         if (fmode & O_TRUNC) {
 4552                 vfs_ref(mp);
 4553                 VOP_UNLOCK(vp, 0);                              /* XXX */
 4554                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
 4555                         vrele(vp);
 4556                         vfs_rel(mp);
 4557                         goto out;
 4558                 }
 4559                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4560                 vfs_rel(mp);
 4561 #ifdef MAC
 4562                 /*
 4563                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4564                  * should be right.
 4565                  */
 4566                 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
 4567                 if (error == 0) {
 4568 #endif
 4569                         VATTR_NULL(vap);
 4570                         vap->va_size = 0;
 4571                         error = VOP_SETATTR(vp, vap, td->td_ucred);
 4572 #ifdef MAC
 4573                 }
 4574 #endif
 4575                 vn_finished_write(mp);
 4576                 if (error)
 4577                         goto bad;
 4578         }
 4579         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4580         if (error)
 4581                 goto bad;
 4582 
 4583         if (fmode & FWRITE)
 4584                 vp->v_writecount++;
 4585 
 4586         /*
 4587          * end of vn_open code
 4588          */
 4589 
 4590         if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
 4591                 if (fmode & FWRITE)
 4592                         vp->v_writecount--;
 4593                 goto bad;
 4594         }
 4595         /* An extra reference on `nfp' has been held for us by falloc(). */
 4596         fp = nfp;
 4597         nfp->f_vnode = vp;
 4598         finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
 4599         if (fmode & (O_EXLOCK | O_SHLOCK)) {
 4600                 lf.l_whence = SEEK_SET;
 4601                 lf.l_start = 0;
 4602                 lf.l_len = 0;
 4603                 if (fmode & O_EXLOCK)
 4604                         lf.l_type = F_WRLCK;
 4605                 else
 4606                         lf.l_type = F_RDLCK;
 4607                 type = F_FLOCK;
 4608                 if ((fmode & FNONBLOCK) == 0)
 4609                         type |= F_WAIT;
 4610                 VOP_UNLOCK(vp, 0);
 4611                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4612                             type)) != 0) {
 4613                         /*
 4614                          * The lock request failed.  Normally close the
 4615                          * descriptor but handle the case where someone might
 4616                          * have dup()d or close()d it when we weren't looking.
 4617                          */
 4618                         fdclose(fdp, fp, indx, td);
 4619 
 4620                         /*
 4621                          * release our private reference
 4622                          */
 4623                         fdrop(fp, td);
 4624                         goto out;
 4625                 }
 4626                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4627                 atomic_set_int(&fp->f_flag, FHASLOCK);
 4628         }
 4629 
 4630         VOP_UNLOCK(vp, 0);
 4631         fdrop(fp, td);
 4632         VFS_UNLOCK_GIANT(vfslocked);
 4633         td->td_retval[0] = indx;
 4634         return (0);
 4635 
 4636 bad:
 4637         vput(vp);
 4638 out:
 4639         VFS_UNLOCK_GIANT(vfslocked);
 4640         return (error);
 4641 }
 4642 
 4643 /*
 4644  * Stat an (NFS) file handle.
 4645  */
 4646 #ifndef _SYS_SYSPROTO_H_
 4647 struct fhstat_args {
 4648         struct fhandle *u_fhp;
 4649         struct stat *sb;
 4650 };
 4651 #endif
 4652 int
 4653 sys_fhstat(td, uap)
 4654         struct thread *td;
 4655         register struct fhstat_args /* {
 4656                 struct fhandle *u_fhp;
 4657                 struct stat *sb;
 4658         } */ *uap;
 4659 {
 4660         struct stat sb;
 4661         fhandle_t fh;
 4662         struct mount *mp;
 4663         struct vnode *vp;
 4664         int vfslocked;
 4665         int error;
 4666 
 4667         error = priv_check(td, PRIV_VFS_FHSTAT);
 4668         if (error)
 4669                 return (error);
 4670         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4671         if (error)
 4672                 return (error);
 4673         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4674                 return (ESTALE);
 4675         vfslocked = VFS_LOCK_GIANT(mp);
 4676         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4677         vfs_unbusy(mp);
 4678         if (error) {
 4679                 VFS_UNLOCK_GIANT(vfslocked);
 4680                 return (error);
 4681         }
 4682         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4683         vput(vp);
 4684         VFS_UNLOCK_GIANT(vfslocked);
 4685         if (error)
 4686                 return (error);
 4687         error = copyout(&sb, uap->sb, sizeof(sb));
 4688         return (error);
 4689 }
 4690 
 4691 /*
 4692  * Implement fstatfs() for (NFS) file handles.
 4693  */
 4694 #ifndef _SYS_SYSPROTO_H_
 4695 struct fhstatfs_args {
 4696         struct fhandle *u_fhp;
 4697         struct statfs *buf;
 4698 };
 4699 #endif
 4700 int
 4701 sys_fhstatfs(td, uap)
 4702         struct thread *td;
 4703         struct fhstatfs_args /* {
 4704                 struct fhandle *u_fhp;
 4705                 struct statfs *buf;
 4706         } */ *uap;
 4707 {
 4708         struct statfs sf;
 4709         fhandle_t fh;
 4710         int error;
 4711 
 4712         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4713         if (error)
 4714                 return (error);
 4715         error = kern_fhstatfs(td, fh, &sf);
 4716         if (error)
 4717                 return (error);
 4718         return (copyout(&sf, uap->buf, sizeof(sf)));
 4719 }
 4720 
 4721 int
 4722 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4723 {
 4724         struct statfs *sp;
 4725         struct mount *mp;
 4726         struct vnode *vp;
 4727         int vfslocked;
 4728         int error;
 4729 
 4730         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4731         if (error)
 4732                 return (error);
 4733         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4734                 return (ESTALE);
 4735         vfslocked = VFS_LOCK_GIANT(mp);
 4736         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4737         if (error) {
 4738                 vfs_unbusy(mp);
 4739                 VFS_UNLOCK_GIANT(vfslocked);
 4740                 return (error);
 4741         }
 4742         vput(vp);
 4743         error = prison_canseemount(td->td_ucred, mp);
 4744         if (error)
 4745                 goto out;
 4746 #ifdef MAC
 4747         error = mac_mount_check_stat(td->td_ucred, mp);
 4748         if (error)
 4749                 goto out;
 4750 #endif
 4751         /*
 4752          * Set these in case the underlying filesystem fails to do so.
 4753          */
 4754         sp = &mp->mnt_stat;
 4755         sp->f_version = STATFS_VERSION;
 4756         sp->f_namemax = NAME_MAX;
 4757         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4758         error = VFS_STATFS(mp, sp);
 4759         if (error == 0)
 4760                 *buf = *sp;
 4761 out:
 4762         vfs_unbusy(mp);
 4763         VFS_UNLOCK_GIANT(vfslocked);
 4764         return (error);
 4765 }
 4766 
 4767 static int
 4768 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4769 {
 4770         struct file *fp;
 4771         struct mount *mp;
 4772         struct vnode *vp;
 4773         off_t olen, ooffset;
 4774         int error, vfslocked;
 4775 
 4776         fp = NULL;
 4777         vfslocked = 0;
 4778         error = fget(td, fd, CAP_WRITE, &fp);
 4779         if (error != 0)
 4780                 goto out;
 4781 
 4782         switch (fp->f_type) {
 4783         case DTYPE_VNODE:
 4784                 break;
 4785         case DTYPE_PIPE:
 4786         case DTYPE_FIFO:
 4787                 error = ESPIPE;
 4788                 goto out;
 4789         default:
 4790                 error = ENODEV;
 4791                 goto out;
 4792         }
 4793         if ((fp->f_flag & FWRITE) == 0) {
 4794                 error = EBADF;
 4795                 goto out;
 4796         }
 4797         vp = fp->f_vnode;
 4798         if (vp->v_type != VREG) {
 4799                 error = ENODEV;
 4800                 goto out;
 4801         }
 4802         if (offset < 0 || len <= 0) {
 4803                 error = EINVAL;
 4804                 goto out;
 4805         }
 4806         /* Check for wrap. */
 4807         if (offset > OFF_MAX - len) {
 4808                 error = EFBIG;
 4809                 goto out;
 4810         }
 4811 
 4812         /* Allocating blocks may take a long time, so iterate. */
 4813         for (;;) {
 4814                 olen = len;
 4815                 ooffset = offset;
 4816 
 4817                 bwillwrite();
 4818                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4819                 mp = NULL;
 4820                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4821                 if (error != 0) {
 4822                         VFS_UNLOCK_GIANT(vfslocked);
 4823                         break;
 4824                 }
 4825                 error = vn_lock(vp, LK_EXCLUSIVE);
 4826                 if (error != 0) {
 4827                         vn_finished_write(mp);
 4828                         VFS_UNLOCK_GIANT(vfslocked);
 4829                         break;
 4830                 }
 4831 #ifdef MAC
 4832                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4833                 if (error == 0)
 4834 #endif
 4835                         error = VOP_ALLOCATE(vp, &offset, &len);
 4836                 VOP_UNLOCK(vp, 0);
 4837                 vn_finished_write(mp);
 4838                 VFS_UNLOCK_GIANT(vfslocked);
 4839 
 4840                 if (olen + ooffset != offset + len) {
 4841                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4842                             ooffset, olen, offset, len);
 4843                 }
 4844                 if (error != 0 || len == 0)
 4845                         break;
 4846                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4847                 maybe_yield();
 4848         }
 4849  out:
 4850         if (fp != NULL)
 4851                 fdrop(fp, td);
 4852         return (error);
 4853 }
 4854 
 4855 int
 4856 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4857 {
 4858 
 4859         return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 4860 }

Cache object: 35e1e513f6161bb80890f44ab5f607fc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.