The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/8.3/sys/kern/vfs_syscalls.c 231948 2012-02-21 00:32:24Z kib $");
   39 
   40 #include "opt_compat.h"
   41 #include "opt_kdtrace.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/bio.h>
   47 #include <sys/buf.h>
   48 #include <sys/disk.h>
   49 #include <sys/sysent.h>
   50 #include <sys/malloc.h>
   51 #include <sys/mount.h>
   52 #include <sys/mutex.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/namei.h>
   55 #include <sys/filedesc.h>
   56 #include <sys/kernel.h>
   57 #include <sys/fcntl.h>
   58 #include <sys/file.h>
   59 #include <sys/filio.h>
   60 #include <sys/limits.h>
   61 #include <sys/linker.h>
   62 #include <sys/sdt.h>
   63 #include <sys/stat.h>
   64 #include <sys/sx.h>
   65 #include <sys/unistd.h>
   66 #include <sys/vnode.h>
   67 #include <sys/priv.h>
   68 #include <sys/proc.h>
   69 #include <sys/dirent.h>
   70 #include <sys/jail.h>
   71 #include <sys/syscallsubr.h>
   72 #include <sys/sysctl.h>
   73 #ifdef KTRACE
   74 #include <sys/ktrace.h>
   75 #endif
   76 
   77 #include <machine/stdarg.h>
   78 
   79 #include <security/audit/audit.h>
   80 #include <security/mac/mac_framework.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_object.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/uma.h>
   86 
   87 static MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
      /* M_FADVISE is not referenced in the visible portion of this file. */
   88 
      /*
       * Statically defined tracing (sys/sdt.h): one provider, "vfs", and two
       * probes identified by (stat, mode) and (stat, reg).  For each probe,
       * argument 0 is declared as "char *" and argument 1 as "int".  The probe
       * sites themselves are not in the visible portion of this file.
       */
   89 SDT_PROVIDER_DEFINE(vfs);
   90 SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
   91 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
   92 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
   93 SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
   94 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
   95 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
   96 
      /*
       * Prototypes for file-local (static) helpers.  Being static, they have
       * to be defined in this translation unit; of these, only
       * chroot_refuse_vdir_fds() is defined in the visible portion of the file.
       */
   97 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
   99 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
  100 static int setfmode(struct thread *td, struct vnode *, int);
  101 static int setfflags(struct thread *td, struct vnode *, int);
  102 static int setutimes(struct thread *td, struct vnode *,
  103     const struct timespec *, int, int);
  104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  105     struct thread *td);
  106 
  107 /*
  108  * The module initialization routine for POSIX asynchronous I/O will
  109  * set this to the version of AIO that it implements.  (Zero means
  110  * that it is not implemented.)  This value is used here by pathconf()
  111  * and in kern_descrip.c by fpathconf().
       *
       * The variable has external linkage (it is not static) and no explicit
       * initializer, so it starts out as zero.
  112  */
  113 int async_io_version;
  114 
      /*
       * DEBUG kernels only: an integer knob registered read/write under the
       * _debug sysctl node with the name "syncprt" and an empty description.
       * Nothing in the visible portion of this file reads it.
       */
  115 #ifdef DEBUG
  116 static int syncprt = 0;
  117 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  118 #endif
  119 
  120 /*
  121  * Sync each mounted filesystem.
       *
       * Walks the global mount list and, for every mount that
       *   - can be busied without sleeping (MBF_NOWAIT),
       *   - is not mounted read-only (MNT_RDONLY clear), and
       *   - accepts a non-blocking vn_start_write(V_NOWAIT),
       * calls vfs_msync() and VFS_SYNC(), both with MNT_NOWAIT.  Mounts that
       * fail any of these checks are silently skipped.
       *
       * The results of vfs_msync() and VFS_SYNC() are ignored; this function
       * always returns 0.  'uap' is not used.
  122  */
  123 #ifndef _SYS_SYSPROTO_H_
  124 struct sync_args {
  125         int     dummy;
  126 };
  127 #endif
  128 /* ARGSUSED */
  129 int
  130 sync(td, uap)
  131         struct thread *td;
  132         struct sync_args *uap;
  133 {
  134         struct mount *mp, *nmp;
  135         int save, vfslocked;
  136 
  137         mtx_lock(&mountlist_mtx);
  138         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * On failure the list lock is evidently still held (the
                       * next entry is read straight away).  On success the
                       * code below re-takes mountlist_mtx before advancing, so
                       * vfs_busy(MBF_MNTLSTLOCK) presumably dropped it --
                       * NOTE(review): confirm against vfs_busy().
                       */
  139                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  140                         nmp = TAILQ_NEXT(mp, mnt_list);
  141                         continue;
  142                 }
  143                 vfslocked = VFS_LOCK_GIANT(mp);
  144                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  145                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
                              /*
                               * TDP_SYNCIO is set on the current thread only
                               * for the duration of the two sync calls; the
                               * saved value is restored right after.
                               */
  146                         save = curthread_pflags_set(TDP_SYNCIO);
  147                         vfs_msync(mp, MNT_NOWAIT);
  148                         VFS_SYNC(mp, MNT_NOWAIT);
  149                         curthread_pflags_restore(save);
  150                         vn_finished_write(mp);
  151                 }
  152                 VFS_UNLOCK_GIANT(vfslocked);
                      /*
                       * Re-take the list lock and pick the successor while mp
                       * is still busied, then release mp.
                       */
  153                 mtx_lock(&mountlist_mtx);
  154                 nmp = TAILQ_NEXT(mp, mnt_list);
  155                 vfs_unbusy(mp);
  156         }
  157         mtx_unlock(&mountlist_mtx);
  158         return (0);
  159 }
  160 
  161 /*
  162  * Change filesystem quotas.
       *
       * Resolves uap->path (a user-space string, looked up with FOLLOW) only
       * to find the mount its vnode belongs to, then hands uap->cmd, uap->uid
       * and uap->arg unchanged to that mount's VFS_QUOTACTL().
       *
       * Returns EPERM when prison_allow(td->td_ucred, PR_ALLOW_QUOTAS) is
       * false; otherwise the first error from namei(), vfs_busy() or
       * VFS_QUOTACTL(), or 0.
  163  */
  164 #ifndef _SYS_SYSPROTO_H_
  165 struct quotactl_args {
  166         char *path;
  167         int cmd;
  168         int uid;
  169         caddr_t arg;
  170 };
  171 #endif
  172 int
  173 quotactl(td, uap)
  174         struct thread *td;
  175         register struct quotactl_args /* {
  176                 char *path;
  177                 int cmd;
  178                 int uid;
  179                 caddr_t arg;
  180         } */ *uap;
  181 {
  182         struct mount *mp;
  183         int vfslocked;
  184         int error;
  185         struct nameidata nd;
  186 
              /* The audit arguments are recorded before the permission check. */
  187         AUDIT_ARG_CMD(uap->cmd);
  188         AUDIT_ARG_UID(uap->uid);
  189         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  190                 return (EPERM);
  191         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  192            UIO_USERSPACE, uap->path, td);
  193         if ((error = namei(&nd)) != 0)
  194                 return (error);
  195         vfslocked = NDHASGIANT(&nd);
  196         NDFREE(&nd, NDF_ONLY_PNBUF);
              /*
               * The vnode is only needed to reach its mount.  vfs_ref() keeps
               * a reference on the mount across vput() and vfs_busy(); it is
               * dropped again by vfs_rel() as soon as vfs_busy() has returned,
               * whether or not the busy succeeded.
               */
  197         mp = nd.ni_vp->v_mount;
  198         vfs_ref(mp);
  199         vput(nd.ni_vp);
  200         error = vfs_busy(mp, 0);
  201         vfs_rel(mp);
  202         if (error) {
  203                 VFS_UNLOCK_GIANT(vfslocked);
  204                 return (error);
  205         }
  206         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  207         vfs_unbusy(mp);
  208         VFS_UNLOCK_GIANT(vfslocked);
  209         return (error);
  210 }
  211 
  212 /*
  213  * Used by statfs conversion routines to scale the block size up if
  214  * necessary so that all of the block counts are <= 'max_size'.  Note
  215  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  216  * value of 'n'.
       *
       * '*sf' is modified in place: f_bsize is shifted left and f_blocks,
       * f_bfree and f_bavail are shifted right by the same number of bits.
       * Nothing is changed when the largest of the three counts (taking the
       * magnitude of f_bavail, which may be negative) already fits.
  217  */
  218 void
  219 statfs_scale_blocks(struct statfs *sf, long max_size)
  220 {
  221         uint64_t count;
  222         int shift;
  223 
  224         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  225 
  226         /*
  227          * Attempt to scale the block counts to give a more accurate
  228          * overview to userland of the ratio of free space to used
  229          * space.  To do this, find the largest block count and compute
  230          * a divisor that lets it fit into a signed integer <= max_size.
  231          */
  232         if (sf->f_bavail < 0)
  233                 count = -sf->f_bavail;
  234         else
  235                 count = sf->f_bavail;
  236         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  237         if (count <= max_size)
  238                 return;
  239 
              /*
               * Discard the low bits that max_size can already represent
               * (flsl() is assumed to return the 1-based index of the highest
               * set bit, i.e. 'n' for a 2^n - 1 mask), then count how many
               * significant bits remain: that is the required shift.
               */
  240         count >>= flsl(max_size);
  241         shift = 0;
  242         while (count > 0) {
  243                 shift++;
  244                 count >>=1;
  245         }
  246 
  247         sf->f_bsize <<= shift;
  248         sf->f_blocks >>= shift;
  249         sf->f_bfree >>= shift;
  250         sf->f_bavail >>= shift;
  251 }
  252 
  253 /*
  254  * Get filesystem statistics.
       *
       * System call entry point: runs kern_statfs() on the user-space path
       * uap->path into a local struct statfs and, if that succeeded, copies
       * the structure out to uap->buf.  Returns the error from kern_statfs()
       * or from copyout(), or 0.
  255  */
  256 #ifndef _SYS_SYSPROTO_H_
  257 struct statfs_args {
  258         char *path;
  259         struct statfs *buf;
  260 };
  261 #endif
  262 int
  263 statfs(td, uap)
  264         struct thread *td;
  265         register struct statfs_args /* {
  266                 char *path;
  267                 struct statfs *buf;
  268         } */ *uap;
  269 {
  270         struct statfs sf;
  271         int error;
  272 
  273         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  274         if (error == 0)
  275                 error = copyout(&sf, uap->buf, sizeof(sf));
  276         return (error);
  277 }
  278 
  279 int
  280 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  281     struct statfs *buf)
  282 {
              /*
               * Look up 'path' (located in the address space named by
               * 'pathseg'), find the mount the resulting vnode belongs to,
               * refresh that mount's cached mnt_stat through VFS_STATFS() and
               * copy the result into the kernel buffer '*buf'.
               *
               * Callers for which priv_check(PRIV_VFS_GENERATION) fails get a
               * private copy with f_fsid zeroed and passed through
               * prison_enforce_statfs() instead of the cached structure.
               *
               * Returns 0 or an errno value; '*buf' is written only on success.
               */
  283         struct mount *mp;
  284         struct statfs *sp, sb;
  285         int vfslocked;
  286         int error;
  287         struct nameidata nd;
  288 
  289         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  290             AUDITVNODE1, pathseg, path, td);
  291         error = namei(&nd);
  292         if (error)
  293                 return (error);
  294         vfslocked = NDHASGIANT(&nd);
              /*
               * Only the mount is of interest.  Reference it before letting
               * go of the vnode, and drop that reference once vfs_busy() has
               * returned.
               */
  295         mp = nd.ni_vp->v_mount;
  296         vfs_ref(mp);
  297         NDFREE(&nd, NDF_ONLY_PNBUF);
  298         vput(nd.ni_vp);
  299         error = vfs_busy(mp, 0);
  300         vfs_rel(mp);
              /* Not busied on failure, so do not go through 'out'. */
  301         if (error) {
  302                 VFS_UNLOCK_GIANT(vfslocked);
  303                 return (error);
  304         }
  305 #ifdef MAC
  306         error = mac_mount_check_stat(td->td_ucred, mp);
  307         if (error)
  308                 goto out;
  309 #endif
  310         /*
  311          * Set these in case the underlying filesystem fails to do so.
  312          */
  313         sp = &mp->mnt_stat;
  314         sp->f_version = STATFS_VERSION;
  315         sp->f_namemax = NAME_MAX;
  316         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  317         error = VFS_STATFS(mp, sp);
  318         if (error)
  319                 goto out;
              /* Non-zero from priv_check() is taken as "privilege not held". */
  320         if (priv_check(td, PRIV_VFS_GENERATION)) {
  321                 bcopy(sp, &sb, sizeof(sb));
  322                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  323                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  324                 sp = &sb;
  325         }
  326         *buf = *sp;
  327 out:
  328         vfs_unbusy(mp);
  329         VFS_UNLOCK_GIANT(vfslocked);
  330         return (error);
  331 }
  332 
  333 /*
  334  * Get filesystem statistics.
       *
       * System call entry point for the descriptor-based variant: runs
       * kern_fstatfs() on uap->fd into a local struct statfs and, if that
       * succeeded, copies the structure out to uap->buf.  Returns the error
       * from kern_fstatfs() or from copyout(), or 0.
  335  */
  336 #ifndef _SYS_SYSPROTO_H_
  337 struct fstatfs_args {
  338         int fd;
  339         struct statfs *buf;
  340 };
  341 #endif
  342 int
  343 fstatfs(td, uap)
  344         struct thread *td;
  345         register struct fstatfs_args /* {
  346                 int fd;
  347                 struct statfs *buf;
  348         } */ *uap;
  349 {
  350         struct statfs sf;
  351         int error;
  352 
  353         error = kern_fstatfs(td, uap->fd, &sf);
  354         if (error == 0)
  355                 error = copyout(&sf, uap->buf, sizeof(sf));
  356         return (error);
  357 }
  358 
  359 int
  360 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  361 {
              /*
               * Descriptor-based counterpart of kern_statfs(): obtain the
               * vnode behind 'fd', find its mount, refresh the mount's cached
               * mnt_stat through VFS_STATFS() and copy the result into the
               * kernel buffer '*buf'.
               *
               * Returns EBADF when the vnode has no mount (v_mount == NULL),
               * otherwise 0 or the error from getvnode(), vfs_busy(), the MAC
               * check or VFS_STATFS().  '*buf' is written only on success.
               */
  362         struct file *fp;
  363         struct mount *mp;
  364         struct statfs *sp, sb;
  365         int vfslocked;
  366         struct vnode *vp;
  367         int error;
  368 
  369         AUDIT_ARG_FD(fd);
  370         error = getvnode(td->td_proc->p_fd, fd, &fp);
  371         if (error)
  372                 return (error);
  373         vp = fp->f_vnode;
  374         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
              /*
               * v_mount is sampled, and the mount referenced, while the vnode
               * is held share-locked; the vnode lock and the file reference
               * are both released right afterwards.
               */
  375         vn_lock(vp, LK_SHARED | LK_RETRY);
  376 #ifdef AUDIT
  377         AUDIT_ARG_VNODE1(vp);
  378 #endif
  379         mp = vp->v_mount;
  380         if (mp)
  381                 vfs_ref(mp);
  382         VOP_UNLOCK(vp, 0);
  383         fdrop(fp, td);
  384         if (mp == NULL) {
  385                 error = EBADF;
  386                 goto out;
  387         }
  388         error = vfs_busy(mp, 0);
  389         vfs_rel(mp);
              /* Not busied on failure, so do not go through 'out'. */
  390         if (error) {
  391                 VFS_UNLOCK_GIANT(vfslocked);
  392                 return (error);
  393         }
  394 #ifdef MAC
  395         error = mac_mount_check_stat(td->td_ucred, mp);
  396         if (error)
  397                 goto out;
  398 #endif
  399         /*
  400          * Set these in case the underlying filesystem fails to do so.
  401          */
  402         sp = &mp->mnt_stat;
  403         sp->f_version = STATFS_VERSION;
  404         sp->f_namemax = NAME_MAX;
  405         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  406         error = VFS_STATFS(mp, sp);
  407         if (error)
  408                 goto out;
              /*
               * Callers failing the PRIV_VFS_GENERATION check get a private
               * copy with f_fsid zeroed, filtered by prison_enforce_statfs().
               */
  409         if (priv_check(td, PRIV_VFS_GENERATION)) {
  410                 bcopy(sp, &sb, sizeof(sb));
  411                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  412                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  413                 sp = &sb;
  414         }
  415         *buf = *sp;
  416 out:
              /* mp is NULL when arriving here from the EBADF path above. */
  417         if (mp)
  418                 vfs_unbusy(mp);
  419         VFS_UNLOCK_GIANT(vfslocked);
  420         return (error);
  421 }
  422 
  423 /*
  424  * Get statistics on all filesystems.
       *
       * System call entry point: forwards to kern_getfsstat() with
       * UIO_USERSPACE, so the entries are copied directly to the user buffer.
       * The address of uap->buf is passed because kern_getfsstat() takes a
       * 'struct statfs **'.  The return value is kern_getfsstat()'s.
  425  */
  426 #ifndef _SYS_SYSPROTO_H_
  427 struct getfsstat_args {
  428         struct statfs *buf;
  429         long bufsize;
  430         int flags;
  431 };
  432 #endif
  433 int
  434 getfsstat(td, uap)
  435         struct thread *td;
  436         register struct getfsstat_args /* {
  437                 struct statfs *buf;
  438                 long bufsize;
  439                 int flags;
  440         } */ *uap;
  441 {
  442 
  443         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  444             uap->flags));
  445 }
  446 
  447 /*
  448  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  449  *      The caller is responsible for freeing memory which will be allocated
  450  *      in '*buf'.
       *
       * Walks the mount list and produces one struct statfs per mount that
       * passes prison_canseemount(), the MAC stat check (when MAC is
       * configured) and a non-sleeping vfs_busy().  At most
       * bufsize / sizeof(struct statfs) entries are stored.
       *
       *   bufsize == 0          nothing is stored; mounts are only counted.
       *   UIO_USERSPACE         entries are copyout()'d to '*buf'.
       *   UIO_SYSSPACE          a buffer is malloc()'d from M_TEMP, stored in
       *                         '*buf' and filled with bcopy().
       *
       * td->td_retval[0] receives the number of mounts counted, capped at the
       * buffer capacity when a buffer was supplied.  A mount whose
       * VFS_STATFS() fails is skipped and not counted.  Returns 0, or the
       * copyout() error in the UIO_USERSPACE case.
  451  */
  452 int
  453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  454     enum uio_seg bufseg, int flags)
  455 {
  456         struct mount *mp, *nmp;
  457         struct statfs *sfsp, *sp, sb;
  458         size_t count, maxcount;
  459         int vfslocked;
  460         int error;
  461 
  462         maxcount = bufsize / sizeof(struct statfs);
  463         if (bufsize == 0)
  464                 sfsp = NULL;
  465         else if (bufseg == UIO_USERSPACE)
  466                 sfsp = *buf;
  467         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Count the mounts first so the kernel buffer is sized
                       * for min(requested entries, current number of mounts).
                       */
  468                 count = 0;
  469                 mtx_lock(&mountlist_mtx);
  470                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  471                         count++;
  472                 }
  473                 mtx_unlock(&mountlist_mtx);
  474                 if (maxcount > count)
  475                         maxcount = count;
  476                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  477                     M_WAITOK);
  478         }
  479         count = 0;
  480         mtx_lock(&mountlist_mtx);
  481         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  482                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  483                         nmp = TAILQ_NEXT(mp, mnt_list);
  484                         continue;
  485                 }
  486 #ifdef MAC
  487                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  488                         nmp = TAILQ_NEXT(mp, mnt_list);
  489                         continue;
  490                 }
  491 #endif
                      /*
                       * After a successful busy every path below re-takes
                       * mountlist_mtx before advancing, so the list lock is
                       * presumably released by vfs_busy(MBF_MNTLSTLOCK) --
                       * NOTE(review): confirm against vfs_busy().
                       */
  492                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  493                         nmp = TAILQ_NEXT(mp, mnt_list);
  494                         continue;
  495                 }
  496                 vfslocked = VFS_LOCK_GIANT(mp);
                      /* Store an entry only while the buffer has room left. */
  497                 if (sfsp && count < maxcount) {
  498                         sp = &mp->mnt_stat;
  499                         /*
  500                          * Set these in case the underlying filesystem
  501                          * fails to do so.
  502                          */
  503                         sp->f_version = STATFS_VERSION;
  504                         sp->f_namemax = NAME_MAX;
  505                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  506                         /*
  507                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  508                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  509                          * overrides MNT_WAIT.
  510                          */
  511                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  512                             (flags & MNT_WAIT)) &&
  513                             (error = VFS_STATFS(mp, sp))) {
                                      /*
                                       * Refresh failed: skip this mount without
                                       * counting it or reporting the error.
                                       */
  514                                 VFS_UNLOCK_GIANT(vfslocked);
  515                                 mtx_lock(&mountlist_mtx);
  516                                 nmp = TAILQ_NEXT(mp, mnt_list);
  517                                 vfs_unbusy(mp);
  518                                 continue;
  519                         }
                              /*
                               * Callers failing the PRIV_VFS_GENERATION check
                               * get a private copy with f_fsid zeroed,
                               * filtered by prison_enforce_statfs().
                               */
  520                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  521                                 bcopy(sp, &sb, sizeof(sb));
  522                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  523                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  524                                 sp = &sb;
  525                         }
  526                         if (bufseg == UIO_SYSSPACE)
  527                                 bcopy(sp, sfsp, sizeof(*sp));
  528                         else /* if (bufseg == UIO_USERSPACE) */ {
  529                                 error = copyout(sp, sfsp, sizeof(*sp));
                                      /*
                                       * mountlist_mtx has not been re-taken on
                                       * this path, so only mp and Giant need
                                       * releasing before bailing out.
                                       */
  530                                 if (error) {
  531                                         vfs_unbusy(mp);
  532                                         VFS_UNLOCK_GIANT(vfslocked);
  533                                         return (error);
  534                                 }
  535                         }
  536                         sfsp++;
  537                 }
  538                 VFS_UNLOCK_GIANT(vfslocked);
                      /* Counted even when the buffer is already full. */
  539                 count++;
  540                 mtx_lock(&mountlist_mtx);
  541                 nmp = TAILQ_NEXT(mp, mnt_list);
  542                 vfs_unbusy(mp);
  543         }
  544         mtx_unlock(&mountlist_mtx);
  545         if (sfsp && count > maxcount)
  546                 td->td_retval[0] = maxcount;
  547         else
  548                 td->td_retval[0] = count;
  549         return (0);
  550 }
  551 
  552 #ifdef COMPAT_FREEBSD4
  553 /*
  554  * Get old format filesystem statistics.
       *
       * COMPAT_FREEBSD4 variant of statfs(): obtains a current-format
       * struct statfs through kern_statfs(), converts it with cvtstatfs() and
       * copies the resulting struct ostatfs out to uap->buf.  Returns the
       * error from kern_statfs() or copyout(), or 0.
  555  */
  556 static void cvtstatfs(struct statfs *, struct ostatfs *);
  557 
  558 #ifndef _SYS_SYSPROTO_H_
  559 struct freebsd4_statfs_args {
  560         char *path;
  561         struct ostatfs *buf;
  562 };
  563 #endif
  564 int
  565 freebsd4_statfs(td, uap)
  566         struct thread *td;
  567         struct freebsd4_statfs_args /* {
  568                 char *path;
  569                 struct ostatfs *buf;
  570         } */ *uap;
  571 {
  572         struct ostatfs osb;
  573         struct statfs sf;
  574         int error;
  575 
  576         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  577         if (error)
  578                 return (error);
  579         cvtstatfs(&sf, &osb);
  580         return (copyout(&osb, uap->buf, sizeof(osb)));
  581 }
  582 
  583 /*
  584  * Get filesystem statistics.
       *
       * COMPAT_FREEBSD4 variant of fstatfs(): obtains a current-format
       * struct statfs for uap->fd through kern_fstatfs(), converts it with
       * cvtstatfs() and copies the resulting struct ostatfs out to uap->buf.
       * Returns the error from kern_fstatfs() or copyout(), or 0.
  585  */
  586 #ifndef _SYS_SYSPROTO_H_
  587 struct freebsd4_fstatfs_args {
  588         int fd;
  589         struct ostatfs *buf;
  590 };
  591 #endif
  592 int
  593 freebsd4_fstatfs(td, uap)
  594         struct thread *td;
  595         struct freebsd4_fstatfs_args /* {
  596                 int fd;
  597                 struct ostatfs *buf;
  598         } */ *uap;
  599 {
  600         struct ostatfs osb;
  601         struct statfs sf;
  602         int error;
  603 
  604         error = kern_fstatfs(td, uap->fd, &sf);
  605         if (error)
  606                 return (error);
  607         cvtstatfs(&sf, &osb);
  608         return (copyout(&osb, uap->buf, sizeof(osb)));
  609 }
  610 
  611 /*
  612  * Get statistics on all filesystems.
       *
       * COMPAT_FREEBSD4 variant of getfsstat().  The user buffer holds
       * struct ostatfs entries, so the number of entries that fit is
       * computed first and kern_getfsstat() is asked (UIO_SYSSPACE) for that
       * many current-format entries in a kernel buffer.  Each returned entry
       * is then converted with cvtstatfs() and copied out, advancing
       * uap->buf one struct ostatfs at a time.
       *
       * td->td_retval[0] is left as set by kern_getfsstat().  Returns the
       * error from kern_getfsstat() or the first failing copyout(), or 0.
  613  */
  614 #ifndef _SYS_SYSPROTO_H_
  615 struct freebsd4_getfsstat_args {
  616         struct ostatfs *buf;
  617         long bufsize;
  618         int flags;
  619 };
  620 #endif
  621 int
  622 freebsd4_getfsstat(td, uap)
  623         struct thread *td;
  624         register struct freebsd4_getfsstat_args /* {
  625                 struct ostatfs *buf;
  626                 long bufsize;
  627                 int flags;
  628         } */ *uap;
  629 {
  630         struct statfs *buf, *sp;
  631         struct ostatfs osb;
  632         size_t count, size;
  633         int error;
  634 
  635         count = uap->bufsize / sizeof(struct ostatfs);
  636         size = count * sizeof(struct statfs);
  637         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
              /*
               * kern_getfsstat() allocates '*buf' only for a non-zero size, so
               * 'buf' is valid (and must be freed) exactly in this case.
               */
  638         if (size > 0) {
  639                 count = td->td_retval[0];
  640                 sp = buf;
  641                 while (count > 0 && error == 0) {
  642                         cvtstatfs(sp, &osb);
  643                         error = copyout(&osb, uap->buf, sizeof(osb));
  644                         sp++;
  645                         uap->buf++;
  646                         count--;
  647                 }
  648                 free(buf, M_TEMP);
  649         }
  650         return (error);
  651 }
  652 
  653 /*
  654  * Implement fstatfs() for (NFS) file handles.
       *
       * COMPAT_FREEBSD4 variant: copies the file handle in from uap->u_fhp,
       * obtains a current-format struct statfs through kern_fhstatfs() (not
       * defined in the visible portion of this file), converts it with
       * cvtstatfs() and copies the struct ostatfs out to uap->buf.  Returns
       * the first error from copyin(), kern_fhstatfs() or copyout(), or 0.
  655  */
  656 #ifndef _SYS_SYSPROTO_H_
  657 struct freebsd4_fhstatfs_args {
  658         struct fhandle *u_fhp;
  659         struct ostatfs *buf;
  660 };
  661 #endif
  662 int
  663 freebsd4_fhstatfs(td, uap)
  664         struct thread *td;
  665         struct freebsd4_fhstatfs_args /* {
  666                 struct fhandle *u_fhp;
  667                 struct ostatfs *buf;
  668         } */ *uap;
  669 {
  670         struct ostatfs osb;
  671         struct statfs sf;
  672         fhandle_t fh;
  673         int error;
  674 
  675         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  676         if (error)
  677                 return (error);
  678         error = kern_fhstatfs(td, fh, &sf);
  679         if (error)
  680                 return (error);
  681         cvtstatfs(&sf, &osb);
  682         return (copyout(&osb, uap->buf, sizeof(osb)));
  683 }
  684 
  685 /*
  686  * Convert a new format statfs structure to an old format statfs structure.
       *
       * Note that the input is modified: statfs_scale_blocks(nsp, LONG_MAX)
       * may rescale nsp's block size and block counts before they are copied.
       * '*osp' is zeroed first.  The I/O size, file counts and the four
       * read/write counters are clamped to LONG_MAX; the three name strings
       * are copied with strlcpy() using the smaller of the old and new
       * buffer lengths, so they may be truncated.
  687  */
  688 static void
  689 cvtstatfs(nsp, osp)
  690         struct statfs *nsp;
  691         struct ostatfs *osp;
  692 {
  693 
  694         statfs_scale_blocks(nsp, LONG_MAX);
  695         bzero(osp, sizeof(*osp));
  696         osp->f_bsize = nsp->f_bsize;
  697         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  698         osp->f_blocks = nsp->f_blocks;
  699         osp->f_bfree = nsp->f_bfree;
  700         osp->f_bavail = nsp->f_bavail;
  701         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  702         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  703         osp->f_owner = nsp->f_owner;
  704         osp->f_type = nsp->f_type;
  705         osp->f_flags = nsp->f_flags;
  706         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  707         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  708         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  709         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  710         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  711             MIN(MFSNAMELEN, OMFSNAMELEN));
  712         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  713             MIN(MNAMELEN, OMNAMELEN));
  714         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  715             MIN(MNAMELEN, OMNAMELEN));
  716         osp->f_fsid = nsp->f_fsid;
  717 }
  718 #endif /* COMPAT_FREEBSD4 */
  719 
  720 /*
  721  * Change current working directory to a given file descriptor.
       *
       * Takes its own reference on the vnode behind uap->fd, validates it with
       * change_dir() (defined elsewhere; presumably the directory/search
       * permission check -- TODO confirm), and, while the vnode has a
       * filesystem mounted on it (v_mountedhere), replaces it with the root
       * vnode of that mount.  On success the vnode becomes fdp->fd_cdir and
       * the previous working directory vnode is released.
       *
       * Returns 0, or the error from getvnode(), change_dir() or VFS_ROOT().
  722  */
  723 #ifndef _SYS_SYSPROTO_H_
  724 struct fchdir_args {
  725         int     fd;
  726 };
  727 #endif
  728 int
  729 fchdir(td, uap)
  730         struct thread *td;
  731         struct fchdir_args /* {
  732                 int fd;
  733         } */ *uap;
  734 {
  735         register struct filedesc *fdp = td->td_proc->p_fd;
  736         struct vnode *vp, *tdp, *vpold;
  737         struct mount *mp;
  738         struct file *fp;
  739         int vfslocked;
  740         int error;
  741 
  742         AUDIT_ARG_FD(uap->fd);
  743         if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
  744                 return (error);
  745         vp = fp->f_vnode;
              /* Hold the vnode ourselves before dropping the file reference. */
  746         VREF(vp);
  747         fdrop(fp, td);
  748         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  749         vn_lock(vp, LK_SHARED | LK_RETRY);
  750         AUDIT_ARG_VNODE1(vp);
  751         error = change_dir(vp, td);
  752         while (!error && (mp = vp->v_mountedhere) != NULL) {
  753                 int tvfslocked;
                      /*
                       * If the mount cannot be busied, go round again; the
                       * loop condition re-reads v_mountedhere.
                       */
  754                 if (vfs_busy(mp, 0))
  755                         continue;
  756                 tvfslocked = VFS_LOCK_GIANT(mp);
  757                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  758                 vfs_unbusy(mp);
  759                 if (error) {
  760                         VFS_UNLOCK_GIANT(tvfslocked);
  761                         break;
  762                 }
                      /*
                       * Switch to the mounted filesystem's root: release the
                       * covered vnode and its Giant state, and carry the new
                       * vnode's Giant state forward.
                       */
  763                 vput(vp);
  764                 VFS_UNLOCK_GIANT(vfslocked);
  765                 vp = tdp;
  766                 vfslocked = tvfslocked;
  767         }
  768         if (error) {
  769                 vput(vp);
  770                 VFS_UNLOCK_GIANT(vfslocked);
  771                 return (error);
  772         }
              /*
               * Success: unlock but keep the reference, which is handed over
               * to fd_cdir below.
               */
  773         VOP_UNLOCK(vp, 0);
  774         VFS_UNLOCK_GIANT(vfslocked);
  775         FILEDESC_XLOCK(fdp);
  776         vpold = fdp->fd_cdir;
  777         fdp->fd_cdir = vp;
  778         FILEDESC_XUNLOCK(fdp);
              /* The old cwd is released only after the filedesc lock is dropped. */
  779         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  780         vrele(vpold);
  781         VFS_UNLOCK_GIANT(vfslocked);
  782         return (0);
  783 }
  784 
  785 /*
  786  * Change current working directory (``.'').
       *
       * System call entry point: forwards the user-space path uap->path to
       * kern_chdir() and returns its result.
  787  */
  788 #ifndef _SYS_SYSPROTO_H_
  789 struct chdir_args {
  790         char    *path;
  791 };
  792 #endif
  793 int
  794 chdir(td, uap)
  795         struct thread *td;
  796         struct chdir_args /* {
  797                 char *path;
  798         } */ *uap;
  799 {
  800 
  801         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  802 }
  803 
  804 int
  805 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  806 {
              /*
               * Look up 'path' (located in the address space named by
               * 'pathseg'), validate the resulting vnode with change_dir()
               * (defined elsewhere) and install it as the process's current
               * working directory, releasing the previous one.
               *
               * Returns 0, or the error from namei() or change_dir(); on
               * error fd_cdir is left untouched.
               */
  807         register struct filedesc *fdp = td->td_proc->p_fd;
  808         int error;
  809         struct nameidata nd;
  810         struct vnode *vp;
  811         int vfslocked;
  812 
  813         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  814             MPSAFE, pathseg, path, td);
  815         if ((error = namei(&nd)) != 0)
  816                 return (error);
  817         vfslocked = NDHASGIANT(&nd);
  818         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  819                 vput(nd.ni_vp);
  820                 VFS_UNLOCK_GIANT(vfslocked);
  821                 NDFREE(&nd, NDF_ONLY_PNBUF);
  822                 return (error);
  823         }
              /*
               * Success: only unlock the vnode.  The reference obtained by
               * namei() is kept and handed over to fd_cdir below.
               */
  824         VOP_UNLOCK(nd.ni_vp, 0);
  825         VFS_UNLOCK_GIANT(vfslocked);
  826         NDFREE(&nd, NDF_ONLY_PNBUF);
  827         FILEDESC_XLOCK(fdp);
  828         vp = fdp->fd_cdir;
  829         fdp->fd_cdir = nd.ni_vp;
  830         FILEDESC_XUNLOCK(fdp);
              /* vp is now the previous cwd; release it outside the filedesc lock. */
  831         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  832         vrele(vp);
  833         VFS_UNLOCK_GIANT(vfslocked);
  834         return (0);
  835 }
  836 
  837 /*
  838  * Helper for the stricter chroot(2) security policy:  refuse if any
  839  * open file descriptor refers to a directory.
       *
       * Scans every slot of 'fdp' up to fd_nfiles.  Returns EPERM as soon as
       * a descriptor of type DTYPE_VNODE whose vnode is a VDIR is found, and
       * 0 if there is none.  The filedesc lock must be held by the caller;
       * this is asserted on entry.
  840  */
  841 static int
  842 chroot_refuse_vdir_fds(fdp)
  843         struct filedesc *fdp;
  844 {
  845         struct vnode *vp;
  846         struct file *fp;
  847         int fd;
  848 
  849         FILEDESC_LOCK_ASSERT(fdp);
  850 
  851         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  852                 fp = fget_locked(fdp, fd);
                      /* NULL is treated as "nothing open at this index". */
  853                 if (fp == NULL)
  854                         continue;
  855                 if (fp->f_type == DTYPE_VNODE) {
  856                         vp = fp->f_vnode;
  857                         if (vp->v_type == VDIR)
  858                                 return (EPERM);
  859                 }
  860         }
  861         return (0);
  862 }
  863 
  864 /*
  865  * This sysctl determines if we will allow a process to chroot(2) if it
  866  * has a directory open:
  867  *      0: disallowed for all processes.
  868  *      1: allowed for processes that were not already chroot(2)'ed.
  869  *      2: allowed for all processes.
  870  */
  871 
  872 static int chroot_allow_open_directories = 1;
  873 
  874 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  875      &chroot_allow_open_directories, 0, "");
  876 
  877 /*
  878  * Change notion of root (``/'') directory.
  879  */
  880 #ifndef _SYS_SYSPROTO_H_
  881 struct chroot_args {
  882         char    *path;
  883 };
  884 #endif
  885 int
  886 chroot(td, uap)
  887         struct thread *td;
  888         struct chroot_args /* {
  889                 char *path;
  890         } */ *uap;
  891 {
  892         int error;
  893         struct nameidata nd;
  894         int vfslocked;
  895 
  896         error = priv_check(td, PRIV_VFS_CHROOT);
  897         if (error)
  898                 return (error);
  899         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  900             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  901         error = namei(&nd);
  902         if (error)
  903                 goto error;
  904         vfslocked = NDHASGIANT(&nd);
  905         if ((error = change_dir(nd.ni_vp, td)) != 0)
  906                 goto e_vunlock;
  907 #ifdef MAC
  908         if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
  909                 goto e_vunlock;
  910 #endif
  911         VOP_UNLOCK(nd.ni_vp, 0);
  912         error = change_root(nd.ni_vp, td);
  913         vrele(nd.ni_vp);
  914         VFS_UNLOCK_GIANT(vfslocked);
  915         NDFREE(&nd, NDF_ONLY_PNBUF);
  916         return (error);
  917 e_vunlock:
  918         vput(nd.ni_vp);
  919         VFS_UNLOCK_GIANT(vfslocked);
  920 error:
  921         NDFREE(&nd, NDF_ONLY_PNBUF);
  922         return (error);
  923 }
  924 
  925 /*
  926  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  927  * instance.
  928  */
  929 int
  930 change_dir(vp, td)
  931         struct vnode *vp;
  932         struct thread *td;
  933 {
  934         int error;
  935 
  936         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  937         if (vp->v_type != VDIR)
  938                 return (ENOTDIR);
  939 #ifdef MAC
  940         error = mac_vnode_check_chdir(td->td_ucred, vp);
  941         if (error)
  942                 return (error);
  943 #endif
  944         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  945         return (error);
  946 }
  947 
  948 /*
  949  * Common routine for kern_chroot() and jail_attach().  The caller is
  950  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  951  * authorize this operation.
  952  */
  953 int
  954 change_root(vp, td)
  955         struct vnode *vp;
  956         struct thread *td;
  957 {
  958         struct filedesc *fdp;
  959         struct vnode *oldvp;
  960         int vfslocked;
  961         int error;
  962 
  963         VFS_ASSERT_GIANT(vp->v_mount);
  964         fdp = td->td_proc->p_fd;
  965         FILEDESC_XLOCK(fdp);
  966         if (chroot_allow_open_directories == 0 ||
  967             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  968                 error = chroot_refuse_vdir_fds(fdp);
  969                 if (error) {
  970                         FILEDESC_XUNLOCK(fdp);
  971                         return (error);
  972                 }
  973         }
  974         oldvp = fdp->fd_rdir;
  975         fdp->fd_rdir = vp;
  976         VREF(fdp->fd_rdir);
  977         if (!fdp->fd_jdir) {
  978                 fdp->fd_jdir = vp;
  979                 VREF(fdp->fd_jdir);
  980         }
  981         FILEDESC_XUNLOCK(fdp);
  982         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  983         vrele(oldvp);
  984         VFS_UNLOCK_GIANT(vfslocked);
  985         return (0);
  986 }
  987 
  988 /*
  989  * Check permissions, allocate an open file structure, and call the device
  990  * open routine if any.
  991  */
  992 #ifndef _SYS_SYSPROTO_H_
  993 struct open_args {
  994         char    *path;
  995         int     flags;
  996         int     mode;
  997 };
  998 #endif
  999 int
 1000 open(td, uap)
 1001         struct thread *td;
 1002         register struct open_args /* {
 1003                 char *path;
 1004                 int flags;
 1005                 int mode;
 1006         } */ *uap;
 1007 {
 1008 
 1009         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1010 }
 1011 
 1012 #ifndef _SYS_SYSPROTO_H_
 1013 struct openat_args {
 1014         int     fd;
 1015         char    *path;
 1016         int     flag;
 1017         int     mode;
 1018 };
 1019 #endif
 1020 int
 1021 openat(struct thread *td, struct openat_args *uap)
 1022 {
 1023 
 1024         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1025             uap->mode));
 1026 }
 1027 
 1028 int
 1029 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1030     int mode)
 1031 {
 1032 
 1033         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1034 }
 1035 
 1036 int
 1037 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1038     int flags, int mode)
 1039 {
 1040         struct proc *p = td->td_proc;
 1041         struct filedesc *fdp = p->p_fd;
 1042         struct file *fp;
 1043         struct vnode *vp;
 1044         int cmode;
 1045         struct file *nfp;
 1046         int type, indx, error;
 1047         struct flock lf;
 1048         struct nameidata nd;
 1049         int vfslocked;
 1050 
 1051         AUDIT_ARG_FFLAGS(flags);
 1052         AUDIT_ARG_MODE(mode);
 1053         /* XXX: audit dirfd */
 1054         /*
 1055          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR may
 1056          * be specified.
 1057          */
 1058         if (flags & O_EXEC) {
 1059                 if (flags & O_ACCMODE)
 1060                         return (EINVAL);
 1061         } else if ((flags & O_ACCMODE) == O_ACCMODE)
 1062                 return (EINVAL);
 1063         else
 1064                 flags = FFLAGS(flags);
 1065 
 1066         error = fallocf(td, &nfp, &indx, flags);
 1067         if (error)
 1068                 return (error);
 1069         /* An extra reference on `nfp' has been held for us by falloc(). */
 1070         fp = nfp;
 1071         /* Set the flags early so the finit in devfs can pick them up. */
 1072         fp->f_flag = flags & FMASK;
 1073         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
 1074         NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
 1075             td);
 1076         td->td_dupfd = -1;              /* XXX check for fdopen */
 1077         error = vn_open(&nd, &flags, cmode, fp);
 1078         if (error) {
 1079                 /*
 1080                  * If the vn_open replaced the method vector, something
 1081                  * wonderous happened deep below and we just pass it up
 1082                  * pretending we know what we do.
 1083                  */
 1084                 if (error == ENXIO && fp->f_ops != &badfileops) {
 1085                         fdrop(fp, td);
 1086                         td->td_retval[0] = indx;
 1087                         return (0);
 1088                 }
 1089 
 1090                 /*
 1091                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1092                  * responsible for dropping the old contents of ofiles[indx]
 1093                  * if it succeeds.
 1094                  */
 1095                 if ((error == ENODEV || error == ENXIO) &&
 1096                     td->td_dupfd >= 0 &&                /* XXX from fdopen */
 1097                     (error =
 1098                         dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
 1099                         td->td_retval[0] = indx;
 1100                         fdrop(fp, td);
 1101                         return (0);
 1102                 }
 1103                 /*
 1104                  * Clean up the descriptor, but only if another thread hadn't
 1105                  * replaced or closed it.
 1106                  */
 1107                 fdclose(fdp, fp, indx, td);
 1108                 fdrop(fp, td);
 1109 
 1110                 if (error == ERESTART)
 1111                         error = EINTR;
 1112                 return (error);
 1113         }
 1114         td->td_dupfd = 0;
 1115         vfslocked = NDHASGIANT(&nd);
 1116         NDFREE(&nd, NDF_ONLY_PNBUF);
 1117         vp = nd.ni_vp;
 1118 
 1119         /*
 1120          * Store the vnode, for any f_type. Typically, the vnode use
 1121          * count is decremented by direct call to vn_closefile() for
 1122          * files that switched type in the cdevsw fdopen() method.
 1123          */
 1124         fp->f_vnode = vp;
 1125         /*
 1126          * If the file wasn't claimed by devfs bind it to the normal
 1127          * vnode operations here.
 1128          */
 1129         if (fp->f_ops == &badfileops) {
 1130                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1131                 fp->f_seqcount = 1;
 1132                 finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
 1133         }
 1134 
 1135         VOP_UNLOCK(vp, 0);
 1136         if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
 1137                 lf.l_whence = SEEK_SET;
 1138                 lf.l_start = 0;
 1139                 lf.l_len = 0;
 1140                 if (flags & O_EXLOCK)
 1141                         lf.l_type = F_WRLCK;
 1142                 else
 1143                         lf.l_type = F_RDLCK;
 1144                 type = F_FLOCK;
 1145                 if ((flags & FNONBLOCK) == 0)
 1146                         type |= F_WAIT;
 1147                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 1148                             type)) != 0)
 1149                         goto bad;
 1150                 atomic_set_int(&fp->f_flag, FHASLOCK);
 1151         }
 1152         if (flags & O_TRUNC) {
 1153                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1154                 if (error)
 1155                         goto bad;
 1156         }
 1157         VFS_UNLOCK_GIANT(vfslocked);
 1158         /*
 1159          * Release our private reference, leaving the one associated with
 1160          * the descriptor table intact.
 1161          */
 1162         fdrop(fp, td);
 1163         td->td_retval[0] = indx;
 1164         return (0);
 1165 bad:
 1166         VFS_UNLOCK_GIANT(vfslocked);
 1167         fdclose(fdp, fp, indx, td);
 1168         fdrop(fp, td);
 1169         return (error);
 1170 }
 1171 
 1172 #ifdef COMPAT_43
 1173 /*
 1174  * Create a file.
 1175  */
 1176 #ifndef _SYS_SYSPROTO_H_
 1177 struct ocreat_args {
 1178         char    *path;
 1179         int     mode;
 1180 };
 1181 #endif
 1182 int
 1183 ocreat(td, uap)
 1184         struct thread *td;
 1185         register struct ocreat_args /* {
 1186                 char *path;
 1187                 int mode;
 1188         } */ *uap;
 1189 {
 1190 
 1191         return (kern_open(td, uap->path, UIO_USERSPACE,
 1192             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1193 }
 1194 #endif /* COMPAT_43 */
 1195 
 1196 /*
 1197  * Create a special file.
 1198  */
 1199 #ifndef _SYS_SYSPROTO_H_
 1200 struct mknod_args {
 1201         char    *path;
 1202         int     mode;
 1203         int     dev;
 1204 };
 1205 #endif
 1206 int
 1207 mknod(td, uap)
 1208         struct thread *td;
 1209         register struct mknod_args /* {
 1210                 char *path;
 1211                 int mode;
 1212                 int dev;
 1213         } */ *uap;
 1214 {
 1215 
 1216         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1217 }
 1218 
 1219 #ifndef _SYS_SYSPROTO_H_
 1220 struct mknodat_args {
 1221         int     fd;
 1222         char    *path;
 1223         mode_t  mode;
 1224         dev_t   dev;
 1225 };
 1226 #endif
 1227 int
 1228 mknodat(struct thread *td, struct mknodat_args *uap)
 1229 {
 1230 
 1231         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1232             uap->dev));
 1233 }
 1234 
 1235 int
 1236 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1237     int dev)
 1238 {
 1239 
 1240         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1241 }
 1242 
 1243 int
 1244 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1245     int mode, int dev)
 1246 {
 1247         struct vnode *vp;
 1248         struct mount *mp;
 1249         struct vattr vattr;
 1250         int error;
 1251         int whiteout = 0;
 1252         struct nameidata nd;
 1253         int vfslocked;
 1254 
 1255         AUDIT_ARG_MODE(mode);
 1256         AUDIT_ARG_DEV(dev);
 1257         switch (mode & S_IFMT) {
 1258         case S_IFCHR:
 1259         case S_IFBLK:
 1260                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1261                 break;
 1262         case S_IFMT:
 1263                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1264                 break;
 1265         case S_IFWHT:
 1266                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1267                 break;
 1268         case S_IFIFO:
 1269                 if (dev == 0)
 1270                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1271                 /* FALLTHROUGH */
 1272         default:
 1273                 error = EINVAL;
 1274                 break;
 1275         }
 1276         if (error)
 1277                 return (error);
 1278 restart:
 1279         bwillwrite();
 1280         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1281             pathseg, path, fd, td);
 1282         if ((error = namei(&nd)) != 0)
 1283                 return (error);
 1284         vfslocked = NDHASGIANT(&nd);
 1285         vp = nd.ni_vp;
 1286         if (vp != NULL) {
 1287                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1288                 if (vp == nd.ni_dvp)
 1289                         vrele(nd.ni_dvp);
 1290                 else
 1291                         vput(nd.ni_dvp);
 1292                 vrele(vp);
 1293                 VFS_UNLOCK_GIANT(vfslocked);
 1294                 return (EEXIST);
 1295         } else {
 1296                 VATTR_NULL(&vattr);
 1297                 vattr.va_mode = (mode & ALLPERMS) &
 1298                     ~td->td_proc->p_fd->fd_cmask;
 1299                 vattr.va_rdev = dev;
 1300                 whiteout = 0;
 1301 
 1302                 switch (mode & S_IFMT) {
 1303                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1304                         vattr.va_type = VBAD;
 1305                         break;
 1306                 case S_IFCHR:
 1307                         vattr.va_type = VCHR;
 1308                         break;
 1309                 case S_IFBLK:
 1310                         vattr.va_type = VBLK;
 1311                         break;
 1312                 case S_IFWHT:
 1313                         whiteout = 1;
 1314                         break;
 1315                 default:
 1316                         panic("kern_mknod: invalid mode");
 1317                 }
 1318         }
 1319         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1320                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1321                 vput(nd.ni_dvp);
 1322                 VFS_UNLOCK_GIANT(vfslocked);
 1323                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1324                         return (error);
 1325                 goto restart;
 1326         }
 1327 #ifdef MAC
 1328         if (error == 0 && !whiteout)
 1329                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1330                     &nd.ni_cnd, &vattr);
 1331 #endif
 1332         if (!error) {
 1333                 if (whiteout)
 1334                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1335                 else {
 1336                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1337                                                 &nd.ni_cnd, &vattr);
 1338                         if (error == 0)
 1339                                 vput(nd.ni_vp);
 1340                 }
 1341         }
 1342         NDFREE(&nd, NDF_ONLY_PNBUF);
 1343         vput(nd.ni_dvp);
 1344         vn_finished_write(mp);
 1345         VFS_UNLOCK_GIANT(vfslocked);
 1346         return (error);
 1347 }
 1348 
 1349 /*
 1350  * Create a named pipe.
 1351  */
 1352 #ifndef _SYS_SYSPROTO_H_
 1353 struct mkfifo_args {
 1354         char    *path;
 1355         int     mode;
 1356 };
 1357 #endif
 1358 int
 1359 mkfifo(td, uap)
 1360         struct thread *td;
 1361         register struct mkfifo_args /* {
 1362                 char *path;
 1363                 int mode;
 1364         } */ *uap;
 1365 {
 1366 
 1367         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1368 }
 1369 
 1370 #ifndef _SYS_SYSPROTO_H_
 1371 struct mkfifoat_args {
 1372         int     fd;
 1373         char    *path;
 1374         mode_t  mode;
 1375 };
 1376 #endif
 1377 int
 1378 mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1379 {
 1380 
 1381         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1382             uap->mode));
 1383 }
 1384 
 1385 int
 1386 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1387 {
 1388 
 1389         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1390 }
 1391 
 1392 int
 1393 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1394     int mode)
 1395 {
 1396         struct mount *mp;
 1397         struct vattr vattr;
 1398         int error;
 1399         struct nameidata nd;
 1400         int vfslocked;
 1401 
 1402         AUDIT_ARG_MODE(mode);
 1403 restart:
 1404         bwillwrite();
 1405         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1406             pathseg, path, fd, td);
 1407         if ((error = namei(&nd)) != 0)
 1408                 return (error);
 1409         vfslocked = NDHASGIANT(&nd);
 1410         if (nd.ni_vp != NULL) {
 1411                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1412                 if (nd.ni_vp == nd.ni_dvp)
 1413                         vrele(nd.ni_dvp);
 1414                 else
 1415                         vput(nd.ni_dvp);
 1416                 vrele(nd.ni_vp);
 1417                 VFS_UNLOCK_GIANT(vfslocked);
 1418                 return (EEXIST);
 1419         }
 1420         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1421                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1422                 vput(nd.ni_dvp);
 1423                 VFS_UNLOCK_GIANT(vfslocked);
 1424                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1425                         return (error);
 1426                 goto restart;
 1427         }
 1428         VATTR_NULL(&vattr);
 1429         vattr.va_type = VFIFO;
 1430         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1431 #ifdef MAC
 1432         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1433             &vattr);
 1434         if (error)
 1435                 goto out;
 1436 #endif
 1437         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1438         if (error == 0)
 1439                 vput(nd.ni_vp);
 1440 #ifdef MAC
 1441 out:
 1442 #endif
 1443         vput(nd.ni_dvp);
 1444         vn_finished_write(mp);
 1445         VFS_UNLOCK_GIANT(vfslocked);
 1446         NDFREE(&nd, NDF_ONLY_PNBUF);
 1447         return (error);
 1448 }
 1449 
 1450 /*
 1451  * Make a hard file link.
 1452  */
 1453 #ifndef _SYS_SYSPROTO_H_
 1454 struct link_args {
 1455         char    *path;
 1456         char    *link;
 1457 };
 1458 #endif
 1459 int
 1460 link(td, uap)
 1461         struct thread *td;
 1462         register struct link_args /* {
 1463                 char *path;
 1464                 char *link;
 1465         } */ *uap;
 1466 {
 1467 
 1468         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1469 }
 1470 
 1471 #ifndef _SYS_SYSPROTO_H_
 1472 struct linkat_args {
 1473         int     fd1;
 1474         char    *path1;
 1475         int     fd2;
 1476         char    *path2;
 1477         int     flag;
 1478 };
 1479 #endif
 1480 int
 1481 linkat(struct thread *td, struct linkat_args *uap)
 1482 {
 1483         int flag;
 1484 
 1485         flag = uap->flag;
 1486         if (flag & ~AT_SYMLINK_FOLLOW)
 1487                 return (EINVAL);
 1488 
 1489         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1490             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1491 }
 1492 
 1493 int hardlink_check_uid = 0;
 1494 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1495     &hardlink_check_uid, 0,
 1496     "Unprivileged processes cannot create hard links to files owned by other "
 1497     "users");
 1498 static int hardlink_check_gid = 0;
 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1500     &hardlink_check_gid, 0,
 1501     "Unprivileged processes cannot create hard links to files owned by other "
 1502     "groups");
 1503 
 1504 static int
 1505 can_hardlink(struct vnode *vp, struct ucred *cred)
 1506 {
 1507         struct vattr va;
 1508         int error;
 1509 
 1510         if (!hardlink_check_uid && !hardlink_check_gid)
 1511                 return (0);
 1512 
 1513         error = VOP_GETATTR(vp, &va, cred);
 1514         if (error != 0)
 1515                 return (error);
 1516 
 1517         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1518                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1519                 if (error)
 1520                         return (error);
 1521         }
 1522 
 1523         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1524                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1525                 if (error)
 1526                         return (error);
 1527         }
 1528 
 1529         return (0);
 1530 }
 1531 
 1532 int
 1533 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1534 {
 1535 
 1536         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1537 }
 1538 
 1539 int
 1540 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1541     enum uio_seg segflg, int follow)
 1542 {
 1543         struct vnode *vp;
 1544         struct mount *mp;
 1545         struct nameidata nd;
 1546         int vfslocked;
 1547         int lvfslocked;
 1548         int error;
 1549 
 1550         bwillwrite();
 1551         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
 1552             fd1, td);
 1553 
 1554         if ((error = namei(&nd)) != 0)
 1555                 return (error);
 1556         vfslocked = NDHASGIANT(&nd);
 1557         NDFREE(&nd, NDF_ONLY_PNBUF);
 1558         vp = nd.ni_vp;
 1559         if (vp->v_type == VDIR) {
 1560                 vrele(vp);
 1561                 VFS_UNLOCK_GIANT(vfslocked);
 1562                 return (EPERM);         /* POSIX */
 1563         }
 1564         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1565                 vrele(vp);
 1566                 VFS_UNLOCK_GIANT(vfslocked);
 1567                 return (error);
 1568         }
 1569         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1570             segflg, path2, fd2, td);
 1571         if ((error = namei(&nd)) == 0) {
 1572                 lvfslocked = NDHASGIANT(&nd);
 1573                 if (nd.ni_vp != NULL) {
 1574                         if (nd.ni_dvp == nd.ni_vp)
 1575                                 vrele(nd.ni_dvp);
 1576                         else
 1577                                 vput(nd.ni_dvp);
 1578                         vrele(nd.ni_vp);
 1579                         error = EEXIST;
 1580                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1581                     == 0) {
 1582                         error = can_hardlink(vp, td->td_ucred);
 1583                         if (error == 0)
 1584 #ifdef MAC
 1585                                 error = mac_vnode_check_link(td->td_ucred,
 1586                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1587                         if (error == 0)
 1588 #endif
 1589                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1590                         VOP_UNLOCK(vp, 0);
 1591                         vput(nd.ni_dvp);
 1592                 }
 1593                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1594                 VFS_UNLOCK_GIANT(lvfslocked);
 1595         }
 1596         vrele(vp);
 1597         vn_finished_write(mp);
 1598         VFS_UNLOCK_GIANT(vfslocked);
 1599         return (error);
 1600 }
 1601 
 1602 /*
 1603  * Make a symbolic link.
 1604  */
 1605 #ifndef _SYS_SYSPROTO_H_
 1606 struct symlink_args {
 1607         char    *path;
 1608         char    *link;
 1609 };
 1610 #endif
 1611 int
 1612 symlink(td, uap)
 1613         struct thread *td;
 1614         register struct symlink_args /* {
 1615                 char *path;
 1616                 char *link;
 1617         } */ *uap;
 1618 {
 1619 
 1620         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1621 }
 1622 
 1623 #ifndef _SYS_SYSPROTO_H_
 1624 struct symlinkat_args {
 1625         char    *path;
 1626         int     fd;
 1627         char    *path2;
 1628 };
 1629 #endif
 1630 int
 1631 symlinkat(struct thread *td, struct symlinkat_args *uap)
 1632 {
 1633 
 1634         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1635             UIO_USERSPACE));
 1636 }
 1637 
 1638 int
 1639 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1640 {
 1641 
 1642         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1643 }
 1644 
 1645 int
 1646 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1647     enum uio_seg segflg)
 1648 {
 1649         struct mount *mp;
 1650         struct vattr vattr;
 1651         char *syspath;
 1652         int error;
 1653         struct nameidata nd;
 1654         int vfslocked;
 1655 
 1656         if (segflg == UIO_SYSSPACE) {
 1657                 syspath = path1;
 1658         } else {
 1659                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1660                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1661                         goto out;
 1662         }
 1663         AUDIT_ARG_TEXT(syspath);
 1664 restart:
 1665         bwillwrite();
 1666         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1667             segflg, path2, fd, td);
 1668         if ((error = namei(&nd)) != 0)
 1669                 goto out;
 1670         vfslocked = NDHASGIANT(&nd);
 1671         if (nd.ni_vp) {
 1672                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1673                 if (nd.ni_vp == nd.ni_dvp)
 1674                         vrele(nd.ni_dvp);
 1675                 else
 1676                         vput(nd.ni_dvp);
 1677                 vrele(nd.ni_vp);
 1678                 VFS_UNLOCK_GIANT(vfslocked);
 1679                 error = EEXIST;
 1680                 goto out;
 1681         }
 1682         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1683                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1684                 vput(nd.ni_dvp);
 1685                 VFS_UNLOCK_GIANT(vfslocked);
 1686                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1687                         goto out;
 1688                 goto restart;
 1689         }
 1690         VATTR_NULL(&vattr);
 1691         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1692 #ifdef MAC
 1693         vattr.va_type = VLNK;
 1694         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1695             &vattr);
 1696         if (error)
 1697                 goto out2;
 1698 #endif
 1699         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1700         if (error == 0)
 1701                 vput(nd.ni_vp);
 1702 #ifdef MAC
 1703 out2:
 1704 #endif
 1705         NDFREE(&nd, NDF_ONLY_PNBUF);
 1706         vput(nd.ni_dvp);
 1707         vn_finished_write(mp);
 1708         VFS_UNLOCK_GIANT(vfslocked);
 1709 out:
 1710         if (segflg != UIO_SYSSPACE)
 1711                 uma_zfree(namei_zone, syspath);
 1712         return (error);
 1713 }
 1714 
 1715 /*
 1716  * Delete a whiteout from the filesystem.
 1717  */
 1718 int
 1719 undelete(td, uap)
 1720         struct thread *td;
 1721         register struct undelete_args /* {
 1722                 char *path;
 1723         } */ *uap;
 1724 {
 1725         int error;
 1726         struct mount *mp;
 1727         struct nameidata nd;
 1728         int vfslocked;
 1729 
 1730 restart:
 1731         bwillwrite();
 1732         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1733             UIO_USERSPACE, uap->path, td);
 1734         error = namei(&nd);
 1735         if (error)
 1736                 return (error);
 1737         vfslocked = NDHASGIANT(&nd);
 1738 
 1739         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1740                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1741                 if (nd.ni_vp == nd.ni_dvp)
 1742                         vrele(nd.ni_dvp);
 1743                 else
 1744                         vput(nd.ni_dvp);
 1745                 if (nd.ni_vp)
 1746                         vrele(nd.ni_vp);
 1747                 VFS_UNLOCK_GIANT(vfslocked);
 1748                 return (EEXIST);
 1749         }
 1750         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1751                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1752                 vput(nd.ni_dvp);
 1753                 VFS_UNLOCK_GIANT(vfslocked);
 1754                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1755                         return (error);
 1756                 goto restart;
 1757         }
 1758         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1759         NDFREE(&nd, NDF_ONLY_PNBUF);
 1760         vput(nd.ni_dvp);
 1761         vn_finished_write(mp);
 1762         VFS_UNLOCK_GIANT(vfslocked);
 1763         return (error);
 1764 }
 1765 
 1766 /*
 1767  * Delete a name from the filesystem.
 1768  */
 1769 #ifndef _SYS_SYSPROTO_H_
 1770 struct unlink_args {
 1771         char    *path;
 1772 };
 1773 #endif
 1774 int
 1775 unlink(td, uap)
 1776         struct thread *td;
 1777         struct unlink_args /* {
 1778                 char *path;
 1779         } */ *uap;
 1780 {
 1781 
 1782         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1783 }
 1784 
 1785 #ifndef _SYS_SYSPROTO_H_
 1786 struct unlinkat_args {
 1787         int     fd;
 1788         char    *path;
 1789         int     flag;
 1790 };
 1791 #endif
 1792 int
 1793 unlinkat(struct thread *td, struct unlinkat_args *uap)
 1794 {
 1795         int flag = uap->flag;
 1796         int fd = uap->fd;
 1797         char *path = uap->path;
 1798 
 1799         if (flag & ~AT_REMOVEDIR)
 1800                 return (EINVAL);
 1801 
 1802         if (flag & AT_REMOVEDIR)
 1803                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1804         else
 1805                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE));
 1806 }
 1807 
 1808 int
 1809 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1810 {
 1811 
 1812         return (kern_unlinkat(td, AT_FDCWD, path, pathseg));
 1813 }
 1814 
 1815 int
 1816 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 1817 {
 1818         struct mount *mp;
 1819         struct vnode *vp;
 1820         int error;
 1821         struct nameidata nd;
 1822         int vfslocked;
 1823 
 1824 restart:
 1825         bwillwrite();
 1826         NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1827             pathseg, path, fd, td);
 1828         if ((error = namei(&nd)) != 0)
 1829                 return (error == EINVAL ? EPERM : error);
 1830         vfslocked = NDHASGIANT(&nd);
 1831         vp = nd.ni_vp;
 1832         if (vp->v_type == VDIR)
 1833                 error = EPERM;          /* POSIX */
 1834         else {
 1835                 /*
 1836                  * The root of a mounted filesystem cannot be deleted.
 1837                  *
 1838                  * XXX: can this only be a VDIR case?
 1839                  */
 1840                 if (vp->v_vflag & VV_ROOT)
 1841                         error = EBUSY;
 1842         }
 1843         if (error == 0) {
 1844                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1845                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1846                         vput(nd.ni_dvp);
 1847                         if (vp == nd.ni_dvp)
 1848                                 vrele(vp);
 1849                         else
 1850                                 vput(vp);
 1851                         VFS_UNLOCK_GIANT(vfslocked);
 1852                         if ((error = vn_start_write(NULL, &mp,
 1853                             V_XSLEEP | PCATCH)) != 0)
 1854                                 return (error);
 1855                         goto restart;
 1856                 }
 1857 #ifdef MAC
 1858                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1859                     &nd.ni_cnd);
 1860                 if (error)
 1861                         goto out;
 1862 #endif
 1863                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1864 #ifdef MAC
 1865 out:
 1866 #endif
 1867                 vn_finished_write(mp);
 1868         }
 1869         NDFREE(&nd, NDF_ONLY_PNBUF);
 1870         vput(nd.ni_dvp);
 1871         if (vp == nd.ni_dvp)
 1872                 vrele(vp);
 1873         else
 1874                 vput(vp);
 1875         VFS_UNLOCK_GIANT(vfslocked);
 1876         return (error);
 1877 }
 1878 
 1879 /*
 1880  * Reposition read/write file offset.
 1881  */
 1882 #ifndef _SYS_SYSPROTO_H_
 1883 struct lseek_args {
 1884         int     fd;
 1885         int     pad;
 1886         off_t   offset;
 1887         int     whence;
 1888 };
 1889 #endif
 1890 int
 1891 lseek(td, uap)
 1892         struct thread *td;
 1893         register struct lseek_args /* {
 1894                 int fd;
 1895                 int pad;
 1896                 off_t offset;
 1897                 int whence;
 1898         } */ *uap;
 1899 {
 1900         struct ucred *cred = td->td_ucred;
 1901         struct file *fp;
 1902         struct vnode *vp;
 1903         struct vattr vattr;
 1904         off_t offset, size;
 1905         int error, noneg;
 1906         int vfslocked;
 1907 
 1908         AUDIT_ARG_FD(uap->fd);
 1909         if ((error = fget(td, uap->fd, &fp)) != 0)
 1910                 return (error);
 1911         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 1912                 fdrop(fp, td);
 1913                 return (ESPIPE);
 1914         }
 1915         vp = fp->f_vnode;
 1916         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1917         noneg = (vp->v_type != VCHR);
 1918         offset = uap->offset;
 1919         switch (uap->whence) {
 1920         case L_INCR:
 1921                 if (noneg &&
 1922                     (fp->f_offset < 0 ||
 1923                     (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
 1924                         error = EOVERFLOW;
 1925                         break;
 1926                 }
 1927                 offset += fp->f_offset;
 1928                 break;
 1929         case L_XTND:
 1930                 vn_lock(vp, LK_SHARED | LK_RETRY);
 1931                 error = VOP_GETATTR(vp, &vattr, cred);
 1932                 VOP_UNLOCK(vp, 0);
 1933                 if (error)
 1934                         break;
 1935 
 1936                 /*
 1937                  * If the file references a disk device, then fetch
 1938                  * the media size and use that to determine the ending
 1939                  * offset.
 1940                  */
 1941                 if (vattr.va_size == 0 && vp->v_type == VCHR &&
 1942                     fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 1943                         vattr.va_size = size;
 1944                 if (noneg &&
 1945                     (vattr.va_size > OFF_MAX ||
 1946                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 1947                         error = EOVERFLOW;
 1948                         break;
 1949                 }
 1950                 offset += vattr.va_size;
 1951                 break;
 1952         case L_SET:
 1953                 break;
 1954         case SEEK_DATA:
 1955                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 1956                 break;
 1957         case SEEK_HOLE:
 1958                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 1959                 break;
 1960         default:
 1961                 error = EINVAL;
 1962         }
 1963         if (error == 0 && noneg && offset < 0)
 1964                 error = EINVAL;
 1965         if (error != 0)
 1966                 goto drop;
 1967         fp->f_offset = offset;
 1968         VFS_KNOTE_UNLOCKED(vp, 0);
 1969         *(off_t *)(td->td_retval) = fp->f_offset;
 1970 drop:
 1971         fdrop(fp, td);
 1972         VFS_UNLOCK_GIANT(vfslocked);
 1973         return (error);
 1974 }
 1975 
 1976 #if defined(COMPAT_43)
 1977 /*
 1978  * Reposition read/write file offset.
 1979  */
 1980 #ifndef _SYS_SYSPROTO_H_
 1981 struct olseek_args {
 1982         int     fd;
 1983         long    offset;
 1984         int     whence;
 1985 };
 1986 #endif
 1987 int
 1988 olseek(td, uap)
 1989         struct thread *td;
 1990         register struct olseek_args /* {
 1991                 int fd;
 1992                 long offset;
 1993                 int whence;
 1994         } */ *uap;
 1995 {
 1996         struct lseek_args /* {
 1997                 int fd;
 1998                 int pad;
 1999                 off_t offset;
 2000                 int whence;
 2001         } */ nuap;
 2002 
 2003         nuap.fd = uap->fd;
 2004         nuap.offset = uap->offset;
 2005         nuap.whence = uap->whence;
 2006         return (lseek(td, &nuap));
 2007 }
 2008 #endif /* COMPAT_43 */
 2009 
 2010 /* Version with the 'pad' argument */
 2011 int
 2012 freebsd6_lseek(td, uap)
 2013         struct thread *td;
 2014         register struct freebsd6_lseek_args *uap;
 2015 {
 2016         struct lseek_args ouap;
 2017 
 2018         ouap.fd = uap->fd;
 2019         ouap.offset = uap->offset;
 2020         ouap.whence = uap->whence;
 2021         return (lseek(td, &ouap));
 2022 }
 2023 
 2024 /*
 2025  * Check access permissions using passed credentials.
 2026  */
 2027 static int
 2028 vn_access(vp, user_flags, cred, td)
 2029         struct vnode    *vp;
 2030         int             user_flags;
 2031         struct ucred    *cred;
 2032         struct thread   *td;
 2033 {
 2034         int error;
 2035         accmode_t accmode;
 2036 
 2037         /* Flags == 0 means only check for existence. */
 2038         error = 0;
 2039         if (user_flags) {
 2040                 accmode = 0;
 2041                 if (user_flags & R_OK)
 2042                         accmode |= VREAD;
 2043                 if (user_flags & W_OK)
 2044                         accmode |= VWRITE;
 2045                 if (user_flags & X_OK)
 2046                         accmode |= VEXEC;
 2047 #ifdef MAC
 2048                 error = mac_vnode_check_access(cred, vp, accmode);
 2049                 if (error)
 2050                         return (error);
 2051 #endif
 2052                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2053                         error = VOP_ACCESS(vp, accmode, cred, td);
 2054         }
 2055         return (error);
 2056 }
 2057 
 2058 /*
 2059  * Check access permissions using "real" credentials.
 2060  */
 2061 #ifndef _SYS_SYSPROTO_H_
 2062 struct access_args {
 2063         char    *path;
 2064         int     flags;
 2065 };
 2066 #endif
 2067 int
 2068 access(td, uap)
 2069         struct thread *td;
 2070         register struct access_args /* {
 2071                 char *path;
 2072                 int flags;
 2073         } */ *uap;
 2074 {
 2075 
 2076         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 2077 }
 2078 
 2079 #ifndef _SYS_SYSPROTO_H_
 2080 struct faccessat_args {
 2081         int     dirfd;
 2082         char    *path;
 2083         int     mode;
 2084         int     flag;
 2085 }
 2086 #endif
 2087 int
 2088 faccessat(struct thread *td, struct faccessat_args *uap)
 2089 {
 2090 
 2091         if (uap->flag & ~AT_EACCESS)
 2092                 return (EINVAL);
 2093         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2094             uap->mode));
 2095 }
 2096 
 2097 int
 2098 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2099 {
 2100 
 2101         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
 2102 }
 2103 
 2104 int
 2105 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2106     int flags, int mode)
 2107 {
 2108         struct ucred *cred, *tmpcred;
 2109         struct vnode *vp;
 2110         struct nameidata nd;
 2111         int vfslocked;
 2112         int error;
 2113 
 2114         /*
 2115          * Create and modify a temporary credential instead of one that
 2116          * is potentially shared.
 2117          */
 2118         if (!(flags & AT_EACCESS)) {
 2119                 cred = td->td_ucred;
 2120                 tmpcred = crdup(cred);
 2121                 tmpcred->cr_uid = cred->cr_ruid;
 2122                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2123                 td->td_ucred = tmpcred;
 2124         } else
 2125                 cred = tmpcred = td->td_ucred;
 2126         AUDIT_ARG_VALUE(mode);
 2127         NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2128             AUDITVNODE1, pathseg, path, fd, td);
 2129         if ((error = namei(&nd)) != 0)
 2130                 goto out1;
 2131         vfslocked = NDHASGIANT(&nd);
 2132         vp = nd.ni_vp;
 2133 
 2134         error = vn_access(vp, mode, tmpcred, td);
 2135         NDFREE(&nd, NDF_ONLY_PNBUF);
 2136         vput(vp);
 2137         VFS_UNLOCK_GIANT(vfslocked);
 2138 out1:
 2139         if (!(flags & AT_EACCESS)) {
 2140                 td->td_ucred = cred;
 2141                 crfree(tmpcred);
 2142         }
 2143         return (error);
 2144 }
 2145 
 2146 /*
 2147  * Check access permissions using "effective" credentials.
 2148  */
 2149 #ifndef _SYS_SYSPROTO_H_
 2150 struct eaccess_args {
 2151         char    *path;
 2152         int     flags;
 2153 };
 2154 #endif
 2155 int
 2156 eaccess(td, uap)
 2157         struct thread *td;
 2158         register struct eaccess_args /* {
 2159                 char *path;
 2160                 int flags;
 2161         } */ *uap;
 2162 {
 2163 
 2164         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 2165 }
 2166 
 2167 int
 2168 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 2169 {
 2170 
 2171         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
 2172 }
 2173 
 2174 #if defined(COMPAT_43)
 2175 /*
 2176  * Get file status; this version follows links.
 2177  */
 2178 #ifndef _SYS_SYSPROTO_H_
 2179 struct ostat_args {
 2180         char    *path;
 2181         struct ostat *ub;
 2182 };
 2183 #endif
 2184 int
 2185 ostat(td, uap)
 2186         struct thread *td;
 2187         register struct ostat_args /* {
 2188                 char *path;
 2189                 struct ostat *ub;
 2190         } */ *uap;
 2191 {
 2192         struct stat sb;
 2193         struct ostat osb;
 2194         int error;
 2195 
 2196         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2197         if (error)
 2198                 return (error);
 2199         cvtstat(&sb, &osb);
 2200         error = copyout(&osb, uap->ub, sizeof (osb));
 2201         return (error);
 2202 }
 2203 
 2204 /*
 2205  * Get file status; this version does not follow links.
 2206  */
 2207 #ifndef _SYS_SYSPROTO_H_
 2208 struct olstat_args {
 2209         char    *path;
 2210         struct ostat *ub;
 2211 };
 2212 #endif
 2213 int
 2214 olstat(td, uap)
 2215         struct thread *td;
 2216         register struct olstat_args /* {
 2217                 char *path;
 2218                 struct ostat *ub;
 2219         } */ *uap;
 2220 {
 2221         struct stat sb;
 2222         struct ostat osb;
 2223         int error;
 2224 
 2225         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2226         if (error)
 2227                 return (error);
 2228         cvtstat(&sb, &osb);
 2229         error = copyout(&osb, uap->ub, sizeof (osb));
 2230         return (error);
 2231 }
 2232 
 2233 /*
 2234  * Convert from an old to a new stat structure.
 2235  */
 2236 void
 2237 cvtstat(st, ost)
 2238         struct stat *st;
 2239         struct ostat *ost;
 2240 {
 2241 
 2242         ost->st_dev = st->st_dev;
 2243         ost->st_ino = st->st_ino;
 2244         ost->st_mode = st->st_mode;
 2245         ost->st_nlink = st->st_nlink;
 2246         ost->st_uid = st->st_uid;
 2247         ost->st_gid = st->st_gid;
 2248         ost->st_rdev = st->st_rdev;
 2249         if (st->st_size < (quad_t)1 << 32)
 2250                 ost->st_size = st->st_size;
 2251         else
 2252                 ost->st_size = -2;
 2253         ost->st_atime = st->st_atime;
 2254         ost->st_mtime = st->st_mtime;
 2255         ost->st_ctime = st->st_ctime;
 2256         ost->st_blksize = st->st_blksize;
 2257         ost->st_blocks = st->st_blocks;
 2258         ost->st_flags = st->st_flags;
 2259         ost->st_gen = st->st_gen;
 2260 }
 2261 #endif /* COMPAT_43 */
 2262 
 2263 /*
 2264  * Get file status; this version follows links.
 2265  */
 2266 #ifndef _SYS_SYSPROTO_H_
 2267 struct stat_args {
 2268         char    *path;
 2269         struct stat *ub;
 2270 };
 2271 #endif
 2272 int
 2273 stat(td, uap)
 2274         struct thread *td;
 2275         register struct stat_args /* {
 2276                 char *path;
 2277                 struct stat *ub;
 2278         } */ *uap;
 2279 {
 2280         struct stat sb;
 2281         int error;
 2282 
 2283         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2284         if (error == 0)
 2285                 error = copyout(&sb, uap->ub, sizeof (sb));
 2286         return (error);
 2287 }
 2288 
 2289 #ifndef _SYS_SYSPROTO_H_
 2290 struct fstatat_args {
 2291         int     fd;
 2292         char    *path;
 2293         struct stat     *buf;
 2294         int     flag;
 2295 }
 2296 #endif
 2297 int
 2298 fstatat(struct thread *td, struct fstatat_args *uap)
 2299 {
 2300         struct stat sb;
 2301         int error;
 2302 
 2303         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2304             UIO_USERSPACE, &sb);
 2305         if (error == 0)
 2306                 error = copyout(&sb, uap->buf, sizeof (sb));
 2307         return (error);
 2308 }
 2309 
 2310 int
 2311 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2312 {
 2313 
 2314         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2315 }
 2316 
 2317 int
 2318 kern_statat(struct thread *td, int flag, int fd, char *path,
 2319     enum uio_seg pathseg, struct stat *sbp)
 2320 {
 2321 
 2322         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2323 }
 2324 
 2325 int
 2326 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2327     enum uio_seg pathseg, struct stat *sbp,
 2328     void (*hook)(struct vnode *vp, struct stat *sbp))
 2329 {
 2330         struct nameidata nd;
 2331         struct stat sb;
 2332         int error, vfslocked;
 2333 
 2334         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2335                 return (EINVAL);
 2336 
 2337         NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2338             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
 2339             path, fd, td);
 2340 
 2341         if ((error = namei(&nd)) != 0)
 2342                 return (error);
 2343         vfslocked = NDHASGIANT(&nd);
 2344         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2345         if (!error) {
 2346                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2347                 if (S_ISREG(sb.st_mode))
 2348                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2349                 if (__predict_false(hook != NULL))
 2350                         hook(nd.ni_vp, &sb);
 2351         }
 2352         NDFREE(&nd, NDF_ONLY_PNBUF);
 2353         vput(nd.ni_vp);
 2354         VFS_UNLOCK_GIANT(vfslocked);
 2355         if (error)
 2356                 return (error);
 2357         *sbp = sb;
 2358 #ifdef KTRACE
 2359         if (KTRPOINT(td, KTR_STRUCT))
 2360                 ktrstat(&sb);
 2361 #endif
 2362         return (0);
 2363 }
 2364 
 2365 /*
 2366  * Get file status; this version does not follow links.
 2367  */
 2368 #ifndef _SYS_SYSPROTO_H_
 2369 struct lstat_args {
 2370         char    *path;
 2371         struct stat *ub;
 2372 };
 2373 #endif
 2374 int
 2375 lstat(td, uap)
 2376         struct thread *td;
 2377         register struct lstat_args /* {
 2378                 char *path;
 2379                 struct stat *ub;
 2380         } */ *uap;
 2381 {
 2382         struct stat sb;
 2383         int error;
 2384 
 2385         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2386         if (error == 0)
 2387                 error = copyout(&sb, uap->ub, sizeof (sb));
 2388         return (error);
 2389 }
 2390 
 2391 int
 2392 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2393 {
 2394 
 2395         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2396             sbp));
 2397 }
 2398 
 2399 /*
 2400  * Implementation of the NetBSD [l]stat() functions.
 2401  */
 2402 void
 2403 cvtnstat(sb, nsb)
 2404         struct stat *sb;
 2405         struct nstat *nsb;
 2406 {
 2407         bzero(nsb, sizeof *nsb);
 2408         nsb->st_dev = sb->st_dev;
 2409         nsb->st_ino = sb->st_ino;
 2410         nsb->st_mode = sb->st_mode;
 2411         nsb->st_nlink = sb->st_nlink;
 2412         nsb->st_uid = sb->st_uid;
 2413         nsb->st_gid = sb->st_gid;
 2414         nsb->st_rdev = sb->st_rdev;
 2415         nsb->st_atimespec = sb->st_atimespec;
 2416         nsb->st_mtimespec = sb->st_mtimespec;
 2417         nsb->st_ctimespec = sb->st_ctimespec;
 2418         nsb->st_size = sb->st_size;
 2419         nsb->st_blocks = sb->st_blocks;
 2420         nsb->st_blksize = sb->st_blksize;
 2421         nsb->st_flags = sb->st_flags;
 2422         nsb->st_gen = sb->st_gen;
 2423         nsb->st_birthtimespec = sb->st_birthtimespec;
 2424 }
 2425 
 2426 #ifndef _SYS_SYSPROTO_H_
 2427 struct nstat_args {
 2428         char    *path;
 2429         struct nstat *ub;
 2430 };
 2431 #endif
 2432 int
 2433 nstat(td, uap)
 2434         struct thread *td;
 2435         register struct nstat_args /* {
 2436                 char *path;
 2437                 struct nstat *ub;
 2438         } */ *uap;
 2439 {
 2440         struct stat sb;
 2441         struct nstat nsb;
 2442         int error;
 2443 
 2444         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2445         if (error)
 2446                 return (error);
 2447         cvtnstat(&sb, &nsb);
 2448         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2449         return (error);
 2450 }
 2451 
 2452 /*
 2453  * NetBSD lstat.  Get file status; this version does not follow links.
 2454  */
 2455 #ifndef _SYS_SYSPROTO_H_
 2456 struct lstat_args {
 2457         char    *path;
 2458         struct stat *ub;
 2459 };
 2460 #endif
 2461 int
 2462 nlstat(td, uap)
 2463         struct thread *td;
 2464         register struct nlstat_args /* {
 2465                 char *path;
 2466                 struct nstat *ub;
 2467         } */ *uap;
 2468 {
 2469         struct stat sb;
 2470         struct nstat nsb;
 2471         int error;
 2472 
 2473         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2474         if (error)
 2475                 return (error);
 2476         cvtnstat(&sb, &nsb);
 2477         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2478         return (error);
 2479 }
 2480 
 2481 /*
 2482  * Get configurable pathname variables.
 2483  */
 2484 #ifndef _SYS_SYSPROTO_H_
 2485 struct pathconf_args {
 2486         char    *path;
 2487         int     name;
 2488 };
 2489 #endif
 2490 int
 2491 pathconf(td, uap)
 2492         struct thread *td;
 2493         register struct pathconf_args /* {
 2494                 char *path;
 2495                 int name;
 2496         } */ *uap;
 2497 {
 2498 
 2499         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2500 }
 2501 
 2502 #ifndef _SYS_SYSPROTO_H_
 2503 struct lpathconf_args {
 2504         char    *path;
 2505         int     name;
 2506 };
 2507 #endif
 2508 int
 2509 lpathconf(td, uap)
 2510         struct thread *td;
 2511         register struct lpathconf_args /* {
 2512                 char *path;
 2513                 int name;
 2514         } */ *uap;
 2515 {
 2516 
 2517         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
 2518 }
 2519 
 2520 int
 2521 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2522     u_long flags)
 2523 {
 2524         struct nameidata nd;
 2525         int error, vfslocked;
 2526 
 2527         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
 2528             flags, pathseg, path, td);
 2529         if ((error = namei(&nd)) != 0)
 2530                 return (error);
 2531         vfslocked = NDHASGIANT(&nd);
 2532         NDFREE(&nd, NDF_ONLY_PNBUF);
 2533 
 2534         /* If asynchronous I/O is available, it works for all files. */
 2535         if (name == _PC_ASYNC_IO)
 2536                 td->td_retval[0] = async_io_version;
 2537         else
 2538                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2539         vput(nd.ni_vp);
 2540         VFS_UNLOCK_GIANT(vfslocked);
 2541         return (error);
 2542 }
 2543 
 2544 /*
 2545  * Return target name of a symbolic link.
 2546  */
 2547 #ifndef _SYS_SYSPROTO_H_
 2548 struct readlink_args {
 2549         char    *path;
 2550         char    *buf;
 2551         size_t  count;
 2552 };
 2553 #endif
 2554 int
 2555 readlink(td, uap)
 2556         struct thread *td;
 2557         register struct readlink_args /* {
 2558                 char *path;
 2559                 char *buf;
 2560                 size_t count;
 2561         } */ *uap;
 2562 {
 2563 
 2564         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2565             UIO_USERSPACE, uap->count));
 2566 }
 2567 #ifndef _SYS_SYSPROTO_H_
 2568 struct readlinkat_args {
 2569         int     fd;
 2570         char    *path;
 2571         char    *buf;
 2572         size_t  bufsize;
 2573 };
 2574 #endif
 2575 int
 2576 readlinkat(struct thread *td, struct readlinkat_args *uap)
 2577 {
 2578 
 2579         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2580             uap->buf, UIO_USERSPACE, uap->bufsize));
 2581 }
 2582 
 2583 int
 2584 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2585     enum uio_seg bufseg, size_t count)
 2586 {
 2587 
 2588         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2589             count));
 2590 }
 2591 
 2592 int
 2593 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2594     char *buf, enum uio_seg bufseg, size_t count)
 2595 {
 2596         struct vnode *vp;
 2597         struct iovec aiov;
 2598         struct uio auio;
 2599         int error;
 2600         struct nameidata nd;
 2601         int vfslocked;
 2602 
 2603         if (count > INT_MAX)
 2604                 return (EINVAL);
 2605 
 2606         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2607             AUDITVNODE1, pathseg, path, fd, td);
 2608 
 2609         if ((error = namei(&nd)) != 0)
 2610                 return (error);
 2611         NDFREE(&nd, NDF_ONLY_PNBUF);
 2612         vfslocked = NDHASGIANT(&nd);
 2613         vp = nd.ni_vp;
 2614 #ifdef MAC
 2615         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2616         if (error) {
 2617                 vput(vp);
 2618                 VFS_UNLOCK_GIANT(vfslocked);
 2619                 return (error);
 2620         }
 2621 #endif
 2622         if (vp->v_type != VLNK)
 2623                 error = EINVAL;
 2624         else {
 2625                 aiov.iov_base = buf;
 2626                 aiov.iov_len = count;
 2627                 auio.uio_iov = &aiov;
 2628                 auio.uio_iovcnt = 1;
 2629                 auio.uio_offset = 0;
 2630                 auio.uio_rw = UIO_READ;
 2631                 auio.uio_segflg = bufseg;
 2632                 auio.uio_td = td;
 2633                 auio.uio_resid = count;
 2634                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2635         }
 2636         vput(vp);
 2637         VFS_UNLOCK_GIANT(vfslocked);
 2638         td->td_retval[0] = count - auio.uio_resid;
 2639         return (error);
 2640 }
 2641 
 2642 /*
 2643  * Common implementation code for chflags() and fchflags().
 2644  */
 2645 static int
 2646 setfflags(td, vp, flags)
 2647         struct thread *td;
 2648         struct vnode *vp;
 2649         int flags;
 2650 {
 2651         int error;
 2652         struct mount *mp;
 2653         struct vattr vattr;
 2654 
 2655         /*
 2656          * Prevent non-root users from setting flags on devices.  When
 2657          * a device is reused, users can retain ownership of the device
 2658          * if they are allowed to set flags and programs assume that
 2659          * chown can't fail when done as root.
 2660          */
 2661         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2662                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2663                 if (error)
 2664                         return (error);
 2665         }
 2666 
 2667         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2668                 return (error);
 2669         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2670         VATTR_NULL(&vattr);
 2671         vattr.va_flags = flags;
 2672 #ifdef MAC
 2673         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2674         if (error == 0)
 2675 #endif
 2676                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2677         VOP_UNLOCK(vp, 0);
 2678         vn_finished_write(mp);
 2679         return (error);
 2680 }
 2681 
 2682 /*
 2683  * Change flags of a file given a path name.
 2684  */
 2685 #ifndef _SYS_SYSPROTO_H_
 2686 struct chflags_args {
 2687         char    *path;
 2688         int     flags;
 2689 };
 2690 #endif
 2691 int
 2692 chflags(td, uap)
 2693         struct thread *td;
 2694         register struct chflags_args /* {
 2695                 char *path;
 2696                 int flags;
 2697         } */ *uap;
 2698 {
 2699         int error;
 2700         struct nameidata nd;
 2701         int vfslocked;
 2702 
 2703         AUDIT_ARG_FFLAGS(uap->flags);
 2704         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2705             uap->path, td);
 2706         if ((error = namei(&nd)) != 0)
 2707                 return (error);
 2708         NDFREE(&nd, NDF_ONLY_PNBUF);
 2709         vfslocked = NDHASGIANT(&nd);
 2710         error = setfflags(td, nd.ni_vp, uap->flags);
 2711         vrele(nd.ni_vp);
 2712         VFS_UNLOCK_GIANT(vfslocked);
 2713         return (error);
 2714 }
 2715 
 2716 /*
 2717  * Same as chflags() but doesn't follow symlinks.
 2718  */
 2719 int
 2720 lchflags(td, uap)
 2721         struct thread *td;
 2722         register struct lchflags_args /* {
 2723                 char *path;
 2724                 int flags;
 2725         } */ *uap;
 2726 {
 2727         int error;
 2728         struct nameidata nd;
 2729         int vfslocked;
 2730 
 2731         AUDIT_ARG_FFLAGS(uap->flags);
 2732         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2733             uap->path, td);
 2734         if ((error = namei(&nd)) != 0)
 2735                 return (error);
 2736         vfslocked = NDHASGIANT(&nd);
 2737         NDFREE(&nd, NDF_ONLY_PNBUF);
 2738         error = setfflags(td, nd.ni_vp, uap->flags);
 2739         vrele(nd.ni_vp);
 2740         VFS_UNLOCK_GIANT(vfslocked);
 2741         return (error);
 2742 }
 2743 
 2744 /*
 2745  * Change flags of a file given a file descriptor.
 2746  */
 2747 #ifndef _SYS_SYSPROTO_H_
 2748 struct fchflags_args {
 2749         int     fd;
 2750         int     flags;
 2751 };
 2752 #endif
 2753 int
 2754 fchflags(td, uap)
 2755         struct thread *td;
 2756         register struct fchflags_args /* {
 2757                 int fd;
 2758                 int flags;
 2759         } */ *uap;
 2760 {
 2761         struct file *fp;
 2762         int vfslocked;
 2763         int error;
 2764 
 2765         AUDIT_ARG_FD(uap->fd);
 2766         AUDIT_ARG_FFLAGS(uap->flags);
 2767         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 2768                 return (error);
 2769         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2770 #ifdef AUDIT
 2771         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2772         AUDIT_ARG_VNODE1(fp->f_vnode);
 2773         VOP_UNLOCK(fp->f_vnode, 0);
 2774 #endif
 2775         error = setfflags(td, fp->f_vnode, uap->flags);
 2776         VFS_UNLOCK_GIANT(vfslocked);
 2777         fdrop(fp, td);
 2778         return (error);
 2779 }
 2780 
 2781 /*
 2782  * Common implementation code for chmod(), lchmod() and fchmod().
 2783  */
 2784 static int
 2785 setfmode(td, vp, mode)
 2786         struct thread *td;
 2787         struct vnode *vp;
 2788         int mode;
 2789 {
 2790         int error;
 2791         struct mount *mp;
 2792         struct vattr vattr;
 2793 
 2794         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2795                 return (error);
 2796         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2797         VATTR_NULL(&vattr);
 2798         vattr.va_mode = mode & ALLPERMS;
 2799 #ifdef MAC
 2800         error = mac_vnode_check_setmode(td->td_ucred, vp, vattr.va_mode);
 2801         if (error == 0)
 2802 #endif
 2803                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2804         VOP_UNLOCK(vp, 0);
 2805         vn_finished_write(mp);
 2806         return (error);
 2807 }
 2808 
 2809 /*
 2810  * Change mode of a file given path name.
 2811  */
 2812 #ifndef _SYS_SYSPROTO_H_
 2813 struct chmod_args {
 2814         char    *path;
 2815         int     mode;
 2816 };
 2817 #endif
 2818 int
 2819 chmod(td, uap)
 2820         struct thread *td;
 2821         register struct chmod_args /* {
 2822                 char *path;
 2823                 int mode;
 2824         } */ *uap;
 2825 {
 2826 
 2827         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2828 }
 2829 
 2830 #ifndef _SYS_SYSPROTO_H_
 2831 struct fchmodat_args {
 2832         int     dirfd;
 2833         char    *path;
 2834         mode_t  mode;
 2835         int     flag;
 2836 }
 2837 #endif
 2838 int
 2839 fchmodat(struct thread *td, struct fchmodat_args *uap)
 2840 {
 2841         int flag = uap->flag;
 2842         int fd = uap->fd;
 2843         char *path = uap->path;
 2844         mode_t mode = uap->mode;
 2845 
 2846         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2847                 return (EINVAL);
 2848 
 2849         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2850 }
 2851 
 2852 int
 2853 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2854 {
 2855 
 2856         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2857 }
 2858 
 2859 /*
 2860  * Change mode of a file given path name (don't follow links.)
 2861  */
 2862 #ifndef _SYS_SYSPROTO_H_
 2863 struct lchmod_args {
 2864         char    *path;
 2865         int     mode;
 2866 };
 2867 #endif
 2868 int
 2869 lchmod(td, uap)
 2870         struct thread *td;
 2871         register struct lchmod_args /* {
 2872                 char *path;
 2873                 int mode;
 2874         } */ *uap;
 2875 {
 2876 
 2877         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2878             uap->mode, AT_SYMLINK_NOFOLLOW));
 2879 }
 2880 
 2881 
 2882 int
 2883 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2884     mode_t mode, int flag)
 2885 {
 2886         int error;
 2887         struct nameidata nd;
 2888         int vfslocked;
 2889         int follow;
 2890 
 2891         AUDIT_ARG_MODE(mode);
 2892         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2893         NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
 2894             fd, td);
 2895         if ((error = namei(&nd)) != 0)
 2896                 return (error);
 2897         vfslocked = NDHASGIANT(&nd);
 2898         NDFREE(&nd, NDF_ONLY_PNBUF);
 2899         error = setfmode(td, nd.ni_vp, mode);
 2900         vrele(nd.ni_vp);
 2901         VFS_UNLOCK_GIANT(vfslocked);
 2902         return (error);
 2903 }
 2904 
 2905 /*
 2906  * Change mode of a file given a file descriptor.
 2907  */
 2908 #ifndef _SYS_SYSPROTO_H_
 2909 struct fchmod_args {
 2910         int     fd;
 2911         int     mode;
 2912 };
 2913 #endif
 2914 int
 2915 fchmod(td, uap)
 2916         struct thread *td;
 2917         register struct fchmod_args /* {
 2918                 int fd;
 2919                 int mode;
 2920         } */ *uap;
 2921 {
 2922         struct file *fp;
 2923         int vfslocked;
 2924         int error;
 2925 
 2926         AUDIT_ARG_FD(uap->fd);
 2927         AUDIT_ARG_MODE(uap->mode);
 2928         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 2929                 return (error);
 2930         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2931 #ifdef AUDIT
 2932         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2933         AUDIT_ARG_VNODE1(fp->f_vnode);
 2934         VOP_UNLOCK(fp->f_vnode, 0);
 2935 #endif
 2936         error = setfmode(td, fp->f_vnode, uap->mode);
 2937         VFS_UNLOCK_GIANT(vfslocked);
 2938         fdrop(fp, td);
 2939         return (error);
 2940 }
 2941 
 2942 /*
 2943  * Common implementation for chown(), lchown(), and fchown()
 2944  */
 2945 static int
 2946 setfown(td, vp, uid, gid)
 2947         struct thread *td;
 2948         struct vnode *vp;
 2949         uid_t uid;
 2950         gid_t gid;
 2951 {
 2952         int error;
 2953         struct mount *mp;
 2954         struct vattr vattr;
 2955 
 2956         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2957                 return (error);
 2958         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2959         VATTR_NULL(&vattr);
 2960         vattr.va_uid = uid;
 2961         vattr.va_gid = gid;
 2962 #ifdef MAC
 2963         error = mac_vnode_check_setowner(td->td_ucred, vp, vattr.va_uid,
 2964             vattr.va_gid);
 2965         if (error == 0)
 2966 #endif
 2967                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2968         VOP_UNLOCK(vp, 0);
 2969         vn_finished_write(mp);
 2970         return (error);
 2971 }
 2972 
 2973 /*
 2974  * Set ownership given a path name.
 2975  */
 2976 #ifndef _SYS_SYSPROTO_H_
 2977 struct chown_args {
 2978         char    *path;
 2979         int     uid;
 2980         int     gid;
 2981 };
 2982 #endif
 2983 int
 2984 chown(td, uap)
 2985         struct thread *td;
 2986         register struct chown_args /* {
 2987                 char *path;
 2988                 int uid;
 2989                 int gid;
 2990         } */ *uap;
 2991 {
 2992 
 2993         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2994 }
 2995 
 2996 #ifndef _SYS_SYSPROTO_H_
 2997 struct fchownat_args {
 2998         int fd;
 2999         const char * path;
 3000         uid_t uid;
 3001         gid_t gid;
 3002         int flag;
 3003 };
 3004 #endif
 3005 int
 3006 fchownat(struct thread *td, struct fchownat_args *uap)
 3007 {
 3008         int flag;
 3009 
 3010         flag = uap->flag;
 3011         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3012                 return (EINVAL);
 3013 
 3014         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3015             uap->gid, uap->flag));
 3016 }
 3017 
 3018 int
 3019 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3020     int gid)
 3021 {
 3022 
 3023         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 3024 }
 3025 
 3026 int
 3027 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3028     int uid, int gid, int flag)
 3029 {
 3030         struct nameidata nd;
 3031         int error, vfslocked, follow;
 3032 
 3033         AUDIT_ARG_OWNER(uid, gid);
 3034         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3035         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
 3036             fd, td);
 3037 
 3038         if ((error = namei(&nd)) != 0)
 3039                 return (error);
 3040         vfslocked = NDHASGIANT(&nd);
 3041         NDFREE(&nd, NDF_ONLY_PNBUF);
 3042         error = setfown(td, nd.ni_vp, uid, gid);
 3043         vrele(nd.ni_vp);
 3044         VFS_UNLOCK_GIANT(vfslocked);
 3045         return (error);
 3046 }
 3047 
 3048 /*
 3049  * Set ownership given a path name, do not cross symlinks.
 3050  */
 3051 #ifndef _SYS_SYSPROTO_H_
 3052 struct lchown_args {
 3053         char    *path;
 3054         int     uid;
 3055         int     gid;
 3056 };
 3057 #endif
 3058 int
 3059 lchown(td, uap)
 3060         struct thread *td;
 3061         register struct lchown_args /* {
 3062                 char *path;
 3063                 int uid;
 3064                 int gid;
 3065         } */ *uap;
 3066 {
 3067 
 3068         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3069 }
 3070 
 3071 int
 3072 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3073     int gid)
 3074 {
 3075 
 3076         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3077             AT_SYMLINK_NOFOLLOW));
 3078 }
 3079 
 3080 /*
 3081  * Set ownership given a file descriptor.
 3082  */
 3083 #ifndef _SYS_SYSPROTO_H_
 3084 struct fchown_args {
 3085         int     fd;
 3086         int     uid;
 3087         int     gid;
 3088 };
 3089 #endif
 3090 int
 3091 fchown(td, uap)
 3092         struct thread *td;
 3093         register struct fchown_args /* {
 3094                 int fd;
 3095                 int uid;
 3096                 int gid;
 3097         } */ *uap;
 3098 {
 3099         struct file *fp;
 3100         int vfslocked;
 3101         int error;
 3102 
 3103         AUDIT_ARG_FD(uap->fd);
 3104         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3105         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3106                 return (error);
 3107         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3108 #ifdef AUDIT
 3109         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3110         AUDIT_ARG_VNODE1(fp->f_vnode);
 3111         VOP_UNLOCK(fp->f_vnode, 0);
 3112 #endif
 3113         error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
 3114         VFS_UNLOCK_GIANT(vfslocked);
 3115         fdrop(fp, td);
 3116         return (error);
 3117 }
 3118 
 3119 /*
 3120  * Common implementation code for utimes(), lutimes(), and futimes().
 3121  */
 3122 static int
 3123 getutimes(usrtvp, tvpseg, tsp)
 3124         const struct timeval *usrtvp;
 3125         enum uio_seg tvpseg;
 3126         struct timespec *tsp;
 3127 {
 3128         struct timeval tv[2];
 3129         const struct timeval *tvp;
 3130         int error;
 3131 
 3132         if (usrtvp == NULL) {
 3133                 vfs_timestamp(&tsp[0]);
 3134                 tsp[1] = tsp[0];
 3135         } else {
 3136                 if (tvpseg == UIO_SYSSPACE) {
 3137                         tvp = usrtvp;
 3138                 } else {
 3139                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3140                                 return (error);
 3141                         tvp = tv;
 3142                 }
 3143 
 3144                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3145                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3146                         return (EINVAL);
 3147                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3148                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3149         }
 3150         return (0);
 3151 }
 3152 
 3153 /*
 3154  * Common implementation code for utimes(), lutimes(), and futimes().
 3155  */
 3156 static int
 3157 setutimes(td, vp, ts, numtimes, nullflag)
 3158         struct thread *td;
 3159         struct vnode *vp;
 3160         const struct timespec *ts;
 3161         int numtimes;
 3162         int nullflag;
 3163 {
 3164         int error, setbirthtime;
 3165         struct mount *mp;
 3166         struct vattr vattr;
 3167 
 3168         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3169                 return (error);
 3170         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3171         setbirthtime = 0;
 3172         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3173             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3174                 setbirthtime = 1;
 3175         VATTR_NULL(&vattr);
 3176         vattr.va_atime = ts[0];
 3177         vattr.va_mtime = ts[1];
 3178         if (setbirthtime)
 3179                 vattr.va_birthtime = ts[1];
 3180         if (numtimes > 2)
 3181                 vattr.va_birthtime = ts[2];
 3182         if (nullflag)
 3183                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3184 #ifdef MAC
 3185         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3186             vattr.va_mtime);
 3187 #endif
 3188         if (error == 0)
 3189                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3190         VOP_UNLOCK(vp, 0);
 3191         vn_finished_write(mp);
 3192         return (error);
 3193 }
 3194 
 3195 /*
 3196  * Set the access and modification times of a file.
 3197  */
 3198 #ifndef _SYS_SYSPROTO_H_
 3199 struct utimes_args {
 3200         char    *path;
 3201         struct  timeval *tptr;
 3202 };
 3203 #endif
 3204 int
 3205 utimes(td, uap)
 3206         struct thread *td;
 3207         register struct utimes_args /* {
 3208                 char *path;
 3209                 struct timeval *tptr;
 3210         } */ *uap;
 3211 {
 3212 
 3213         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3214             UIO_USERSPACE));
 3215 }
 3216 
 3217 #ifndef _SYS_SYSPROTO_H_
 3218 struct futimesat_args {
 3219         int fd;
 3220         const char * path;
 3221         const struct timeval * times;
 3222 };
 3223 #endif
 3224 int
 3225 futimesat(struct thread *td, struct futimesat_args *uap)
 3226 {
 3227 
 3228         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3229             uap->times, UIO_USERSPACE));
 3230 }
 3231 
 3232 int
 3233 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3234     struct timeval *tptr, enum uio_seg tptrseg)
 3235 {
 3236 
 3237         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3238 }
 3239 
 3240 int
 3241 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3242     struct timeval *tptr, enum uio_seg tptrseg)
 3243 {
 3244         struct nameidata nd;
 3245         struct timespec ts[2];
 3246         int error, vfslocked;
 3247 
 3248         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3249                 return (error);
 3250         NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
 3251             fd, td);
 3252 
 3253         if ((error = namei(&nd)) != 0)
 3254                 return (error);
 3255         vfslocked = NDHASGIANT(&nd);
 3256         NDFREE(&nd, NDF_ONLY_PNBUF);
 3257         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3258         vrele(nd.ni_vp);
 3259         VFS_UNLOCK_GIANT(vfslocked);
 3260         return (error);
 3261 }
 3262 
 3263 /*
 3264  * Set the access and modification times of a file.
 3265  */
 3266 #ifndef _SYS_SYSPROTO_H_
 3267 struct lutimes_args {
 3268         char    *path;
 3269         struct  timeval *tptr;
 3270 };
 3271 #endif
 3272 int
 3273 lutimes(td, uap)
 3274         struct thread *td;
 3275         register struct lutimes_args /* {
 3276                 char *path;
 3277                 struct timeval *tptr;
 3278         } */ *uap;
 3279 {
 3280 
 3281         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3282             UIO_USERSPACE));
 3283 }
 3284 
 3285 int
 3286 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3287     struct timeval *tptr, enum uio_seg tptrseg)
 3288 {
 3289         struct timespec ts[2];
 3290         int error;
 3291         struct nameidata nd;
 3292         int vfslocked;
 3293 
 3294         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3295                 return (error);
 3296         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3297         if ((error = namei(&nd)) != 0)
 3298                 return (error);
 3299         vfslocked = NDHASGIANT(&nd);
 3300         NDFREE(&nd, NDF_ONLY_PNBUF);
 3301         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3302         vrele(nd.ni_vp);
 3303         VFS_UNLOCK_GIANT(vfslocked);
 3304         return (error);
 3305 }
 3306 
 3307 /*
 3308  * Set the access and modification times of a file.
 3309  */
 3310 #ifndef _SYS_SYSPROTO_H_
 3311 struct futimes_args {
 3312         int     fd;
 3313         struct  timeval *tptr;
 3314 };
 3315 #endif
 3316 int
 3317 futimes(td, uap)
 3318         struct thread *td;
 3319         register struct futimes_args /* {
 3320                 int  fd;
 3321                 struct timeval *tptr;
 3322         } */ *uap;
 3323 {
 3324 
 3325         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3326 }
 3327 
 3328 int
 3329 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3330     enum uio_seg tptrseg)
 3331 {
 3332         struct timespec ts[2];
 3333         struct file *fp;
 3334         int vfslocked;
 3335         int error;
 3336 
 3337         AUDIT_ARG_FD(fd);
 3338         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3339                 return (error);
 3340         if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
 3341                 return (error);
 3342         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3343 #ifdef AUDIT
 3344         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3345         AUDIT_ARG_VNODE1(fp->f_vnode);
 3346         VOP_UNLOCK(fp->f_vnode, 0);
 3347 #endif
 3348         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3349         VFS_UNLOCK_GIANT(vfslocked);
 3350         fdrop(fp, td);
 3351         return (error);
 3352 }
 3353 
 3354 /*
 3355  * Truncate a file given its path name.
 3356  */
 3357 #ifndef _SYS_SYSPROTO_H_
 3358 struct truncate_args {
 3359         char    *path;
 3360         int     pad;
 3361         off_t   length;
 3362 };
 3363 #endif
 3364 int
 3365 truncate(td, uap)
 3366         struct thread *td;
 3367         register struct truncate_args /* {
 3368                 char *path;
 3369                 int pad;
 3370                 off_t length;
 3371         } */ *uap;
 3372 {
 3373 
 3374         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3375 }
 3376 
 3377 int
 3378 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3379 {
 3380         struct mount *mp;
 3381         struct vnode *vp;
 3382         struct vattr vattr;
 3383         int error;
 3384         struct nameidata nd;
 3385         int vfslocked;
 3386 
 3387         if (length < 0)
 3388                 return(EINVAL);
 3389         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3390         if ((error = namei(&nd)) != 0)
 3391                 return (error);
 3392         vfslocked = NDHASGIANT(&nd);
 3393         vp = nd.ni_vp;
 3394         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3395                 vrele(vp);
 3396                 VFS_UNLOCK_GIANT(vfslocked);
 3397                 return (error);
 3398         }
 3399         NDFREE(&nd, NDF_ONLY_PNBUF);
 3400         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3401         if (vp->v_type == VDIR)
 3402                 error = EISDIR;
 3403 #ifdef MAC
 3404         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3405         }
 3406 #endif
 3407         else if ((error = vn_writechk(vp)) == 0 &&
 3408             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3409                 VATTR_NULL(&vattr);
 3410                 vattr.va_size = length;
 3411                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3412         }
 3413         vput(vp);
 3414         vn_finished_write(mp);
 3415         VFS_UNLOCK_GIANT(vfslocked);
 3416         return (error);
 3417 }
 3418 
 3419 #if defined(COMPAT_43)
 3420 /*
 3421  * Truncate a file given its path name.
 3422  */
 3423 #ifndef _SYS_SYSPROTO_H_
 3424 struct otruncate_args {
 3425         char    *path;
 3426         long    length;
 3427 };
 3428 #endif
 3429 int
 3430 otruncate(td, uap)
 3431         struct thread *td;
 3432         register struct otruncate_args /* {
 3433                 char *path;
 3434                 long length;
 3435         } */ *uap;
 3436 {
 3437         struct truncate_args /* {
 3438                 char *path;
 3439                 int pad;
 3440                 off_t length;
 3441         } */ nuap;
 3442 
 3443         nuap.path = uap->path;
 3444         nuap.length = uap->length;
 3445         return (truncate(td, &nuap));
 3446 }
 3447 #endif /* COMPAT_43 */
 3448 
 3449 /* Versions with the pad argument */
 3450 int
 3451 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3452 {
 3453         struct truncate_args ouap;
 3454 
 3455         ouap.path = uap->path;
 3456         ouap.length = uap->length;
 3457         return (truncate(td, &ouap));
 3458 }
 3459 
 3460 int
 3461 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3462 {
 3463         struct ftruncate_args ouap;
 3464 
 3465         ouap.fd = uap->fd;
 3466         ouap.length = uap->length;
 3467         return (ftruncate(td, &ouap));
 3468 }
 3469 
 3470 /*
 3471  * Sync an open file.
 3472  */
 3473 #ifndef _SYS_SYSPROTO_H_
 3474 struct fsync_args {
 3475         int     fd;
 3476 };
 3477 #endif
 3478 int
 3479 fsync(td, uap)
 3480         struct thread *td;
 3481         struct fsync_args /* {
 3482                 int fd;
 3483         } */ *uap;
 3484 {
 3485         struct vnode *vp;
 3486         struct mount *mp;
 3487         struct file *fp;
 3488         int vfslocked;
 3489         int error, lock_flags;
 3490 
 3491         AUDIT_ARG_FD(uap->fd);
 3492         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3493                 return (error);
 3494         vp = fp->f_vnode;
 3495         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3496         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3497                 goto drop;
 3498         if (MNT_SHARED_WRITES(mp) ||
 3499             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3500                 lock_flags = LK_SHARED;
 3501         } else {
 3502                 lock_flags = LK_EXCLUSIVE;
 3503         }
 3504         vn_lock(vp, lock_flags | LK_RETRY);
 3505         AUDIT_ARG_VNODE1(vp);
 3506         if (vp->v_object != NULL) {
 3507                 VM_OBJECT_LOCK(vp->v_object);
 3508                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3509                 VM_OBJECT_UNLOCK(vp->v_object);
 3510         }
 3511         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3512 
 3513         VOP_UNLOCK(vp, 0);
 3514         vn_finished_write(mp);
 3515 drop:
 3516         VFS_UNLOCK_GIANT(vfslocked);
 3517         fdrop(fp, td);
 3518         return (error);
 3519 }
 3520 
 3521 /*
 3522  * Rename files.  Source and destination must either both be directories, or
 3523  * both not be directories.  If target is a directory, it must be empty.
 3524  */
 3525 #ifndef _SYS_SYSPROTO_H_
 3526 struct rename_args {
 3527         char    *from;
 3528         char    *to;
 3529 };
 3530 #endif
 3531 int
 3532 rename(td, uap)
 3533         struct thread *td;
 3534         register struct rename_args /* {
 3535                 char *from;
 3536                 char *to;
 3537         } */ *uap;
 3538 {
 3539 
 3540         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3541 }
 3542 
 3543 #ifndef _SYS_SYSPROTO_H_
 3544 struct renameat_args {
 3545         int     oldfd;
 3546         char    *old;
 3547         int     newfd;
 3548         char    *new;
 3549 };
 3550 #endif
 3551 int
 3552 renameat(struct thread *td, struct renameat_args *uap)
 3553 {
 3554 
 3555         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3556             UIO_USERSPACE));
 3557 }
 3558 
 3559 int
 3560 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3561 {
 3562 
 3563         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3564 }
 3565 
/*
 * Rename the file named by (oldfd, old) to the name (newfd, new).
 *
 * Both names are looked up with namei(), a set of type and identity checks
 * is applied, and the rename itself is done by VOP_RENAME().
 *
 * Returns 0 on success or an errno value.  Errors produced here:
 *   ENOTDIR  source is a directory but the existing target is not;
 *   EISDIR   source is not a directory but the existing target is;
 *   EINVAL   the source vnode is the target's parent directory, or the
 *            target lookup returned EISDIR for a directory source.
 * If source and target resolve to the same vnode nothing is done and 0 is
 * returned.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
        struct mount *mp = NULL;
        struct vnode *tvp, *fvp, *tdvp;
        struct nameidata fromnd, tond;
        int tvfslocked;
        int fvfslocked;
        int error;

        bwillwrite();
        /*
         * Source lookup.  With MAC the parent and leaf are returned locked
         * (presumably so the MAC check below runs on locked vnodes) and are
         * unlocked again right after the check; without MAC only references
         * are requested (WANTPARENT).
         */
#ifdef MAC
        NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
            AUDITVNODE1, pathseg, old, oldfd, td);
#else
        NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
            AUDITVNODE1, pathseg, old, oldfd, td);
#endif

        if ((error = namei(&fromnd)) != 0)
                return (error);
        fvfslocked = NDHASGIANT(&fromnd);
        /* Initialized here so the out1 path can unlock unconditionally. */
        tvfslocked = 0;
#ifdef MAC
        error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
            fromnd.ni_vp, &fromnd.ni_cnd);
        VOP_UNLOCK(fromnd.ni_dvp, 0);
        if (fromnd.ni_dvp != fromnd.ni_vp)
                VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
        fvp = fromnd.ni_vp;
        if (error == 0)
                error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
        if (error != 0) {
                /* Nothing from the target side exists yet; drop the source. */
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                goto out1;
        }
        /* Target lookup: parent and (if present) leaf are returned locked. */
        NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
            MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
        if (fromnd.ni_vp->v_type == VDIR)
                tond.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&tond)) != 0) {
                /* Translate error code for rename("dir1", "dir2/."). */
                if (error == EISDIR && fvp->v_type == VDIR)
                        error = EINVAL;
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                vn_finished_write(mp);
                goto out1;
        }
        tvfslocked = NDHASGIANT(&tond);
        tdvp = tond.ni_dvp;
        tvp = tond.ni_vp;
        /* An existing target must be of the same kind as the source. */
        if (tvp != NULL) {
                if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
                        error = ENOTDIR;
                        goto out;
                } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
                        error = EISDIR;
                        goto out;
                }
        }
        /* The source may not be the directory that will hold the target. */
        if (fvp == tdvp) {
                error = EINVAL;
                goto out;
        }
        /*
         * If the source is the same as the destination (that is, if they
         * are links to the same vnode), then there is nothing to do.
         * The -1 is an internal marker: it takes the cleanup branch below
         * and is converted to a 0 return at the end of the function.
         */
        if (fvp == tvp)
                error = -1;
#ifdef MAC
        else
                error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
                    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
        if (!error) {
                /*
                 * NOTE(review): no vput()/vrele() of the four vnodes is done
                 * on this path, so VOP_RENAME() presumably consumes those
                 * references and locks -- confirm against VOP_RENAME(9).
                 */
                error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
                                   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
        } else {
                /* Failure (or no-op): release everything both lookups returned. */
                NDFREE(&fromnd, NDF_ONLY_PNBUF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
                if (tvp)
                        vput(tvp);
                if (tdvp == tvp)
                        vrele(tdvp);
                else
                        vput(tdvp);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
        }
        vrele(tond.ni_startdir);
        vn_finished_write(mp);
out1:
        if (fromnd.ni_startdir)
                vrele(fromnd.ni_startdir);
        VFS_UNLOCK_GIANT(fvfslocked);
        VFS_UNLOCK_GIANT(tvfslocked);
        /* Same-vnode rename: report success. */
        if (error == -1)
                return (0);
        return (error);
}
 3676 
 3677 /*
 3678  * Make a directory file.
 3679  */
 3680 #ifndef _SYS_SYSPROTO_H_
 3681 struct mkdir_args {
 3682         char    *path;
 3683         int     mode;
 3684 };
 3685 #endif
 3686 int
 3687 mkdir(td, uap)
 3688         struct thread *td;
 3689         register struct mkdir_args /* {
 3690                 char *path;
 3691                 int mode;
 3692         } */ *uap;
 3693 {
 3694 
 3695         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3696 }
 3697 
 3698 #ifndef _SYS_SYSPROTO_H_
 3699 struct mkdirat_args {
 3700         int     fd;
 3701         char    *path;
 3702         mode_t  mode;
 3703 };
 3704 #endif
 3705 int
 3706 mkdirat(struct thread *td, struct mkdirat_args *uap)
 3707 {
 3708 
 3709         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3710 }
 3711 
 3712 int
 3713 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3714 {
 3715 
 3716         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3717 }
 3718 
/*
 * Create a directory at path (looked up with fd supplied to NDINIT_AT).
 *
 * The new directory's mode is (mode & ACCESSPERMS) with the bits in the
 * process's fd_cmask cleared.  Returns 0 on success, EEXIST if the lookup
 * finds an existing vnode, or another errno value from the lookup, the
 * write-suspension wait, the MAC check (when configured) or VOP_MKDIR().
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
        struct mount *mp;
        struct vnode *vp;
        struct vattr vattr;
        int error;
        struct nameidata nd;
        int vfslocked;

        AUDIT_ARG_MODE(mode);
        /* The whole lookup is redone from here after a write-start wait. */
restart:
        bwillwrite();
        NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
            segflg, path, fd, td);
        nd.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&nd)) != 0)
                return (error);
        vfslocked = NDHASGIANT(&nd);
        vp = nd.ni_vp;
        /* The name already exists: release the lookup results and fail. */
        if (vp != NULL) {
                NDFREE(&nd, NDF_ONLY_PNBUF);
                /*
                 * XXX namei called with LOCKPARENT but not LOCKLEAF has
                 * the strange behaviour of leaving the vnode unlocked
                 * if the target is the same vnode as the parent.
                 */
                if (vp == nd.ni_dvp)
                        vrele(nd.ni_dvp);
                else
                        vput(nd.ni_dvp);
                vrele(vp);
                VFS_UNLOCK_GIANT(vfslocked);
                return (EEXIST);
        }
        /*
         * Try to start the write without sleeping.  If that fails, release
         * the parent and Giant, wait with V_XSLEEP, and redo the lookup.
         */
        if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                NDFREE(&nd, NDF_ONLY_PNBUF);
                vput(nd.ni_dvp);
                VFS_UNLOCK_GIANT(vfslocked);
                if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
                        return (error);
                goto restart;
        }
        VATTR_NULL(&vattr);
        vattr.va_type = VDIR;
        vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
        error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
            &vattr);
        if (error)
                goto out;
#endif
        error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        /* nd.ni_vp is released only when VOP_MKDIR() succeeded. */
        if (!error)
                vput(nd.ni_vp);
        vn_finished_write(mp);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}
 3784 
 3785 /*
 3786  * Remove a directory file.
 3787  */
 3788 #ifndef _SYS_SYSPROTO_H_
 3789 struct rmdir_args {
 3790         char    *path;
 3791 };
 3792 #endif
 3793 int
 3794 rmdir(td, uap)
 3795         struct thread *td;
 3796         struct rmdir_args /* {
 3797                 char *path;
 3798         } */ *uap;
 3799 {
 3800 
 3801         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3802 }
 3803 
 3804 int
 3805 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3806 {
 3807 
 3808         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3809 }
 3810 
      /*
       * Remove the directory named by path; fd is passed to NDINIT_AT()
       * together with path and pathseg to set up the lookup.
       *
       * The lookup is a DELETE operation with both the parent (nd.ni_dvp) and
       * the leaf (nd.ni_vp) returned locked.  The request is refused with:
       *   ENOTDIR  if the leaf is not a VDIR vnode,
       *   EINVAL   if the leaf and its parent are the same vnode ("."),
       *   EBUSY    if the leaf has VV_ROOT set.
       * Errors from namei(), the MAC unlink check (when MAC is configured),
       * vn_start_write() and VOP_RMDIR() are returned as-is.
       *
       * If vn_start_write() cannot proceed immediately (V_NOWAIT), everything
       * obtained from the lookup is released, the function waits with
       * V_XSLEEP | PCATCH, and the whole operation is restarted from the
       * lookup.
       */
 3811 int
 3812 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3813 {
 3814         struct mount *mp;
 3815         struct vnode *vp;
 3816         int error;
 3817         struct nameidata nd;
 3818         int vfslocked;
 3819 
 3820 restart:
 3821         bwillwrite();
 3822         NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 3823             pathseg, path, fd, td);
 3824         if ((error = namei(&nd)) != 0)
 3825                 return (error);
 3826         vfslocked = NDHASGIANT(&nd);
 3827         vp = nd.ni_vp;
 3828         if (vp->v_type != VDIR) {
 3829                 error = ENOTDIR;
 3830                 goto out;
 3831         }
 3832         /*
 3833          * No rmdir "." please.
 3834          */
 3835         if (nd.ni_dvp == vp) {
 3836                 error = EINVAL;
 3837                 goto out;
 3838         }
 3839         /*
 3840          * The root of a mounted filesystem cannot be deleted.
 3841          */
 3842         if (vp->v_vflag & VV_ROOT) {
 3843                 error = EBUSY;
 3844                 goto out;
 3845         }
 3846 #ifdef MAC
 3847         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3848             &nd.ni_cnd);
 3849         if (error)
 3850                 goto out;
 3851 #endif
 3852         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                      /*
                       * Could not start the write without sleeping: undo the
                       * lookup (same release sequence as at out:), wait, and
                       * redo everything from restart.
                       * NOTE(review): nd.ni_dvp == vp was already rejected
                       * with EINVAL above, so the vrele() arm looks
                       * unreachable on this path -- confirm.
                       */
 3853                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3854                 vput(vp);
 3855                 if (nd.ni_dvp == vp)
 3856                         vrele(nd.ni_dvp);
 3857                 else
 3858                         vput(nd.ni_dvp);
 3859                 VFS_UNLOCK_GIANT(vfslocked);
 3860                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3861                         return (error);
 3862                 goto restart;
 3863         }
 3864         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3865         vn_finished_write(mp);
 3866 out:
              /*
               * Common exit.  The "." rejection above jumps here with
               * nd.ni_dvp == vp; in that case vput(vp) is followed by vrele()
               * instead of a second vput() on the same vnode.
               */
 3867         NDFREE(&nd, NDF_ONLY_PNBUF);
 3868         vput(vp);
 3869         if (nd.ni_dvp == vp)
 3870                 vrele(nd.ni_dvp);
 3871         else
 3872                 vput(nd.ni_dvp);
 3873         VFS_UNLOCK_GIANT(vfslocked);
 3874         return (error);
 3875 }
 3876 
 3877 #ifdef COMPAT_43
 3878 /*
 3879  * Read a block of directory entries in a filesystem independent format.
       *
       * Variant compiled only when COMPAT_43 is defined.  Reads up to
       * uap->count bytes of directory entries from descriptor uap->fd into the
       * user buffer uap->buf, rewriting the d_type/d_namlen fields of each
       * entry (see the comments in the fix-up loop) before they are copied out.
       *
       * On success td->td_retval[0] is the number of bytes transferred and the
       * file offset in effect before the read is copied out to uap->basep.
       *
       * Errors produced directly here:
       *   EINVAL  uap->count is larger than 64 * 1024, or the vnode is not a
       *           directory;
       *   EBADF   the descriptor was not opened with FREAD;
       *   EIO     an entry with d_reclen == 0 was found during the fix-up.
       * Errors from getvnode(), the MAC readdir check, VOP_READDIR(),
       * uiomove() and copyout() are returned as-is.
 3880  */
 3881 #ifndef _SYS_SYSPROTO_H_
 3882 struct ogetdirentries_args {
 3883         int     fd;
 3884         char    *buf;
 3885         u_int   count;
 3886         long    *basep;
 3887 };
 3888 #endif
 3889 int
 3890 ogetdirentries(td, uap)
 3891         struct thread *td;
 3892         register struct ogetdirentries_args /* {
 3893                 int fd;
 3894                 char *buf;
 3895                 u_int count;
 3896                 long *basep;
 3897         } */ *uap;
 3898 {
 3899         struct vnode *vp;
 3900         struct file *fp;
 3901         struct uio auio, kuio;
 3902         struct iovec aiov, kiov;
 3903         struct dirent *dp, *edp;
 3904         caddr_t dirbuf;
 3905         int error, eofflag, readcnt, vfslocked;
 3906         long loff;
 3907 
 3908         /* XXX arbitrary sanity limit on `count'. */
 3909         if (uap->count > 64 * 1024)
 3910                 return (EINVAL);
 3911         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3912                 return (error);
 3913         if ((fp->f_flag & FREAD) == 0) {
 3914                 fdrop(fp, td);
 3915                 return (EBADF);
 3916         }
 3917         vp = fp->f_vnode;
 3918 unionread:
 3919         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3920         if (vp->v_type != VDIR) {
 3921                 VFS_UNLOCK_GIANT(vfslocked);
 3922                 fdrop(fp, td);
 3923                 return (EINVAL);
 3924         }
              /* Describe the caller's buffer as a single-segment user-space read. */
 3925         aiov.iov_base = uap->buf;
 3926         aiov.iov_len = uap->count;
 3927         auio.uio_iov = &aiov;
 3928         auio.uio_iovcnt = 1;
 3929         auio.uio_rw = UIO_READ;
 3930         auio.uio_segflg = UIO_USERSPACE;
 3931         auio.uio_td = td;
 3932         auio.uio_resid = uap->count;
 3933         vn_lock(vp, LK_SHARED | LK_RETRY);
              /* loff remembers the starting offset; it is what basep receives. */
 3934         loff = auio.uio_offset = fp->f_offset;
 3935 #ifdef MAC
 3936         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3937         if (error) {
 3938                 VOP_UNLOCK(vp, 0);
 3939                 VFS_UNLOCK_GIANT(vfslocked);
 3940                 fdrop(fp, td);
 3941                 return (error);
 3942         }
 3943 #endif
              /*
               * When BYTE_ORDER is not LITTLE_ENDIAN, a mount whose
               * mnt_maxsymlinklen is <= 0 is read straight into the user
               * buffer with no per-entry fix-up.  In every other case the
               * braced block below reads into a temporary kernel buffer,
               * patches each entry, and then copies the result out.
               */
 3944 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3945                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3946                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3947                             NULL, NULL);
 3948                         fp->f_offset = auio.uio_offset;
 3949                 } else
 3950 #       endif
 3951         {
                      /* Same request as auio, but aimed at a kernel buffer. */
 3952                 kuio = auio;
 3953                 kuio.uio_iov = &kiov;
 3954                 kuio.uio_segflg = UIO_SYSSPACE;
 3955                 kiov.iov_len = uap->count;
 3956                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3957                 kiov.iov_base = dirbuf;
 3958                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3959                             NULL, NULL);
 3960                 fp->f_offset = kuio.uio_offset;
 3961                 if (error == 0) {
                              /* Walk the readcnt bytes actually produced. */
 3962                         readcnt = uap->count - kuio.uio_resid;
 3963                         edp = (struct dirent *)&dirbuf[readcnt];
 3964                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3965 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3966                                         /*
 3967                                          * The expected low byte of
 3968                                          * dp->d_namlen is our dp->d_type.
 3969                                          * The high MBZ byte of dp->d_namlen
 3970                                          * is our dp->d_namlen.
 3971                                          */
 3972                                         dp->d_type = dp->d_namlen;
 3973                                         dp->d_namlen = 0;
 3974 #                               else
 3975                                         /*
 3976                                          * The dp->d_type is the high byte
 3977                                          * of the expected dp->d_namlen,
 3978                                          * so must be zero'ed.
 3979                                          */
 3980                                         dp->d_type = 0;
 3981 #                               endif
                                      /*
                                       * Advance by the record length; a zero
                                       * length would never advance, so it is
                                       * reported as EIO instead.
                                       */
 3982                                 if (dp->d_reclen > 0) {
 3983                                         dp = (struct dirent *)
 3984                                             ((char *)dp + dp->d_reclen);
 3985                                 } else {
 3986                                         error = EIO;
 3987                                         break;
 3988                                 }
 3989                         }
                              /* Copy out only if the walk reached the end. */
 3990                         if (dp >= edp)
 3991                                 error = uiomove(dirbuf, readcnt, &auio);
 3992                 }
 3993                 free(dirbuf, M_TEMP);
 3994         }
 3995         if (error) {
 3996                 VOP_UNLOCK(vp, 0);
 3997                 VFS_UNLOCK_GIANT(vfslocked);
 3998                 fdrop(fp, td);
 3999                 return (error);
 4000         }
              /*
               * Nothing was transferred and vp is the root (VV_ROOT) of a
               * mount flagged MNT_UNION: repoint the open file at the vnode
               * recorded in mnt_vnodecovered, reset its offset to 0, release
               * the old vnode and retry the read from unionread.
               */
 4001         if (uap->count == auio.uio_resid &&
 4002             (vp->v_vflag & VV_ROOT) &&
 4003             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4004                 struct vnode *tvp = vp;
 4005                 vp = vp->v_mount->mnt_vnodecovered;
 4006                 VREF(vp);
 4007                 fp->f_vnode = vp;
 4008                 fp->f_data = vp;
 4009                 fp->f_offset = 0;
 4010                 vput(tvp);
 4011                 VFS_UNLOCK_GIANT(vfslocked);
 4012                 goto unionread;
 4013         }
 4014         VOP_UNLOCK(vp, 0);
 4015         VFS_UNLOCK_GIANT(vfslocked);
              /*
               * NOTE(review): unlike getdirentries(), basep is not tested
               * against NULL before the copyout(), and td_retval[0] is
               * stored even when that copyout() fails -- confirm intended.
               */
 4016         error = copyout(&loff, uap->basep, sizeof(long));
 4017         fdrop(fp, td);
 4018         td->td_retval[0] = uap->count - auio.uio_resid;
 4019         return (error);
 4020 }
 4021 #endif /* COMPAT_43 */
 4022 
 4023 /*
 4024  * Read a block of directory entries in a filesystem independent format.
       *
       * System call entry point.  The read itself is done by
       * kern_getdirentries(), which also stores the byte count in
       * td->td_retval[0]; this wrapper then copies the starting offset it
       * reported out to uap->basep, unless uap->basep is NULL.
       *
       * Returns the error from kern_getdirentries() if it fails, otherwise the
       * result of the copyout() (or 0 when basep is NULL).
 4025  */
 4026 #ifndef _SYS_SYSPROTO_H_
 4027 struct getdirentries_args {
 4028         int     fd;
 4029         char    *buf;
 4030         u_int   count;
 4031         long    *basep;
 4032 };
 4033 #endif
 4034 int
 4035 getdirentries(td, uap)
 4036         struct thread *td;
 4037         register struct getdirentries_args /* {
 4038                 int fd;
 4039                 char *buf;
 4040                 u_int count;
 4041                 long *basep;
 4042         } */ *uap;
 4043 {
 4044         long base;
 4045         int error;
 4046 
 4047         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 4048         if (error)
 4049                 return (error);
 4050         if (uap->basep != NULL)
 4051                 error = copyout(&base, uap->basep, sizeof(long));
 4052         return (error);
 4053 }
 4054 
      /*
       * Read up to count bytes of directory entries from descriptor fd into
       * the user-space buffer buf.
       *
       * On success returns 0, stores the number of bytes transferred in
       * td->td_retval[0], and stores in *basep the file offset that was in
       * effect before the read.  *basep is written only on success.
       *
       * Errors produced directly here:
       *   EINVAL  count is larger than INT_MAX, or the vnode is not a
       *           directory;
       *   EBADF   the descriptor was not opened with FREAD.
       * Errors from getvnode(), the MAC readdir check (when MAC is configured)
       * and VOP_READDIR() are returned as-is.
       */
 4055 int
 4056 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4057     long *basep)
 4058 {
 4059         struct vnode *vp;
 4060         struct file *fp;
 4061         struct uio auio;
 4062         struct iovec aiov;
 4063         int vfslocked;
 4064         long loff;
 4065         int error, eofflag;
 4066 
 4067         AUDIT_ARG_FD(fd);
 4068         if (count > INT_MAX)
 4069                 return (EINVAL);
 4070         if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
 4071                 return (error);
 4072         if ((fp->f_flag & FREAD) == 0) {
 4073                 fdrop(fp, td);
 4074                 return (EBADF);
 4075         }
 4076         vp = fp->f_vnode;
 4077 unionread:
 4078         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4079         if (vp->v_type != VDIR) {
 4080                 VFS_UNLOCK_GIANT(vfslocked);
 4081                 error = EINVAL;
 4082                 goto fail;
 4083         }
              /* Describe the caller's buffer as a single-segment user-space read. */
 4084         aiov.iov_base = buf;
 4085         aiov.iov_len = count;
 4086         auio.uio_iov = &aiov;
 4087         auio.uio_iovcnt = 1;
 4088         auio.uio_rw = UIO_READ;
 4089         auio.uio_segflg = UIO_USERSPACE;
 4090         auio.uio_td = td;
 4091         auio.uio_resid = count;
 4092         vn_lock(vp, LK_SHARED | LK_RETRY);
 4093         AUDIT_ARG_VNODE1(vp);
              /* loff remembers the starting offset; it is what *basep receives. */
 4094         loff = auio.uio_offset = fp->f_offset;
 4095 #ifdef MAC
 4096         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4097         if (error == 0)
 4098 #endif
 4099                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4100                     NULL);
              /* Stored unconditionally, i.e. also when the calls above failed. */
 4101         fp->f_offset = auio.uio_offset;
 4102         if (error) {
 4103                 VOP_UNLOCK(vp, 0);
 4104                 VFS_UNLOCK_GIANT(vfslocked);
 4105                 goto fail;
 4106         }
              /*
               * Nothing was transferred and vp is the root (VV_ROOT) of a
               * mount flagged MNT_UNION: repoint the open file at the vnode
               * recorded in mnt_vnodecovered, reset its offset to 0, release
               * the old vnode and retry the read from unionread.
               */
 4107         if (count == auio.uio_resid &&
 4108             (vp->v_vflag & VV_ROOT) &&
 4109             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4110                 struct vnode *tvp = vp;
 4111                 vp = vp->v_mount->mnt_vnodecovered;
 4112                 VREF(vp);
 4113                 fp->f_vnode = vp;
 4114                 fp->f_data = vp;
 4115                 fp->f_offset = 0;
 4116                 vput(tvp);
 4117                 VFS_UNLOCK_GIANT(vfslocked);
 4118                 goto unionread;
 4119         }
 4120         VOP_UNLOCK(vp, 0);
 4121         VFS_UNLOCK_GIANT(vfslocked);
 4122         *basep = loff;
 4123         td->td_retval[0] = count - auio.uio_resid;
              /* Success falls through: fail: only drops the file reference. */
 4124 fail:
 4125         fdrop(fp, td);
 4126         return (error);
 4127 }
 4128 
      /*
       * Read directory entries without reporting the starting offset.
       *
       * System call entry point.  Repackages its arguments as a
       * getdirentries_args with basep set to NULL and calls getdirentries(),
       * returning its result.
       *
       * Returns EINVAL if uap->count is larger than INT_MAX.
       */
 4129 #ifndef _SYS_SYSPROTO_H_
 4130 struct getdents_args {
 4131         int fd;
 4132         char *buf;
 4133         size_t count;
 4134 };
 4135 #endif
 4136 int
 4137 getdents(td, uap)
 4138         struct thread *td;
 4139         register struct getdents_args /* {
 4140                 int fd;
 4141                 char *buf;
 4142                 size_t count;
 4143         } */ *uap;
 4144 {
 4145         struct getdirentries_args ap;
              /*
               * count is declared size_t here but ap.count is a u_int, so a
               * larger value would be silently truncated by the assignment
               * below.  Refuse it up front, using the same INT_MAX bound that
               * kern_getdirentries() enforces.
               */
              if (uap->count > INT_MAX)
                      return (EINVAL);
 4146         ap.fd = uap->fd;
 4147         ap.buf = uap->buf;
 4148         ap.count = uap->count;
 4149         ap.basep = NULL;
 4150         return (getdirentries(td, &ap));
 4151 }
 4152 
 4153 /*
 4154  * Set the mode mask for creation of filesystem nodes.
       *
       * System call entry point.  With the process's file descriptor table
       * exclusively locked, the current fd_cmask is placed in
       * td->td_retval[0] and fd_cmask is replaced by uap->newmask masked with
       * ALLPERMS.  Always returns 0.
 4155  */
 4156 #ifndef _SYS_SYSPROTO_H_
 4157 struct umask_args {
 4158         int     newmask;
 4159 };
 4160 #endif
 4161 int
 4162 umask(td, uap)
 4163         struct thread *td;
 4164         struct umask_args /* {
 4165                 int newmask;
 4166         } */ *uap;
 4167 {
 4168         register struct filedesc *fdp;
 4169 
 4170         FILEDESC_XLOCK(td->td_proc->p_fd);
 4171         fdp = td->td_proc->p_fd;
              /* Old mask is the return value; bits outside ALLPERMS are dropped. */
 4172         td->td_retval[0] = fdp->fd_cmask;
 4173         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4174         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4175         return (0);
 4176 }
 4177 
 4178 /*
 4179  * Void all references to file by ripping underlying filesystem away from
 4180  * vnode.
       *
       * System call entry point.  uap->path is looked up (following symbolic
       * links, leaf returned locked) and must name a VCHR vnode whose v_rdev
       * is not NULL, otherwise EINVAL is returned.  If the caller's cr_uid
       * differs from the va_uid reported by VOP_GETATTR(), the caller must
       * pass priv_check(PRIV_VFS_ADMIN).  VOP_REVOKE(REVOKEALL) is issued only
       * when vcount(vp) is greater than 1.
       *
       * Errors from namei(), the MAC revoke check (when MAC is configured),
       * VOP_GETATTR() and priv_check() are returned as-is.  The return value
       * of VOP_REVOKE() is not examined.
 4181  */
 4182 #ifndef _SYS_SYSPROTO_H_
 4183 struct revoke_args {
 4184         char    *path;
 4185 };
 4186 #endif
 4187 int
 4188 revoke(td, uap)
 4189         struct thread *td;
 4190         register struct revoke_args /* {
 4191                 char *path;
 4192         } */ *uap;
 4193 {
 4194         struct vnode *vp;
 4195         struct vattr vattr;
 4196         int error;
 4197         struct nameidata nd;
 4198         int vfslocked;
 4199 
 4200         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4201             UIO_USERSPACE, uap->path, td);
 4202         if ((error = namei(&nd)) != 0)
 4203                 return (error);
 4204         vfslocked = NDHASGIANT(&nd);
 4205         vp = nd.ni_vp;
 4206         NDFREE(&nd, NDF_ONLY_PNBUF);
 4207         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4208                 error = EINVAL;
 4209                 goto out;
 4210         }
 4211 #ifdef MAC
 4212         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4213         if (error)
 4214                 goto out;
 4215 #endif
 4216         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4217         if (error)
 4218                 goto out;
              /* Callers other than the file's owner need PRIV_VFS_ADMIN. */
 4219         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4220                 error = priv_check(td, PRIV_VFS_ADMIN);
 4221                 if (error)
 4222                         goto out;
 4223         }
 4224         if (vcount(vp) > 1)
 4225                 VOP_REVOKE(vp, REVOKEALL);
 4226 out:
 4227         vput(vp);
 4228         VFS_UNLOCK_GIANT(vfslocked);
 4229         return (error);
 4230 }
 4231 
 4232 /*
 4233  * Convert a user file descriptor to a kernel file entry.
 4234  * A reference on the file entry is held upon returning.
       *
       * On success returns 0 with *fpp pointing at the referenced file entry;
       * the caller is responsible for the matching fdrop().
       *
       * On failure no reference is held and *fpp is set to NULL:
       *   EBADF   fdp is NULL or fget_unlocked() found no file for fd;
       *   EINVAL  the file has no vnode (f_vnode is NULL).
 4235  */
 4236 int
 4237 getvnode(fdp, fd, fpp)
 4238         struct filedesc *fdp;
 4239         int fd;
 4240         struct file **fpp;
 4241 {
 4242         int error;
 4243         struct file *fp;
 4244 
 4245         error = 0;
 4246         fp = NULL;
 4247         if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
 4248                 error = EBADF;
 4249         else if (fp->f_vnode == NULL) {
 4250                 error = EINVAL;
 4251                 fdrop(fp, curthread);
                      /*
                       * The reference was just dropped; do not hand the
                       * caller a pointer it no longer has any claim on.
                       */
                      fp = NULL;
 4252         }
 4253         *fpp = fp;
 4254         return (error);
 4255 }
 4256 
 4257 /*
 4258  * Get an (NFS) file handle.
       *
       * System call entry point.  Requires PRIV_VFS_GETFH.  uap->fname is
       * looked up with NOFOLLOW, a handle is built from the mount's f_fsid and
       * the result of VOP_VPTOFH(), and the handle is copied out to uap->fhp.
       *
       * Errors from priv_check(), namei(), VOP_VPTOFH() and copyout() are
       * returned as-is.
 4259  */
 4260 #ifndef _SYS_SYSPROTO_H_
 4261 struct lgetfh_args {
 4262         char    *fname;
 4263         fhandle_t *fhp;
 4264 };
 4265 #endif
 4266 int
 4267 lgetfh(td, uap)
 4268         struct thread *td;
 4269         register struct lgetfh_args *uap;
 4270 {
 4271         struct nameidata nd;
 4272         fhandle_t fh;
 4273         register struct vnode *vp;
 4274         int vfslocked;
 4275         int error;
 4276 
 4277         error = priv_check(td, PRIV_VFS_GETFH);
 4278         if (error)
 4279                 return (error);
 4280         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4281             UIO_USERSPACE, uap->fname, td);
 4282         error = namei(&nd);
 4283         if (error)
 4284                 return (error);
 4285         vfslocked = NDHASGIANT(&nd);
 4286         NDFREE(&nd, NDF_ONLY_PNBUF);
 4287         vp = nd.ni_vp;
              /*
               * The whole of fh is copied out below, so clear it first: any
               * byte not filled in here is then zero rather than whatever
               * this local variable happened to contain.
               */
 4288         bzero(&fh, sizeof(fh));
 4289         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4290         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4291         vput(vp);
 4292         VFS_UNLOCK_GIANT(vfslocked);
 4293         if (error)
 4294                 return (error);
 4295         error = copyout(&fh, uap->fhp, sizeof (fh));
 4296         return (error);
 4297 }
 4298 
      /*
       * Get an (NFS) file handle.
       *
       * System call entry point.  Requires PRIV_VFS_GETFH.  uap->fname is
       * looked up with FOLLOW, a handle is built from the mount's f_fsid and
       * the result of VOP_VPTOFH(), and the handle is copied out to uap->fhp.
       *
       * Errors from priv_check(), namei(), VOP_VPTOFH() and copyout() are
       * returned as-is.
       */
 4299 #ifndef _SYS_SYSPROTO_H_
 4300 struct getfh_args {
 4301         char    *fname;
 4302         fhandle_t *fhp;
 4303 };
 4304 #endif
 4305 int
 4306 getfh(td, uap)
 4307         struct thread *td;
 4308         register struct getfh_args *uap;
 4309 {
 4310         struct nameidata nd;
 4311         fhandle_t fh;
 4312         register struct vnode *vp;
 4313         int vfslocked;
 4314         int error;
 4315 
 4316         error = priv_check(td, PRIV_VFS_GETFH);
 4317         if (error)
 4318                 return (error);
 4319         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4320             UIO_USERSPACE, uap->fname, td);
 4321         error = namei(&nd);
 4322         if (error)
 4323                 return (error);
 4324         vfslocked = NDHASGIANT(&nd);
 4325         NDFREE(&nd, NDF_ONLY_PNBUF);
 4326         vp = nd.ni_vp;
              /*
               * The whole of fh is copied out below, so clear it first: any
               * byte not filled in here is then zero rather than whatever
               * this local variable happened to contain.
               */
 4327         bzero(&fh, sizeof(fh));
 4328         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4329         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4330         vput(vp);
 4331         VFS_UNLOCK_GIANT(vfslocked);
 4332         if (error)
 4333                 return (error);
 4334         error = copyout(&fh, uap->fhp, sizeof (fh));
 4335         return (error);
 4336 }
 4337 
 4338 /*
 4339  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4340  * open descriptor.
 4341  *
 4342  * warning: do not remove the priv_check() call or this becomes one giant
 4343  * security hole.
       *
       * Requires PRIV_VFS_FHOPEN.  The handle is copied in from uap->u_fhp,
       * its mount is found with vfs_busyfs() and its vnode with VFS_FHTOVP().
       * The vnode then goes through the open-time checks below, is optionally
       * truncated (O_TRUNC), opened with VOP_OPEN(), attached to a newly
       * allocated file, and optionally locked (O_EXLOCK / O_SHLOCK).  On
       * success the new descriptor index is returned in td->td_retval[0].
       *
       * Errors produced directly here:
       *   EINVAL      neither FREAD nor FWRITE requested, or O_CREAT given;
       *   ESTALE      no mount matches the handle's fsid;
       *   EMLINK      the vnode is a symbolic link;
       *   EOPNOTSUPP  the vnode is a socket;
       *   EISDIR      write or truncate requested on a directory.
       * Errors from the other calls made here are returned as-is.
       *
       * Exit labels: "bad" does vput(vp) and falls into "out", which only
       * releases Giant and returns error.
 4344  */
 4345 #ifndef _SYS_SYSPROTO_H_
 4346 struct fhopen_args {
 4347         const struct fhandle *u_fhp;
 4348         int flags;
 4349 };
 4350 #endif
 4351 int
 4352 fhopen(td, uap)
 4353         struct thread *td;
 4354         struct fhopen_args /* {
 4355                 const struct fhandle *u_fhp;
 4356                 int flags;
 4357         } */ *uap;
 4358 {
 4359         struct proc *p = td->td_proc;
 4360         struct mount *mp;
 4361         struct vnode *vp;
 4362         struct fhandle fhp;
 4363         struct vattr vat;
 4364         struct vattr *vap = &vat;
 4365         struct flock lf;
 4366         struct file *fp;
 4367         register struct filedesc *fdp = p->p_fd;
 4368         int fmode, error, type;
 4369         accmode_t accmode;
 4370         struct file *nfp;
 4371         int vfslocked;
 4372         int indx;
 4373 
 4374         error = priv_check(td, PRIV_VFS_FHOPEN);
 4375         if (error)
 4376                 return (error);
 4377         fmode = FFLAGS(uap->flags);
 4378         /* why not allow a non-read/write open for our lockd? */
 4379         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4380                 return (EINVAL);
 4381         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4382         if (error)
 4383                 return(error);
 4384         /* find the mount point */
 4385         mp = vfs_busyfs(&fhp.fh_fsid);
 4386         if (mp == NULL)
 4387                 return (ESTALE);
 4388         vfslocked = VFS_LOCK_GIANT(mp);
 4389         /* now give me my vnode, it gets returned to me locked */
 4390         error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
 4391         vfs_unbusy(mp);
 4392         if (error)
 4393                 goto out;
 4394         /*
 4395          * from now on we have to make sure not
 4396          * to forget about the vnode
 4397          * any error that causes an abort must vput(vp)
 4398          * just set error = err and 'goto bad;'.
 4399          */
 4400 
 4401         /*
 4402          * from vn_open
 4403          */
 4404         if (vp->v_type == VLNK) {
 4405                 error = EMLINK;
 4406                 goto bad;
 4407         }
 4408         if (vp->v_type == VSOCK) {
 4409                 error = EOPNOTSUPP;
 4410                 goto bad;
 4411         }
              /* Build the access mode to check from the requested open flags. */
 4412         accmode = 0;
 4413         if (fmode & (FWRITE | O_TRUNC)) {
 4414                 if (vp->v_type == VDIR) {
 4415                         error = EISDIR;
 4416                         goto bad;
 4417                 }
 4418                 error = vn_writechk(vp);
 4419                 if (error)
 4420                         goto bad;
 4421                 accmode |= VWRITE;
 4422         }
 4423         if (fmode & FREAD)
 4424                 accmode |= VREAD;
 4425         if ((fmode & O_APPEND) && (fmode & FWRITE))
 4426                 accmode |= VAPPEND;
 4427 #ifdef MAC
 4428         error = mac_vnode_check_open(td->td_ucred, vp, accmode);
 4429         if (error)
 4430                 goto bad;
 4431 #endif
 4432         if (accmode) {
 4433                 error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
 4434                 if (error)
 4435                         goto bad;
 4436         }
 4437         if (fmode & O_TRUNC) {
                      /*
                       * The vnode is unlocked while waiting in
                       * vn_start_write() and relocked afterwards;
                       * vfs_ref()/vfs_rel() bracket that window.
                       * NOTE(review): presumably the extra mount reference
                       * keeps mp usable while vp is unlocked -- confirm.
                       */
 4438                 vfs_ref(mp);
 4439                 VOP_UNLOCK(vp, 0);                              /* XXX */
 4440                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
                              /*
                               * vp is unlocked at this point, so it is
                               * released with vrele() and the vput() at
                               * "bad" is bypassed.
                               */
 4441                         vrele(vp);
 4442                         vfs_rel(mp);
 4443                         goto out;
 4444                 }
 4445                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4446                 vfs_rel(mp);
 4447 #ifdef MAC
 4448                 /*
 4449                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4450                  * should be right.
 4451                  */
 4452                 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
 4453                 if (error == 0) {
 4454 #endif
                              /* Truncate: set only the size attribute, to 0. */
 4455                         VATTR_NULL(vap);
 4456                         vap->va_size = 0;
 4457                         error = VOP_SETATTR(vp, vap, td->td_ucred);
 4458 #ifdef MAC
 4459                 }
 4460 #endif
 4461                 vn_finished_write(mp);
 4462                 if (error)
 4463                         goto bad;
 4464         }
 4465         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4466         if (error)
 4467                 goto bad;
 4468 
 4469         if (fmode & FWRITE)
 4470                 vp->v_writecount++;
 4471 
 4472         /*
 4473          * end of vn_open code
 4474          */
 4475 
 4476         if ((error = fallocf(td, &nfp, &indx, fmode)) != 0) {
                      /* Undo the writer count taken just above. */
 4477                 if (fmode & FWRITE)
 4478                         vp->v_writecount--;
 4479                 goto bad;
 4480         }
 4481         /* An extra reference on `nfp' has been held for us by falloc(). */
 4482         fp = nfp;
 4483         nfp->f_vnode = vp;
 4484         finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
 4485         if (fmode & (O_EXLOCK | O_SHLOCK)) {
                      /*
                       * Whole-file lock (start 0, length 0): a write lock for
                       * O_EXLOCK, otherwise a read lock.  F_WAIT is added
                       * unless FNONBLOCK was requested.
                       */
 4486                 lf.l_whence = SEEK_SET;
 4487                 lf.l_start = 0;
 4488                 lf.l_len = 0;
 4489                 if (fmode & O_EXLOCK)
 4490                         lf.l_type = F_WRLCK;
 4491                 else
 4492                         lf.l_type = F_RDLCK;
 4493                 type = F_FLOCK;
 4494                 if ((fmode & FNONBLOCK) == 0)
 4495                         type |= F_WAIT;
                      /* The vnode lock is not held across VOP_ADVLOCK(). */
 4496                 VOP_UNLOCK(vp, 0);
 4497                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4498                             type)) != 0) {
 4499                         /*
 4500                          * The lock request failed.  Normally close the
 4501                          * descriptor but handle the case where someone might
 4502                          * have dup()d or close()d it when we weren't looking.
 4503                          */
 4504                         fdclose(fdp, fp, indx, td);
 4505 
 4506                         /*
 4507                          * release our private reference
                               *
                               * This path leaves through "out", not "bad":
                               * vp is unlocked here and has been attached to
                               * fp above.  NOTE(review): presumably the vnode
                               * is released as part of dropping fp -- confirm.
 4508                          */
 4509                         fdrop(fp, td);
 4510                         goto out;
 4511                 }
 4512                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4513                 atomic_set_int(&fp->f_flag, FHASLOCK);
 4514         }
 4515 
 4516         VOP_UNLOCK(vp, 0);
 4517         fdrop(fp, td);
 4518         VFS_UNLOCK_GIANT(vfslocked);
 4519         td->td_retval[0] = indx;
 4520         return (0);
 4521 
 4522 bad:
 4523         vput(vp);
 4524 out:
 4525         VFS_UNLOCK_GIANT(vfslocked);
 4526         return (error);
 4527 }
 4528 
 4529 /*
 4530  * Stat an (NFS) file handle.
       *
       * System call entry point.  Requires PRIV_VFS_FHSTAT.  The handle is
       * copied in from uap->u_fhp, resolved to a vnode through its mount, and
       * the result of vn_stat() on that vnode is copied out to uap->sb.
       *
       * Returns ESTALE if no mount matches the handle's fsid; errors from
       * priv_check(), copyin(), VFS_FHTOVP(), vn_stat() and copyout() are
       * returned as-is.
 4531  */
 4532 #ifndef _SYS_SYSPROTO_H_
 4533 struct fhstat_args {
 4534         struct fhandle *u_fhp;
 4535         struct stat *sb;
 4536 };
 4537 #endif
 4538 int
 4539 fhstat(td, uap)
 4540         struct thread *td;
 4541         register struct fhstat_args /* {
 4542                 struct fhandle *u_fhp;
 4543                 struct stat *sb;
 4544         } */ *uap;
 4545 {
 4546         struct stat sb;
 4547         fhandle_t fh;
 4548         struct mount *mp;
 4549         struct vnode *vp;
 4550         int vfslocked;
 4551         int error;
 4552 
 4553         error = priv_check(td, PRIV_VFS_FHSTAT);
 4554         if (error)
 4555                 return (error);
 4556         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4557         if (error)
 4558                 return (error);
 4559         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4560                 return (ESTALE);
 4561         vfslocked = VFS_LOCK_GIANT(mp);
 4562         error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
              /* The mount is unbusied as soon as the vnode lookup is done. */
 4563         vfs_unbusy(mp);
 4564         if (error) {
 4565                 VFS_UNLOCK_GIANT(vfslocked);
 4566                 return (error);
 4567         }
 4568         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4569         vput(vp);
 4570         VFS_UNLOCK_GIANT(vfslocked);
 4571         if (error)
 4572                 return (error);
 4573         error = copyout(&sb, uap->sb, sizeof(sb));
 4574         return (error);
 4575 }
 4576 
 4577 /*
 4578  * Implement fstatfs() for (NFS) file handles.
       *
       * System call entry point.  Copies the handle in from uap->u_fhp, calls
       * kern_fhstatfs() to fill a local struct statfs, and copies that out to
       * uap->buf.  No privilege check is made here; kern_fhstatfs() performs
       * it.
       *
       * Errors from copyin(), kern_fhstatfs() and copyout() are returned
       * as-is.
 4579  */
 4580 #ifndef _SYS_SYSPROTO_H_
 4581 struct fhstatfs_args {
 4582         struct fhandle *u_fhp;
 4583         struct statfs *buf;
 4584 };
 4585 #endif
 4586 int
 4587 fhstatfs(td, uap)
 4588         struct thread *td;
 4589         struct fhstatfs_args /* {
 4590                 struct fhandle *u_fhp;
 4591                 struct statfs *buf;
 4592         } */ *uap;
 4593 {
 4594         struct statfs sf;
 4595         fhandle_t fh;
 4596         int error;
 4597 
 4598         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4599         if (error)
 4600                 return (error);
 4601         error = kern_fhstatfs(td, fh, &sf);
 4602         if (error)
 4603                 return (error);
 4604         return (copyout(&sf, uap->buf, sizeof(sf)));
 4605 }
 4606 
      /*
       * Fill *buf with filesystem statistics for the mount that the file
       * handle fh belongs to.  fh is passed by value.
       *
       * Requires PRIV_VFS_FHSTATFS.  Returns ESTALE if no mount matches the
       * handle's fsid.  Errors from priv_check(), VFS_FHTOVP(),
       * prison_canseemount(), the MAC mount stat check (when MAC is
       * configured) and VFS_STATFS() are returned as-is.  *buf is written only
       * when VFS_STATFS() succeeds.
       *
       * The mount stays busy from vfs_busyfs() until the function returns.
       */
 4607 int
 4608 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4609 {
 4610         struct statfs *sp;
 4611         struct mount *mp;
 4612         struct vnode *vp;
 4613         int vfslocked;
 4614         int error;
 4615 
 4616         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4617         if (error)
 4618                 return (error);
 4619         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4620                 return (ESTALE);
 4621         vfslocked = VFS_LOCK_GIANT(mp);
 4622         error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
 4623         if (error) {
 4624                 vfs_unbusy(mp);
 4625                 VFS_UNLOCK_GIANT(vfslocked);
 4626                 return (error);
 4627         }
              /*
               * The vnode is not used any further: it is released right away
               * and everything below works on the mount only.
               */
 4628         vput(vp);
 4629         error = prison_canseemount(td->td_ucred, mp);
 4630         if (error)
 4631                 goto out;
 4632 #ifdef MAC
 4633         error = mac_mount_check_stat(td->td_ucred, mp);
 4634         if (error)
 4635                 goto out;
 4636 #endif
 4637         /*
 4638          * Set these in case the underlying filesystem fails to do so.
 4639          */
 4640         sp = &mp->mnt_stat;
 4641         sp->f_version = STATFS_VERSION;
 4642         sp->f_namemax = NAME_MAX;
 4643         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4644         error = VFS_STATFS(mp, sp);
 4645         if (error == 0)
 4646                 *buf = *sp;
 4647 out:
 4648         vfs_unbusy(mp);
 4649         VFS_UNLOCK_GIANT(vfslocked);
 4650         return (error);
 4651 }
 4652 
      /*
       * Allocate storage for the byte range [offset, offset + len) of the file
       * open on descriptor fd by calling VOP_ALLOCATE() repeatedly until it
       * reports no remaining length.
       *
       * Errors produced directly here:
       *   ESPIPE  the descriptor is a pipe or FIFO;
       *   ENODEV  the descriptor is of any other non-vnode type, or the vnode
       *           is not a regular file;
       *   EBADF   the descriptor was not opened with FWRITE;
       *   EINVAL  offset is negative or len is not positive;
       *   EFBIG   offset + len would exceed OFF_MAX.
       * Errors from fget(), vn_start_write(), vn_lock(), the MAC write check
       * (when MAC is configured) and VOP_ALLOCATE() are returned as-is.
       *
       * Every exit goes through "out", which drops the file reference if one
       * was obtained.
       */
 4653 int
 4654 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4655 {
 4656         struct file *fp;
 4657         struct mount *mp;
 4658         struct vnode *vp;
 4659         off_t olen, ooffset;
 4660         int error, vfslocked;
 4661 
 4662         fp = NULL;
 4663         vfslocked = 0;
 4664         error = fget(td, fd, &fp);
 4665         if (error != 0)
 4666                 goto out;
 4667 
 4668         switch (fp->f_type) {
 4669         case DTYPE_VNODE:
 4670                 break;
 4671         case DTYPE_PIPE:
 4672         case DTYPE_FIFO:
 4673                 error = ESPIPE;
 4674                 goto out;
 4675         default:
 4676                 error = ENODEV;
 4677                 goto out;
 4678         }
 4679         if ((fp->f_flag & FWRITE) == 0) {
 4680                 error = EBADF;
 4681                 goto out;
 4682         }
 4683         vp = fp->f_vnode;
 4684         if (vp->v_type != VREG) {
 4685                 error = ENODEV;
 4686                 goto out;
 4687         }
 4688         if (offset < 0 || len <= 0) {
 4689                 error = EINVAL;
 4690                 goto out;
 4691         }
 4692         /* Check for wrap. */
 4693         if (offset > OFF_MAX - len) {
 4694                 error = EFBIG;
 4695                 goto out;
 4696         }
 4697 
 4698         /* Allocating blocks may take a long time, so iterate. */
 4699         for (;;) {
                      /* Snapshot the range to validate this pass afterwards. */
 4700                 olen = len;
 4701                 ooffset = offset;
 4702 
 4703                 bwillwrite();
 4704                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4705                 mp = NULL;
 4706                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4707                 if (error != 0) {
 4708                         VFS_UNLOCK_GIANT(vfslocked);
 4709                         break;
 4710                 }
 4711                 error = vn_lock(vp, LK_EXCLUSIVE);
 4712                 if (error != 0) {
 4713                         vn_finished_write(mp);
 4714                         VFS_UNLOCK_GIANT(vfslocked);
 4715                         break;
 4716                 }
 4717 #ifdef MAC
 4718                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4719                 if (error == 0)
 4720 #endif
                              /* offset and len are passed by address and updated. */
 4721                         error = VOP_ALLOCATE(vp, &offset, &len);
 4722                 VOP_UNLOCK(vp, 0);
 4723                 vn_finished_write(mp);
 4724                 VFS_UNLOCK_GIANT(vfslocked);
 4725 
                      /*
                       * The end of the range must not move: whatever
                       * VOP_ALLOCATE() added to offset it must have taken
                       * from len.
                       */
 4726                 if (olen + ooffset != offset + len) {
 4727                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4728                             ooffset, olen, offset, len);
 4729                 }
                      /* Done on error or once no length remains. */
 4730                 if (error != 0 || len == 0)
 4731                         break;
 4732                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4733                 maybe_yield();
 4734         }
 4735  out:
 4736         if (fp != NULL)
 4737                 fdrop(fp, td);
 4738         return (error);
 4739 }
 4740 
 4741 int
 4742 posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4743 {
 4744 
 4745         return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 4746 }
 4747 
 4748 /*
 4749  * Unlike madvise(2), we do not make a best effort to remember every
 4750  * possible caching hint.  Instead, we remember the last setting with
 4751  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4752  * region of any current setting.
 4753  */
 4754 int
 4755 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4756     int advice)
 4757 {
 4758         struct fadvise_info *fa, *new;
 4759         struct file *fp;
 4760         struct vnode *vp;
 4761         off_t end;
 4762         int error;
 4763 
 4764         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4765                 return (EINVAL);
 4766         switch (advice) {
 4767         case POSIX_FADV_SEQUENTIAL:
 4768         case POSIX_FADV_RANDOM:
 4769         case POSIX_FADV_NOREUSE:
 4770                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4771                 break;
 4772         case POSIX_FADV_NORMAL:
 4773         case POSIX_FADV_WILLNEED:
 4774         case POSIX_FADV_DONTNEED:
 4775                 new = NULL;
 4776                 break;
 4777         default:
 4778                 return (EINVAL);
 4779         }
 4780         error = fget(td, fd, &fp);
 4781         if (error != 0)
 4782                 goto out;
 4783         
 4784         switch (fp->f_type) {
 4785         case DTYPE_VNODE:
 4786                 break;
 4787         case DTYPE_PIPE:
 4788         case DTYPE_FIFO:
 4789                 error = ESPIPE;
 4790                 goto out;
 4791         default:
 4792                 error = ENODEV;
 4793                 goto out;
 4794         }
 4795         vp = fp->f_vnode;
 4796         if (vp->v_type != VREG) {
 4797                 error = ENODEV;
 4798                 goto out;
 4799         }
 4800         if (len == 0)
 4801                 end = OFF_MAX;
 4802         else
 4803                 end = offset + len - 1;
 4804         switch (advice) {
 4805         case POSIX_FADV_SEQUENTIAL:
 4806         case POSIX_FADV_RANDOM:
 4807         case POSIX_FADV_NOREUSE:
 4808                 /*
 4809                  * Try to merge any existing non-standard region with
 4810                  * this new region if possible, otherwise create a new
 4811                  * non-standard region for this request.
 4812                  */
 4813                 mtx_pool_lock(mtxpool_sleep, fp);
 4814                 fa = fp->f_advice;
 4815                 if (fa != NULL && fa->fa_advice == advice &&
 4816                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4817                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4818                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4819                         if (offset < fa->fa_start)
 4820                                 fa->fa_start = offset;
 4821                         if (end > fa->fa_end)
 4822                                 fa->fa_end = end;
 4823                 } else {
 4824                         new->fa_advice = advice;
 4825                         new->fa_start = offset;
 4826                         new->fa_end = end;
 4827                         fp->f_advice = new;
 4828                         new = fa;
 4829                 }
 4830                 mtx_pool_unlock(mtxpool_sleep, fp);
 4831                 break;
 4832         case POSIX_FADV_NORMAL:
 4833                 /*
 4834                  * If a the "normal" region overlaps with an existing
 4835                  * non-standard region, trim or remove the
 4836                  * non-standard region.
 4837                  */
 4838                 mtx_pool_lock(mtxpool_sleep, fp);
 4839                 fa = fp->f_advice;
 4840                 if (fa != NULL) {
 4841                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4842                                 new = fa;
 4843                                 fp->f_advice = NULL;
 4844                         } else if (offset <= fa->fa_start &&
 4845                             end >= fa->fa_start)
 4846                                 fa->fa_start = end + 1;
 4847                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4848                                 fa->fa_end = offset - 1;
 4849                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4850                                 /*
 4851                                  * If the "normal" region is a middle
 4852                                  * portion of the existing
 4853                                  * non-standard region, just remove
 4854                                  * the whole thing rather than picking
 4855                                  * one side or the other to
 4856                                  * preserve.
 4857                                  */
 4858                                 new = fa;
 4859                                 fp->f_advice = NULL;
 4860                         }
 4861                 }
 4862                 mtx_pool_unlock(mtxpool_sleep, fp);
 4863                 break;
 4864         case POSIX_FADV_WILLNEED:
 4865         case POSIX_FADV_DONTNEED:
 4866                 error = VOP_ADVISE(vp, offset, end, advice);
 4867                 break;
 4868         }
 4869 out:
 4870         if (fp != NULL)
 4871                 fdrop(fp, td);
 4872         free(new, M_FADVISE);
 4873         return (error);
 4874 }
 4875 
 4876 int
 4877 posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4878 {
 4879 
 4880         return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4881             uap->advice));
 4882 }

Cache object: 33f391e1d435e16f1b00eeaac512ec52


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.