The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.2/sys/kern/vfs_syscalls.c 250978 2013-05-25 11:05:00Z kib $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 #include <ufs/ufs/quota.h>
   90 
/* malloc(9) type tag; its description string names posix_fadvise(2). */
MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");

/*
 * Statically defined tracing: provider "vfs" with the probes stat:mode
 * and stat:reg.  Each probe declares two arguments, typed "char *" and
 * "int" respectively.
 */
SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");

/* Forward declarations of helpers that are private to this file. */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;

#ifdef DEBUG
/*
 * Debug-only knob, exported read-write as the debug.syncprt sysctl.
 * NOTE(review): no consumer of this variable is visible in this part of
 * the file; confirm where it is read.
 */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
  121 
  122 /*
  123  * Sync each mounted filesystem.
  124  */
  125 #ifndef _SYS_SYSPROTO_H_
  126 struct sync_args {
  127         int     dummy;
  128 };
  129 #endif
  130 /* ARGSUSED */
  131 int
  132 sys_sync(td, uap)
  133         struct thread *td;
  134         struct sync_args *uap;
  135 {
  136         struct mount *mp, *nmp;
  137         int save, vfslocked;
  138 
  139         mtx_lock(&mountlist_mtx);
  140         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  141                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  142                         nmp = TAILQ_NEXT(mp, mnt_list);
  143                         continue;
  144                 }
  145                 vfslocked = VFS_LOCK_GIANT(mp);
  146                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  147                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  148                         save = curthread_pflags_set(TDP_SYNCIO);
  149                         vfs_msync(mp, MNT_NOWAIT);
  150                         VFS_SYNC(mp, MNT_NOWAIT);
  151                         curthread_pflags_restore(save);
  152                         vn_finished_write(mp);
  153                 }
  154                 VFS_UNLOCK_GIANT(vfslocked);
  155                 mtx_lock(&mountlist_mtx);
  156                 nmp = TAILQ_NEXT(mp, mnt_list);
  157                 vfs_unbusy(mp);
  158         }
  159         mtx_unlock(&mountlist_mtx);
  160         return (0);
  161 }
  162 
  163 /*
  164  * Change filesystem quotas.
  165  */
  166 #ifndef _SYS_SYSPROTO_H_
  167 struct quotactl_args {
  168         char *path;
  169         int cmd;
  170         int uid;
  171         caddr_t arg;
  172 };
  173 #endif
  174 int
  175 sys_quotactl(td, uap)
  176         struct thread *td;
  177         register struct quotactl_args /* {
  178                 char *path;
  179                 int cmd;
  180                 int uid;
  181                 caddr_t arg;
  182         } */ *uap;
  183 {
  184         struct mount *mp;
  185         int vfslocked;
  186         int error;
  187         struct nameidata nd;
  188 
  189         AUDIT_ARG_CMD(uap->cmd);
  190         AUDIT_ARG_UID(uap->uid);
  191         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  192                 return (EPERM);
  193         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  194            UIO_USERSPACE, uap->path, td);
  195         if ((error = namei(&nd)) != 0)
  196                 return (error);
  197         vfslocked = NDHASGIANT(&nd);
  198         NDFREE(&nd, NDF_ONLY_PNBUF);
  199         mp = nd.ni_vp->v_mount;
  200         vfs_ref(mp);
  201         vput(nd.ni_vp);
  202         error = vfs_busy(mp, 0);
  203         vfs_rel(mp);
  204         if (error) {
  205                 VFS_UNLOCK_GIANT(vfslocked);
  206                 return (error);
  207         }
  208         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  209 
  210         /*
  211          * Since quota on operation typically needs to open quota
  212          * file, the Q_QUOTAON handler needs to unbusy the mount point
  213          * before calling into namei.  Otherwise, unmount might be
  214          * started between two vfs_busy() invocations (first is our,
  215          * second is from mount point cross-walk code in lookup()),
  216          * causing deadlock.
  217          *
  218          * Require that Q_QUOTAON handles the vfs_busy() reference on
  219          * its own, always returning with ubusied mount point.
  220          */
  221         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  222                 vfs_unbusy(mp);
  223         VFS_UNLOCK_GIANT(vfslocked);
  224         return (error);
  225 }
  226 
  227 /*
  228  * Used by statfs conversion routines to scale the block size up if
  229  * necessary so that all of the block counts are <= 'max_size'.  Note
  230  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  231  * value of 'n'.
  232  */
  233 void
  234 statfs_scale_blocks(struct statfs *sf, long max_size)
  235 {
  236         uint64_t count;
  237         int shift;
  238 
  239         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  240 
  241         /*
  242          * Attempt to scale the block counts to give a more accurate
  243          * overview to userland of the ratio of free space to used
  244          * space.  To do this, find the largest block count and compute
  245          * a divisor that lets it fit into a signed integer <= max_size.
  246          */
  247         if (sf->f_bavail < 0)
  248                 count = -sf->f_bavail;
  249         else
  250                 count = sf->f_bavail;
  251         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  252         if (count <= max_size)
  253                 return;
  254 
  255         count >>= flsl(max_size);
  256         shift = 0;
  257         while (count > 0) {
  258                 shift++;
  259                 count >>=1;
  260         }
  261 
  262         sf->f_bsize <<= shift;
  263         sf->f_blocks >>= shift;
  264         sf->f_bfree >>= shift;
  265         sf->f_bavail >>= shift;
  266 }
  267 
  268 /*
  269  * Get filesystem statistics.
  270  */
  271 #ifndef _SYS_SYSPROTO_H_
  272 struct statfs_args {
  273         char *path;
  274         struct statfs *buf;
  275 };
  276 #endif
  277 int
  278 sys_statfs(td, uap)
  279         struct thread *td;
  280         register struct statfs_args /* {
  281                 char *path;
  282                 struct statfs *buf;
  283         } */ *uap;
  284 {
  285         struct statfs sf;
  286         int error;
  287 
  288         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  289         if (error == 0)
  290                 error = copyout(&sf, uap->buf, sizeof(sf));
  291         return (error);
  292 }
  293 
  294 int
  295 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  296     struct statfs *buf)
  297 {
  298         struct mount *mp;
  299         struct statfs *sp, sb;
  300         int vfslocked;
  301         int error;
  302         struct nameidata nd;
  303 
  304         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  305             AUDITVNODE1, pathseg, path, td);
  306         error = namei(&nd);
  307         if (error)
  308                 return (error);
  309         vfslocked = NDHASGIANT(&nd);
  310         mp = nd.ni_vp->v_mount;
  311         vfs_ref(mp);
  312         NDFREE(&nd, NDF_ONLY_PNBUF);
  313         vput(nd.ni_vp);
  314         error = vfs_busy(mp, 0);
  315         vfs_rel(mp);
  316         if (error) {
  317                 VFS_UNLOCK_GIANT(vfslocked);
  318                 return (error);
  319         }
  320 #ifdef MAC
  321         error = mac_mount_check_stat(td->td_ucred, mp);
  322         if (error)
  323                 goto out;
  324 #endif
  325         /*
  326          * Set these in case the underlying filesystem fails to do so.
  327          */
  328         sp = &mp->mnt_stat;
  329         sp->f_version = STATFS_VERSION;
  330         sp->f_namemax = NAME_MAX;
  331         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  332         error = VFS_STATFS(mp, sp);
  333         if (error)
  334                 goto out;
  335         if (priv_check(td, PRIV_VFS_GENERATION)) {
  336                 bcopy(sp, &sb, sizeof(sb));
  337                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  338                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  339                 sp = &sb;
  340         }
  341         *buf = *sp;
  342 out:
  343         vfs_unbusy(mp);
  344         VFS_UNLOCK_GIANT(vfslocked);
  345         return (error);
  346 }
  347 
  348 /*
  349  * Get filesystem statistics.
  350  */
  351 #ifndef _SYS_SYSPROTO_H_
  352 struct fstatfs_args {
  353         int fd;
  354         struct statfs *buf;
  355 };
  356 #endif
  357 int
  358 sys_fstatfs(td, uap)
  359         struct thread *td;
  360         register struct fstatfs_args /* {
  361                 int fd;
  362                 struct statfs *buf;
  363         } */ *uap;
  364 {
  365         struct statfs sf;
  366         int error;
  367 
  368         error = kern_fstatfs(td, uap->fd, &sf);
  369         if (error == 0)
  370                 error = copyout(&sf, uap->buf, sizeof(sf));
  371         return (error);
  372 }
  373 
  374 int
  375 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  376 {
  377         struct file *fp;
  378         struct mount *mp;
  379         struct statfs *sp, sb;
  380         int vfslocked;
  381         struct vnode *vp;
  382         int error;
  383 
  384         AUDIT_ARG_FD(fd);
  385         error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
  386         if (error)
  387                 return (error);
  388         vp = fp->f_vnode;
  389         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  390         vn_lock(vp, LK_SHARED | LK_RETRY);
  391 #ifdef AUDIT
  392         AUDIT_ARG_VNODE1(vp);
  393 #endif
  394         mp = vp->v_mount;
  395         if (mp)
  396                 vfs_ref(mp);
  397         VOP_UNLOCK(vp, 0);
  398         fdrop(fp, td);
  399         if (mp == NULL) {
  400                 error = EBADF;
  401                 goto out;
  402         }
  403         error = vfs_busy(mp, 0);
  404         vfs_rel(mp);
  405         if (error) {
  406                 VFS_UNLOCK_GIANT(vfslocked);
  407                 return (error);
  408         }
  409 #ifdef MAC
  410         error = mac_mount_check_stat(td->td_ucred, mp);
  411         if (error)
  412                 goto out;
  413 #endif
  414         /*
  415          * Set these in case the underlying filesystem fails to do so.
  416          */
  417         sp = &mp->mnt_stat;
  418         sp->f_version = STATFS_VERSION;
  419         sp->f_namemax = NAME_MAX;
  420         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  421         error = VFS_STATFS(mp, sp);
  422         if (error)
  423                 goto out;
  424         if (priv_check(td, PRIV_VFS_GENERATION)) {
  425                 bcopy(sp, &sb, sizeof(sb));
  426                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  427                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  428                 sp = &sb;
  429         }
  430         *buf = *sp;
  431 out:
  432         if (mp)
  433                 vfs_unbusy(mp);
  434         VFS_UNLOCK_GIANT(vfslocked);
  435         return (error);
  436 }
  437 
  438 /*
  439  * Get statistics on all filesystems.
  440  */
  441 #ifndef _SYS_SYSPROTO_H_
  442 struct getfsstat_args {
  443         struct statfs *buf;
  444         long bufsize;
  445         int flags;
  446 };
  447 #endif
  448 int
  449 sys_getfsstat(td, uap)
  450         struct thread *td;
  451         register struct getfsstat_args /* {
  452                 struct statfs *buf;
  453                 long bufsize;
  454                 int flags;
  455         } */ *uap;
  456 {
  457 
  458         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  459             uap->flags));
  460 }
  461 
  462 /*
  463  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  464  *      The caller is responsible for freeing memory which will be allocated
  465  *      in '*buf'.
  466  */
  467 int
  468 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  469     enum uio_seg bufseg, int flags)
  470 {
  471         struct mount *mp, *nmp;
  472         struct statfs *sfsp, *sp, sb;
  473         size_t count, maxcount;
  474         int vfslocked;
  475         int error;
  476 
  477         maxcount = bufsize / sizeof(struct statfs);
  478         if (bufsize == 0)
  479                 sfsp = NULL;
  480         else if (bufseg == UIO_USERSPACE)
  481                 sfsp = *buf;
  482         else /* if (bufseg == UIO_SYSSPACE) */ {
  483                 count = 0;
  484                 mtx_lock(&mountlist_mtx);
  485                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  486                         count++;
  487                 }
  488                 mtx_unlock(&mountlist_mtx);
  489                 if (maxcount > count)
  490                         maxcount = count;
  491                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  492                     M_WAITOK);
  493         }
  494         count = 0;
  495         mtx_lock(&mountlist_mtx);
  496         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  497                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  498                         nmp = TAILQ_NEXT(mp, mnt_list);
  499                         continue;
  500                 }
  501 #ifdef MAC
  502                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  503                         nmp = TAILQ_NEXT(mp, mnt_list);
  504                         continue;
  505                 }
  506 #endif
  507                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  508                         nmp = TAILQ_NEXT(mp, mnt_list);
  509                         continue;
  510                 }
  511                 vfslocked = VFS_LOCK_GIANT(mp);
  512                 if (sfsp && count < maxcount) {
  513                         sp = &mp->mnt_stat;
  514                         /*
  515                          * Set these in case the underlying filesystem
  516                          * fails to do so.
  517                          */
  518                         sp->f_version = STATFS_VERSION;
  519                         sp->f_namemax = NAME_MAX;
  520                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  521                         /*
  522                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  523                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  524                          * overrides MNT_WAIT.
  525                          */
  526                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  527                             (flags & MNT_WAIT)) &&
  528                             (error = VFS_STATFS(mp, sp))) {
  529                                 VFS_UNLOCK_GIANT(vfslocked);
  530                                 mtx_lock(&mountlist_mtx);
  531                                 nmp = TAILQ_NEXT(mp, mnt_list);
  532                                 vfs_unbusy(mp);
  533                                 continue;
  534                         }
  535                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  536                                 bcopy(sp, &sb, sizeof(sb));
  537                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  538                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  539                                 sp = &sb;
  540                         }
  541                         if (bufseg == UIO_SYSSPACE)
  542                                 bcopy(sp, sfsp, sizeof(*sp));
  543                         else /* if (bufseg == UIO_USERSPACE) */ {
  544                                 error = copyout(sp, sfsp, sizeof(*sp));
  545                                 if (error) {
  546                                         vfs_unbusy(mp);
  547                                         VFS_UNLOCK_GIANT(vfslocked);
  548                                         return (error);
  549                                 }
  550                         }
  551                         sfsp++;
  552                 }
  553                 VFS_UNLOCK_GIANT(vfslocked);
  554                 count++;
  555                 mtx_lock(&mountlist_mtx);
  556                 nmp = TAILQ_NEXT(mp, mnt_list);
  557                 vfs_unbusy(mp);
  558         }
  559         mtx_unlock(&mountlist_mtx);
  560         if (sfsp && count > maxcount)
  561                 td->td_retval[0] = maxcount;
  562         else
  563                 td->td_retval[0] = count;
  564         return (0);
  565 }
  566 
  567 #ifdef COMPAT_FREEBSD4
  568 /*
  569  * Get old format filesystem statistics.
  570  */
  571 static void cvtstatfs(struct statfs *, struct ostatfs *);
  572 
  573 #ifndef _SYS_SYSPROTO_H_
  574 struct freebsd4_statfs_args {
  575         char *path;
  576         struct ostatfs *buf;
  577 };
  578 #endif
  579 int
  580 freebsd4_statfs(td, uap)
  581         struct thread *td;
  582         struct freebsd4_statfs_args /* {
  583                 char *path;
  584                 struct ostatfs *buf;
  585         } */ *uap;
  586 {
  587         struct ostatfs osb;
  588         struct statfs sf;
  589         int error;
  590 
  591         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  592         if (error)
  593                 return (error);
  594         cvtstatfs(&sf, &osb);
  595         return (copyout(&osb, uap->buf, sizeof(osb)));
  596 }
  597 
  598 /*
  599  * Get filesystem statistics.
  600  */
  601 #ifndef _SYS_SYSPROTO_H_
  602 struct freebsd4_fstatfs_args {
  603         int fd;
  604         struct ostatfs *buf;
  605 };
  606 #endif
  607 int
  608 freebsd4_fstatfs(td, uap)
  609         struct thread *td;
  610         struct freebsd4_fstatfs_args /* {
  611                 int fd;
  612                 struct ostatfs *buf;
  613         } */ *uap;
  614 {
  615         struct ostatfs osb;
  616         struct statfs sf;
  617         int error;
  618 
  619         error = kern_fstatfs(td, uap->fd, &sf);
  620         if (error)
  621                 return (error);
  622         cvtstatfs(&sf, &osb);
  623         return (copyout(&osb, uap->buf, sizeof(osb)));
  624 }
  625 
  626 /*
  627  * Get statistics on all filesystems.
  628  */
  629 #ifndef _SYS_SYSPROTO_H_
  630 struct freebsd4_getfsstat_args {
  631         struct ostatfs *buf;
  632         long bufsize;
  633         int flags;
  634 };
  635 #endif
  636 int
  637 freebsd4_getfsstat(td, uap)
  638         struct thread *td;
  639         register struct freebsd4_getfsstat_args /* {
  640                 struct ostatfs *buf;
  641                 long bufsize;
  642                 int flags;
  643         } */ *uap;
  644 {
  645         struct statfs *buf, *sp;
  646         struct ostatfs osb;
  647         size_t count, size;
  648         int error;
  649 
  650         count = uap->bufsize / sizeof(struct ostatfs);
  651         size = count * sizeof(struct statfs);
  652         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  653         if (size > 0) {
  654                 count = td->td_retval[0];
  655                 sp = buf;
  656                 while (count > 0 && error == 0) {
  657                         cvtstatfs(sp, &osb);
  658                         error = copyout(&osb, uap->buf, sizeof(osb));
  659                         sp++;
  660                         uap->buf++;
  661                         count--;
  662                 }
  663                 free(buf, M_TEMP);
  664         }
  665         return (error);
  666 }
  667 
  668 /*
  669  * Implement fstatfs() for (NFS) file handles.
  670  */
  671 #ifndef _SYS_SYSPROTO_H_
  672 struct freebsd4_fhstatfs_args {
  673         struct fhandle *u_fhp;
  674         struct ostatfs *buf;
  675 };
  676 #endif
  677 int
  678 freebsd4_fhstatfs(td, uap)
  679         struct thread *td;
  680         struct freebsd4_fhstatfs_args /* {
  681                 struct fhandle *u_fhp;
  682                 struct ostatfs *buf;
  683         } */ *uap;
  684 {
  685         struct ostatfs osb;
  686         struct statfs sf;
  687         fhandle_t fh;
  688         int error;
  689 
  690         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  691         if (error)
  692                 return (error);
  693         error = kern_fhstatfs(td, fh, &sf);
  694         if (error)
  695                 return (error);
  696         cvtstatfs(&sf, &osb);
  697         return (copyout(&osb, uap->buf, sizeof(osb)));
  698 }
  699 
  700 /*
  701  * Convert a new format statfs structure to an old format statfs structure.
  702  */
  703 static void
  704 cvtstatfs(nsp, osp)
  705         struct statfs *nsp;
  706         struct ostatfs *osp;
  707 {
  708 
  709         statfs_scale_blocks(nsp, LONG_MAX);
  710         bzero(osp, sizeof(*osp));
  711         osp->f_bsize = nsp->f_bsize;
  712         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  713         osp->f_blocks = nsp->f_blocks;
  714         osp->f_bfree = nsp->f_bfree;
  715         osp->f_bavail = nsp->f_bavail;
  716         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  717         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  718         osp->f_owner = nsp->f_owner;
  719         osp->f_type = nsp->f_type;
  720         osp->f_flags = nsp->f_flags;
  721         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  722         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  723         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  724         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  725         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  726             MIN(MFSNAMELEN, OMFSNAMELEN));
  727         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  728             MIN(MNAMELEN, OMNAMELEN));
  729         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  730             MIN(MNAMELEN, OMNAMELEN));
  731         osp->f_fsid = nsp->f_fsid;
  732 }
  733 #endif /* COMPAT_FREEBSD4 */
  734 
  735 /*
  736  * Change current working directory to a given file descriptor.
  737  */
  738 #ifndef _SYS_SYSPROTO_H_
  739 struct fchdir_args {
  740         int     fd;
  741 };
  742 #endif
  743 int
  744 sys_fchdir(td, uap)
  745         struct thread *td;
  746         struct fchdir_args /* {
  747                 int fd;
  748         } */ *uap;
  749 {
  750         register struct filedesc *fdp = td->td_proc->p_fd;
  751         struct vnode *vp, *tdp, *vpold;
  752         struct mount *mp;
  753         struct file *fp;
  754         int vfslocked;
  755         int error;
  756 
  757         AUDIT_ARG_FD(uap->fd);
  758         if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
  759                 return (error);
  760         vp = fp->f_vnode;
  761         VREF(vp);
  762         fdrop(fp, td);
  763         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  764         vn_lock(vp, LK_SHARED | LK_RETRY);
  765         AUDIT_ARG_VNODE1(vp);
  766         error = change_dir(vp, td);
  767         while (!error && (mp = vp->v_mountedhere) != NULL) {
  768                 int tvfslocked;
  769                 if (vfs_busy(mp, 0))
  770                         continue;
  771                 tvfslocked = VFS_LOCK_GIANT(mp);
  772                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  773                 vfs_unbusy(mp);
  774                 if (error) {
  775                         VFS_UNLOCK_GIANT(tvfslocked);
  776                         break;
  777                 }
  778                 vput(vp);
  779                 VFS_UNLOCK_GIANT(vfslocked);
  780                 vp = tdp;
  781                 vfslocked = tvfslocked;
  782         }
  783         if (error) {
  784                 vput(vp);
  785                 VFS_UNLOCK_GIANT(vfslocked);
  786                 return (error);
  787         }
  788         VOP_UNLOCK(vp, 0);
  789         VFS_UNLOCK_GIANT(vfslocked);
  790         FILEDESC_XLOCK(fdp);
  791         vpold = fdp->fd_cdir;
  792         fdp->fd_cdir = vp;
  793         FILEDESC_XUNLOCK(fdp);
  794         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  795         vrele(vpold);
  796         VFS_UNLOCK_GIANT(vfslocked);
  797         return (0);
  798 }
  799 
  800 /*
  801  * Change current working directory (``.'').
  802  */
  803 #ifndef _SYS_SYSPROTO_H_
  804 struct chdir_args {
  805         char    *path;
  806 };
  807 #endif
  808 int
  809 sys_chdir(td, uap)
  810         struct thread *td;
  811         struct chdir_args /* {
  812                 char *path;
  813         } */ *uap;
  814 {
  815 
  816         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  817 }
  818 
  819 int
  820 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  821 {
  822         register struct filedesc *fdp = td->td_proc->p_fd;
  823         int error;
  824         struct nameidata nd;
  825         struct vnode *vp;
  826         int vfslocked;
  827 
  828         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  829             MPSAFE, pathseg, path, td);
  830         if ((error = namei(&nd)) != 0)
  831                 return (error);
  832         vfslocked = NDHASGIANT(&nd);
  833         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  834                 vput(nd.ni_vp);
  835                 VFS_UNLOCK_GIANT(vfslocked);
  836                 NDFREE(&nd, NDF_ONLY_PNBUF);
  837                 return (error);
  838         }
  839         VOP_UNLOCK(nd.ni_vp, 0);
  840         VFS_UNLOCK_GIANT(vfslocked);
  841         NDFREE(&nd, NDF_ONLY_PNBUF);
  842         FILEDESC_XLOCK(fdp);
  843         vp = fdp->fd_cdir;
  844         fdp->fd_cdir = nd.ni_vp;
  845         FILEDESC_XUNLOCK(fdp);
  846         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  847         vrele(vp);
  848         VFS_UNLOCK_GIANT(vfslocked);
  849         return (0);
  850 }
  851 
  852 /*
  853  * Helper function for raised chroot(2) security function:  Refuse if
  854  * any filedescriptors are open directories.
  855  */
  856 static int
  857 chroot_refuse_vdir_fds(fdp)
  858         struct filedesc *fdp;
  859 {
  860         struct vnode *vp;
  861         struct file *fp;
  862         int fd;
  863 
  864         FILEDESC_LOCK_ASSERT(fdp);
  865 
  866         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  867                 fp = fget_locked(fdp, fd);
  868                 if (fp == NULL)
  869                         continue;
  870                 if (fp->f_type == DTYPE_VNODE) {
  871                         vp = fp->f_vnode;
  872                         if (vp->v_type == VDIR)
  873                                 return (EPERM);
  874                 }
  875         }
  876         return (0);
  877 }
  878 
  879 /*
  880  * This sysctl determines if we will allow a process to chroot(2) if it
  881  * has a directory open:
  882  *      0: disallowed for all processes.
  883  *      1: allowed for processes that were not already chroot(2)'ed.
  884  *      2: allowed for all processes.
  885  */
  886 
  887 static int chroot_allow_open_directories = 1;
  888 
  889 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  890      &chroot_allow_open_directories, 0, "");
  891 
  892 /*
  893  * Change notion of root (``/'') directory.
  894  */
  895 #ifndef _SYS_SYSPROTO_H_
  896 struct chroot_args {
  897         char    *path;
  898 };
  899 #endif
  900 int
  901 sys_chroot(td, uap)
  902         struct thread *td;
  903         struct chroot_args /* {
  904                 char *path;
  905         } */ *uap;
  906 {
  907         int error;
  908         struct nameidata nd;
  909         int vfslocked;
  910 
  911         error = priv_check(td, PRIV_VFS_CHROOT);
  912         if (error)
  913                 return (error);
  914         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  915             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  916         error = namei(&nd);
  917         if (error)
  918                 goto error;
  919         vfslocked = NDHASGIANT(&nd);
  920         if ((error = change_dir(nd.ni_vp, td)) != 0)
  921                 goto e_vunlock;
  922 #ifdef MAC
  923         if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
  924                 goto e_vunlock;
  925 #endif
  926         VOP_UNLOCK(nd.ni_vp, 0);
  927         error = change_root(nd.ni_vp, td);
  928         vrele(nd.ni_vp);
  929         VFS_UNLOCK_GIANT(vfslocked);
  930         NDFREE(&nd, NDF_ONLY_PNBUF);
  931         return (error);
  932 e_vunlock:
  933         vput(nd.ni_vp);
  934         VFS_UNLOCK_GIANT(vfslocked);
  935 error:
  936         NDFREE(&nd, NDF_ONLY_PNBUF);
  937         return (error);
  938 }
  939 
  940 /*
  941  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  942  * instance.
  943  */
  944 int
  945 change_dir(vp, td)
  946         struct vnode *vp;
  947         struct thread *td;
  948 {
  949         int error;
  950 
  951         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  952         if (vp->v_type != VDIR)
  953                 return (ENOTDIR);
  954 #ifdef MAC
  955         error = mac_vnode_check_chdir(td->td_ucred, vp);
  956         if (error)
  957                 return (error);
  958 #endif
  959         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  960         return (error);
  961 }
  962 
  963 /*
  964  * Common routine for kern_chroot() and jail_attach().  The caller is
  965  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  966  * authorize this operation.
  967  */
  968 int
  969 change_root(vp, td)
  970         struct vnode *vp;
  971         struct thread *td;
  972 {
  973         struct filedesc *fdp;
  974         struct vnode *oldvp;
  975         int vfslocked;
  976         int error;
  977 
  978         VFS_ASSERT_GIANT(vp->v_mount);
  979         fdp = td->td_proc->p_fd;
  980         FILEDESC_XLOCK(fdp);
  981         if (chroot_allow_open_directories == 0 ||
  982             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  983                 error = chroot_refuse_vdir_fds(fdp);
  984                 if (error) {
  985                         FILEDESC_XUNLOCK(fdp);
  986                         return (error);
  987                 }
  988         }
  989         oldvp = fdp->fd_rdir;
  990         fdp->fd_rdir = vp;
  991         VREF(fdp->fd_rdir);
  992         if (!fdp->fd_jdir) {
  993                 fdp->fd_jdir = vp;
  994                 VREF(fdp->fd_jdir);
  995         }
  996         FILEDESC_XUNLOCK(fdp);
  997         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  998         vrele(oldvp);
  999         VFS_UNLOCK_GIANT(vfslocked);
 1000         return (0);
 1001 }
 1002 
 1003 static __inline cap_rights_t
 1004 flags_to_rights(int flags)
 1005 {
 1006         cap_rights_t rights = 0;
 1007 
 1008         switch ((flags & O_ACCMODE)) {
 1009         case O_RDONLY:
 1010                 rights |= CAP_READ;
 1011                 break;
 1012 
 1013         case O_RDWR:
 1014                 rights |= CAP_READ;
 1015                 /* fall through */
 1016 
 1017         case O_WRONLY:
 1018                 rights |= CAP_WRITE;
 1019                 break;
 1020 
 1021         case O_EXEC:
 1022                 rights |= CAP_FEXECVE;
 1023                 break;
 1024         }
 1025 
 1026         if (flags & O_CREAT)
 1027                 rights |= CAP_CREATE;
 1028 
 1029         if (flags & O_TRUNC)
 1030                 rights |= CAP_FTRUNCATE;
 1031 
 1032         if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
 1033                 rights |= CAP_FLOCK;
 1034 
 1035         return (rights);
 1036 }
 1037 
 1038 /*
 1039  * Check permissions, allocate an open file structure, and call the device
 1040  * open routine if any.
 1041  */
 1042 #ifndef _SYS_SYSPROTO_H_
 1043 struct open_args {
 1044         char    *path;
 1045         int     flags;
 1046         int     mode;
 1047 };
 1048 #endif
 1049 int
 1050 sys_open(td, uap)
 1051         struct thread *td;
 1052         register struct open_args /* {
 1053                 char *path;
 1054                 int flags;
 1055                 int mode;
 1056         } */ *uap;
 1057 {
 1058 
 1059         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1060 }
 1061 
 1062 #ifndef _SYS_SYSPROTO_H_
 1063 struct openat_args {
 1064         int     fd;
 1065         char    *path;
 1066         int     flag;
 1067         int     mode;
 1068 };
 1069 #endif
 1070 int
 1071 sys_openat(struct thread *td, struct openat_args *uap)
 1072 {
 1073 
 1074         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1075             uap->mode));
 1076 }
 1077 
 1078 int
 1079 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1080     int mode)
 1081 {
 1082 
 1083         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1084 }
 1085 
 1086 int
 1087 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1088     int flags, int mode)
 1089 {
 1090         struct proc *p = td->td_proc;
 1091         struct filedesc *fdp = p->p_fd;
 1092         struct file *fp;
 1093         struct vnode *vp;
 1094         int cmode;
 1095         struct file *nfp;
 1096         int type, indx = -1, error, error_open;
 1097         struct flock lf;
 1098         struct nameidata nd;
 1099         int vfslocked;
 1100         cap_rights_t rights_needed = CAP_LOOKUP;
 1101 
 1102         AUDIT_ARG_FFLAGS(flags);
 1103         AUDIT_ARG_MODE(mode);
 1104         /* XXX: audit dirfd */
 1105         rights_needed |= flags_to_rights(flags);
 1106         /*
 1107          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1108          * may be specified.
 1109          */
 1110         if (flags & O_EXEC) {
 1111                 if (flags & O_ACCMODE)
 1112                         return (EINVAL);
 1113         } else if ((flags & O_ACCMODE) == O_ACCMODE)
 1114                 return (EINVAL);
 1115         else
 1116                 flags = FFLAGS(flags);
 1117 
 1118         /*
 1119          * allocate the file descriptor, but don't install a descriptor yet
 1120          */
 1121         error = falloc_noinstall(td, &nfp);
 1122         if (error)
 1123                 return (error);
 1124         /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
 1125         fp = nfp;
 1126         /* Set the flags early so the finit in devfs can pick them up. */
 1127         fp->f_flag = flags & FMASK;
 1128         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
 1129         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
 1130             path, fd, rights_needed, td);
 1131         td->td_dupfd = -1;              /* XXX check for fdopen */
 1132         error = vn_open(&nd, &flags, cmode, fp);
 1133         if (error) {
 1134                 /*
 1135                  * If the vn_open replaced the method vector, something
 1136                  * wonderous happened deep below and we just pass it up
 1137                  * pretending we know what we do.
 1138                  */
 1139                 if (error == ENXIO && fp->f_ops != &badfileops)
 1140                         goto success;
 1141 
 1142                 /*
 1143                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1144                  * responsible for dropping the old contents of ofiles[indx]
 1145                  * if it succeeds.
 1146                  *
 1147                  * Don't do this for relative (capability) lookups; we don't
 1148                  * understand exactly what would happen, and we don't think
 1149                  * that it ever should.
 1150                  */
 1151                 if ((nd.ni_strictrelative == 0) &&
 1152                     (error == ENODEV || error == ENXIO) &&
 1153                     (td->td_dupfd >= 0)) {
 1154                         /* XXX from fdopen */
 1155                         error_open = error;
 1156                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1157                                 goto bad_unlocked;
 1158                         if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
 1159                             flags, error_open)) == 0)
 1160                                 goto success;
 1161                 }
 1162                 /*
 1163                  * Clean up the descriptor, but only if another thread hadn't
 1164                  * replaced or closed it.
 1165                  */
 1166                 if (indx != -1)
 1167                         fdclose(fdp, fp, indx, td);
 1168                 fdrop(fp, td);
 1169 
 1170                 return (error);
 1171         }
 1172         td->td_dupfd = 0;
 1173         vfslocked = NDHASGIANT(&nd);
 1174         NDFREE(&nd, NDF_ONLY_PNBUF);
 1175         vp = nd.ni_vp;
 1176 
 1177         /*
 1178          * Store the vnode, for any f_type. Typically, the vnode use
 1179          * count is decremented by direct call to vn_closefile() for
 1180          * files that switched type in the cdevsw fdopen() method.
 1181          */
 1182         fp->f_vnode = vp;
 1183         /*
 1184          * If the file wasn't claimed by devfs bind it to the normal
 1185          * vnode operations here.
 1186          */
 1187         if (fp->f_ops == &badfileops) {
 1188                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1189                 fp->f_seqcount = 1;
 1190                 finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
 1191         }
 1192 
 1193         VOP_UNLOCK(vp, 0);
 1194         if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
 1195                 lf.l_whence = SEEK_SET;
 1196                 lf.l_start = 0;
 1197                 lf.l_len = 0;
 1198                 if (flags & O_EXLOCK)
 1199                         lf.l_type = F_WRLCK;
 1200                 else
 1201                         lf.l_type = F_RDLCK;
 1202                 type = F_FLOCK;
 1203                 if ((flags & FNONBLOCK) == 0)
 1204                         type |= F_WAIT;
 1205                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 1206                             type)) != 0)
 1207                         goto bad;
 1208                 atomic_set_int(&fp->f_flag, FHASLOCK);
 1209         }
 1210         if (flags & O_TRUNC) {
 1211                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1212                 if (error)
 1213                         goto bad;
 1214         }
 1215         VFS_UNLOCK_GIANT(vfslocked);
 1216 success:
 1217         /*
 1218          * If we haven't already installed the FD (for dupfdopen), do so now.
 1219          */
 1220         if (indx == -1) {
 1221 #ifdef CAPABILITIES
 1222                 if (nd.ni_strictrelative == 1) {
 1223                         /*
 1224                          * We are doing a strict relative lookup; wrap the
 1225                          * result in a capability.
 1226                          */
 1227                         if ((error = kern_capwrap(td, fp, nd.ni_baserights,
 1228                             &indx)) != 0)
 1229                                 goto bad_unlocked;
 1230                 } else
 1231 #endif
 1232                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1233                                 goto bad_unlocked;
 1234 
 1235         }
 1236 
 1237         /*
 1238          * Release our private reference, leaving the one associated with
 1239          * the descriptor table intact.
 1240          */
 1241         fdrop(fp, td);
 1242         td->td_retval[0] = indx;
 1243         return (0);
 1244 bad:
 1245         VFS_UNLOCK_GIANT(vfslocked);
 1246 bad_unlocked:
 1247         if (indx != -1)
 1248                 fdclose(fdp, fp, indx, td);
 1249         fdrop(fp, td);
 1250         td->td_retval[0] = -1;
 1251         return (error);
 1252 }
 1253 
 1254 #ifdef COMPAT_43
 1255 /*
 1256  * Create a file.
 1257  */
 1258 #ifndef _SYS_SYSPROTO_H_
 1259 struct ocreat_args {
 1260         char    *path;
 1261         int     mode;
 1262 };
 1263 #endif
 1264 int
 1265 ocreat(td, uap)
 1266         struct thread *td;
 1267         register struct ocreat_args /* {
 1268                 char *path;
 1269                 int mode;
 1270         } */ *uap;
 1271 {
 1272 
 1273         return (kern_open(td, uap->path, UIO_USERSPACE,
 1274             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1275 }
 1276 #endif /* COMPAT_43 */
 1277 
 1278 /*
 1279  * Create a special file.
 1280  */
 1281 #ifndef _SYS_SYSPROTO_H_
 1282 struct mknod_args {
 1283         char    *path;
 1284         int     mode;
 1285         int     dev;
 1286 };
 1287 #endif
 1288 int
 1289 sys_mknod(td, uap)
 1290         struct thread *td;
 1291         register struct mknod_args /* {
 1292                 char *path;
 1293                 int mode;
 1294                 int dev;
 1295         } */ *uap;
 1296 {
 1297 
 1298         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1299 }
 1300 
 1301 #ifndef _SYS_SYSPROTO_H_
 1302 struct mknodat_args {
 1303         int     fd;
 1304         char    *path;
 1305         mode_t  mode;
 1306         dev_t   dev;
 1307 };
 1308 #endif
 1309 int
 1310 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1311 {
 1312 
 1313         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1314             uap->dev));
 1315 }
 1316 
 1317 int
 1318 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1319     int dev)
 1320 {
 1321 
 1322         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1323 }
 1324 
 1325 int
 1326 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1327     int mode, int dev)
 1328 {
 1329         struct vnode *vp;
 1330         struct mount *mp;
 1331         struct vattr vattr;
 1332         int error;
 1333         int whiteout = 0;
 1334         struct nameidata nd;
 1335         int vfslocked;
 1336 
 1337         AUDIT_ARG_MODE(mode);
 1338         AUDIT_ARG_DEV(dev);
 1339         switch (mode & S_IFMT) {
 1340         case S_IFCHR:
 1341         case S_IFBLK:
 1342                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1343                 break;
 1344         case S_IFMT:
 1345                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1346                 break;
 1347         case S_IFWHT:
 1348                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1349                 break;
 1350         case S_IFIFO:
 1351                 if (dev == 0)
 1352                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1353                 /* FALLTHROUGH */
 1354         default:
 1355                 error = EINVAL;
 1356                 break;
 1357         }
 1358         if (error)
 1359                 return (error);
 1360 restart:
 1361         bwillwrite();
 1362         NDINIT_ATRIGHTS(&nd, CREATE,
 1363             LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
 1364             CAP_MKFIFO, td);
 1365         if ((error = namei(&nd)) != 0)
 1366                 return (error);
 1367         vfslocked = NDHASGIANT(&nd);
 1368         vp = nd.ni_vp;
 1369         if (vp != NULL) {
 1370                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1371                 if (vp == nd.ni_dvp)
 1372                         vrele(nd.ni_dvp);
 1373                 else
 1374                         vput(nd.ni_dvp);
 1375                 vrele(vp);
 1376                 VFS_UNLOCK_GIANT(vfslocked);
 1377                 return (EEXIST);
 1378         } else {
 1379                 VATTR_NULL(&vattr);
 1380                 vattr.va_mode = (mode & ALLPERMS) &
 1381                     ~td->td_proc->p_fd->fd_cmask;
 1382                 vattr.va_rdev = dev;
 1383                 whiteout = 0;
 1384 
 1385                 switch (mode & S_IFMT) {
 1386                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1387                         vattr.va_type = VBAD;
 1388                         break;
 1389                 case S_IFCHR:
 1390                         vattr.va_type = VCHR;
 1391                         break;
 1392                 case S_IFBLK:
 1393                         vattr.va_type = VBLK;
 1394                         break;
 1395                 case S_IFWHT:
 1396                         whiteout = 1;
 1397                         break;
 1398                 default:
 1399                         panic("kern_mknod: invalid mode");
 1400                 }
 1401         }
 1402         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1403                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1404                 vput(nd.ni_dvp);
 1405                 VFS_UNLOCK_GIANT(vfslocked);
 1406                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1407                         return (error);
 1408                 goto restart;
 1409         }
 1410 #ifdef MAC
 1411         if (error == 0 && !whiteout)
 1412                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1413                     &nd.ni_cnd, &vattr);
 1414 #endif
 1415         if (!error) {
 1416                 if (whiteout)
 1417                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1418                 else {
 1419                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1420                                                 &nd.ni_cnd, &vattr);
 1421                         if (error == 0)
 1422                                 vput(nd.ni_vp);
 1423                 }
 1424         }
 1425         NDFREE(&nd, NDF_ONLY_PNBUF);
 1426         vput(nd.ni_dvp);
 1427         vn_finished_write(mp);
 1428         VFS_UNLOCK_GIANT(vfslocked);
 1429         return (error);
 1430 }
 1431 
 1432 /*
 1433  * Create a named pipe.
 1434  */
 1435 #ifndef _SYS_SYSPROTO_H_
 1436 struct mkfifo_args {
 1437         char    *path;
 1438         int     mode;
 1439 };
 1440 #endif
 1441 int
 1442 sys_mkfifo(td, uap)
 1443         struct thread *td;
 1444         register struct mkfifo_args /* {
 1445                 char *path;
 1446                 int mode;
 1447         } */ *uap;
 1448 {
 1449 
 1450         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1451 }
 1452 
 1453 #ifndef _SYS_SYSPROTO_H_
 1454 struct mkfifoat_args {
 1455         int     fd;
 1456         char    *path;
 1457         mode_t  mode;
 1458 };
 1459 #endif
 1460 int
 1461 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1462 {
 1463 
 1464         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1465             uap->mode));
 1466 }
 1467 
 1468 int
 1469 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1470 {
 1471 
 1472         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1473 }
 1474 
 1475 int
 1476 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1477     int mode)
 1478 {
 1479         struct mount *mp;
 1480         struct vattr vattr;
 1481         int error;
 1482         struct nameidata nd;
 1483         int vfslocked;
 1484 
 1485         AUDIT_ARG_MODE(mode);
 1486 restart:
 1487         bwillwrite();
 1488         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1489             pathseg, path, fd, td);
 1490         if ((error = namei(&nd)) != 0)
 1491                 return (error);
 1492         vfslocked = NDHASGIANT(&nd);
 1493         if (nd.ni_vp != NULL) {
 1494                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1495                 if (nd.ni_vp == nd.ni_dvp)
 1496                         vrele(nd.ni_dvp);
 1497                 else
 1498                         vput(nd.ni_dvp);
 1499                 vrele(nd.ni_vp);
 1500                 VFS_UNLOCK_GIANT(vfslocked);
 1501                 return (EEXIST);
 1502         }
 1503         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1504                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1505                 vput(nd.ni_dvp);
 1506                 VFS_UNLOCK_GIANT(vfslocked);
 1507                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1508                         return (error);
 1509                 goto restart;
 1510         }
 1511         VATTR_NULL(&vattr);
 1512         vattr.va_type = VFIFO;
 1513         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1514 #ifdef MAC
 1515         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1516             &vattr);
 1517         if (error)
 1518                 goto out;
 1519 #endif
 1520         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1521         if (error == 0)
 1522                 vput(nd.ni_vp);
 1523 #ifdef MAC
 1524 out:
 1525 #endif
 1526         vput(nd.ni_dvp);
 1527         vn_finished_write(mp);
 1528         VFS_UNLOCK_GIANT(vfslocked);
 1529         NDFREE(&nd, NDF_ONLY_PNBUF);
 1530         return (error);
 1531 }
 1532 
 1533 /*
 1534  * Make a hard file link.
 1535  */
 1536 #ifndef _SYS_SYSPROTO_H_
 1537 struct link_args {
 1538         char    *path;
 1539         char    *link;
 1540 };
 1541 #endif
 1542 int
 1543 sys_link(td, uap)
 1544         struct thread *td;
 1545         register struct link_args /* {
 1546                 char *path;
 1547                 char *link;
 1548         } */ *uap;
 1549 {
 1550 
 1551         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1552 }
 1553 
 1554 #ifndef _SYS_SYSPROTO_H_
 1555 struct linkat_args {
 1556         int     fd1;
 1557         char    *path1;
 1558         int     fd2;
 1559         char    *path2;
 1560         int     flag;
 1561 };
 1562 #endif
 1563 int
 1564 sys_linkat(struct thread *td, struct linkat_args *uap)
 1565 {
 1566         int flag;
 1567 
 1568         flag = uap->flag;
 1569         if (flag & ~AT_SYMLINK_FOLLOW)
 1570                 return (EINVAL);
 1571 
 1572         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1573             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1574 }
 1575 
 1576 int hardlink_check_uid = 0;
 1577 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1578     &hardlink_check_uid, 0,
 1579     "Unprivileged processes cannot create hard links to files owned by other "
 1580     "users");
 1581 static int hardlink_check_gid = 0;
 1582 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1583     &hardlink_check_gid, 0,
 1584     "Unprivileged processes cannot create hard links to files owned by other "
 1585     "groups");
 1586 
 1587 static int
 1588 can_hardlink(struct vnode *vp, struct ucred *cred)
 1589 {
 1590         struct vattr va;
 1591         int error;
 1592 
 1593         if (!hardlink_check_uid && !hardlink_check_gid)
 1594                 return (0);
 1595 
 1596         error = VOP_GETATTR(vp, &va, cred);
 1597         if (error != 0)
 1598                 return (error);
 1599 
 1600         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1601                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1602                 if (error)
 1603                         return (error);
 1604         }
 1605 
 1606         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1607                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1608                 if (error)
 1609                         return (error);
 1610         }
 1611 
 1612         return (0);
 1613 }
 1614 
 1615 int
 1616 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1617 {
 1618 
 1619         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1620 }
 1621 
 1622 int
 1623 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1624     enum uio_seg segflg, int follow)
 1625 {
 1626         struct vnode *vp;
 1627         struct mount *mp;
 1628         struct nameidata nd;
 1629         int vfslocked;
 1630         int lvfslocked;
 1631         int error;
 1632 
 1633         bwillwrite();
 1634         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
 1635             fd1, td);
 1636 
 1637         if ((error = namei(&nd)) != 0)
 1638                 return (error);
 1639         vfslocked = NDHASGIANT(&nd);
 1640         NDFREE(&nd, NDF_ONLY_PNBUF);
 1641         vp = nd.ni_vp;
 1642         if (vp->v_type == VDIR) {
 1643                 vrele(vp);
 1644                 VFS_UNLOCK_GIANT(vfslocked);
 1645                 return (EPERM);         /* POSIX */
 1646         }
 1647         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1648                 vrele(vp);
 1649                 VFS_UNLOCK_GIANT(vfslocked);
 1650                 return (error);
 1651         }
 1652         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1653             segflg, path2, fd2, td);
 1654         if ((error = namei(&nd)) == 0) {
 1655                 lvfslocked = NDHASGIANT(&nd);
 1656                 if (nd.ni_vp != NULL) {
 1657                         if (nd.ni_dvp == nd.ni_vp)
 1658                                 vrele(nd.ni_dvp);
 1659                         else
 1660                                 vput(nd.ni_dvp);
 1661                         vrele(nd.ni_vp);
 1662                         error = EEXIST;
 1663                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1664                     == 0) {
 1665                         error = can_hardlink(vp, td->td_ucred);
 1666                         if (error == 0)
 1667 #ifdef MAC
 1668                                 error = mac_vnode_check_link(td->td_ucred,
 1669                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1670                         if (error == 0)
 1671 #endif
 1672                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1673                         VOP_UNLOCK(vp, 0);
 1674                         vput(nd.ni_dvp);
 1675                 }
 1676                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1677                 VFS_UNLOCK_GIANT(lvfslocked);
 1678         }
 1679         vrele(vp);
 1680         vn_finished_write(mp);
 1681         VFS_UNLOCK_GIANT(vfslocked);
 1682         return (error);
 1683 }
 1684 
 1685 /*
 1686  * Make a symbolic link.
 1687  */
 1688 #ifndef _SYS_SYSPROTO_H_
 1689 struct symlink_args {
 1690         char    *path;
 1691         char    *link;
 1692 };
 1693 #endif
 1694 int
 1695 sys_symlink(td, uap)
 1696         struct thread *td;
 1697         register struct symlink_args /* {
 1698                 char *path;
 1699                 char *link;
 1700         } */ *uap;
 1701 {
 1702 
 1703         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1704 }
 1705 
 1706 #ifndef _SYS_SYSPROTO_H_
 1707 struct symlinkat_args {
 1708         char    *path;
 1709         int     fd;
 1710         char    *path2;
 1711 };
 1712 #endif
 1713 int
 1714 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1715 {
 1716 
 1717         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1718             UIO_USERSPACE));
 1719 }
 1720 
 1721 int
 1722 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1723 {
 1724 
 1725         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1726 }
 1727 
 1728 int
 1729 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1730     enum uio_seg segflg)
 1731 {
 1732         struct mount *mp;
 1733         struct vattr vattr;
 1734         char *syspath;
 1735         int error;
 1736         struct nameidata nd;
 1737         int vfslocked;
 1738 
 1739         if (segflg == UIO_SYSSPACE) {
 1740                 syspath = path1;
 1741         } else {
 1742                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1743                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1744                         goto out;
 1745         }
 1746         AUDIT_ARG_TEXT(syspath);
 1747 restart:
 1748         bwillwrite();
 1749         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1750             segflg, path2, fd, td);
 1751         if ((error = namei(&nd)) != 0)
 1752                 goto out;
 1753         vfslocked = NDHASGIANT(&nd);
 1754         if (nd.ni_vp) {
 1755                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1756                 if (nd.ni_vp == nd.ni_dvp)
 1757                         vrele(nd.ni_dvp);
 1758                 else
 1759                         vput(nd.ni_dvp);
 1760                 vrele(nd.ni_vp);
 1761                 VFS_UNLOCK_GIANT(vfslocked);
 1762                 error = EEXIST;
 1763                 goto out;
 1764         }
 1765         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1766                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1767                 vput(nd.ni_dvp);
 1768                 VFS_UNLOCK_GIANT(vfslocked);
 1769                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1770                         goto out;
 1771                 goto restart;
 1772         }
 1773         VATTR_NULL(&vattr);
 1774         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1775 #ifdef MAC
 1776         vattr.va_type = VLNK;
 1777         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1778             &vattr);
 1779         if (error)
 1780                 goto out2;
 1781 #endif
 1782         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1783         if (error == 0)
 1784                 vput(nd.ni_vp);
 1785 #ifdef MAC
 1786 out2:
 1787 #endif
 1788         NDFREE(&nd, NDF_ONLY_PNBUF);
 1789         vput(nd.ni_dvp);
 1790         vn_finished_write(mp);
 1791         VFS_UNLOCK_GIANT(vfslocked);
 1792 out:
 1793         if (segflg != UIO_SYSSPACE)
 1794                 uma_zfree(namei_zone, syspath);
 1795         return (error);
 1796 }
 1797 
 1798 /*
 1799  * Delete a whiteout from the filesystem.
 1800  */
 1801 int
 1802 sys_undelete(td, uap)
 1803         struct thread *td;
 1804         register struct undelete_args /* {
 1805                 char *path;
 1806         } */ *uap;
 1807 {
 1808         int error;
 1809         struct mount *mp;
 1810         struct nameidata nd;
 1811         int vfslocked;
 1812 
 1813 restart:
 1814         bwillwrite();
 1815         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1816             UIO_USERSPACE, uap->path, td);
 1817         error = namei(&nd);
 1818         if (error)
 1819                 return (error);
 1820         vfslocked = NDHASGIANT(&nd);
 1821 
 1822         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1823                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1824                 if (nd.ni_vp == nd.ni_dvp)
 1825                         vrele(nd.ni_dvp);
 1826                 else
 1827                         vput(nd.ni_dvp);
 1828                 if (nd.ni_vp)
 1829                         vrele(nd.ni_vp);
 1830                 VFS_UNLOCK_GIANT(vfslocked);
 1831                 return (EEXIST);
 1832         }
 1833         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1834                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1835                 vput(nd.ni_dvp);
 1836                 VFS_UNLOCK_GIANT(vfslocked);
 1837                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1838                         return (error);
 1839                 goto restart;
 1840         }
 1841         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1842         NDFREE(&nd, NDF_ONLY_PNBUF);
 1843         vput(nd.ni_dvp);
 1844         vn_finished_write(mp);
 1845         VFS_UNLOCK_GIANT(vfslocked);
 1846         return (error);
 1847 }
 1848 
 1849 /*
 1850  * Delete a name from the filesystem.
 1851  */
 1852 #ifndef _SYS_SYSPROTO_H_
 1853 struct unlink_args {
 1854         char    *path;
 1855 };
 1856 #endif
 1857 int
 1858 sys_unlink(td, uap)
 1859         struct thread *td;
 1860         struct unlink_args /* {
 1861                 char *path;
 1862         } */ *uap;
 1863 {
 1864 
 1865         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1866 }
 1867 
 1868 #ifndef _SYS_SYSPROTO_H_
 1869 struct unlinkat_args {
 1870         int     fd;
 1871         char    *path;
 1872         int     flag;
 1873 };
 1874 #endif
 1875 int
 1876 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1877 {
 1878         int flag = uap->flag;
 1879         int fd = uap->fd;
 1880         char *path = uap->path;
 1881 
 1882         if (flag & ~AT_REMOVEDIR)
 1883                 return (EINVAL);
 1884 
 1885         if (flag & AT_REMOVEDIR)
 1886                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1887         else
 1888                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1889 }
 1890 
 1891 int
 1892 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1893 {
 1894 
 1895         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1896 }
 1897 
 1898 int
 1899 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1900     ino_t oldinum)
 1901 {
 1902         struct mount *mp;
 1903         struct vnode *vp;
 1904         int error;
 1905         struct nameidata nd;
 1906         struct stat sb;
 1907         int vfslocked;
 1908 
 1909 restart:
 1910         bwillwrite();
 1911         NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1912             pathseg, path, fd, td);
 1913         if ((error = namei(&nd)) != 0)
 1914                 return (error == EINVAL ? EPERM : error);
 1915         vfslocked = NDHASGIANT(&nd);
 1916         vp = nd.ni_vp;
 1917         if (vp->v_type == VDIR && oldinum == 0) {
 1918                 error = EPERM;          /* POSIX */
 1919         } else if (oldinum != 0 &&
 1920                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1921                   sb.st_ino != oldinum) {
 1922                         error = EIDRM;  /* Identifier removed */
 1923         } else {
 1924                 /*
 1925                  * The root of a mounted filesystem cannot be deleted.
 1926                  *
 1927                  * XXX: can this only be a VDIR case?
 1928                  */
 1929                 if (vp->v_vflag & VV_ROOT)
 1930                         error = EBUSY;
 1931         }
 1932         if (error == 0) {
 1933                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1934                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1935                         vput(nd.ni_dvp);
 1936                         if (vp == nd.ni_dvp)
 1937                                 vrele(vp);
 1938                         else
 1939                                 vput(vp);
 1940                         VFS_UNLOCK_GIANT(vfslocked);
 1941                         if ((error = vn_start_write(NULL, &mp,
 1942                             V_XSLEEP | PCATCH)) != 0)
 1943                                 return (error);
 1944                         goto restart;
 1945                 }
 1946 #ifdef MAC
 1947                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1948                     &nd.ni_cnd);
 1949                 if (error)
 1950                         goto out;
 1951 #endif
 1952                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1953                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1954 #ifdef MAC
 1955 out:
 1956 #endif
 1957                 vn_finished_write(mp);
 1958         }
 1959         NDFREE(&nd, NDF_ONLY_PNBUF);
 1960         vput(nd.ni_dvp);
 1961         if (vp == nd.ni_dvp)
 1962                 vrele(vp);
 1963         else
 1964                 vput(vp);
 1965         VFS_UNLOCK_GIANT(vfslocked);
 1966         return (error);
 1967 }
 1968 
 1969 /*
 1970  * Reposition read/write file offset.
 1971  */
 1972 #ifndef _SYS_SYSPROTO_H_
 1973 struct lseek_args {
 1974         int     fd;
 1975         int     pad;
 1976         off_t   offset;
 1977         int     whence;
 1978 };
 1979 #endif
 1980 int
 1981 sys_lseek(td, uap)
 1982         struct thread *td;
 1983         register struct lseek_args /* {
 1984                 int fd;
 1985                 int pad;
 1986                 off_t offset;
 1987                 int whence;
 1988         } */ *uap;
 1989 {
 1990         struct ucred *cred = td->td_ucred;
 1991         struct file *fp;
 1992         struct vnode *vp;
 1993         struct vattr vattr;
 1994         off_t foffset, offset, size;
 1995         int error, noneg;
 1996         int vfslocked;
 1997 
 1998         AUDIT_ARG_FD(uap->fd);
 1999         if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
 2000                 return (error);
 2001         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 2002                 fdrop(fp, td);
 2003                 return (ESPIPE);
 2004         }
 2005         vp = fp->f_vnode;
 2006         foffset = foffset_lock(fp, 0);
 2007         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2008         noneg = (vp->v_type != VCHR);
 2009         offset = uap->offset;
 2010         switch (uap->whence) {
 2011         case L_INCR:
 2012                 if (noneg &&
 2013                     (foffset < 0 ||
 2014                     (offset > 0 && foffset > OFF_MAX - offset))) {
 2015                         error = EOVERFLOW;
 2016                         break;
 2017                 }
 2018                 offset += foffset;
 2019                 break;
 2020         case L_XTND:
 2021                 vn_lock(vp, LK_SHARED | LK_RETRY);
 2022                 error = VOP_GETATTR(vp, &vattr, cred);
 2023                 VOP_UNLOCK(vp, 0);
 2024                 if (error)
 2025                         break;
 2026 
 2027                 /*
 2028                  * If the file references a disk device, then fetch
 2029                  * the media size and use that to determine the ending
 2030                  * offset.
 2031                  */
 2032                 if (vattr.va_size == 0 && vp->v_type == VCHR &&
 2033                     fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 2034                         vattr.va_size = size;
 2035                 if (noneg &&
 2036                     (vattr.va_size > OFF_MAX ||
 2037                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 2038                         error = EOVERFLOW;
 2039                         break;
 2040                 }
 2041                 offset += vattr.va_size;
 2042                 break;
 2043         case L_SET:
 2044                 break;
 2045         case SEEK_DATA:
 2046                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 2047                 break;
 2048         case SEEK_HOLE:
 2049                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 2050                 break;
 2051         default:
 2052                 error = EINVAL;
 2053         }
 2054         if (error == 0 && noneg && offset < 0)
 2055                 error = EINVAL;
 2056         if (error != 0)
 2057                 goto drop;
 2058         VFS_KNOTE_UNLOCKED(vp, 0);
 2059         *(off_t *)(td->td_retval) = offset;
 2060 drop:
 2061         fdrop(fp, td);
 2062         VFS_UNLOCK_GIANT(vfslocked);
 2063         foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
 2064         return (error);
 2065 }
 2066 
 2067 #if defined(COMPAT_43)
 2068 /*
 2069  * Reposition read/write file offset.
 2070  */
 2071 #ifndef _SYS_SYSPROTO_H_
 2072 struct olseek_args {
 2073         int     fd;
 2074         long    offset;
 2075         int     whence;
 2076 };
 2077 #endif
 2078 int
 2079 olseek(td, uap)
 2080         struct thread *td;
 2081         register struct olseek_args /* {
 2082                 int fd;
 2083                 long offset;
 2084                 int whence;
 2085         } */ *uap;
 2086 {
 2087         struct lseek_args /* {
 2088                 int fd;
 2089                 int pad;
 2090                 off_t offset;
 2091                 int whence;
 2092         } */ nuap;
 2093 
 2094         nuap.fd = uap->fd;
 2095         nuap.offset = uap->offset;
 2096         nuap.whence = uap->whence;
 2097         return (sys_lseek(td, &nuap));
 2098 }
 2099 #endif /* COMPAT_43 */
 2100 
 2101 /* Version with the 'pad' argument */
 2102 int
 2103 freebsd6_lseek(td, uap)
 2104         struct thread *td;
 2105         register struct freebsd6_lseek_args *uap;
 2106 {
 2107         struct lseek_args ouap;
 2108 
 2109         ouap.fd = uap->fd;
 2110         ouap.offset = uap->offset;
 2111         ouap.whence = uap->whence;
 2112         return (sys_lseek(td, &ouap));
 2113 }
 2114 
 2115 /*
 2116  * Check access permissions using passed credentials.
 2117  */
 2118 static int
 2119 vn_access(vp, user_flags, cred, td)
 2120         struct vnode    *vp;
 2121         int             user_flags;
 2122         struct ucred    *cred;
 2123         struct thread   *td;
 2124 {
 2125         int error;
 2126         accmode_t accmode;
 2127 
 2128         /* Flags == 0 means only check for existence. */
 2129         error = 0;
 2130         if (user_flags) {
 2131                 accmode = 0;
 2132                 if (user_flags & R_OK)
 2133                         accmode |= VREAD;
 2134                 if (user_flags & W_OK)
 2135                         accmode |= VWRITE;
 2136                 if (user_flags & X_OK)
 2137                         accmode |= VEXEC;
 2138 #ifdef MAC
 2139                 error = mac_vnode_check_access(cred, vp, accmode);
 2140                 if (error)
 2141                         return (error);
 2142 #endif
 2143                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2144                         error = VOP_ACCESS(vp, accmode, cred, td);
 2145         }
 2146         return (error);
 2147 }
 2148 
 2149 /*
 2150  * Check access permissions using "real" credentials.
 2151  */
 2152 #ifndef _SYS_SYSPROTO_H_
 2153 struct access_args {
 2154         char    *path;
 2155         int     flags;
 2156 };
 2157 #endif
 2158 int
 2159 sys_access(td, uap)
 2160         struct thread *td;
 2161         register struct access_args /* {
 2162                 char *path;
 2163                 int flags;
 2164         } */ *uap;
 2165 {
 2166 
 2167         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 2168 }
 2169 
 2170 #ifndef _SYS_SYSPROTO_H_
 2171 struct faccessat_args {
 2172         int     dirfd;
 2173         char    *path;
 2174         int     mode;
 2175         int     flag;
 2176 }
 2177 #endif
 2178 int
 2179 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2180 {
 2181 
 2182         if (uap->flag & ~AT_EACCESS)
 2183                 return (EINVAL);
 2184         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2185             uap->mode));
 2186 }
 2187 
 2188 int
 2189 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2190 {
 2191 
 2192         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
 2193 }
 2194 
 2195 int
 2196 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2197     int flags, int mode)
 2198 {
 2199         struct ucred *cred, *tmpcred;
 2200         struct vnode *vp;
 2201         struct nameidata nd;
 2202         int vfslocked;
 2203         int error;
 2204 
 2205         /*
 2206          * Create and modify a temporary credential instead of one that
 2207          * is potentially shared.
 2208          */
 2209         if (!(flags & AT_EACCESS)) {
 2210                 cred = td->td_ucred;
 2211                 tmpcred = crdup(cred);
 2212                 tmpcred->cr_uid = cred->cr_ruid;
 2213                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2214                 td->td_ucred = tmpcred;
 2215         } else
 2216                 cred = tmpcred = td->td_ucred;
 2217         AUDIT_ARG_VALUE(mode);
 2218         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2219             AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
 2220         if ((error = namei(&nd)) != 0)
 2221                 goto out1;
 2222         vfslocked = NDHASGIANT(&nd);
 2223         vp = nd.ni_vp;
 2224 
 2225         error = vn_access(vp, mode, tmpcred, td);
 2226         NDFREE(&nd, NDF_ONLY_PNBUF);
 2227         vput(vp);
 2228         VFS_UNLOCK_GIANT(vfslocked);
 2229 out1:
 2230         if (!(flags & AT_EACCESS)) {
 2231                 td->td_ucred = cred;
 2232                 crfree(tmpcred);
 2233         }
 2234         return (error);
 2235 }
 2236 
 2237 /*
 2238  * Check access permissions using "effective" credentials.
 2239  */
 2240 #ifndef _SYS_SYSPROTO_H_
 2241 struct eaccess_args {
 2242         char    *path;
 2243         int     flags;
 2244 };
 2245 #endif
 2246 int
 2247 sys_eaccess(td, uap)
 2248         struct thread *td;
 2249         register struct eaccess_args /* {
 2250                 char *path;
 2251                 int flags;
 2252         } */ *uap;
 2253 {
 2254 
 2255         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 2256 }
 2257 
 2258 int
 2259 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 2260 {
 2261 
 2262         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
 2263 }
 2264 
 2265 #if defined(COMPAT_43)
 2266 /*
 2267  * Get file status; this version follows links.
 2268  */
 2269 #ifndef _SYS_SYSPROTO_H_
 2270 struct ostat_args {
 2271         char    *path;
 2272         struct ostat *ub;
 2273 };
 2274 #endif
 2275 int
 2276 ostat(td, uap)
 2277         struct thread *td;
 2278         register struct ostat_args /* {
 2279                 char *path;
 2280                 struct ostat *ub;
 2281         } */ *uap;
 2282 {
 2283         struct stat sb;
 2284         struct ostat osb;
 2285         int error;
 2286 
 2287         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2288         if (error)
 2289                 return (error);
 2290         cvtstat(&sb, &osb);
 2291         error = copyout(&osb, uap->ub, sizeof (osb));
 2292         return (error);
 2293 }
 2294 
 2295 /*
 2296  * Get file status; this version does not follow links.
 2297  */
 2298 #ifndef _SYS_SYSPROTO_H_
 2299 struct olstat_args {
 2300         char    *path;
 2301         struct ostat *ub;
 2302 };
 2303 #endif
 2304 int
 2305 olstat(td, uap)
 2306         struct thread *td;
 2307         register struct olstat_args /* {
 2308                 char *path;
 2309                 struct ostat *ub;
 2310         } */ *uap;
 2311 {
 2312         struct stat sb;
 2313         struct ostat osb;
 2314         int error;
 2315 
 2316         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2317         if (error)
 2318                 return (error);
 2319         cvtstat(&sb, &osb);
 2320         error = copyout(&osb, uap->ub, sizeof (osb));
 2321         return (error);
 2322 }
 2323 
 2324 /*
 2325  * Convert from an old to a new stat structure.
 2326  */
 2327 void
 2328 cvtstat(st, ost)
 2329         struct stat *st;
 2330         struct ostat *ost;
 2331 {
 2332 
 2333         ost->st_dev = st->st_dev;
 2334         ost->st_ino = st->st_ino;
 2335         ost->st_mode = st->st_mode;
 2336         ost->st_nlink = st->st_nlink;
 2337         ost->st_uid = st->st_uid;
 2338         ost->st_gid = st->st_gid;
 2339         ost->st_rdev = st->st_rdev;
 2340         if (st->st_size < (quad_t)1 << 32)
 2341                 ost->st_size = st->st_size;
 2342         else
 2343                 ost->st_size = -2;
 2344         ost->st_atim = st->st_atim;
 2345         ost->st_mtim = st->st_mtim;
 2346         ost->st_ctim = st->st_ctim;
 2347         ost->st_blksize = st->st_blksize;
 2348         ost->st_blocks = st->st_blocks;
 2349         ost->st_flags = st->st_flags;
 2350         ost->st_gen = st->st_gen;
 2351 }
 2352 #endif /* COMPAT_43 */
 2353 
 2354 /*
 2355  * Get file status; this version follows links.
 2356  */
 2357 #ifndef _SYS_SYSPROTO_H_
 2358 struct stat_args {
 2359         char    *path;
 2360         struct stat *ub;
 2361 };
 2362 #endif
 2363 int
 2364 sys_stat(td, uap)
 2365         struct thread *td;
 2366         register struct stat_args /* {
 2367                 char *path;
 2368                 struct stat *ub;
 2369         } */ *uap;
 2370 {
 2371         struct stat sb;
 2372         int error;
 2373 
 2374         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2375         if (error == 0)
 2376                 error = copyout(&sb, uap->ub, sizeof (sb));
 2377         return (error);
 2378 }
 2379 
 2380 #ifndef _SYS_SYSPROTO_H_
 2381 struct fstatat_args {
 2382         int     fd;
 2383         char    *path;
 2384         struct stat     *buf;
 2385         int     flag;
 2386 }
 2387 #endif
 2388 int
 2389 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2390 {
 2391         struct stat sb;
 2392         int error;
 2393 
 2394         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2395             UIO_USERSPACE, &sb);
 2396         if (error == 0)
 2397                 error = copyout(&sb, uap->buf, sizeof (sb));
 2398         return (error);
 2399 }
 2400 
 2401 int
 2402 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2403 {
 2404 
 2405         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2406 }
 2407 
 2408 int
 2409 kern_statat(struct thread *td, int flag, int fd, char *path,
 2410     enum uio_seg pathseg, struct stat *sbp)
 2411 {
 2412 
 2413         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2414 }
 2415 
 2416 int
 2417 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2418     enum uio_seg pathseg, struct stat *sbp,
 2419     void (*hook)(struct vnode *vp, struct stat *sbp))
 2420 {
 2421         struct nameidata nd;
 2422         struct stat sb;
 2423         int error, vfslocked;
 2424 
 2425         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2426                 return (EINVAL);
 2427 
 2428         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2429             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
 2430             path, fd, CAP_FSTAT, td);
 2431 
 2432         if ((error = namei(&nd)) != 0)
 2433                 return (error);
 2434         vfslocked = NDHASGIANT(&nd);
 2435         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2436         if (!error) {
 2437                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2438                 if (S_ISREG(sb.st_mode))
 2439                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2440                 if (__predict_false(hook != NULL))
 2441                         hook(nd.ni_vp, &sb);
 2442         }
 2443         NDFREE(&nd, NDF_ONLY_PNBUF);
 2444         vput(nd.ni_vp);
 2445         VFS_UNLOCK_GIANT(vfslocked);
 2446         if (error)
 2447                 return (error);
 2448         *sbp = sb;
 2449 #ifdef KTRACE
 2450         if (KTRPOINT(td, KTR_STRUCT))
 2451                 ktrstat(&sb);
 2452 #endif
 2453         return (0);
 2454 }
 2455 
 2456 /*
 2457  * Get file status; this version does not follow links.
 2458  */
 2459 #ifndef _SYS_SYSPROTO_H_
 2460 struct lstat_args {
 2461         char    *path;
 2462         struct stat *ub;
 2463 };
 2464 #endif
 2465 int
 2466 sys_lstat(td, uap)
 2467         struct thread *td;
 2468         register struct lstat_args /* {
 2469                 char *path;
 2470                 struct stat *ub;
 2471         } */ *uap;
 2472 {
 2473         struct stat sb;
 2474         int error;
 2475 
 2476         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2477         if (error == 0)
 2478                 error = copyout(&sb, uap->ub, sizeof (sb));
 2479         return (error);
 2480 }
 2481 
 2482 int
 2483 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2484 {
 2485 
 2486         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2487             sbp));
 2488 }
 2489 
 2490 /*
 2491  * Implementation of the NetBSD [l]stat() functions.
 2492  */
 2493 void
 2494 cvtnstat(sb, nsb)
 2495         struct stat *sb;
 2496         struct nstat *nsb;
 2497 {
 2498         bzero(nsb, sizeof *nsb);
 2499         nsb->st_dev = sb->st_dev;
 2500         nsb->st_ino = sb->st_ino;
 2501         nsb->st_mode = sb->st_mode;
 2502         nsb->st_nlink = sb->st_nlink;
 2503         nsb->st_uid = sb->st_uid;
 2504         nsb->st_gid = sb->st_gid;
 2505         nsb->st_rdev = sb->st_rdev;
 2506         nsb->st_atim = sb->st_atim;
 2507         nsb->st_mtim = sb->st_mtim;
 2508         nsb->st_ctim = sb->st_ctim;
 2509         nsb->st_size = sb->st_size;
 2510         nsb->st_blocks = sb->st_blocks;
 2511         nsb->st_blksize = sb->st_blksize;
 2512         nsb->st_flags = sb->st_flags;
 2513         nsb->st_gen = sb->st_gen;
 2514         nsb->st_birthtim = sb->st_birthtim;
 2515 }
 2516 
 2517 #ifndef _SYS_SYSPROTO_H_
 2518 struct nstat_args {
 2519         char    *path;
 2520         struct nstat *ub;
 2521 };
 2522 #endif
 2523 int
 2524 sys_nstat(td, uap)
 2525         struct thread *td;
 2526         register struct nstat_args /* {
 2527                 char *path;
 2528                 struct nstat *ub;
 2529         } */ *uap;
 2530 {
 2531         struct stat sb;
 2532         struct nstat nsb;
 2533         int error;
 2534 
 2535         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2536         if (error)
 2537                 return (error);
 2538         cvtnstat(&sb, &nsb);
 2539         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2540         return (error);
 2541 }
 2542 
 2543 /*
 2544  * NetBSD lstat.  Get file status; this version does not follow links.
 2545  */
 2546 #ifndef _SYS_SYSPROTO_H_
 2547 struct lstat_args {
 2548         char    *path;
 2549         struct stat *ub;
 2550 };
 2551 #endif
 2552 int
 2553 sys_nlstat(td, uap)
 2554         struct thread *td;
 2555         register struct nlstat_args /* {
 2556                 char *path;
 2557                 struct nstat *ub;
 2558         } */ *uap;
 2559 {
 2560         struct stat sb;
 2561         struct nstat nsb;
 2562         int error;
 2563 
 2564         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2565         if (error)
 2566                 return (error);
 2567         cvtnstat(&sb, &nsb);
 2568         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2569         return (error);
 2570 }
 2571 
 2572 /*
 2573  * Get configurable pathname variables.
 2574  */
 2575 #ifndef _SYS_SYSPROTO_H_
 2576 struct pathconf_args {
 2577         char    *path;
 2578         int     name;
 2579 };
 2580 #endif
 2581 int
 2582 sys_pathconf(td, uap)
 2583         struct thread *td;
 2584         register struct pathconf_args /* {
 2585                 char *path;
 2586                 int name;
 2587         } */ *uap;
 2588 {
 2589 
 2590         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2591 }
 2592 
 2593 #ifndef _SYS_SYSPROTO_H_
 2594 struct lpathconf_args {
 2595         char    *path;
 2596         int     name;
 2597 };
 2598 #endif
 2599 int
 2600 sys_lpathconf(td, uap)
 2601         struct thread *td;
 2602         register struct lpathconf_args /* {
 2603                 char *path;
 2604                 int name;
 2605         } */ *uap;
 2606 {
 2607 
 2608         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
 2609 }
 2610 
 2611 int
 2612 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2613     u_long flags)
 2614 {
 2615         struct nameidata nd;
 2616         int error, vfslocked;
 2617 
 2618         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
 2619             flags, pathseg, path, td);
 2620         if ((error = namei(&nd)) != 0)
 2621                 return (error);
 2622         vfslocked = NDHASGIANT(&nd);
 2623         NDFREE(&nd, NDF_ONLY_PNBUF);
 2624 
 2625         /* If asynchronous I/O is available, it works for all files. */
 2626         if (name == _PC_ASYNC_IO)
 2627                 td->td_retval[0] = async_io_version;
 2628         else
 2629                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2630         vput(nd.ni_vp);
 2631         VFS_UNLOCK_GIANT(vfslocked);
 2632         return (error);
 2633 }
 2634 
 2635 /*
 2636  * Return target name of a symbolic link.
 2637  */
 2638 #ifndef _SYS_SYSPROTO_H_
 2639 struct readlink_args {
 2640         char    *path;
 2641         char    *buf;
 2642         size_t  count;
 2643 };
 2644 #endif
 2645 int
 2646 sys_readlink(td, uap)
 2647         struct thread *td;
 2648         register struct readlink_args /* {
 2649                 char *path;
 2650                 char *buf;
 2651                 size_t count;
 2652         } */ *uap;
 2653 {
 2654 
 2655         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2656             UIO_USERSPACE, uap->count));
 2657 }
 2658 #ifndef _SYS_SYSPROTO_H_
 2659 struct readlinkat_args {
 2660         int     fd;
 2661         char    *path;
 2662         char    *buf;
 2663         size_t  bufsize;
 2664 };
 2665 #endif
 2666 int
 2667 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2668 {
 2669 
 2670         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2671             uap->buf, UIO_USERSPACE, uap->bufsize));
 2672 }
 2673 
 2674 int
 2675 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2676     enum uio_seg bufseg, size_t count)
 2677 {
 2678 
 2679         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2680             count));
 2681 }
 2682 
 2683 int
 2684 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2685     char *buf, enum uio_seg bufseg, size_t count)
 2686 {
 2687         struct vnode *vp;
 2688         struct iovec aiov;
 2689         struct uio auio;
 2690         int error;
 2691         struct nameidata nd;
 2692         int vfslocked;
 2693 
 2694         if (count > IOSIZE_MAX)
 2695                 return (EINVAL);
 2696 
 2697         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2698             AUDITVNODE1, pathseg, path, fd, td);
 2699 
 2700         if ((error = namei(&nd)) != 0)
 2701                 return (error);
 2702         NDFREE(&nd, NDF_ONLY_PNBUF);
 2703         vfslocked = NDHASGIANT(&nd);
 2704         vp = nd.ni_vp;
 2705 #ifdef MAC
 2706         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2707         if (error) {
 2708                 vput(vp);
 2709                 VFS_UNLOCK_GIANT(vfslocked);
 2710                 return (error);
 2711         }
 2712 #endif
 2713         if (vp->v_type != VLNK)
 2714                 error = EINVAL;
 2715         else {
 2716                 aiov.iov_base = buf;
 2717                 aiov.iov_len = count;
 2718                 auio.uio_iov = &aiov;
 2719                 auio.uio_iovcnt = 1;
 2720                 auio.uio_offset = 0;
 2721                 auio.uio_rw = UIO_READ;
 2722                 auio.uio_segflg = bufseg;
 2723                 auio.uio_td = td;
 2724                 auio.uio_resid = count;
 2725                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2726         }
 2727         vput(vp);
 2728         VFS_UNLOCK_GIANT(vfslocked);
 2729         td->td_retval[0] = count - auio.uio_resid;
 2730         return (error);
 2731 }
 2732 
 2733 /*
 2734  * Common implementation code for chflags() and fchflags().
 2735  */
 2736 static int
 2737 setfflags(td, vp, flags)
 2738         struct thread *td;
 2739         struct vnode *vp;
 2740         int flags;
 2741 {
 2742         int error;
 2743         struct mount *mp;
 2744         struct vattr vattr;
 2745 
 2746         /* We can't support the value matching VNOVAL. */
 2747         if (flags == VNOVAL)
 2748                 return (EOPNOTSUPP);
 2749 
 2750         /*
 2751          * Prevent non-root users from setting flags on devices.  When
 2752          * a device is reused, users can retain ownership of the device
 2753          * if they are allowed to set flags and programs assume that
 2754          * chown can't fail when done as root.
 2755          */
 2756         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2757                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2758                 if (error)
 2759                         return (error);
 2760         }
 2761 
 2762         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2763                 return (error);
 2764         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2765         VATTR_NULL(&vattr);
 2766         vattr.va_flags = flags;
 2767 #ifdef MAC
 2768         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2769         if (error == 0)
 2770 #endif
 2771                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2772         VOP_UNLOCK(vp, 0);
 2773         vn_finished_write(mp);
 2774         return (error);
 2775 }
 2776 
 2777 /*
 2778  * Change flags of a file given a path name.
 2779  */
 2780 #ifndef _SYS_SYSPROTO_H_
 2781 struct chflags_args {
 2782         char    *path;
 2783         int     flags;
 2784 };
 2785 #endif
 2786 int
 2787 sys_chflags(td, uap)
 2788         struct thread *td;
 2789         register struct chflags_args /* {
 2790                 char *path;
 2791                 int flags;
 2792         } */ *uap;
 2793 {
 2794         int error;
 2795         struct nameidata nd;
 2796         int vfslocked;
 2797 
 2798         AUDIT_ARG_FFLAGS(uap->flags);
 2799         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2800             uap->path, td);
 2801         if ((error = namei(&nd)) != 0)
 2802                 return (error);
 2803         NDFREE(&nd, NDF_ONLY_PNBUF);
 2804         vfslocked = NDHASGIANT(&nd);
 2805         error = setfflags(td, nd.ni_vp, uap->flags);
 2806         vrele(nd.ni_vp);
 2807         VFS_UNLOCK_GIANT(vfslocked);
 2808         return (error);
 2809 }
 2810 
 2811 /*
 2812  * Same as chflags() but doesn't follow symlinks.
 2813  */
 2814 int
 2815 sys_lchflags(td, uap)
 2816         struct thread *td;
 2817         register struct lchflags_args /* {
 2818                 char *path;
 2819                 int flags;
 2820         } */ *uap;
 2821 {
 2822         int error;
 2823         struct nameidata nd;
 2824         int vfslocked;
 2825 
 2826         AUDIT_ARG_FFLAGS(uap->flags);
 2827         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2828             uap->path, td);
 2829         if ((error = namei(&nd)) != 0)
 2830                 return (error);
 2831         vfslocked = NDHASGIANT(&nd);
 2832         NDFREE(&nd, NDF_ONLY_PNBUF);
 2833         error = setfflags(td, nd.ni_vp, uap->flags);
 2834         vrele(nd.ni_vp);
 2835         VFS_UNLOCK_GIANT(vfslocked);
 2836         return (error);
 2837 }
 2838 
 2839 /*
 2840  * Change flags of a file given a file descriptor.
 2841  */
 2842 #ifndef _SYS_SYSPROTO_H_
 2843 struct fchflags_args {
 2844         int     fd;
 2845         int     flags;
 2846 };
 2847 #endif
 2848 int
 2849 sys_fchflags(td, uap)
 2850         struct thread *td;
 2851         register struct fchflags_args /* {
 2852                 int fd;
 2853                 int flags;
 2854         } */ *uap;
 2855 {
 2856         struct file *fp;
 2857         int vfslocked;
 2858         int error;
 2859 
 2860         AUDIT_ARG_FD(uap->fd);
 2861         AUDIT_ARG_FFLAGS(uap->flags);
 2862         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
 2863             &fp)) != 0)
 2864                 return (error);
 2865         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2866 #ifdef AUDIT
 2867         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2868         AUDIT_ARG_VNODE1(fp->f_vnode);
 2869         VOP_UNLOCK(fp->f_vnode, 0);
 2870 #endif
 2871         error = setfflags(td, fp->f_vnode, uap->flags);
 2872         VFS_UNLOCK_GIANT(vfslocked);
 2873         fdrop(fp, td);
 2874         return (error);
 2875 }
 2876 
 2877 /*
 2878  * Common implementation code for chmod(), lchmod() and fchmod().
 2879  */
 2880 int
 2881 setfmode(td, cred, vp, mode)
 2882         struct thread *td;
 2883         struct ucred *cred;
 2884         struct vnode *vp;
 2885         int mode;
 2886 {
 2887         int error;
 2888         struct mount *mp;
 2889         struct vattr vattr;
 2890 
 2891         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2892                 return (error);
 2893         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2894         VATTR_NULL(&vattr);
 2895         vattr.va_mode = mode & ALLPERMS;
 2896 #ifdef MAC
 2897         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2898         if (error == 0)
 2899 #endif
 2900                 error = VOP_SETATTR(vp, &vattr, cred);
 2901         VOP_UNLOCK(vp, 0);
 2902         vn_finished_write(mp);
 2903         return (error);
 2904 }
 2905 
 2906 /*
 2907  * Change mode of a file given path name.
 2908  */
 2909 #ifndef _SYS_SYSPROTO_H_
 2910 struct chmod_args {
 2911         char    *path;
 2912         int     mode;
 2913 };
 2914 #endif
 2915 int
 2916 sys_chmod(td, uap)
 2917         struct thread *td;
 2918         register struct chmod_args /* {
 2919                 char *path;
 2920                 int mode;
 2921         } */ *uap;
 2922 {
 2923 
 2924         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2925 }
 2926 
 2927 #ifndef _SYS_SYSPROTO_H_
 2928 struct fchmodat_args {
 2929         int     dirfd;
 2930         char    *path;
 2931         mode_t  mode;
 2932         int     flag;
 2933 }
 2934 #endif
 2935 int
 2936 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2937 {
 2938         int flag = uap->flag;
 2939         int fd = uap->fd;
 2940         char *path = uap->path;
 2941         mode_t mode = uap->mode;
 2942 
 2943         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2944                 return (EINVAL);
 2945 
 2946         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2947 }
 2948 
 2949 int
 2950 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2951 {
 2952 
 2953         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2954 }
 2955 
 2956 /*
 2957  * Change mode of a file given path name (don't follow links.)
 2958  */
 2959 #ifndef _SYS_SYSPROTO_H_
 2960 struct lchmod_args {
 2961         char    *path;
 2962         int     mode;
 2963 };
 2964 #endif
 2965 int
 2966 sys_lchmod(td, uap)
 2967         struct thread *td;
 2968         register struct lchmod_args /* {
 2969                 char *path;
 2970                 int mode;
 2971         } */ *uap;
 2972 {
 2973 
 2974         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2975             uap->mode, AT_SYMLINK_NOFOLLOW));
 2976 }
 2977 
 2978 
 2979 int
 2980 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2981     mode_t mode, int flag)
 2982 {
 2983         int error;
 2984         struct nameidata nd;
 2985         int vfslocked;
 2986         int follow;
 2987 
 2988         AUDIT_ARG_MODE(mode);
 2989         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2990         NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
 2991             path, fd, CAP_FCHMOD, td);
 2992         if ((error = namei(&nd)) != 0)
 2993                 return (error);
 2994         vfslocked = NDHASGIANT(&nd);
 2995         NDFREE(&nd, NDF_ONLY_PNBUF);
 2996         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2997         vrele(nd.ni_vp);
 2998         VFS_UNLOCK_GIANT(vfslocked);
 2999         return (error);
 3000 }
 3001 
 3002 /*
 3003  * Change mode of a file given a file descriptor.
 3004  */
 3005 #ifndef _SYS_SYSPROTO_H_
 3006 struct fchmod_args {
 3007         int     fd;
 3008         int     mode;
 3009 };
 3010 #endif
 3011 int
 3012 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 3013 {
 3014         struct file *fp;
 3015         int error;
 3016 
 3017         AUDIT_ARG_FD(uap->fd);
 3018         AUDIT_ARG_MODE(uap->mode);
 3019 
 3020         error = fget(td, uap->fd, CAP_FCHMOD, &fp);
 3021         if (error != 0)
 3022                 return (error);
 3023         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 3024         fdrop(fp, td);
 3025         return (error);
 3026 }
 3027 
 3028 /*
 3029  * Common implementation for chown(), lchown(), and fchown()
 3030  */
 3031 int
 3032 setfown(td, cred, vp, uid, gid)
 3033         struct thread *td;
 3034         struct ucred *cred;
 3035         struct vnode *vp;
 3036         uid_t uid;
 3037         gid_t gid;
 3038 {
 3039         int error;
 3040         struct mount *mp;
 3041         struct vattr vattr;
 3042 
 3043         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3044                 return (error);
 3045         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3046         VATTR_NULL(&vattr);
 3047         vattr.va_uid = uid;
 3048         vattr.va_gid = gid;
 3049 #ifdef MAC
 3050         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 3051             vattr.va_gid);
 3052         if (error == 0)
 3053 #endif
 3054                 error = VOP_SETATTR(vp, &vattr, cred);
 3055         VOP_UNLOCK(vp, 0);
 3056         vn_finished_write(mp);
 3057         return (error);
 3058 }
 3059 
 3060 /*
 3061  * Set ownership given a path name.
 3062  */
 3063 #ifndef _SYS_SYSPROTO_H_
 3064 struct chown_args {
 3065         char    *path;
 3066         int     uid;
 3067         int     gid;
 3068 };
 3069 #endif
 3070 int
 3071 sys_chown(td, uap)
 3072         struct thread *td;
 3073         register struct chown_args /* {
 3074                 char *path;
 3075                 int uid;
 3076                 int gid;
 3077         } */ *uap;
 3078 {
 3079 
 3080         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3081 }
 3082 
 3083 #ifndef _SYS_SYSPROTO_H_
 3084 struct fchownat_args {
 3085         int fd;
 3086         const char * path;
 3087         uid_t uid;
 3088         gid_t gid;
 3089         int flag;
 3090 };
 3091 #endif
 3092 int
 3093 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 3094 {
 3095         int flag;
 3096 
 3097         flag = uap->flag;
 3098         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3099                 return (EINVAL);
 3100 
 3101         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3102             uap->gid, uap->flag));
 3103 }
 3104 
 3105 int
 3106 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3107     int gid)
 3108 {
 3109 
 3110         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 3111 }
 3112 
 3113 int
 3114 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3115     int uid, int gid, int flag)
 3116 {
 3117         struct nameidata nd;
 3118         int error, vfslocked, follow;
 3119 
 3120         AUDIT_ARG_OWNER(uid, gid);
 3121         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3122         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
 3123             path, fd, CAP_FCHOWN, td);
 3124 
 3125         if ((error = namei(&nd)) != 0)
 3126                 return (error);
 3127         vfslocked = NDHASGIANT(&nd);
 3128         NDFREE(&nd, NDF_ONLY_PNBUF);
 3129         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3130         vrele(nd.ni_vp);
 3131         VFS_UNLOCK_GIANT(vfslocked);
 3132         return (error);
 3133 }
 3134 
 3135 /*
 3136  * Set ownership given a path name, do not cross symlinks.
 3137  */
 3138 #ifndef _SYS_SYSPROTO_H_
 3139 struct lchown_args {
 3140         char    *path;
 3141         int     uid;
 3142         int     gid;
 3143 };
 3144 #endif
 3145 int
 3146 sys_lchown(td, uap)
 3147         struct thread *td;
 3148         register struct lchown_args /* {
 3149                 char *path;
 3150                 int uid;
 3151                 int gid;
 3152         } */ *uap;
 3153 {
 3154 
 3155         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3156 }
 3157 
 3158 int
 3159 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3160     int gid)
 3161 {
 3162 
 3163         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3164             AT_SYMLINK_NOFOLLOW));
 3165 }
 3166 
 3167 /*
 3168  * Set ownership given a file descriptor.
 3169  */
 3170 #ifndef _SYS_SYSPROTO_H_
 3171 struct fchown_args {
 3172         int     fd;
 3173         int     uid;
 3174         int     gid;
 3175 };
 3176 #endif
 3177 int
 3178 sys_fchown(td, uap)
 3179         struct thread *td;
 3180         register struct fchown_args /* {
 3181                 int fd;
 3182                 int uid;
 3183                 int gid;
 3184         } */ *uap;
 3185 {
 3186         struct file *fp;
 3187         int error;
 3188 
 3189         AUDIT_ARG_FD(uap->fd);
 3190         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3191         error = fget(td, uap->fd, CAP_FCHOWN, &fp);
 3192         if (error != 0)
 3193                 return (error);
 3194         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3195         fdrop(fp, td);
 3196         return (error);
 3197 }
 3198 
 3199 /*
 3200  * Common implementation code for utimes(), lutimes(), and futimes().
 3201  */
 3202 static int
 3203 getutimes(usrtvp, tvpseg, tsp)
 3204         const struct timeval *usrtvp;
 3205         enum uio_seg tvpseg;
 3206         struct timespec *tsp;
 3207 {
 3208         struct timeval tv[2];
 3209         const struct timeval *tvp;
 3210         int error;
 3211 
 3212         if (usrtvp == NULL) {
 3213                 vfs_timestamp(&tsp[0]);
 3214                 tsp[1] = tsp[0];
 3215         } else {
 3216                 if (tvpseg == UIO_SYSSPACE) {
 3217                         tvp = usrtvp;
 3218                 } else {
 3219                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3220                                 return (error);
 3221                         tvp = tv;
 3222                 }
 3223 
 3224                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3225                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3226                         return (EINVAL);
 3227                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3228                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3229         }
 3230         return (0);
 3231 }
 3232 
 3233 /*
 3234  * Common implementation code for utimes(), lutimes(), and futimes().
       *
       * Applies the timestamps in ts[] to vp with VOP_SETATTR(): ts[0] becomes
       * the access time and ts[1] the modification time.  When numtimes > 2,
       * ts[2] becomes the birth time.  When numtimes < 3 and ts[1] is earlier
       * than the vnode's current birth time, the birth time is set to ts[1].
       * A non-zero nullflag adds VA_UTIMES_NULL to the request.
       *
       * Returns 0 or the error from vn_start_write(), the MAC check (when MAC
       * is configured) or VOP_SETATTR().
 3235  */
 3236 static int
 3237 setutimes(td, vp, ts, numtimes, nullflag)
 3238         struct thread *td;
 3239         struct vnode *vp;
 3240         const struct timespec *ts;
 3241         int numtimes;
 3242         int nullflag;
 3243 {
 3244         int error, setbirthtime;
 3245         struct mount *mp;
 3246         struct vattr vattr;
 3247 
 3248         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3249                 return (error);
 3250         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3251         setbirthtime = 0;
              /*
               * The current attributes must be read here, before VATTR_NULL()
               * below resets vattr.  A VOP_GETATTR() failure is not reported;
               * it only leaves the birth time untouched.
               */
 3252         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3253             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3254                 setbirthtime = 1;
 3255         VATTR_NULL(&vattr);
 3256         vattr.va_atime = ts[0];
 3257         vattr.va_mtime = ts[1];
 3258         if (setbirthtime)
 3259                 vattr.va_birthtime = ts[1];
              /* An explicit third timestamp overrides the implicit one. */
 3260         if (numtimes > 2)
 3261                 vattr.va_birthtime = ts[2];
 3262         if (nullflag)
 3263                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3264 #ifdef MAC
 3265         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3266             vattr.va_mtime);
 3267 #endif
              /*
               * Without MAC, error still holds the 0 left by the successful
               * vn_start_write() above, so the attributes are always set.
               */
 3268         if (error == 0)
 3269                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3270         VOP_UNLOCK(vp, 0);
 3271         vn_finished_write(mp);
 3272         return (error);
 3273 }
 3274 
 3275 /*
 3276  * Set the access and modification times of a file.
 3277  */
 3278 #ifndef _SYS_SYSPROTO_H_
 3279 struct utimes_args {
 3280         char    *path;
 3281         struct  timeval *tptr;
 3282 };
 3283 #endif
 3284 int
 3285 sys_utimes(td, uap)
 3286         struct thread *td;
 3287         register struct utimes_args /* {
 3288                 char *path;
 3289                 struct timeval *tptr;
 3290         } */ *uap;
 3291 {
 3292 
 3293         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3294             UIO_USERSPACE));
 3295 }
 3296 
 3297 #ifndef _SYS_SYSPROTO_H_
 3298 struct futimesat_args {
 3299         int fd;
 3300         const char * path;
 3301         const struct timeval * times;
 3302 };
 3303 #endif
 3304 int
 3305 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3306 {
 3307 
 3308         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3309             uap->times, UIO_USERSPACE));
 3310 }
 3311 
 3312 int
 3313 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3314     struct timeval *tptr, enum uio_seg tptrseg)
 3315 {
 3316 
 3317         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3318 }
 3319 
 3320 int
 3321 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3322     struct timeval *tptr, enum uio_seg tptrseg)
 3323 {
 3324         struct nameidata nd;
 3325         struct timespec ts[2];
 3326         int error, vfslocked;
 3327 
 3328         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3329                 return (error);
 3330         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
 3331             path, fd, CAP_FUTIMES, td);
 3332 
 3333         if ((error = namei(&nd)) != 0)
 3334                 return (error);
 3335         vfslocked = NDHASGIANT(&nd);
 3336         NDFREE(&nd, NDF_ONLY_PNBUF);
 3337         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3338         vrele(nd.ni_vp);
 3339         VFS_UNLOCK_GIANT(vfslocked);
 3340         return (error);
 3341 }
 3342 
 3343 /*
 3344  * Set the access and modification times of a file.
 3345  */
 3346 #ifndef _SYS_SYSPROTO_H_
 3347 struct lutimes_args {
 3348         char    *path;
 3349         struct  timeval *tptr;
 3350 };
 3351 #endif
 3352 int
 3353 sys_lutimes(td, uap)
 3354         struct thread *td;
 3355         register struct lutimes_args /* {
 3356                 char *path;
 3357                 struct timeval *tptr;
 3358         } */ *uap;
 3359 {
 3360 
 3361         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3362             UIO_USERSPACE));
 3363 }
 3364 
 3365 int
 3366 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3367     struct timeval *tptr, enum uio_seg tptrseg)
 3368 {
 3369         struct timespec ts[2];
 3370         int error;
 3371         struct nameidata nd;
 3372         int vfslocked;
 3373 
 3374         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3375                 return (error);
 3376         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3377         if ((error = namei(&nd)) != 0)
 3378                 return (error);
 3379         vfslocked = NDHASGIANT(&nd);
 3380         NDFREE(&nd, NDF_ONLY_PNBUF);
 3381         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3382         vrele(nd.ni_vp);
 3383         VFS_UNLOCK_GIANT(vfslocked);
 3384         return (error);
 3385 }
 3386 
 3387 /*
 3388  * Set the access and modification times of a file.
 3389  */
 3390 #ifndef _SYS_SYSPROTO_H_
 3391 struct futimes_args {
 3392         int     fd;
 3393         struct  timeval *tptr;
 3394 };
 3395 #endif
 3396 int
 3397 sys_futimes(td, uap)
 3398         struct thread *td;
 3399         register struct futimes_args /* {
 3400                 int  fd;
 3401                 struct timeval *tptr;
 3402         } */ *uap;
 3403 {
 3404 
 3405         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3406 }
 3407 
 3408 int
 3409 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3410     enum uio_seg tptrseg)
 3411 {
 3412         struct timespec ts[2];
 3413         struct file *fp;
 3414         int vfslocked;
 3415         int error;
 3416 
 3417         AUDIT_ARG_FD(fd);
 3418         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3419                 return (error);
 3420         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
 3421             != 0)
 3422                 return (error);
 3423         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3424 #ifdef AUDIT
 3425         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3426         AUDIT_ARG_VNODE1(fp->f_vnode);
 3427         VOP_UNLOCK(fp->f_vnode, 0);
 3428 #endif
 3429         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3430         VFS_UNLOCK_GIANT(vfslocked);
 3431         fdrop(fp, td);
 3432         return (error);
 3433 }
 3434 
 3435 /*
 3436  * Truncate a file given its path name.
 3437  */
 3438 #ifndef _SYS_SYSPROTO_H_
 3439 struct truncate_args {
 3440         char    *path;
 3441         int     pad;
 3442         off_t   length;
 3443 };
 3444 #endif
 3445 int
 3446 sys_truncate(td, uap)
 3447         struct thread *td;
 3448         register struct truncate_args /* {
 3449                 char *path;
 3450                 int pad;
 3451                 off_t length;
 3452         } */ *uap;
 3453 {
 3454 
 3455         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3456 }
 3457 
      /*
       * Set the size of the file named by path to length.
       *
       * Returns EINVAL for a negative length and EISDIR when the path names
       * a directory; otherwise 0 or the error from namei(), vn_start_write(),
       * the MAC write check (when MAC is configured), vn_writechk(),
       * VOP_ACCESS() or VOP_SETATTR().
       */
 3458 int
 3459 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3460 {
 3461         struct mount *mp;
 3462         struct vnode *vp;
 3463         void *rl_cookie;
 3464         struct vattr vattr;
 3465         struct nameidata nd;
 3466         int error, vfslocked;
 3467 
 3468         if (length < 0)
 3469                 return(EINVAL);
 3470         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3471         if ((error = namei(&nd)) != 0)
 3472                 return (error);
 3473         vfslocked = NDHASGIANT(&nd);
 3474         vp = nd.ni_vp;
              /*
               * Take a write range lock over [0, OFF_MAX) before starting the
               * write; it is released last on both exit paths.
               */
 3475         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3476         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3477                 vn_rangelock_unlock(vp, rl_cookie);
 3478                 vrele(vp);
 3479                 VFS_UNLOCK_GIANT(vfslocked);
 3480                 return (error);
 3481         }
 3482         NDFREE(&nd, NDF_ONLY_PNBUF);
 3483         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
              /*
               * The checks below form one if/else chain; the MAC arm exists
               * only when MAC is configured.
               */
 3484         if (vp->v_type == VDIR)
 3485                 error = EISDIR;
 3486 #ifdef MAC
 3487         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
                      /* MAC denied the write; error is already set. */
 3488         }
 3489 #endif
 3490         else if ((error = vn_writechk(vp)) == 0 &&
 3491             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
                      /* Only the size is changed. */
 3492                 VATTR_NULL(&vattr);
 3493                 vattr.va_size = length;
 3494                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3495         }
 3496         VOP_UNLOCK(vp, 0);
 3497         vn_finished_write(mp);
 3498         vn_rangelock_unlock(vp, rl_cookie);
 3499         vrele(vp);
 3500         VFS_UNLOCK_GIANT(vfslocked);
 3501         return (error);
 3502 }
 3503 
 3504 #if defined(COMPAT_43)
 3505 /*
 3506  * Truncate a file given its path name.
 3507  */
 3508 #ifndef _SYS_SYSPROTO_H_
 3509 struct otruncate_args {
 3510         char    *path;
 3511         long    length;
 3512 };
 3513 #endif
 3514 int
 3515 otruncate(td, uap)
 3516         struct thread *td;
 3517         register struct otruncate_args /* {
 3518                 char *path;
 3519                 long length;
 3520         } */ *uap;
 3521 {
 3522         struct truncate_args /* {
 3523                 char *path;
 3524                 int pad;
 3525                 off_t length;
 3526         } */ nuap;
 3527 
 3528         nuap.path = uap->path;
 3529         nuap.length = uap->length;
 3530         return (sys_truncate(td, &nuap));
 3531 }
 3532 #endif /* COMPAT_43 */
 3533 
 3534 /* Versions with the pad argument */
 3535 int
 3536 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3537 {
 3538         struct truncate_args ouap;
 3539 
 3540         ouap.path = uap->path;
 3541         ouap.length = uap->length;
 3542         return (sys_truncate(td, &ouap));
 3543 }
 3544 
 3545 int
 3546 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3547 {
 3548         struct ftruncate_args ouap;
 3549 
 3550         ouap.fd = uap->fd;
 3551         ouap.length = uap->length;
 3552         return (sys_ftruncate(td, &ouap));
 3553 }
 3554 
 3555 /*
 3556  * Sync an open file.
       *
       * Resolves uap->fd to a vnode (requiring CAP_FSYNC), cleans the pages of
       * its VM object if it has one, and then calls VOP_FSYNC() with MNT_WAIT.
       * Returns 0 or the error from getvnode(), vn_start_write() or
       * VOP_FSYNC().
 3557  */
 3558 #ifndef _SYS_SYSPROTO_H_
 3559 struct fsync_args {
 3560         int     fd;
 3561 };
 3562 #endif
 3563 int
 3564 sys_fsync(td, uap)
 3565         struct thread *td;
 3566         struct fsync_args /* {
 3567                 int fd;
 3568         } */ *uap;
 3569 {
 3570         struct vnode *vp;
 3571         struct mount *mp;
 3572         struct file *fp;
 3573         int vfslocked;
 3574         int error, lock_flags;
 3575 
 3576         AUDIT_ARG_FD(uap->fd);
 3577         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
 3578             &fp)) != 0)
 3579                 return (error);
 3580         vp = fp->f_vnode;
 3581         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3582         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3583                 goto drop;
              /*
               * Use a shared vnode lock when the mount allows shared writes.
               * If vn_start_write() returned no mount, fall back to checking
               * the vnode's own mount.
               */
 3584         if (MNT_SHARED_WRITES(mp) ||
 3585             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3586                 lock_flags = LK_SHARED;
 3587         } else {
 3588                 lock_flags = LK_EXCLUSIVE;
 3589         }
 3590         vn_lock(vp, lock_flags | LK_RETRY);
 3591         AUDIT_ARG_VNODE1(vp);
              /*
               * Clean the pages of the backing VM object first.
               * NOTE(review): the (0, 0, 0) arguments presumably select the
               * whole object with default flags; confirm against
               * vm_object_page_clean().
               */
 3592         if (vp->v_object != NULL) {
 3593                 VM_OBJECT_LOCK(vp->v_object);
 3594                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3595                 VM_OBJECT_UNLOCK(vp->v_object);
 3596         }
 3597         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3598 
 3599         VOP_UNLOCK(vp, 0);
 3600         vn_finished_write(mp);
      /* Common exit: release Giant (if held) and the file reference. */
 3601 drop:
 3602         VFS_UNLOCK_GIANT(vfslocked);
 3603         fdrop(fp, td);
 3604         return (error);
 3605 }
 3606 
 3607 /*
 3608  * Rename files.  Source and destination must either both be directories, or
 3609  * both not be directories.  If target is a directory, it must be empty.
 3610  */
 3611 #ifndef _SYS_SYSPROTO_H_
 3612 struct rename_args {
 3613         char    *from;
 3614         char    *to;
 3615 };
 3616 #endif
 3617 int
 3618 sys_rename(td, uap)
 3619         struct thread *td;
 3620         register struct rename_args /* {
 3621                 char *from;
 3622                 char *to;
 3623         } */ *uap;
 3624 {
 3625 
 3626         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3627 }
 3628 
 3629 #ifndef _SYS_SYSPROTO_H_
 3630 struct renameat_args {
 3631         int     oldfd;
 3632         char    *old;
 3633         int     newfd;
 3634         char    *new;
 3635 };
 3636 #endif
 3637 int
 3638 sys_renameat(struct thread *td, struct renameat_args *uap)
 3639 {
 3640 
 3641         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3642             UIO_USERSPACE));
 3643 }
 3644 
 3645 int
 3646 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3647 {
 3648 
 3649         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3650 }
 3651 
      /*
       * Rename "old" (resolved relative to oldfd) to "new" (resolved relative
       * to newfd).
       *
       * Returns 0 on success, including the case where source and target are
       * the same vnode and nothing is done.  Errors generated here:
       *   ENOTDIR  the source is a directory and an existing target is not;
       *   EISDIR   the source is not a directory and an existing target is;
       *   EINVAL   the source vnode is the target's parent directory, or the
       *            target lookup failed with EISDIR for a directory source.
       * Errors from namei(), vn_start_write(), the MAC checks (when MAC is
       * configured) and VOP_RENAME() are passed through.
       */
 3652 int
 3653 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3654     enum uio_seg pathseg)
 3655 {
 3656         struct mount *mp = NULL;
 3657         struct vnode *tvp, *fvp, *tdvp;
 3658         struct nameidata fromnd, tond;
 3659         int tvfslocked;
 3660         int fvfslocked;
 3661         int error;
 3662 
 3663         bwillwrite();
              /*
               * With MAC the source lookup returns parent and leaf locked so
               * the MAC check can run on them; without MAC only a reference
               * on the parent is requested.
               */
 3664 #ifdef MAC
 3665         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3666             MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3667 #else
 3668         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
 3669             AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3670 #endif
 3671 
 3672         if ((error = namei(&fromnd)) != 0)
 3673                 return (error);
 3674         fvfslocked = NDHASGIANT(&fromnd);
 3675         tvfslocked = 0;
 3676 #ifdef MAC
 3677         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3678             fromnd.ni_vp, &fromnd.ni_cnd);
              /* Drop the locks taken by the lookup; references are kept. */
 3679         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3680         if (fromnd.ni_dvp != fromnd.ni_vp)
 3681                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3682 #endif
 3683         fvp = fromnd.ni_vp;
 3684         if (error == 0)
 3685                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
 3686         if (error != 0) {
 3687                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3688                 vrele(fromnd.ni_dvp);
 3689                 vrele(fvp);
 3690                 goto out1;
 3691         }
 3692         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3693             SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
 3694             td);
 3695         if (fromnd.ni_vp->v_type == VDIR)
 3696                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3697         if ((error = namei(&tond)) != 0) {
 3698                 /* Translate error code for rename("dir1", "dir2/."). */
 3699                 if (error == EISDIR && fvp->v_type == VDIR)
 3700                         error = EINVAL;
 3701                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3702                 vrele(fromnd.ni_dvp);
 3703                 vrele(fvp);
 3704                 vn_finished_write(mp);
 3705                 goto out1;
 3706         }
 3707         tvfslocked = NDHASGIANT(&tond);
 3708         tdvp = tond.ni_dvp;
 3709         tvp = tond.ni_vp;
              /* An existing target must be of the same kind as the source. */
 3710         if (tvp != NULL) {
 3711                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3712                         error = ENOTDIR;
 3713                         goto out;
 3714                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3715                         error = EISDIR;
 3716                         goto out;
 3717                 }
 3718         }
              /* The source may not be the directory that holds the target. */
 3719         if (fvp == tdvp) {
 3720                 error = EINVAL;
 3721                 goto out;
 3722         }
 3723         /*
 3724          * If the source is the same as the destination (that is, if they
 3725          * are links to the same vnode), then there is nothing to do.
               * The -1 is an internal marker: it takes the cleanup branch
               * below and is turned into a 0 return at the end.
 3726          */
 3727         if (fvp == tvp)
 3728                 error = -1;
 3729 #ifdef MAC
 3730         else
 3731                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3732                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3733 #endif
 3734 out:
 3735         if (!error) {
                      /*
                       * NOTE(review): no vnodes are released on this branch,
                       * so VOP_RENAME() presumably consumes the references and
                       * locks on all four vnodes; confirm against
                       * VOP_RENAME(9).
                       */
 3736                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3737                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3738                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3739                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3740         } else {
                      /*
                       * Failure or nothing to do: release everything the two
                       * lookups returned.  When tdvp and tvp are the same
                       * vnode, tdvp is released with vrele() rather than
                       * vput() because vput(tvp) has already run on it.
                       */
 3741                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3742                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3743                 if (tvp)
 3744                         vput(tvp);
 3745                 if (tdvp == tvp)
 3746                         vrele(tdvp);
 3747                 else
 3748                         vput(tdvp);
 3749                 vrele(fromnd.ni_dvp);
 3750                 vrele(fvp);
 3751         }
 3752         vrele(tond.ni_startdir);
 3753         vn_finished_write(mp);
      /* Exit path shared with failures that occur before the target lookup. */
 3754 out1:
 3755         if (fromnd.ni_startdir)
 3756                 vrele(fromnd.ni_startdir);
 3757         VFS_UNLOCK_GIANT(fvfslocked);
 3758         VFS_UNLOCK_GIANT(tvfslocked);
              /* Source and target were the same vnode: report success. */
 3759         if (error == -1)
 3760                 return (0);
 3761         return (error);
 3762 }
 3763 
 3764 /*
 3765  * Make a directory file.
 3766  */
 3767 #ifndef _SYS_SYSPROTO_H_
 3768 struct mkdir_args {
 3769         char    *path;
 3770         int     mode;
 3771 };
 3772 #endif
 3773 int
 3774 sys_mkdir(td, uap)
 3775         struct thread *td;
 3776         register struct mkdir_args /* {
 3777                 char *path;
 3778                 int mode;
 3779         } */ *uap;
 3780 {
 3781 
 3782         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3783 }
 3784 
 3785 #ifndef _SYS_SYSPROTO_H_
 3786 struct mkdirat_args {
 3787         int     fd;
 3788         char    *path;
 3789         mode_t  mode;
 3790 };
 3791 #endif
 3792 int
 3793 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3794 {
 3795 
 3796         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3797 }
 3798 
 3799 int
 3800 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3801 {
 3802 
 3803         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3804 }
 3805 
      /*
       * Make a directory file named by `path'.  `fd' and `segflg' are handed
       * to NDINIT_ATRIGHTS() together with `path' for the name lookup.  The
       * mode given to the new directory is `mode' limited to ACCESSPERMS
       * with the bits set in the process's fd_cmask cleared.
       *
       * Returns 0 on success, EEXIST when the lookup already yields a vnode
       * for the name, or the error reported by namei(), vn_start_write(),
       * the MAC create check or VOP_MKDIR().
       */
 3806 int
 3807 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3808     int mode)
 3809 {
 3810         struct mount *mp;
 3811         struct vnode *vp;
 3812         struct vattr vattr;
 3813         int error;
 3814         struct nameidata nd;
 3815         int vfslocked;
 3816 
 3817         AUDIT_ARG_MODE(mode);
              /* The lookup is repeated from here after waiting for write access. */
 3818 restart:
 3819         bwillwrite();
 3820         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
 3821             AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
 3822         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3823         if ((error = namei(&nd)) != 0)
 3824                 return (error);
              /*
               * Remember the Giant state reported for this lookup; it is
               * handed to VFS_UNLOCK_GIANT() on every exit below.
               */
 3825         vfslocked = NDHASGIANT(&nd);
 3826         vp = nd.ni_vp;
              /* The name already resolves to a vnode: release and fail. */
 3827         if (vp != NULL) {
 3828                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3829                 /*
 3830                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3831                  * the strange behaviour of leaving the vnode unlocked
 3832                  * if the target is the same vnode as the parent.
 3833                  */
 3834                 if (vp == nd.ni_dvp)
 3835                         vrele(nd.ni_dvp);
 3836                 else
 3837                         vput(nd.ni_dvp);
 3838                 vrele(vp);
 3839                 VFS_UNLOCK_GIANT(vfslocked);
 3840                 return (EEXIST);
 3841         }
              /*
               * The V_NOWAIT attempt failed: give back everything obtained
               * from the lookup, wait with V_XSLEEP | PCATCH and start over.
               * NOTE(review): presumably this happens while writes to the
               * filesystem are suspended -- confirm against vn_start_write().
               */
 3842         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3843                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3844                 vput(nd.ni_dvp);
 3845                 VFS_UNLOCK_GIANT(vfslocked);
 3846                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3847                         return (error);
 3848                 goto restart;
 3849         }
              /* Attributes of the new node: a directory with the masked mode. */
 3850         VATTR_NULL(&vattr);
 3851         vattr.va_type = VDIR;
 3852         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3853 #ifdef MAC
 3854         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3855             &vattr);
 3856         if (error)
 3857                 goto out;
 3858 #endif
 3859         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3860 #ifdef MAC
 3861 out:
 3862 #endif
              /*
               * Shared cleanup (also reached when the MAC check refuses the
               * create).  nd.ni_vp is released only if VOP_MKDIR() succeeded.
               */
 3863         NDFREE(&nd, NDF_ONLY_PNBUF);
 3864         vput(nd.ni_dvp);
 3865         if (!error)
 3866                 vput(nd.ni_vp);
 3867         vn_finished_write(mp);
 3868         VFS_UNLOCK_GIANT(vfslocked);
 3869         return (error);
 3870 }
 3871 
 3872 /*
 3873  * Remove a directory file.
 3874  */
 3875 #ifndef _SYS_SYSPROTO_H_
 3876 struct rmdir_args {
 3877         char    *path;
 3878 };
 3879 #endif
 3880 int
 3881 sys_rmdir(td, uap)
 3882         struct thread *td;
 3883         struct rmdir_args /* {
 3884                 char *path;
 3885         } */ *uap;
 3886 {
 3887 
 3888         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3889 }
 3890 
 3891 int
 3892 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3893 {
 3894 
 3895         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3896 }
 3897 
      /*
       * Remove the directory named by `path'.  `fd' and `pathseg' are handed
       * to NDINIT_ATRIGHTS() together with `path' for the name lookup.
       *
       * Returns 0 on success.  Fails with ENOTDIR when the vnode found is not
       * a directory, EINVAL when it is the same vnode as its parent ("."),
       * EBUSY when it has VV_ROOT set, or with the error reported by namei(),
       * the MAC unlink check, vn_start_write() or VOP_RMDIR().
       */
 3898 int
 3899 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3900 {
 3901         struct mount *mp;
 3902         struct vnode *vp;
 3903         int error;
 3904         struct nameidata nd;
 3905         int vfslocked;
 3906 
              /* The lookup is repeated from here after waiting for write access. */
 3907 restart:
 3908         bwillwrite();
 3909         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
 3910             AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
 3911         if ((error = namei(&nd)) != 0)
 3912                 return (error);
              /*
               * Remember the Giant state reported for this lookup; it is
               * handed to VFS_UNLOCK_GIANT() on every exit below.
               */
 3913         vfslocked = NDHASGIANT(&nd);
 3914         vp = nd.ni_vp;
 3915         if (vp->v_type != VDIR) {
 3916                 error = ENOTDIR;
 3917                 goto out;
 3918         }
 3919         /*
 3920          * No rmdir "." please.
 3921          */
 3922         if (nd.ni_dvp == vp) {
 3923                 error = EINVAL;
 3924                 goto out;
 3925         }
 3926         /*
 3927          * The root of a mounted filesystem cannot be deleted.
 3928          */
 3929         if (vp->v_vflag & VV_ROOT) {
 3930                 error = EBUSY;
 3931                 goto out;
 3932         }
 3933 #ifdef MAC
 3934         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3935             &nd.ni_cnd);
 3936         if (error)
 3937                 goto out;
 3938 #endif
              /*
               * The V_NOWAIT attempt failed: give back everything obtained
               * from the lookup, wait with V_XSLEEP | PCATCH and start over.
               * nd.ni_dvp cannot equal vp here (that case left with EINVAL
               * above); the test simply mirrors the cleanup at `out'.
               */
 3939         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3940                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3941                 vput(vp);
 3942                 if (nd.ni_dvp == vp)
 3943                         vrele(nd.ni_dvp);
 3944                 else
 3945                         vput(nd.ni_dvp);
 3946                 VFS_UNLOCK_GIANT(vfslocked);
 3947                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3948                         return (error);
 3949                 goto restart;
 3950         }
 3951         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3952         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3953         vn_finished_write(mp);
              /*
               * Common exit for success and failure.  vp is released with
               * vput(); the parent is released with vrele() when it is the
               * same vnode as vp and with vput() otherwise.
               */
 3954 out:
 3955         NDFREE(&nd, NDF_ONLY_PNBUF);
 3956         vput(vp);
 3957         if (nd.ni_dvp == vp)
 3958                 vrele(nd.ni_dvp);
 3959         else
 3960                 vput(nd.ni_dvp);
 3961         VFS_UNLOCK_GIANT(vfslocked);
 3962         return (error);
 3963 }
 3964 
 3965 #ifdef COMPAT_43
 3966 /*
 3967  * Read a block of directory entries in a filesystem independent format.
 3968  */
 3969 #ifndef _SYS_SYSPROTO_H_
 3970 struct ogetdirentries_args {
 3971         int     fd;
 3972         char    *buf;
 3973         u_int   count;
 3974         long    *basep;
 3975 };
 3976 #endif
 3977 int
 3978 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3979 {
 3980         long loff;
 3981         int error;
 3982 
 3983         error = kern_ogetdirentries(td, uap, &loff);
 3984         if (error == 0)
 3985                 error = copyout(&loff, uap->basep, sizeof(long));
 3986         return (error);
 3987 }
 3988 
      /*
       * Backend of ogetdirentries(): read directory entries from descriptor
       * uap->fd into the user buffer uap->buf (at most uap->count bytes),
       * adjusting the d_type/d_namlen fields of each entry as explained in
       * the loop below.
       *
       * On success the number of bytes transferred is stored in
       * td->td_retval[0], the directory offset at which the read started in
       * *ploff, and 0 is returned.  Fails with EINVAL when uap->count exceeds
       * 64 * 1024 or the vnode is not a directory, EBADF when the file was
       * not opened for reading, EIO when an entry with a zero d_reclen is
       * found, or with the error from getvnode(), the MAC check,
       * VOP_READDIR() or uiomove().
       */
 3989 int
 3990 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3991     long *ploff)
 3992 {
 3993         struct vnode *vp;
 3994         struct file *fp;
 3995         struct uio auio, kuio;
 3996         struct iovec aiov, kiov;
 3997         struct dirent *dp, *edp;
 3998         caddr_t dirbuf;
 3999         int error, eofflag, readcnt, vfslocked;
 4000         long loff;
 4001         off_t foffset;
 4002 
 4003         /* XXX arbitrary sanity limit on `count'. */
 4004         if (uap->count > 64 * 1024)
 4005                 return (EINVAL);
 4006         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
 4007             &fp)) != 0)
 4008                 return (error);
 4009         if ((fp->f_flag & FREAD) == 0) {
 4010                 fdrop(fp, td);
 4011                 return (EBADF);
 4012         }
 4013         vp = fp->f_vnode;
 4014         foffset = foffset_lock(fp, 0);
              /*
               * Re-entered with vp switched to the covered vnode when the root
               * of a MNT_UNION mount returned no data (see the end of the
               * function).
               */
 4015 unionread:
 4016         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4017         if (vp->v_type != VDIR) {
 4018                 VFS_UNLOCK_GIANT(vfslocked);
 4019                 foffset_unlock(fp, foffset, 0);
 4020                 fdrop(fp, td);
 4021                 return (EINVAL);
 4022         }
              /* auio describes the caller's buffer in user space. */
 4023         aiov.iov_base = uap->buf;
 4024         aiov.iov_len = uap->count;
 4025         auio.uio_iov = &aiov;
 4026         auio.uio_iovcnt = 1;
 4027         auio.uio_rw = UIO_READ;
 4028         auio.uio_segflg = UIO_USERSPACE;
 4029         auio.uio_td = td;
 4030         auio.uio_resid = uap->count;
 4031         vn_lock(vp, LK_SHARED | LK_RETRY);
              /* loff keeps the starting offset; it is what *ploff receives. */
 4032         loff = auio.uio_offset = foffset;
 4033 #ifdef MAC
 4034         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4035         if (error) {
 4036                 VOP_UNLOCK(vp, 0);
 4037                 VFS_UNLOCK_GIANT(vfslocked);
                      /*
                       * Nothing was read.  NOTE(review): FOF_NOUPDATE presumably
                       * leaves the file offset as it was -- confirm.
                       */
 4038                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 4039                 fdrop(fp, td);
 4040                 return (error);
 4041         }
 4042 #endif
              /*
               * Only when BYTE_ORDER is not LITTLE_ENDIAN: if mnt_maxsymlinklen
               * is <= 0 the entries are read straight into the user buffer
               * and are not rewritten.
               */
 4043 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4044                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4045                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4046                             NULL, NULL);
 4047                         foffset = auio.uio_offset;
 4048                 } else
 4049 #       endif
 4050         {
                      /*
                       * Read into a temporary kernel buffer (kuio/kiov) so that
                       * each entry can be patched before uiomove() copies the
                       * result to the caller through auio.
                       */
 4051                 kuio = auio;
 4052                 kuio.uio_iov = &kiov;
 4053                 kuio.uio_segflg = UIO_SYSSPACE;
 4054                 kiov.iov_len = uap->count;
 4055                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4056                 kiov.iov_base = dirbuf;
 4057                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4058                             NULL, NULL);
 4059                 foffset = kuio.uio_offset;
 4060                 if (error == 0) {
                              /* readcnt is the number of bytes VOP_READDIR() produced. */
 4061                         readcnt = uap->count - kuio.uio_resid;
 4062                         edp = (struct dirent *)&dirbuf[readcnt];
 4063                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4064 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4065                                         /*
 4066                                          * The expected low byte of
 4067                                          * dp->d_namlen is our dp->d_type.
 4068                                          * The high MBZ byte of dp->d_namlen
 4069                                          * is our dp->d_namlen.
 4070                                          */
 4071                                         dp->d_type = dp->d_namlen;
 4072                                         dp->d_namlen = 0;
 4073 #                               else
 4074                                         /*
 4075                                          * The dp->d_type is the high byte
 4076                                          * of the expected dp->d_namlen,
 4077                                          * so must be zero'ed.
 4078                                          */
 4079                                         dp->d_type = 0;
 4080 #                               endif
                                      /* A zero d_reclen would never advance dp. */
 4081                                 if (dp->d_reclen > 0) {
 4082                                         dp = (struct dirent *)
 4083                                             ((char *)dp + dp->d_reclen);
 4084                                 } else {
 4085                                         error = EIO;
 4086                                         break;
 4087                                 }
 4088                         }
                              /* Copy out only if the walk reached the end (no EIO). */
 4089                         if (dp >= edp)
 4090                                 error = uiomove(dirbuf, readcnt, &auio);
 4091                 }
 4092                 free(dirbuf, M_TEMP);
 4093         }
 4094         if (error) {
 4095                 VOP_UNLOCK(vp, 0);
 4096                 VFS_UNLOCK_GIANT(vfslocked);
 4097                 foffset_unlock(fp, foffset, 0);
 4098                 fdrop(fp, td);
 4099                 return (error);
 4100         }
              /*
               * Nothing was transferred from the root of a MNT_UNION mount:
               * point the file at the vnode the mount covers, reset the offset
               * to 0 and read again from there.
               */
 4101         if (uap->count == auio.uio_resid &&
 4102             (vp->v_vflag & VV_ROOT) &&
 4103             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4104                 struct vnode *tvp = vp;
 4105                 vp = vp->v_mount->mnt_vnodecovered;
 4106                 VREF(vp);
 4107                 fp->f_vnode = vp;
 4108                 fp->f_data = vp;
 4109                 foffset = 0;
 4110                 vput(tvp);
 4111                 VFS_UNLOCK_GIANT(vfslocked);
 4112                 goto unionread;
 4113         }
 4114         VOP_UNLOCK(vp, 0);
 4115         VFS_UNLOCK_GIANT(vfslocked);
 4116         foffset_unlock(fp, foffset, 0);
 4117         fdrop(fp, td);
 4118         td->td_retval[0] = uap->count - auio.uio_resid;
              /* error is always 0 here; every failure returned earlier. */
 4119         if (error == 0)
 4120                 *ploff = loff;
 4121         return (error);
 4122 }
 4123 #endif /* COMPAT_43 */
 4124 
 4125 /*
 4126  * Read a block of directory entries in a filesystem independent format.
 4127  */
 4128 #ifndef _SYS_SYSPROTO_H_
 4129 struct getdirentries_args {
 4130         int     fd;
 4131         char    *buf;
 4132         u_int   count;
 4133         long    *basep;
 4134 };
 4135 #endif
 4136 int
 4137 sys_getdirentries(td, uap)
 4138         struct thread *td;
 4139         register struct getdirentries_args /* {
 4140                 int fd;
 4141                 char *buf;
 4142                 u_int count;
 4143                 long *basep;
 4144         } */ *uap;
 4145 {
 4146         long base;
 4147         int error;
 4148 
 4149         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 4150         if (error)
 4151                 return (error);
 4152         if (uap->basep != NULL)
 4153                 error = copyout(&base, uap->basep, sizeof(long));
 4154         return (error);
 4155 }
 4156 
      /*
       * Read directory entries from descriptor `fd' into the user buffer `buf'
       * (at most `count' bytes).
       *
       * On success the directory offset at which the read started is stored
       * in *basep, the number of bytes transferred in td->td_retval[0], and 0
       * is returned.  Fails with EINVAL when `count' exceeds IOSIZE_MAX or the
       * vnode is not a directory, EBADF when the file was not opened for
       * reading, or with the error from getvnode(), the MAC check or
       * VOP_READDIR().
       */
 4157 int
 4158 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4159     long *basep)
 4160 {
 4161         struct vnode *vp;
 4162         struct file *fp;
 4163         struct uio auio;
 4164         struct iovec aiov;
 4165         int vfslocked;
 4166         long loff;
 4167         int error, eofflag;
 4168         off_t foffset;
 4169 
 4170         AUDIT_ARG_FD(fd);
              /* The size check is made before any reference or lock is taken. */
 4171         auio.uio_resid = count;
 4172         if (auio.uio_resid > IOSIZE_MAX)
 4173                 return (EINVAL);
 4174         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
 4175             &fp)) != 0)
 4176                 return (error);
 4177         if ((fp->f_flag & FREAD) == 0) {
 4178                 fdrop(fp, td);
 4179                 return (EBADF);
 4180         }
 4181         vp = fp->f_vnode;
 4182         foffset = foffset_lock(fp, 0);
              /*
               * Re-entered with vp switched to the covered vnode when the root
               * of a MNT_UNION mount returned no data (see below).
               */
 4183 unionread:
 4184         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4185         if (vp->v_type != VDIR) {
 4186                 VFS_UNLOCK_GIANT(vfslocked);
 4187                 error = EINVAL;
 4188                 goto fail;
 4189         }
 4190         aiov.iov_base = buf;
 4191         aiov.iov_len = count;
 4192         auio.uio_iov = &aiov;
 4193         auio.uio_iovcnt = 1;
 4194         auio.uio_rw = UIO_READ;
 4195         auio.uio_segflg = UIO_USERSPACE;
 4196         auio.uio_td = td;
 4197         vn_lock(vp, LK_SHARED | LK_RETRY);
 4198         AUDIT_ARG_VNODE1(vp);
              /* loff keeps the starting offset; it is what *basep receives. */
 4199         loff = auio.uio_offset = foffset;
              /*
               * With MAC compiled in, VOP_READDIR() runs only if the MAC check
               * passed; otherwise it runs unconditionally.
               */
 4200 #ifdef MAC
 4201         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4202         if (error == 0)
 4203 #endif
 4204                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4205                     NULL);
 4206         foffset = auio.uio_offset;
 4207         if (error) {
 4208                 VOP_UNLOCK(vp, 0);
 4209                 VFS_UNLOCK_GIANT(vfslocked);
 4210                 goto fail;
 4211         }
              /*
               * Nothing was transferred from the root of a MNT_UNION mount:
               * point the file at the vnode the mount covers, reset the offset
               * to 0 and read again from there.
               */
 4212         if (count == auio.uio_resid &&
 4213             (vp->v_vflag & VV_ROOT) &&
 4214             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4215                 struct vnode *tvp = vp;
 4216                 vp = vp->v_mount->mnt_vnodecovered;
 4217                 VREF(vp);
 4218                 fp->f_vnode = vp;
 4219                 fp->f_data = vp;
 4220                 foffset = 0;
 4221                 vput(tvp);
 4222                 VFS_UNLOCK_GIANT(vfslocked);
 4223                 goto unionread;
 4224         }
 4225         VOP_UNLOCK(vp, 0);
 4226         VFS_UNLOCK_GIANT(vfslocked);
 4227         *basep = loff;
 4228         td->td_retval[0] = count - auio.uio_resid;
              /*
               * Reached on success (error == 0) as well as on failure: unlock
               * the file offset and drop the reference taken by getvnode().
               */
 4229 fail:
 4230         foffset_unlock(fp, foffset, 0);
 4231         fdrop(fp, td);
 4232         return (error);
 4233 }
 4234 
 4235 #ifndef _SYS_SYSPROTO_H_
 4236 struct getdents_args {
 4237         int fd;
 4238         char *buf;
 4239         size_t count;
 4240 };
 4241 #endif
 4242 int
 4243 sys_getdents(td, uap)
 4244         struct thread *td;
 4245         register struct getdents_args /* {
 4246                 int fd;
 4247                 char *buf;
 4248                 u_int count;
 4249         } */ *uap;
 4250 {
 4251         struct getdirentries_args ap;
 4252         ap.fd = uap->fd;
 4253         ap.buf = uap->buf;
 4254         ap.count = uap->count;
 4255         ap.basep = NULL;
 4256         return (sys_getdirentries(td, &ap));
 4257 }
 4258 
 4259 /*
 4260  * Set the mode mask for creation of filesystem nodes.
 4261  */
 4262 #ifndef _SYS_SYSPROTO_H_
 4263 struct umask_args {
 4264         int     newmask;
 4265 };
 4266 #endif
 4267 int
 4268 sys_umask(td, uap)
 4269         struct thread *td;
 4270         struct umask_args /* {
 4271                 int newmask;
 4272         } */ *uap;
 4273 {
 4274         register struct filedesc *fdp;
 4275 
 4276         FILEDESC_XLOCK(td->td_proc->p_fd);
 4277         fdp = td->td_proc->p_fd;
 4278         td->td_retval[0] = fdp->fd_cmask;
 4279         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4280         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4281         return (0);
 4282 }
 4283 
 4284 /*
 4285  * Void all references to file by ripping underlying filesystem away from
 4286  * vnode.
 4287  */
 4288 #ifndef _SYS_SYSPROTO_H_
 4289 struct revoke_args {
 4290         char    *path;
 4291 };
 4292 #endif
 4293 int
 4294 sys_revoke(td, uap)
 4295         struct thread *td;
 4296         register struct revoke_args /* {
 4297                 char *path;
 4298         } */ *uap;
 4299 {
 4300         struct vnode *vp;
 4301         struct vattr vattr;
 4302         int error;
 4303         struct nameidata nd;
 4304         int vfslocked;
 4305 
 4306         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4307             UIO_USERSPACE, uap->path, td);
 4308         if ((error = namei(&nd)) != 0)
 4309                 return (error);
 4310         vfslocked = NDHASGIANT(&nd);
 4311         vp = nd.ni_vp;
 4312         NDFREE(&nd, NDF_ONLY_PNBUF);
 4313         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4314                 error = EINVAL;
 4315                 goto out;
 4316         }
 4317 #ifdef MAC
 4318         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4319         if (error)
 4320                 goto out;
 4321 #endif
 4322         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4323         if (error)
 4324                 goto out;
 4325         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4326                 error = priv_check(td, PRIV_VFS_ADMIN);
 4327                 if (error)
 4328                         goto out;
 4329         }
 4330         if (vcount(vp) > 1)
 4331                 VOP_REVOKE(vp, REVOKEALL);
 4332 out:
 4333         vput(vp);
 4334         VFS_UNLOCK_GIANT(vfslocked);
 4335         return (error);
 4336 }
 4337 
 4338 /*
 4339  * Convert a user file descriptor to a kernel file entry and check that, if it
 4340  * is a capability, the correct rights are present. A reference on the file
 4341  * entry is held upon returning.
 4342  */
 4343 int
 4344 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
 4345     struct file **fpp)
 4346 {
 4347         struct file *fp;
 4348 #ifdef CAPABILITIES
 4349         struct file *fp_fromcap;
 4350 #endif
 4351         int error;
 4352 
 4353         error = 0;
 4354         fp = NULL;
 4355         if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
 4356                 return (EBADF);
 4357 #ifdef CAPABILITIES
 4358         /*
 4359          * If the file descriptor is for a capability, test rights and use the
 4360          * file descriptor referenced by the capability.
 4361          */
 4362         error = cap_funwrap(fp, rights, &fp_fromcap);
 4363         if (error) {
 4364                 fdrop(fp, curthread);
 4365                 return (error);
 4366         }
 4367         if (fp != fp_fromcap) {
 4368                 fhold(fp_fromcap);
 4369                 fdrop(fp, curthread);
 4370                 fp = fp_fromcap;
 4371         }
 4372 #endif /* CAPABILITIES */
 4373 
 4374         /*
 4375          * The file could be not of the vnode type, or it may be not
 4376          * yet fully initialized, in which case the f_vnode pointer
 4377          * may be set, but f_ops is still badfileops.  E.g.,
 4378          * devfs_open() transiently create such situation to
 4379          * facilitate csw d_fdopen().
 4380          *
 4381          * Dupfdopen() handling in kern_openat() installs the
 4382          * half-baked file into the process descriptor table, allowing
 4383          * other thread to dereference it. Guard against the race by
 4384          * checking f_ops.
 4385          */
 4386         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4387                 fdrop(fp, curthread);
 4388                 return (EINVAL);
 4389         }
 4390         *fpp = fp;
 4391         return (0);
 4392 }
 4393 
 4394 
 4395 /*
 4396  * Get an (NFS) file handle.
 4397  */
 4398 #ifndef _SYS_SYSPROTO_H_
 4399 struct lgetfh_args {
 4400         char    *fname;
 4401         fhandle_t *fhp;
 4402 };
 4403 #endif
 4404 int
 4405 sys_lgetfh(td, uap)
 4406         struct thread *td;
 4407         register struct lgetfh_args *uap;
 4408 {
 4409         struct nameidata nd;
 4410         fhandle_t fh;
 4411         register struct vnode *vp;
 4412         int vfslocked;
 4413         int error;
 4414 
 4415         error = priv_check(td, PRIV_VFS_GETFH);
 4416         if (error)
 4417                 return (error);
 4418         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4419             UIO_USERSPACE, uap->fname, td);
 4420         error = namei(&nd);
 4421         if (error)
 4422                 return (error);
 4423         vfslocked = NDHASGIANT(&nd);
 4424         NDFREE(&nd, NDF_ONLY_PNBUF);
 4425         vp = nd.ni_vp;
 4426         bzero(&fh, sizeof(fh));
 4427         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4428         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4429         vput(vp);
 4430         VFS_UNLOCK_GIANT(vfslocked);
 4431         if (error)
 4432                 return (error);
 4433         error = copyout(&fh, uap->fhp, sizeof (fh));
 4434         return (error);
 4435 }
 4436 
 4437 #ifndef _SYS_SYSPROTO_H_
 4438 struct getfh_args {
 4439         char    *fname;
 4440         fhandle_t *fhp;
 4441 };
 4442 #endif
 4443 int
 4444 sys_getfh(td, uap)
 4445         struct thread *td;
 4446         register struct getfh_args *uap;
 4447 {
 4448         struct nameidata nd;
 4449         fhandle_t fh;
 4450         register struct vnode *vp;
 4451         int vfslocked;
 4452         int error;
 4453 
 4454         error = priv_check(td, PRIV_VFS_GETFH);
 4455         if (error)
 4456                 return (error);
 4457         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4458             UIO_USERSPACE, uap->fname, td);
 4459         error = namei(&nd);
 4460         if (error)
 4461                 return (error);
 4462         vfslocked = NDHASGIANT(&nd);
 4463         NDFREE(&nd, NDF_ONLY_PNBUF);
 4464         vp = nd.ni_vp;
 4465         bzero(&fh, sizeof(fh));
 4466         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4467         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4468         vput(vp);
 4469         VFS_UNLOCK_GIANT(vfslocked);
 4470         if (error)
 4471                 return (error);
 4472         error = copyout(&fh, uap->fhp, sizeof (fh));
 4473         return (error);
 4474 }
 4475 
 4476 /*
 4477  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4478  * open descriptor.
 4479  *
 4480  * warning: do not remove the priv_check() call or this becomes one giant
 4481  * security hole.
       *
       * The handle is copied in from uap->u_fhp and uap->flags is converted
       * with FFLAGS().  On success the new descriptor index is returned in
       * td->td_retval[0].  Fails with EINVAL unless FREAD or FWRITE is
       * requested (or when O_CREAT is set), ESTALE when vfs_busyfs() finds no
       * mount for the handle's fsid, EMLINK/EOPNOTSUPP/ENOTDIR/EISDIR for
       * unsuitable vnode types, or with the error of any call made below.
 4482  */
 4483 #ifndef _SYS_SYSPROTO_H_
 4484 struct fhopen_args {
 4485         const struct fhandle *u_fhp;
 4486         int flags;
 4487 };
 4488 #endif
 4489 int
 4490 sys_fhopen(td, uap)
 4491         struct thread *td;
 4492         struct fhopen_args /* {
 4493                 const struct fhandle *u_fhp;
 4494                 int flags;
 4495         } */ *uap;
 4496 {
 4497         struct proc *p = td->td_proc;
 4498         struct mount *mp;
 4499         struct vnode *vp;
 4500         struct fhandle fhp;
 4501         struct vattr vat;
 4502         struct vattr *vap = &vat;
 4503         struct flock lf;
 4504         struct file *fp;
 4505         register struct filedesc *fdp = p->p_fd;
 4506         int fmode, error, type;
 4507         accmode_t accmode;
 4508         struct file *nfp;
 4509         int vfslocked;
 4510         int indx;
 4511 
 4512         error = priv_check(td, PRIV_VFS_FHOPEN);
 4513         if (error)
 4514                 return (error);
 4515         fmode = FFLAGS(uap->flags);
 4516         /* why not allow a non-read/write open for our lockd? */
 4517         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4518                 return (EINVAL);
 4519         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4520         if (error)
 4521                 return(error);
 4522         /* find the mount point */
 4523         mp = vfs_busyfs(&fhp.fh_fsid);
 4524         if (mp == NULL)
 4525                 return (ESTALE);
 4526         vfslocked = VFS_LOCK_GIANT(mp);
 4527         /* now give me my vnode, it gets returned to me locked */
 4528         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4529         vfs_unbusy(mp);
 4530         if (error)
 4531                 goto out;
 4532         /*
 4533          * from now on we have to make sure not
 4534          * to forget about the vnode
 4535          * any error that causes an abort must vput(vp)
 4536          * just set error = err and 'goto bad;'.
 4537          */
 4538 
 4539         /*
 4540          * from vn_open
 4541          */
 4542         if (vp->v_type == VLNK) {
 4543                 error = EMLINK;
 4544                 goto bad;
 4545         }
 4546         if (vp->v_type == VSOCK) {
 4547                 error = EOPNOTSUPP;
 4548                 goto bad;
 4549         }
 4550         if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
 4551                 error = ENOTDIR;
 4552                 goto bad;
 4553         }
              /* Build the access mode that VOP_ACCESS() is asked to check. */
 4554         accmode = 0;
 4555         if (fmode & (FWRITE | O_TRUNC)) {
 4556                 if (vp->v_type == VDIR) {
 4557                         error = EISDIR;
 4558                         goto bad;
 4559                 }
 4560                 error = vn_writechk(vp);
 4561                 if (error)
 4562                         goto bad;
 4563                 accmode |= VWRITE;
 4564         }
 4565         if (fmode & FREAD)
 4566                 accmode |= VREAD;
 4567         if ((fmode & O_APPEND) && (fmode & FWRITE))
 4568                 accmode |= VAPPEND;
 4569 #ifdef MAC
 4570         error = mac_vnode_check_open(td->td_ucred, vp, accmode);
 4571         if (error)
 4572                 goto bad;
 4573 #endif
 4574         if (accmode) {
 4575                 error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
 4576                 if (error)
 4577                         goto bad;
 4578         }
              /*
               * Truncation: the vnode lock is dropped around the
               * V_WAIT | PCATCH vn_start_write() call and retaken afterwards;
               * vfs_ref()/vfs_rel() bracket that window.
               */
 4579         if (fmode & O_TRUNC) {
 4580                 vfs_ref(mp);
 4581                 VOP_UNLOCK(vp, 0);                              /* XXX */
 4582                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
                              /* vp is unlocked here, hence vrele() and `out'. */
 4583                         vrele(vp);
 4584                         vfs_rel(mp);
 4585                         goto out;
 4586                 }
 4587                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4588                 vfs_rel(mp);
 4589 #ifdef MAC
 4590                 /*
 4591                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4592                  * should be right.
 4593                  */
 4594                 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
 4595                 if (error == 0) {
 4596 #endif
                              /* Truncate by setting only the size attribute to 0. */
 4597                         VATTR_NULL(vap);
 4598                         vap->va_size = 0;
 4599                         error = VOP_SETATTR(vp, vap, td->td_ucred);
 4600 #ifdef MAC
 4601                 }
 4602 #endif
 4603                 vn_finished_write(mp);
 4604                 if (error)
 4605                         goto bad;
 4606         }
 4607         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4608         if (error)
 4609                 goto bad;
 4610 
 4611         if (fmode & FWRITE)
 4612                 vp->v_writecount++;
 4613 
 4614         /*
 4615          * end of vn_open code
 4616          */
 4617 
 4618         if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
                      /* Undo the v_writecount bump made just above. */
 4619                 if (fmode & FWRITE)
 4620                         vp->v_writecount--;
 4621                 goto bad;
 4622         }
 4623         /* An extra reference on `nfp' has been held for us by falloc(). */
 4624         fp = nfp;
 4625         nfp->f_vnode = vp;
 4626         finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
              /*
               * O_EXLOCK/O_SHLOCK: take a whole-file F_FLOCK lock (l_start and
               * l_len are both 0), write or read according to the flag, and
               * with F_WAIT unless FNONBLOCK was requested.  The vnode is
               * unlocked while VOP_ADVLOCK() runs.
               */
 4627         if (fmode & (O_EXLOCK | O_SHLOCK)) {
 4628                 lf.l_whence = SEEK_SET;
 4629                 lf.l_start = 0;
 4630                 lf.l_len = 0;
 4631                 if (fmode & O_EXLOCK)
 4632                         lf.l_type = F_WRLCK;
 4633                 else
 4634                         lf.l_type = F_RDLCK;
 4635                 type = F_FLOCK;
 4636                 if ((fmode & FNONBLOCK) == 0)
 4637                         type |= F_WAIT;
 4638                 VOP_UNLOCK(vp, 0);
 4639                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4640                             type)) != 0) {
 4641                         /*
 4642                          * The lock request failed.  Normally close the
 4643                          * descriptor but handle the case where someone might
 4644                          * have dup()d or close()d it when we weren't looking.
 4645                          */
 4646                         fdclose(fdp, fp, indx, td);
 4647 
 4648                         /*
 4649                          * release our private reference
 4650                          */
 4651                         fdrop(fp, td);
                              /*
                               * NOTE(review): vp is neither vput() nor vrele()d
                               * on this path; presumably its reference goes
                               * away when the file is torn down -- confirm.
                               */
 4652                         goto out;
 4653                 }
 4654                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4655                 atomic_set_int(&fp->f_flag, FHASLOCK);
 4656         }
 4657 
              /* Success: unlock the vnode and drop the extra falloc() reference. */
 4658         VOP_UNLOCK(vp, 0);
 4659         fdrop(fp, td);
 4660         VFS_UNLOCK_GIANT(vfslocked);
 4661         td->td_retval[0] = indx;
 4662         return (0);
 4663 
              /* `bad' expects vp locked and referenced; `out' only drops Giant. */
 4664 bad:
 4665         vput(vp);
 4666 out:
 4667         VFS_UNLOCK_GIANT(vfslocked);
 4668         return (error);
 4669 }
 4670 
 4671 /*
 4672  * Stat an (NFS) file handle.
 4673  */
 4674 #ifndef _SYS_SYSPROTO_H_
 4675 struct fhstat_args {
 4676         struct fhandle *u_fhp;
 4677         struct stat *sb;
 4678 };
 4679 #endif
 4680 int
 4681 sys_fhstat(td, uap)
 4682         struct thread *td;
 4683         register struct fhstat_args /* {
 4684                 struct fhandle *u_fhp;
 4685                 struct stat *sb;
 4686         } */ *uap;
 4687 {
 4688         struct stat sb;
 4689         fhandle_t fh;
 4690         struct mount *mp;
 4691         struct vnode *vp;
 4692         int vfslocked;
 4693         int error;
 4694 
 4695         error = priv_check(td, PRIV_VFS_FHSTAT);
 4696         if (error)
 4697                 return (error);
 4698         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4699         if (error)
 4700                 return (error);
 4701         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4702                 return (ESTALE);
 4703         vfslocked = VFS_LOCK_GIANT(mp);
 4704         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4705         vfs_unbusy(mp);
 4706         if (error) {
 4707                 VFS_UNLOCK_GIANT(vfslocked);
 4708                 return (error);
 4709         }
 4710         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4711         vput(vp);
 4712         VFS_UNLOCK_GIANT(vfslocked);
 4713         if (error)
 4714                 return (error);
 4715         error = copyout(&sb, uap->sb, sizeof(sb));
 4716         return (error);
 4717 }
 4718 
 4719 /*
 4720  * Implement fstatfs() for (NFS) file handles.
 4721  */
 4722 #ifndef _SYS_SYSPROTO_H_
 4723 struct fhstatfs_args {
 4724         struct fhandle *u_fhp;
 4725         struct statfs *buf;
 4726 };
 4727 #endif
 4728 int
 4729 sys_fhstatfs(td, uap)
 4730         struct thread *td;
 4731         struct fhstatfs_args /* {
 4732                 struct fhandle *u_fhp;
 4733                 struct statfs *buf;
 4734         } */ *uap;
 4735 {
 4736         struct statfs sf;
 4737         fhandle_t fh;
 4738         int error;
 4739 
 4740         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4741         if (error)
 4742                 return (error);
 4743         error = kern_fhstatfs(td, fh, &sf);
 4744         if (error)
 4745                 return (error);
 4746         return (copyout(&sf, uap->buf, sizeof(sf)));
 4747 }
 4748 
 4749 int
 4750 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4751 {
 4752         struct statfs *sp;
 4753         struct mount *mp;
 4754         struct vnode *vp;
 4755         int vfslocked;
 4756         int error;
 4757 
 4758         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4759         if (error)
 4760                 return (error);
 4761         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4762                 return (ESTALE);
 4763         vfslocked = VFS_LOCK_GIANT(mp);
 4764         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4765         if (error) {
 4766                 vfs_unbusy(mp);
 4767                 VFS_UNLOCK_GIANT(vfslocked);
 4768                 return (error);
 4769         }
 4770         vput(vp);
 4771         error = prison_canseemount(td->td_ucred, mp);
 4772         if (error)
 4773                 goto out;
 4774 #ifdef MAC
 4775         error = mac_mount_check_stat(td->td_ucred, mp);
 4776         if (error)
 4777                 goto out;
 4778 #endif
 4779         /*
 4780          * Set these in case the underlying filesystem fails to do so.
 4781          */
 4782         sp = &mp->mnt_stat;
 4783         sp->f_version = STATFS_VERSION;
 4784         sp->f_namemax = NAME_MAX;
 4785         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4786         error = VFS_STATFS(mp, sp);
 4787         if (error == 0)
 4788                 *buf = *sp;
 4789 out:
 4790         vfs_unbusy(mp);
 4791         VFS_UNLOCK_GIANT(vfslocked);
 4792         return (error);
 4793 }
 4794 
 4795 int
 4796 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4797 {
 4798         struct file *fp;
 4799         struct mount *mp;
 4800         struct vnode *vp;
 4801         off_t olen, ooffset;
 4802         int error, vfslocked;
 4803 
 4804         fp = NULL;
 4805         vfslocked = 0;
 4806         error = fget(td, fd, CAP_WRITE, &fp);
 4807         if (error != 0)
 4808                 goto out;
 4809 
 4810         switch (fp->f_type) {
 4811         case DTYPE_VNODE:
 4812                 break;
 4813         case DTYPE_PIPE:
 4814         case DTYPE_FIFO:
 4815                 error = ESPIPE;
 4816                 goto out;
 4817         default:
 4818                 error = ENODEV;
 4819                 goto out;
 4820         }
 4821         if ((fp->f_flag & FWRITE) == 0) {
 4822                 error = EBADF;
 4823                 goto out;
 4824         }
 4825         vp = fp->f_vnode;
 4826         if (vp->v_type != VREG) {
 4827                 error = ENODEV;
 4828                 goto out;
 4829         }
 4830         if (offset < 0 || len <= 0) {
 4831                 error = EINVAL;
 4832                 goto out;
 4833         }
 4834         /* Check for wrap. */
 4835         if (offset > OFF_MAX - len) {
 4836                 error = EFBIG;
 4837                 goto out;
 4838         }
 4839 
 4840         /* Allocating blocks may take a long time, so iterate. */
 4841         for (;;) {
 4842                 olen = len;
 4843                 ooffset = offset;
 4844 
 4845                 bwillwrite();
 4846                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4847                 mp = NULL;
 4848                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4849                 if (error != 0) {
 4850                         VFS_UNLOCK_GIANT(vfslocked);
 4851                         break;
 4852                 }
 4853                 error = vn_lock(vp, LK_EXCLUSIVE);
 4854                 if (error != 0) {
 4855                         vn_finished_write(mp);
 4856                         VFS_UNLOCK_GIANT(vfslocked);
 4857                         break;
 4858                 }
 4859 #ifdef MAC
 4860                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4861                 if (error == 0)
 4862 #endif
 4863                         error = VOP_ALLOCATE(vp, &offset, &len);
 4864                 VOP_UNLOCK(vp, 0);
 4865                 vn_finished_write(mp);
 4866                 VFS_UNLOCK_GIANT(vfslocked);
 4867 
 4868                 if (olen + ooffset != offset + len) {
 4869                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4870                             ooffset, olen, offset, len);
 4871                 }
 4872                 if (error != 0 || len == 0)
 4873                         break;
 4874                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4875                 maybe_yield();
 4876         }
 4877  out:
 4878         if (fp != NULL)
 4879                 fdrop(fp, td);
 4880         return (error);
 4881 }
 4882 
 4883 int
 4884 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4885 {
 4886 
 4887         return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 4888 }
 4889 
 4890 /*
 4891  * Unlike madvise(2), we do not make a best effort to remember every
 4892  * possible caching hint.  Instead, we remember the last setting with
 4893  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4894  * region of any current setting.
 4895  */
 4896 int
 4897 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4898     int advice)
 4899 {
 4900         struct fadvise_info *fa, *new;
 4901         struct file *fp;
 4902         struct vnode *vp;
 4903         off_t end;
 4904         int error;
 4905 
 4906         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4907                 return (EINVAL);
 4908         switch (advice) {
 4909         case POSIX_FADV_SEQUENTIAL:
 4910         case POSIX_FADV_RANDOM:
 4911         case POSIX_FADV_NOREUSE:
 4912                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4913                 break;
 4914         case POSIX_FADV_NORMAL:
 4915         case POSIX_FADV_WILLNEED:
 4916         case POSIX_FADV_DONTNEED:
 4917                 new = NULL;
 4918                 break;
 4919         default:
 4920                 return (EINVAL);
 4921         }
 4922         /* XXX: CAP_POSIX_FADVISE? */
 4923         error = fget(td, fd, 0, &fp);
 4924         if (error != 0)
 4925                 goto out;
 4926         
 4927         switch (fp->f_type) {
 4928         case DTYPE_VNODE:
 4929                 break;
 4930         case DTYPE_PIPE:
 4931         case DTYPE_FIFO:
 4932                 error = ESPIPE;
 4933                 goto out;
 4934         default:
 4935                 error = ENODEV;
 4936                 goto out;
 4937         }
 4938         vp = fp->f_vnode;
 4939         if (vp->v_type != VREG) {
 4940                 error = ENODEV;
 4941                 goto out;
 4942         }
 4943         if (len == 0)
 4944                 end = OFF_MAX;
 4945         else
 4946                 end = offset + len - 1;
 4947         switch (advice) {
 4948         case POSIX_FADV_SEQUENTIAL:
 4949         case POSIX_FADV_RANDOM:
 4950         case POSIX_FADV_NOREUSE:
 4951                 /*
 4952                  * Try to merge any existing non-standard region with
 4953                  * this new region if possible, otherwise create a new
 4954                  * non-standard region for this request.
 4955                  */
 4956                 mtx_pool_lock(mtxpool_sleep, fp);
 4957                 fa = fp->f_advice;
 4958                 if (fa != NULL && fa->fa_advice == advice &&
 4959                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4960                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4961                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4962                         if (offset < fa->fa_start)
 4963                                 fa->fa_start = offset;
 4964                         if (end > fa->fa_end)
 4965                                 fa->fa_end = end;
 4966                 } else {
 4967                         new->fa_advice = advice;
 4968                         new->fa_start = offset;
 4969                         new->fa_end = end;
 4970                         new->fa_prevstart = 0;
 4971                         new->fa_prevend = 0;
 4972                         fp->f_advice = new;
 4973                         new = fa;
 4974                 }
 4975                 mtx_pool_unlock(mtxpool_sleep, fp);
 4976                 break;
 4977         case POSIX_FADV_NORMAL:
 4978                 /*
 4979                  * If a the "normal" region overlaps with an existing
 4980                  * non-standard region, trim or remove the
 4981                  * non-standard region.
 4982                  */
 4983                 mtx_pool_lock(mtxpool_sleep, fp);
 4984                 fa = fp->f_advice;
 4985                 if (fa != NULL) {
 4986                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4987                                 new = fa;
 4988                                 fp->f_advice = NULL;
 4989                         } else if (offset <= fa->fa_start &&
 4990                             end >= fa->fa_start)
 4991                                 fa->fa_start = end + 1;
 4992                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4993                                 fa->fa_end = offset - 1;
 4994                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4995                                 /*
 4996                                  * If the "normal" region is a middle
 4997                                  * portion of the existing
 4998                                  * non-standard region, just remove
 4999                                  * the whole thing rather than picking
 5000                                  * one side or the other to
 5001                                  * preserve.
 5002                                  */
 5003                                 new = fa;
 5004                                 fp->f_advice = NULL;
 5005                         }
 5006                 }
 5007                 mtx_pool_unlock(mtxpool_sleep, fp);
 5008                 break;
 5009         case POSIX_FADV_WILLNEED:
 5010         case POSIX_FADV_DONTNEED:
 5011                 error = VOP_ADVISE(vp, offset, end, advice);
 5012                 break;
 5013         }
 5014 out:
 5015         if (fp != NULL)
 5016                 fdrop(fp, td);
 5017         free(new, M_FADVISE);
 5018         return (error);
 5019 }
 5020 
 5021 int
 5022 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 5023 {
 5024 
 5025         return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 5026             uap->advice));
 5027 }

Cache object: 50f8ffa130e76e381b0835a9ed4c7d4d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.