The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.1/sys/kern/vfs_syscalls.c 235449 2012-05-14 15:46:37Z jh $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 #include <ufs/ufs/quota.h>
   90 
      /* Malloc type M_FADVISE, named "fadvise" and described as posix_fadvise(2) information. */
   91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   92 
      /*
       * Tracing probes for the "vfs" provider: two "stat" probes named "mode"
       * and "reg".  Each is declared with a "char *" argument 0 and an "int"
       * argument 1.
       */
   93 SDT_PROVIDER_DEFINE(vfs);
   94 SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
   95 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
   96 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
   97 SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
   98 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
   99 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
  100 
      /* Forward declarations of helpers that are private to this file. */
  101 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
  102 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  103 static int setfflags(struct thread *td, struct vnode *, int);
  104 static int setutimes(struct thread *td, struct vnode *,
  105     const struct timespec *, int, int);
  106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  107     struct thread *td);
  108 
  109 /*
  110  * The module initialization routine for POSIX asynchronous I/O will
  111  * set this to the version of AIO that it implements.  (Zero means
  112  * that it is not implemented.)  This value is used here by pathconf()
  113  * and in kern_descrip.c by fpathconf().
  114  */
  115 int async_io_version;
  116 
      /*
       * Debug-only integer exported read-write as debug.syncprt; it exists
       * only when the kernel is built with DEBUG defined.
       */
  117 #ifdef DEBUG
  118 static int syncprt = 0;
  119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  120 #endif
  121 
  122 /*
  123  * Sync each mounted filesystem.
       *
       * Walks the global mount list.  Every mount that can be busied without
       * waiting, is not mounted read-only and accepts a non-blocking
       * vn_start_write() gets vfs_msync() and VFS_SYNC() issued with
       * MNT_NOWAIT.  Mounts failing any of those checks are skipped.
       *
       * 'uap' is not used.  The return values of vfs_msync() and VFS_SYNC()
       * are ignored and the call always returns 0.
  124  */
  125 #ifndef _SYS_SYSPROTO_H_
  126 struct sync_args {
  127         int     dummy;
  128 };
  129 #endif
  130 /* ARGSUSED */
  131 int
  132 sys_sync(td, uap)
  133         struct thread *td;
  134         struct sync_args *uap;
  135 {
  136         struct mount *mp, *nmp;
  137         int save, vfslocked;
  138 
  139         mtx_lock(&mountlist_mtx);
  140         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /* A mount that cannot be busied without waiting is skipped. */
  141                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  142                         nmp = TAILQ_NEXT(mp, mnt_list);
  143                         continue;
  144                 }
  145                 vfslocked = VFS_LOCK_GIANT(mp);
  146                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  147                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
                              /* Flush with TDP_SYNCIO set, then restore the saved flags. */
  148                         save = curthread_pflags_set(TDP_SYNCIO);
  149                         vfs_msync(mp, MNT_NOWAIT);
  150                         VFS_SYNC(mp, MNT_NOWAIT);
  151                         curthread_pflags_restore(save);
  152                         vn_finished_write(mp);
  153                 }
  154                 VFS_UNLOCK_GIANT(vfslocked);
                      /*
                       * Take the list lock before picking the successor and
                       * dropping the busy reference.  NOTE(review): this implies
                       * that a successful vfs_busy() with MBF_MNTLSTLOCK returned
                       * with mountlist_mtx released -- confirm against vfs_busy(9).
                       */
  155                 mtx_lock(&mountlist_mtx);
  156                 nmp = TAILQ_NEXT(mp, mnt_list);
  157                 vfs_unbusy(mp);
  158         }
  159         mtx_unlock(&mountlist_mtx);
  160         return (0);
  161 }
  162 
  163 /*
  164  * Change filesystem quotas.
       *
       * Resolves uap->path, busies the mount that the resulting vnode lives
       * on and forwards cmd, uid and arg to VFS_QUOTACTL().
       *
       * Returns EPERM when prison_allow() denies PR_ALLOW_QUOTAS, otherwise
       * the error from namei(), vfs_busy() or VFS_QUOTACTL() (0 on success).
  165  */
  166 #ifndef _SYS_SYSPROTO_H_
  167 struct quotactl_args {
  168         char *path;
  169         int cmd;
  170         int uid;
  171         caddr_t arg;
  172 };
  173 #endif
  174 int
  175 sys_quotactl(td, uap)
  176         struct thread *td;
  177         register struct quotactl_args /* {
  178                 char *path;
  179                 int cmd;
  180                 int uid;
  181                 caddr_t arg;
  182         } */ *uap;
  183 {
  184         struct mount *mp;
  185         int vfslocked;
  186         int error;
  187         struct nameidata nd;
  188 
  189         AUDIT_ARG_CMD(uap->cmd);
  190         AUDIT_ARG_UID(uap->uid);
  191         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  192                 return (EPERM);
  193         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  194            UIO_USERSPACE, uap->path, td);
  195         if ((error = namei(&nd)) != 0)
  196                 return (error);
  197         vfslocked = NDHASGIANT(&nd);
  198         NDFREE(&nd, NDF_ONLY_PNBUF);
              /*
               * Reference the mount while the vnode is still held, release the
               * vnode, and only then busy the mount.  The reference is dropped
               * again once vfs_busy() has returned, whatever its result.
               */
  199         mp = nd.ni_vp->v_mount;
  200         vfs_ref(mp);
  201         vput(nd.ni_vp);
  202         error = vfs_busy(mp, 0);
  203         vfs_rel(mp);
  204         if (error) {
  205                 VFS_UNLOCK_GIANT(vfslocked);
  206                 return (error);
  207         }
  208         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  209 
  210         /*
  211          * Since quota on operation typically needs to open quota
  212          * file, the Q_QUOTAON handler needs to unbusy the mount point
  213          * before calling into namei.  Otherwise, unmount might be
  214          * started between two vfs_busy() invocations (first is ours,
  215          * second is from mount point cross-walk code in lookup()),
  216          * causing deadlock.
  217          *
  218          * Require that Q_QUOTAON handles the vfs_busy() reference on
  219          * its own, always returning with unbusied mount point.
  220          */
  221         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  222                 vfs_unbusy(mp);
  223         VFS_UNLOCK_GIANT(vfslocked);
  224         return (error);
  225 }
  226 
  227 /*
  228  * Used by statfs conversion routines to scale the block size up if
  229  * necessary so that all of the block counts are <= 'max_size'.  Note
  230  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  231  * value of 'n'.
       *
       * 'sf' is modified in place: f_bsize is shifted left and f_blocks,
       * f_bfree and f_bavail are shifted right by the same amount.  Nothing
       * is changed when the largest count already fits in 'max_size'.
  232  */
  233 void
  234 statfs_scale_blocks(struct statfs *sf, long max_size)
  235 {
  236         uint64_t count;
  237         int shift;
  238 
  239         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  240 
  241         /*
  242          * Attempt to scale the block counts to give a more accurate
  243          * overview to userland of the ratio of free space to used
  244          * space.  To do this, find the largest block count and compute
  245          * a divisor that lets it fit into a signed integer <= max_size.
  246          */
              /* f_bavail may be negative; use its magnitude for the comparison. */
  247         if (sf->f_bavail < 0)
  248                 count = -sf->f_bavail;
  249         else
  250                 count = sf->f_bavail;
  251         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  252         if (count <= max_size)
  253                 return;
  254 
              /*
               * Discard the bits that fit under max_size; the number of
               * significant bits left over is the shift that is required.
               */
  255         count >>= flsl(max_size);
  256         shift = 0;
  257         while (count > 0) {
  258                 shift++;
  259                 count >>=1;
  260         }
  261 
  262         sf->f_bsize <<= shift;
  263         sf->f_blocks >>= shift;
  264         sf->f_bfree >>= shift;
  265         sf->f_bavail >>= shift;
  266 }
  267 
  268 /*
  269  * Get filesystem statistics.
       *
       * System call wrapper: kern_statfs() fills a local struct statfs for
       * the user-space path uap->path, which is then copied out to uap->buf.
       * Returns the error from kern_statfs() or from copyout(), 0 on success.
  270  */
  271 #ifndef _SYS_SYSPROTO_H_
  272 struct statfs_args {
  273         char *path;
  274         struct statfs *buf;
  275 };
  276 #endif
  277 int
  278 sys_statfs(td, uap)
  279         struct thread *td;
  280         register struct statfs_args /* {
  281                 char *path;
  282                 struct statfs *buf;
  283         } */ *uap;
  284 {
  285         struct statfs sf;
  286         int error;
  287 
  288         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  289         if (error == 0)
  290                 error = copyout(&sf, uap->buf, sizeof(sf));
  291         return (error);
  292 }
  293 
      /*
       * Look up 'path' (located in the address space named by 'pathseg') and
       * store the statistics of the filesystem it resides on in '*buf'.
       *
       * Returns 0 on success, otherwise the error from namei(), vfs_busy(),
       * mac_mount_check_stat() (only with MAC) or VFS_STATFS().  '*buf' is
       * written only on success.
       */
  294 int
  295 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  296     struct statfs *buf)
  297 {
  298         struct mount *mp;
  299         struct statfs *sp, sb;
  300         int vfslocked;
  301         int error;
  302         struct nameidata nd;
  303 
  304         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  305             AUDITVNODE1, pathseg, path, td);
  306         error = namei(&nd);
  307         if (error)
  308                 return (error);
  309         vfslocked = NDHASGIANT(&nd);
              /*
               * Reference the mount while the vnode is still held, release the
               * vnode, then busy the mount and drop the temporary reference.
               */
  310         mp = nd.ni_vp->v_mount;
  311         vfs_ref(mp);
  312         NDFREE(&nd, NDF_ONLY_PNBUF);
  313         vput(nd.ni_vp);
  314         error = vfs_busy(mp, 0);
  315         vfs_rel(mp);
  316         if (error) {
  317                 VFS_UNLOCK_GIANT(vfslocked);
  318                 return (error);
  319         }
  320 #ifdef MAC
  321         error = mac_mount_check_stat(td->td_ucred, mp);
  322         if (error)
  323                 goto out;
  324 #endif
  325         /*
  326          * Set these in case the underlying filesystem fails to do so.
  327          */
  328         sp = &mp->mnt_stat;
  329         sp->f_version = STATFS_VERSION;
  330         sp->f_namemax = NAME_MAX;
  331         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  332         error = VFS_STATFS(mp, sp);
  333         if (error)
  334                 goto out;
              /*
               * When the PRIV_VFS_GENERATION check fails, hand back a private
               * copy with f_fsid zeroed and prison_enforce_statfs() applied
               * instead of the mount's own mnt_stat.
               */
  335         if (priv_check(td, PRIV_VFS_GENERATION)) {
  336                 bcopy(sp, &sb, sizeof(sb));
  337                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  338                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  339                 sp = &sb;
  340         }
  341         *buf = *sp;
  342 out:
  343         vfs_unbusy(mp);
  344         VFS_UNLOCK_GIANT(vfslocked);
  345         return (error);
  346 }
  347 
  348 /*
  349  * Get filesystem statistics.
       *
       * Descriptor-based variant: kern_fstatfs() fills a local struct statfs
       * for uap->fd, which is then copied out to uap->buf.  Returns the
       * error from kern_fstatfs() or from copyout(), 0 on success.
  350  */
  351 #ifndef _SYS_SYSPROTO_H_
  352 struct fstatfs_args {
  353         int fd;
  354         struct statfs *buf;
  355 };
  356 #endif
  357 int
  358 sys_fstatfs(td, uap)
  359         struct thread *td;
  360         register struct fstatfs_args /* {
  361                 int fd;
  362                 struct statfs *buf;
  363         } */ *uap;
  364 {
  365         struct statfs sf;
  366         int error;
  367 
  368         error = kern_fstatfs(td, uap->fd, &sf);
  369         if (error == 0)
  370                 error = copyout(&sf, uap->buf, sizeof(sf));
  371         return (error);
  372 }
  373 
      /*
       * Store in '*buf' the statistics of the filesystem holding the vnode
       * behind descriptor 'fd'.
       *
       * Returns 0 on success; EBADF when the vnode has no mount; otherwise
       * the error from getvnode(), vfs_busy(), mac_mount_check_stat() (only
       * with MAC) or VFS_STATFS().  '*buf' is written only on success.
       */
  374 int
  375 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  376 {
  377         struct file *fp;
  378         struct mount *mp;
  379         struct statfs *sp, sb;
  380         int vfslocked;
  381         struct vnode *vp;
  382         int error;
  383 
  384         AUDIT_ARG_FD(fd);
  385         error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
  386         if (error)
  387                 return (error);
  388         vp = fp->f_vnode;
  389         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  390         vn_lock(vp, LK_SHARED | LK_RETRY);
  391 #ifdef AUDIT
  392         AUDIT_ARG_VNODE1(vp);
  393 #endif
              /*
               * Read v_mount with the vnode locked and reference the mount
               * before the vnode lock and the file reference are given up.
               */
  394         mp = vp->v_mount;
  395         if (mp)
  396                 vfs_ref(mp);
  397         VOP_UNLOCK(vp, 0);
  398         fdrop(fp, td);
  399         if (mp == NULL) {
  400                 error = EBADF;
  401                 goto out;
  402         }
  403         error = vfs_busy(mp, 0);
  404         vfs_rel(mp);
  405         if (error) {
  406                 VFS_UNLOCK_GIANT(vfslocked);
  407                 return (error);
  408         }
  409 #ifdef MAC
  410         error = mac_mount_check_stat(td->td_ucred, mp);
  411         if (error)
  412                 goto out;
  413 #endif
  414         /*
  415          * Set these in case the underlying filesystem fails to do so.
  416          */
  417         sp = &mp->mnt_stat;
  418         sp->f_version = STATFS_VERSION;
  419         sp->f_namemax = NAME_MAX;
  420         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  421         error = VFS_STATFS(mp, sp);
  422         if (error)
  423                 goto out;
              /*
               * When the PRIV_VFS_GENERATION check fails, hand back a private
               * copy with f_fsid zeroed and prison_enforce_statfs() applied.
               */
  424         if (priv_check(td, PRIV_VFS_GENERATION)) {
  425                 bcopy(sp, &sb, sizeof(sb));
  426                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  427                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  428                 sp = &sb;
  429         }
  430         *buf = *sp;
  431 out:
              /* mp is NULL only on the EBADF path, where nothing was busied. */
  432         if (mp)
  433                 vfs_unbusy(mp);
  434         VFS_UNLOCK_GIANT(vfslocked);
  435         return (error);
  436 }
  437 
  438 /*
  439  * Get statistics on all filesystems.
       *
       * System call wrapper around kern_getfsstat() for a user-space buffer
       * of uap->bufsize bytes at uap->buf.
       *
       * Returns EINVAL for a negative uap->bufsize, otherwise whatever
       * kern_getfsstat() returns.
  440  */
  441 #ifndef _SYS_SYSPROTO_H_
  442 struct getfsstat_args {
  443         struct statfs *buf;
  444         long bufsize;
  445         int flags;
  446 };
  447 #endif
  448 int
  449 sys_getfsstat(td, uap)
  450         struct thread *td;
  451         register struct getfsstat_args /* {
  452                 struct statfs *buf;
  453                 long bufsize;
  454                 int flags;
  455         } */ *uap;
  456 {
  457 
              /*
               * bufsize is a signed long but kern_getfsstat() takes a size_t;
               * a negative value would silently turn into a huge buffer size.
               */
              if (uap->bufsize < 0)
                      return (EINVAL);
  458         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  459             uap->flags));
  460 }
  461 
  462 /*
       * Collect statistics for the mounted filesystems visible to 'td'.
       *
       * With bufsize == 0 nothing is stored and the mounts are only counted.
       * With bufseg == UIO_USERSPACE entries are copied out to '*buf'.
       *
  463  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  464  *      The caller is responsible for freeing memory which will be allocated
  465  *      in '*buf'.
       *      The allocation uses malloc type M_TEMP.
       *
       * At most bufsize / sizeof(struct statfs) entries are stored.  On
       * return td->td_retval[0] holds the number of mounts counted, capped
       * at that maximum when a buffer was supplied.
       *
       * Returns 0, or the copyout() error for a user-space buffer.  Mounts
       * that are hidden by prison_canseemount() or the MAC check, cannot be
       * busied without waiting, or fail VFS_STATFS() are silently skipped.
  466  */
  467 int
  468 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  469     enum uio_seg bufseg, int flags)
  470 {
  471         struct mount *mp, *nmp;
  472         struct statfs *sfsp, *sp, sb;
  473         size_t count, maxcount;
  474         int vfslocked;
  475         int error;
  476 
  477         maxcount = bufsize / sizeof(struct statfs);
  478         if (bufsize == 0)
  479                 sfsp = NULL;
  480         else if (bufseg == UIO_USERSPACE)
  481                 sfsp = *buf;
  482         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Size the kernel buffer from the current number of
                       * mounts, never exceeding what the caller asked for.
                       */
  483                 count = 0;
  484                 mtx_lock(&mountlist_mtx);
  485                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  486                         count++;
  487                 }
  488                 mtx_unlock(&mountlist_mtx);
  489                 if (maxcount > count)
  490                         maxcount = count;
  491                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  492                     M_WAITOK);
  493         }
  494         count = 0;
  495         mtx_lock(&mountlist_mtx);
  496         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  497                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  498                         nmp = TAILQ_NEXT(mp, mnt_list);
  499                         continue;
  500                 }
  501 #ifdef MAC
  502                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  503                         nmp = TAILQ_NEXT(mp, mnt_list);
  504                         continue;
  505                 }
  506 #endif
  507                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  508                         nmp = TAILQ_NEXT(mp, mnt_list);
  509                         continue;
  510                 }
  511                 vfslocked = VFS_LOCK_GIANT(mp);
                      /* Past maxcount the mount is still counted but not stored. */
  512                 if (sfsp && count < maxcount) {
  513                         sp = &mp->mnt_stat;
  514                         /*
  515                          * Set these in case the underlying filesystem
  516                          * fails to do so.
  517                          */
  518                         sp->f_version = STATFS_VERSION;
  519                         sp->f_namemax = NAME_MAX;
  520                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  521                         /*
  522                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  523                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  524                          * overrides MNT_WAIT.
  525                          */
  526                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  527                             (flags & MNT_WAIT)) &&
  528                             (error = VFS_STATFS(mp, sp))) {
                                      /* Refresh failed: skip this mount without counting it. */
  529                                 VFS_UNLOCK_GIANT(vfslocked);
  530                                 mtx_lock(&mountlist_mtx);
  531                                 nmp = TAILQ_NEXT(mp, mnt_list);
  532                                 vfs_unbusy(mp);
  533                                 continue;
  534                         }
                              /*
                               * When the PRIV_VFS_GENERATION check fails, store a
                               * copy with f_fsid zeroed and prison_enforce_statfs()
                               * applied.
                               */
  535                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  536                                 bcopy(sp, &sb, sizeof(sb));
  537                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  538                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  539                                 sp = &sb;
  540                         }
  541                         if (bufseg == UIO_SYSSPACE)
  542                                 bcopy(sp, sfsp, sizeof(*sp));
  543                         else /* if (bufseg == UIO_USERSPACE) */ {
  544                                 error = copyout(sp, sfsp, sizeof(*sp));
  545                                 if (error) {
  546                                         vfs_unbusy(mp);
  547                                         VFS_UNLOCK_GIANT(vfslocked);
  548                                         return (error);
  549                                 }
  550                         }
  551                         sfsp++;
  552                 }
  553                 VFS_UNLOCK_GIANT(vfslocked);
  554                 count++;
                      /*
                       * Take the list lock before picking the successor and
                       * dropping the busy reference.
                       */
  555                 mtx_lock(&mountlist_mtx);
  556                 nmp = TAILQ_NEXT(mp, mnt_list);
  557                 vfs_unbusy(mp);
  558         }
  559         mtx_unlock(&mountlist_mtx);
  560         if (sfsp && count > maxcount)
  561                 td->td_retval[0] = maxcount;
  562         else
  563                 td->td_retval[0] = count;
  564         return (0);
  565 }
  566 
  567 #ifdef COMPAT_FREEBSD4
  568 /*
  569  * Get old format filesystem statistics.
  570  */
  571 static void cvtstatfs(struct statfs *, struct ostatfs *);
  572 
      /*
       * Old-format statfs: obtains the statistics for the user-space path
       * uap->path through kern_statfs(), converts them with cvtstatfs() and
       * copies the resulting struct ostatfs out to uap->buf.  Returns the
       * error from kern_statfs() or from copyout(), 0 on success.
       */
  573 #ifndef _SYS_SYSPROTO_H_
  574 struct freebsd4_statfs_args {
  575         char *path;
  576         struct ostatfs *buf;
  577 };
  578 #endif
  579 int
  580 freebsd4_statfs(td, uap)
  581         struct thread *td;
  582         struct freebsd4_statfs_args /* {
  583                 char *path;
  584                 struct ostatfs *buf;
  585         } */ *uap;
  586 {
  587         struct ostatfs osb;
  588         struct statfs sf;
  589         int error;
  590 
  591         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  592         if (error)
  593                 return (error);
  594         cvtstatfs(&sf, &osb);
  595         return (copyout(&osb, uap->buf, sizeof(osb)));
  596 }
  597 
  598 /*
  599  * Get filesystem statistics.
       *
       * Old-format, descriptor-based variant: the statistics for uap->fd are
       * obtained through kern_fstatfs(), converted with cvtstatfs() and
       * copied out to uap->buf as a struct ostatfs.  Returns the error from
       * kern_fstatfs() or from copyout(), 0 on success.
  600  */
  601 #ifndef _SYS_SYSPROTO_H_
  602 struct freebsd4_fstatfs_args {
  603         int fd;
  604         struct ostatfs *buf;
  605 };
  606 #endif
  607 int
  608 freebsd4_fstatfs(td, uap)
  609         struct thread *td;
  610         struct freebsd4_fstatfs_args /* {
  611                 int fd;
  612                 struct ostatfs *buf;
  613         } */ *uap;
  614 {
  615         struct ostatfs osb;
  616         struct statfs sf;
  617         int error;
  618 
  619         error = kern_fstatfs(td, uap->fd, &sf);
  620         if (error)
  621                 return (error);
  622         cvtstatfs(&sf, &osb);
  623         return (copyout(&osb, uap->buf, sizeof(osb)));
  624 }
  625 
  626 /*
  627  * Get statistics on all filesystems.
       *
       * Old-format variant: the entries are gathered into a kernel buffer by
       * kern_getfsstat(), converted one by one with cvtstatfs() and copied
       * out to uap->buf as struct ostatfs.  The kernel buffer is freed here.
       *
       * Returns EINVAL when uap->bufsize is negative or describes more
       * entries than can be sized in a size_t, otherwise the error from
       * kern_getfsstat() or copyout() (0 on success).  The entry count is
       * left in td->td_retval[0] by kern_getfsstat().
  628  */
  629 #ifndef _SYS_SYSPROTO_H_
  630 struct freebsd4_getfsstat_args {
  631         struct ostatfs *buf;
  632         long bufsize;
  633         int flags;
  634 };
  635 #endif
  636 int
  637 freebsd4_getfsstat(td, uap)
  638         struct thread *td;
  639         register struct freebsd4_getfsstat_args /* {
  640                 struct ostatfs *buf;
  641                 long bufsize;
  642                 int flags;
  643         } */ *uap;
  644 {
  645         struct statfs *buf, *sp;
  646         struct ostatfs osb;
  647         size_t count, size;
  648         int error;
  649 
              /*
               * bufsize is a signed long: a negative value would convert to a
               * huge entry count below.
               */
              if (uap->bufsize < 0)
                      return (EINVAL);
  650         count = uap->bufsize / sizeof(struct ostatfs);
              /*
               * Refuse counts whose size in new-format entries would wrap; a
               * wrapped size of 0 would skip the copy loop altogether.
               */
              if (count > SIZE_MAX / sizeof(struct statfs))
                      return (EINVAL);
  651         size = count * sizeof(struct statfs);
  652         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
              /* kern_getfsstat() allocates 'buf' only for a non-zero size. */
  653         if (size > 0) {
  654                 count = td->td_retval[0];
  655                 sp = buf;
  656                 while (count > 0 && error == 0) {
  657                         cvtstatfs(sp, &osb);
  658                         error = copyout(&osb, uap->buf, sizeof(osb));
  659                         sp++;
  660                         uap->buf++;
  661                         count--;
  662                 }
  663                 free(buf, M_TEMP);
  664         }
  665         return (error);
  666 }
  667 
  668 /*
  669  * Implement fstatfs() for (NFS) file handles.
       *
       * Copies the file handle in from uap->u_fhp, obtains the statistics
       * through kern_fhstatfs(), converts them with cvtstatfs() and copies
       * the resulting struct ostatfs out to uap->buf.  Returns the error
       * from copyin(), kern_fhstatfs() or copyout(), 0 on success.
  670  */
  671 #ifndef _SYS_SYSPROTO_H_
  672 struct freebsd4_fhstatfs_args {
  673         struct fhandle *u_fhp;
  674         struct ostatfs *buf;
  675 };
  676 #endif
  677 int
  678 freebsd4_fhstatfs(td, uap)
  679         struct thread *td;
  680         struct freebsd4_fhstatfs_args /* {
  681                 struct fhandle *u_fhp;
  682                 struct ostatfs *buf;
  683         } */ *uap;
  684 {
  685         struct ostatfs osb;
  686         struct statfs sf;
  687         fhandle_t fh;
  688         int error;
  689 
  690         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  691         if (error)
  692                 return (error);
  693         error = kern_fhstatfs(td, fh, &sf);
  694         if (error)
  695                 return (error);
  696         cvtstatfs(&sf, &osb);
  697         return (copyout(&osb, uap->buf, sizeof(osb)));
  698 }
  699 
  700 /*
  701  * Convert a new format statfs structure to an old format statfs structure.
       *
       * 'nsp' is the source, but it is modified too: its block counts are
       * first rescaled in place by statfs_scale_blocks(nsp, LONG_MAX).
       * 'osp' is zeroed and then filled; f_iosize, the file counts and the
       * read/write counters are clamped to LONG_MAX, and each name is copied
       * with strlcpy() bounded by the smaller of the old and new name sizes.
  702  */
  703 static void
  704 cvtstatfs(nsp, osp)
  705         struct statfs *nsp;
  706         struct ostatfs *osp;
  707 {
  708 
  709         statfs_scale_blocks(nsp, LONG_MAX);
  710         bzero(osp, sizeof(*osp));
  711         osp->f_bsize = nsp->f_bsize;
  712         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  713         osp->f_blocks = nsp->f_blocks;
  714         osp->f_bfree = nsp->f_bfree;
  715         osp->f_bavail = nsp->f_bavail;
  716         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  717         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  718         osp->f_owner = nsp->f_owner;
  719         osp->f_type = nsp->f_type;
  720         osp->f_flags = nsp->f_flags;
  721         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  722         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  723         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  724         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  725         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  726             MIN(MFSNAMELEN, OMFSNAMELEN));
  727         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  728             MIN(MNAMELEN, OMNAMELEN));
  729         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  730             MIN(MNAMELEN, OMNAMELEN));
  731         osp->f_fsid = nsp->f_fsid;
  732 }
  733 #endif /* COMPAT_FREEBSD4 */
  734 
  735 /*
  736  * Change current working directory to a given file descriptor.
       *
       * The vnode behind uap->fd must pass change_dir().  If a filesystem is
       * mounted on it, the root of that filesystem is used instead, repeated
       * for as long as the current candidate is itself mounted on.  The
       * result replaces fd_cdir and the previous directory is released.
       *
       * Returns 0 on success, otherwise the error from getvnode(),
       * change_dir() or VFS_ROOT().
  737  */
  738 #ifndef _SYS_SYSPROTO_H_
  739 struct fchdir_args {
  740         int     fd;
  741 };
  742 #endif
  743 int
  744 sys_fchdir(td, uap)
  745         struct thread *td;
  746         struct fchdir_args /* {
  747                 int fd;
  748         } */ *uap;
  749 {
  750         register struct filedesc *fdp = td->td_proc->p_fd;
  751         struct vnode *vp, *tdp, *vpold;
  752         struct mount *mp;
  753         struct file *fp;
  754         int vfslocked;
  755         int error;
  756 
  757         AUDIT_ARG_FD(uap->fd);
  758         if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
  759                 return (error);
              /* Take our own vnode reference before dropping the file. */
  760         vp = fp->f_vnode;
  761         VREF(vp);
  762         fdrop(fp, td);
  763         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  764         vn_lock(vp, LK_SHARED | LK_RETRY);
  765         AUDIT_ARG_VNODE1(vp);
  766         error = change_dir(vp, td);
  767         while (!error && (mp = vp->v_mountedhere) != NULL) {
  768                 int tvfslocked;
                      /* Busy failed: re-read v_mountedhere and try again. */
  769                 if (vfs_busy(mp, 0))
  770                         continue;
  771                 tvfslocked = VFS_LOCK_GIANT(mp);
  772                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  773                 vfs_unbusy(mp);
  774                 if (error) {
  775                         VFS_UNLOCK_GIANT(tvfslocked);
  776                         break;
  777                 }
                      /* Switch to the mounted filesystem's root vnode. */
  778                 vput(vp);
  779                 VFS_UNLOCK_GIANT(vfslocked);
  780                 vp = tdp;
  781                 vfslocked = tvfslocked;
  782         }
  783         if (error) {
  784                 vput(vp);
  785                 VFS_UNLOCK_GIANT(vfslocked);
  786                 return (error);
  787         }
              /* Keep the reference, drop only the lock, then install vp. */
  788         VOP_UNLOCK(vp, 0);
  789         VFS_UNLOCK_GIANT(vfslocked);
  790         FILEDESC_XLOCK(fdp);
  791         vpold = fdp->fd_cdir;
  792         fdp->fd_cdir = vp;
  793         FILEDESC_XUNLOCK(fdp);
              /* Release the old directory after the filedesc lock is dropped. */
  794         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  795         vrele(vpold);
  796         VFS_UNLOCK_GIANT(vfslocked);
  797         return (0);
  798 }
  799 
  800 /*
  801  * Change current working directory (``.'').
       *
       * System call wrapper: passes the user-space path uap->path to
       * kern_chdir() and returns its result.
  802  */
  803 #ifndef _SYS_SYSPROTO_H_
  804 struct chdir_args {
  805         char    *path;
  806 };
  807 #endif
  808 int
  809 sys_chdir(td, uap)
  810         struct thread *td;
  811         struct chdir_args /* {
  812                 char *path;
  813         } */ *uap;
  814 {
  815 
  816         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  817 }
  818 
      /*
       * Make the directory named by 'path' (located in the address space
       * given by 'pathseg') the current working directory of td's process.
       *
       * The vnode found by namei() must pass change_dir(); it then replaces
       * fd_cdir and the previous directory vnode is released.  Returns 0 on
       * success, otherwise the error from namei() or change_dir().
       */
  819 int
  820 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  821 {
  822         register struct filedesc *fdp = td->td_proc->p_fd;
  823         int error;
  824         struct nameidata nd;
  825         struct vnode *vp;
  826         int vfslocked;
  827 
  828         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  829             MPSAFE, pathseg, path, td);
  830         if ((error = namei(&nd)) != 0)
  831                 return (error);
  832         vfslocked = NDHASGIANT(&nd);
  833         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  834                 vput(nd.ni_vp);
  835                 VFS_UNLOCK_GIANT(vfslocked);
  836                 NDFREE(&nd, NDF_ONLY_PNBUF);
  837                 return (error);
  838         }
              /* Keep the reference, drop only the lock, then install the vnode. */
  839         VOP_UNLOCK(nd.ni_vp, 0);
  840         VFS_UNLOCK_GIANT(vfslocked);
  841         NDFREE(&nd, NDF_ONLY_PNBUF);
  842         FILEDESC_XLOCK(fdp);
  843         vp = fdp->fd_cdir;
  844         fdp->fd_cdir = nd.ni_vp;
  845         FILEDESC_XUNLOCK(fdp);
              /* Release the old directory after the filedesc lock is dropped. */
  846         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  847         vrele(vp);
  848         VFS_UNLOCK_GIANT(vfslocked);
  849         return (0);
  850 }
  851 
  852 /*
  853  * Helper function for raised chroot(2) security function:  Refuse if
  854  * any filedescriptors are open directories.
       *
       * Scans every slot of 'fdp' below fd_nfiles.  Returns EPERM as soon as
       * a descriptor of type DTYPE_VNODE refers to a VDIR vnode, and 0 when
       * none does.  The filedesc lock is asserted on entry.
  855  */
  856 static int
  857 chroot_refuse_vdir_fds(fdp)
  858         struct filedesc *fdp;
  859 {
  860         struct vnode *vp;
  861         struct file *fp;
  862         int fd;
  863 
  864         FILEDESC_LOCK_ASSERT(fdp);
  865 
  866         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  867                 fp = fget_locked(fdp, fd);
                      /* Slots for which no file is returned are skipped. */
  868                 if (fp == NULL)
  869                         continue;
  870                 if (fp->f_type == DTYPE_VNODE) {
  871                         vp = fp->f_vnode;
  872                         if (vp->v_type == VDIR)
  873                                 return (EPERM);
  874                 }
  875         }
  876         return (0);
  877 }
  878 
  879 /*
  880  * This sysctl determines if we will allow a process to chroot(2) if it
  881  * has a directory open:
  882  *      0: disallowed for all processes.
  883  *      1: allowed for processes that were not already chroot(2)'ed.
  884  *      2: allowed for all processes.
  885  */
  886 
  887 static int chroot_allow_open_directories = 1;
  888 
  889 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  890      &chroot_allow_open_directories, 0, "");
  891 
  892 /*
  893  * Change notion of root (``/'') directory.
  894  */
  895 #ifndef _SYS_SYSPROTO_H_
  896 struct chroot_args {
  897         char    *path;
  898 };
  899 #endif
  900 int
  901 sys_chroot(td, uap)
  902         struct thread *td;
  903         struct chroot_args /* {
  904                 char *path;
  905         } */ *uap;
  906 {
  907         int error;
  908         struct nameidata nd;
  909         int vfslocked;
  910 
  911         error = priv_check(td, PRIV_VFS_CHROOT);
  912         if (error)
  913                 return (error);
  914         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  915             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  916         error = namei(&nd);
  917         if (error)
  918                 goto error;
  919         vfslocked = NDHASGIANT(&nd);
  920         if ((error = change_dir(nd.ni_vp, td)) != 0)
  921                 goto e_vunlock;
  922 #ifdef MAC
  923         if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
  924                 goto e_vunlock;
  925 #endif
  926         VOP_UNLOCK(nd.ni_vp, 0);
  927         error = change_root(nd.ni_vp, td);
  928         vrele(nd.ni_vp);
  929         VFS_UNLOCK_GIANT(vfslocked);
  930         NDFREE(&nd, NDF_ONLY_PNBUF);
  931         return (error);
  932 e_vunlock:
  933         vput(nd.ni_vp);
  934         VFS_UNLOCK_GIANT(vfslocked);
  935 error:
  936         NDFREE(&nd, NDF_ONLY_PNBUF);
  937         return (error);
  938 }
  939 
  940 /*
  941  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  942  * instance.
  943  */
  944 int
  945 change_dir(vp, td)
  946         struct vnode *vp;
  947         struct thread *td;
  948 {
  949         int error;
  950 
  951         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  952         if (vp->v_type != VDIR)
  953                 return (ENOTDIR);
  954 #ifdef MAC
  955         error = mac_vnode_check_chdir(td->td_ucred, vp);
  956         if (error)
  957                 return (error);
  958 #endif
  959         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  960         return (error);
  961 }
  962 
  963 /*
  964  * Common routine for kern_chroot() and jail_attach().  The caller is
  965  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  966  * authorize this operation.
  967  */
  968 int
  969 change_root(vp, td)
  970         struct vnode *vp;
  971         struct thread *td;
  972 {
  973         struct filedesc *fdp;
  974         struct vnode *oldvp;
  975         int vfslocked;
  976         int error;
  977 
  978         VFS_ASSERT_GIANT(vp->v_mount);
  979         fdp = td->td_proc->p_fd;
  980         FILEDESC_XLOCK(fdp);
  981         if (chroot_allow_open_directories == 0 ||
  982             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  983                 error = chroot_refuse_vdir_fds(fdp);
  984                 if (error) {
  985                         FILEDESC_XUNLOCK(fdp);
  986                         return (error);
  987                 }
  988         }
  989         oldvp = fdp->fd_rdir;
  990         fdp->fd_rdir = vp;
  991         VREF(fdp->fd_rdir);
  992         if (!fdp->fd_jdir) {
  993                 fdp->fd_jdir = vp;
  994                 VREF(fdp->fd_jdir);
  995         }
  996         FILEDESC_XUNLOCK(fdp);
  997         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  998         vrele(oldvp);
  999         VFS_UNLOCK_GIANT(vfslocked);
 1000         return (0);
 1001 }
 1002 
 1003 static __inline cap_rights_t
 1004 flags_to_rights(int flags)
 1005 {
 1006         cap_rights_t rights = 0;
 1007 
 1008         switch ((flags & O_ACCMODE)) {
 1009         case O_RDONLY:
 1010                 rights |= CAP_READ;
 1011                 break;
 1012 
 1013         case O_RDWR:
 1014                 rights |= CAP_READ;
 1015                 /* fall through */
 1016 
 1017         case O_WRONLY:
 1018                 rights |= CAP_WRITE;
 1019                 break;
 1020 
 1021         case O_EXEC:
 1022                 rights |= CAP_FEXECVE;
 1023                 break;
 1024         }
 1025 
 1026         if (flags & O_CREAT)
 1027                 rights |= CAP_CREATE;
 1028 
 1029         if (flags & O_TRUNC)
 1030                 rights |= CAP_FTRUNCATE;
 1031 
 1032         if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
 1033                 rights |= CAP_FLOCK;
 1034 
 1035         return (rights);
 1036 }
 1037 
 1038 /*
 1039  * Check permissions, allocate an open file structure, and call the device
 1040  * open routine if any.
 1041  */
 1042 #ifndef _SYS_SYSPROTO_H_
 1043 struct open_args {
 1044         char    *path;
 1045         int     flags;
 1046         int     mode;
 1047 };
 1048 #endif
 1049 int
 1050 sys_open(td, uap)
 1051         struct thread *td;
 1052         register struct open_args /* {
 1053                 char *path;
 1054                 int flags;
 1055                 int mode;
 1056         } */ *uap;
 1057 {
 1058 
 1059         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1060 }
 1061 
 1062 #ifndef _SYS_SYSPROTO_H_
 1063 struct openat_args {
 1064         int     fd;
 1065         char    *path;
 1066         int     flag;
 1067         int     mode;
 1068 };
 1069 #endif
 1070 int
 1071 sys_openat(struct thread *td, struct openat_args *uap)
 1072 {
 1073 
 1074         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1075             uap->mode));
 1076 }
 1077 
 1078 int
 1079 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1080     int mode)
 1081 {
 1082 
 1083         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1084 }
 1085 
 1086 int
 1087 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1088     int flags, int mode)
 1089 {
 1090         struct proc *p = td->td_proc;
 1091         struct filedesc *fdp = p->p_fd;
 1092         struct file *fp;
 1093         struct vnode *vp;
 1094         int cmode;
 1095         struct file *nfp;
 1096         int type, indx = -1, error, error_open;
 1097         struct flock lf;
 1098         struct nameidata nd;
 1099         int vfslocked;
 1100         cap_rights_t rights_needed = CAP_LOOKUP;
 1101 
 1102         AUDIT_ARG_FFLAGS(flags);
 1103         AUDIT_ARG_MODE(mode);
 1104         /* XXX: audit dirfd */
 1105         rights_needed |= flags_to_rights(flags);
 1106         /*
 1107          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 1108          * may be specified.
 1109          */
 1110         if (flags & O_EXEC) {
 1111                 if (flags & O_ACCMODE)
 1112                         return (EINVAL);
 1113         } else if ((flags & O_ACCMODE) == O_ACCMODE)
 1114                 return (EINVAL);
 1115         else
 1116                 flags = FFLAGS(flags);
 1117 
 1118         /*
 1119          * allocate the file descriptor, but don't install a descriptor yet
 1120          */
 1121         error = falloc_noinstall(td, &nfp);
 1122         if (error)
 1123                 return (error);
 1124         /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
 1125         fp = nfp;
 1126         /* Set the flags early so the finit in devfs can pick them up. */
 1127         fp->f_flag = flags & FMASK;
 1128         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
 1129         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
 1130             path, fd, rights_needed, td);
 1131         td->td_dupfd = -1;              /* XXX check for fdopen */
 1132         error = vn_open(&nd, &flags, cmode, fp);
 1133         if (error) {
 1134                 /*
 1135                  * If the vn_open replaced the method vector, something
 1136                  * wonderous happened deep below and we just pass it up
 1137                  * pretending we know what we do.
 1138                  */
 1139                 if (error == ENXIO && fp->f_ops != &badfileops)
 1140                         goto success;
 1141 
 1142                 /*
 1143                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1144                  * responsible for dropping the old contents of ofiles[indx]
 1145                  * if it succeeds.
 1146                  *
 1147                  * Don't do this for relative (capability) lookups; we don't
 1148                  * understand exactly what would happen, and we don't think
 1149                  * that it ever should.
 1150                  */
 1151                 if ((nd.ni_strictrelative == 0) &&
 1152                     (error == ENODEV || error == ENXIO) &&
 1153                     (td->td_dupfd >= 0)) {
 1154                         /* XXX from fdopen */
 1155                         error_open = error;
 1156                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1157                                 goto bad_unlocked;
 1158                         if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
 1159                             flags, error_open)) == 0)
 1160                                 goto success;
 1161                 }
 1162                 /*
 1163                  * Clean up the descriptor, but only if another thread hadn't
 1164                  * replaced or closed it.
 1165                  */
 1166                 if (indx != -1)
 1167                         fdclose(fdp, fp, indx, td);
 1168                 fdrop(fp, td);
 1169 
 1170                 if (error == ERESTART)
 1171                         error = EINTR;
 1172                 return (error);
 1173         }
 1174         td->td_dupfd = 0;
 1175         vfslocked = NDHASGIANT(&nd);
 1176         NDFREE(&nd, NDF_ONLY_PNBUF);
 1177         vp = nd.ni_vp;
 1178 
 1179         /*
 1180          * Store the vnode, for any f_type. Typically, the vnode use
 1181          * count is decremented by direct call to vn_closefile() for
 1182          * files that switched type in the cdevsw fdopen() method.
 1183          */
 1184         fp->f_vnode = vp;
 1185         /*
 1186          * If the file wasn't claimed by devfs bind it to the normal
 1187          * vnode operations here.
 1188          */
 1189         if (fp->f_ops == &badfileops) {
 1190                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1191                 fp->f_seqcount = 1;
 1192                 finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
 1193         }
 1194 
 1195         VOP_UNLOCK(vp, 0);
 1196         if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
 1197                 lf.l_whence = SEEK_SET;
 1198                 lf.l_start = 0;
 1199                 lf.l_len = 0;
 1200                 if (flags & O_EXLOCK)
 1201                         lf.l_type = F_WRLCK;
 1202                 else
 1203                         lf.l_type = F_RDLCK;
 1204                 type = F_FLOCK;
 1205                 if ((flags & FNONBLOCK) == 0)
 1206                         type |= F_WAIT;
 1207                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 1208                             type)) != 0)
 1209                         goto bad;
 1210                 atomic_set_int(&fp->f_flag, FHASLOCK);
 1211         }
 1212         if (flags & O_TRUNC) {
 1213                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1214                 if (error)
 1215                         goto bad;
 1216         }
 1217         VFS_UNLOCK_GIANT(vfslocked);
 1218 success:
 1219         /*
 1220          * If we haven't already installed the FD (for dupfdopen), do so now.
 1221          */
 1222         if (indx == -1) {
 1223 #ifdef CAPABILITIES
 1224                 if (nd.ni_strictrelative == 1) {
 1225                         /*
 1226                          * We are doing a strict relative lookup; wrap the
 1227                          * result in a capability.
 1228                          */
 1229                         if ((error = kern_capwrap(td, fp, nd.ni_baserights,
 1230                             &indx)) != 0)
 1231                                 goto bad_unlocked;
 1232                 } else
 1233 #endif
 1234                         if ((error = finstall(td, fp, &indx, flags)) != 0)
 1235                                 goto bad_unlocked;
 1236 
 1237         }
 1238 
 1239         /*
 1240          * Release our private reference, leaving the one associated with
 1241          * the descriptor table intact.
 1242          */
 1243         fdrop(fp, td);
 1244         td->td_retval[0] = indx;
 1245         return (0);
 1246 bad:
 1247         VFS_UNLOCK_GIANT(vfslocked);
 1248 bad_unlocked:
 1249         if (indx != -1)
 1250                 fdclose(fdp, fp, indx, td);
 1251         fdrop(fp, td);
 1252         td->td_retval[0] = -1;
 1253         return (error);
 1254 }
 1255 
 1256 #ifdef COMPAT_43
 1257 /*
 1258  * Create a file.
 1259  */
 1260 #ifndef _SYS_SYSPROTO_H_
 1261 struct ocreat_args {
 1262         char    *path;
 1263         int     mode;
 1264 };
 1265 #endif
 1266 int
 1267 ocreat(td, uap)
 1268         struct thread *td;
 1269         register struct ocreat_args /* {
 1270                 char *path;
 1271                 int mode;
 1272         } */ *uap;
 1273 {
 1274 
 1275         return (kern_open(td, uap->path, UIO_USERSPACE,
 1276             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1277 }
 1278 #endif /* COMPAT_43 */
 1279 
 1280 /*
 1281  * Create a special file.
 1282  */
 1283 #ifndef _SYS_SYSPROTO_H_
 1284 struct mknod_args {
 1285         char    *path;
 1286         int     mode;
 1287         int     dev;
 1288 };
 1289 #endif
 1290 int
 1291 sys_mknod(td, uap)
 1292         struct thread *td;
 1293         register struct mknod_args /* {
 1294                 char *path;
 1295                 int mode;
 1296                 int dev;
 1297         } */ *uap;
 1298 {
 1299 
 1300         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1301 }
 1302 
 1303 #ifndef _SYS_SYSPROTO_H_
 1304 struct mknodat_args {
 1305         int     fd;
 1306         char    *path;
 1307         mode_t  mode;
 1308         dev_t   dev;
 1309 };
 1310 #endif
 1311 int
 1312 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1313 {
 1314 
 1315         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1316             uap->dev));
 1317 }
 1318 
 1319 int
 1320 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1321     int dev)
 1322 {
 1323 
 1324         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1325 }
 1326 
 1327 int
 1328 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1329     int mode, int dev)
 1330 {
 1331         struct vnode *vp;
 1332         struct mount *mp;
 1333         struct vattr vattr;
 1334         int error;
 1335         int whiteout = 0;
 1336         struct nameidata nd;
 1337         int vfslocked;
 1338 
 1339         AUDIT_ARG_MODE(mode);
 1340         AUDIT_ARG_DEV(dev);
 1341         switch (mode & S_IFMT) {
 1342         case S_IFCHR:
 1343         case S_IFBLK:
 1344                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1345                 break;
 1346         case S_IFMT:
 1347                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1348                 break;
 1349         case S_IFWHT:
 1350                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1351                 break;
 1352         case S_IFIFO:
 1353                 if (dev == 0)
 1354                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1355                 /* FALLTHROUGH */
 1356         default:
 1357                 error = EINVAL;
 1358                 break;
 1359         }
 1360         if (error)
 1361                 return (error);
 1362 restart:
 1363         bwillwrite();
 1364         NDINIT_ATRIGHTS(&nd, CREATE,
 1365             LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
 1366             CAP_MKFIFO, td);
 1367         if ((error = namei(&nd)) != 0)
 1368                 return (error);
 1369         vfslocked = NDHASGIANT(&nd);
 1370         vp = nd.ni_vp;
 1371         if (vp != NULL) {
 1372                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1373                 if (vp == nd.ni_dvp)
 1374                         vrele(nd.ni_dvp);
 1375                 else
 1376                         vput(nd.ni_dvp);
 1377                 vrele(vp);
 1378                 VFS_UNLOCK_GIANT(vfslocked);
 1379                 return (EEXIST);
 1380         } else {
 1381                 VATTR_NULL(&vattr);
 1382                 vattr.va_mode = (mode & ALLPERMS) &
 1383                     ~td->td_proc->p_fd->fd_cmask;
 1384                 vattr.va_rdev = dev;
 1385                 whiteout = 0;
 1386 
 1387                 switch (mode & S_IFMT) {
 1388                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1389                         vattr.va_type = VBAD;
 1390                         break;
 1391                 case S_IFCHR:
 1392                         vattr.va_type = VCHR;
 1393                         break;
 1394                 case S_IFBLK:
 1395                         vattr.va_type = VBLK;
 1396                         break;
 1397                 case S_IFWHT:
 1398                         whiteout = 1;
 1399                         break;
 1400                 default:
 1401                         panic("kern_mknod: invalid mode");
 1402                 }
 1403         }
 1404         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1405                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1406                 vput(nd.ni_dvp);
 1407                 VFS_UNLOCK_GIANT(vfslocked);
 1408                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1409                         return (error);
 1410                 goto restart;
 1411         }
 1412 #ifdef MAC
 1413         if (error == 0 && !whiteout)
 1414                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1415                     &nd.ni_cnd, &vattr);
 1416 #endif
 1417         if (!error) {
 1418                 if (whiteout)
 1419                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1420                 else {
 1421                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1422                                                 &nd.ni_cnd, &vattr);
 1423                         if (error == 0)
 1424                                 vput(nd.ni_vp);
 1425                 }
 1426         }
 1427         NDFREE(&nd, NDF_ONLY_PNBUF);
 1428         vput(nd.ni_dvp);
 1429         vn_finished_write(mp);
 1430         VFS_UNLOCK_GIANT(vfslocked);
 1431         return (error);
 1432 }
 1433 
 1434 /*
 1435  * Create a named pipe.
 1436  */
 1437 #ifndef _SYS_SYSPROTO_H_
 1438 struct mkfifo_args {
 1439         char    *path;
 1440         int     mode;
 1441 };
 1442 #endif
 1443 int
 1444 sys_mkfifo(td, uap)
 1445         struct thread *td;
 1446         register struct mkfifo_args /* {
 1447                 char *path;
 1448                 int mode;
 1449         } */ *uap;
 1450 {
 1451 
 1452         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1453 }
 1454 
 1455 #ifndef _SYS_SYSPROTO_H_
 1456 struct mkfifoat_args {
 1457         int     fd;
 1458         char    *path;
 1459         mode_t  mode;
 1460 };
 1461 #endif
 1462 int
 1463 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1464 {
 1465 
 1466         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1467             uap->mode));
 1468 }
 1469 
 1470 int
 1471 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1472 {
 1473 
 1474         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1475 }
 1476 
 1477 int
 1478 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1479     int mode)
 1480 {
 1481         struct mount *mp;
 1482         struct vattr vattr;
 1483         int error;
 1484         struct nameidata nd;
 1485         int vfslocked;
 1486 
 1487         AUDIT_ARG_MODE(mode);
 1488 restart:
 1489         bwillwrite();
 1490         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1491             pathseg, path, fd, td);
 1492         if ((error = namei(&nd)) != 0)
 1493                 return (error);
 1494         vfslocked = NDHASGIANT(&nd);
 1495         if (nd.ni_vp != NULL) {
 1496                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1497                 if (nd.ni_vp == nd.ni_dvp)
 1498                         vrele(nd.ni_dvp);
 1499                 else
 1500                         vput(nd.ni_dvp);
 1501                 vrele(nd.ni_vp);
 1502                 VFS_UNLOCK_GIANT(vfslocked);
 1503                 return (EEXIST);
 1504         }
 1505         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1506                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1507                 vput(nd.ni_dvp);
 1508                 VFS_UNLOCK_GIANT(vfslocked);
 1509                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1510                         return (error);
 1511                 goto restart;
 1512         }
 1513         VATTR_NULL(&vattr);
 1514         vattr.va_type = VFIFO;
 1515         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1516 #ifdef MAC
 1517         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1518             &vattr);
 1519         if (error)
 1520                 goto out;
 1521 #endif
 1522         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1523         if (error == 0)
 1524                 vput(nd.ni_vp);
 1525 #ifdef MAC
 1526 out:
 1527 #endif
 1528         vput(nd.ni_dvp);
 1529         vn_finished_write(mp);
 1530         VFS_UNLOCK_GIANT(vfslocked);
 1531         NDFREE(&nd, NDF_ONLY_PNBUF);
 1532         return (error);
 1533 }
 1534 
 1535 /*
 1536  * Make a hard file link.
 1537  */
 1538 #ifndef _SYS_SYSPROTO_H_
 1539 struct link_args {
 1540         char    *path;
 1541         char    *link;
 1542 };
 1543 #endif
 1544 int
 1545 sys_link(td, uap)
 1546         struct thread *td;
 1547         register struct link_args /* {
 1548                 char *path;
 1549                 char *link;
 1550         } */ *uap;
 1551 {
 1552 
 1553         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1554 }
 1555 
 1556 #ifndef _SYS_SYSPROTO_H_
 1557 struct linkat_args {
 1558         int     fd1;
 1559         char    *path1;
 1560         int     fd2;
 1561         char    *path2;
 1562         int     flag;
 1563 };
 1564 #endif
 1565 int
 1566 sys_linkat(struct thread *td, struct linkat_args *uap)
 1567 {
 1568         int flag;
 1569 
 1570         flag = uap->flag;
 1571         if (flag & ~AT_SYMLINK_FOLLOW)
 1572                 return (EINVAL);
 1573 
 1574         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1575             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1576 }
 1577 
 1578 int hardlink_check_uid = 0;
 1579 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1580     &hardlink_check_uid, 0,
 1581     "Unprivileged processes cannot create hard links to files owned by other "
 1582     "users");
 1583 static int hardlink_check_gid = 0;
 1584 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1585     &hardlink_check_gid, 0,
 1586     "Unprivileged processes cannot create hard links to files owned by other "
 1587     "groups");
 1588 
 1589 static int
 1590 can_hardlink(struct vnode *vp, struct ucred *cred)
 1591 {
 1592         struct vattr va;
 1593         int error;
 1594 
 1595         if (!hardlink_check_uid && !hardlink_check_gid)
 1596                 return (0);
 1597 
 1598         error = VOP_GETATTR(vp, &va, cred);
 1599         if (error != 0)
 1600                 return (error);
 1601 
 1602         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1603                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1604                 if (error)
 1605                         return (error);
 1606         }
 1607 
 1608         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1609                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1610                 if (error)
 1611                         return (error);
 1612         }
 1613 
 1614         return (0);
 1615 }
 1616 
 1617 int
 1618 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1619 {
 1620 
 1621         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1622 }
 1623 
 1624 int
 1625 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1626     enum uio_seg segflg, int follow)
 1627 {
 1628         struct vnode *vp;
 1629         struct mount *mp;
 1630         struct nameidata nd;
 1631         int vfslocked;
 1632         int lvfslocked;
 1633         int error;
 1634 
 1635         bwillwrite();
 1636         NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
 1637             fd1, td);
 1638 
 1639         if ((error = namei(&nd)) != 0)
 1640                 return (error);
 1641         vfslocked = NDHASGIANT(&nd);
 1642         NDFREE(&nd, NDF_ONLY_PNBUF);
 1643         vp = nd.ni_vp;
 1644         if (vp->v_type == VDIR) {
 1645                 vrele(vp);
 1646                 VFS_UNLOCK_GIANT(vfslocked);
 1647                 return (EPERM);         /* POSIX */
 1648         }
 1649         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1650                 vrele(vp);
 1651                 VFS_UNLOCK_GIANT(vfslocked);
 1652                 return (error);
 1653         }
 1654         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1655             segflg, path2, fd2, td);
 1656         if ((error = namei(&nd)) == 0) {
 1657                 lvfslocked = NDHASGIANT(&nd);
 1658                 if (nd.ni_vp != NULL) {
 1659                         if (nd.ni_dvp == nd.ni_vp)
 1660                                 vrele(nd.ni_dvp);
 1661                         else
 1662                                 vput(nd.ni_dvp);
 1663                         vrele(nd.ni_vp);
 1664                         error = EEXIST;
 1665                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
 1666                     == 0) {
 1667                         error = can_hardlink(vp, td->td_ucred);
 1668                         if (error == 0)
 1669 #ifdef MAC
 1670                                 error = mac_vnode_check_link(td->td_ucred,
 1671                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1672                         if (error == 0)
 1673 #endif
 1674                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1675                         VOP_UNLOCK(vp, 0);
 1676                         vput(nd.ni_dvp);
 1677                 }
 1678                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1679                 VFS_UNLOCK_GIANT(lvfslocked);
 1680         }
 1681         vrele(vp);
 1682         vn_finished_write(mp);
 1683         VFS_UNLOCK_GIANT(vfslocked);
 1684         return (error);
 1685 }
 1686 
 1687 /*
 1688  * Make a symbolic link.
 1689  */
 1690 #ifndef _SYS_SYSPROTO_H_
 1691 struct symlink_args {
 1692         char    *path;
 1693         char    *link;
 1694 };
 1695 #endif
 1696 int
 1697 sys_symlink(td, uap)
 1698         struct thread *td;
 1699         register struct symlink_args /* {
 1700                 char *path;
 1701                 char *link;
 1702         } */ *uap;
 1703 {
 1704 
 1705         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1706 }
 1707 
 1708 #ifndef _SYS_SYSPROTO_H_
 1709 struct symlinkat_args {
 1710         char    *path;
 1711         int     fd;
 1712         char    *path2;
 1713 };
 1714 #endif
 1715 int
 1716 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1717 {
 1718 
 1719         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1720             UIO_USERSPACE));
 1721 }
 1722 
 1723 int
 1724 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1725 {
 1726 
 1727         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1728 }
 1729 
 1730 int
 1731 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1732     enum uio_seg segflg)
 1733 {
 1734         struct mount *mp;
 1735         struct vattr vattr;
 1736         char *syspath;
 1737         int error;
 1738         struct nameidata nd;
 1739         int vfslocked;
 1740 
 1741         if (segflg == UIO_SYSSPACE) {
 1742                 syspath = path1;
 1743         } else {
 1744                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1745                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1746                         goto out;
 1747         }
 1748         AUDIT_ARG_TEXT(syspath);
 1749 restart:
 1750         bwillwrite();
 1751         NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1752             segflg, path2, fd, td);
 1753         if ((error = namei(&nd)) != 0)
 1754                 goto out;
 1755         vfslocked = NDHASGIANT(&nd);
 1756         if (nd.ni_vp) {
 1757                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1758                 if (nd.ni_vp == nd.ni_dvp)
 1759                         vrele(nd.ni_dvp);
 1760                 else
 1761                         vput(nd.ni_dvp);
 1762                 vrele(nd.ni_vp);
 1763                 VFS_UNLOCK_GIANT(vfslocked);
 1764                 error = EEXIST;
 1765                 goto out;
 1766         }
 1767         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1768                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1769                 vput(nd.ni_dvp);
 1770                 VFS_UNLOCK_GIANT(vfslocked);
 1771                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1772                         goto out;
 1773                 goto restart;
 1774         }
 1775         VATTR_NULL(&vattr);
 1776         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1777 #ifdef MAC
 1778         vattr.va_type = VLNK;
 1779         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1780             &vattr);
 1781         if (error)
 1782                 goto out2;
 1783 #endif
 1784         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1785         if (error == 0)
 1786                 vput(nd.ni_vp);
 1787 #ifdef MAC
 1788 out2:
 1789 #endif
 1790         NDFREE(&nd, NDF_ONLY_PNBUF);
 1791         vput(nd.ni_dvp);
 1792         vn_finished_write(mp);
 1793         VFS_UNLOCK_GIANT(vfslocked);
 1794 out:
 1795         if (segflg != UIO_SYSSPACE)
 1796                 uma_zfree(namei_zone, syspath);
 1797         return (error);
 1798 }
 1799 
 1800 /*
 1801  * Delete a whiteout from the filesystem.
 1802  */
 1803 int
 1804 sys_undelete(td, uap)
 1805         struct thread *td;
 1806         register struct undelete_args /* {
 1807                 char *path;
 1808         } */ *uap;
 1809 {
 1810         int error;
 1811         struct mount *mp;
 1812         struct nameidata nd;
 1813         int vfslocked;
 1814 
 1815 restart:
 1816         bwillwrite();
 1817         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1818             UIO_USERSPACE, uap->path, td);
 1819         error = namei(&nd);
 1820         if (error)
 1821                 return (error);
 1822         vfslocked = NDHASGIANT(&nd);
 1823 
 1824         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1825                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1826                 if (nd.ni_vp == nd.ni_dvp)
 1827                         vrele(nd.ni_dvp);
 1828                 else
 1829                         vput(nd.ni_dvp);
 1830                 if (nd.ni_vp)
 1831                         vrele(nd.ni_vp);
 1832                 VFS_UNLOCK_GIANT(vfslocked);
 1833                 return (EEXIST);
 1834         }
 1835         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1836                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1837                 vput(nd.ni_dvp);
 1838                 VFS_UNLOCK_GIANT(vfslocked);
 1839                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1840                         return (error);
 1841                 goto restart;
 1842         }
 1843         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1844         NDFREE(&nd, NDF_ONLY_PNBUF);
 1845         vput(nd.ni_dvp);
 1846         vn_finished_write(mp);
 1847         VFS_UNLOCK_GIANT(vfslocked);
 1848         return (error);
 1849 }
 1850 
 1851 /*
 1852  * Delete a name from the filesystem.
 1853  */
 1854 #ifndef _SYS_SYSPROTO_H_
 1855 struct unlink_args {
 1856         char    *path;
 1857 };
 1858 #endif
 1859 int
 1860 sys_unlink(td, uap)
 1861         struct thread *td;
 1862         struct unlink_args /* {
 1863                 char *path;
 1864         } */ *uap;
 1865 {
 1866 
 1867         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1868 }
 1869 
 1870 #ifndef _SYS_SYSPROTO_H_
 1871 struct unlinkat_args {
 1872         int     fd;
 1873         char    *path;
 1874         int     flag;
 1875 };
 1876 #endif
 1877 int
 1878 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1879 {
 1880         int flag = uap->flag;
 1881         int fd = uap->fd;
 1882         char *path = uap->path;
 1883 
 1884         if (flag & ~AT_REMOVEDIR)
 1885                 return (EINVAL);
 1886 
 1887         if (flag & AT_REMOVEDIR)
 1888                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1889         else
 1890                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1891 }
 1892 
 1893 int
 1894 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1895 {
 1896 
 1897         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1898 }
 1899 
 1900 int
 1901 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1902     ino_t oldinum)
 1903 {
 1904         struct mount *mp;
 1905         struct vnode *vp;
 1906         int error;
 1907         struct nameidata nd;
 1908         struct stat sb;
 1909         int vfslocked;
 1910 
 1911 restart:
 1912         bwillwrite();
 1913         NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1914             pathseg, path, fd, td);
 1915         if ((error = namei(&nd)) != 0)
 1916                 return (error == EINVAL ? EPERM : error);
 1917         vfslocked = NDHASGIANT(&nd);
 1918         vp = nd.ni_vp;
 1919         if (vp->v_type == VDIR && oldinum == 0) {
 1920                 error = EPERM;          /* POSIX */
 1921         } else if (oldinum != 0 &&
 1922                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1923                   sb.st_ino != oldinum) {
 1924                         error = EIDRM;  /* Identifier removed */
 1925         } else {
 1926                 /*
 1927                  * The root of a mounted filesystem cannot be deleted.
 1928                  *
 1929                  * XXX: can this only be a VDIR case?
 1930                  */
 1931                 if (vp->v_vflag & VV_ROOT)
 1932                         error = EBUSY;
 1933         }
 1934         if (error == 0) {
 1935                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1936                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1937                         vput(nd.ni_dvp);
 1938                         if (vp == nd.ni_dvp)
 1939                                 vrele(vp);
 1940                         else
 1941                                 vput(vp);
 1942                         VFS_UNLOCK_GIANT(vfslocked);
 1943                         if ((error = vn_start_write(NULL, &mp,
 1944                             V_XSLEEP | PCATCH)) != 0)
 1945                                 return (error);
 1946                         goto restart;
 1947                 }
 1948 #ifdef MAC
 1949                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1950                     &nd.ni_cnd);
 1951                 if (error)
 1952                         goto out;
 1953 #endif
 1954                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1955 #ifdef MAC
 1956 out:
 1957 #endif
 1958                 vn_finished_write(mp);
 1959         }
 1960         NDFREE(&nd, NDF_ONLY_PNBUF);
 1961         vput(nd.ni_dvp);
 1962         if (vp == nd.ni_dvp)
 1963                 vrele(vp);
 1964         else
 1965                 vput(vp);
 1966         VFS_UNLOCK_GIANT(vfslocked);
 1967         return (error);
 1968 }
 1969 
 1970 /*
 1971  * Reposition read/write file offset.
 1972  */
 1973 #ifndef _SYS_SYSPROTO_H_
 1974 struct lseek_args {
 1975         int     fd;
 1976         int     pad;
 1977         off_t   offset;
 1978         int     whence;
 1979 };
 1980 #endif
 1981 int
 1982 sys_lseek(td, uap)
 1983         struct thread *td;
 1984         register struct lseek_args /* {
 1985                 int fd;
 1986                 int pad;
 1987                 off_t offset;
 1988                 int whence;
 1989         } */ *uap;
 1990 {
 1991         struct ucred *cred = td->td_ucred;
 1992         struct file *fp;
 1993         struct vnode *vp;
 1994         struct vattr vattr;
 1995         off_t offset, size;
 1996         int error, noneg;
 1997         int vfslocked;
 1998 
 1999         AUDIT_ARG_FD(uap->fd);
 2000         if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
 2001                 return (error);
 2002         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 2003                 fdrop(fp, td);
 2004                 return (ESPIPE);
 2005         }
 2006         vp = fp->f_vnode;
 2007         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2008         noneg = (vp->v_type != VCHR);
 2009         offset = uap->offset;
 2010         switch (uap->whence) {
 2011         case L_INCR:
 2012                 if (noneg &&
 2013                     (fp->f_offset < 0 ||
 2014                     (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
 2015                         error = EOVERFLOW;
 2016                         break;
 2017                 }
 2018                 offset += fp->f_offset;
 2019                 break;
 2020         case L_XTND:
 2021                 vn_lock(vp, LK_SHARED | LK_RETRY);
 2022                 error = VOP_GETATTR(vp, &vattr, cred);
 2023                 VOP_UNLOCK(vp, 0);
 2024                 if (error)
 2025                         break;
 2026 
 2027                 /*
 2028                  * If the file references a disk device, then fetch
 2029                  * the media size and use that to determine the ending
 2030                  * offset.
 2031                  */
 2032                 if (vattr.va_size == 0 && vp->v_type == VCHR &&
 2033                     fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 2034                         vattr.va_size = size;
 2035                 if (noneg &&
 2036                     (vattr.va_size > OFF_MAX ||
 2037                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 2038                         error = EOVERFLOW;
 2039                         break;
 2040                 }
 2041                 offset += vattr.va_size;
 2042                 break;
 2043         case L_SET:
 2044                 break;
 2045         case SEEK_DATA:
 2046                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 2047                 break;
 2048         case SEEK_HOLE:
 2049                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 2050                 break;
 2051         default:
 2052                 error = EINVAL;
 2053         }
 2054         if (error == 0 && noneg && offset < 0)
 2055                 error = EINVAL;
 2056         if (error != 0)
 2057                 goto drop;
 2058         fp->f_offset = offset;
 2059         VFS_KNOTE_UNLOCKED(vp, 0);
 2060         *(off_t *)(td->td_retval) = fp->f_offset;
 2061 drop:
 2062         fdrop(fp, td);
 2063         VFS_UNLOCK_GIANT(vfslocked);
 2064         return (error);
 2065 }
 2066 
 2067 #if defined(COMPAT_43)
 2068 /*
 2069  * Reposition read/write file offset.
 2070  */
 2071 #ifndef _SYS_SYSPROTO_H_
 2072 struct olseek_args {
 2073         int     fd;
 2074         long    offset;
 2075         int     whence;
 2076 };
 2077 #endif
 2078 int
 2079 olseek(td, uap)
 2080         struct thread *td;
 2081         register struct olseek_args /* {
 2082                 int fd;
 2083                 long offset;
 2084                 int whence;
 2085         } */ *uap;
 2086 {
 2087         struct lseek_args /* {
 2088                 int fd;
 2089                 int pad;
 2090                 off_t offset;
 2091                 int whence;
 2092         } */ nuap;
 2093 
 2094         nuap.fd = uap->fd;
 2095         nuap.offset = uap->offset;
 2096         nuap.whence = uap->whence;
 2097         return (sys_lseek(td, &nuap));
 2098 }
 2099 #endif /* COMPAT_43 */
 2100 
 2101 /* Version with the 'pad' argument */
 2102 int
 2103 freebsd6_lseek(td, uap)
 2104         struct thread *td;
 2105         register struct freebsd6_lseek_args *uap;
 2106 {
 2107         struct lseek_args ouap;
 2108 
 2109         ouap.fd = uap->fd;
 2110         ouap.offset = uap->offset;
 2111         ouap.whence = uap->whence;
 2112         return (sys_lseek(td, &ouap));
 2113 }
 2114 
 2115 /*
 2116  * Check access permissions using passed credentials.
 2117  */
 2118 static int
 2119 vn_access(vp, user_flags, cred, td)
 2120         struct vnode    *vp;
 2121         int             user_flags;
 2122         struct ucred    *cred;
 2123         struct thread   *td;
 2124 {
 2125         int error;
 2126         accmode_t accmode;
 2127 
 2128         /* Flags == 0 means only check for existence. */
 2129         error = 0;
 2130         if (user_flags) {
 2131                 accmode = 0;
 2132                 if (user_flags & R_OK)
 2133                         accmode |= VREAD;
 2134                 if (user_flags & W_OK)
 2135                         accmode |= VWRITE;
 2136                 if (user_flags & X_OK)
 2137                         accmode |= VEXEC;
 2138 #ifdef MAC
 2139                 error = mac_vnode_check_access(cred, vp, accmode);
 2140                 if (error)
 2141                         return (error);
 2142 #endif
 2143                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 2144                         error = VOP_ACCESS(vp, accmode, cred, td);
 2145         }
 2146         return (error);
 2147 }
 2148 
 2149 /*
 2150  * Check access permissions using "real" credentials.
 2151  */
 2152 #ifndef _SYS_SYSPROTO_H_
 2153 struct access_args {
 2154         char    *path;
 2155         int     flags;
 2156 };
 2157 #endif
 2158 int
 2159 sys_access(td, uap)
 2160         struct thread *td;
 2161         register struct access_args /* {
 2162                 char *path;
 2163                 int flags;
 2164         } */ *uap;
 2165 {
 2166 
 2167         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 2168 }
 2169 
 2170 #ifndef _SYS_SYSPROTO_H_
 2171 struct faccessat_args {
 2172         int     dirfd;
 2173         char    *path;
 2174         int     mode;
 2175         int     flag;
 2176 }
 2177 #endif
 2178 int
 2179 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2180 {
 2181 
 2182         if (uap->flag & ~AT_EACCESS)
 2183                 return (EINVAL);
 2184         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2185             uap->mode));
 2186 }
 2187 
 2188 int
 2189 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2190 {
 2191 
 2192         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
 2193 }
 2194 
 2195 int
 2196 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2197     int flags, int mode)
 2198 {
 2199         struct ucred *cred, *tmpcred;
 2200         struct vnode *vp;
 2201         struct nameidata nd;
 2202         int vfslocked;
 2203         int error;
 2204 
 2205         /*
 2206          * Create and modify a temporary credential instead of one that
 2207          * is potentially shared.
 2208          */
 2209         if (!(flags & AT_EACCESS)) {
 2210                 cred = td->td_ucred;
 2211                 tmpcred = crdup(cred);
 2212                 tmpcred->cr_uid = cred->cr_ruid;
 2213                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2214                 td->td_ucred = tmpcred;
 2215         } else
 2216                 cred = tmpcred = td->td_ucred;
 2217         AUDIT_ARG_VALUE(mode);
 2218         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2219             AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
 2220         if ((error = namei(&nd)) != 0)
 2221                 goto out1;
 2222         vfslocked = NDHASGIANT(&nd);
 2223         vp = nd.ni_vp;
 2224 
 2225         error = vn_access(vp, mode, tmpcred, td);
 2226         NDFREE(&nd, NDF_ONLY_PNBUF);
 2227         vput(vp);
 2228         VFS_UNLOCK_GIANT(vfslocked);
 2229 out1:
 2230         if (!(flags & AT_EACCESS)) {
 2231                 td->td_ucred = cred;
 2232                 crfree(tmpcred);
 2233         }
 2234         return (error);
 2235 }
 2236 
 2237 /*
 2238  * Check access permissions using "effective" credentials.
 2239  */
 2240 #ifndef _SYS_SYSPROTO_H_
 2241 struct eaccess_args {
 2242         char    *path;
 2243         int     flags;
 2244 };
 2245 #endif
 2246 int
 2247 sys_eaccess(td, uap)
 2248         struct thread *td;
 2249         register struct eaccess_args /* {
 2250                 char *path;
 2251                 int flags;
 2252         } */ *uap;
 2253 {
 2254 
 2255         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 2256 }
 2257 
 2258 int
 2259 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 2260 {
 2261 
 2262         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
 2263 }
 2264 
 2265 #if defined(COMPAT_43)
 2266 /*
 2267  * Get file status; this version follows links.
 2268  */
 2269 #ifndef _SYS_SYSPROTO_H_
 2270 struct ostat_args {
 2271         char    *path;
 2272         struct ostat *ub;
 2273 };
 2274 #endif
 2275 int
 2276 ostat(td, uap)
 2277         struct thread *td;
 2278         register struct ostat_args /* {
 2279                 char *path;
 2280                 struct ostat *ub;
 2281         } */ *uap;
 2282 {
 2283         struct stat sb;
 2284         struct ostat osb;
 2285         int error;
 2286 
 2287         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2288         if (error)
 2289                 return (error);
 2290         cvtstat(&sb, &osb);
 2291         error = copyout(&osb, uap->ub, sizeof (osb));
 2292         return (error);
 2293 }
 2294 
 2295 /*
 2296  * Get file status; this version does not follow links.
 2297  */
 2298 #ifndef _SYS_SYSPROTO_H_
 2299 struct olstat_args {
 2300         char    *path;
 2301         struct ostat *ub;
 2302 };
 2303 #endif
 2304 int
 2305 olstat(td, uap)
 2306         struct thread *td;
 2307         register struct olstat_args /* {
 2308                 char *path;
 2309                 struct ostat *ub;
 2310         } */ *uap;
 2311 {
 2312         struct stat sb;
 2313         struct ostat osb;
 2314         int error;
 2315 
 2316         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2317         if (error)
 2318                 return (error);
 2319         cvtstat(&sb, &osb);
 2320         error = copyout(&osb, uap->ub, sizeof (osb));
 2321         return (error);
 2322 }
 2323 
 2324 /*
 2325  * Convert from an old to a new stat structure.
 2326  */
 2327 void
 2328 cvtstat(st, ost)
 2329         struct stat *st;
 2330         struct ostat *ost;
 2331 {
 2332 
 2333         ost->st_dev = st->st_dev;
 2334         ost->st_ino = st->st_ino;
 2335         ost->st_mode = st->st_mode;
 2336         ost->st_nlink = st->st_nlink;
 2337         ost->st_uid = st->st_uid;
 2338         ost->st_gid = st->st_gid;
 2339         ost->st_rdev = st->st_rdev;
 2340         if (st->st_size < (quad_t)1 << 32)
 2341                 ost->st_size = st->st_size;
 2342         else
 2343                 ost->st_size = -2;
 2344         ost->st_atim = st->st_atim;
 2345         ost->st_mtim = st->st_mtim;
 2346         ost->st_ctim = st->st_ctim;
 2347         ost->st_blksize = st->st_blksize;
 2348         ost->st_blocks = st->st_blocks;
 2349         ost->st_flags = st->st_flags;
 2350         ost->st_gen = st->st_gen;
 2351 }
 2352 #endif /* COMPAT_43 */
 2353 
 2354 /*
 2355  * Get file status; this version follows links.
 2356  */
 2357 #ifndef _SYS_SYSPROTO_H_
 2358 struct stat_args {
 2359         char    *path;
 2360         struct stat *ub;
 2361 };
 2362 #endif
 2363 int
 2364 sys_stat(td, uap)
 2365         struct thread *td;
 2366         register struct stat_args /* {
 2367                 char *path;
 2368                 struct stat *ub;
 2369         } */ *uap;
 2370 {
 2371         struct stat sb;
 2372         int error;
 2373 
 2374         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2375         if (error == 0)
 2376                 error = copyout(&sb, uap->ub, sizeof (sb));
 2377         return (error);
 2378 }
 2379 
 2380 #ifndef _SYS_SYSPROTO_H_
 2381 struct fstatat_args {
 2382         int     fd;
 2383         char    *path;
 2384         struct stat     *buf;
 2385         int     flag;
 2386 }
 2387 #endif
 2388 int
 2389 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2390 {
 2391         struct stat sb;
 2392         int error;
 2393 
 2394         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2395             UIO_USERSPACE, &sb);
 2396         if (error == 0)
 2397                 error = copyout(&sb, uap->buf, sizeof (sb));
 2398         return (error);
 2399 }
 2400 
 2401 int
 2402 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2403 {
 2404 
 2405         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2406 }
 2407 
 2408 int
 2409 kern_statat(struct thread *td, int flag, int fd, char *path,
 2410     enum uio_seg pathseg, struct stat *sbp)
 2411 {
 2412 
 2413         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2414 }
 2415 
 2416 int
 2417 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2418     enum uio_seg pathseg, struct stat *sbp,
 2419     void (*hook)(struct vnode *vp, struct stat *sbp))
 2420 {
 2421         struct nameidata nd;
 2422         struct stat sb;
 2423         int error, vfslocked;
 2424 
 2425         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2426                 return (EINVAL);
 2427 
 2428         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2429             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
 2430             path, fd, CAP_FSTAT, td);
 2431 
 2432         if ((error = namei(&nd)) != 0)
 2433                 return (error);
 2434         vfslocked = NDHASGIANT(&nd);
 2435         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2436         if (!error) {
 2437                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2438                 if (S_ISREG(sb.st_mode))
 2439                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2440                 if (__predict_false(hook != NULL))
 2441                         hook(nd.ni_vp, &sb);
 2442         }
 2443         NDFREE(&nd, NDF_ONLY_PNBUF);
 2444         vput(nd.ni_vp);
 2445         VFS_UNLOCK_GIANT(vfslocked);
 2446         if (error)
 2447                 return (error);
 2448         *sbp = sb;
 2449 #ifdef KTRACE
 2450         if (KTRPOINT(td, KTR_STRUCT))
 2451                 ktrstat(&sb);
 2452 #endif
 2453         return (0);
 2454 }
 2455 
 2456 /*
 2457  * Get file status; this version does not follow links.
 2458  */
 2459 #ifndef _SYS_SYSPROTO_H_
 2460 struct lstat_args {
 2461         char    *path;
 2462         struct stat *ub;
 2463 };
 2464 #endif
 2465 int
 2466 sys_lstat(td, uap)
 2467         struct thread *td;
 2468         register struct lstat_args /* {
 2469                 char *path;
 2470                 struct stat *ub;
 2471         } */ *uap;
 2472 {
 2473         struct stat sb;
 2474         int error;
 2475 
 2476         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2477         if (error == 0)
 2478                 error = copyout(&sb, uap->ub, sizeof (sb));
 2479         return (error);
 2480 }
 2481 
 2482 int
 2483 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2484 {
 2485 
 2486         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2487             sbp));
 2488 }
 2489 
 2490 /*
 2491  * Implementation of the NetBSD [l]stat() functions.
 2492  */
 2493 void
 2494 cvtnstat(sb, nsb)
 2495         struct stat *sb;
 2496         struct nstat *nsb;
 2497 {
 2498         bzero(nsb, sizeof *nsb);
 2499         nsb->st_dev = sb->st_dev;
 2500         nsb->st_ino = sb->st_ino;
 2501         nsb->st_mode = sb->st_mode;
 2502         nsb->st_nlink = sb->st_nlink;
 2503         nsb->st_uid = sb->st_uid;
 2504         nsb->st_gid = sb->st_gid;
 2505         nsb->st_rdev = sb->st_rdev;
 2506         nsb->st_atim = sb->st_atim;
 2507         nsb->st_mtim = sb->st_mtim;
 2508         nsb->st_ctim = sb->st_ctim;
 2509         nsb->st_size = sb->st_size;
 2510         nsb->st_blocks = sb->st_blocks;
 2511         nsb->st_blksize = sb->st_blksize;
 2512         nsb->st_flags = sb->st_flags;
 2513         nsb->st_gen = sb->st_gen;
 2514         nsb->st_birthtim = sb->st_birthtim;
 2515 }
 2516 
 2517 #ifndef _SYS_SYSPROTO_H_
 2518 struct nstat_args {
 2519         char    *path;
 2520         struct nstat *ub;
 2521 };
 2522 #endif
 2523 int
 2524 sys_nstat(td, uap)
 2525         struct thread *td;
 2526         register struct nstat_args /* {
 2527                 char *path;
 2528                 struct nstat *ub;
 2529         } */ *uap;
 2530 {
 2531         struct stat sb;
 2532         struct nstat nsb;
 2533         int error;
 2534 
 2535         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2536         if (error)
 2537                 return (error);
 2538         cvtnstat(&sb, &nsb);
 2539         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2540         return (error);
 2541 }
 2542 
 2543 /*
 2544  * NetBSD lstat.  Get file status; this version does not follow links.
 2545  */
 2546 #ifndef _SYS_SYSPROTO_H_
 2547 struct lstat_args {
 2548         char    *path;
 2549         struct stat *ub;
 2550 };
 2551 #endif
 2552 int
 2553 sys_nlstat(td, uap)
 2554         struct thread *td;
 2555         register struct nlstat_args /* {
 2556                 char *path;
 2557                 struct nstat *ub;
 2558         } */ *uap;
 2559 {
 2560         struct stat sb;
 2561         struct nstat nsb;
 2562         int error;
 2563 
 2564         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2565         if (error)
 2566                 return (error);
 2567         cvtnstat(&sb, &nsb);
 2568         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2569         return (error);
 2570 }
 2571 
 2572 /*
 2573  * Get configurable pathname variables.
 2574  */
 2575 #ifndef _SYS_SYSPROTO_H_
 2576 struct pathconf_args {
 2577         char    *path;
 2578         int     name;
 2579 };
 2580 #endif
 2581 int
 2582 sys_pathconf(td, uap)
 2583         struct thread *td;
 2584         register struct pathconf_args /* {
 2585                 char *path;
 2586                 int name;
 2587         } */ *uap;
 2588 {
 2589 
 2590         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2591 }
 2592 
 2593 #ifndef _SYS_SYSPROTO_H_
 2594 struct lpathconf_args {
 2595         char    *path;
 2596         int     name;
 2597 };
 2598 #endif
 2599 int
 2600 sys_lpathconf(td, uap)
 2601         struct thread *td;
 2602         register struct lpathconf_args /* {
 2603                 char *path;
 2604                 int name;
 2605         } */ *uap;
 2606 {
 2607 
 2608         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
 2609 }
 2610 
 2611 int
 2612 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2613     u_long flags)
 2614 {
 2615         struct nameidata nd;
 2616         int error, vfslocked;
 2617 
 2618         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
 2619             flags, pathseg, path, td);
 2620         if ((error = namei(&nd)) != 0)
 2621                 return (error);
 2622         vfslocked = NDHASGIANT(&nd);
 2623         NDFREE(&nd, NDF_ONLY_PNBUF);
 2624 
 2625         /* If asynchronous I/O is available, it works for all files. */
 2626         if (name == _PC_ASYNC_IO)
 2627                 td->td_retval[0] = async_io_version;
 2628         else
 2629                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2630         vput(nd.ni_vp);
 2631         VFS_UNLOCK_GIANT(vfslocked);
 2632         return (error);
 2633 }
 2634 
 2635 /*
 2636  * Return target name of a symbolic link.
 2637  */
 2638 #ifndef _SYS_SYSPROTO_H_
 2639 struct readlink_args {
 2640         char    *path;
 2641         char    *buf;
 2642         size_t  count;
 2643 };
 2644 #endif
 2645 int
 2646 sys_readlink(td, uap)
 2647         struct thread *td;
 2648         register struct readlink_args /* {
 2649                 char *path;
 2650                 char *buf;
 2651                 size_t count;
 2652         } */ *uap;
 2653 {
 2654 
 2655         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2656             UIO_USERSPACE, uap->count));
 2657 }
 2658 #ifndef _SYS_SYSPROTO_H_
 2659 struct readlinkat_args {
 2660         int     fd;
 2661         char    *path;
 2662         char    *buf;
 2663         size_t  bufsize;
 2664 };
 2665 #endif
 2666 int
 2667 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2668 {
 2669 
 2670         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2671             uap->buf, UIO_USERSPACE, uap->bufsize));
 2672 }
 2673 
 2674 int
 2675 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2676     enum uio_seg bufseg, size_t count)
 2677 {
 2678 
 2679         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2680             count));
 2681 }
 2682 
 2683 int
 2684 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2685     char *buf, enum uio_seg bufseg, size_t count)
 2686 {
 2687         struct vnode *vp;
 2688         struct iovec aiov;
 2689         struct uio auio;
 2690         int error;
 2691         struct nameidata nd;
 2692         int vfslocked;
 2693 
 2694         if (count > IOSIZE_MAX)
 2695                 return (EINVAL);
 2696 
 2697         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2698             AUDITVNODE1, pathseg, path, fd, td);
 2699 
 2700         if ((error = namei(&nd)) != 0)
 2701                 return (error);
 2702         NDFREE(&nd, NDF_ONLY_PNBUF);
 2703         vfslocked = NDHASGIANT(&nd);
 2704         vp = nd.ni_vp;
 2705 #ifdef MAC
 2706         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2707         if (error) {
 2708                 vput(vp);
 2709                 VFS_UNLOCK_GIANT(vfslocked);
 2710                 return (error);
 2711         }
 2712 #endif
 2713         if (vp->v_type != VLNK)
 2714                 error = EINVAL;
 2715         else {
 2716                 aiov.iov_base = buf;
 2717                 aiov.iov_len = count;
 2718                 auio.uio_iov = &aiov;
 2719                 auio.uio_iovcnt = 1;
 2720                 auio.uio_offset = 0;
 2721                 auio.uio_rw = UIO_READ;
 2722                 auio.uio_segflg = bufseg;
 2723                 auio.uio_td = td;
 2724                 auio.uio_resid = count;
 2725                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2726         }
 2727         vput(vp);
 2728         VFS_UNLOCK_GIANT(vfslocked);
 2729         td->td_retval[0] = count - auio.uio_resid;
 2730         return (error);
 2731 }
 2732 
 2733 /*
 2734  * Common implementation code for chflags() and fchflags().
 2735  */
 2736 static int
 2737 setfflags(td, vp, flags)
 2738         struct thread *td;
 2739         struct vnode *vp;
 2740         int flags;
 2741 {
 2742         int error;
 2743         struct mount *mp;
 2744         struct vattr vattr;
 2745 
 2746         /* We can't support the value matching VNOVAL. */
 2747         if (flags == VNOVAL)
 2748                 return (EOPNOTSUPP);
 2749 
 2750         /*
 2751          * Prevent non-root users from setting flags on devices.  When
 2752          * a device is reused, users can retain ownership of the device
 2753          * if they are allowed to set flags and programs assume that
 2754          * chown can't fail when done as root.
 2755          */
 2756         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2757                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2758                 if (error)
 2759                         return (error);
 2760         }
 2761 
 2762         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2763                 return (error);
 2764         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2765         VATTR_NULL(&vattr);
 2766         vattr.va_flags = flags;
 2767 #ifdef MAC
 2768         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2769         if (error == 0)
 2770 #endif
 2771                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2772         VOP_UNLOCK(vp, 0);
 2773         vn_finished_write(mp);
 2774         return (error);
 2775 }
 2776 
 2777 /*
 2778  * Change flags of a file given a path name.
 2779  */
 2780 #ifndef _SYS_SYSPROTO_H_
 2781 struct chflags_args {
 2782         char    *path;
 2783         int     flags;
 2784 };
 2785 #endif
 2786 int
 2787 sys_chflags(td, uap)
 2788         struct thread *td;
 2789         register struct chflags_args /* {
 2790                 char *path;
 2791                 int flags;
 2792         } */ *uap;
 2793 {
 2794         int error;
 2795         struct nameidata nd;
 2796         int vfslocked;
 2797 
 2798         AUDIT_ARG_FFLAGS(uap->flags);
 2799         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2800             uap->path, td);
 2801         if ((error = namei(&nd)) != 0)
 2802                 return (error);
 2803         NDFREE(&nd, NDF_ONLY_PNBUF);
 2804         vfslocked = NDHASGIANT(&nd);
 2805         error = setfflags(td, nd.ni_vp, uap->flags);
 2806         vrele(nd.ni_vp);
 2807         VFS_UNLOCK_GIANT(vfslocked);
 2808         return (error);
 2809 }
 2810 
 2811 /*
 2812  * Same as chflags() but doesn't follow symlinks.
 2813  */
 2814 int
 2815 sys_lchflags(td, uap)
 2816         struct thread *td;
 2817         register struct lchflags_args /* {
 2818                 char *path;
 2819                 int flags;
 2820         } */ *uap;
 2821 {
 2822         int error;
 2823         struct nameidata nd;
 2824         int vfslocked;
 2825 
 2826         AUDIT_ARG_FFLAGS(uap->flags);
 2827         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2828             uap->path, td);
 2829         if ((error = namei(&nd)) != 0)
 2830                 return (error);
 2831         vfslocked = NDHASGIANT(&nd);
 2832         NDFREE(&nd, NDF_ONLY_PNBUF);
 2833         error = setfflags(td, nd.ni_vp, uap->flags);
 2834         vrele(nd.ni_vp);
 2835         VFS_UNLOCK_GIANT(vfslocked);
 2836         return (error);
 2837 }
 2838 
 2839 /*
 2840  * Change flags of a file given a file descriptor.
 2841  */
 2842 #ifndef _SYS_SYSPROTO_H_
 2843 struct fchflags_args {
 2844         int     fd;
 2845         int     flags;
 2846 };
 2847 #endif
 2848 int
 2849 sys_fchflags(td, uap)
 2850         struct thread *td;
 2851         register struct fchflags_args /* {
 2852                 int fd;
 2853                 int flags;
 2854         } */ *uap;
 2855 {
 2856         struct file *fp;
 2857         int vfslocked;
 2858         int error;
 2859 
 2860         AUDIT_ARG_FD(uap->fd);
 2861         AUDIT_ARG_FFLAGS(uap->flags);
 2862         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
 2863             &fp)) != 0)
 2864                 return (error);
 2865         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2866 #ifdef AUDIT
 2867         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2868         AUDIT_ARG_VNODE1(fp->f_vnode);
 2869         VOP_UNLOCK(fp->f_vnode, 0);
 2870 #endif
 2871         error = setfflags(td, fp->f_vnode, uap->flags);
 2872         VFS_UNLOCK_GIANT(vfslocked);
 2873         fdrop(fp, td);
 2874         return (error);
 2875 }
 2876 
 2877 /*
 2878  * Common implementation code for chmod(), lchmod() and fchmod().
 2879  */
 2880 int
 2881 setfmode(td, cred, vp, mode)
 2882         struct thread *td;
 2883         struct ucred *cred;
 2884         struct vnode *vp;
 2885         int mode;
 2886 {
 2887         int error;
 2888         struct mount *mp;
 2889         struct vattr vattr;
 2890 
 2891         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2892                 return (error);
 2893         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2894         VATTR_NULL(&vattr);
 2895         vattr.va_mode = mode & ALLPERMS;
 2896 #ifdef MAC
 2897         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2898         if (error == 0)
 2899 #endif
 2900                 error = VOP_SETATTR(vp, &vattr, cred);
 2901         VOP_UNLOCK(vp, 0);
 2902         vn_finished_write(mp);
 2903         return (error);
 2904 }
 2905 
 2906 /*
 2907  * Change mode of a file given path name.
 2908  */
 2909 #ifndef _SYS_SYSPROTO_H_
 2910 struct chmod_args {
 2911         char    *path;
 2912         int     mode;
 2913 };
 2914 #endif
 2915 int
 2916 sys_chmod(td, uap)
 2917         struct thread *td;
 2918         register struct chmod_args /* {
 2919                 char *path;
 2920                 int mode;
 2921         } */ *uap;
 2922 {
 2923 
 2924         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2925 }
 2926 
 2927 #ifndef _SYS_SYSPROTO_H_
 2928 struct fchmodat_args {
 2929         int     dirfd;
 2930         char    *path;
 2931         mode_t  mode;
 2932         int     flag;
 2933 }
 2934 #endif
 2935 int
 2936 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2937 {
 2938         int flag = uap->flag;
 2939         int fd = uap->fd;
 2940         char *path = uap->path;
 2941         mode_t mode = uap->mode;
 2942 
 2943         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2944                 return (EINVAL);
 2945 
 2946         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2947 }
 2948 
 2949 int
 2950 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2951 {
 2952 
 2953         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2954 }
 2955 
 2956 /*
 2957  * Change mode of a file given path name (don't follow links.)
 2958  */
 2959 #ifndef _SYS_SYSPROTO_H_
 2960 struct lchmod_args {
 2961         char    *path;
 2962         int     mode;
 2963 };
 2964 #endif
 2965 int
 2966 sys_lchmod(td, uap)
 2967         struct thread *td;
 2968         register struct lchmod_args /* {
 2969                 char *path;
 2970                 int mode;
 2971         } */ *uap;
 2972 {
 2973 
 2974         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2975             uap->mode, AT_SYMLINK_NOFOLLOW));
 2976 }
 2977 
 2978 
 2979 int
 2980 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2981     mode_t mode, int flag)
 2982 {
 2983         int error;
 2984         struct nameidata nd;
 2985         int vfslocked;
 2986         int follow;
 2987 
 2988         AUDIT_ARG_MODE(mode);
 2989         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2990         NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
 2991             path, fd, CAP_FCHMOD, td);
 2992         if ((error = namei(&nd)) != 0)
 2993                 return (error);
 2994         vfslocked = NDHASGIANT(&nd);
 2995         NDFREE(&nd, NDF_ONLY_PNBUF);
 2996         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2997         vrele(nd.ni_vp);
 2998         VFS_UNLOCK_GIANT(vfslocked);
 2999         return (error);
 3000 }
 3001 
 3002 /*
 3003  * Change mode of a file given a file descriptor.
 3004  */
 3005 #ifndef _SYS_SYSPROTO_H_
 3006 struct fchmod_args {
 3007         int     fd;
 3008         int     mode;
 3009 };
 3010 #endif
 3011 int
 3012 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 3013 {
 3014         struct file *fp;
 3015         int error;
 3016 
 3017         AUDIT_ARG_FD(uap->fd);
 3018         AUDIT_ARG_MODE(uap->mode);
 3019 
 3020         error = fget(td, uap->fd, CAP_FCHMOD, &fp);
 3021         if (error != 0)
 3022                 return (error);
 3023         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 3024         fdrop(fp, td);
 3025         return (error);
 3026 }
 3027 
 3028 /*
 3029  * Common implementation for chown(), lchown(), and fchown()
 3030  */
 3031 int
 3032 setfown(td, cred, vp, uid, gid)
 3033         struct thread *td;
 3034         struct ucred *cred;
 3035         struct vnode *vp;
 3036         uid_t uid;
 3037         gid_t gid;
 3038 {
 3039         int error;
 3040         struct mount *mp;
 3041         struct vattr vattr;
 3042 
 3043         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3044                 return (error);
 3045         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3046         VATTR_NULL(&vattr);
 3047         vattr.va_uid = uid;
 3048         vattr.va_gid = gid;
 3049 #ifdef MAC
 3050         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 3051             vattr.va_gid);
 3052         if (error == 0)
 3053 #endif
 3054                 error = VOP_SETATTR(vp, &vattr, cred);
 3055         VOP_UNLOCK(vp, 0);
 3056         vn_finished_write(mp);
 3057         return (error);
 3058 }
 3059 
 3060 /*
 3061  * Set ownership given a path name.
 3062  */
 3063 #ifndef _SYS_SYSPROTO_H_
 3064 struct chown_args {
 3065         char    *path;
 3066         int     uid;
 3067         int     gid;
 3068 };
 3069 #endif
 3070 int
 3071 sys_chown(td, uap)
 3072         struct thread *td;
 3073         register struct chown_args /* {
 3074                 char *path;
 3075                 int uid;
 3076                 int gid;
 3077         } */ *uap;
 3078 {
 3079 
 3080         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3081 }
 3082 
 3083 #ifndef _SYS_SYSPROTO_H_
 3084 struct fchownat_args {
 3085         int fd;
 3086         const char * path;
 3087         uid_t uid;
 3088         gid_t gid;
 3089         int flag;
 3090 };
 3091 #endif
 3092 int
 3093 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 3094 {
 3095         int flag;
 3096 
 3097         flag = uap->flag;
 3098         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3099                 return (EINVAL);
 3100 
 3101         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 3102             uap->gid, uap->flag));
 3103 }
 3104 
 3105 int
 3106 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3107     int gid)
 3108 {
 3109 
 3110         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 3111 }
 3112 
 3113 int
 3114 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3115     int uid, int gid, int flag)
 3116 {
 3117         struct nameidata nd;
 3118         int error, vfslocked, follow;
 3119 
 3120         AUDIT_ARG_OWNER(uid, gid);
 3121         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 3122         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
 3123             path, fd, CAP_FCHOWN, td);
 3124 
 3125         if ((error = namei(&nd)) != 0)
 3126                 return (error);
 3127         vfslocked = NDHASGIANT(&nd);
 3128         NDFREE(&nd, NDF_ONLY_PNBUF);
 3129         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 3130         vrele(nd.ni_vp);
 3131         VFS_UNLOCK_GIANT(vfslocked);
 3132         return (error);
 3133 }
 3134 
 3135 /*
 3136  * Set ownership given a path name, do not cross symlinks.
 3137  */
 3138 #ifndef _SYS_SYSPROTO_H_
 3139 struct lchown_args {
 3140         char    *path;
 3141         int     uid;
 3142         int     gid;
 3143 };
 3144 #endif
 3145 int
 3146 sys_lchown(td, uap)
 3147         struct thread *td;
 3148         register struct lchown_args /* {
 3149                 char *path;
 3150                 int uid;
 3151                 int gid;
 3152         } */ *uap;
 3153 {
 3154 
 3155         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3156 }
 3157 
 3158 int
 3159 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3160     int gid)
 3161 {
 3162 
 3163         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3164             AT_SYMLINK_NOFOLLOW));
 3165 }
 3166 
 3167 /*
 3168  * Set ownership given a file descriptor.
 3169  */
 3170 #ifndef _SYS_SYSPROTO_H_
 3171 struct fchown_args {
 3172         int     fd;
 3173         int     uid;
 3174         int     gid;
 3175 };
 3176 #endif
 3177 int
 3178 sys_fchown(td, uap)
 3179         struct thread *td;
 3180         register struct fchown_args /* {
 3181                 int fd;
 3182                 int uid;
 3183                 int gid;
 3184         } */ *uap;
 3185 {
 3186         struct file *fp;
 3187         int error;
 3188 
 3189         AUDIT_ARG_FD(uap->fd);
 3190         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3191         error = fget(td, uap->fd, CAP_FCHOWN, &fp);
 3192         if (error != 0)
 3193                 return (error);
 3194         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3195         fdrop(fp, td);
 3196         return (error);
 3197 }
 3198 
 3199 /*
 3200  * Common implementation code for utimes(), lutimes(), and futimes().
 3201  */
 3202 static int
 3203 getutimes(usrtvp, tvpseg, tsp)
 3204         const struct timeval *usrtvp;
 3205         enum uio_seg tvpseg;
 3206         struct timespec *tsp;
 3207 {
 3208         struct timeval tv[2];
 3209         const struct timeval *tvp;
 3210         int error;
 3211 
 3212         if (usrtvp == NULL) {
 3213                 vfs_timestamp(&tsp[0]);
 3214                 tsp[1] = tsp[0];
 3215         } else {
 3216                 if (tvpseg == UIO_SYSSPACE) {
 3217                         tvp = usrtvp;
 3218                 } else {
 3219                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3220                                 return (error);
 3221                         tvp = tv;
 3222                 }
 3223 
 3224                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3225                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3226                         return (EINVAL);
 3227                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3228                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3229         }
 3230         return (0);
 3231 }
 3232 
 3233 /*
 3234  * Common implementation code for utimes(), lutimes(), and futimes().
 3235  */
 3236 static int
 3237 setutimes(td, vp, ts, numtimes, nullflag)
 3238         struct thread *td;
 3239         struct vnode *vp;
 3240         const struct timespec *ts;
 3241         int numtimes;
 3242         int nullflag;
 3243 {
 3244         int error, setbirthtime;
 3245         struct mount *mp;
 3246         struct vattr vattr;
 3247 
 3248         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3249                 return (error);
 3250         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3251         setbirthtime = 0;
 3252         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3253             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3254                 setbirthtime = 1;
 3255         VATTR_NULL(&vattr);
 3256         vattr.va_atime = ts[0];
 3257         vattr.va_mtime = ts[1];
 3258         if (setbirthtime)
 3259                 vattr.va_birthtime = ts[1];
 3260         if (numtimes > 2)
 3261                 vattr.va_birthtime = ts[2];
 3262         if (nullflag)
 3263                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3264 #ifdef MAC
 3265         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3266             vattr.va_mtime);
 3267 #endif
 3268         if (error == 0)
 3269                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3270         VOP_UNLOCK(vp, 0);
 3271         vn_finished_write(mp);
 3272         return (error);
 3273 }
 3274 
 3275 /*
 3276  * Set the access and modification times of a file.
 3277  */
 3278 #ifndef _SYS_SYSPROTO_H_
 3279 struct utimes_args {
 3280         char    *path;
 3281         struct  timeval *tptr;
 3282 };
 3283 #endif
 3284 int
 3285 sys_utimes(td, uap)
 3286         struct thread *td;
 3287         register struct utimes_args /* {
 3288                 char *path;
 3289                 struct timeval *tptr;
 3290         } */ *uap;
 3291 {
 3292 
 3293         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3294             UIO_USERSPACE));
 3295 }
 3296 
 3297 #ifndef _SYS_SYSPROTO_H_
 3298 struct futimesat_args {
 3299         int fd;
 3300         const char * path;
 3301         const struct timeval * times;
 3302 };
 3303 #endif
 3304 int
 3305 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3306 {
 3307 
 3308         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3309             uap->times, UIO_USERSPACE));
 3310 }
 3311 
 3312 int
 3313 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3314     struct timeval *tptr, enum uio_seg tptrseg)
 3315 {
 3316 
 3317         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3318 }
 3319 
 3320 int
 3321 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3322     struct timeval *tptr, enum uio_seg tptrseg)
 3323 {
 3324         struct nameidata nd;
 3325         struct timespec ts[2];
 3326         int error, vfslocked;
 3327 
 3328         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3329                 return (error);
 3330         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
 3331             path, fd, CAP_FUTIMES, td);
 3332 
 3333         if ((error = namei(&nd)) != 0)
 3334                 return (error);
 3335         vfslocked = NDHASGIANT(&nd);
 3336         NDFREE(&nd, NDF_ONLY_PNBUF);
 3337         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3338         vrele(nd.ni_vp);
 3339         VFS_UNLOCK_GIANT(vfslocked);
 3340         return (error);
 3341 }
 3342 
 3343 /*
 3344  * Set the access and modification times of a file.
 3345  */
 3346 #ifndef _SYS_SYSPROTO_H_
 3347 struct lutimes_args {
 3348         char    *path;
 3349         struct  timeval *tptr;
 3350 };
 3351 #endif
 3352 int
 3353 sys_lutimes(td, uap)
 3354         struct thread *td;
 3355         register struct lutimes_args /* {
 3356                 char *path;
 3357                 struct timeval *tptr;
 3358         } */ *uap;
 3359 {
 3360 
 3361         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3362             UIO_USERSPACE));
 3363 }
 3364 
 3365 int
 3366 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3367     struct timeval *tptr, enum uio_seg tptrseg)
 3368 {
 3369         struct timespec ts[2];
 3370         int error;
 3371         struct nameidata nd;
 3372         int vfslocked;
 3373 
 3374         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3375                 return (error);
 3376         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3377         if ((error = namei(&nd)) != 0)
 3378                 return (error);
 3379         vfslocked = NDHASGIANT(&nd);
 3380         NDFREE(&nd, NDF_ONLY_PNBUF);
 3381         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3382         vrele(nd.ni_vp);
 3383         VFS_UNLOCK_GIANT(vfslocked);
 3384         return (error);
 3385 }
 3386 
 3387 /*
 3388  * Set the access and modification times of a file.
 3389  */
 3390 #ifndef _SYS_SYSPROTO_H_
 3391 struct futimes_args {
 3392         int     fd;
 3393         struct  timeval *tptr;
 3394 };
 3395 #endif
 3396 int
 3397 sys_futimes(td, uap)
 3398         struct thread *td;
 3399         register struct futimes_args /* {
 3400                 int  fd;
 3401                 struct timeval *tptr;
 3402         } */ *uap;
 3403 {
 3404 
 3405         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3406 }
 3407 
 3408 int
 3409 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3410     enum uio_seg tptrseg)
 3411 {
 3412         struct timespec ts[2];
 3413         struct file *fp;
 3414         int vfslocked;
 3415         int error;
 3416 
 3417         AUDIT_ARG_FD(fd);
 3418         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3419                 return (error);
 3420         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
 3421             != 0)
 3422                 return (error);
 3423         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3424 #ifdef AUDIT
 3425         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3426         AUDIT_ARG_VNODE1(fp->f_vnode);
 3427         VOP_UNLOCK(fp->f_vnode, 0);
 3428 #endif
 3429         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3430         VFS_UNLOCK_GIANT(vfslocked);
 3431         fdrop(fp, td);
 3432         return (error);
 3433 }
 3434 
 3435 /*
 3436  * Truncate a file given its path name.
 3437  */
 3438 #ifndef _SYS_SYSPROTO_H_
 3439 struct truncate_args {
 3440         char    *path;
 3441         int     pad;
 3442         off_t   length;
 3443 };
 3444 #endif
 3445 int
 3446 sys_truncate(td, uap)
 3447         struct thread *td;
 3448         register struct truncate_args /* {
 3449                 char *path;
 3450                 int pad;
 3451                 off_t length;
 3452         } */ *uap;
 3453 {
 3454 
 3455         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3456 }
 3457 
 3458 int
 3459 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3460 {
 3461         struct mount *mp;
 3462         struct vnode *vp;
 3463         struct vattr vattr;
 3464         int error;
 3465         struct nameidata nd;
 3466         int vfslocked;
 3467 
 3468         if (length < 0)
 3469                 return(EINVAL);
 3470         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3471         if ((error = namei(&nd)) != 0)
 3472                 return (error);
 3473         vfslocked = NDHASGIANT(&nd);
 3474         vp = nd.ni_vp;
 3475         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3476                 vrele(vp);
 3477                 VFS_UNLOCK_GIANT(vfslocked);
 3478                 return (error);
 3479         }
 3480         NDFREE(&nd, NDF_ONLY_PNBUF);
 3481         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3482         if (vp->v_type == VDIR)
 3483                 error = EISDIR;
 3484 #ifdef MAC
 3485         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3486         }
 3487 #endif
 3488         else if ((error = vn_writechk(vp)) == 0 &&
 3489             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3490                 VATTR_NULL(&vattr);
 3491                 vattr.va_size = length;
 3492                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3493         }
 3494         vput(vp);
 3495         vn_finished_write(mp);
 3496         VFS_UNLOCK_GIANT(vfslocked);
 3497         return (error);
 3498 }
 3499 
 3500 #if defined(COMPAT_43)
 3501 /*
 3502  * Truncate a file given its path name.
 3503  */
 3504 #ifndef _SYS_SYSPROTO_H_
 3505 struct otruncate_args {
 3506         char    *path;
 3507         long    length;
 3508 };
 3509 #endif
 3510 int
 3511 otruncate(td, uap)
 3512         struct thread *td;
 3513         register struct otruncate_args /* {
 3514                 char *path;
 3515                 long length;
 3516         } */ *uap;
 3517 {
 3518         struct truncate_args /* {
 3519                 char *path;
 3520                 int pad;
 3521                 off_t length;
 3522         } */ nuap;
 3523 
 3524         nuap.path = uap->path;
 3525         nuap.length = uap->length;
 3526         return (sys_truncate(td, &nuap));
 3527 }
 3528 #endif /* COMPAT_43 */
 3529 
 3530 /* Versions with the pad argument */
 3531 int
 3532 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3533 {
 3534         struct truncate_args ouap;
 3535 
 3536         ouap.path = uap->path;
 3537         ouap.length = uap->length;
 3538         return (sys_truncate(td, &ouap));
 3539 }
 3540 
 3541 int
 3542 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3543 {
 3544         struct ftruncate_args ouap;
 3545 
 3546         ouap.fd = uap->fd;
 3547         ouap.length = uap->length;
 3548         return (sys_ftruncate(td, &ouap));
 3549 }
 3550 
 3551 /*
 3552  * Sync an open file.
 3553  */
 3554 #ifndef _SYS_SYSPROTO_H_
 3555 struct fsync_args {
 3556         int     fd;
 3557 };
 3558 #endif
 3559 int
 3560 sys_fsync(td, uap)
 3561         struct thread *td;
 3562         struct fsync_args /* {
 3563                 int fd;
 3564         } */ *uap;
 3565 {
 3566         struct vnode *vp;
 3567         struct mount *mp;
 3568         struct file *fp;
 3569         int vfslocked;
 3570         int error, lock_flags;
 3571 
 3572         AUDIT_ARG_FD(uap->fd);
 3573         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
 3574             &fp)) != 0)
 3575                 return (error);
 3576         vp = fp->f_vnode;
 3577         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3578         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3579                 goto drop;
 3580         if (MNT_SHARED_WRITES(mp) ||
 3581             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3582                 lock_flags = LK_SHARED;
 3583         } else {
 3584                 lock_flags = LK_EXCLUSIVE;
 3585         }
 3586         vn_lock(vp, lock_flags | LK_RETRY);
 3587         AUDIT_ARG_VNODE1(vp);
 3588         if (vp->v_object != NULL) {
 3589                 VM_OBJECT_LOCK(vp->v_object);
 3590                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3591                 VM_OBJECT_UNLOCK(vp->v_object);
 3592         }
 3593         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3594 
 3595         VOP_UNLOCK(vp, 0);
 3596         vn_finished_write(mp);
 3597 drop:
 3598         VFS_UNLOCK_GIANT(vfslocked);
 3599         fdrop(fp, td);
 3600         return (error);
 3601 }
 3602 
 3603 /*
 3604  * Rename files.  Source and destination must either both be directories, or
 3605  * both not be directories.  If target is a directory, it must be empty.
 3606  */
 3607 #ifndef _SYS_SYSPROTO_H_
 3608 struct rename_args {
 3609         char    *from;
 3610         char    *to;
 3611 };
 3612 #endif
 3613 int
 3614 sys_rename(td, uap)
 3615         struct thread *td;
 3616         register struct rename_args /* {
 3617                 char *from;
 3618                 char *to;
 3619         } */ *uap;
 3620 {
 3621 
 3622         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3623 }
 3624 
 3625 #ifndef _SYS_SYSPROTO_H_
 3626 struct renameat_args {
 3627         int     oldfd;
 3628         char    *old;
 3629         int     newfd;
 3630         char    *new;
 3631 };
 3632 #endif
 3633 int
 3634 sys_renameat(struct thread *td, struct renameat_args *uap)
 3635 {
 3636 
 3637         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3638             UIO_USERSPACE));
 3639 }
 3640 
 3641 int
 3642 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3643 {
 3644 
 3645         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3646 }
 3647 
 3648 int
 3649 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3650     enum uio_seg pathseg)
 3651 {
              /*
               * Common implementation of rename() and renameat(): look up the
               * source ("old", relative to oldfd) and the target ("new",
               * relative to newfd), reject directory/non-directory mismatches,
               * and hand the vnodes to VOP_RENAME().
               *
               * Returns 0 on success or an errno value.  The value -1 is used
               * internally to mean "source and target are the same vnode"; it
               * is turned into 0 before returning.
               */
 3652         struct mount *mp = NULL;
 3653         struct vnode *tvp, *fvp, *tdvp;
 3654         struct nameidata fromnd, tond;
 3655         int tvfslocked;
 3656         int fvfslocked;
 3657         int error;
 3658 
 3659         bwillwrite();
              /*
               * With MAC the source lookup asks for LOCKPARENT | LOCKLEAF so
               * that the rename_from check below can be made; both vnodes are
               * unlocked again right after that check.  Without MAC only
               * WANTPARENT is requested.
               */
 3660 #ifdef MAC
 3661         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3662             MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3663 #else
 3664         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
 3665             AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 3666 #endif
 3667 
 3668         if ((error = namei(&fromnd)) != 0)
 3669                 return (error);
 3670         fvfslocked = NDHASGIANT(&fromnd);
              /* No target lookup yet; lets out1 unlock unconditionally. */
 3671         tvfslocked = 0;
 3672 #ifdef MAC
 3673         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3674             fromnd.ni_vp, &fromnd.ni_cnd);
 3675         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3676         if (fromnd.ni_dvp != fromnd.ni_vp)
 3677                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3678 #endif
 3679         fvp = fromnd.ni_vp;
 3680         if (error == 0)
 3681                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
              /*
               * Either the MAC check refused the rename or vn_start_write()
               * failed.  This path jumps to out1 and therefore does not call
               * vn_finished_write().
               */
 3682         if (error != 0) {
 3683                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3684                 vrele(fromnd.ni_dvp);
 3685                 vrele(fvp);
 3686                 goto out1;
 3687         }
 3688         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3689             SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
 3690             td);
 3691         if (fromnd.ni_vp->v_type == VDIR)
 3692                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3693         if ((error = namei(&tond)) != 0) {
 3694                 /* Translate error code for rename("dir1", "dir2/."). */
 3695                 if (error == EISDIR && fvp->v_type == VDIR)
 3696                         error = EINVAL;
 3697                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3698                 vrele(fromnd.ni_dvp);
 3699                 vrele(fvp);
 3700                 vn_finished_write(mp);
 3701                 goto out1;
 3702         }
 3703         tvfslocked = NDHASGIANT(&tond);
 3704         tdvp = tond.ni_dvp;
 3705         tvp = tond.ni_vp;
              /* An existing target must be of the same kind as the source. */
 3706         if (tvp != NULL) {
 3707                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3708                         error = ENOTDIR;
 3709                         goto out;
 3710                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3711                         error = EISDIR;
 3712                         goto out;
 3713                 }
 3714         }
              /* The source may not be the directory that will hold the target. */
 3715         if (fvp == tdvp) {
 3716                 error = EINVAL;
 3717                 goto out;
 3718         }
 3719         /*
 3720          * If the source is the same as the destination (that is, if they
 3721          * are links to the same vnode), then there is nothing to do.
 3722          */
              /*
               * -1 is non-zero, so the code at "out" skips VOP_RENAME() and
               * releases the vnodes; it is reported to the caller as success.
               */
 3723         if (fvp == tvp)
 3724                 error = -1;
 3725 #ifdef MAC
 3726         else
 3727                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3728                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3729 #endif
 3730 out:
 3731         if (!error) {
                      /*
                       * NOTE(review): nothing on this branch releases the
                       * source or target vnodes after the call; presumably
                       * VOP_RENAME() consumes them - confirm against
                       * VOP_RENAME(9).
                       */
 3732                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3733                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3734                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3735                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3736         } else {
                      /*
                       * VOP_RENAME() was not called: release the target and
                       * source vnodes here.
                       */
 3737                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3738                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3739                 if (tvp)
 3740                         vput(tvp);
 3741                 if (tdvp == tvp)
 3742                         vrele(tdvp);
 3743                 else
 3744                         vput(tdvp);
 3745                 vrele(fromnd.ni_dvp);
 3746                 vrele(fvp);
 3747         }
 3748         vrele(tond.ni_startdir);
 3749         vn_finished_write(mp);
      /* Also reached directly when the target lookup never succeeded. */
 3750 out1:
 3751         if (fromnd.ni_startdir)
 3752                 vrele(fromnd.ni_startdir);
 3753         VFS_UNLOCK_GIANT(fvfslocked);
 3754         VFS_UNLOCK_GIANT(tvfslocked);
              /* Map the internal "nothing to do" sentinel to success. */
 3755         if (error == -1)
 3756                 return (0);
 3757         return (error);
 3758 }
 3759 
 3760 /*
 3761  * Make a directory file.
 3762  */
 3763 #ifndef _SYS_SYSPROTO_H_
 3764 struct mkdir_args {
 3765         char    *path;
 3766         int     mode;
 3767 };
 3768 #endif
 3769 int
 3770 sys_mkdir(td, uap)
 3771         struct thread *td;
 3772         register struct mkdir_args /* {
 3773                 char *path;
 3774                 int mode;
 3775         } */ *uap;
 3776 {
 3777 
 3778         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3779 }
 3780 
 3781 #ifndef _SYS_SYSPROTO_H_
 3782 struct mkdirat_args {
 3783         int     fd;
 3784         char    *path;
 3785         mode_t  mode;
 3786 };
 3787 #endif
 3788 int
 3789 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3790 {
 3791 
 3792         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3793 }
 3794 
 3795 int
 3796 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3797 {
 3798 
 3799         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3800 }
 3801 
 3802 int
 3803 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3804     int mode)
 3805 {
 3806         struct mount *mp;
 3807         struct vnode *vp;
 3808         struct vattr vattr;
 3809         int error;
 3810         struct nameidata nd;
 3811         int vfslocked;
 3812 
 3813         AUDIT_ARG_MODE(mode);
 3814 restart:
 3815         bwillwrite();
 3816         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
 3817             AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
 3818         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3819         if ((error = namei(&nd)) != 0)
 3820                 return (error);
 3821         vfslocked = NDHASGIANT(&nd);
 3822         vp = nd.ni_vp;
 3823         if (vp != NULL) {
 3824                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3825                 /*
 3826                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3827                  * the strange behaviour of leaving the vnode unlocked
 3828                  * if the target is the same vnode as the parent.
 3829                  */
 3830                 if (vp == nd.ni_dvp)
 3831                         vrele(nd.ni_dvp);
 3832                 else
 3833                         vput(nd.ni_dvp);
 3834                 vrele(vp);
 3835                 VFS_UNLOCK_GIANT(vfslocked);
 3836                 return (EEXIST);
 3837         }
 3838         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3839                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3840                 vput(nd.ni_dvp);
 3841                 VFS_UNLOCK_GIANT(vfslocked);
 3842                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3843                         return (error);
 3844                 goto restart;
 3845         }
 3846         VATTR_NULL(&vattr);
 3847         vattr.va_type = VDIR;
 3848         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3849 #ifdef MAC
 3850         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3851             &vattr);
 3852         if (error)
 3853                 goto out;
 3854 #endif
 3855         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3856 #ifdef MAC
 3857 out:
 3858 #endif
 3859         NDFREE(&nd, NDF_ONLY_PNBUF);
 3860         vput(nd.ni_dvp);
 3861         if (!error)
 3862                 vput(nd.ni_vp);
 3863         vn_finished_write(mp);
 3864         VFS_UNLOCK_GIANT(vfslocked);
 3865         return (error);
 3866 }
 3867 
 3868 /*
 3869  * Remove a directory file.
 3870  */
 3871 #ifndef _SYS_SYSPROTO_H_
 3872 struct rmdir_args {
 3873         char    *path;
 3874 };
 3875 #endif
 3876 int
 3877 sys_rmdir(td, uap)
 3878         struct thread *td;
 3879         struct rmdir_args /* {
 3880                 char *path;
 3881         } */ *uap;
 3882 {
 3883 
 3884         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3885 }
 3886 
 3887 int
 3888 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3889 {
 3890 
 3891         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3892 }
 3893 
 3894 int
 3895 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3896 {
 3897         struct mount *mp;
 3898         struct vnode *vp;
 3899         int error;
 3900         struct nameidata nd;
 3901         int vfslocked;
 3902 
 3903 restart:
 3904         bwillwrite();
 3905         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
 3906             AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
 3907         if ((error = namei(&nd)) != 0)
 3908                 return (error);
 3909         vfslocked = NDHASGIANT(&nd);
 3910         vp = nd.ni_vp;
 3911         if (vp->v_type != VDIR) {
 3912                 error = ENOTDIR;
 3913                 goto out;
 3914         }
 3915         /*
 3916          * No rmdir "." please.
 3917          */
 3918         if (nd.ni_dvp == vp) {
 3919                 error = EINVAL;
 3920                 goto out;
 3921         }
 3922         /*
 3923          * The root of a mounted filesystem cannot be deleted.
 3924          */
 3925         if (vp->v_vflag & VV_ROOT) {
 3926                 error = EBUSY;
 3927                 goto out;
 3928         }
 3929 #ifdef MAC
 3930         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3931             &nd.ni_cnd);
 3932         if (error)
 3933                 goto out;
 3934 #endif
 3935         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3936                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3937                 vput(vp);
 3938                 if (nd.ni_dvp == vp)
 3939                         vrele(nd.ni_dvp);
 3940                 else
 3941                         vput(nd.ni_dvp);
 3942                 VFS_UNLOCK_GIANT(vfslocked);
 3943                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3944                         return (error);
 3945                 goto restart;
 3946         }
 3947         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3948         vn_finished_write(mp);
 3949 out:
 3950         NDFREE(&nd, NDF_ONLY_PNBUF);
 3951         vput(vp);
 3952         if (nd.ni_dvp == vp)
 3953                 vrele(nd.ni_dvp);
 3954         else
 3955                 vput(nd.ni_dvp);
 3956         VFS_UNLOCK_GIANT(vfslocked);
 3957         return (error);
 3958 }
 3959 
 3960 #ifdef COMPAT_43
 3961 /*
 3962  * Read a block of directory entries in a filesystem independent format.
 3963  */
 3964 #ifndef _SYS_SYSPROTO_H_
 3965 struct ogetdirentries_args {
 3966         int     fd;
 3967         char    *buf;
 3968         u_int   count;
 3969         long    *basep;
 3970 };
 3971 #endif
 3972 int
 3973 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3974 {
 3975         long loff;
 3976         int error;
 3977 
 3978         error = kern_ogetdirentries(td, uap, &loff);
 3979         if (error == 0)
 3980                 error = copyout(&loff, uap->basep, sizeof(long));
 3981         return (error);
 3982 }
 3983 
 3984 int
 3985 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3986     long *ploff)
 3987 {
 3988         struct vnode *vp;
 3989         struct file *fp;
 3990         struct uio auio, kuio;
 3991         struct iovec aiov, kiov;
 3992         struct dirent *dp, *edp;
 3993         caddr_t dirbuf;
 3994         int error, eofflag, readcnt, vfslocked;
 3995         long loff;
 3996 
 3997         /* XXX arbitrary sanity limit on `count'. */
 3998         if (uap->count > 64 * 1024)
 3999                 return (EINVAL);
 4000         if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
 4001             &fp)) != 0)
 4002                 return (error);
 4003         if ((fp->f_flag & FREAD) == 0) {
 4004                 fdrop(fp, td);
 4005                 return (EBADF);
 4006         }
 4007         vp = fp->f_vnode;
 4008 unionread:
 4009         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4010         if (vp->v_type != VDIR) {
 4011                 VFS_UNLOCK_GIANT(vfslocked);
 4012                 fdrop(fp, td);
 4013                 return (EINVAL);
 4014         }
 4015         aiov.iov_base = uap->buf;
 4016         aiov.iov_len = uap->count;
 4017         auio.uio_iov = &aiov;
 4018         auio.uio_iovcnt = 1;
 4019         auio.uio_rw = UIO_READ;
 4020         auio.uio_segflg = UIO_USERSPACE;
 4021         auio.uio_td = td;
 4022         auio.uio_resid = uap->count;
 4023         vn_lock(vp, LK_SHARED | LK_RETRY);
 4024         loff = auio.uio_offset = fp->f_offset;
 4025 #ifdef MAC
 4026         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4027         if (error) {
 4028                 VOP_UNLOCK(vp, 0);
 4029                 VFS_UNLOCK_GIANT(vfslocked);
 4030                 fdrop(fp, td);
 4031                 return (error);
 4032         }
 4033 #endif
 4034 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 4035                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 4036                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 4037                             NULL, NULL);
 4038                         fp->f_offset = auio.uio_offset;
 4039                 } else
 4040 #       endif
 4041         {
 4042                 kuio = auio;
 4043                 kuio.uio_iov = &kiov;
 4044                 kuio.uio_segflg = UIO_SYSSPACE;
 4045                 kiov.iov_len = uap->count;
 4046                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 4047                 kiov.iov_base = dirbuf;
 4048                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 4049                             NULL, NULL);
 4050                 fp->f_offset = kuio.uio_offset;
 4051                 if (error == 0) {
 4052                         readcnt = uap->count - kuio.uio_resid;
 4053                         edp = (struct dirent *)&dirbuf[readcnt];
 4054                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 4055 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 4056                                         /*
 4057                                          * The expected low byte of
 4058                                          * dp->d_namlen is our dp->d_type.
 4059                                          * The high MBZ byte of dp->d_namlen
 4060                                          * is our dp->d_namlen.
 4061                                          */
 4062                                         dp->d_type = dp->d_namlen;
 4063                                         dp->d_namlen = 0;
 4064 #                               else
 4065                                         /*
 4066                                          * The dp->d_type is the high byte
 4067                                          * of the expected dp->d_namlen,
 4068                                          * so must be zero'ed.
 4069                                          */
 4070                                         dp->d_type = 0;
 4071 #                               endif
 4072                                 if (dp->d_reclen > 0) {
 4073                                         dp = (struct dirent *)
 4074                                             ((char *)dp + dp->d_reclen);
 4075                                 } else {
 4076                                         error = EIO;
 4077                                         break;
 4078                                 }
 4079                         }
 4080                         if (dp >= edp)
 4081                                 error = uiomove(dirbuf, readcnt, &auio);
 4082                 }
 4083                 free(dirbuf, M_TEMP);
 4084         }
 4085         if (error) {
 4086                 VOP_UNLOCK(vp, 0);
 4087                 VFS_UNLOCK_GIANT(vfslocked);
 4088                 fdrop(fp, td);
 4089                 return (error);
 4090         }
 4091         if (uap->count == auio.uio_resid &&
 4092             (vp->v_vflag & VV_ROOT) &&
 4093             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4094                 struct vnode *tvp = vp;
 4095                 vp = vp->v_mount->mnt_vnodecovered;
 4096                 VREF(vp);
 4097                 fp->f_vnode = vp;
 4098                 fp->f_data = vp;
 4099                 fp->f_offset = 0;
 4100                 vput(tvp);
 4101                 VFS_UNLOCK_GIANT(vfslocked);
 4102                 goto unionread;
 4103         }
 4104         VOP_UNLOCK(vp, 0);
 4105         VFS_UNLOCK_GIANT(vfslocked);
 4106         fdrop(fp, td);
 4107         td->td_retval[0] = uap->count - auio.uio_resid;
 4108         if (error == 0)
 4109                 *ploff = loff;
 4110         return (error);
 4111 }
 4112 #endif /* COMPAT_43 */
 4113 
 4114 /*
 4115  * Read a block of directory entries in a filesystem independent format.
 4116  */
 4117 #ifndef _SYS_SYSPROTO_H_
 4118 struct getdirentries_args {
 4119         int     fd;
 4120         char    *buf;
 4121         u_int   count;
 4122         long    *basep;
 4123 };
 4124 #endif
 4125 int
 4126 sys_getdirentries(td, uap)
 4127         struct thread *td;
 4128         register struct getdirentries_args /* {
 4129                 int fd;
 4130                 char *buf;
 4131                 u_int count;
 4132                 long *basep;
 4133         } */ *uap;
 4134 {
 4135         long base;
 4136         int error;
 4137 
 4138         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 4139         if (error)
 4140                 return (error);
 4141         if (uap->basep != NULL)
 4142                 error = copyout(&base, uap->basep, sizeof(long));
 4143         return (error);
 4144 }
 4145 
 4146 int
 4147 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4148     long *basep)
 4149 {
 4150         struct vnode *vp;
 4151         struct file *fp;
 4152         struct uio auio;
 4153         struct iovec aiov;
 4154         int vfslocked;
 4155         long loff;
 4156         int error, eofflag;
 4157 
 4158         AUDIT_ARG_FD(fd);
 4159         auio.uio_resid = count;
 4160         if (auio.uio_resid > IOSIZE_MAX)
 4161                 return (EINVAL);
 4162         if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
 4163             &fp)) != 0)
 4164                 return (error);
 4165         if ((fp->f_flag & FREAD) == 0) {
 4166                 fdrop(fp, td);
 4167                 return (EBADF);
 4168         }
 4169         vp = fp->f_vnode;
 4170 unionread:
 4171         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4172         if (vp->v_type != VDIR) {
 4173                 VFS_UNLOCK_GIANT(vfslocked);
 4174                 error = EINVAL;
 4175                 goto fail;
 4176         }
 4177         aiov.iov_base = buf;
 4178         aiov.iov_len = count;
 4179         auio.uio_iov = &aiov;
 4180         auio.uio_iovcnt = 1;
 4181         auio.uio_rw = UIO_READ;
 4182         auio.uio_segflg = UIO_USERSPACE;
 4183         auio.uio_td = td;
 4184         vn_lock(vp, LK_SHARED | LK_RETRY);
 4185         AUDIT_ARG_VNODE1(vp);
 4186         loff = auio.uio_offset = fp->f_offset;
 4187 #ifdef MAC
 4188         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4189         if (error == 0)
 4190 #endif
 4191                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4192                     NULL);
 4193         fp->f_offset = auio.uio_offset;
 4194         if (error) {
 4195                 VOP_UNLOCK(vp, 0);
 4196                 VFS_UNLOCK_GIANT(vfslocked);
 4197                 goto fail;
 4198         }
 4199         if (count == auio.uio_resid &&
 4200             (vp->v_vflag & VV_ROOT) &&
 4201             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4202                 struct vnode *tvp = vp;
 4203                 vp = vp->v_mount->mnt_vnodecovered;
 4204                 VREF(vp);
 4205                 fp->f_vnode = vp;
 4206                 fp->f_data = vp;
 4207                 fp->f_offset = 0;
 4208                 vput(tvp);
 4209                 VFS_UNLOCK_GIANT(vfslocked);
 4210                 goto unionread;
 4211         }
 4212         VOP_UNLOCK(vp, 0);
 4213         VFS_UNLOCK_GIANT(vfslocked);
 4214         *basep = loff;
 4215         td->td_retval[0] = count - auio.uio_resid;
 4216 fail:
 4217         fdrop(fp, td);
 4218         return (error);
 4219 }
 4220 
 4221 #ifndef _SYS_SYSPROTO_H_
 4222 struct getdents_args {
 4223         int fd;
 4224         char *buf;
 4225         size_t count;
 4226 };
 4227 #endif
 4228 int
 4229 sys_getdents(td, uap)
 4230         struct thread *td;
 4231         register struct getdents_args /* {
 4232                 int fd;
 4233                 char *buf;
 4234                 u_int count;
 4235         } */ *uap;
 4236 {
 4237         struct getdirentries_args ap;
 4238         ap.fd = uap->fd;
 4239         ap.buf = uap->buf;
 4240         ap.count = uap->count;
 4241         ap.basep = NULL;
 4242         return (sys_getdirentries(td, &ap));
 4243 }
 4244 
 4245 /*
 4246  * Set the mode mask for creation of filesystem nodes.
 4247  */
 4248 #ifndef _SYS_SYSPROTO_H_
 4249 struct umask_args {
 4250         int     newmask;
 4251 };
 4252 #endif
 4253 int
 4254 sys_umask(td, uap)
 4255         struct thread *td;
 4256         struct umask_args /* {
 4257                 int newmask;
 4258         } */ *uap;
 4259 {
 4260         register struct filedesc *fdp;
 4261 
 4262         FILEDESC_XLOCK(td->td_proc->p_fd);
 4263         fdp = td->td_proc->p_fd;
 4264         td->td_retval[0] = fdp->fd_cmask;
 4265         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4266         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4267         return (0);
 4268 }
 4269 
 4270 /*
 4271  * Void all references to file by ripping underlying filesystem away from
 4272  * vnode.
 4273  */
 4274 #ifndef _SYS_SYSPROTO_H_
 4275 struct revoke_args {
 4276         char    *path;
 4277 };
 4278 #endif
 4279 int
 4280 sys_revoke(td, uap)
 4281         struct thread *td;
 4282         register struct revoke_args /* {
 4283                 char *path;
 4284         } */ *uap;
 4285 {
 4286         struct vnode *vp;
 4287         struct vattr vattr;
 4288         int error;
 4289         struct nameidata nd;
 4290         int vfslocked;
 4291 
 4292         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4293             UIO_USERSPACE, uap->path, td);
 4294         if ((error = namei(&nd)) != 0)
 4295                 return (error);
 4296         vfslocked = NDHASGIANT(&nd);
 4297         vp = nd.ni_vp;
 4298         NDFREE(&nd, NDF_ONLY_PNBUF);
 4299         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4300                 error = EINVAL;
 4301                 goto out;
 4302         }
 4303 #ifdef MAC
 4304         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4305         if (error)
 4306                 goto out;
 4307 #endif
 4308         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4309         if (error)
 4310                 goto out;
 4311         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4312                 error = priv_check(td, PRIV_VFS_ADMIN);
 4313                 if (error)
 4314                         goto out;
 4315         }
 4316         if (vcount(vp) > 1)
 4317                 VOP_REVOKE(vp, REVOKEALL);
 4318 out:
 4319         vput(vp);
 4320         VFS_UNLOCK_GIANT(vfslocked);
 4321         return (error);
 4322 }
 4323 
 4324 /*
 4325  * Convert a user file descriptor to a kernel file entry and check that, if it
 4326  * is a capability, the correct rights are present. A reference on the file
 4327  * entry is held upon returning.
 4328  */
 4329 int
 4330 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
 4331     struct file **fpp)
 4332 {
 4333         struct file *fp;
 4334 #ifdef CAPABILITIES
 4335         struct file *fp_fromcap;
 4336 #endif
 4337         int error;
 4338 
 4339         error = 0;
 4340         fp = NULL;
 4341         if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
 4342                 return (EBADF);
 4343 #ifdef CAPABILITIES
 4344         /*
 4345          * If the file descriptor is for a capability, test rights and use the
 4346          * file descriptor referenced by the capability.
 4347          */
 4348         error = cap_funwrap(fp, rights, &fp_fromcap);
 4349         if (error) {
 4350                 fdrop(fp, curthread);
 4351                 return (error);
 4352         }
 4353         if (fp != fp_fromcap) {
 4354                 fhold(fp_fromcap);
 4355                 fdrop(fp, curthread);
 4356                 fp = fp_fromcap;
 4357         }
 4358 #endif /* CAPABILITIES */
 4359 
 4360         /*
 4361          * The file could be not of the vnode type, or it may be not
 4362          * yet fully initialized, in which case the f_vnode pointer
 4363          * may be set, but f_ops is still badfileops.  E.g.,
 4364          * devfs_open() transiently create such situation to
 4365          * facilitate csw d_fdopen().
 4366          *
 4367          * Dupfdopen() handling in kern_openat() installs the
 4368          * half-baked file into the process descriptor table, allowing
 4369          * other thread to dereference it. Guard against the race by
 4370          * checking f_ops.
 4371          */
 4372         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4373                 fdrop(fp, curthread);
 4374                 return (EINVAL);
 4375         }
 4376         *fpp = fp;
 4377         return (0);
 4378 }
 4379 
 4380 
 4381 /*
 4382  * Get an (NFS) file handle.
 4383  */
 4384 #ifndef _SYS_SYSPROTO_H_
 4385 struct lgetfh_args {
 4386         char    *fname;
 4387         fhandle_t *fhp;
 4388 };
 4389 #endif
 4390 int
 4391 sys_lgetfh(td, uap)
 4392         struct thread *td;
 4393         register struct lgetfh_args *uap;
 4394 {
 4395         struct nameidata nd;
 4396         fhandle_t fh;
 4397         register struct vnode *vp;
 4398         int vfslocked;
 4399         int error;
 4400 
 4401         error = priv_check(td, PRIV_VFS_GETFH);
 4402         if (error)
 4403                 return (error);
 4404         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4405             UIO_USERSPACE, uap->fname, td);
 4406         error = namei(&nd);
 4407         if (error)
 4408                 return (error);
 4409         vfslocked = NDHASGIANT(&nd);
 4410         NDFREE(&nd, NDF_ONLY_PNBUF);
 4411         vp = nd.ni_vp;
 4412         bzero(&fh, sizeof(fh));
 4413         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4414         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4415         vput(vp);
 4416         VFS_UNLOCK_GIANT(vfslocked);
 4417         if (error)
 4418                 return (error);
 4419         error = copyout(&fh, uap->fhp, sizeof (fh));
 4420         return (error);
 4421 }
 4422 
 4423 #ifndef _SYS_SYSPROTO_H_
 4424 struct getfh_args {
 4425         char    *fname;
 4426         fhandle_t *fhp;
 4427 };
 4428 #endif
 4429 int
 4430 sys_getfh(td, uap)
 4431         struct thread *td;
 4432         register struct getfh_args *uap;
 4433 {
 4434         struct nameidata nd;
 4435         fhandle_t fh;
 4436         register struct vnode *vp;
 4437         int vfslocked;
 4438         int error;
 4439 
 4440         error = priv_check(td, PRIV_VFS_GETFH);
 4441         if (error)
 4442                 return (error);
 4443         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4444             UIO_USERSPACE, uap->fname, td);
 4445         error = namei(&nd);
 4446         if (error)
 4447                 return (error);
 4448         vfslocked = NDHASGIANT(&nd);
 4449         NDFREE(&nd, NDF_ONLY_PNBUF);
 4450         vp = nd.ni_vp;
 4451         bzero(&fh, sizeof(fh));
 4452         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4453         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4454         vput(vp);
 4455         VFS_UNLOCK_GIANT(vfslocked);
 4456         if (error)
 4457                 return (error);
 4458         error = copyout(&fh, uap->fhp, sizeof (fh));
 4459         return (error);
 4460 }
 4461 
 4462 /*
 4463  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4464  * open descriptor.
 4465  *
 4466  * warning: do not remove the priv_check() call or this becomes one giant
 4467  * security hole.
 4468  */
 4469 #ifndef _SYS_SYSPROTO_H_
 4470 struct fhopen_args {
 4471         const struct fhandle *u_fhp;
 4472         int flags;
 4473 };
 4474 #endif
 4475 int
 4476 sys_fhopen(td, uap)
 4477         struct thread *td;
 4478         struct fhopen_args /* {
 4479                 const struct fhandle *u_fhp;
 4480                 int flags;
 4481         } */ *uap;
 4482 {
 4483         struct proc *p = td->td_proc;
 4484         struct mount *mp;
 4485         struct vnode *vp;
 4486         struct fhandle fhp;
 4487         struct vattr vat;
 4488         struct vattr *vap = &vat;
 4489         struct flock lf;
 4490         struct file *fp;
 4491         register struct filedesc *fdp = p->p_fd;
 4492         int fmode, error, type;
 4493         accmode_t accmode;
 4494         struct file *nfp;
 4495         int vfslocked;
 4496         int indx;
 4497 
 4498         error = priv_check(td, PRIV_VFS_FHOPEN);
 4499         if (error)
 4500                 return (error);
 4501         fmode = FFLAGS(uap->flags);
 4502         /* why not allow a non-read/write open for our lockd? */
 4503         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4504                 return (EINVAL);
 4505         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4506         if (error)
 4507                 return(error);
 4508         /* find the mount point */
 4509         mp = vfs_busyfs(&fhp.fh_fsid);
 4510         if (mp == NULL)
 4511                 return (ESTALE);
 4512         vfslocked = VFS_LOCK_GIANT(mp);
 4513         /* now give me my vnode, it gets returned to me locked */
 4514         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4515         vfs_unbusy(mp);
 4516         if (error)
 4517                 goto out;
 4518         /*
 4519          * from now on we have to make sure not
 4520          * to forget about the vnode
 4521          * any error that causes an abort must vput(vp)
 4522          * just set error = err and 'goto bad;'.
 4523          */
 4524 
 4525         /*
 4526          * from vn_open
 4527          */
 4528         if (vp->v_type == VLNK) {
 4529                 error = EMLINK;
 4530                 goto bad;
 4531         }
 4532         if (vp->v_type == VSOCK) {
 4533                 error = EOPNOTSUPP;
 4534                 goto bad;
 4535         }
 4536         if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
 4537                 error = ENOTDIR;
 4538                 goto bad;
 4539         }
 4540         accmode = 0;
 4541         if (fmode & (FWRITE | O_TRUNC)) {
 4542                 if (vp->v_type == VDIR) {
 4543                         error = EISDIR;
 4544                         goto bad;
 4545                 }
 4546                 error = vn_writechk(vp);
 4547                 if (error)
 4548                         goto bad;
 4549                 accmode |= VWRITE;
 4550         }
 4551         if (fmode & FREAD)
 4552                 accmode |= VREAD;
 4553         if ((fmode & O_APPEND) && (fmode & FWRITE))
 4554                 accmode |= VAPPEND;
 4555 #ifdef MAC
 4556         error = mac_vnode_check_open(td->td_ucred, vp, accmode);
 4557         if (error)
 4558                 goto bad;
 4559 #endif
 4560         if (accmode) {
 4561                 error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
 4562                 if (error)
 4563                         goto bad;
 4564         }
 4565         if (fmode & O_TRUNC) {
 4566                 vfs_ref(mp);
 4567                 VOP_UNLOCK(vp, 0);                              /* XXX */
 4568                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
 4569                         vrele(vp);
 4570                         vfs_rel(mp);
 4571                         goto out;
 4572                 }
 4573                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
 4574                 vfs_rel(mp);
 4575 #ifdef MAC
 4576                 /*
 4577                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4578                  * should be right.
 4579                  */
 4580                 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
 4581                 if (error == 0) {
 4582 #endif
 4583                         VATTR_NULL(vap);
 4584                         vap->va_size = 0;
 4585                         error = VOP_SETATTR(vp, vap, td->td_ucred);
 4586 #ifdef MAC
 4587                 }
 4588 #endif
 4589                 vn_finished_write(mp);
 4590                 if (error)
 4591                         goto bad;
 4592         }
 4593         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4594         if (error)
 4595                 goto bad;
 4596 
 4597         if (fmode & FWRITE)
 4598                 vp->v_writecount++;
 4599 
 4600         /*
 4601          * end of vn_open code
 4602          */
 4603 
 4604         if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
 4605                 if (fmode & FWRITE)
 4606                         vp->v_writecount--;
 4607                 goto bad;
 4608         }
 4609         /* An extra reference on `nfp' has been held for us by falloc(). */
 4610         fp = nfp;
 4611         nfp->f_vnode = vp;
 4612         finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
 4613         if (fmode & (O_EXLOCK | O_SHLOCK)) {
 4614                 lf.l_whence = SEEK_SET;
 4615                 lf.l_start = 0;
 4616                 lf.l_len = 0;
 4617                 if (fmode & O_EXLOCK)
 4618                         lf.l_type = F_WRLCK;
 4619                 else
 4620                         lf.l_type = F_RDLCK;
 4621                 type = F_FLOCK;
 4622                 if ((fmode & FNONBLOCK) == 0)
 4623                         type |= F_WAIT;
 4624                 VOP_UNLOCK(vp, 0);
 4625                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4626                             type)) != 0) {
 4627                         /*
 4628                          * The lock request failed.  Normally close the
 4629                          * descriptor but handle the case where someone might
 4630                          * have dup()d or close()d it when we weren't looking.
 4631                          */
 4632                         fdclose(fdp, fp, indx, td);
 4633 
 4634                         /*
 4635                          * release our private reference
 4636                          */
 4637                         fdrop(fp, td);
 4638                         goto out;
 4639                 }
 4640                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 4641                 atomic_set_int(&fp->f_flag, FHASLOCK);
 4642         }
 4643 
 4644         VOP_UNLOCK(vp, 0);
 4645         fdrop(fp, td);
 4646         VFS_UNLOCK_GIANT(vfslocked);
 4647         td->td_retval[0] = indx;
 4648         return (0);
 4649 
 4650 bad:
 4651         vput(vp);
 4652 out:
 4653         VFS_UNLOCK_GIANT(vfslocked);
 4654         return (error);
 4655 }
 4656 
 4657 /*
 4658  * Stat an (NFS) file handle.
 4659  */
 4660 #ifndef _SYS_SYSPROTO_H_
 4661 struct fhstat_args {
 4662         struct fhandle *u_fhp;
 4663         struct stat *sb;
 4664 };
 4665 #endif
 4666 int
 4667 sys_fhstat(td, uap)
 4668         struct thread *td;
 4669         register struct fhstat_args /* {
 4670                 struct fhandle *u_fhp;
 4671                 struct stat *sb;
 4672         } */ *uap;
 4673 {
 4674         struct stat sb;
 4675         fhandle_t fh;
 4676         struct mount *mp;
 4677         struct vnode *vp;
 4678         int vfslocked;
 4679         int error;
 4680 
 4681         error = priv_check(td, PRIV_VFS_FHSTAT);
 4682         if (error)
 4683                 return (error);
 4684         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4685         if (error)
 4686                 return (error);
 4687         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4688                 return (ESTALE);
 4689         vfslocked = VFS_LOCK_GIANT(mp);
 4690         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4691         vfs_unbusy(mp);
 4692         if (error) {
 4693                 VFS_UNLOCK_GIANT(vfslocked);
 4694                 return (error);
 4695         }
 4696         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4697         vput(vp);
 4698         VFS_UNLOCK_GIANT(vfslocked);
 4699         if (error)
 4700                 return (error);
 4701         error = copyout(&sb, uap->sb, sizeof(sb));
 4702         return (error);
 4703 }
 4704 
 4705 /*
 4706  * Implement fstatfs() for (NFS) file handles.
 4707  */
 4708 #ifndef _SYS_SYSPROTO_H_
 4709 struct fhstatfs_args {
 4710         struct fhandle *u_fhp;
 4711         struct statfs *buf;
 4712 };
 4713 #endif
 4714 int
 4715 sys_fhstatfs(td, uap)
 4716         struct thread *td;
 4717         struct fhstatfs_args /* {
 4718                 struct fhandle *u_fhp;
 4719                 struct statfs *buf;
 4720         } */ *uap;
 4721 {
 4722         struct statfs sf;
 4723         fhandle_t fh;
 4724         int error;
 4725 
 4726         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4727         if (error)
 4728                 return (error);
 4729         error = kern_fhstatfs(td, fh, &sf);
 4730         if (error)
 4731                 return (error);
 4732         return (copyout(&sf, uap->buf, sizeof(sf)));
 4733 }
 4734 
 4735 int
 4736 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4737 {
 4738         struct statfs *sp;
 4739         struct mount *mp;
 4740         struct vnode *vp;
 4741         int vfslocked;
 4742         int error;
 4743 
 4744         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4745         if (error)
 4746                 return (error);
 4747         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4748                 return (ESTALE);
 4749         vfslocked = VFS_LOCK_GIANT(mp);
 4750         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4751         if (error) {
 4752                 vfs_unbusy(mp);
 4753                 VFS_UNLOCK_GIANT(vfslocked);
 4754                 return (error);
 4755         }
 4756         vput(vp);
 4757         error = prison_canseemount(td->td_ucred, mp);
 4758         if (error)
 4759                 goto out;
 4760 #ifdef MAC
 4761         error = mac_mount_check_stat(td->td_ucred, mp);
 4762         if (error)
 4763                 goto out;
 4764 #endif
 4765         /*
 4766          * Set these in case the underlying filesystem fails to do so.
 4767          */
 4768         sp = &mp->mnt_stat;
 4769         sp->f_version = STATFS_VERSION;
 4770         sp->f_namemax = NAME_MAX;
 4771         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4772         error = VFS_STATFS(mp, sp);
 4773         if (error == 0)
 4774                 *buf = *sp;
 4775 out:
 4776         vfs_unbusy(mp);
 4777         VFS_UNLOCK_GIANT(vfslocked);
 4778         return (error);
 4779 }
 4780 
 4781 int
 4782 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4783 {
 4784         struct file *fp;
 4785         struct mount *mp;
 4786         struct vnode *vp;
 4787         off_t olen, ooffset;
 4788         int error, vfslocked;
 4789 
 4790         fp = NULL;
 4791         vfslocked = 0;
 4792         error = fget(td, fd, CAP_WRITE, &fp);
 4793         if (error != 0)
 4794                 goto out;
 4795 
 4796         switch (fp->f_type) {
 4797         case DTYPE_VNODE:
 4798                 break;
 4799         case DTYPE_PIPE:
 4800         case DTYPE_FIFO:
 4801                 error = ESPIPE;
 4802                 goto out;
 4803         default:
 4804                 error = ENODEV;
 4805                 goto out;
 4806         }
 4807         if ((fp->f_flag & FWRITE) == 0) {
 4808                 error = EBADF;
 4809                 goto out;
 4810         }
 4811         vp = fp->f_vnode;
 4812         if (vp->v_type != VREG) {
 4813                 error = ENODEV;
 4814                 goto out;
 4815         }
 4816         if (offset < 0 || len <= 0) {
 4817                 error = EINVAL;
 4818                 goto out;
 4819         }
 4820         /* Check for wrap. */
 4821         if (offset > OFF_MAX - len) {
 4822                 error = EFBIG;
 4823                 goto out;
 4824         }
 4825 
 4826         /* Allocating blocks may take a long time, so iterate. */
 4827         for (;;) {
 4828                 olen = len;
 4829                 ooffset = offset;
 4830 
 4831                 bwillwrite();
 4832                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 4833                 mp = NULL;
 4834                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4835                 if (error != 0) {
 4836                         VFS_UNLOCK_GIANT(vfslocked);
 4837                         break;
 4838                 }
 4839                 error = vn_lock(vp, LK_EXCLUSIVE);
 4840                 if (error != 0) {
 4841                         vn_finished_write(mp);
 4842                         VFS_UNLOCK_GIANT(vfslocked);
 4843                         break;
 4844                 }
 4845 #ifdef MAC
 4846                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4847                 if (error == 0)
 4848 #endif
 4849                         error = VOP_ALLOCATE(vp, &offset, &len);
 4850                 VOP_UNLOCK(vp, 0);
 4851                 vn_finished_write(mp);
 4852                 VFS_UNLOCK_GIANT(vfslocked);
 4853 
 4854                 if (olen + ooffset != offset + len) {
 4855                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4856                             ooffset, olen, offset, len);
 4857                 }
 4858                 if (error != 0 || len == 0)
 4859                         break;
 4860                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4861                 maybe_yield();
 4862         }
 4863  out:
 4864         if (fp != NULL)
 4865                 fdrop(fp, td);
 4866         return (error);
 4867 }
 4868 
 4869 int
 4870 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4871 {
 4872 
 4873         return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 4874 }
 4875 
 4876 /*
 4877  * Unlike madvise(2), we do not make a best effort to remember every
 4878  * possible caching hint.  Instead, we remember the last setting with
 4879  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4880  * region of any current setting.
 4881  */
 4882 int
 4883 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4884     int advice)
 4885 {
 4886         struct fadvise_info *fa, *new;
 4887         struct file *fp;
 4888         struct vnode *vp;
 4889         off_t end;
 4890         int error;
 4891 
 4892         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4893                 return (EINVAL);
 4894         switch (advice) {
 4895         case POSIX_FADV_SEQUENTIAL:
 4896         case POSIX_FADV_RANDOM:
 4897         case POSIX_FADV_NOREUSE:
 4898                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4899                 break;
 4900         case POSIX_FADV_NORMAL:
 4901         case POSIX_FADV_WILLNEED:
 4902         case POSIX_FADV_DONTNEED:
 4903                 new = NULL;
 4904                 break;
 4905         default:
 4906                 return (EINVAL);
 4907         }
 4908         /* XXX: CAP_POSIX_FADVISE? */
 4909         error = fget(td, fd, 0, &fp);
 4910         if (error != 0)
 4911                 goto out;
 4912         
 4913         switch (fp->f_type) {
 4914         case DTYPE_VNODE:
 4915                 break;
 4916         case DTYPE_PIPE:
 4917         case DTYPE_FIFO:
 4918                 error = ESPIPE;
 4919                 goto out;
 4920         default:
 4921                 error = ENODEV;
 4922                 goto out;
 4923         }
 4924         vp = fp->f_vnode;
 4925         if (vp->v_type != VREG) {
 4926                 error = ENODEV;
 4927                 goto out;
 4928         }
 4929         if (len == 0)
 4930                 end = OFF_MAX;
 4931         else
 4932                 end = offset + len - 1;
 4933         switch (advice) {
 4934         case POSIX_FADV_SEQUENTIAL:
 4935         case POSIX_FADV_RANDOM:
 4936         case POSIX_FADV_NOREUSE:
 4937                 /*
 4938                  * Try to merge any existing non-standard region with
 4939                  * this new region if possible, otherwise create a new
 4940                  * non-standard region for this request.
 4941                  */
 4942                 mtx_pool_lock(mtxpool_sleep, fp);
 4943                 fa = fp->f_advice;
 4944                 if (fa != NULL && fa->fa_advice == advice &&
 4945                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4946                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4947                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4948                         if (offset < fa->fa_start)
 4949                                 fa->fa_start = offset;
 4950                         if (end > fa->fa_end)
 4951                                 fa->fa_end = end;
 4952                 } else {
 4953                         new->fa_advice = advice;
 4954                         new->fa_start = offset;
 4955                         new->fa_end = end;
 4956                         fp->f_advice = new;
 4957                         new = fa;
 4958                 }
 4959                 mtx_pool_unlock(mtxpool_sleep, fp);
 4960                 break;
 4961         case POSIX_FADV_NORMAL:
 4962                 /*
 4963                  * If a the "normal" region overlaps with an existing
 4964                  * non-standard region, trim or remove the
 4965                  * non-standard region.
 4966                  */
 4967                 mtx_pool_lock(mtxpool_sleep, fp);
 4968                 fa = fp->f_advice;
 4969                 if (fa != NULL) {
 4970                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4971                                 new = fa;
 4972                                 fp->f_advice = NULL;
 4973                         } else if (offset <= fa->fa_start &&
 4974                             end >= fa->fa_start)
 4975                                 fa->fa_start = end + 1;
 4976                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4977                                 fa->fa_end = offset - 1;
 4978                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4979                                 /*
 4980                                  * If the "normal" region is a middle
 4981                                  * portion of the existing
 4982                                  * non-standard region, just remove
 4983                                  * the whole thing rather than picking
 4984                                  * one side or the other to
 4985                                  * preserve.
 4986                                  */
 4987                                 new = fa;
 4988                                 fp->f_advice = NULL;
 4989                         }
 4990                 }
 4991                 mtx_pool_unlock(mtxpool_sleep, fp);
 4992                 break;
 4993         case POSIX_FADV_WILLNEED:
 4994         case POSIX_FADV_DONTNEED:
 4995                 error = VOP_ADVISE(vp, offset, end, advice);
 4996                 break;
 4997         }
 4998 out:
 4999         if (fp != NULL)
 5000                 fdrop(fp, td);
 5001         free(new, M_FADVISE);
 5002         return (error);
 5003 }
 5004 
 5005 int
 5006 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 5007 {
 5008 
 5009         return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 5010             uap->advice));
 5011 }

Cache object: 8a69b8c370fdcba6b86e119ec55e9bf6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.