The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/7.3/sys/kern/vfs_syscalls.c 202902 2010-01-23 22:37:34Z kib $");
   39 
   40 #include "opt_compat.h"
   41 #include "opt_kdtrace.h"
   42 #include "opt_ktrace.h"
   43 #include "opt_mac.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/sysent.h>
   50 #include <sys/malloc.h>
   51 #include <sys/mount.h>
   52 #include <sys/mutex.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/namei.h>
   55 #include <sys/filedesc.h>
   56 #include <sys/kernel.h>
   57 #include <sys/fcntl.h>
   58 #include <sys/file.h>
   59 #include <sys/filio.h>
   60 #include <sys/limits.h>
   61 #include <sys/linker.h>
   62 #include <sys/sdt.h>
   63 #include <sys/stat.h>
   64 #include <sys/sx.h>
   65 #include <sys/unistd.h>
   66 #include <sys/vnode.h>
   67 #include <sys/priv.h>
   68 #include <sys/proc.h>
   69 #include <sys/dirent.h>
   70 #include <sys/jail.h>
   71 #include <sys/syscallsubr.h>
   72 #include <sys/sysctl.h>
   73 #ifdef KTRACE
   74 #include <sys/ktrace.h>
   75 #endif
   76 
   77 #include <machine/stdarg.h>
   78 
   79 #include <security/audit/audit.h>
   80 #include <security/mac/mac_framework.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_object.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/uma.h>
   86 
   87 SDT_PROVIDER_DEFINE(vfs);
      /*
       * Two probes are defined under the "vfs" provider: (stat, mode) and
       * (stat, reg).  The second macro argument is left empty for both.  Each
       * probe declares argument 0 as "char *" and argument 1 as "int".
       * NOTE(review): the macro argument order is presumably
       * (provider, module, function, name) -- confirm against <sys/sdt.h>.
       */
   88 SDT_PROBE_DEFINE(vfs, , stat, mode);
   89 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
   90 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
   91 SDT_PROBE_DEFINE(vfs, , stat, reg);
   92 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
   93 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
   94 
      /* Prototypes for helpers that are private (static) to this file. */
   95 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   96 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
   97 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
   98 static int setfmode(struct thread *td, struct vnode *, int);
   99 static int setfflags(struct thread *td, struct vnode *, int);
  100 static int setutimes(struct thread *td, struct vnode *,
  101     const struct timespec *, int, int);
  102 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  103     struct thread *td);
  104 
  105 /*
  106  * The module initialization routine for POSIX asynchronous I/O will
  107  * set this to the version of AIO that it implements.  (Zero means
  108  * that it is not implemented.)  This value is used here by pathconf()
  109  * and in kern_descrip.c by fpathconf().
       *
       * The variable has external linkage and no initializer, so it starts
       * out as zero.
  110  */
  111 int async_io_version;
  112 
  113 #ifdef DEBUG
      /*
       * Only built when DEBUG is defined: an integer exported read/write as
       * the "syncprt" node under the _debug sysctl tree.  No reader of the
       * value is visible in this part of the file.
       */
  114 static int syncprt = 0;
  115 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  116 #endif
  117 
  118 /*
  119  * Sync each mounted filesystem.
       *
       * Walks the global mount list.  For every mount that can be busied with
       * LK_NOWAIT, is not flagged MNT_RDONLY and accepts vn_start_write() with
       * V_NOWAIT, a non-blocking (MNT_NOWAIT) flush is requested through
       * vfs_msync() and VFS_SYNC().  All other mounts are skipped silently.
       *
       * 'uap' is not used.  The result of VFS_SYNC() is ignored and the
       * function always returns 0.
  120  */
  121 #ifndef _SYS_SYSPROTO_H_
  122 struct sync_args {
  123         int     dummy;
  124 };
  125 #endif
  126 /* ARGSUSED */
  127 int
  128 sync(td, uap)
  129         struct thread *td;
  130         struct sync_args *uap;
  131 {
  132         struct mount *mp, *nmp;
  133         int vfslocked;
  134 
  135         mtx_lock(&mountlist_mtx);
  136         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /* The mount could not be busied with LK_NOWAIT: skip it. */
  137                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
  138                         nmp = TAILQ_NEXT(mp, mnt_list);
  139                         continue;
  140                 }
                      /*
                       * NOTE(review): mountlist_mtx is taken again at the bottom
                       * of the loop body, so a successful vfs_busy() is presumed
                       * to have released it -- confirm against vfs_busy().
                       */
  141                 vfslocked = VFS_LOCK_GIANT(mp);
  142                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  143                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
                              /*
                               * Turn MNTK_ASYNC off for the duration of the flush.
                               * mnt_noasync counts the callers that currently
                               * have it turned off.
                               */
  144                         MNT_ILOCK(mp);
  145                         mp->mnt_noasync++;
  146                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
  147                         MNT_IUNLOCK(mp);
  148                         vfs_msync(mp, MNT_NOWAIT);
  149                         VFS_SYNC(mp, MNT_NOWAIT, td);
                              /*
                               * Restore MNTK_ASYNC only if the mount is flagged
                               * MNT_ASYNC and nobody else still suppresses it.
                               */
  150                         MNT_ILOCK(mp);
  151                         mp->mnt_noasync--;
  152                         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
  153                             mp->mnt_noasync == 0)
  154                                 mp->mnt_kern_flag |= MNTK_ASYNC;
  155                         MNT_IUNLOCK(mp);
  156                         vn_finished_write(mp);
  157                 }
  158                 VFS_UNLOCK_GIANT(vfslocked);
                      /*
                       * Fetch the successor while the list lock is held and
                       * before the busy reference on 'mp' is released.
                       */
  159                 mtx_lock(&mountlist_mtx);
  160                 nmp = TAILQ_NEXT(mp, mnt_list);
  161                 vfs_unbusy(mp, td);
  162         }
  163         mtx_unlock(&mountlist_mtx);
  164         return (0);
  165 }
  166 
  167 /* XXX PRISON: could be per prison flag */
  168 static int prison_quotas;
      /*
       * prison_quotas has no initializer, and the sysctl that would expose it
       * is compiled out below, so within the code visible here it stays zero.
       */
  169 #if 0
  170 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
  171 #endif
  172 
  173 /*
  174  * Change filesystem quotas.
       *
       * Resolves uap->path (a user-space string, following symlinks) to a
       * vnode, takes the mount that vnode lives on, and hands cmd, uid and arg
       * to that filesystem through VFS_QUOTACTL().
       *
       * Returns EPERM for a jailed caller while prison_quotas is zero, the
       * error from namei() or vfs_busy() if either fails, and otherwise the
       * result of VFS_QUOTACTL().
  175  */
  176 #ifndef _SYS_SYSPROTO_H_
  177 struct quotactl_args {
  178         char *path;
  179         int cmd;
  180         int uid;
  181         caddr_t arg;
  182 };
  183 #endif
  184 int
  185 quotactl(td, uap)
  186         struct thread *td;
  187         register struct quotactl_args /* {
  188                 char *path;
  189                 int cmd;
  190                 int uid;
  191                 caddr_t arg;
  192         } */ *uap;
  193 {
  194         struct mount *mp;
  195         int vfslocked;
  196         int error;
  197         struct nameidata nd;
  198 
  199         AUDIT_ARG(cmd, uap->cmd);
  200         AUDIT_ARG(uid, uap->uid);
  201         if (jailed(td->td_ucred) && !prison_quotas)
  202                 return (EPERM);
  203         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
  204            UIO_USERSPACE, uap->path, td);
  205         if ((error = namei(&nd)) != 0)
  206                 return (error);
  207         vfslocked = NDHASGIANT(&nd);
  208         NDFREE(&nd, NDF_ONLY_PNBUF);
              /*
               * Take a reference on the mount before the vnode is released, and
               * drop that reference again once vfs_busy() has been attempted.
               */
  209         mp = nd.ni_vp->v_mount;
  210         vfs_ref(mp);
  211         vput(nd.ni_vp);
  212         error = vfs_busy(mp, 0, NULL, td);
  213         vfs_rel(mp);
  214         if (error) {
  215                 VFS_UNLOCK_GIANT(vfslocked);
  216                 return (error);
  217         }
  218         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
  219         vfs_unbusy(mp, td);
  220         VFS_UNLOCK_GIANT(vfslocked);
  221         return (error);
  222 }
  223 
  224 /*
  225  * Used by statfs conversion routines to scale the block size up if
  226  * necessary so that all of the block counts are <= 'max_size'.  Note
  227  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  228  * value of 'n'.
  229  */
  230 void
  231 statfs_scale_blocks(struct statfs *sf, long max_size)
  232 {
  233         uint64_t count;
  234         int shift;
  235 
  236         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  237 
  238         /*
  239          * Attempt to scale the block counts to give a more accurate
  240          * overview to userland of the ratio of free space to used
  241          * space.  To do this, find the largest block count and compute
  242          * a divisor that lets it fit into a signed integer <= max_size.
  243          */
  244         if (sf->f_bavail < 0)
  245                 count = -sf->f_bavail;
  246         else
  247                 count = sf->f_bavail;
  248         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  249         if (count <= max_size)
  250                 return;
  251 
  252         count >>= flsl(max_size);
  253         shift = 0;
  254         while (count > 0) {
  255                 shift++;
  256                 count >>=1;
  257         }
  258 
  259         sf->f_bsize <<= shift;
  260         sf->f_blocks >>= shift;
  261         sf->f_bfree >>= shift;
  262         sf->f_bavail >>= shift;
  263 }
  264 
  265 /*
  266  * Get filesystem statistics.
  267  */
  268 #ifndef _SYS_SYSPROTO_H_
  269 struct statfs_args {
  270         char *path;
  271         struct statfs *buf;
  272 };
  273 #endif
  274 int
  275 statfs(td, uap)
  276         struct thread *td;
  277         register struct statfs_args /* {
  278                 char *path;
  279                 struct statfs *buf;
  280         } */ *uap;
  281 {
  282         struct statfs sf;
  283         int error;
  284 
  285         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  286         if (error == 0)
  287                 error = copyout(&sf, uap->buf, sizeof(sf));
  288         return (error);
  289 }
  290 
  291 int
  292 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  293     struct statfs *buf)
  294 {
  295         struct mount *mp;
  296         struct statfs *sp, sb;
  297         int vfslocked;
  298         int error;
  299         struct nameidata nd;
  300 
  301         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  302             AUDITVNODE1, pathseg, path, td);
  303         error = namei(&nd);
  304         if (error)
  305                 return (error);
  306         vfslocked = NDHASGIANT(&nd);
  307         mp = nd.ni_vp->v_mount;
  308         vfs_ref(mp);
  309         NDFREE(&nd, NDF_ONLY_PNBUF);
  310         vput(nd.ni_vp);
  311         error = vfs_busy(mp, 0, NULL, td);
  312         vfs_rel(mp);
  313         if (error) {
  314                 VFS_UNLOCK_GIANT(vfslocked);
  315                 return (error);
  316         }
  317 #ifdef MAC
  318         error = mac_check_mount_stat(td->td_ucred, mp);
  319         if (error)
  320                 goto out;
  321 #endif
  322         /*
  323          * Set these in case the underlying filesystem fails to do so.
  324          */
  325         sp = &mp->mnt_stat;
  326         sp->f_version = STATFS_VERSION;
  327         sp->f_namemax = NAME_MAX;
  328         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  329         error = VFS_STATFS(mp, sp, td);
  330         if (error)
  331                 goto out;
  332         if (priv_check(td, PRIV_VFS_GENERATION)) {
  333                 bcopy(sp, &sb, sizeof(sb));
  334                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  335                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  336                 sp = &sb;
  337         }
  338         *buf = *sp;
  339 out:
  340         vfs_unbusy(mp, td);
  341         VFS_UNLOCK_GIANT(vfslocked);
  342         if (mtx_owned(&Giant))
  343                 printf("statfs(%d): %s: %d\n", vfslocked, path, error);
  344         return (error);
  345 }
  346 
  347 /*
  348  * Get filesystem statistics.
  349  */
  350 #ifndef _SYS_SYSPROTO_H_
  351 struct fstatfs_args {
  352         int fd;
  353         struct statfs *buf;
  354 };
  355 #endif
  356 int
  357 fstatfs(td, uap)
  358         struct thread *td;
  359         register struct fstatfs_args /* {
  360                 int fd;
  361                 struct statfs *buf;
  362         } */ *uap;
  363 {
  364         struct statfs sf;
  365         int error;
  366 
  367         error = kern_fstatfs(td, uap->fd, &sf);
  368         if (error == 0)
  369                 error = copyout(&sf, uap->buf, sizeof(sf));
  370         return (error);
  371 }
  372 
  373 int
  374 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  375 {
              /*
               * Kernel-side worker for fstatfs(): find the vnode behind 'fd',
               * busy the mount it belongs to and copy that mount's statistics
               * into the kernel buffer 'buf'.
               *
               * Returns 0 on success, EBADF when the vnode has no mount, or the
               * error from getvnode(), vfs_busy(), the MAC check (when MAC is
               * configured) or VFS_STATFS().  'buf' is written only on success.
               */
  376         struct file *fp;
  377         struct mount *mp;
  378         struct statfs *sp, sb;
  379         int vfslocked;
  380         struct vnode *vp;
  381         int error;
  382 
  383         AUDIT_ARG(fd, fd);
  384         error = getvnode(td->td_proc->p_fd, fd, &fp);
  385         if (error)
  386                 return (error);
  387         vp = fp->f_vnode;
  388         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  389         vn_lock(vp, LK_SHARED | LK_RETRY, td);
  390 #ifdef AUDIT
  391         AUDIT_ARG(vnode, vp, ARG_VNODE1);
  392 #endif
              /*
               * Read v_mount with the vnode locked and, if there is one, take
               * a reference on it before the vnode lock and the file reference
               * are given up.
               */
  393         mp = vp->v_mount;
  394         if (mp)
  395                 vfs_ref(mp);
  396         VOP_UNLOCK(vp, 0, td);
  397         fdrop(fp, td);
  398         if (mp == NULL) {
  399                 error = EBADF;
  400                 goto out;
  401         }
  402         error = vfs_busy(mp, 0, NULL, td);
  403         vfs_rel(mp);
              /* The mount was not busied, so do not go through 'out'. */
  404         if (error) {
  405                 VFS_UNLOCK_GIANT(vfslocked);
  406                 return (error);
  407         }
  408 #ifdef MAC
  409         error = mac_check_mount_stat(td->td_ucred, mp);
  410         if (error)
  411                 goto out;
  412 #endif
  413         /*
  414          * Set these in case the underlying filesystem fails to do so.
  415          */
  416         sp = &mp->mnt_stat;
  417         sp->f_version = STATFS_VERSION;
  418         sp->f_namemax = NAME_MAX;
  419         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  420         error = VFS_STATFS(mp, sp, td);
  421         if (error)
  422                 goto out;
              /*
               * A nonzero priv_check() result is treated as "not privileged":
               * such callers get a private copy with the fsid zeroed and passed
               * through prison_enforce_statfs().
               */
  423         if (priv_check(td, PRIV_VFS_GENERATION)) {
  424                 bcopy(sp, &sb, sizeof(sb));
  425                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  426                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  427                 sp = &sb;
  428         }
  429         *buf = *sp;
  430 out:
              /* 'mp' is NULL only on the EBADF path, where nothing was busied. */
  431         if (mp)
  432                 vfs_unbusy(mp, td);
  433         VFS_UNLOCK_GIANT(vfslocked);
  434         return (error);
  435 }
  436 
  437 /*
  438  * Get statistics on all filesystems.
  439  */
  440 #ifndef _SYS_SYSPROTO_H_
  441 struct getfsstat_args {
  442         struct statfs *buf;
  443         long bufsize;
  444         int flags;
  445 };
  446 #endif
  447 int
  448 getfsstat(td, uap)
  449         struct thread *td;
  450         register struct getfsstat_args /* {
  451                 struct statfs *buf;
  452                 long bufsize;
  453                 int flags;
  454         } */ *uap;
  455 {
  456 
  457         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  458             uap->flags));
  459 }
  460 
  461 /*
       * Kernel-side worker for getfsstat(): collect one struct statfs per
       * visible mount.
       *
       * bufsize == 0:            nothing is stored, mounts are only counted.
       * bufseg == UIO_USERSPACE: '*buf' is a user address; each record is
       *                          written with copyout().
       * bufseg == UIO_SYSSPACE:  a kernel array is allocated and returned
       *                          through '*buf' (see below).
       *
       * At most bufsize / sizeof(struct statfs) records are stored.  The
       * number reported in td->td_retval[0] is the number of mounts that
       * passed the filters, capped at that maximum when a buffer was given.
       * Returns 0, or the copyout() error for a user buffer.
       *
  462  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  463  *      The caller is responsible for freeing memory which will be allocated
  464  *      in '*buf'.
  465  */
  466 int
  467 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  468     enum uio_seg bufseg, int flags)
  469 {
  470         struct mount *mp, *nmp;
  471         struct statfs *sfsp, *sp, sb;
  472         size_t count, maxcount;
  473         int vfslocked;
  474         int error;
  475 
  476         maxcount = bufsize / sizeof(struct statfs);
  477         if (bufsize == 0)
  478                 sfsp = NULL;
  479         else if (bufseg == UIO_USERSPACE)
  480                 sfsp = *buf;
  481         else /* if (bufseg == UIO_SYSSPACE) */ {
                      /*
                       * Size the kernel array by the current number of mounts so
                       * that an oversized request does not over-allocate.  The
                       * list lock is dropped before the allocation; the
                       * "count < maxcount" test below keeps later stores within
                       * the array if the list has grown since.
                       */
  482                 count = 0;
  483                 mtx_lock(&mountlist_mtx);
  484                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  485                         count++;
  486                 }
  487                 mtx_unlock(&mountlist_mtx);
  488                 if (maxcount > count)
  489                         maxcount = count;
  490                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  491                     M_WAITOK);
  492         }
  493         count = 0;
  494         mtx_lock(&mountlist_mtx);
  495         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
                      /*
                       * Mounts rejected by the jail check, by the MAC check or
                       * by a failed LK_NOWAIT busy attempt are skipped and are
                       * not counted.
                       */
  496                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  497                         nmp = TAILQ_NEXT(mp, mnt_list);
  498                         continue;
  499                 }
  500 #ifdef MAC
  501                 if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
  502                         nmp = TAILQ_NEXT(mp, mnt_list);
  503                         continue;
  504                 }
  505 #endif
  506                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
  507                         nmp = TAILQ_NEXT(mp, mnt_list);
  508                         continue;
  509                 }
                      /*
                       * NOTE(review): mountlist_mtx is taken again before the
                       * next iteration, so a successful vfs_busy() is presumed
                       * to have released it -- confirm against vfs_busy().
                       */
  510                 vfslocked = VFS_LOCK_GIANT(mp);
  511                 if (sfsp && count < maxcount) {
  512                         sp = &mp->mnt_stat;
  513                         /*
  514                          * Set these in case the underlying filesystem
  515                          * fails to do so.
  516                          */
  517                         sp->f_version = STATFS_VERSION;
  518                         sp->f_namemax = NAME_MAX;
  519                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  520                         /*
  521                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  522                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  523                          * overrides MNT_WAIT.
                               *
                               * NOTE(review): as written, VFS_STATFS() is called
                               * whenever the MNT_WAIT bits are set in 'flags',
                               * whatever the other bits are, which does not match
                               * the "overrides" wording above -- confirm against
                               * the numeric values of the MNT_* wait flags.
                               *
                               * A mount whose VFS_STATFS() fails is skipped and
                               * is not counted.
  524                          */
  525                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  526                             (flags & MNT_WAIT)) &&
  527                             (error = VFS_STATFS(mp, sp, td))) {
  528                                 VFS_UNLOCK_GIANT(vfslocked);
  529                                 mtx_lock(&mountlist_mtx);
  530                                 nmp = TAILQ_NEXT(mp, mnt_list);
  531                                 vfs_unbusy(mp, td);
  532                                 continue;
  533                         }
                              /*
                               * A nonzero priv_check() result is treated as "not
                               * privileged": such callers get a private copy with
                               * the fsid zeroed and passed through
                               * prison_enforce_statfs().
                               */
  534                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  535                                 bcopy(sp, &sb, sizeof(sb));
  536                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  537                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  538                                 sp = &sb;
  539                         }
  540                         if (bufseg == UIO_SYSSPACE)
  541                                 bcopy(sp, sfsp, sizeof(*sp));
  542                         else /* if (bufseg == UIO_USERSPACE) */ {
  543                                 error = copyout(sp, sfsp, sizeof(*sp));
                                      /*
                                       * Give up on the first failed copyout();
                                       * td_retval[0] is not set on this path.
                                       */
  544                                 if (error) {
  545                                         vfs_unbusy(mp, td);
  546                                         VFS_UNLOCK_GIANT(vfslocked);
  547                                         return (error);
  548                                 }
  549                         }
  550                         sfsp++;
  551                 }
  552                 VFS_UNLOCK_GIANT(vfslocked);
                      /* Counted even when the record did not fit in the buffer. */
  553                 count++;
  554                 mtx_lock(&mountlist_mtx);
  555                 nmp = TAILQ_NEXT(mp, mnt_list);
  556                 vfs_unbusy(mp, td);
  557         }
  558         mtx_unlock(&mountlist_mtx);
  559         if (sfsp && count > maxcount)
  560                 td->td_retval[0] = maxcount;
  561         else
  562                 td->td_retval[0] = count;
  563         return (0);
  564 }
  565 
  566 #ifdef COMPAT_FREEBSD4
  567 /*
  568  * Get old format filesystem statistics.
  569  */
  570 static void cvtstatfs(struct statfs *, struct ostatfs *);
  571 
  572 #ifndef _SYS_SYSPROTO_H_
  573 struct freebsd4_statfs_args {
  574         char *path;
  575         struct ostatfs *buf;
  576 };
  577 #endif
  578 int
  579 freebsd4_statfs(td, uap)
  580         struct thread *td;
  581         struct freebsd4_statfs_args /* {
  582                 char *path;
  583                 struct ostatfs *buf;
  584         } */ *uap;
  585 {
  586         struct ostatfs osb;
  587         struct statfs sf;
  588         int error;
  589 
  590         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  591         if (error)
  592                 return (error);
  593         cvtstatfs(&sf, &osb);
  594         return (copyout(&osb, uap->buf, sizeof(osb)));
  595 }
  596 
  597 /*
  598  * Get filesystem statistics.
  599  */
  600 #ifndef _SYS_SYSPROTO_H_
  601 struct freebsd4_fstatfs_args {
  602         int fd;
  603         struct ostatfs *buf;
  604 };
  605 #endif
  606 int
  607 freebsd4_fstatfs(td, uap)
  608         struct thread *td;
  609         struct freebsd4_fstatfs_args /* {
  610                 int fd;
  611                 struct ostatfs *buf;
  612         } */ *uap;
  613 {
  614         struct ostatfs osb;
  615         struct statfs sf;
  616         int error;
  617 
  618         error = kern_fstatfs(td, uap->fd, &sf);
  619         if (error)
  620                 return (error);
  621         cvtstatfs(&sf, &osb);
  622         return (copyout(&osb, uap->buf, sizeof(osb)));
  623 }
  624 
  625 /*
  626  * Get statistics on all filesystems.
  627  */
  628 #ifndef _SYS_SYSPROTO_H_
  629 struct freebsd4_getfsstat_args {
  630         struct ostatfs *buf;
  631         long bufsize;
  632         int flags;
  633 };
  634 #endif
  635 int
  636 freebsd4_getfsstat(td, uap)
  637         struct thread *td;
  638         register struct freebsd4_getfsstat_args /* {
  639                 struct ostatfs *buf;
  640                 long bufsize;
  641                 int flags;
  642         } */ *uap;
  643 {
  644         struct statfs *buf, *sp;
  645         struct ostatfs osb;
  646         size_t count, size;
  647         int error;
  648 
  649         count = uap->bufsize / sizeof(struct ostatfs);
  650         size = count * sizeof(struct statfs);
  651         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  652         if (size > 0) {
  653                 count = td->td_retval[0];
  654                 sp = buf;
  655                 while (count > 0 && error == 0) {
  656                         cvtstatfs(sp, &osb);
  657                         error = copyout(&osb, uap->buf, sizeof(osb));
  658                         sp++;
  659                         uap->buf++;
  660                         count--;
  661                 }
  662                 free(buf, M_TEMP);
  663         }
  664         return (error);
  665 }
  666 
  667 /*
  668  * Implement fstatfs() for (NFS) file handles.
  669  */
  670 #ifndef _SYS_SYSPROTO_H_
  671 struct freebsd4_fhstatfs_args {
  672         struct fhandle *u_fhp;
  673         struct ostatfs *buf;
  674 };
  675 #endif
  676 int
  677 freebsd4_fhstatfs(td, uap)
  678         struct thread *td;
  679         struct freebsd4_fhstatfs_args /* {
  680                 struct fhandle *u_fhp;
  681                 struct ostatfs *buf;
  682         } */ *uap;
  683 {
  684         struct ostatfs osb;
  685         struct statfs sf;
  686         fhandle_t fh;
  687         int error;
  688 
  689         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  690         if (error)
  691                 return (error);
  692         error = kern_fhstatfs(td, fh, &sf);
  693         if (error)
  694                 return (error);
  695         cvtstatfs(&sf, &osb);
  696         return (copyout(&osb, uap->buf, sizeof(osb)));
  697 }
  698 
  699 /*
  700  * Convert a new format statfs structure to an old format statfs structure.
  701  */
  702 static void
  703 cvtstatfs(nsp, osp)
  704         struct statfs *nsp;
  705         struct ostatfs *osp;
  706 {
  707 
  708         statfs_scale_blocks(nsp, LONG_MAX);
  709         bzero(osp, sizeof(*osp));
  710         osp->f_bsize = nsp->f_bsize;
  711         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  712         osp->f_blocks = nsp->f_blocks;
  713         osp->f_bfree = nsp->f_bfree;
  714         osp->f_bavail = nsp->f_bavail;
  715         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  716         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  717         osp->f_owner = nsp->f_owner;
  718         osp->f_type = nsp->f_type;
  719         osp->f_flags = nsp->f_flags;
  720         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  721         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  722         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  723         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  724         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  725             MIN(MFSNAMELEN, OMFSNAMELEN));
  726         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  727             MIN(MNAMELEN, OMNAMELEN));
  728         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  729             MIN(MNAMELEN, OMNAMELEN));
  730         osp->f_fsid = nsp->f_fsid;
  731 }
  732 #endif /* COMPAT_FREEBSD4 */
  733 
  734 /*
  735  * Change current working directory to a given file descriptor.
       *
       * The vnode behind uap->fd is referenced, checked with change_dir() and,
       * if filesystems are mounted on top of it, replaced by the root vnode of
       * the topmost such mount.  The result is then installed as the process's
       * fd_cdir and the previous one is released.
       *
       * Returns 0 on success, or the error from getvnode(), change_dir() or
       * VFS_ROOT().
  736  */
  737 #ifndef _SYS_SYSPROTO_H_
  738 struct fchdir_args {
  739         int     fd;
  740 };
  741 #endif
  742 int
  743 fchdir(td, uap)
  744         struct thread *td;
  745         struct fchdir_args /* {
  746                 int fd;
  747         } */ *uap;
  748 {
  749         register struct filedesc *fdp = td->td_proc->p_fd;
  750         struct vnode *vp, *tdp, *vpold;
  751         struct mount *mp;
  752         struct file *fp;
  753         int vfslocked;
  754         int error;
  755 
  756         AUDIT_ARG(fd, uap->fd);
  757         if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
  758                 return (error);
              /* Hold our own vnode reference so the file can be dropped now. */
  759         vp = fp->f_vnode;
  760         VREF(vp);
  761         fdrop(fp, td);
  762         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  763         vn_lock(vp, LK_SHARED | LK_RETRY, td);
  764         AUDIT_ARG(vnode, vp, ARG_VNODE1);
  765         error = change_dir(vp, td);
              /*
               * While something is mounted on 'vp', step to that mount's root.
               * If vfs_busy() fails the loop condition is evaluated again,
               * re-reading v_mountedhere.  On each successful step the old vnode
               * and its Giant state are released and replaced by the new ones.
               */
  766         while (!error && (mp = vp->v_mountedhere) != NULL) {
  767                 int tvfslocked;
  768                 if (vfs_busy(mp, 0, 0, td))
  769                         continue;
  770                 tvfslocked = VFS_LOCK_GIANT(mp);
  771                 error = VFS_ROOT(mp, LK_SHARED, &tdp, td);
  772                 vfs_unbusy(mp, td);
  773                 if (error) {
  774                         VFS_UNLOCK_GIANT(tvfslocked);
  775                         break;
  776                 }
  777                 vput(vp);
  778                 VFS_UNLOCK_GIANT(vfslocked);
  779                 vp = tdp;
  780                 vfslocked = tvfslocked;
  781         }
  782         if (error) {
  783                 vput(vp);
  784                 VFS_UNLOCK_GIANT(vfslocked);
  785                 return (error);
  786         }
              /* Keep the reference on 'vp' but drop its lock before installing. */
  787         VOP_UNLOCK(vp, 0, td);
  788         VFS_UNLOCK_GIANT(vfslocked);
  789         FILEDESC_XLOCK(fdp);
  790         vpold = fdp->fd_cdir;
  791         fdp->fd_cdir = vp;
  792         FILEDESC_XUNLOCK(fdp);
              /* Release the previous directory after the descriptor lock is dropped. */
  793         vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  794         vrele(vpold);
  795         VFS_UNLOCK_GIANT(vfslocked);
  796         return (0);
  797 }
  798 
  799 /*
  800  * Change current working directory (``.'').
  801  */
  802 #ifndef _SYS_SYSPROTO_H_
  803 struct chdir_args {
  804         char    *path;
  805 };
  806 #endif
  807 int
  808 chdir(td, uap)
  809         struct thread *td;
  810         struct chdir_args /* {
  811                 char *path;
  812         } */ *uap;
  813 {
  814 
  815         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  816 }
  817 
  818 int
  819 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  820 {
  821         register struct filedesc *fdp = td->td_proc->p_fd;
  822         int error;
  823         struct nameidata nd;
  824         struct vnode *vp;
  825         int vfslocked;
  826 
  827         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
  828             MPSAFE, pathseg, path, td);
  829         if ((error = namei(&nd)) != 0)
  830                 return (error);
  831         vfslocked = NDHASGIANT(&nd);
  832         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  833                 vput(nd.ni_vp);
  834                 VFS_UNLOCK_GIANT(vfslocked);
  835                 NDFREE(&nd, NDF_ONLY_PNBUF);
  836                 return (error);
  837         }
  838         VOP_UNLOCK(nd.ni_vp, 0, td);
  839         VFS_UNLOCK_GIANT(vfslocked);
  840         NDFREE(&nd, NDF_ONLY_PNBUF);
  841         FILEDESC_XLOCK(fdp);
  842         vp = fdp->fd_cdir;
  843         fdp->fd_cdir = nd.ni_vp;
  844         FILEDESC_XUNLOCK(fdp);
  845         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  846         vrele(vp);
  847         VFS_UNLOCK_GIANT(vfslocked);
  848         return (0);
  849 }
  850 
  851 /*
  852  * Helper function for raised chroot(2) security function:  Refuse if
  853  * any file descriptors are open directories.
       *
       * Scans descriptors 0 .. fd_nfiles - 1 and returns EPERM as soon as one
       * refers to a DTYPE_VNODE file whose vnode is of type VDIR; returns 0 if
       * none does.  The filedesc lock must be held by the caller (asserted).
  854  */
  855 static int
  856 chroot_refuse_vdir_fds(fdp)
  857         struct filedesc *fdp;
  858 {
  859         struct vnode *vp;
  860         struct file *fp;
  861         int fd;
  862 
  863         FILEDESC_LOCK_ASSERT(fdp);
  864 
  865         for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  866                 fp = fget_locked(fdp, fd);
                      /* No file behind this descriptor number; skip it. */
  867                 if (fp == NULL)
  868                         continue;
                      /* Only vnode-backed files can be directories. */
  869                 if (fp->f_type == DTYPE_VNODE) {
  870                         vp = fp->f_vnode;
  871                         if (vp->v_type == VDIR)
  872                                 return (EPERM);
  873                 }
  874         }
  875         return (0);
  876 }
  877 
  878 /*
  879  * This sysctl determines if we will allow a process to chroot(2) if it
  880  * has a directory open:
  881  *      0: disallowed for all processes.
  882  *      1: allowed for processes that were not already chroot(2)'ed.
  883  *      2: allowed for all processes.
       *
       * change_root() only tests for the values 0 and 1, so any other value
       * behaves like 2.  The variable defaults to 1 and is registered
       * read/write (CTLFLAG_RW) under the _kern sysctl node.
  884  */
  885 
  886 static int chroot_allow_open_directories = 1;
  887 
  888 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  889      &chroot_allow_open_directories, 0, "");
  890 
  891 /*
  892  * Change notion of root (``/'') directory.
       *
       * Order of checks: the caller must pass priv_check(PRIV_VFS_CHROOT); the
       * path is then looked up with the leaf locked; change_dir() validates the
       * vnode; with MAC compiled in, mac_check_vnode_chroot() must also agree.
       * Only then is change_root() called to install the new root.  Returns 0
       * on success or the first error encountered.
  893  */
  894 #ifndef _SYS_SYSPROTO_H_
  895 struct chroot_args {
  896         char    *path;
  897 };
  898 #endif
  899 int
  900 chroot(td, uap)
  901         struct thread *td;
  902         struct chroot_args /* {
  903                 char *path;
  904         } */ *uap;
  905 {
  906         int error;
  907         struct nameidata nd;
  908         int vfslocked;
  909 
  910         error = priv_check(td, PRIV_VFS_CHROOT);
  911         if (error)
  912                 return (error);
  913         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
  914             AUDITVNODE1, UIO_USERSPACE, uap->path, td);
  915         error = namei(&nd);
  916         if (error)
  917                 goto error;
  918         vfslocked = NDHASGIANT(&nd);
  919         if ((error = change_dir(nd.ni_vp, td)) != 0)
  920                 goto e_vunlock;
  921 #ifdef MAC
  922         if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
  923                 goto e_vunlock;
  924 #endif
              /*
               * The vnode is unlocked before change_root() runs and released
               * afterwards whether or not change_root() succeeded; change_root()
               * takes its own references with VREF() when it installs the vnode.
               */
  925         VOP_UNLOCK(nd.ni_vp, 0, td);
  926         error = change_root(nd.ni_vp, td);
  927         vrele(nd.ni_vp);
  928         VFS_UNLOCK_GIANT(vfslocked);
  929         NDFREE(&nd, NDF_ONLY_PNBUF);
  930         return (error);
      /* Failure with the vnode still locked: unlock and release it via vput(). */
  931 e_vunlock:
  932         vput(nd.ni_vp);
  933         VFS_UNLOCK_GIANT(vfslocked);
      /*
       * NOTE(review): this label is also reached when namei() itself failed;
       * presumably NDFREE() is harmless in that case -- confirm.
       */
  934 error:
  935         NDFREE(&nd, NDF_ONLY_PNBUF);
  936         return (error);
  937 }
  938 
  939 /*
  940  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  941  * instance.
       *
       * Returns ENOTDIR if vp is not of type VDIR.  With MAC compiled in, a
       * failure from mac_check_vnode_chdir() is returned next.  Otherwise the
       * result of VOP_ACCESS() for VEXEC with the thread's credentials is
       * returned (0 when access is granted).  The vnode's lock state and
       * reference count are not changed here.
  942  */
  943 int
  944 change_dir(vp, td)
  945         struct vnode *vp;
  946         struct thread *td;
  947 {
  948         int error;
  949 
  950         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  951         if (vp->v_type != VDIR)
  952                 return (ENOTDIR);
  953 #ifdef MAC
  954         error = mac_check_vnode_chdir(td->td_ucred, vp);
  955         if (error)
  956                 return (error);
  957 #endif
  958         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  959         return (error);
  960 }
  961 
  962 /*
  963  * Common routine for kern_chroot() and jail_attach().  The caller is
  964  * responsible for invoking priv_check() and mac_check_chroot() to authorize
  965  * this operation.
       *
       * In this file the caller is chroot(), which performs the authorization
       * with priv_check(PRIV_VFS_CHROOT) and mac_check_vnode_chroot().
       *
       * Installs vp as the root directory (fd_rdir) of td's process, and also
       * as fd_jdir if that field is not set yet.  Each field that is set gets
       * its own VREF(); the caller's reference on vp is left untouched.
       * Returns 0 on success, or the error from chroot_refuse_vdir_fds() when
       * the chroot_allow_open_directories policy rejects the request.
  966  */
  967 int
  968 change_root(vp, td)
  969         struct vnode *vp;
  970         struct thread *td;
  971 {
  972         struct filedesc *fdp;
  973         struct vnode *oldvp;
  974         int vfslocked;
  975         int error;
  976 
  977         VFS_ASSERT_GIANT(vp->v_mount);
  978         fdp = td->td_proc->p_fd;
  979         FILEDESC_XLOCK(fdp);
              /*
               * Policy 0 always scans for open directories; policy 1 scans only
               * when the current root differs from rootvnode.  Any other value
               * skips the scan.
               */
  980         if (chroot_allow_open_directories == 0 ||
  981             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  982                 error = chroot_refuse_vdir_fds(fdp);
  983                 if (error) {
  984                         FILEDESC_XUNLOCK(fdp);
  985                         return (error);
  986                 }
  987         }
  988         oldvp = fdp->fd_rdir;
  989         fdp->fd_rdir = vp;
  990         VREF(fdp->fd_rdir);
  991         if (!fdp->fd_jdir) {
  992                 fdp->fd_jdir = vp;
  993                 VREF(fdp->fd_jdir);
  994         }
  995         FILEDESC_XUNLOCK(fdp);
              /* Release the previous root only after the filedesc lock is dropped. */
  996         vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  997         vrele(oldvp);
  998         VFS_UNLOCK_GIANT(vfslocked);
  999         return (0);
 1000 }
 1001 
 1002 /*
 1003  * Check permissions, allocate an open file structure, and call the device
 1004  * open routine if any.
       *
       * This handler only unpacks the arguments; the work is done by
       * kern_open(), called with the path tagged UIO_USERSPACE.
 1005  */
 1006 #ifndef _SYS_SYSPROTO_H_
 1007 struct open_args {
 1008         char    *path;
 1009         int     flags;
 1010         int     mode;
 1011 };
 1012 #endif
 1013 int
 1014 open(td, uap)
 1015         struct thread *td;
 1016         register struct open_args /* {
 1017                 char *path;
 1018                 int flags;
 1019                 int mode;
 1020         } */ *uap;
 1021 {
 1022 
 1023         return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
 1024 }
 1025 
 1026 int
 1027 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1028     int mode)
 1029 {
              /*
               * Open (and possibly create) the file named by 'path' and install it
               * in a new descriptor of td's process.  On success the descriptor
               * number is stored in td->td_retval[0] and 0 is returned; otherwise
               * an errno value is returned.
               *
               *   path, pathseg - path name and the segment it lives in.
               *   flags         - open(2) flags; run through FFLAGS() before use.
               *   mode          - creation mode; masked with the process's
               *                   fd_cmask and ALLPERMS, with S_ISTXT cleared.
               */
 1030         struct proc *p = td->td_proc;
 1031         struct filedesc *fdp = p->p_fd;
 1032         struct file *fp;
 1033         struct vnode *vp;
 1034         struct vattr vat;
 1035         struct mount *mp;
 1036         int cmode;
 1037         struct file *nfp;
 1038         int type, indx, error;
 1039         struct flock lf;
 1040         struct nameidata nd;
 1041         int vfslocked;
 1042 
 1043         AUDIT_ARG(fflags, flags);
 1044         AUDIT_ARG(mode, mode);
              /* Having every O_ACCMODE bit set is not a valid access mode. */
 1045         if ((flags & O_ACCMODE) == O_ACCMODE)
 1046                 return (EINVAL);
 1047         flags = FFLAGS(flags);
 1048         error = falloc(td, &nfp, &indx);
 1049         if (error)
 1050                 return (error);
 1051         /* An extra reference on `nfp' has been held for us by falloc(). */
 1052         fp = nfp;
 1053         cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
 1054         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
 1055         td->td_dupfd = -1;              /* XXX check for fdopen */
 1056         error = vn_open(&nd, &flags, cmode, fp);
 1057         if (error) {
 1058                 /*
 1059                  * If the vn_open replaced the method vector, something
 1060                  * wondrous happened deep below and we just pass it up
 1061                  * pretending we know what we do.
 1062                  */
 1063                 if (error == ENXIO && fp->f_ops != &badfileops) {
 1064                         fdrop(fp, td);
 1065                         td->td_retval[0] = indx;
 1066                         return (0);
 1067                 }
 1068 
 1069                 /*
 1070                  * handle special fdopen() case.  bleh.  dupfdopen() is
 1071                  * responsible for dropping the old contents of ofiles[indx]
 1072                  * if it succeeds.
 1073                  */
 1074                 if ((error == ENODEV || error == ENXIO) &&
 1075                     td->td_dupfd >= 0 &&                /* XXX from fdopen */
 1076                     (error =
 1077                         dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
 1078                         td->td_retval[0] = indx;
 1079                         fdrop(fp, td);
 1080                         return (0);
 1081                 }
 1082                 /*
 1083                  * Clean up the descriptor, but only if another thread hadn't
 1084                  * replaced or closed it.
 1085                  */
 1086                 fdclose(fdp, fp, indx, td);
 1087                 fdrop(fp, td);
 1088 
                      /* ERESTART is reported to the caller as EINTR. */
 1089                 if (error == ERESTART)
 1090                         error = EINTR;
 1091                 return (error);
 1092         }
 1093         td->td_dupfd = 0;
 1094         vfslocked = NDHASGIANT(&nd);
 1095         NDFREE(&nd, NDF_ONLY_PNBUF);
 1096         vp = nd.ni_vp;
 1097 
              /*
               * Bind the vnode to the file.  f_data and f_ops are only filled in
               * if vn_open() left them at their initial values (NULL and
               * &badfileops respectively).
               */
 1098         FILE_LOCK(fp);
 1099         fp->f_vnode = vp;
 1100         if (fp->f_data == NULL)
 1101                 fp->f_data = vp;
 1102         fp->f_flag = flags & FMASK;
 1103         fp->f_seqcount = 1;
 1104         fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
 1105         if (fp->f_ops == &badfileops)
 1106                 fp->f_ops = &vnops;
 1107         FILE_UNLOCK(fp);
 1108 
 1109         VOP_UNLOCK(vp, 0, td);
              /*
               * O_EXLOCK / O_SHLOCK: take an F_FLOCK-style advisory lock with
               * l_start = 0 and l_len = 0 (write lock for O_EXLOCK, read lock
               * otherwise).  F_WAIT is requested unless FNONBLOCK is set.
               */
 1110         if (flags & (O_EXLOCK | O_SHLOCK)) {
 1111                 lf.l_whence = SEEK_SET;
 1112                 lf.l_start = 0;
 1113                 lf.l_len = 0;
 1114                 if (flags & O_EXLOCK)
 1115                         lf.l_type = F_WRLCK;
 1116                 else
 1117                         lf.l_type = F_RDLCK;
 1118                 type = F_FLOCK;
 1119                 if ((flags & FNONBLOCK) == 0)
 1120                         type |= F_WAIT;
 1121                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 1122                             type)) != 0)
 1123                         goto bad;
 1124                 fp->f_flag |= FHASLOCK;
 1125         }
              /*
               * O_TRUNC: set the size to 0 through VOP_SETATTR() with the vnode
               * exclusively locked, bracketed by vn_start_write() and
               * vn_finished_write().  With MAC compiled in, the setattr is only
               * attempted if mac_check_vnode_write() succeeds.
               */
 1126         if (flags & O_TRUNC) {
 1127                 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 1128                         goto bad;
 1129                 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 1130                 VATTR_NULL(&vat);
 1131                 vat.va_size = 0;
 1132                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1133 #ifdef MAC
 1134                 error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
 1135                 if (error == 0)
 1136 #endif
 1137                         error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
 1138                 VOP_UNLOCK(vp, 0, td);
 1139                 vn_finished_write(mp);
 1140                 if (error)
 1141                         goto bad;
 1142         }
 1143         VFS_UNLOCK_GIANT(vfslocked);
 1144         /*
 1145          * Release our private reference, leaving the one associated with
 1146          * the descriptor table intact.
 1147          */
 1148         fdrop(fp, td);
 1149         td->td_retval[0] = indx;
 1150         return (0);
      /*
       * Failure after vn_open() succeeded: take the file back out of the
       * descriptor table and drop our own reference to it.
       */
 1151 bad:
 1152         VFS_UNLOCK_GIANT(vfslocked);
 1153         fdclose(fdp, fp, indx, td);
 1154         fdrop(fp, td);
 1155         return (error);
 1156 }
 1157 
 1158 #ifdef COMPAT_43
 1159 /*
 1160  * Create a file.
       *
       * Only built with COMPAT_43.  Equivalent to kern_open() on the given
       * user-space path with the fixed flags O_WRONLY | O_CREAT | O_TRUNC and
       * the supplied mode.
 1161  */
 1162 #ifndef _SYS_SYSPROTO_H_
 1163 struct ocreat_args {
 1164         char    *path;
 1165         int     mode;
 1166 };
 1167 #endif
 1168 int
 1169 ocreat(td, uap)
 1170         struct thread *td;
 1171         register struct ocreat_args /* {
 1172                 char *path;
 1173                 int mode;
 1174         } */ *uap;
 1175 {
 1176 
 1177         return (kern_open(td, uap->path, UIO_USERSPACE,
 1178             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1179 }
 1180 #endif /* COMPAT_43 */
 1181 
 1182 /*
 1183  * Create a special file.
       *
       * This handler only unpacks the arguments; the work is done by
       * kern_mknod(), called with the path tagged UIO_USERSPACE.
 1184  */
 1185 #ifndef _SYS_SYSPROTO_H_
 1186 struct mknod_args {
 1187         char    *path;
 1188         int     mode;
 1189         int     dev;
 1190 };
 1191 #endif
 1192 int
 1193 mknod(td, uap)
 1194         struct thread *td;
 1195         register struct mknod_args /* {
 1196                 char *path;
 1197                 int mode;
 1198                 int dev;
 1199         } */ *uap;
 1200 {
 1201 
 1202         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1203 }
 1204 
 1205 int
 1206 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1207     int dev)
 1208 {
              /*
               * Create the special node 'path'.  The file type bits of 'mode'
               * select the kind of node (character device, block device, "bad"
               * node or whiteout); the permission bits are masked with ALLPERMS
               * and the process's fd_cmask.  'dev' becomes va_rdev.
               *
               * Returns 0 on success, EINVAL for any other file type, EEXIST if
               * the name already exists, or an error from priv_check(), namei(),
               * vn_start_write(), the MAC check or the VOP.
               */
 1209         struct vnode *vp;
 1210         struct mount *mp;
 1211         struct vattr vattr;
 1212         int error;
 1213         int whiteout = 0;
 1214         struct nameidata nd;
 1215         int vfslocked;
 1216 
 1217         AUDIT_ARG(mode, mode);
 1218         AUDIT_ARG(dev, dev);
              /* Each supported node type needs its own privilege. */
 1219         switch (mode & S_IFMT) {
 1220         case S_IFCHR:
 1221         case S_IFBLK:
 1222                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1223                 break;
 1224         case S_IFMT:
 1225                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1226                 break;
 1227         case S_IFWHT:
 1228                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1229                 break;
 1230         default:
 1231                 error = EINVAL;
 1232                 break;
 1233         }
 1234         if (error)
 1235                 return (error);
      /* The lookup is redone from here after waiting in vn_start_write(). */
 1236 restart:
 1237         bwillwrite();
 1238         NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1239             pathseg, path, td);
 1240         if ((error = namei(&nd)) != 0)
 1241                 return (error);
 1242         vfslocked = NDHASGIANT(&nd);
 1243         vp = nd.ni_vp;
 1244         if (vp != NULL) {
                      /*
                       * The name already exists.  If the parent is the same
                       * vnode as the target it is only released, otherwise it
                       * is released with vput().
                       */
 1245                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1246                 if (vp == nd.ni_dvp)
 1247                         vrele(nd.ni_dvp);
 1248                 else
 1249                         vput(nd.ni_dvp);
 1250                 vrele(vp);
 1251                 VFS_UNLOCK_GIANT(vfslocked);
 1252                 return (EEXIST);
 1253         } else {
 1254                 VATTR_NULL(&vattr);
 1255                 vattr.va_mode = (mode & ALLPERMS) &
 1256                     ~td->td_proc->p_fd->fd_cmask;
 1257                 vattr.va_rdev = dev;
 1258                 whiteout = 0;
 1259 
 1260                 switch (mode & S_IFMT) {
 1261                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1262                         vattr.va_type = VBAD;
 1263                         break;
 1264                 case S_IFCHR:
 1265                         vattr.va_type = VCHR;
 1266                         break;
 1267                 case S_IFBLK:
 1268                         vattr.va_type = VBLK;
 1269                         break;
 1270                 case S_IFWHT:
 1271                         whiteout = 1;
 1272                         break;
 1273                 default:
                              /* Other types were rejected with EINVAL above. */
 1274                         panic("kern_mknod: invalid mode");
 1275                 }
 1276         }
              /*
               * Try to start the write without sleeping (V_NOWAIT).  If that
               * fails, give up the parent vnode and Giant, wait with V_XSLEEP,
               * and redo the lookup from the top.
               */
 1277         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1278                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1279                 vput(nd.ni_dvp);
 1280                 VFS_UNLOCK_GIANT(vfslocked);
 1281                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1282                         return (error);
 1283                 goto restart;
 1284         }
 1285 #ifdef MAC
              /* The MAC create check is skipped for whiteouts. */
 1286         if (error == 0 && !whiteout)
 1287                 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
 1288                     &nd.ni_cnd, &vattr);
 1289 #endif
 1290         if (!error) {
 1291                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1292                 if (whiteout)
 1293                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1294                 else {
 1295                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1296                                                 &nd.ni_cnd, &vattr);
                              /* The new vnode is not needed by the caller. */
 1297                         if (error == 0)
 1298                                 vput(nd.ni_vp);
 1299                 }
 1300         }
 1301         NDFREE(&nd, NDF_ONLY_PNBUF);
 1302         vput(nd.ni_dvp);
 1303         vn_finished_write(mp);
 1304         VFS_UNLOCK_GIANT(vfslocked);
 1305         return (error);
 1306 }
 1307 
 1308 /*
 1309  * Create a named pipe.
       *
       * This handler only unpacks the arguments; the work is done by
       * kern_mkfifo(), called with the path tagged UIO_USERSPACE.
 1310  */
 1311 #ifndef _SYS_SYSPROTO_H_
 1312 struct mkfifo_args {
 1313         char    *path;
 1314         int     mode;
 1315 };
 1316 #endif
 1317 int
 1318 mkfifo(td, uap)
 1319         struct thread *td;
 1320         register struct mkfifo_args /* {
 1321                 char *path;
 1322                 int mode;
 1323         } */ *uap;
 1324 {
 1325 
 1326         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1327 }
 1328 
 1329 int
 1330 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1331 {
              /*
               * Create a VFIFO node named 'path' through VOP_MKNOD().  The
               * permission bits are 'mode' masked with ALLPERMS and the process's
               * fd_cmask.  Returns 0 on success, EEXIST if the name already
               * exists, or an error from namei(), vn_start_write(), the MAC
               * check or VOP_MKNOD().
               */
 1332         struct mount *mp;
 1333         struct vattr vattr;
 1334         int error;
 1335         struct nameidata nd;
 1336         int vfslocked;
 1337 
 1338         AUDIT_ARG(mode, mode);
      /* The lookup is redone from here after waiting in vn_start_write(). */
 1339 restart:
 1340         bwillwrite();
 1341         NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1342             pathseg, path, td);
 1343         if ((error = namei(&nd)) != 0)
 1344                 return (error);
 1345         vfslocked = NDHASGIANT(&nd);
              /* The name already exists: release what namei() returned. */
 1346         if (nd.ni_vp != NULL) {
 1347                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1348                 if (nd.ni_vp == nd.ni_dvp)
 1349                         vrele(nd.ni_dvp);
 1350                 else
 1351                         vput(nd.ni_dvp);
 1352                 vrele(nd.ni_vp);
 1353                 VFS_UNLOCK_GIANT(vfslocked);
 1354                 return (EEXIST);
 1355         }
              /*
               * Try to start the write without sleeping (V_NOWAIT).  If that
               * fails, give up the parent vnode and Giant, wait with V_XSLEEP,
               * and redo the lookup from the top.
               */
 1356         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1357                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1358                 vput(nd.ni_dvp);
 1359                 VFS_UNLOCK_GIANT(vfslocked);
 1360                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1361                         return (error);
 1362                 goto restart;
 1363         }
 1364         VATTR_NULL(&vattr);
 1365         vattr.va_type = VFIFO;
 1366         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1367 #ifdef MAC
 1368         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1369             &vattr);
 1370         if (error)
 1371                 goto out;
 1372 #endif
 1373         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1374         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
              /* The new vnode is not needed by the caller. */
 1375         if (error == 0)
 1376                 vput(nd.ni_vp);
 1377 #ifdef MAC
 1378 out:
 1379 #endif
 1380         vput(nd.ni_dvp);
 1381         vn_finished_write(mp);
 1382         VFS_UNLOCK_GIANT(vfslocked);
 1383         NDFREE(&nd, NDF_ONLY_PNBUF);
 1384         return (error);
 1385 }
 1386 
 1387 /*
 1388  * Make a hard file link.
       *
       * uap->path names the existing file and uap->link the new name; both are
       * passed to kern_link() as user-space pointers.
 1389  */
 1390 #ifndef _SYS_SYSPROTO_H_
 1391 struct link_args {
 1392         char    *path;
 1393         char    *link;
 1394 };
 1395 #endif
 1396 int
 1397 link(td, uap)
 1398         struct thread *td;
 1399         register struct link_args /* {
 1400                 char *path;
 1401                 char *link;
 1402         } */ *uap;
 1403 {
 1404         int error;
 1405 
 1406         error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
 1407         return (error);
 1408 }
 1409 
      /*
       * Hard link policy knobs, both off (0) by default and registered
       * read/write under the _security_bsd sysctl node.  They are consulted by
       * can_hardlink().
       */
 1410 static int hardlink_check_uid = 0;
 1411 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1412     &hardlink_check_uid, 0,
 1413     "Unprivileged processes cannot create hard links to files owned by other "
 1414     "users");
 1415 static int hardlink_check_gid = 0;
 1416 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1417     &hardlink_check_gid, 0,
 1418     "Unprivileged processes cannot create hard links to files owned by other "
 1419     "groups");
 1420 
 1421 static int
 1422 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
 1423 {
              /*
               * Decide whether 'cred' may create a hard link to vp under the
               * hardlink_check_uid / hardlink_check_gid policies.  Returns 0 if
               * allowed, otherwise the error from VOP_GETATTR() or
               * priv_check_cred(PRIV_VFS_LINK).  'td' is only used for the
               * VOP_GETATTR() call.
               */
 1424         struct vattr va;
 1425         int error;
 1426 
              /* Neither policy is enabled: no attribute lookup is needed. */
 1427         if (!hardlink_check_uid && !hardlink_check_gid)
 1428                 return (0);
 1429 
 1430         error = VOP_GETATTR(vp, &va, cred, td);
 1431         if (error != 0)
 1432                 return (error);
 1433 
              /* File owned by another uid: PRIV_VFS_LINK is required. */
 1434         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1435                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1436                 if (error)
 1437                         return (error);
 1438         }
 1439 
              /* Caller not a member of the file's group: same privilege. */
 1440         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1441                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1442                 if (error)
 1443                         return (error);
 1444         }
 1445 
 1446         return (0);
 1447 }
 1448 
 1449 int
 1450 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1451 {
              /*
               * Create the new name 'link' for the existing file 'path'.  Both
               * path names live in the segment given by 'segflg'.
               *
               * Returns 0 on success, EPERM if 'path' is a directory, EEXIST if
               * 'link' already exists, or an error from namei(),
               * vn_start_write(), vn_lock(), can_hardlink(), the MAC check or
               * VOP_LINK().
               */
 1452         struct vnode *vp;
 1453         struct mount *mp;
 1454         struct nameidata nd;
 1455         int vfslocked;
 1456         int lvfslocked;
 1457         int error;
 1458 
 1459         bwillwrite();
              /* First lookup: the existing file (the leaf is not locked here). */
 1460         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
 1461         if ((error = namei(&nd)) != 0)
 1462                 return (error);
 1463         vfslocked = NDHASGIANT(&nd);
 1464         NDFREE(&nd, NDF_ONLY_PNBUF);
 1465         vp = nd.ni_vp;
 1466         if (vp->v_type == VDIR) {
 1467                 vrele(vp);
 1468                 VFS_UNLOCK_GIANT(vfslocked);
 1469                 return (EPERM);         /* POSIX */
 1470         }
 1471         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1472                 vrele(vp);
 1473                 VFS_UNLOCK_GIANT(vfslocked);
 1474                 return (error);
 1475         }
              /*
               * Second lookup: the new name, with its parent directory locked.
               * 'nd' is reused; Giant state for this lookup is tracked
               * separately in lvfslocked.
               */
 1476         NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
 1477             segflg, link, td);
 1478         if ((error = namei(&nd)) == 0) {
 1479                 lvfslocked = NDHASGIANT(&nd);
 1480                 if (nd.ni_vp != NULL) {
 1481                         if (nd.ni_dvp == nd.ni_vp)
 1482                                 vrele(nd.ni_dvp);
 1483                         else
 1484                                 vput(nd.ni_dvp);
 1485                         vrele(nd.ni_vp);
 1486                         error = EEXIST;
                      /*
                       * NOTE(review): if vn_lock() fails here, nd.ni_dvp is
                       * not released on this path -- confirm whether
                       * vn_lock() can fail with LK_RETRY.
                       */
 1487                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
 1488                     == 0) {
 1489                         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1490                         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
                              /*
                               * The checks are chained on error == 0.  The
                               * #ifdef splices the MAC check in between
                               * can_hardlink() and VOP_LINK().
                               */
 1491                         error = can_hardlink(vp, td, td->td_ucred);
 1492                         if (error == 0)
 1493 #ifdef MAC
 1494                                 error = mac_check_vnode_link(td->td_ucred,
 1495                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1496                         if (error == 0)
 1497 #endif
 1498                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1499                         VOP_UNLOCK(vp, 0, td);
 1500                         vput(nd.ni_dvp);
 1501                 }
 1502                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1503                 VFS_UNLOCK_GIANT(lvfslocked);
 1504         }
              /* Common exit: drop the source vnode and end the write. */
 1505         vrele(vp);
 1506         vn_finished_write(mp);
 1507         VFS_UNLOCK_GIANT(vfslocked);
 1508         return (error);
 1509 }
 1510 
 1511 /*
 1512  * Make a symbolic link.
       *
       * uap->path is the text stored in the link and uap->link is the name of
       * the link to create; both are passed to kern_symlink() as user-space
       * pointers.
 1513  */
 1514 #ifndef _SYS_SYSPROTO_H_
 1515 struct symlink_args {
 1516         char    *path;
 1517         char    *link;
 1518 };
 1519 #endif
 1520 int
 1521 symlink(td, uap)
 1522         struct thread *td;
 1523         register struct symlink_args /* {
 1524                 char *path;
 1525                 char *link;
 1526         } */ *uap;
 1527 {
 1528 
 1529         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1530 }
 1531 
 1532 int
 1533 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1534 {
              /*
               * Create the symbolic link 'link' whose contents are the string
               * 'path'.  Both arguments live in the segment given by 'segflg'.
               *
               * Returns 0 on success, EEXIST if 'link' already exists, or an
               * error from copyinstr(), namei(), vn_start_write(), the MAC check
               * or VOP_SYMLINK().
               */
 1535         struct mount *mp;
 1536         struct vattr vattr;
 1537         char *syspath;
 1538         int error;
 1539         struct nameidata nd;
 1540         int vfslocked;
 1541 
              /*
               * VOP_SYMLINK() is given the target string through 'syspath'.  A
               * UIO_SYSSPACE 'path' is used directly; otherwise it is copied
               * (at most MAXPATHLEN bytes) into a namei_zone buffer that is
               * freed at 'out'.
               */
 1542         if (segflg == UIO_SYSSPACE) {
 1543                 syspath = path;
 1544         } else {
 1545                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1546                 if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
 1547                         goto out;
 1548         }
 1549         AUDIT_ARG(text, syspath);
      /* The lookup is redone from here after waiting in vn_start_write(). */
 1550 restart:
 1551         bwillwrite();
 1552         NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 1553             segflg, link, td);
 1554         if ((error = namei(&nd)) != 0)
 1555                 goto out;
 1556         vfslocked = NDHASGIANT(&nd);
              /* The name already exists: release what namei() returned. */
 1557         if (nd.ni_vp) {
 1558                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1559                 if (nd.ni_vp == nd.ni_dvp)
 1560                         vrele(nd.ni_dvp);
 1561                 else
 1562                         vput(nd.ni_dvp);
 1563                 vrele(nd.ni_vp);
 1564                 VFS_UNLOCK_GIANT(vfslocked);
 1565                 error = EEXIST;
 1566                 goto out;
 1567         }
              /*
               * Try to start the write without sleeping (V_NOWAIT).  If that
               * fails, give up the parent vnode and Giant, wait with V_XSLEEP,
               * and redo the lookup from 'restart'.
               */
 1568         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1569                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1570                 vput(nd.ni_dvp);
 1571                 VFS_UNLOCK_GIANT(vfslocked);
 1572                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1573                         goto out;
 1574                 goto restart;
 1575         }
 1576         VATTR_NULL(&vattr);
 1577         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1578 #ifdef MAC
              /* va_type is only filled in for the MAC create check. */
 1579         vattr.va_type = VLNK;
 1580         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1581             &vattr);
 1582         if (error)
 1583                 goto out2;
 1584 #endif
 1585         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1586         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
              /* The new vnode is not needed by the caller. */
 1587         if (error == 0)
 1588                 vput(nd.ni_vp);
 1589 #ifdef MAC
 1590 out2:
 1591 #endif
 1592         NDFREE(&nd, NDF_ONLY_PNBUF);
 1593         vput(nd.ni_dvp);
 1594         vn_finished_write(mp);
 1595         VFS_UNLOCK_GIANT(vfslocked);
      /* Free the copied-in target string, if one was allocated above. */
 1596 out:
 1597         if (segflg != UIO_SYSSPACE)
 1598                 uma_zfree(namei_zone, syspath);
 1599         return (error);
 1600 }
 1601 
 1602 /*
 1603  * Delete a whiteout from the filesystem.
       *
       * The name is looked up with DOWHITEOUT.  The whiteout is removed with
       * VOP_WHITEOUT(..., DELETE) only when the lookup found no vnode and set
       * ISWHITEOUT in the component name flags.  In every other case EEXIST is
       * returned.  Other errors come from namei(), vn_start_write() or
       * VOP_WHITEOUT().
 1604  */
 1605 int
 1606 undelete(td, uap)
 1607         struct thread *td;
 1608         register struct undelete_args /* {
 1609                 char *path;
 1610         } */ *uap;
 1611 {
 1612         int error;
 1613         struct mount *mp;
 1614         struct nameidata nd;
 1615         int vfslocked;
 1616 
      /* The lookup is redone from here after waiting in vn_start_write(). */
 1617 restart:
 1618         bwillwrite();
 1619         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
 1620             UIO_USERSPACE, uap->path, td);
 1621         error = namei(&nd);
 1622         if (error)
 1623                 return (error);
 1624         vfslocked = NDHASGIANT(&nd);
 1625 
              /* A real vnode exists, or the entry is not a whiteout. */
 1626         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1627                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1628                 if (nd.ni_vp == nd.ni_dvp)
 1629                         vrele(nd.ni_dvp);
 1630                 else
 1631                         vput(nd.ni_dvp);
 1632                 if (nd.ni_vp)
 1633                         vrele(nd.ni_vp);
 1634                 VFS_UNLOCK_GIANT(vfslocked);
 1635                 return (EEXIST);
 1636         }
              /*
               * Try to start the write without sleeping (V_NOWAIT).  If that
               * fails, give up the parent vnode and Giant, wait with V_XSLEEP,
               * and redo the lookup from the top.
               */
 1637         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1638                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1639                 vput(nd.ni_dvp);
 1640                 VFS_UNLOCK_GIANT(vfslocked);
 1641                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1642                         return (error);
 1643                 goto restart;
 1644         }
 1645         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1646         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1647         NDFREE(&nd, NDF_ONLY_PNBUF);
 1648         vput(nd.ni_dvp);
 1649         vn_finished_write(mp);
 1650         VFS_UNLOCK_GIANT(vfslocked);
 1651         return (error);
 1652 }
 1653 
 1654 /*
 1655  * Delete a name from the filesystem.
       *
       * This handler only unpacks the argument; the work is done by
       * kern_unlink(), called with the path tagged UIO_USERSPACE.
 1656  */
 1657 #ifndef _SYS_SYSPROTO_H_
 1658 struct unlink_args {
 1659         char    *path;
 1660 };
 1661 #endif
 1662 int
 1663 unlink(td, uap)
 1664         struct thread *td;
 1665         struct unlink_args /* {
 1666                 char *path;
 1667         } */ *uap;
 1668 {
 1669         int error;
 1670 
 1671         error = kern_unlink(td, uap->path, UIO_USERSPACE);
 1672         return (error);
 1673 }
 1674 
 1675 int
 1676 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1677 {
              /*
               * Remove the directory entry 'path' through VOP_REMOVE().
               *
               * Returns 0 on success, EPERM if the target is a directory, EBUSY
               * if it is the root of a mounted filesystem (VV_ROOT), or an error
               * from namei() (with EINVAL reported as EPERM), vn_start_write(),
               * the MAC check or VOP_REMOVE().
               */
 1678         struct mount *mp;
 1679         struct vnode *vp;
 1680         int error;
 1681         struct nameidata nd;
 1682         int vfslocked;
 1683 
      /* The lookup is redone from here after waiting in vn_start_write(). */
 1684 restart:
 1685         bwillwrite();
 1686         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1687             pathseg, path, td);
 1688         if ((error = namei(&nd)) != 0)
 1689                 return (error == EINVAL ? EPERM : error);
 1690         vfslocked = NDHASGIANT(&nd);
 1691         vp = nd.ni_vp;
 1692         if (vp->v_type == VDIR)
 1693                 error = EPERM;          /* POSIX */
 1694         else {
 1695                 /*
 1696                  * The root of a mounted filesystem cannot be deleted.
 1697                  *
 1698                  * XXX: can this only be a VDIR case?
 1699                  */
 1700                 if (vp->v_vflag & VV_ROOT)
 1701                         error = EBUSY;
 1702         }
 1703         if (error == 0) {
                      /*
                       * Try to start the write without sleeping (V_NOWAIT).
                       * If that fails, release both vnodes and Giant, wait
                       * with V_XSLEEP, and redo the lookup from the top.
                       */
 1704                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1705                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1706                         vput(nd.ni_dvp);
 1707                         if (vp == nd.ni_dvp)
 1708                                 vrele(vp);
 1709                         else
 1710                                 vput(vp);
 1711                         VFS_UNLOCK_GIANT(vfslocked);
 1712                         if ((error = vn_start_write(NULL, &mp,
 1713                             V_XSLEEP | PCATCH)) != 0)
 1714                                 return (error);
 1715                         goto restart;
 1716                 }
 1717 #ifdef MAC
 1718                 error = mac_check_vnode_unlink(td->td_ucred, nd.ni_dvp, vp,
 1719                     &nd.ni_cnd);
 1720                 if (error)
 1721                         goto out;
 1722 #endif
 1723                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 1724                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1725 #ifdef MAC
 1726 out:
 1727 #endif
 1728                 vn_finished_write(mp);
 1729         }
              /*
               * Common exit for success and failure.  When the target is the
               * same vnode as its parent, the vput() of the parent is followed
               * by only a vrele() of the target.
               */
 1730         NDFREE(&nd, NDF_ONLY_PNBUF);
 1731         vput(nd.ni_dvp);
 1732         if (vp == nd.ni_dvp)
 1733                 vrele(vp);
 1734         else
 1735                 vput(vp);
 1736         VFS_UNLOCK_GIANT(vfslocked);
 1737         return (error);
 1738 }
 1739 
 1740 /*
 1741  * Reposition read/write file offset.
       *
       * On success the new offset is stored in the file (f_offset) and written
       * to td->td_retval as an off_t, and 0 is returned.  Errors:
       *      ESPIPE          the file's ops lack DFLAG_SEEKABLE.
       *      EOVERFLOW       the resulting offset would exceed OFF_MAX (not
       *                      checked for VCHR vnodes).
       *      EINVAL          unknown 'whence', or a negative resulting offset
       *                      on anything but a VCHR vnode.
       * Errors from fget(), VOP_GETATTR() and fo_ioctl() are passed through.
 1742  */
 1743 #ifndef _SYS_SYSPROTO_H_
 1744 struct lseek_args {
 1745         int     fd;
 1746         int     pad;
 1747         off_t   offset;
 1748         int     whence;
 1749 };
 1750 #endif
 1751 int
 1752 lseek(td, uap)
 1753         struct thread *td;
 1754         register struct lseek_args /* {
 1755                 int fd;
 1756                 int pad;
 1757                 off_t offset;
 1758                 int whence;
 1759         } */ *uap;
 1760 {
 1761         struct ucred *cred = td->td_ucred;
 1762         struct file *fp;
 1763         struct vnode *vp;
 1764         struct vattr vattr;
 1765         off_t offset;
 1766         int error, noneg;
 1767         int vfslocked;
 1768 
 1769         if ((error = fget(td, uap->fd, &fp)) != 0)
 1770                 return (error);
 1771         if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
 1772                 fdrop(fp, td);
 1773                 return (ESPIPE);
 1774         }
 1775         vp = fp->f_vnode;
 1776         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
              /* Range checks on the offset apply to everything except VCHR. */
 1777         noneg = (vp->v_type != VCHR);
 1778         offset = uap->offset;
 1779         switch (uap->whence) {
              /* Relative to the current file offset. */
 1780         case L_INCR:
 1781                 if (noneg &&
 1782                     (fp->f_offset < 0 ||
 1783                     (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
 1784                         error = EOVERFLOW;
 1785                         break;
 1786                 }
 1787                 offset += fp->f_offset;
 1788                 break;
              /* Relative to the file size reported by VOP_GETATTR(). */
 1789         case L_XTND:
 1790                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
 1791                 error = VOP_GETATTR(vp, &vattr, cred, td);
 1792                 VOP_UNLOCK(vp, 0, td);
 1793                 if (error)
 1794                         break;
 1795                 if (noneg &&
 1796                     (vattr.va_size > OFF_MAX ||
 1797                     (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 1798                         error = EOVERFLOW;
 1799                         break;
 1800                 }
 1801                 offset += vattr.va_size;
 1802                 break;
              /* Absolute: uap->offset is used as is. */
 1803         case L_SET:
 1804                 break;
              /* Delegated to the file's ioctl, which updates 'offset' in place. */
 1805         case SEEK_DATA:
 1806                 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 1807                 break;
 1808         case SEEK_HOLE:
 1809                 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 1810                 break;
 1811         default:
 1812                 error = EINVAL;
 1813         }
 1814         if (error == 0 && noneg && offset < 0)
 1815                 error = EINVAL;
 1816         if (error != 0)
 1817                 goto drop;
 1818         fp->f_offset = offset;
 1819         *(off_t *)(td->td_retval) = fp->f_offset;
      /* The file reference is dropped before Giant is released. */
 1820 drop:
 1821         fdrop(fp, td);
 1822         VFS_UNLOCK_GIANT(vfslocked);
 1823         return (error);
 1824 }
 1825 
 1826 #if defined(COMPAT_43)
 1827 /*
 1828  * Reposition read/write file offset.
 1829  */
 1830 #ifndef _SYS_SYSPROTO_H_
 1831 struct olseek_args {
 1832         int     fd;
 1833         long    offset;
 1834         int     whence;
 1835 };
 1836 #endif
 1837 int
 1838 olseek(td, uap)
 1839         struct thread *td;
 1840         register struct olseek_args /* {
 1841                 int fd;
 1842                 long offset;
 1843                 int whence;
 1844         } */ *uap;
 1845 {
 1846         struct lseek_args /* {
 1847                 int fd;
 1848                 int pad;
 1849                 off_t offset;
 1850                 int whence;
 1851         } */ nuap;
 1852 
 1853         nuap.fd = uap->fd;
 1854         nuap.offset = uap->offset;
 1855         nuap.whence = uap->whence;
 1856         return (lseek(td, &nuap));
 1857 }
 1858 #endif /* COMPAT_43 */
 1859 
 1860 /* Version with the 'pad' argument */
 1861 int
 1862 freebsd6_lseek(td, uap)
 1863         struct thread *td;
 1864         register struct freebsd6_lseek_args *uap;
 1865 {
 1866         struct lseek_args ouap;
 1867 
 1868         ouap.fd = uap->fd;
 1869         ouap.offset = uap->offset;
 1870         ouap.whence = uap->whence;
 1871         return (lseek(td, &ouap));
 1872 }
 1873 
 1874 /*
 1875  * Check access permissions using passed credentials.
 1876  */
 1877 static int
 1878 vn_access(vp, user_flags, cred, td)
 1879         struct vnode    *vp;
 1880         int             user_flags;
 1881         struct ucred    *cred;
 1882         struct thread   *td;
 1883 {
 1884         int error, flags;
 1885 
 1886         /* Flags == 0 means only check for existence. */
 1887         error = 0;
 1888         if (user_flags) {
 1889                 flags = 0;
 1890                 if (user_flags & R_OK)
 1891                         flags |= VREAD;
 1892                 if (user_flags & W_OK)
 1893                         flags |= VWRITE;
 1894                 if (user_flags & X_OK)
 1895                         flags |= VEXEC;
 1896 #ifdef MAC
 1897                 error = mac_check_vnode_access(cred, vp, flags);
 1898                 if (error)
 1899                         return (error);
 1900 #endif
 1901                 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1902                         error = VOP_ACCESS(vp, flags, cred, td);
 1903         }
 1904         return (error);
 1905 }
 1906 
 1907 /*
 1908  * Check access permissions using "real" credentials.
 1909  */
 1910 #ifndef _SYS_SYSPROTO_H_
 1911 struct access_args {
 1912         char    *path;
 1913         int     flags;
 1914 };
 1915 #endif
 1916 int
 1917 access(td, uap)
 1918         struct thread *td;
 1919         register struct access_args /* {
 1920                 char *path;
 1921                 int flags;
 1922         } */ *uap;
 1923 {
 1924 
 1925         return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
 1926 }
 1927 
 1928 int
 1929 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 1930 {
 1931         struct ucred *cred, *tmpcred;
 1932         register struct vnode *vp;
 1933         struct nameidata nd;
 1934         int vfslocked;
 1935         int error;
 1936 
 1937         /*
 1938          * Create and modify a temporary credential instead of one that
 1939          * is potentially shared.  This could also mess up socket
 1940          * buffer accounting which can run in an interrupt context.
 1941          */
 1942         cred = td->td_ucred;
 1943         tmpcred = crdup(cred);
 1944         tmpcred->cr_uid = cred->cr_ruid;
 1945         tmpcred->cr_groups[0] = cred->cr_rgid;
 1946         td->td_ucred = tmpcred;
 1947         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 1948             AUDITVNODE1, pathseg, path, td);
 1949         if ((error = namei(&nd)) != 0)
 1950                 goto out1;
 1951         vfslocked = NDHASGIANT(&nd);
 1952         vp = nd.ni_vp;
 1953 
 1954         error = vn_access(vp, flags, tmpcred, td);
 1955         NDFREE(&nd, NDF_ONLY_PNBUF);
 1956         vput(vp);
 1957         VFS_UNLOCK_GIANT(vfslocked);
 1958 out1:
 1959         td->td_ucred = cred;
 1960         crfree(tmpcred);
 1961         return (error);
 1962 }
 1963 
 1964 /*
 1965  * Check access permissions using "effective" credentials.
 1966  */
 1967 #ifndef _SYS_SYSPROTO_H_
 1968 struct eaccess_args {
 1969         char    *path;
 1970         int     flags;
 1971 };
 1972 #endif
 1973 int
 1974 eaccess(td, uap)
 1975         struct thread *td;
 1976         register struct eaccess_args /* {
 1977                 char *path;
 1978                 int flags;
 1979         } */ *uap;
 1980 {
 1981 
 1982         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
 1983 }
 1984 
 1985 int
 1986 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
 1987 {
 1988         struct nameidata nd;
 1989         struct vnode *vp;
 1990         int vfslocked;
 1991         int error;
 1992 
 1993         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 1994             pathseg, path, td);
 1995         if ((error = namei(&nd)) != 0)
 1996                 return (error);
 1997         vp = nd.ni_vp;
 1998         vfslocked = NDHASGIANT(&nd);
 1999         error = vn_access(vp, flags, td->td_ucred, td);
 2000         NDFREE(&nd, NDF_ONLY_PNBUF);
 2001         vput(vp);
 2002         VFS_UNLOCK_GIANT(vfslocked);
 2003         return (error);
 2004 }
 2005 
 2006 #if defined(COMPAT_43)
 2007 /*
 2008  * Get file status; this version follows links.
 2009  */
 2010 #ifndef _SYS_SYSPROTO_H_
 2011 struct ostat_args {
 2012         char    *path;
 2013         struct ostat *ub;
 2014 };
 2015 #endif
 2016 int
 2017 ostat(td, uap)
 2018         struct thread *td;
 2019         register struct ostat_args /* {
 2020                 char *path;
 2021                 struct ostat *ub;
 2022         } */ *uap;
 2023 {
 2024         struct stat sb;
 2025         struct ostat osb;
 2026         int error;
 2027 
 2028         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2029         if (error)
 2030                 return (error);
 2031         cvtstat(&sb, &osb);
 2032         error = copyout(&osb, uap->ub, sizeof (osb));
 2033         return (error);
 2034 }
 2035 
 2036 /*
 2037  * Get file status; this version does not follow links.
 2038  */
 2039 #ifndef _SYS_SYSPROTO_H_
 2040 struct olstat_args {
 2041         char    *path;
 2042         struct ostat *ub;
 2043 };
 2044 #endif
 2045 int
 2046 olstat(td, uap)
 2047         struct thread *td;
 2048         register struct olstat_args /* {
 2049                 char *path;
 2050                 struct ostat *ub;
 2051         } */ *uap;
 2052 {
 2053         struct stat sb;
 2054         struct ostat osb;
 2055         int error;
 2056 
 2057         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2058         if (error)
 2059                 return (error);
 2060         cvtstat(&sb, &osb);
 2061         error = copyout(&osb, uap->ub, sizeof (osb));
 2062         return (error);
 2063 }
 2064 
 2065 /*
 2066  * Convert from an old to a new stat structure.
 2067  */
 2068 void
 2069 cvtstat(st, ost)
 2070         struct stat *st;
 2071         struct ostat *ost;
 2072 {
 2073 
 2074         ost->st_dev = st->st_dev;
 2075         ost->st_ino = st->st_ino;
 2076         ost->st_mode = st->st_mode;
 2077         ost->st_nlink = st->st_nlink;
 2078         ost->st_uid = st->st_uid;
 2079         ost->st_gid = st->st_gid;
 2080         ost->st_rdev = st->st_rdev;
 2081         if (st->st_size < (quad_t)1 << 32)
 2082                 ost->st_size = st->st_size;
 2083         else
 2084                 ost->st_size = -2;
 2085         ost->st_atime = st->st_atime;
 2086         ost->st_mtime = st->st_mtime;
 2087         ost->st_ctime = st->st_ctime;
 2088         ost->st_blksize = st->st_blksize;
 2089         ost->st_blocks = st->st_blocks;
 2090         ost->st_flags = st->st_flags;
 2091         ost->st_gen = st->st_gen;
 2092 }
 2093 #endif /* COMPAT_43 */
 2094 
 2095 /*
 2096  * Get file status; this version follows links.
 2097  */
 2098 #ifndef _SYS_SYSPROTO_H_
 2099 struct stat_args {
 2100         char    *path;
 2101         struct stat *ub;
 2102 };
 2103 #endif
 2104 int
 2105 stat(td, uap)
 2106         struct thread *td;
 2107         register struct stat_args /* {
 2108                 char *path;
 2109                 struct stat *ub;
 2110         } */ *uap;
 2111 {
 2112         struct stat sb;
 2113         int error;
 2114 
 2115         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2116         if (error == 0)
 2117                 error = copyout(&sb, uap->ub, sizeof (sb));
 2118         return (error);
 2119 }
 2120 
 2121 int
 2122 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2123 {
 2124         struct nameidata nd;
 2125         struct stat sb;
 2126         int error, vfslocked;
 2127 
 2128         NDINIT(&nd, LOOKUP,
 2129             FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
 2130             pathseg, path, td);
 2131         if ((error = namei(&nd)) != 0)
 2132                 return (error);
 2133         vfslocked = NDHASGIANT(&nd);
 2134         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2135         if (!error) {
 2136                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2137                 if (S_ISREG(sb.st_mode))
 2138                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2139         }
 2140         NDFREE(&nd, NDF_ONLY_PNBUF);
 2141         vput(nd.ni_vp);
 2142         VFS_UNLOCK_GIANT(vfslocked);
 2143         if (mtx_owned(&Giant))
 2144                 printf("stat(%d): %s\n", vfslocked, path);
 2145         if (error)
 2146                 return (error);
 2147         *sbp = sb;
 2148 #ifdef KTRACE
 2149         if (KTRPOINT(td, KTR_STRUCT))
 2150                 ktrstat(&sb);
 2151 #endif
 2152         return (0);
 2153 }
 2154 
 2155 /*
 2156  * Get file status; this version does not follow links.
 2157  */
 2158 #ifndef _SYS_SYSPROTO_H_
 2159 struct lstat_args {
 2160         char    *path;
 2161         struct stat *ub;
 2162 };
 2163 #endif
 2164 int
 2165 lstat(td, uap)
 2166         struct thread *td;
 2167         register struct lstat_args /* {
 2168                 char *path;
 2169                 struct stat *ub;
 2170         } */ *uap;
 2171 {
 2172         struct stat sb;
 2173         int error;
 2174 
 2175         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2176         if (error == 0)
 2177                 error = copyout(&sb, uap->ub, sizeof (sb));
 2178         return (error);
 2179 }
 2180 
 2181 int
 2182 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2183 {
 2184         struct vnode *vp;
 2185         struct stat sb;
 2186         struct nameidata nd;
 2187         int error, vfslocked;
 2188 
 2189         NDINIT(&nd, LOOKUP,
 2190             NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
 2191             pathseg, path, td);
 2192         if ((error = namei(&nd)) != 0)
 2193                 return (error);
 2194         vfslocked = NDHASGIANT(&nd);
 2195         vp = nd.ni_vp;
 2196         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 2197         NDFREE(&nd, NDF_ONLY_PNBUF);
 2198         vput(vp);
 2199         VFS_UNLOCK_GIANT(vfslocked);
 2200         if (error)
 2201                 return (error);
 2202         *sbp = sb;
 2203 #ifdef KTRACE
 2204         if (KTRPOINT(td, KTR_STRUCT))
 2205                 ktrstat(&sb);
 2206 #endif
 2207         return (0);
 2208 }
 2209 
 2210 /*
 2211  * Implementation of the NetBSD [l]stat() functions.
 2212  */
 2213 void
 2214 cvtnstat(sb, nsb)
 2215         struct stat *sb;
 2216         struct nstat *nsb;
 2217 {
 2218         bzero(nsb, sizeof *nsb);
 2219         nsb->st_dev = sb->st_dev;
 2220         nsb->st_ino = sb->st_ino;
 2221         nsb->st_mode = sb->st_mode;
 2222         nsb->st_nlink = sb->st_nlink;
 2223         nsb->st_uid = sb->st_uid;
 2224         nsb->st_gid = sb->st_gid;
 2225         nsb->st_rdev = sb->st_rdev;
 2226         nsb->st_atimespec = sb->st_atimespec;
 2227         nsb->st_mtimespec = sb->st_mtimespec;
 2228         nsb->st_ctimespec = sb->st_ctimespec;
 2229         nsb->st_size = sb->st_size;
 2230         nsb->st_blocks = sb->st_blocks;
 2231         nsb->st_blksize = sb->st_blksize;
 2232         nsb->st_flags = sb->st_flags;
 2233         nsb->st_gen = sb->st_gen;
 2234         nsb->st_birthtimespec = sb->st_birthtimespec;
 2235 }
 2236 
 2237 #ifndef _SYS_SYSPROTO_H_
 2238 struct nstat_args {
 2239         char    *path;
 2240         struct nstat *ub;
 2241 };
 2242 #endif
 2243 int
 2244 nstat(td, uap)
 2245         struct thread *td;
 2246         register struct nstat_args /* {
 2247                 char *path;
 2248                 struct nstat *ub;
 2249         } */ *uap;
 2250 {
 2251         struct stat sb;
 2252         struct nstat nsb;
 2253         int error;
 2254 
 2255         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2256         if (error)
 2257                 return (error);
 2258         cvtnstat(&sb, &nsb);
 2259         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2260         return (error);
 2261 }
 2262 
 2263 /*
 2264  * NetBSD lstat.  Get file status; this version does not follow links.
 2265  */
 2266 #ifndef _SYS_SYSPROTO_H_
 2267 struct lstat_args {
 2268         char    *path;
 2269         struct stat *ub;
 2270 };
 2271 #endif
 2272 int
 2273 nlstat(td, uap)
 2274         struct thread *td;
 2275         register struct nlstat_args /* {
 2276                 char *path;
 2277                 struct nstat *ub;
 2278         } */ *uap;
 2279 {
 2280         struct stat sb;
 2281         struct nstat nsb;
 2282         int error;
 2283 
 2284         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2285         if (error)
 2286                 return (error);
 2287         cvtnstat(&sb, &nsb);
 2288         error = copyout(&nsb, uap->ub, sizeof (nsb));
 2289         return (error);
 2290 }
 2291 
 2292 /*
 2293  * Get configurable pathname variables.
 2294  */
 2295 #ifndef _SYS_SYSPROTO_H_
 2296 struct pathconf_args {
 2297         char    *path;
 2298         int     name;
 2299 };
 2300 #endif
 2301 int
 2302 pathconf(td, uap)
 2303         struct thread *td;
 2304         register struct pathconf_args /* {
 2305                 char *path;
 2306                 int name;
 2307         } */ *uap;
 2308 {
 2309 
 2310         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
 2311 }
 2312 
 2313 int
 2314 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
 2315 {
 2316         struct nameidata nd;
 2317         int error, vfslocked;
 2318 
 2319         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2320             AUDITVNODE1, pathseg, path, td);
 2321         if ((error = namei(&nd)) != 0)
 2322                 return (error);
 2323         vfslocked = NDHASGIANT(&nd);
 2324         NDFREE(&nd, NDF_ONLY_PNBUF);
 2325 
 2326         /* If asynchronous I/O is available, it works for all files. */
 2327         if (name == _PC_ASYNC_IO)
 2328                 td->td_retval[0] = async_io_version;
 2329         else
 2330                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2331         vput(nd.ni_vp);
 2332         VFS_UNLOCK_GIANT(vfslocked);
 2333         return (error);
 2334 }
 2335 
 2336 /*
 2337  * Return target name of a symbolic link.
 2338  */
 2339 #ifndef _SYS_SYSPROTO_H_
 2340 struct readlink_args {
 2341         char    *path;
 2342         char    *buf;
 2343         int     count;
 2344 };
 2345 #endif
 2346 int
 2347 readlink(td, uap)
 2348         struct thread *td;
 2349         register struct readlink_args /* {
 2350                 char *path;
 2351                 char *buf;
 2352                 int count;
 2353         } */ *uap;
 2354 {
 2355 
 2356         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2357             UIO_USERSPACE, uap->count));
 2358 }
 2359 
 2360 int
 2361 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2362     enum uio_seg bufseg, int count)
 2363 {
 2364         register struct vnode *vp;
 2365         struct iovec aiov;
 2366         struct uio auio;
 2367         int error;
 2368         struct nameidata nd;
 2369         int vfslocked;
 2370 
 2371         if (count > INT_MAX)
 2372                 return (EINVAL);
 2373 
 2374         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
 2375             AUDITVNODE1, pathseg, path, td);
 2376         if ((error = namei(&nd)) != 0)
 2377                 return (error);
 2378         NDFREE(&nd, NDF_ONLY_PNBUF);
 2379         vfslocked = NDHASGIANT(&nd);
 2380         vp = nd.ni_vp;
 2381 #ifdef MAC
 2382         error = mac_check_vnode_readlink(td->td_ucred, vp);
 2383         if (error) {
 2384                 vput(vp);
 2385                 VFS_UNLOCK_GIANT(vfslocked);
 2386                 return (error);
 2387         }
 2388 #endif
 2389         if (vp->v_type != VLNK)
 2390                 error = EINVAL;
 2391         else {
 2392                 aiov.iov_base = buf;
 2393                 aiov.iov_len = count;
 2394                 auio.uio_iov = &aiov;
 2395                 auio.uio_iovcnt = 1;
 2396                 auio.uio_offset = 0;
 2397                 auio.uio_rw = UIO_READ;
 2398                 auio.uio_segflg = bufseg;
 2399                 auio.uio_td = td;
 2400                 auio.uio_resid = count;
 2401                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2402         }
 2403         vput(vp);
 2404         VFS_UNLOCK_GIANT(vfslocked);
 2405         td->td_retval[0] = count - auio.uio_resid;
 2406         return (error);
 2407 }
 2408 
 2409 /*
 2410  * Common implementation code for chflags() and fchflags().
 2411  */
 2412 static int
 2413 setfflags(td, vp, flags)
 2414         struct thread *td;
 2415         struct vnode *vp;
 2416         int flags;
 2417 {
 2418         int error;
 2419         struct mount *mp;
 2420         struct vattr vattr;
 2421 
 2422         /*
 2423          * Prevent non-root users from setting flags on devices.  When
 2424          * a device is reused, users can retain ownership of the device
 2425          * if they are allowed to set flags and programs assume that
 2426          * chown can't fail when done as root.
 2427          */
 2428         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2429                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2430                 if (error)
 2431                         return (error);
 2432         }
 2433 
 2434         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2435                 return (error);
 2436         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 2437         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2438         VATTR_NULL(&vattr);
 2439         vattr.va_flags = flags;
 2440 #ifdef MAC
 2441         error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
 2442         if (error == 0)
 2443 #endif
 2444                 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
 2445         VOP_UNLOCK(vp, 0, td);
 2446         vn_finished_write(mp);
 2447         return (error);
 2448 }
 2449 
 2450 /*
 2451  * Change flags of a file given a path name.
 2452  */
 2453 #ifndef _SYS_SYSPROTO_H_
 2454 struct chflags_args {
 2455         char    *path;
 2456         int     flags;
 2457 };
 2458 #endif
 2459 int
 2460 chflags(td, uap)
 2461         struct thread *td;
 2462         register struct chflags_args /* {
 2463                 char *path;
 2464                 int flags;
 2465         } */ *uap;
 2466 {
 2467         int error;
 2468         struct nameidata nd;
 2469         int vfslocked;
 2470 
 2471         AUDIT_ARG(fflags, uap->flags);
 2472         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2473             uap->path, td);
 2474         if ((error = namei(&nd)) != 0)
 2475                 return (error);
 2476         NDFREE(&nd, NDF_ONLY_PNBUF);
 2477         vfslocked = NDHASGIANT(&nd);
 2478         error = setfflags(td, nd.ni_vp, uap->flags);
 2479         vrele(nd.ni_vp);
 2480         VFS_UNLOCK_GIANT(vfslocked);
 2481         return (error);
 2482 }
 2483 
 2484 /*
 2485  * Same as chflags() but doesn't follow symlinks.
 2486  */
 2487 int
 2488 lchflags(td, uap)
 2489         struct thread *td;
 2490         register struct lchflags_args /* {
 2491                 char *path;
 2492                 int flags;
 2493         } */ *uap;
 2494 {
 2495         int error;
 2496         struct nameidata nd;
 2497         int vfslocked;
 2498 
 2499         AUDIT_ARG(fflags, uap->flags);
 2500         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2501             uap->path, td);
 2502         if ((error = namei(&nd)) != 0)
 2503                 return (error);
 2504         vfslocked = NDHASGIANT(&nd);
 2505         NDFREE(&nd, NDF_ONLY_PNBUF);
 2506         error = setfflags(td, nd.ni_vp, uap->flags);
 2507         vrele(nd.ni_vp);
 2508         VFS_UNLOCK_GIANT(vfslocked);
 2509         return (error);
 2510 }
 2511 
 2512 /*
 2513  * Change flags of a file given a file descriptor.
 2514  */
 2515 #ifndef _SYS_SYSPROTO_H_
 2516 struct fchflags_args {
 2517         int     fd;
 2518         int     flags;
 2519 };
 2520 #endif
 2521 int
 2522 fchflags(td, uap)
 2523         struct thread *td;
 2524         register struct fchflags_args /* {
 2525                 int fd;
 2526                 int flags;
 2527         } */ *uap;
 2528 {
 2529         struct file *fp;
 2530         int vfslocked;
 2531         int error;
 2532 
 2533         AUDIT_ARG(fd, uap->fd);
 2534         AUDIT_ARG(fflags, uap->flags);
 2535         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 2536                 return (error);
 2537         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2538 #ifdef AUDIT
 2539         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY, td);
 2540         AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
 2541         VOP_UNLOCK(fp->f_vnode, 0, td);
 2542 #endif
 2543         error = setfflags(td, fp->f_vnode, uap->flags);
 2544         VFS_UNLOCK_GIANT(vfslocked);
 2545         fdrop(fp, td);
 2546         return (error);
 2547 }
 2548 
 2549 /*
 2550  * Common implementation code for chmod(), lchmod() and fchmod().
 2551  */
 2552 static int
 2553 setfmode(td, vp, mode)
 2554         struct thread *td;
 2555         struct vnode *vp;
 2556         int mode;
 2557 {
 2558         int error;
 2559         struct mount *mp;
 2560         struct vattr vattr;
 2561 
 2562         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2563                 return (error);
 2564         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 2565         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2566         VATTR_NULL(&vattr);
 2567         vattr.va_mode = mode & ALLPERMS;
 2568 #ifdef MAC
 2569         error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
 2570         if (error == 0)
 2571 #endif
 2572                 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
 2573         VOP_UNLOCK(vp, 0, td);
 2574         vn_finished_write(mp);
 2575         return (error);
 2576 }
 2577 
 2578 /*
 2579  * Change mode of a file given path name.
 2580  */
 2581 #ifndef _SYS_SYSPROTO_H_
 2582 struct chmod_args {
 2583         char    *path;
 2584         int     mode;
 2585 };
 2586 #endif
 2587 int
 2588 chmod(td, uap)
 2589         struct thread *td;
 2590         register struct chmod_args /* {
 2591                 char *path;
 2592                 int mode;
 2593         } */ *uap;
 2594 {
 2595 
 2596         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2597 }
 2598 
 2599 int
 2600 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2601 {
 2602         int error;
 2603         struct nameidata nd;
 2604         int vfslocked;
 2605 
 2606         AUDIT_ARG(mode, mode);
 2607         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 2608         if ((error = namei(&nd)) != 0)
 2609                 return (error);
 2610         vfslocked = NDHASGIANT(&nd);
 2611         NDFREE(&nd, NDF_ONLY_PNBUF);
 2612         error = setfmode(td, nd.ni_vp, mode);
 2613         vrele(nd.ni_vp);
 2614         VFS_UNLOCK_GIANT(vfslocked);
 2615         return (error);
 2616 }
 2617 
 2618 /*
 2619  * Change mode of a file given path name (don't follow links.)
 2620  */
 2621 #ifndef _SYS_SYSPROTO_H_
 2622 struct lchmod_args {
 2623         char    *path;
 2624         int     mode;
 2625 };
 2626 #endif
 2627 int
 2628 lchmod(td, uap)
 2629         struct thread *td;
 2630         register struct lchmod_args /* {
 2631                 char *path;
 2632                 int mode;
 2633         } */ *uap;
 2634 {
 2635         int error;
 2636         struct nameidata nd;
 2637         int vfslocked;
 2638 
 2639         AUDIT_ARG(mode, (mode_t)uap->mode);
 2640         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
 2641             uap->path, td);
 2642         if ((error = namei(&nd)) != 0)
 2643                 return (error);
 2644         vfslocked = NDHASGIANT(&nd);
 2645         NDFREE(&nd, NDF_ONLY_PNBUF);
 2646         error = setfmode(td, nd.ni_vp, uap->mode);
 2647         vrele(nd.ni_vp);
 2648         VFS_UNLOCK_GIANT(vfslocked);
 2649         return (error);
 2650 }
 2651 
 2652 /*
 2653  * Change mode of a file given a file descriptor.
 2654  */
 2655 #ifndef _SYS_SYSPROTO_H_
 2656 struct fchmod_args {
 2657         int     fd;
 2658         int     mode;
 2659 };
 2660 #endif
 2661 int
 2662 fchmod(td, uap)
 2663         struct thread *td;
 2664         register struct fchmod_args /* {
 2665                 int fd;
 2666                 int mode;
 2667         } */ *uap;
 2668 {
 2669         struct file *fp;
 2670         int vfslocked;
 2671         int error;
 2672 
 2673         AUDIT_ARG(fd, uap->fd);
 2674         AUDIT_ARG(mode, uap->mode);
 2675         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 2676                 return (error);
 2677         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2678 #ifdef AUDIT
 2679         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY, td);
 2680         AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
 2681         VOP_UNLOCK(fp->f_vnode, 0, td);
 2682 #endif
 2683         error = setfmode(td, fp->f_vnode, uap->mode);
 2684         VFS_UNLOCK_GIANT(vfslocked);
 2685         fdrop(fp, td);
 2686         return (error);
 2687 }
 2688 
 2689 /*
 2690  * Common implementation for chown(), lchown(), and fchown()
 2691  */
 2692 static int
 2693 setfown(td, vp, uid, gid)
 2694         struct thread *td;
 2695         struct vnode *vp;
 2696         uid_t uid;
 2697         gid_t gid;
 2698 {
 2699         int error;
 2700         struct mount *mp;
 2701         struct vattr vattr;
 2702 
 2703         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2704                 return (error);
 2705         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 2706         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2707         VATTR_NULL(&vattr);
 2708         vattr.va_uid = uid;
 2709         vattr.va_gid = gid;
 2710 #ifdef MAC
 2711         error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
 2712             vattr.va_gid);
 2713         if (error == 0)
 2714 #endif
 2715                 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
 2716         VOP_UNLOCK(vp, 0, td);
 2717         vn_finished_write(mp);
 2718         return (error);
 2719 }
 2720 
 2721 /*
 2722  * Set ownership given a path name.
 2723  */
 2724 #ifndef _SYS_SYSPROTO_H_
 2725 struct chown_args {
 2726         char    *path;
 2727         int     uid;
 2728         int     gid;
 2729 };
 2730 #endif
 2731 int
 2732 chown(td, uap)
 2733         struct thread *td;
 2734         register struct chown_args /* {
 2735                 char *path;
 2736                 int uid;
 2737                 int gid;
 2738         } */ *uap;
 2739 {
 2740 
 2741         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2742 }
 2743 
 2744 int
 2745 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2746     int gid)
 2747 {
 2748         int error;
 2749         struct nameidata nd;
 2750         int vfslocked;
 2751 
 2752         AUDIT_ARG(owner, uid, gid);
 2753         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 2754         if ((error = namei(&nd)) != 0)
 2755                 return (error);
 2756         vfslocked = NDHASGIANT(&nd);
 2757         NDFREE(&nd, NDF_ONLY_PNBUF);
 2758         error = setfown(td, nd.ni_vp, uid, gid);
 2759         vrele(nd.ni_vp);
 2760         VFS_UNLOCK_GIANT(vfslocked);
 2761         return (error);
 2762 }
 2763 
 2764 /*
 2765  * Set ownership given a path name, do not cross symlinks.
 2766  */
 2767 #ifndef _SYS_SYSPROTO_H_
 2768 struct lchown_args {
 2769         char    *path;
 2770         int     uid;
 2771         int     gid;
 2772 };
 2773 #endif
 2774 int
 2775 lchown(td, uap)
 2776         struct thread *td;
 2777         register struct lchown_args /* {
 2778                 char *path;
 2779                 int uid;
 2780                 int gid;
 2781         } */ *uap;
 2782 {
 2783 
 2784         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2785 }
 2786 
 2787 int
 2788 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2789     int gid)
 2790 {
 2791         int error;
 2792         struct nameidata nd;
 2793         int vfslocked;
 2794 
 2795         AUDIT_ARG(owner, uid, gid);
 2796         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 2797         if ((error = namei(&nd)) != 0)
 2798                 return (error);
 2799         vfslocked = NDHASGIANT(&nd);
 2800         NDFREE(&nd, NDF_ONLY_PNBUF);
 2801         error = setfown(td, nd.ni_vp, uid, gid);
 2802         vrele(nd.ni_vp);
 2803         VFS_UNLOCK_GIANT(vfslocked);
 2804         return (error);
 2805 }
 2806 
 2807 /*
 2808  * Set ownership given a file descriptor.
 2809  */
 2810 #ifndef _SYS_SYSPROTO_H_
 2811 struct fchown_args {
 2812         int     fd;
 2813         int     uid;
 2814         int     gid;
 2815 };
 2816 #endif
 2817 int
 2818 fchown(td, uap)
 2819         struct thread *td;
 2820         register struct fchown_args /* {
 2821                 int fd;
 2822                 int uid;
 2823                 int gid;
 2824         } */ *uap;
 2825 {
 2826         struct file *fp;
 2827         int vfslocked;
 2828         int error;
 2829 
 2830         AUDIT_ARG(fd, uap->fd);
 2831         AUDIT_ARG(owner, uap->uid, uap->gid);
 2832         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 2833                 return (error);
 2834         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 2835 #ifdef AUDIT
 2836         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY, td);
 2837         AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
 2838         VOP_UNLOCK(fp->f_vnode, 0, td);
 2839 #endif
 2840         error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
 2841         VFS_UNLOCK_GIANT(vfslocked);
 2842         fdrop(fp, td);
 2843         return (error);
 2844 }
 2845 
 2846 /*
 2847  * Common implementation code for utimes(), lutimes(), and futimes().
 2848  */
 2849 static int
 2850 getutimes(usrtvp, tvpseg, tsp)
 2851         const struct timeval *usrtvp;
 2852         enum uio_seg tvpseg;
 2853         struct timespec *tsp;
 2854 {
 2855         struct timeval tv[2];
 2856         const struct timeval *tvp;
 2857         int error;
 2858 
 2859         if (usrtvp == NULL) {
 2860                 vfs_timestamp(&tsp[0]);
 2861                 tsp[1] = tsp[0];
 2862         } else {
 2863                 if (tvpseg == UIO_SYSSPACE) {
 2864                         tvp = usrtvp;
 2865                 } else {
 2866                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 2867                                 return (error);
 2868                         tvp = tv;
 2869                 }
 2870 
 2871                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 2872                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 2873                         return (EINVAL);
 2874                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 2875                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 2876         }
 2877         return (0);
 2878 }
 2879 
 2880 /*
 2881  * Common implementation code for utimes(), lutimes(), and futimes().
 2882  */
 2883 static int
 2884 setutimes(td, vp, ts, numtimes, nullflag)
 2885         struct thread *td;
 2886         struct vnode *vp;
 2887         const struct timespec *ts;
 2888         int numtimes;
 2889         int nullflag;
 2890 {
 2891         int error, setbirthtime;
 2892         struct mount *mp;
 2893         struct vattr vattr;
 2894 
 2895         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2896                 return (error);
 2897         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 2898         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2899         setbirthtime = 0;
 2900         if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
 2901             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 2902                 setbirthtime = 1;
 2903         VATTR_NULL(&vattr);
 2904         vattr.va_atime = ts[0];
 2905         vattr.va_mtime = ts[1];
 2906         if (setbirthtime)
 2907                 vattr.va_birthtime = ts[1];
 2908         if (numtimes > 2)
 2909                 vattr.va_birthtime = ts[2];
 2910         if (nullflag)
 2911                 vattr.va_vaflags |= VA_UTIMES_NULL;
 2912 #ifdef MAC
 2913         error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
 2914             vattr.va_mtime);
 2915 #endif
 2916         if (error == 0)
 2917                 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
 2918         VOP_UNLOCK(vp, 0, td);
 2919         vn_finished_write(mp);
 2920         return (error);
 2921 }
 2922 
 2923 /*
 2924  * Set the access and modification times of a file.
 2925  */
 2926 #ifndef _SYS_SYSPROTO_H_
 2927 struct utimes_args {
 2928         char    *path;
 2929         struct  timeval *tptr;
 2930 };
 2931 #endif
 2932 int
 2933 utimes(td, uap)
 2934         struct thread *td;
 2935         register struct utimes_args /* {
 2936                 char *path;
 2937                 struct timeval *tptr;
 2938         } */ *uap;
 2939 {
 2940 
 2941         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 2942             UIO_USERSPACE));
 2943 }
 2944 
 2945 int
 2946 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 2947     struct timeval *tptr, enum uio_seg tptrseg)
 2948 {
 2949         struct timespec ts[2];
 2950         int error;
 2951         struct nameidata nd;
 2952         int vfslocked;
 2953 
 2954         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 2955                 return (error);
 2956         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 2957         if ((error = namei(&nd)) != 0)
 2958                 return (error);
 2959         vfslocked = NDHASGIANT(&nd);
 2960         NDFREE(&nd, NDF_ONLY_PNBUF);
 2961         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 2962         vrele(nd.ni_vp);
 2963         VFS_UNLOCK_GIANT(vfslocked);
 2964         return (error);
 2965 }
 2966 
 2967 /*
 2968  * Set the access and modification times of a file.
 2969  */
 2970 #ifndef _SYS_SYSPROTO_H_
 2971 struct lutimes_args {
 2972         char    *path;
 2973         struct  timeval *tptr;
 2974 };
 2975 #endif
 2976 int
 2977 lutimes(td, uap)
 2978         struct thread *td;
 2979         register struct lutimes_args /* {
 2980                 char *path;
 2981                 struct timeval *tptr;
 2982         } */ *uap;
 2983 {
 2984 
 2985         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 2986             UIO_USERSPACE));
 2987 }
 2988 
 2989 int
 2990 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 2991     struct timeval *tptr, enum uio_seg tptrseg)
 2992 {
 2993         struct timespec ts[2];
 2994         int error;
 2995         struct nameidata nd;
 2996         int vfslocked;
 2997 
 2998         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 2999                 return (error);
 3000         NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3001         if ((error = namei(&nd)) != 0)
 3002                 return (error);
 3003         vfslocked = NDHASGIANT(&nd);
 3004         NDFREE(&nd, NDF_ONLY_PNBUF);
 3005         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3006         vrele(nd.ni_vp);
 3007         VFS_UNLOCK_GIANT(vfslocked);
 3008         return (error);
 3009 }
 3010 
 3011 /*
 3012  * Set the access and modification times of a file.
 3013  */
 3014 #ifndef _SYS_SYSPROTO_H_
 3015 struct futimes_args {
 3016         int     fd;
 3017         struct  timeval *tptr;
 3018 };
 3019 #endif
 3020 int
 3021 futimes(td, uap)
 3022         struct thread *td;
 3023         register struct futimes_args /* {
 3024                 int  fd;
 3025                 struct timeval *tptr;
 3026         } */ *uap;
 3027 {
 3028 
 3029         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3030 }
 3031 
 3032 int
 3033 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3034     enum uio_seg tptrseg)
 3035 {
 3036         struct timespec ts[2];
 3037         struct file *fp;
 3038         int vfslocked;
 3039         int error;
 3040 
 3041         AUDIT_ARG(fd, fd);
 3042         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3043                 return (error);
 3044         if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
 3045                 return (error);
 3046         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 3047 #ifdef AUDIT
 3048         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY, td);
 3049         AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
 3050         VOP_UNLOCK(fp->f_vnode, 0, td);
 3051 #endif
 3052         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3053         VFS_UNLOCK_GIANT(vfslocked);
 3054         fdrop(fp, td);
 3055         return (error);
 3056 }
 3057 
 3058 /*
 3059  * Truncate a file given its path name.
 3060  */
 3061 #ifndef _SYS_SYSPROTO_H_
 3062 struct truncate_args {
 3063         char    *path;
 3064         int     pad;
 3065         off_t   length;
 3066 };
 3067 #endif
 3068 int
 3069 truncate(td, uap)
 3070         struct thread *td;
 3071         register struct truncate_args /* {
 3072                 char *path;
 3073                 int pad;
 3074                 off_t length;
 3075         } */ *uap;
 3076 {
 3077 
 3078         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3079 }
 3080 
 3081 int
 3082 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3083 {
 3084         struct mount *mp;
 3085         struct vnode *vp;
 3086         struct vattr vattr;
 3087         int error;
 3088         struct nameidata nd;
 3089         int vfslocked;
 3090 
 3091         if (length < 0)
 3092                 return(EINVAL);
 3093         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
 3094         if ((error = namei(&nd)) != 0)
 3095                 return (error);
 3096         vfslocked = NDHASGIANT(&nd);
 3097         vp = nd.ni_vp;
 3098         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3099                 vrele(vp);
 3100                 VFS_UNLOCK_GIANT(vfslocked);
 3101                 return (error);
 3102         }
 3103         NDFREE(&nd, NDF_ONLY_PNBUF);
 3104         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 3105         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3106         if (vp->v_type == VDIR)
 3107                 error = EISDIR;
 3108 #ifdef MAC
 3109         else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
 3110         }
 3111 #endif
 3112         else if ((error = vn_writechk(vp)) == 0 &&
 3113             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3114                 VATTR_NULL(&vattr);
 3115                 vattr.va_size = length;
 3116                 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
 3117         }
 3118         vput(vp);
 3119         vn_finished_write(mp);
 3120         VFS_UNLOCK_GIANT(vfslocked);
 3121         return (error);
 3122 }
 3123 
 3124 /*
 3125  * Truncate a file given a file descriptor.
 3126  */
 3127 #ifndef _SYS_SYSPROTO_H_
 3128 struct ftruncate_args {
 3129         int     fd;
 3130         int     pad;
 3131         off_t   length;
 3132 };
 3133 #endif
 3134 int
 3135 ftruncate(td, uap)
 3136         struct thread *td;
 3137         register struct ftruncate_args /* {
 3138                 int fd;
 3139                 int pad;
 3140                 off_t length;
 3141         } */ *uap;
 3142 {
 3143         struct mount *mp;
 3144         struct vattr vattr;
 3145         struct vnode *vp;
 3146         struct file *fp;
 3147         int vfslocked;
 3148         int error;
 3149 
 3150         AUDIT_ARG(fd, uap->fd);
 3151         if (uap->length < 0)
 3152                 return(EINVAL);
 3153         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3154                 return (error);
 3155         if ((fp->f_flag & FWRITE) == 0) {
 3156                 fdrop(fp, td);
 3157                 return (EINVAL);
 3158         }
 3159         vp = fp->f_vnode;
 3160         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3161         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3162                 goto drop;
 3163         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 3164         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3165         AUDIT_ARG(vnode, vp, ARG_VNODE1);
 3166         if (vp->v_type == VDIR)
 3167                 error = EISDIR;
 3168 #ifdef MAC
 3169         else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
 3170             vp))) {
 3171         }
 3172 #endif
 3173         else if ((error = vn_writechk(vp)) == 0) {
 3174                 VATTR_NULL(&vattr);
 3175                 vattr.va_size = uap->length;
 3176                 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
 3177         }
 3178         VOP_UNLOCK(vp, 0, td);
 3179         vn_finished_write(mp);
 3180 drop:
 3181         VFS_UNLOCK_GIANT(vfslocked);
 3182         fdrop(fp, td);
 3183         return (error);
 3184 }
 3185 
 3186 #if defined(COMPAT_43)
 3187 /*
 3188  * Truncate a file given its path name.
 3189  */
 3190 #ifndef _SYS_SYSPROTO_H_
 3191 struct otruncate_args {
 3192         char    *path;
 3193         long    length;
 3194 };
 3195 #endif
 3196 int
 3197 otruncate(td, uap)
 3198         struct thread *td;
 3199         register struct otruncate_args /* {
 3200                 char *path;
 3201                 long length;
 3202         } */ *uap;
 3203 {
 3204         struct truncate_args /* {
 3205                 char *path;
 3206                 int pad;
 3207                 off_t length;
 3208         } */ nuap;
 3209 
 3210         nuap.path = uap->path;
 3211         nuap.length = uap->length;
 3212         return (truncate(td, &nuap));
 3213 }
 3214 
 3215 /*
 3216  * Truncate a file given a file descriptor.
 3217  */
 3218 #ifndef _SYS_SYSPROTO_H_
 3219 struct oftruncate_args {
 3220         int     fd;
 3221         long    length;
 3222 };
 3223 #endif
 3224 int
 3225 oftruncate(td, uap)
 3226         struct thread *td;
 3227         register struct oftruncate_args /* {
 3228                 int fd;
 3229                 long length;
 3230         } */ *uap;
 3231 {
 3232         struct ftruncate_args /* {
 3233                 int fd;
 3234                 int pad;
 3235                 off_t length;
 3236         } */ nuap;
 3237 
 3238         nuap.fd = uap->fd;
 3239         nuap.length = uap->length;
 3240         return (ftruncate(td, &nuap));
 3241 }
 3242 #endif /* COMPAT_43 */
 3243 
 3244 /* Versions with the pad argument */
 3245 int
 3246 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3247 {
 3248         struct truncate_args ouap;
 3249 
 3250         ouap.path = uap->path;
 3251         ouap.length = uap->length;
 3252         return (truncate(td, &ouap));
 3253 }
 3254 
 3255 int
 3256 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3257 {
 3258         struct ftruncate_args ouap;
 3259 
 3260         ouap.fd = uap->fd;
 3261         ouap.length = uap->length;
 3262         return (ftruncate(td, &ouap));
 3263 }
 3264 
 3265 /*
 3266  * Sync an open file.
 3267  */
 3268 #ifndef _SYS_SYSPROTO_H_
 3269 struct fsync_args {
 3270         int     fd;
 3271 };
 3272 #endif
 3273 int
 3274 fsync(td, uap)
 3275         struct thread *td;
 3276         struct fsync_args /* {
 3277                 int fd;
 3278         } */ *uap;
 3279 {
 3280         struct vnode *vp;
 3281         struct mount *mp;
 3282         struct file *fp;
 3283         int vfslocked;
 3284         int error, lock_flags;
 3285 
 3286         AUDIT_ARG(fd, uap->fd);
 3287         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3288                 return (error);
 3289         vp = fp->f_vnode;
 3290         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3291         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3292                 goto drop;
 3293         if (MNT_SHARED_WRITES(mp) ||
 3294             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3295                 lock_flags = LK_SHARED;
 3296         } else {
 3297                 lock_flags = LK_EXCLUSIVE;
 3298         }
 3299         vn_lock(vp, lock_flags | LK_RETRY, td);
 3300         AUDIT_ARG(vnode, vp, ARG_VNODE1);
 3301         if (vp->v_object != NULL) {
 3302                 VM_OBJECT_LOCK(vp->v_object);
 3303                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3304                 VM_OBJECT_UNLOCK(vp->v_object);
 3305         }
 3306         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3307 
 3308         VOP_UNLOCK(vp, 0, td);
 3309         vn_finished_write(mp);
 3310 drop:
 3311         VFS_UNLOCK_GIANT(vfslocked);
 3312         fdrop(fp, td);
 3313         return (error);
 3314 }
 3315 
 3316 /*
 3317  * Rename files.  Source and destination must either both be directories, or
 3318  * both not be directories.  If target is a directory, it must be empty.
 3319  */
 3320 #ifndef _SYS_SYSPROTO_H_
 3321 struct rename_args {
 3322         char    *from;
 3323         char    *to;
 3324 };
 3325 #endif
 3326 int
 3327 rename(td, uap)
 3328         struct thread *td;
 3329         register struct rename_args /* {
 3330                 char *from;
 3331                 char *to;
 3332         } */ *uap;
 3333 {
 3334 
 3335         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3336 }
 3337 
 3338 int
 3339 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3340 {
 3341         struct mount *mp = NULL;
 3342         struct vnode *tvp, *fvp, *tdvp;
 3343         struct nameidata fromnd, tond;
 3344         int tvfslocked;
 3345         int fvfslocked;
 3346         int error;
 3347 
 3348         bwillwrite();
 3349 #ifdef MAC
 3350         NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
 3351             AUDITVNODE1, pathseg, from, td);
 3352 #else
 3353         NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
 3354             AUDITVNODE1, pathseg, from, td);
 3355 #endif
 3356         if ((error = namei(&fromnd)) != 0)
 3357                 return (error);
 3358         fvfslocked = NDHASGIANT(&fromnd);
 3359         tvfslocked = 0;
 3360 #ifdef MAC
 3361         error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
 3362             fromnd.ni_vp, &fromnd.ni_cnd);
 3363         VOP_UNLOCK(fromnd.ni_dvp, 0, td);
 3364         if (fromnd.ni_dvp != fromnd.ni_vp)
 3365                 VOP_UNLOCK(fromnd.ni_vp, 0, td);
 3366 #endif
 3367         fvp = fromnd.ni_vp;
 3368         if (error == 0)
 3369                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
 3370         if (error != 0) {
 3371                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3372                 vrele(fromnd.ni_dvp);
 3373                 vrele(fvp);
 3374                 goto out1;
 3375         }
 3376         NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
 3377             MPSAFE | AUDITVNODE2, pathseg, to, td);
 3378         if (fromnd.ni_vp->v_type == VDIR)
 3379                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3380         if ((error = namei(&tond)) != 0) {
 3381                 /* Translate error code for rename("dir1", "dir2/."). */
 3382                 if (error == EISDIR && fvp->v_type == VDIR)
 3383                         error = EINVAL;
 3384                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3385                 vrele(fromnd.ni_dvp);
 3386                 vrele(fvp);
 3387                 vn_finished_write(mp);
 3388                 goto out1;
 3389         }
 3390         tvfslocked = NDHASGIANT(&tond);
 3391         tdvp = tond.ni_dvp;
 3392         tvp = tond.ni_vp;
 3393         if (tvp != NULL) {
 3394                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3395                         error = ENOTDIR;
 3396                         goto out;
 3397                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3398                         error = EISDIR;
 3399                         goto out;
 3400                 }
 3401         }
 3402         if (fvp == tdvp)
 3403                 error = EINVAL;
 3404         /*
 3405          * If the source is the same as the destination (that is, if they
 3406          * are links to the same vnode), then there is nothing to do.
 3407          */
 3408         if (fvp == tvp)
 3409                 error = -1;
 3410 #ifdef MAC
 3411         else
 3412                 error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
 3413                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3414 #endif
 3415 out:
 3416         if (!error) {
 3417                 VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
 3418                 if (fromnd.ni_dvp != tdvp) {
 3419                         VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 3420                 }
 3421                 if (tvp) {
 3422                         VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
 3423                 }
 3424                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3425                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3426                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3427                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3428         } else {
 3429                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3430                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3431                 if (tvp)
 3432                         vput(tvp);
 3433                 if (tdvp == tvp)
 3434                         vrele(tdvp);
 3435                 else
 3436                         vput(tdvp);
 3437                 vrele(fromnd.ni_dvp);
 3438                 vrele(fvp);
 3439         }
 3440         vrele(tond.ni_startdir);
 3441         vn_finished_write(mp);
 3442 out1:
 3443         if (fromnd.ni_startdir)
 3444                 vrele(fromnd.ni_startdir);
 3445         VFS_UNLOCK_GIANT(fvfslocked);
 3446         VFS_UNLOCK_GIANT(tvfslocked);
 3447         if (error == -1)
 3448                 return (0);
 3449         return (error);
 3450 }
 3451 
 3452 /*
 3453  * Make a directory file.
 3454  */
 3455 #ifndef _SYS_SYSPROTO_H_
 3456 struct mkdir_args {
 3457         char    *path;
 3458         int     mode;
 3459 };
 3460 #endif
 3461 int
 3462 mkdir(td, uap)
 3463         struct thread *td;
 3464         register struct mkdir_args /* {
 3465                 char *path;
 3466                 int mode;
 3467         } */ *uap;
 3468 {
 3469 
 3470         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3471 }
 3472 
 3473 int
 3474 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3475 {
 3476         struct mount *mp;
 3477         struct vnode *vp;
 3478         struct vattr vattr;
 3479         int error;
 3480         struct nameidata nd;
 3481         int vfslocked;
 3482 
 3483         AUDIT_ARG(mode, mode);
 3484 restart:
 3485         bwillwrite();
 3486         NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
 3487             segflg, path, td);
 3488         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3489         if ((error = namei(&nd)) != 0)
 3490                 return (error);
 3491         vfslocked = NDHASGIANT(&nd);
 3492         vp = nd.ni_vp;
 3493         if (vp != NULL) {
 3494                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3495                 /*
 3496                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3497                  * the strange behaviour of leaving the vnode unlocked
 3498                  * if the target is the same vnode as the parent.
 3499                  */
 3500                 if (vp == nd.ni_dvp)
 3501                         vrele(nd.ni_dvp);
 3502                 else
 3503                         vput(nd.ni_dvp);
 3504                 vrele(vp);
 3505                 VFS_UNLOCK_GIANT(vfslocked);
 3506                 return (EEXIST);
 3507         }
 3508         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3509                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3510                 vput(nd.ni_dvp);
 3511                 VFS_UNLOCK_GIANT(vfslocked);
 3512                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3513                         return (error);
 3514                 goto restart;
 3515         }
 3516         VATTR_NULL(&vattr);
 3517         vattr.va_type = VDIR;
 3518         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3519 #ifdef MAC
 3520         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3521             &vattr);
 3522         if (error)
 3523                 goto out;
 3524 #endif
 3525         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 3526         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3527 #ifdef MAC
 3528 out:
 3529 #endif
 3530         NDFREE(&nd, NDF_ONLY_PNBUF);
 3531         vput(nd.ni_dvp);
 3532         if (!error)
 3533                 vput(nd.ni_vp);
 3534         vn_finished_write(mp);
 3535         VFS_UNLOCK_GIANT(vfslocked);
 3536         return (error);
 3537 }
 3538 
 3539 /*
 3540  * Remove a directory file.
 3541  */
 3542 #ifndef _SYS_SYSPROTO_H_
 3543 struct rmdir_args {
 3544         char    *path;
 3545 };
 3546 #endif
 3547 int
 3548 rmdir(td, uap)
 3549         struct thread *td;
 3550         struct rmdir_args /* {
 3551                 char *path;
 3552         } */ *uap;
 3553 {
 3554 
 3555         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3556 }
 3557 
 3558 int
 3559 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3560 {
 3561         struct mount *mp;
 3562         struct vnode *vp;
 3563         int error;
 3564         struct nameidata nd;
 3565         int vfslocked;
 3566 
 3567 restart:
 3568         bwillwrite();
 3569         NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
 3570             pathseg, path, td);
 3571         if ((error = namei(&nd)) != 0)
 3572                 return (error);
 3573         vfslocked = NDHASGIANT(&nd);
 3574         vp = nd.ni_vp;
 3575         if (vp->v_type != VDIR) {
 3576                 error = ENOTDIR;
 3577                 goto out;
 3578         }
 3579         /*
 3580          * No rmdir "." please.
 3581          */
 3582         if (nd.ni_dvp == vp) {
 3583                 error = EINVAL;
 3584                 goto out;
 3585         }
 3586         /*
 3587          * The root of a mounted filesystem cannot be deleted.
 3588          */
 3589         if (vp->v_vflag & VV_ROOT) {
 3590                 error = EBUSY;
 3591                 goto out;
 3592         }
 3593 #ifdef MAC
 3594         error = mac_check_vnode_unlink(td->td_ucred, nd.ni_dvp, vp,
 3595             &nd.ni_cnd);
 3596         if (error)
 3597                 goto out;
 3598 #endif
 3599         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3600                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3601                 vput(vp);
 3602                 if (nd.ni_dvp == vp)
 3603                         vrele(nd.ni_dvp);
 3604                 else
 3605                         vput(nd.ni_dvp);
 3606                 VFS_UNLOCK_GIANT(vfslocked);
 3607                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3608                         return (error);
 3609                 goto restart;
 3610         }
 3611         VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
 3612         VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 3613         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3614         vn_finished_write(mp);
 3615 out:
 3616         NDFREE(&nd, NDF_ONLY_PNBUF);
 3617         vput(vp);
 3618         if (nd.ni_dvp == vp)
 3619                 vrele(nd.ni_dvp);
 3620         else
 3621                 vput(nd.ni_dvp);
 3622         VFS_UNLOCK_GIANT(vfslocked);
 3623         return (error);
 3624 }
 3625 
 3626 #ifdef COMPAT_43
 3627 /*
 3628  * Read a block of directory entries in a filesystem independent format.
 3629  */
 3630 #ifndef _SYS_SYSPROTO_H_
 3631 struct ogetdirentries_args {
 3632         int     fd;
 3633         char    *buf;
 3634         u_int   count;
 3635         long    *basep;
 3636 };
 3637 #endif
 3638 int
 3639 ogetdirentries(td, uap)
 3640         struct thread *td;
 3641         register struct ogetdirentries_args /* {
 3642                 int fd;
 3643                 char *buf;
 3644                 u_int count;
 3645                 long *basep;
 3646         } */ *uap;
 3647 {
 3648         struct vnode *vp;
 3649         struct file *fp;
 3650         struct uio auio, kuio;
 3651         struct iovec aiov, kiov;
 3652         struct dirent *dp, *edp;
 3653         caddr_t dirbuf;
 3654         int error, eofflag, readcnt, vfslocked;
 3655         long loff;
 3656 
 3657         /* XXX arbitrary sanity limit on `count'. */
 3658         if (uap->count > 64 * 1024)
 3659                 return (EINVAL);
 3660         if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 3661                 return (error);
 3662         if ((fp->f_flag & FREAD) == 0) {
 3663                 fdrop(fp, td);
 3664                 return (EBADF);
 3665         }
 3666         vp = fp->f_vnode;
 3667 unionread:
 3668         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3669         if (vp->v_type != VDIR) {
 3670                 VFS_UNLOCK_GIANT(vfslocked);
 3671                 fdrop(fp, td);
 3672                 return (EINVAL);
 3673         }
 3674         aiov.iov_base = uap->buf;
 3675         aiov.iov_len = uap->count;
 3676         auio.uio_iov = &aiov;
 3677         auio.uio_iovcnt = 1;
 3678         auio.uio_rw = UIO_READ;
 3679         auio.uio_segflg = UIO_USERSPACE;
 3680         auio.uio_td = td;
 3681         auio.uio_resid = uap->count;
 3682         vn_lock(vp, LK_SHARED | LK_RETRY, td);
 3683         loff = auio.uio_offset = fp->f_offset;
 3684 #ifdef MAC
 3685         error = mac_check_vnode_readdir(td->td_ucred, vp);
 3686         if (error) {
 3687                 VOP_UNLOCK(vp, 0, td);
 3688                 VFS_UNLOCK_GIANT(vfslocked);
 3689                 fdrop(fp, td);
 3690                 return (error);
 3691         }
 3692 #endif
 3693 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3694                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3695                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3696                             NULL, NULL);
 3697                         fp->f_offset = auio.uio_offset;
 3698                 } else
 3699 #       endif
 3700         {
 3701                 kuio = auio;
 3702                 kuio.uio_iov = &kiov;
 3703                 kuio.uio_segflg = UIO_SYSSPACE;
 3704                 kiov.iov_len = uap->count;
 3705                 MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
 3706                 kiov.iov_base = dirbuf;
 3707                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3708                             NULL, NULL);
 3709                 fp->f_offset = kuio.uio_offset;
 3710                 if (error == 0) {
 3711                         readcnt = uap->count - kuio.uio_resid;
 3712                         edp = (struct dirent *)&dirbuf[readcnt];
 3713                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3714 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3715                                         /*
 3716                                          * The expected low byte of
 3717                                          * dp->d_namlen is our dp->d_type.
 3718                                          * The high MBZ byte of dp->d_namlen
 3719                                          * is our dp->d_namlen.
 3720                                          */
 3721                                         dp->d_type = dp->d_namlen;
 3722                                         dp->d_namlen = 0;
 3723 #                               else
 3724                                         /*
 3725                                          * The dp->d_type is the high byte
 3726                                          * of the expected dp->d_namlen,
 3727                                          * so must be zero'ed.
 3728                                          */
 3729                                         dp->d_type = 0;
 3730 #                               endif
 3731                                 if (dp->d_reclen > 0) {
 3732                                         dp = (struct dirent *)
 3733                                             ((char *)dp + dp->d_reclen);
 3734                                 } else {
 3735                                         error = EIO;
 3736                                         break;
 3737                                 }
 3738                         }
 3739                         if (dp >= edp)
 3740                                 error = uiomove(dirbuf, readcnt, &auio);
 3741                 }
 3742                 FREE(dirbuf, M_TEMP);
 3743         }
 3744         if (error) {
 3745                 VOP_UNLOCK(vp, 0, td);
 3746                 VFS_UNLOCK_GIANT(vfslocked);
 3747                 fdrop(fp, td);
 3748                 return (error);
 3749         }
 3750         if (uap->count == auio.uio_resid &&
 3751             (vp->v_vflag & VV_ROOT) &&
 3752             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3753                 struct vnode *tvp = vp;
 3754                 vp = vp->v_mount->mnt_vnodecovered;
 3755                 VREF(vp);
 3756                 fp->f_vnode = vp;
 3757                 fp->f_data = vp;
 3758                 fp->f_offset = 0;
 3759                 vput(tvp);
 3760                 VFS_UNLOCK_GIANT(vfslocked);
 3761                 goto unionread;
 3762         }
 3763         VOP_UNLOCK(vp, 0, td);
 3764         VFS_UNLOCK_GIANT(vfslocked);
 3765         error = copyout(&loff, uap->basep, sizeof(long));
 3766         fdrop(fp, td);
 3767         td->td_retval[0] = uap->count - auio.uio_resid;
 3768         return (error);
 3769 }
 3770 #endif /* COMPAT_43 */
 3771 
 3772 /*
 3773  * Read a block of directory entries in a filesystem independent format.
 3774  */
 3775 #ifndef _SYS_SYSPROTO_H_
 3776 struct getdirentries_args {
 3777         int     fd;
 3778         char    *buf;
 3779         u_int   count;
 3780         long    *basep;
 3781 };
 3782 #endif
 3783 int
 3784 getdirentries(td, uap)
 3785         struct thread *td;
 3786         register struct getdirentries_args /* {
 3787                 int fd;
 3788                 char *buf;
 3789                 u_int count;
 3790                 long *basep;
 3791         } */ *uap;
 3792 {
 3793         long base;
 3794         int error;
 3795 
 3796         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 3797         if (error)
 3798                 return (error);
 3799         if (uap->basep != NULL)
 3800                 error = copyout(&base, uap->basep, sizeof(long));
 3801         return (error);
 3802 }
 3803 
 3804 int
 3805 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 3806     long *basep)
 3807 {
 3808         struct vnode *vp;
 3809         struct file *fp;
 3810         struct uio auio;
 3811         struct iovec aiov;
 3812         int vfslocked;
 3813         long loff;
 3814         int error, eofflag;
 3815 
 3816         AUDIT_ARG(fd, fd);
 3817         if (count > INT_MAX)
 3818                 return (EINVAL);
 3819         if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
 3820                 return (error);
 3821         if ((fp->f_flag & FREAD) == 0) {
 3822                 fdrop(fp, td);
 3823                 return (EBADF);
 3824         }
 3825         vp = fp->f_vnode;
 3826 unionread:
 3827         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 3828         if (vp->v_type != VDIR) {
 3829                 VFS_UNLOCK_GIANT(vfslocked);
 3830                 error = EINVAL;
 3831                 goto fail;
 3832         }
 3833         aiov.iov_base = buf;
 3834         aiov.iov_len = count;
 3835         auio.uio_iov = &aiov;
 3836         auio.uio_iovcnt = 1;
 3837         auio.uio_rw = UIO_READ;
 3838         auio.uio_segflg = UIO_USERSPACE;
 3839         auio.uio_td = td;
 3840         auio.uio_resid = count;
 3841         vn_lock(vp, LK_SHARED | LK_RETRY, td);
 3842         AUDIT_ARG(vnode, vp, ARG_VNODE1);
 3843         loff = auio.uio_offset = fp->f_offset;
 3844 #ifdef MAC
 3845         error = mac_check_vnode_readdir(td->td_ucred, vp);
 3846         if (error == 0)
 3847 #endif
 3848                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 3849                     NULL);
 3850         fp->f_offset = auio.uio_offset;
 3851         if (error) {
 3852                 VOP_UNLOCK(vp, 0, td);
 3853                 VFS_UNLOCK_GIANT(vfslocked);
 3854                 goto fail;
 3855         }
 3856         if (count == auio.uio_resid &&
 3857             (vp->v_vflag & VV_ROOT) &&
 3858             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3859                 struct vnode *tvp = vp;
 3860                 vp = vp->v_mount->mnt_vnodecovered;
 3861                 VREF(vp);
 3862                 fp->f_vnode = vp;
 3863                 fp->f_data = vp;
 3864                 fp->f_offset = 0;
 3865                 vput(tvp);
 3866                 VFS_UNLOCK_GIANT(vfslocked);
 3867                 goto unionread;
 3868         }
 3869         VOP_UNLOCK(vp, 0, td);
 3870         VFS_UNLOCK_GIANT(vfslocked);
 3871         *basep = loff;
 3872         td->td_retval[0] = count - auio.uio_resid;
 3873 fail:
 3874         fdrop(fp, td);
 3875         return (error);
 3876 }
 3877 
 3878 #ifndef _SYS_SYSPROTO_H_
 3879 struct getdents_args {
 3880         int fd;
 3881         char *buf;
 3882         size_t count;
 3883 };
 3884 #endif
 3885 int
 3886 getdents(td, uap)
 3887         struct thread *td;
 3888         register struct getdents_args /* {
 3889                 int fd;
 3890                 char *buf;
 3891                 u_int count;
 3892         } */ *uap;
 3893 {
 3894         struct getdirentries_args ap;
 3895         ap.fd = uap->fd;
 3896         ap.buf = uap->buf;
 3897         ap.count = uap->count;
 3898         ap.basep = NULL;
 3899         return (getdirentries(td, &ap));
 3900 }
 3901 
 3902 /*
 3903  * Set the mode mask for creation of filesystem nodes.
 3904  */
 3905 #ifndef _SYS_SYSPROTO_H_
 3906 struct umask_args {
 3907         int     newmask;
 3908 };
 3909 #endif
 3910 int
 3911 umask(td, uap)
 3912         struct thread *td;
 3913         struct umask_args /* {
 3914                 int newmask;
 3915         } */ *uap;
 3916 {
 3917         register struct filedesc *fdp;
 3918 
 3919         FILEDESC_XLOCK(td->td_proc->p_fd);
 3920         fdp = td->td_proc->p_fd;
 3921         td->td_retval[0] = fdp->fd_cmask;
 3922         fdp->fd_cmask = uap->newmask & ALLPERMS;
 3923         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 3924         return (0);
 3925 }
 3926 
 3927 /*
 3928  * Void all references to file by ripping underlying filesystem away from
 3929  * vnode.
 3930  */
 3931 #ifndef _SYS_SYSPROTO_H_
 3932 struct revoke_args {
 3933         char    *path;
 3934 };
 3935 #endif
 3936 int
 3937 revoke(td, uap)
 3938         struct thread *td;
 3939         register struct revoke_args /* {
 3940                 char *path;
 3941         } */ *uap;
 3942 {
 3943         struct vnode *vp;
 3944         struct vattr vattr;
 3945         int error;
 3946         struct nameidata nd;
 3947         int vfslocked;
 3948 
 3949         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 3950             UIO_USERSPACE, uap->path, td);
 3951         if ((error = namei(&nd)) != 0)
 3952                 return (error);
 3953         vfslocked = NDHASGIANT(&nd);
 3954         vp = nd.ni_vp;
 3955         NDFREE(&nd, NDF_ONLY_PNBUF);
 3956         if (vp->v_type != VCHR) {
 3957                 error = EINVAL;
 3958                 goto out;
 3959         }
 3960 #ifdef MAC
 3961         error = mac_check_vnode_revoke(td->td_ucred, vp);
 3962         if (error)
 3963                 goto out;
 3964 #endif
 3965         error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
 3966         if (error)
 3967                 goto out;
 3968         if (td->td_ucred->cr_uid != vattr.va_uid) {
 3969                 error = priv_check(td, PRIV_VFS_ADMIN);
 3970                 if (error)
 3971                         goto out;
 3972         }
 3973         if (vcount(vp) > 1)
 3974                 VOP_REVOKE(vp, REVOKEALL);
 3975 out:
 3976         vput(vp);
 3977         VFS_UNLOCK_GIANT(vfslocked);
 3978         return (error);
 3979 }
 3980 
 3981 /*
 3982  * Convert a user file descriptor to a kernel file entry.
 3983  * A reference on the file entry is held upon returning.
 3984  */
 3985 int
 3986 getvnode(fdp, fd, fpp)
 3987         struct filedesc *fdp;
 3988         int fd;
 3989         struct file **fpp;
 3990 {
 3991         int error;
 3992         struct file *fp;
 3993 
 3994         fp = NULL;
 3995         if (fdp == NULL)
 3996                 error = EBADF;
 3997         else {
 3998                 FILEDESC_SLOCK(fdp);
 3999                 if ((u_int)fd >= fdp->fd_nfiles ||
 4000                     (fp = fdp->fd_ofiles[fd]) == NULL)
 4001                         error = EBADF;
 4002                 else if (fp->f_vnode == NULL) {
 4003                         fp = NULL;
 4004                         error = EINVAL;
 4005                 } else {
 4006                         fhold(fp);
 4007                         error = 0;
 4008                 }
 4009                 FILEDESC_SUNLOCK(fdp);
 4010         }
 4011         *fpp = fp;
 4012         return (error);
 4013 }
 4014 
 4015 /*
 4016  * Get an (NFS) file handle.
 4017  */
 4018 #ifndef _SYS_SYSPROTO_H_
 4019 struct lgetfh_args {
 4020         char    *fname;
 4021         fhandle_t *fhp;
 4022 };
 4023 #endif
 4024 int
 4025 lgetfh(td, uap)
 4026         struct thread *td;
 4027         register struct lgetfh_args *uap;
 4028 {
 4029         struct nameidata nd;
 4030         fhandle_t fh;
 4031         register struct vnode *vp;
 4032         int vfslocked;
 4033         int error;
 4034 
 4035         error = priv_check(td, PRIV_VFS_GETFH);
 4036         if (error)
 4037                 return (error);
 4038         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4039             UIO_USERSPACE, uap->fname, td);
 4040         error = namei(&nd);
 4041         if (error)
 4042                 return (error);
 4043         vfslocked = NDHASGIANT(&nd);
 4044         NDFREE(&nd, NDF_ONLY_PNBUF);
 4045         vp = nd.ni_vp;
 4046         bzero(&fh, sizeof(fh));
 4047         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4048         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4049         vput(vp);
 4050         VFS_UNLOCK_GIANT(vfslocked);
 4051         if (error)
 4052                 return (error);
 4053         error = copyout(&fh, uap->fhp, sizeof (fh));
 4054         return (error);
 4055 }
 4056 
 4057 #ifndef _SYS_SYSPROTO_H_
 4058 struct getfh_args {
 4059         char    *fname;
 4060         fhandle_t *fhp;
 4061 };
 4062 #endif
 4063 int
 4064 getfh(td, uap)
 4065         struct thread *td;
 4066         register struct getfh_args *uap;
 4067 {
 4068         struct nameidata nd;
 4069         fhandle_t fh;
 4070         register struct vnode *vp;
 4071         int vfslocked;
 4072         int error;
 4073 
 4074         error = priv_check(td, PRIV_VFS_GETFH);
 4075         if (error)
 4076                 return (error);
 4077         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 4078             UIO_USERSPACE, uap->fname, td);
 4079         error = namei(&nd);
 4080         if (error)
 4081                 return (error);
 4082         vfslocked = NDHASGIANT(&nd);
 4083         NDFREE(&nd, NDF_ONLY_PNBUF);
 4084         vp = nd.ni_vp;
 4085         bzero(&fh, sizeof(fh));
 4086         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4087         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4088         vput(vp);
 4089         VFS_UNLOCK_GIANT(vfslocked);
 4090         if (error)
 4091                 return (error);
 4092         error = copyout(&fh, uap->fhp, sizeof (fh));
 4093         return (error);
 4094 }
 4095 
 4096 /*
 4097  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4098  * open descriptor.
 4099  *
 4100  * warning: do not remove the priv_check() call or this becomes one giant
 4101  * security hole.
 4102  */
 4103 #ifndef _SYS_SYSPROTO_H_
 4104 struct fhopen_args {
 4105         const struct fhandle *u_fhp;
 4106         int flags;
 4107 };
 4108 #endif
 4109 int
 4110 fhopen(td, uap)
 4111         struct thread *td;
 4112         struct fhopen_args /* {
 4113                 const struct fhandle *u_fhp;
 4114                 int flags;
 4115         } */ *uap;
 4116 {
 4117         struct proc *p = td->td_proc;
 4118         struct mount *mp;
 4119         struct vnode *vp;
 4120         struct fhandle fhp;
 4121         struct vattr vat;
 4122         struct vattr *vap = &vat;
 4123         struct flock lf;
 4124         struct file *fp;
 4125         register struct filedesc *fdp = p->p_fd;
 4126         int fmode, mode, error, type;
 4127         struct file *nfp;
 4128         int vfslocked;
 4129         int indx;
 4130 
 4131         error = priv_check(td, PRIV_VFS_FHOPEN);
 4132         if (error)
 4133                 return (error);
 4134         fmode = FFLAGS(uap->flags);
 4135         /* why not allow a non-read/write open for our lockd? */
 4136         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4137                 return (EINVAL);
 4138         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4139         if (error)
 4140                 return(error);
 4141         /* find the mount point */
 4142         mp = vfs_getvfs(&fhp.fh_fsid);
 4143         if (mp == NULL)
 4144                 return (ESTALE);
 4145         vfslocked = VFS_LOCK_GIANT(mp);
 4146         /* now give me my vnode, it gets returned to me locked */
 4147         error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
 4148         if (error)
 4149                 goto out;
 4150         /*
 4151          * from now on we have to make sure not
 4152          * to forget about the vnode
 4153          * any error that causes an abort must vput(vp)
 4154          * just set error = err and 'goto bad;'.
 4155          */
 4156 
 4157         /*
 4158          * from vn_open
 4159          */
 4160         if (vp->v_type == VLNK) {
 4161                 error = EMLINK;
 4162                 goto bad;
 4163         }
 4164         if (vp->v_type == VSOCK) {
 4165                 error = EOPNOTSUPP;
 4166                 goto bad;
 4167         }
 4168         mode = 0;
 4169         if (fmode & (FWRITE | O_TRUNC)) {
 4170                 if (vp->v_type == VDIR) {
 4171                         error = EISDIR;
 4172                         goto bad;
 4173                 }
 4174                 error = vn_writechk(vp);
 4175                 if (error)
 4176                         goto bad;
 4177                 mode |= VWRITE;
 4178         }
 4179         if (fmode & FREAD)
 4180                 mode |= VREAD;
 4181         if (fmode & O_APPEND)
 4182                 mode |= VAPPEND;
 4183 #ifdef MAC
 4184         error = mac_check_vnode_open(td->td_ucred, vp, mode);
 4185         if (error)
 4186                 goto bad;
 4187 #endif
 4188         if (mode) {
 4189                 error = VOP_ACCESS(vp, mode, td->td_ucred, td);
 4190                 if (error)
 4191                         goto bad;
 4192         }
 4193         if (fmode & O_TRUNC) {
 4194                 vfs_ref(mp);
 4195                 VOP_UNLOCK(vp, 0, td);                          /* XXX */
 4196                 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
 4197                         vrele(vp);
 4198                         vfs_rel(mp);
 4199                         goto out;
 4200                 }
 4201                 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
 4202                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);       /* XXX */
 4203                 vfs_rel(mp);
 4204 #ifdef MAC
 4205                 /*
 4206                  * We don't yet have fp->f_cred, so use td->td_ucred, which
 4207                  * should be right.
 4208                  */
 4209                 error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
 4210                 if (error == 0) {
 4211 #endif
 4212                         VATTR_NULL(vap);
 4213                         vap->va_size = 0;
 4214                         error = VOP_SETATTR(vp, vap, td->td_ucred, td);
 4215 #ifdef MAC
 4216                 }
 4217 #endif
 4218                 vn_finished_write(mp);
 4219                 if (error)
 4220                         goto bad;
 4221         }
 4222         error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
 4223         if (error)
 4224                 goto bad;
 4225 
 4226         if (fmode & FWRITE)
 4227                 vp->v_writecount++;
 4228 
 4229         /*
 4230          * end of vn_open code
 4231          */
 4232 
 4233         if ((error = falloc(td, &nfp, &indx)) != 0) {
 4234                 if (fmode & FWRITE)
 4235                         vp->v_writecount--;
 4236                 goto bad;
 4237         }
 4238         /* An extra reference on `nfp' has been held for us by falloc(). */
 4239         fp = nfp;
 4240 
 4241         FILE_LOCK(nfp);
 4242         nfp->f_vnode = vp;
 4243         nfp->f_data = vp;
 4244         nfp->f_flag = fmode & FMASK;
 4245         nfp->f_type = DTYPE_VNODE;
 4246         nfp->f_ops = &vnops;
 4247         FILE_UNLOCK(nfp);
 4248         if (fmode & (O_EXLOCK | O_SHLOCK)) {
 4249                 lf.l_whence = SEEK_SET;
 4250                 lf.l_start = 0;
 4251                 lf.l_len = 0;
 4252                 if (fmode & O_EXLOCK)
 4253                         lf.l_type = F_WRLCK;
 4254                 else
 4255                         lf.l_type = F_RDLCK;
 4256                 type = F_FLOCK;
 4257                 if ((fmode & FNONBLOCK) == 0)
 4258                         type |= F_WAIT;
 4259                 VOP_UNLOCK(vp, 0, td);
 4260                 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 4261                             type)) != 0) {
 4262                         /*
 4263                          * The lock request failed.  Normally close the
 4264                          * descriptor but handle the case where someone might
 4265                          * have dup()d or close()d it when we weren't looking.
 4266                          */
 4267                         fdclose(fdp, fp, indx, td);
 4268 
 4269                         /*
 4270                          * release our private reference
 4271                          */
 4272                         fdrop(fp, td);
 4273                         goto out;
 4274                 }
 4275                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 4276                 fp->f_flag |= FHASLOCK;
 4277         }
 4278 
 4279         VOP_UNLOCK(vp, 0, td);
 4280         fdrop(fp, td);
 4281         VFS_UNLOCK_GIANT(vfslocked);
 4282         td->td_retval[0] = indx;
 4283         return (0);
 4284 
 4285 bad:
 4286         vput(vp);
 4287 out:
 4288         vfs_rel(mp);
 4289         VFS_UNLOCK_GIANT(vfslocked);
 4290         return (error);
 4291 }
 4292 
 4293 /*
 4294  * Stat an (NFS) file handle.
 4295  */
 4296 #ifndef _SYS_SYSPROTO_H_
 4297 struct fhstat_args {
 4298         struct fhandle *u_fhp;
 4299         struct stat *sb;
 4300 };
 4301 #endif
 4302 int
 4303 fhstat(td, uap)
 4304         struct thread *td;
 4305         register struct fhstat_args /* {
 4306                 struct fhandle *u_fhp;
 4307                 struct stat *sb;
 4308         } */ *uap;
 4309 {
 4310         struct stat sb;
 4311         fhandle_t fh;
 4312         struct mount *mp;
 4313         struct vnode *vp;
 4314         int vfslocked;
 4315         int error;
 4316 
 4317         error = priv_check(td, PRIV_VFS_FHSTAT);
 4318         if (error)
 4319                 return (error);
 4320         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4321         if (error)
 4322                 return (error);
 4323         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 4324                 return (ESTALE);
 4325         vfslocked = VFS_LOCK_GIANT(mp);
 4326         if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
 4327                 vfs_rel(mp);
 4328                 VFS_UNLOCK_GIANT(vfslocked);
 4329                 return (error);
 4330         }
 4331         error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
 4332         vput(vp);
 4333         vfs_rel(mp);
 4334         VFS_UNLOCK_GIANT(vfslocked);
 4335         if (error)
 4336                 return (error);
 4337         error = copyout(&sb, uap->sb, sizeof(sb));
 4338         return (error);
 4339 }
 4340 
 4341 /*
 4342  * Implement fstatfs() for (NFS) file handles.
 4343  */
 4344 #ifndef _SYS_SYSPROTO_H_
 4345 struct fhstatfs_args {
 4346         struct fhandle *u_fhp;
 4347         struct statfs *buf;
 4348 };
 4349 #endif
 4350 int
 4351 fhstatfs(td, uap)
 4352         struct thread *td;
 4353         struct fhstatfs_args /* {
 4354                 struct fhandle *u_fhp;
 4355                 struct statfs *buf;
 4356         } */ *uap;
 4357 {
 4358         struct statfs sf;
 4359         fhandle_t fh;
 4360         int error;
 4361 
 4362         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4363         if (error)
 4364                 return (error);
 4365         error = kern_fhstatfs(td, fh, &sf);
 4366         if (error)
 4367                 return (error);
 4368         return (copyout(&sf, uap->buf, sizeof(sf)));
 4369 }
 4370 
 4371 int
 4372 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4373 {
 4374         struct statfs *sp;
 4375         struct mount *mp;
 4376         struct vnode *vp;
 4377         int vfslocked;
 4378         int error;
 4379 
 4380         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4381         if (error)
 4382                 return (error);
 4383         if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
 4384                 return (ESTALE);
 4385         vfslocked = VFS_LOCK_GIANT(mp);
 4386         error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
 4387         if (error) {
 4388                 VFS_UNLOCK_GIANT(vfslocked);
 4389                 vfs_rel(mp);
 4390                 return (error);
 4391         }
 4392         vput(vp);
 4393         error = prison_canseemount(td->td_ucred, mp);
 4394         if (error)
 4395                 goto out;
 4396 #ifdef MAC
 4397         error = mac_check_mount_stat(td->td_ucred, mp);
 4398         if (error)
 4399                 goto out;
 4400 #endif
 4401         /*
 4402          * Set these in case the underlying filesystem fails to do so.
 4403          */
 4404         sp = &mp->mnt_stat;
 4405         sp->f_version = STATFS_VERSION;
 4406         sp->f_namemax = NAME_MAX;
 4407         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4408         error = VFS_STATFS(mp, sp, td);
 4409         if (error == 0)
 4410                 *buf = *sp;
 4411 out:
 4412         vfs_rel(mp);
 4413         VFS_UNLOCK_GIANT(vfslocked);
 4414         return (error);
 4415 }

Cache object: 0254afb9d757eff5c199a6b3c4a231aa


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.