The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/11.0/sys/kern/vfs_syscalls.c 303192 2016-07-22 17:22:37Z kib $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/bio.h>
   47 #include <sys/buf.h>
   48 #include <sys/capsicum.h>
   49 #include <sys/disk.h>
   50 #include <sys/sysent.h>
   51 #include <sys/malloc.h>
   52 #include <sys/mount.h>
   53 #include <sys/mutex.h>
   54 #include <sys/sysproto.h>
   55 #include <sys/namei.h>
   56 #include <sys/filedesc.h>
   57 #include <sys/kernel.h>
   58 #include <sys/fcntl.h>
   59 #include <sys/file.h>
   60 #include <sys/filio.h>
   61 #include <sys/limits.h>
   62 #include <sys/linker.h>
   63 #include <sys/rwlock.h>
   64 #include <sys/sdt.h>
   65 #include <sys/stat.h>
   66 #include <sys/sx.h>
   67 #include <sys/unistd.h>
   68 #include <sys/vnode.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/dirent.h>
   72 #include <sys/jail.h>
   73 #include <sys/syscallsubr.h>
   74 #include <sys/sysctl.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 
   79 #include <machine/stdarg.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/uma.h>
   88 
   89 #include <ufs/ufs/quota.h>
   90 
   91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   92 
   93 SDT_PROVIDER_DEFINE(vfs);
   94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   96 
   97 static int kern_chflagsat(struct thread *td, int fd, const char *path,
   98     enum uio_seg pathseg, u_long flags, int atflag);
   99 static int setfflags(struct thread *td, struct vnode *, u_long);
  100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  101 static int getutimens(const struct timespec *, enum uio_seg,
  102     struct timespec *, int *);
  103 static int setutimes(struct thread *td, struct vnode *,
  104     const struct timespec *, int, int);
  105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  106     struct thread *td);
  107 
  108 /*
  109  * Sync each mounted filesystem.
  110  */
  111 #ifndef _SYS_SYSPROTO_H_
  112 struct sync_args {
  113         int     dummy;
  114 };
  115 #endif
  116 /* ARGSUSED */
  117 int
  118 sys_sync(td, uap)
  119         struct thread *td;
  120         struct sync_args *uap;
  121 {
  122         struct mount *mp, *nmp;
  123         int save;
  124 
  125         mtx_lock(&mountlist_mtx);
  126         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  127                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  128                         nmp = TAILQ_NEXT(mp, mnt_list);
  129                         continue;
  130                 }
  131                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  132                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  133                         save = curthread_pflags_set(TDP_SYNCIO);
  134                         vfs_msync(mp, MNT_NOWAIT);
  135                         VFS_SYNC(mp, MNT_NOWAIT);
  136                         curthread_pflags_restore(save);
  137                         vn_finished_write(mp);
  138                 }
  139                 mtx_lock(&mountlist_mtx);
  140                 nmp = TAILQ_NEXT(mp, mnt_list);
  141                 vfs_unbusy(mp);
  142         }
  143         mtx_unlock(&mountlist_mtx);
  144         return (0);
  145 }
  146 
  147 /*
  148  * Change filesystem quotas.
  149  */
  150 #ifndef _SYS_SYSPROTO_H_
  151 struct quotactl_args {
  152         char *path;
  153         int cmd;
  154         int uid;
  155         caddr_t arg;
  156 };
  157 #endif
  158 int
  159 sys_quotactl(td, uap)
  160         struct thread *td;
  161         register struct quotactl_args /* {
  162                 char *path;
  163                 int cmd;
  164                 int uid;
  165                 caddr_t arg;
  166         } */ *uap;
  167 {
  168         struct mount *mp;
  169         struct nameidata nd;
  170         int error;
  171 
  172         AUDIT_ARG_CMD(uap->cmd);
  173         AUDIT_ARG_UID(uap->uid);
  174         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  175                 return (EPERM);
  176         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  177             uap->path, td);
  178         if ((error = namei(&nd)) != 0)
  179                 return (error);
  180         NDFREE(&nd, NDF_ONLY_PNBUF);
  181         mp = nd.ni_vp->v_mount;
  182         vfs_ref(mp);
  183         vput(nd.ni_vp);
  184         error = vfs_busy(mp, 0);
  185         vfs_rel(mp);
  186         if (error != 0)
  187                 return (error);
  188         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  189 
  190         /*
  191          * Since quota on operation typically needs to open quota
  192          * file, the Q_QUOTAON handler needs to unbusy the mount point
  193          * before calling into namei.  Otherwise, unmount might be
  194          * started between two vfs_busy() invocations (first is our,
  195          * second is from mount point cross-walk code in lookup()),
  196          * causing deadlock.
  197          *
  198          * Require that Q_QUOTAON handles the vfs_busy() reference on
  199          * its own, always returning with ubusied mount point.
  200          */
  201         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  202                 vfs_unbusy(mp);
  203         return (error);
  204 }
  205 
  206 /*
  207  * Used by statfs conversion routines to scale the block size up if
  208  * necessary so that all of the block counts are <= 'max_size'.  Note
  209  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  210  * value of 'n'.
  211  */
  212 void
  213 statfs_scale_blocks(struct statfs *sf, long max_size)
  214 {
  215         uint64_t count;
  216         int shift;
  217 
  218         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  219 
  220         /*
  221          * Attempt to scale the block counts to give a more accurate
  222          * overview to userland of the ratio of free space to used
  223          * space.  To do this, find the largest block count and compute
  224          * a divisor that lets it fit into a signed integer <= max_size.
  225          */
  226         if (sf->f_bavail < 0)
  227                 count = -sf->f_bavail;
  228         else
  229                 count = sf->f_bavail;
  230         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  231         if (count <= max_size)
  232                 return;
  233 
  234         count >>= flsl(max_size);
  235         shift = 0;
  236         while (count > 0) {
  237                 shift++;
  238                 count >>=1;
  239         }
  240 
  241         sf->f_bsize <<= shift;
  242         sf->f_blocks >>= shift;
  243         sf->f_bfree >>= shift;
  244         sf->f_bavail >>= shift;
  245 }
  246 
  247 /*
  248  * Get filesystem statistics.
  249  */
  250 #ifndef _SYS_SYSPROTO_H_
  251 struct statfs_args {
  252         char *path;
  253         struct statfs *buf;
  254 };
  255 #endif
  256 int
  257 sys_statfs(td, uap)
  258         struct thread *td;
  259         register struct statfs_args /* {
  260                 char *path;
  261                 struct statfs *buf;
  262         } */ *uap;
  263 {
  264         struct statfs sf;
  265         int error;
  266 
  267         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  268         if (error == 0)
  269                 error = copyout(&sf, uap->buf, sizeof(sf));
  270         return (error);
  271 }
  272 
  273 int
  274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  275     struct statfs *buf)
  276 {
  277         struct mount *mp;
  278         struct statfs *sp, sb;
  279         struct nameidata nd;
  280         int error;
  281 
  282         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  283             pathseg, path, td);
  284         error = namei(&nd);
  285         if (error != 0)
  286                 return (error);
  287         mp = nd.ni_vp->v_mount;
  288         vfs_ref(mp);
  289         NDFREE(&nd, NDF_ONLY_PNBUF);
  290         vput(nd.ni_vp);
  291         error = vfs_busy(mp, 0);
  292         vfs_rel(mp);
  293         if (error != 0)
  294                 return (error);
  295 #ifdef MAC
  296         error = mac_mount_check_stat(td->td_ucred, mp);
  297         if (error != 0)
  298                 goto out;
  299 #endif
  300         /*
  301          * Set these in case the underlying filesystem fails to do so.
  302          */
  303         sp = &mp->mnt_stat;
  304         sp->f_version = STATFS_VERSION;
  305         sp->f_namemax = NAME_MAX;
  306         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  307         error = VFS_STATFS(mp, sp);
  308         if (error != 0)
  309                 goto out;
  310         if (priv_check(td, PRIV_VFS_GENERATION)) {
  311                 bcopy(sp, &sb, sizeof(sb));
  312                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  313                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  314                 sp = &sb;
  315         }
  316         *buf = *sp;
  317 out:
  318         vfs_unbusy(mp);
  319         return (error);
  320 }
  321 
  322 /*
  323  * Get filesystem statistics.
  324  */
  325 #ifndef _SYS_SYSPROTO_H_
  326 struct fstatfs_args {
  327         int fd;
  328         struct statfs *buf;
  329 };
  330 #endif
  331 int
  332 sys_fstatfs(td, uap)
  333         struct thread *td;
  334         register struct fstatfs_args /* {
  335                 int fd;
  336                 struct statfs *buf;
  337         } */ *uap;
  338 {
  339         struct statfs sf;
  340         int error;
  341 
  342         error = kern_fstatfs(td, uap->fd, &sf);
  343         if (error == 0)
  344                 error = copyout(&sf, uap->buf, sizeof(sf));
  345         return (error);
  346 }
  347 
  348 int
  349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  350 {
  351         struct file *fp;
  352         struct mount *mp;
  353         struct statfs *sp, sb;
  354         struct vnode *vp;
  355         cap_rights_t rights;
  356         int error;
  357 
  358         AUDIT_ARG_FD(fd);
  359         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp);
  360         if (error != 0)
  361                 return (error);
  362         vp = fp->f_vnode;
  363         vn_lock(vp, LK_SHARED | LK_RETRY);
  364 #ifdef AUDIT
  365         AUDIT_ARG_VNODE1(vp);
  366 #endif
  367         mp = vp->v_mount;
  368         if (mp)
  369                 vfs_ref(mp);
  370         VOP_UNLOCK(vp, 0);
  371         fdrop(fp, td);
  372         if (mp == NULL) {
  373                 error = EBADF;
  374                 goto out;
  375         }
  376         error = vfs_busy(mp, 0);
  377         vfs_rel(mp);
  378         if (error != 0)
  379                 return (error);
  380 #ifdef MAC
  381         error = mac_mount_check_stat(td->td_ucred, mp);
  382         if (error != 0)
  383                 goto out;
  384 #endif
  385         /*
  386          * Set these in case the underlying filesystem fails to do so.
  387          */
  388         sp = &mp->mnt_stat;
  389         sp->f_version = STATFS_VERSION;
  390         sp->f_namemax = NAME_MAX;
  391         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  392         error = VFS_STATFS(mp, sp);
  393         if (error != 0)
  394                 goto out;
  395         if (priv_check(td, PRIV_VFS_GENERATION)) {
  396                 bcopy(sp, &sb, sizeof(sb));
  397                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  398                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  399                 sp = &sb;
  400         }
  401         *buf = *sp;
  402 out:
  403         if (mp)
  404                 vfs_unbusy(mp);
  405         return (error);
  406 }
  407 
  408 /*
  409  * Get statistics on all filesystems.
  410  */
  411 #ifndef _SYS_SYSPROTO_H_
  412 struct getfsstat_args {
  413         struct statfs *buf;
  414         long bufsize;
  415         int flags;
  416 };
  417 #endif
  418 int
  419 sys_getfsstat(td, uap)
  420         struct thread *td;
  421         register struct getfsstat_args /* {
  422                 struct statfs *buf;
  423                 long bufsize;
  424                 int flags;
  425         } */ *uap;
  426 {
  427         size_t count;
  428         int error;
  429 
  430         if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
  431                 return (EINVAL);
  432         error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
  433             UIO_USERSPACE, uap->flags);
  434         if (error == 0)
  435                 td->td_retval[0] = count;
  436         return (error);
  437 }
  438 
  439 /*
  440  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  441  *      The caller is responsible for freeing memory which will be allocated
  442  *      in '*buf'.
  443  */
  444 int
  445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  446     size_t *countp, enum uio_seg bufseg, int flags)
  447 {
  448         struct mount *mp, *nmp;
  449         struct statfs *sfsp, *sp, sb;
  450         size_t count, maxcount;
  451         int error;
  452 
  453         maxcount = bufsize / sizeof(struct statfs);
  454         if (bufsize == 0)
  455                 sfsp = NULL;
  456         else if (bufseg == UIO_USERSPACE)
  457                 sfsp = *buf;
  458         else /* if (bufseg == UIO_SYSSPACE) */ {
  459                 count = 0;
  460                 mtx_lock(&mountlist_mtx);
  461                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  462                         count++;
  463                 }
  464                 mtx_unlock(&mountlist_mtx);
  465                 if (maxcount > count)
  466                         maxcount = count;
  467                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  468                     M_WAITOK);
  469         }
  470         count = 0;
  471         mtx_lock(&mountlist_mtx);
  472         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  473                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  474                         nmp = TAILQ_NEXT(mp, mnt_list);
  475                         continue;
  476                 }
  477 #ifdef MAC
  478                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  479                         nmp = TAILQ_NEXT(mp, mnt_list);
  480                         continue;
  481                 }
  482 #endif
  483                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  484                         nmp = TAILQ_NEXT(mp, mnt_list);
  485                         continue;
  486                 }
  487                 if (sfsp && count < maxcount) {
  488                         sp = &mp->mnt_stat;
  489                         /*
  490                          * Set these in case the underlying filesystem
  491                          * fails to do so.
  492                          */
  493                         sp->f_version = STATFS_VERSION;
  494                         sp->f_namemax = NAME_MAX;
  495                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  496                         /*
  497                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  498                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  499                          * overrides MNT_WAIT.
  500                          */
  501                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  502                             (flags & MNT_WAIT)) &&
  503                             (error = VFS_STATFS(mp, sp))) {
  504                                 mtx_lock(&mountlist_mtx);
  505                                 nmp = TAILQ_NEXT(mp, mnt_list);
  506                                 vfs_unbusy(mp);
  507                                 continue;
  508                         }
  509                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  510                                 bcopy(sp, &sb, sizeof(sb));
  511                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  512                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  513                                 sp = &sb;
  514                         }
  515                         if (bufseg == UIO_SYSSPACE)
  516                                 bcopy(sp, sfsp, sizeof(*sp));
  517                         else /* if (bufseg == UIO_USERSPACE) */ {
  518                                 error = copyout(sp, sfsp, sizeof(*sp));
  519                                 if (error != 0) {
  520                                         vfs_unbusy(mp);
  521                                         return (error);
  522                                 }
  523                         }
  524                         sfsp++;
  525                 }
  526                 count++;
  527                 mtx_lock(&mountlist_mtx);
  528                 nmp = TAILQ_NEXT(mp, mnt_list);
  529                 vfs_unbusy(mp);
  530         }
  531         mtx_unlock(&mountlist_mtx);
  532         if (sfsp && count > maxcount)
  533                 *countp = maxcount;
  534         else
  535                 *countp = count;
  536         return (0);
  537 }
  538 
  539 #ifdef COMPAT_FREEBSD4
  540 /*
  541  * Get old format filesystem statistics.
  542  */
  543 static void cvtstatfs(struct statfs *, struct ostatfs *);
  544 
  545 #ifndef _SYS_SYSPROTO_H_
  546 struct freebsd4_statfs_args {
  547         char *path;
  548         struct ostatfs *buf;
  549 };
  550 #endif
  551 int
  552 freebsd4_statfs(td, uap)
  553         struct thread *td;
  554         struct freebsd4_statfs_args /* {
  555                 char *path;
  556                 struct ostatfs *buf;
  557         } */ *uap;
  558 {
  559         struct ostatfs osb;
  560         struct statfs sf;
  561         int error;
  562 
  563         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  564         if (error != 0)
  565                 return (error);
  566         cvtstatfs(&sf, &osb);
  567         return (copyout(&osb, uap->buf, sizeof(osb)));
  568 }
  569 
  570 /*
  571  * Get filesystem statistics.
  572  */
  573 #ifndef _SYS_SYSPROTO_H_
  574 struct freebsd4_fstatfs_args {
  575         int fd;
  576         struct ostatfs *buf;
  577 };
  578 #endif
  579 int
  580 freebsd4_fstatfs(td, uap)
  581         struct thread *td;
  582         struct freebsd4_fstatfs_args /* {
  583                 int fd;
  584                 struct ostatfs *buf;
  585         } */ *uap;
  586 {
  587         struct ostatfs osb;
  588         struct statfs sf;
  589         int error;
  590 
  591         error = kern_fstatfs(td, uap->fd, &sf);
  592         if (error != 0)
  593                 return (error);
  594         cvtstatfs(&sf, &osb);
  595         return (copyout(&osb, uap->buf, sizeof(osb)));
  596 }
  597 
  598 /*
  599  * Get statistics on all filesystems.
  600  */
  601 #ifndef _SYS_SYSPROTO_H_
  602 struct freebsd4_getfsstat_args {
  603         struct ostatfs *buf;
  604         long bufsize;
  605         int flags;
  606 };
  607 #endif
  608 int
  609 freebsd4_getfsstat(td, uap)
  610         struct thread *td;
  611         register struct freebsd4_getfsstat_args /* {
  612                 struct ostatfs *buf;
  613                 long bufsize;
  614                 int flags;
  615         } */ *uap;
  616 {
  617         struct statfs *buf, *sp;
  618         struct ostatfs osb;
  619         size_t count, size;
  620         int error;
  621 
  622         if (uap->bufsize < 0)
  623                 return (EINVAL);
  624         count = uap->bufsize / sizeof(struct ostatfs);
  625         if (count > SIZE_MAX / sizeof(struct statfs))
  626                 return (EINVAL);
  627         size = count * sizeof(struct statfs);
  628         error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
  629             uap->flags);
  630         td->td_retval[0] = count;
  631         if (size != 0) {
  632                 sp = buf;
  633                 while (count != 0 && error == 0) {
  634                         cvtstatfs(sp, &osb);
  635                         error = copyout(&osb, uap->buf, sizeof(osb));
  636                         sp++;
  637                         uap->buf++;
  638                         count--;
  639                 }
  640                 free(buf, M_TEMP);
  641         }
  642         return (error);
  643 }
  644 
  645 /*
  646  * Implement fstatfs() for (NFS) file handles.
  647  */
  648 #ifndef _SYS_SYSPROTO_H_
  649 struct freebsd4_fhstatfs_args {
  650         struct fhandle *u_fhp;
  651         struct ostatfs *buf;
  652 };
  653 #endif
  654 int
  655 freebsd4_fhstatfs(td, uap)
  656         struct thread *td;
  657         struct freebsd4_fhstatfs_args /* {
  658                 struct fhandle *u_fhp;
  659                 struct ostatfs *buf;
  660         } */ *uap;
  661 {
  662         struct ostatfs osb;
  663         struct statfs sf;
  664         fhandle_t fh;
  665         int error;
  666 
  667         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  668         if (error != 0)
  669                 return (error);
  670         error = kern_fhstatfs(td, fh, &sf);
  671         if (error != 0)
  672                 return (error);
  673         cvtstatfs(&sf, &osb);
  674         return (copyout(&osb, uap->buf, sizeof(osb)));
  675 }
  676 
  677 /*
  678  * Convert a new format statfs structure to an old format statfs structure.
  679  */
  680 static void
  681 cvtstatfs(nsp, osp)
  682         struct statfs *nsp;
  683         struct ostatfs *osp;
  684 {
  685 
  686         statfs_scale_blocks(nsp, LONG_MAX);
  687         bzero(osp, sizeof(*osp));
  688         osp->f_bsize = nsp->f_bsize;
  689         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  690         osp->f_blocks = nsp->f_blocks;
  691         osp->f_bfree = nsp->f_bfree;
  692         osp->f_bavail = nsp->f_bavail;
  693         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  694         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  695         osp->f_owner = nsp->f_owner;
  696         osp->f_type = nsp->f_type;
  697         osp->f_flags = nsp->f_flags;
  698         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  699         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  700         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  701         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  702         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  703             MIN(MFSNAMELEN, OMFSNAMELEN));
  704         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  705             MIN(MNAMELEN, OMNAMELEN));
  706         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  707             MIN(MNAMELEN, OMNAMELEN));
  708         osp->f_fsid = nsp->f_fsid;
  709 }
  710 #endif /* COMPAT_FREEBSD4 */
  711 
  712 /*
  713  * Change current working directory to a given file descriptor.
  714  */
  715 #ifndef _SYS_SYSPROTO_H_
  716 struct fchdir_args {
  717         int     fd;
  718 };
  719 #endif
  720 int
  721 sys_fchdir(td, uap)
  722         struct thread *td;
  723         struct fchdir_args /* {
  724                 int fd;
  725         } */ *uap;
  726 {
  727         struct vnode *vp, *tdp;
  728         struct mount *mp;
  729         struct file *fp;
  730         cap_rights_t rights;
  731         int error;
  732 
  733         AUDIT_ARG_FD(uap->fd);
  734         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  735             &fp);
  736         if (error != 0)
  737                 return (error);
  738         vp = fp->f_vnode;
  739         VREF(vp);
  740         fdrop(fp, td);
  741         vn_lock(vp, LK_SHARED | LK_RETRY);
  742         AUDIT_ARG_VNODE1(vp);
  743         error = change_dir(vp, td);
  744         while (!error && (mp = vp->v_mountedhere) != NULL) {
  745                 if (vfs_busy(mp, 0))
  746                         continue;
  747                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  748                 vfs_unbusy(mp);
  749                 if (error != 0)
  750                         break;
  751                 vput(vp);
  752                 vp = tdp;
  753         }
  754         if (error != 0) {
  755                 vput(vp);
  756                 return (error);
  757         }
  758         VOP_UNLOCK(vp, 0);
  759         pwd_chdir(td, vp);
  760         return (0);
  761 }
  762 
  763 /*
  764  * Change current working directory (``.'').
  765  */
  766 #ifndef _SYS_SYSPROTO_H_
  767 struct chdir_args {
  768         char    *path;
  769 };
  770 #endif
  771 int
  772 sys_chdir(td, uap)
  773         struct thread *td;
  774         struct chdir_args /* {
  775                 char *path;
  776         } */ *uap;
  777 {
  778 
  779         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  780 }
  781 
  782 int
  783 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  784 {
  785         struct nameidata nd;
  786         int error;
  787 
  788         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  789             pathseg, path, td);
  790         if ((error = namei(&nd)) != 0)
  791                 return (error);
  792         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  793                 vput(nd.ni_vp);
  794                 NDFREE(&nd, NDF_ONLY_PNBUF);
  795                 return (error);
  796         }
  797         VOP_UNLOCK(nd.ni_vp, 0);
  798         NDFREE(&nd, NDF_ONLY_PNBUF);
  799         pwd_chdir(td, nd.ni_vp);
  800         return (0);
  801 }
  802 
  803 /*
  804  * Change notion of root (``/'') directory.
  805  */
  806 #ifndef _SYS_SYSPROTO_H_
  807 struct chroot_args {
  808         char    *path;
  809 };
  810 #endif
  811 int
  812 sys_chroot(td, uap)
  813         struct thread *td;
  814         struct chroot_args /* {
  815                 char *path;
  816         } */ *uap;
  817 {
  818         struct nameidata nd;
  819         int error;
  820 
  821         error = priv_check(td, PRIV_VFS_CHROOT);
  822         if (error != 0)
  823                 return (error);
  824         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  825             UIO_USERSPACE, uap->path, td);
  826         error = namei(&nd);
  827         if (error != 0)
  828                 goto error;
  829         error = change_dir(nd.ni_vp, td);
  830         if (error != 0)
  831                 goto e_vunlock;
  832 #ifdef MAC
  833         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  834         if (error != 0)
  835                 goto e_vunlock;
  836 #endif
  837         VOP_UNLOCK(nd.ni_vp, 0);
  838         error = pwd_chroot(td, nd.ni_vp);
  839         vrele(nd.ni_vp);
  840         NDFREE(&nd, NDF_ONLY_PNBUF);
  841         return (error);
  842 e_vunlock:
  843         vput(nd.ni_vp);
  844 error:
  845         NDFREE(&nd, NDF_ONLY_PNBUF);
  846         return (error);
  847 }
  848 
  849 /*
  850  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  851  * instance.
  852  */
  853 int
  854 change_dir(vp, td)
  855         struct vnode *vp;
  856         struct thread *td;
  857 {
  858 #ifdef MAC
  859         int error;
  860 #endif
  861 
  862         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  863         if (vp->v_type != VDIR)
  864                 return (ENOTDIR);
  865 #ifdef MAC
  866         error = mac_vnode_check_chdir(td->td_ucred, vp);
  867         if (error != 0)
  868                 return (error);
  869 #endif
  870         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  871 }
  872 
  873 static __inline void
  874 flags_to_rights(int flags, cap_rights_t *rightsp)
  875 {
  876 
  877         if (flags & O_EXEC) {
  878                 cap_rights_set(rightsp, CAP_FEXECVE);
  879         } else {
  880                 switch ((flags & O_ACCMODE)) {
  881                 case O_RDONLY:
  882                         cap_rights_set(rightsp, CAP_READ);
  883                         break;
  884                 case O_RDWR:
  885                         cap_rights_set(rightsp, CAP_READ);
  886                         /* FALLTHROUGH */
  887                 case O_WRONLY:
  888                         cap_rights_set(rightsp, CAP_WRITE);
  889                         if (!(flags & (O_APPEND | O_TRUNC)))
  890                                 cap_rights_set(rightsp, CAP_SEEK);
  891                         break;
  892                 }
  893         }
  894 
  895         if (flags & O_CREAT)
  896                 cap_rights_set(rightsp, CAP_CREATE);
  897 
  898         if (flags & O_TRUNC)
  899                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  900 
  901         if (flags & (O_SYNC | O_FSYNC))
  902                 cap_rights_set(rightsp, CAP_FSYNC);
  903 
  904         if (flags & (O_EXLOCK | O_SHLOCK))
  905                 cap_rights_set(rightsp, CAP_FLOCK);
  906 }
  907 
  908 /*
  909  * Check permissions, allocate an open file structure, and call the device
  910  * open routine if any.
  911  */
  912 #ifndef _SYS_SYSPROTO_H_
  913 struct open_args {
  914         char    *path;
  915         int     flags;
  916         int     mode;
  917 };
  918 #endif
  919 int
  920 sys_open(td, uap)
  921         struct thread *td;
  922         register struct open_args /* {
  923                 char *path;
  924                 int flags;
  925                 int mode;
  926         } */ *uap;
  927 {
  928 
  929         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
  930             uap->flags, uap->mode));
  931 }
  932 
  933 #ifndef _SYS_SYSPROTO_H_
  934 struct openat_args {
  935         int     fd;
  936         char    *path;
  937         int     flag;
  938         int     mode;
  939 };
  940 #endif
  941 int
  942 sys_openat(struct thread *td, struct openat_args *uap)
  943 {
  944 
  945         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
  946             uap->mode));
  947 }
  948 
      /*
       * Open (and, depending on flags, create) the file named by path, looked
       * up relative to descriptor fd, and install it in the calling process's
       * descriptor table.
       *
       * pathseg says which address space path lives in; flags and mode are the
       * open(2)-style flags and creation mode.  On success 0 is returned and
       * the new descriptor number is stored in td->td_retval[0].  On failure an
       * errno value is returned and no descriptor has been installed (asserted
       * at the "bad" label).
       */
  949 int
  950 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
  951     int flags, int mode)
  952 {
  953         struct proc *p = td->td_proc;
  954         struct filedesc *fdp = p->p_fd;
  955         struct file *fp;
  956         struct vnode *vp;
  957         struct nameidata nd;
  958         cap_rights_t rights;
  959         int cmode, error, indx;
  960 
              /* indx stays -1 until a descriptor number has been assigned. */
  961         indx = -1;
  962 
  963         AUDIT_ARG_FFLAGS(flags);
  964         AUDIT_ARG_MODE(mode);
  965         /* XXX: audit dirfd */
              /*
               * Build the capability rights that go with these open flags; they
               * are handed to the lookup through NDINIT_ATRIGHTS() below.
               */
  966         cap_rights_init(&rights, CAP_LOOKUP);
  967         flags_to_rights(flags, &rights);
  968         /*
  969          * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
  970          * may be specified.
  971          */
  972         if (flags & O_EXEC) {
  973                 if (flags & O_ACCMODE)
  974                         return (EINVAL);
  975         } else if ((flags & O_ACCMODE) == O_ACCMODE) {
  976                 return (EINVAL);
  977         } else {
  978                 flags = FFLAGS(flags);
  979         }
  980 
  981         /*
  982          * Allocate a file structure. The descriptor to reference it
  983          * is allocated and set by finstall() below.
  984          */
  985         error = falloc_noinstall(td, &fp);
  986         if (error != 0)
  987                 return (error);
  988         /*
  989          * An extra reference on `fp' has been held for us by
  990          * falloc_noinstall().
  991          */
  992         /* Set the flags early so the finit in devfs can pick them up. */
  993         fp->f_flag = flags & FMASK;
              /*
               * Creation mode: the requested mode with the process creation mask
               * (fd_cmask) applied, limited to ALLPERMS, with S_ISTXT cleared.
               */
  994         cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
  995         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
  996             &rights, td);
  997         td->td_dupfd = -1;              /* XXX check for fdopen */
  998         error = vn_open(&nd, &flags, cmode, fp);
  999         if (error != 0) {
 1000                 /*
 1001                  * If the vn_open replaced the method vector, something
 1002                  * wondrous happened deep below and we just pass it up
 1003                  * pretending we know what we do.
 1004                  */
 1005                 if (error == ENXIO && fp->f_ops != &badfileops)
 1006                         goto success;
 1007 
 1008                 /*
 1009                  * Handle special fdopen() case. bleh.
 1010                  *
 1011                  * Don't do this for relative (capability) lookups; we don't
 1012                  * understand exactly what would happen, and we don't think
 1013                  * that it ever should.
 1014                  */
 1015                 if (nd.ni_strictrelative == 0 &&
 1016                     (error == ENODEV || error == ENXIO) &&
 1017                     td->td_dupfd >= 0) {
 1018                         error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 1019                             &indx);
 1020                         if (error == 0)
 1021                                 goto success;
 1022                 }
 1023 
 1024                 goto bad;
 1025         }
 1026         td->td_dupfd = 0;
 1027         NDFREE(&nd, NDF_ONLY_PNBUF);
 1028         vp = nd.ni_vp;
 1029 
 1030         /*
 1031          * Store the vnode, for any f_type. Typically, the vnode use
 1032          * count is decremented by direct call to vn_closefile() for
 1033          * files that switched type in the cdevsw fdopen() method.
 1034          */
 1035         fp->f_vnode = vp;
 1036         /*
 1037          * If the file wasn't claimed by devfs bind it to the normal
 1038          * vnode operations here.
 1039          */
 1040         if (fp->f_ops == &badfileops) {
 1041                 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 1042                 fp->f_seqcount = 1;
 1043                 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 1044                     DTYPE_VNODE, vp, &vnops);
 1045         }
 1046 
              /* The vnode is unlocked before truncating through the file. */
 1047         VOP_UNLOCK(vp, 0);
 1048         if (flags & O_TRUNC) {
 1049                 error = fo_truncate(fp, 0, td->td_ucred, td);
 1050                 if (error != 0)
 1051                         goto bad;
 1052         }
 1053 success:
 1054         /*
 1055          * If we haven't already installed the FD (for dupfdopen), do so now.
 1056          */
 1057         if (indx == -1) {
 1058                 struct filecaps *fcaps;
 1059 
 1060 #ifdef CAPABILITIES
 1061                 if (nd.ni_strictrelative == 1)
 1062                         fcaps = &nd.ni_filecaps;
 1063                 else
 1064 #endif
 1065                         fcaps = NULL;
 1066                 error = finstall(td, fp, &indx, flags, fcaps);
 1067                 /* On success finstall() consumes fcaps. */
 1068                 if (error != 0) {
 1069                         filecaps_free(&nd.ni_filecaps);
 1070                         goto bad;
 1071                 }
 1072         } else {
 1073                 filecaps_free(&nd.ni_filecaps);
 1074         }
 1075 
 1076         /*
 1077          * Release our private reference, leaving the one associated with
 1078          * the descriptor table intact.
 1079          */
 1080         fdrop(fp, td);
 1081         td->td_retval[0] = indx;
 1082         return (0);
 1083 bad:
              /* Error exit: drop our reference to fp and report the error. */
 1084         KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 1085         fdrop(fp, td);
 1086         return (error);
 1087 }
 1088 
 1089 #ifdef COMPAT_43
 1090 /*
 1091  * Create a file.
 1092  */
 1093 #ifndef _SYS_SYSPROTO_H_
 1094 struct ocreat_args {
 1095         char    *path;
 1096         int     mode;
 1097 };
 1098 #endif
 1099 int
 1100 ocreat(td, uap)
 1101         struct thread *td;
 1102         register struct ocreat_args /* {
 1103                 char *path;
 1104                 int mode;
 1105         } */ *uap;
 1106 {
 1107 
 1108         return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1109             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1110 }
 1111 #endif /* COMPAT_43 */
 1112 
 1113 /*
 1114  * Create a special file.
 1115  */
 1116 #ifndef _SYS_SYSPROTO_H_
 1117 struct mknod_args {
 1118         char    *path;
 1119         int     mode;
 1120         int     dev;
 1121 };
 1122 #endif
 1123 int
 1124 sys_mknod(td, uap)
 1125         struct thread *td;
 1126         register struct mknod_args /* {
 1127                 char *path;
 1128                 int mode;
 1129                 int dev;
 1130         } */ *uap;
 1131 {
 1132 
 1133         return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1134             uap->mode, uap->dev));
 1135 }
 1136 
 1137 #ifndef _SYS_SYSPROTO_H_
 1138 struct mknodat_args {
 1139         int     fd;
 1140         char    *path;
 1141         mode_t  mode;
 1142         dev_t   dev;
 1143 };
 1144 #endif
 1145 int
 1146 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1147 {
 1148 
 1149         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1150             uap->dev));
 1151 }
 1152 
      /*
       * Create a character/block device node, a "bad" node, or a whiteout named
       * by path, looked up relative to descriptor fd.
       *
       * The file type is taken from (mode & S_IFMT).  A FIFO request with
       * dev == 0 is handed to kern_mkfifoat(); every other type yields EINVAL.
       * Returns 0 on success, EEXIST if the name already exists, or another
       * errno value.
       */
 1153 int
 1154 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1155     int mode, int dev)
 1156 {
 1157         struct vnode *vp;
 1158         struct mount *mp;
 1159         struct vattr vattr;
 1160         struct nameidata nd;
 1161         cap_rights_t rights;
 1162         int error, whiteout = 0;
 1163 
 1164         AUDIT_ARG_MODE(mode);
 1165         AUDIT_ARG_DEV(dev);
              /* Per-type privilege and argument checks, done before any lookup. */
 1166         switch (mode & S_IFMT) {
 1167         case S_IFCHR:
 1168         case S_IFBLK:
 1169                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1170                 if (error == 0 && dev == VNOVAL)
 1171                         error = EINVAL;
 1172                 break;
 1173         case S_IFMT:
 1174                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1175                 break;
 1176         case S_IFWHT:
 1177                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1178                 break;
 1179         case S_IFIFO:
 1180                 if (dev == 0)
 1181                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1182                 /* FALLTHROUGH */
 1183         default:
 1184                 error = EINVAL;
 1185                 break;
 1186         }
 1187         if (error != 0)
 1188                 return (error);
 1189 restart:
 1190         bwillwrite();
 1191         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1192             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 1193             td);
 1194         if ((error = namei(&nd)) != 0)
 1195                 return (error);
 1196         vp = nd.ni_vp;
 1197         if (vp != NULL) {
                      /* The name already exists: release everything and fail. */
 1198                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1199                 if (vp == nd.ni_dvp)
 1200                         vrele(nd.ni_dvp);
 1201                 else
 1202                         vput(nd.ni_dvp);
 1203                 vrele(vp);
 1204                 return (EEXIST);
 1205         } else {
 1206                 VATTR_NULL(&vattr);
 1207                 vattr.va_mode = (mode & ALLPERMS) &
 1208                     ~td->td_proc->p_fd->fd_cmask;
 1209                 vattr.va_rdev = dev;
 1210                 whiteout = 0;
 1211 
 1212                 switch (mode & S_IFMT) {
 1213                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1214                         vattr.va_type = VBAD;
 1215                         break;
 1216                 case S_IFCHR:
 1217                         vattr.va_type = VCHR;
 1218                         break;
 1219                 case S_IFBLK:
 1220                         vattr.va_type = VBLK;
 1221                         break;
 1222                 case S_IFWHT:
 1223                         whiteout = 1;
 1224                         break;
 1225                 default:
                              /*
                               * Not expected: the first switch already returned
                               * for every other file type.
                               */
 1226                         panic("kern_mknod: invalid mode");
 1227                 }
 1228         }
              /*
               * If the write cannot be started without sleeping, give up the
               * lookup state, wait, and redo the lookup from "restart".
               */
 1229         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1230                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1231                 vput(nd.ni_dvp);
 1232                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1233                         return (error);
 1234                 goto restart;
 1235         }
 1236 #ifdef MAC
 1237         if (error == 0 && !whiteout)
 1238                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1239                     &nd.ni_cnd, &vattr);
 1240 #endif
 1241         if (error == 0) {
 1242                 if (whiteout)
 1243                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1244                 else {
 1245                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1246                                                 &nd.ni_cnd, &vattr);
                              /* The new vnode is not handed back to the caller. */
 1247                         if (error == 0)
 1248                                 vput(nd.ni_vp);
 1249                 }
 1250         }
 1251         NDFREE(&nd, NDF_ONLY_PNBUF);
 1252         vput(nd.ni_dvp);
 1253         vn_finished_write(mp);
 1254         return (error);
 1255 }
 1256 
 1257 /*
 1258  * Create a named pipe.
 1259  */
 1260 #ifndef _SYS_SYSPROTO_H_
 1261 struct mkfifo_args {
 1262         char    *path;
 1263         int     mode;
 1264 };
 1265 #endif
 1266 int
 1267 sys_mkfifo(td, uap)
 1268         struct thread *td;
 1269         register struct mkfifo_args /* {
 1270                 char *path;
 1271                 int mode;
 1272         } */ *uap;
 1273 {
 1274 
 1275         return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1276             uap->mode));
 1277 }
 1278 
 1279 #ifndef _SYS_SYSPROTO_H_
 1280 struct mkfifoat_args {
 1281         int     fd;
 1282         char    *path;
 1283         mode_t  mode;
 1284 };
 1285 #endif
 1286 int
 1287 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1288 {
 1289 
 1290         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1291             uap->mode));
 1292 }
 1293 
      /*
       * Create a FIFO named by path, looked up relative to descriptor fd.
       *
       * The permission bits of mode are masked with the process creation mask
       * (fd_cmask).  Returns 0 on success, EEXIST if the name already exists,
       * or another errno value.
       */
 1294 int
 1295 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1296     int mode)
 1297 {
 1298         struct mount *mp;
 1299         struct vattr vattr;
 1300         struct nameidata nd;
 1301         cap_rights_t rights;
 1302         int error;
 1303 
 1304         AUDIT_ARG_MODE(mode);
 1305 restart:
 1306         bwillwrite();
 1307         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1308             NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 1309             td);
 1310         if ((error = namei(&nd)) != 0)
 1311                 return (error);
 1312         if (nd.ni_vp != NULL) {
                      /* The name already exists: release everything and fail. */
 1313                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1314                 if (nd.ni_vp == nd.ni_dvp)
 1315                         vrele(nd.ni_dvp);
 1316                 else
 1317                         vput(nd.ni_dvp);
 1318                 vrele(nd.ni_vp);
 1319                 return (EEXIST);
 1320         }
              /*
               * If the write cannot be started without sleeping, give up the
               * lookup state, wait, and redo the lookup from "restart".
               */
 1321         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1322                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1323                 vput(nd.ni_dvp);
 1324                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1325                         return (error);
 1326                 goto restart;
 1327         }
 1328         VATTR_NULL(&vattr);
 1329         vattr.va_type = VFIFO;
 1330         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1331 #ifdef MAC
 1332         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1333             &vattr);
 1334         if (error != 0)
 1335                 goto out;
 1336 #endif
 1337         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
              /* The new vnode is not handed back to the caller. */
 1338         if (error == 0)
 1339                 vput(nd.ni_vp);
 1340 #ifdef MAC
 1341 out:
 1342 #endif
 1343         vput(nd.ni_dvp);
 1344         vn_finished_write(mp);
 1345         NDFREE(&nd, NDF_ONLY_PNBUF);
 1346         return (error);
 1347 }
 1348 
 1349 /*
 1350  * Make a hard file link.
 1351  */
 1352 #ifndef _SYS_SYSPROTO_H_
 1353 struct link_args {
 1354         char    *path;
 1355         char    *link;
 1356 };
 1357 #endif
 1358 int
 1359 sys_link(td, uap)
 1360         struct thread *td;
 1361         register struct link_args /* {
 1362                 char *path;
 1363                 char *link;
 1364         } */ *uap;
 1365 {
 1366 
 1367         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 1368             UIO_USERSPACE, FOLLOW));
 1369 }
 1370 
 1371 #ifndef _SYS_SYSPROTO_H_
 1372 struct linkat_args {
 1373         int     fd1;
 1374         char    *path1;
 1375         int     fd2;
 1376         char    *path2;
 1377         int     flag;
 1378 };
 1379 #endif
 1380 int
 1381 sys_linkat(struct thread *td, struct linkat_args *uap)
 1382 {
 1383         int flag;
 1384 
 1385         flag = uap->flag;
 1386         if (flag & ~AT_SYMLINK_FOLLOW)
 1387                 return (EINVAL);
 1388 
 1389         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1390             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1391 }
 1392 
      /*
       * Policy knobs read by can_hardlink().  Both default to 0, which leaves
       * the corresponding ownership check disabled, and both are exported as
       * read-write integers under the _security_bsd sysctl parent.
       */
 1393 int hardlink_check_uid = 0;
 1394 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
 1395     &hardlink_check_uid, 0,
 1396     "Unprivileged processes cannot create hard links to files owned by other "
 1397     "users");
 1398 static int hardlink_check_gid = 0;
 1399 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
 1400     &hardlink_check_gid, 0,
 1401     "Unprivileged processes cannot create hard links to files owned by other "
 1402     "groups");
 1403 
 1404 static int
 1405 can_hardlink(struct vnode *vp, struct ucred *cred)
 1406 {
 1407         struct vattr va;
 1408         int error;
 1409 
 1410         if (!hardlink_check_uid && !hardlink_check_gid)
 1411                 return (0);
 1412 
 1413         error = VOP_GETATTR(vp, &va, cred);
 1414         if (error != 0)
 1415                 return (error);
 1416 
 1417         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1418                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1419                 if (error != 0)
 1420                         return (error);
 1421         }
 1422 
 1423         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1424                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1425                 if (error != 0)
 1426                         return (error);
 1427         }
 1428 
 1429         return (0);
 1430 }
 1431 
      /*
       * Create the hard link path2 (looked up relative to fd2) to the existing
       * object path1 (looked up relative to fd1).
       *
       * segflg gives the address space of both path strings, and follow is
       * OR-ed into the lookup flags for path1 (callers in this file pass FOLLOW
       * or NOFOLLOW).  Returns 0 on success or an errno value; in particular
       * EPERM when path1 is a directory, EEXIST when path2 already exists and
       * EXDEV when the two names are on different mounts.
       */
 1432 int
 1433 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1434     enum uio_seg segflg, int follow)
 1435 {
 1436         struct vnode *vp;
 1437         struct mount *mp;
 1438         struct nameidata nd;
 1439         cap_rights_t rights;
 1440         int error;
 1441 
      /*
       * Restart point: both lookups are redone from scratch whenever the source
       * vnode could not be locked or the write had to wait.
       */
 1442 again:
 1443         bwillwrite();
 1444         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
 1445             cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);
 1446 
 1447         if ((error = namei(&nd)) != 0)
 1448                 return (error);
 1449         NDFREE(&nd, NDF_ONLY_PNBUF);
              /* vp is the link source; nd is reused below for the target name. */
 1450         vp = nd.ni_vp;
 1451         if (vp->v_type == VDIR) {
 1452                 vrele(vp);
 1453                 return (EPERM);         /* POSIX */
 1454         }
 1455         NDINIT_ATRIGHTS(&nd, CREATE,
 1456             LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
 1457             cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
 1458         if ((error = namei(&nd)) == 0) {
 1459                 if (nd.ni_vp != NULL) {
                              /* The target name already exists. */
 1460                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1461                         if (nd.ni_dvp == nd.ni_vp)
 1462                                 vrele(nd.ni_dvp);
 1463                         else
 1464                                 vput(nd.ni_dvp);
 1465                         vrele(nd.ni_vp);
 1466                         vrele(vp);
 1467                         return (EEXIST);
 1468                 } else if (nd.ni_dvp->v_mount != vp->v_mount) {
 1469                         /*
 1470                          * Cross-device link.  No need to recheck
 1471                          * vp->v_type, since it cannot change, except
 1472                          * to VBAD.
 1473                          */
 1474                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1475                         vput(nd.ni_dvp);
 1476                         vrele(vp);
 1477                         return (EXDEV);
 1478                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
                              /* Policy checks are made with vp locked. */
 1479                         error = can_hardlink(vp, td->td_ucred);
 1480 #ifdef MAC
 1481                         if (error == 0)
 1482                                 error = mac_vnode_check_link(td->td_ucred,
 1483                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1484 #endif
 1485                         if (error != 0) {
 1486                                 vput(vp);
 1487                                 vput(nd.ni_dvp);
 1488                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1489                                 return (error);
 1490                         }
 1491                         error = vn_start_write(vp, &mp, V_NOWAIT);
 1492                         if (error != 0) {
                                      /*
                                       * The write cannot start without sleeping:
                                       * release everything, wait, then retry.
                                       */
 1493                                 vput(vp);
 1494                                 vput(nd.ni_dvp);
 1495                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1496                                 error = vn_start_write(NULL, &mp,
 1497                                     V_XSLEEP | PCATCH);
 1498                                 if (error != 0)
 1499                                         return (error);
 1500                                 goto again;
 1501                         }
 1502                         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1503                         VOP_UNLOCK(vp, 0);
 1504                         vput(nd.ni_dvp);
 1505                         vn_finished_write(mp);
 1506                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1507                 } else {
                              /* Locking vp failed: release everything and retry. */
 1508                         vput(nd.ni_dvp);
 1509                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1510                         vrele(vp);
 1511                         goto again;
 1512                 }
 1513         }
              /* Drop the reference on the source taken by the first lookup. */
 1514         vrele(vp);
 1515         return (error);
 1516 }
 1517 
 1518 /*
 1519  * Make a symbolic link.
 1520  */
 1521 #ifndef _SYS_SYSPROTO_H_
 1522 struct symlink_args {
 1523         char    *path;
 1524         char    *link;
 1525 };
 1526 #endif
 1527 int
 1528 sys_symlink(td, uap)
 1529         struct thread *td;
 1530         register struct symlink_args /* {
 1531                 char *path;
 1532                 char *link;
 1533         } */ *uap;
 1534 {
 1535 
 1536         return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 1537             UIO_USERSPACE));
 1538 }
 1539 
 1540 #ifndef _SYS_SYSPROTO_H_
 1541 struct symlinkat_args {
 1542         char    *path;
 1543         int     fd;
 1544         char    *path2;
 1545 };
 1546 #endif
 1547 int
 1548 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1549 {
 1550 
 1551         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1552             UIO_USERSPACE));
 1553 }
 1554 
      /*
       * Create the symbolic link path2 (looked up relative to descriptor fd)
       * whose contents are the string path1.
       *
       * segflg gives the address space of both strings.  When path1 is not
       * already in kernel space it is copied into a buffer taken from
       * namei_zone, which is released at "out" on every exit path.  Returns 0
       * on success, EEXIST if path2 already exists, or another errno value.
       */
 1555 int
 1556 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1557     enum uio_seg segflg)
 1558 {
 1559         struct mount *mp;
 1560         struct vattr vattr;
 1561         char *syspath;
 1562         struct nameidata nd;
 1563         int error;
 1564         cap_rights_t rights;
 1565 
 1566         if (segflg == UIO_SYSSPACE) {
 1567                 syspath = path1;
 1568         } else {
                      /* Bring the link contents into a kernel buffer. */
 1569                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1570                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1571                         goto out;
 1572         }
 1573         AUDIT_ARG_TEXT(syspath);
 1574 restart:
 1575         bwillwrite();
 1576         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 1577             NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 1578             td);
 1579         if ((error = namei(&nd)) != 0)
 1580                 goto out;
 1581         if (nd.ni_vp) {
                      /* The name already exists: release everything and fail. */
 1582                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1583                 if (nd.ni_vp == nd.ni_dvp)
 1584                         vrele(nd.ni_dvp);
 1585                 else
 1586                         vput(nd.ni_dvp);
 1587                 vrele(nd.ni_vp);
 1588                 error = EEXIST;
 1589                 goto out;
 1590         }
              /*
               * If the write cannot be started without sleeping, give up the
               * lookup state, wait, and redo the lookup from "restart".
               */
 1591         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1592                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1593                 vput(nd.ni_dvp);
 1594                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1595                         goto out;
 1596                 goto restart;
 1597         }
 1598         VATTR_NULL(&vattr);
 1599         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1600 #ifdef MAC
 1601         vattr.va_type = VLNK;
 1602         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1603             &vattr);
 1604         if (error != 0)
 1605                 goto out2;
 1606 #endif
 1607         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
              /* The new vnode is not handed back to the caller. */
 1608         if (error == 0)
 1609                 vput(nd.ni_vp);
 1610 #ifdef MAC
 1611 out2:
 1612 #endif
 1613         NDFREE(&nd, NDF_ONLY_PNBUF);
 1614         vput(nd.ni_dvp);
 1615         vn_finished_write(mp);
 1616 out:
              /* Only a buffer allocated above is freed; a kernel path1 is not. */
 1617         if (segflg != UIO_SYSSPACE)
 1618                 uma_zfree(namei_zone, syspath);
 1619         return (error);
 1620 }
 1621 
 1622 /*
 1623  * Delete a whiteout from the filesystem.
 1624  */
 1625 int
 1626 sys_undelete(td, uap)
 1627         struct thread *td;
 1628         register struct undelete_args /* {
 1629                 char *path;
 1630         } */ *uap;
 1631 {
 1632         struct mount *mp;
 1633         struct nameidata nd;
 1634         int error;
 1635 
 1636 restart:
 1637         bwillwrite();
 1638         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1639             UIO_USERSPACE, uap->path, td);
 1640         error = namei(&nd);
 1641         if (error != 0)
 1642                 return (error);
 1643 
 1644         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1645                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1646                 if (nd.ni_vp == nd.ni_dvp)
 1647                         vrele(nd.ni_dvp);
 1648                 else
 1649                         vput(nd.ni_dvp);
 1650                 if (nd.ni_vp)
 1651                         vrele(nd.ni_vp);
 1652                 return (EEXIST);
 1653         }
 1654         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1655                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1656                 vput(nd.ni_dvp);
 1657                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1658                         return (error);
 1659                 goto restart;
 1660         }
 1661         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1662         NDFREE(&nd, NDF_ONLY_PNBUF);
 1663         vput(nd.ni_dvp);
 1664         vn_finished_write(mp);
 1665         return (error);
 1666 }
 1667 
 1668 /*
 1669  * Delete a name from the filesystem.
 1670  */
 1671 #ifndef _SYS_SYSPROTO_H_
 1672 struct unlink_args {
 1673         char    *path;
 1674 };
 1675 #endif
 1676 int
 1677 sys_unlink(td, uap)
 1678         struct thread *td;
 1679         struct unlink_args /* {
 1680                 char *path;
 1681         } */ *uap;
 1682 {
 1683 
 1684         return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
 1685 }
 1686 
 1687 #ifndef _SYS_SYSPROTO_H_
 1688 struct unlinkat_args {
 1689         int     fd;
 1690         char    *path;
 1691         int     flag;
 1692 };
 1693 #endif
 1694 int
 1695 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1696 {
 1697         int flag = uap->flag;
 1698         int fd = uap->fd;
 1699         char *path = uap->path;
 1700 
 1701         if (flag & ~AT_REMOVEDIR)
 1702                 return (EINVAL);
 1703 
 1704         if (flag & AT_REMOVEDIR)
 1705                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1706         else
 1707                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1708 }
 1709 
      /*
       * Remove the name path, looked up relative to descriptor fd.
       *
       * With oldinum == 0 a directory is refused with EPERM.  With a non-zero
       * oldinum the object is stat'ed first and EIDRM is returned if its inode
       * number differs from oldinum.  The root of a mounted filesystem yields
       * EBUSY.  An EINVAL from the lookup is reported to the caller as EPERM.
       * Returns 0 on success or an errno value.
       */
 1710 int
 1711 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1712     ino_t oldinum)
 1713 {
 1714         struct mount *mp;
 1715         struct vnode *vp;
 1716         struct nameidata nd;
 1717         struct stat sb;
 1718         cap_rights_t rights;
 1719         int error;
 1720 
 1721 restart:
 1722         bwillwrite();
 1723         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1724             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1725         if ((error = namei(&nd)) != 0)
 1726                 return (error == EINVAL ? EPERM : error);
 1727         vp = nd.ni_vp;
              /* error is 0 here; the chain below sets it when removal is refused. */
 1728         if (vp->v_type == VDIR && oldinum == 0) {
 1729                 error = EPERM;          /* POSIX */
 1730         } else if (oldinum != 0 &&
 1731                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1732                   sb.st_ino != oldinum) {
 1733                         error = EIDRM;  /* Identifier removed */
 1734         } else {
 1735                 /*
 1736                  * The root of a mounted filesystem cannot be deleted.
 1737                  *
 1738                  * XXX: can this only be a VDIR case?
 1739                  */
 1740                 if (vp->v_vflag & VV_ROOT)
 1741                         error = EBUSY;
 1742         }
 1743         if (error == 0) {
                      /*
                       * If the write cannot be started without sleeping, release
                       * both vnodes, wait, and redo the lookup from "restart".
                       */
 1744                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1745                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1746                         vput(nd.ni_dvp);
 1747                         if (vp == nd.ni_dvp)
 1748                                 vrele(vp);
 1749                         else
 1750                                 vput(vp);
 1751                         if ((error = vn_start_write(NULL, &mp,
 1752                             V_XSLEEP | PCATCH)) != 0)
 1753                                 return (error);
 1754                         goto restart;
 1755                 }
 1756 #ifdef MAC
 1757                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1758                     &nd.ni_cnd);
 1759                 if (error != 0)
 1760                         goto out;
 1761 #endif
 1762                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1763                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1764 #ifdef MAC
 1765 out:
 1766 #endif
 1767                 vn_finished_write(mp);
 1768         }
              /* Common exit: release the parent directory and the target vnode. */
 1769         NDFREE(&nd, NDF_ONLY_PNBUF);
 1770         vput(nd.ni_dvp);
 1771         if (vp == nd.ni_dvp)
 1772                 vrele(vp);
 1773         else
 1774                 vput(vp);
 1775         return (error);
 1776 }
 1777 
 1778 /*
 1779  * Reposition read/write file offset.
 1780  */
 1781 #ifndef _SYS_SYSPROTO_H_
 1782 struct lseek_args {
 1783         int     fd;
 1784         int     pad;
 1785         off_t   offset;
 1786         int     whence;
 1787 };
 1788 #endif
 1789 int
 1790 sys_lseek(td, uap)
 1791         struct thread *td;
 1792         register struct lseek_args /* {
 1793                 int fd;
 1794                 int pad;
 1795                 off_t offset;
 1796                 int whence;
 1797         } */ *uap;
 1798 {
 1799         struct file *fp;
 1800         cap_rights_t rights;
 1801         int error;
 1802 
 1803         AUDIT_ARG_FD(uap->fd);
 1804         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1805         if (error != 0)
 1806                 return (error);
 1807         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1808             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1809         fdrop(fp, td);
 1810         return (error);
 1811 }
 1812 
 1813 #if defined(COMPAT_43)
 1814 /*
 1815  * Reposition read/write file offset.
 1816  */
 1817 #ifndef _SYS_SYSPROTO_H_
 1818 struct olseek_args {
 1819         int     fd;
 1820         long    offset;
 1821         int     whence;
 1822 };
 1823 #endif
 1824 int
 1825 olseek(td, uap)
 1826         struct thread *td;
 1827         register struct olseek_args /* {
 1828                 int fd;
 1829                 long offset;
 1830                 int whence;
 1831         } */ *uap;
 1832 {
 1833         struct lseek_args /* {
 1834                 int fd;
 1835                 int pad;
 1836                 off_t offset;
 1837                 int whence;
 1838         } */ nuap;
 1839 
 1840         nuap.fd = uap->fd;
 1841         nuap.offset = uap->offset;
 1842         nuap.whence = uap->whence;
 1843         return (sys_lseek(td, &nuap));
 1844 }
 1845 #endif /* COMPAT_43 */
 1846 
 1847 #if defined(COMPAT_FREEBSD6)
 1848 /* Version with the 'pad' argument */
 1849 int
 1850 freebsd6_lseek(td, uap)
 1851         struct thread *td;
 1852         register struct freebsd6_lseek_args *uap;
 1853 {
 1854         struct lseek_args ouap;
 1855 
 1856         ouap.fd = uap->fd;
 1857         ouap.offset = uap->offset;
 1858         ouap.whence = uap->whence;
 1859         return (sys_lseek(td, &ouap));
 1860 }
 1861 #endif
 1862 
 1863 /*
 1864  * Check access permissions using passed credentials.
 1865  */
 1866 static int
 1867 vn_access(vp, user_flags, cred, td)
 1868         struct vnode    *vp;
 1869         int             user_flags;
 1870         struct ucred    *cred;
 1871         struct thread   *td;
 1872 {
 1873         accmode_t accmode;
 1874         int error;
 1875 
 1876         /* Flags == 0 means only check for existence. */
 1877         if (user_flags == 0)
 1878                 return (0);
 1879 
 1880         accmode = 0;
 1881         if (user_flags & R_OK)
 1882                 accmode |= VREAD;
 1883         if (user_flags & W_OK)
 1884                 accmode |= VWRITE;
 1885         if (user_flags & X_OK)
 1886                 accmode |= VEXEC;
 1887 #ifdef MAC
 1888         error = mac_vnode_check_access(cred, vp, accmode);
 1889         if (error != 0)
 1890                 return (error);
 1891 #endif
 1892         if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1893                 error = VOP_ACCESS(vp, accmode, cred, td);
 1894         return (error);
 1895 }
 1896 
 1897 /*
 1898  * Check access permissions using "real" credentials.
 1899  */
 1900 #ifndef _SYS_SYSPROTO_H_
 1901 struct access_args {
 1902         char    *path;
 1903         int     amode;
 1904 };
 1905 #endif
 1906 int
 1907 sys_access(td, uap)
 1908         struct thread *td;
 1909         register struct access_args /* {
 1910                 char *path;
 1911                 int amode;
 1912         } */ *uap;
 1913 {
 1914 
 1915         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 1916             0, uap->amode));
 1917 }
 1918 
 1919 #ifndef _SYS_SYSPROTO_H_
 1920 struct faccessat_args {
 1921         int     dirfd;
 1922         char    *path;
 1923         int     amode;
 1924         int     flag;
 1925 }
 1926 #endif
 1927 int
 1928 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 1929 {
 1930 
 1931         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1932             uap->amode));
 1933 }
 1934 
 1935 int
 1936 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1937     int flag, int amode)
 1938 {
 1939         struct ucred *cred, *usecred;
 1940         struct vnode *vp;
 1941         struct nameidata nd;
 1942         cap_rights_t rights;
 1943         int error;
 1944 
 1945         if (flag & ~AT_EACCESS)
 1946                 return (EINVAL);
 1947         if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 1948                 return (EINVAL);
 1949 
 1950         /*
 1951          * Create and modify a temporary credential instead of one that
 1952          * is potentially shared (if we need one).
 1953          */
 1954         cred = td->td_ucred;
 1955         if ((flag & AT_EACCESS) == 0 &&
 1956             ((cred->cr_uid != cred->cr_ruid ||
 1957             cred->cr_rgid != cred->cr_groups[0]))) {
 1958                 usecred = crdup(cred);
 1959                 usecred->cr_uid = cred->cr_ruid;
 1960                 usecred->cr_groups[0] = cred->cr_rgid;
 1961                 td->td_ucred = usecred;
 1962         } else
 1963                 usecred = cred;
 1964         AUDIT_ARG_VALUE(amode);
 1965         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 1966             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 1967             td);
 1968         if ((error = namei(&nd)) != 0)
 1969                 goto out;
 1970         vp = nd.ni_vp;
 1971 
 1972         error = vn_access(vp, amode, usecred, td);
 1973         NDFREE(&nd, NDF_ONLY_PNBUF);
 1974         vput(vp);
 1975 out:
 1976         if (usecred != cred) {
 1977                 td->td_ucred = cred;
 1978                 crfree(usecred);
 1979         }
 1980         return (error);
 1981 }
 1982 
 1983 /*
 1984  * Check access permissions using "effective" credentials.
 1985  */
 1986 #ifndef _SYS_SYSPROTO_H_
 1987 struct eaccess_args {
 1988         char    *path;
 1989         int     amode;
 1990 };
 1991 #endif
 1992 int
 1993 sys_eaccess(td, uap)
 1994         struct thread *td;
 1995         register struct eaccess_args /* {
 1996                 char *path;
 1997                 int amode;
 1998         } */ *uap;
 1999 {
 2000 
 2001         return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2002             AT_EACCESS, uap->amode));
 2003 }
 2004 
 2005 #if defined(COMPAT_43)
 2006 /*
 2007  * Get file status; this version follows links.
 2008  */
 2009 #ifndef _SYS_SYSPROTO_H_
 2010 struct ostat_args {
 2011         char    *path;
 2012         struct ostat *ub;
 2013 };
 2014 #endif
 2015 int
 2016 ostat(td, uap)
 2017         struct thread *td;
 2018         register struct ostat_args /* {
 2019                 char *path;
 2020                 struct ostat *ub;
 2021         } */ *uap;
 2022 {
 2023         struct stat sb;
 2024         struct ostat osb;
 2025         int error;
 2026 
 2027         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2028             &sb, NULL);
 2029         if (error != 0)
 2030                 return (error);
 2031         cvtstat(&sb, &osb);
 2032         return (copyout(&osb, uap->ub, sizeof (osb)));
 2033 }
 2034 
 2035 /*
 2036  * Get file status; this version does not follow links.
 2037  */
 2038 #ifndef _SYS_SYSPROTO_H_
 2039 struct olstat_args {
 2040         char    *path;
 2041         struct ostat *ub;
 2042 };
 2043 #endif
 2044 int
 2045 olstat(td, uap)
 2046         struct thread *td;
 2047         register struct olstat_args /* {
 2048                 char *path;
 2049                 struct ostat *ub;
 2050         } */ *uap;
 2051 {
 2052         struct stat sb;
 2053         struct ostat osb;
 2054         int error;
 2055 
 2056         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2057             UIO_USERSPACE, &sb, NULL);
 2058         if (error != 0)
 2059                 return (error);
 2060         cvtstat(&sb, &osb);
 2061         return (copyout(&osb, uap->ub, sizeof (osb)));
 2062 }
 2063 
 2064 /*
 2065  * Convert from an old to a new stat structure.
 2066  */
 2067 void
 2068 cvtstat(st, ost)
 2069         struct stat *st;
 2070         struct ostat *ost;
 2071 {
 2072 
 2073         bzero(ost, sizeof(*ost));
 2074         ost->st_dev = st->st_dev;
 2075         ost->st_ino = st->st_ino;
 2076         ost->st_mode = st->st_mode;
 2077         ost->st_nlink = st->st_nlink;
 2078         ost->st_uid = st->st_uid;
 2079         ost->st_gid = st->st_gid;
 2080         ost->st_rdev = st->st_rdev;
 2081         if (st->st_size < (quad_t)1 << 32)
 2082                 ost->st_size = st->st_size;
 2083         else
 2084                 ost->st_size = -2;
 2085         ost->st_atim = st->st_atim;
 2086         ost->st_mtim = st->st_mtim;
 2087         ost->st_ctim = st->st_ctim;
 2088         ost->st_blksize = st->st_blksize;
 2089         ost->st_blocks = st->st_blocks;
 2090         ost->st_flags = st->st_flags;
 2091         ost->st_gen = st->st_gen;
 2092 }
 2093 #endif /* COMPAT_43 */
 2094 
 2095 /*
 2096  * Get file status; this version follows links.
 2097  */
 2098 #ifndef _SYS_SYSPROTO_H_
 2099 struct stat_args {
 2100         char    *path;
 2101         struct stat *ub;
 2102 };
 2103 #endif
 2104 int
 2105 sys_stat(td, uap)
 2106         struct thread *td;
 2107         register struct stat_args /* {
 2108                 char *path;
 2109                 struct stat *ub;
 2110         } */ *uap;
 2111 {
 2112         struct stat sb;
 2113         int error;
 2114 
 2115         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2116             &sb, NULL);
 2117         if (error == 0)
 2118                 error = copyout(&sb, uap->ub, sizeof (sb));
 2119         return (error);
 2120 }
 2121 
 2122 #ifndef _SYS_SYSPROTO_H_
 2123 struct fstatat_args {
 2124         int     fd;
 2125         char    *path;
 2126         struct stat     *buf;
 2127         int     flag;
 2128 }
 2129 #endif
 2130 int
 2131 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2132 {
 2133         struct stat sb;
 2134         int error;
 2135 
 2136         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2137             UIO_USERSPACE, &sb, NULL);
 2138         if (error == 0)
 2139                 error = copyout(&sb, uap->buf, sizeof (sb));
 2140         return (error);
 2141 }
 2142 
 2143 int
 2144 kern_statat(struct thread *td, int flag, int fd, char *path,
 2145     enum uio_seg pathseg, struct stat *sbp,
 2146     void (*hook)(struct vnode *vp, struct stat *sbp))
 2147 {
 2148         struct nameidata nd;
 2149         struct stat sb;
 2150         cap_rights_t rights;
 2151         int error;
 2152 
 2153         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2154                 return (EINVAL);
 2155 
 2156         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2157             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2158             cap_rights_init(&rights, CAP_FSTAT), td);
 2159 
 2160         if ((error = namei(&nd)) != 0)
 2161                 return (error);
 2162         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2163         if (error == 0) {
 2164                 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 2165                 if (S_ISREG(sb.st_mode))
 2166                         SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 2167                 if (__predict_false(hook != NULL))
 2168                         hook(nd.ni_vp, &sb);
 2169         }
 2170         NDFREE(&nd, NDF_ONLY_PNBUF);
 2171         vput(nd.ni_vp);
 2172         if (error != 0)
 2173                 return (error);
 2174         *sbp = sb;
 2175 #ifdef KTRACE
 2176         if (KTRPOINT(td, KTR_STRUCT))
 2177                 ktrstat(&sb);
 2178 #endif
 2179         return (0);
 2180 }
 2181 
 2182 /*
 2183  * Get file status; this version does not follow links.
 2184  */
 2185 #ifndef _SYS_SYSPROTO_H_
 2186 struct lstat_args {
 2187         char    *path;
 2188         struct stat *ub;
 2189 };
 2190 #endif
 2191 int
 2192 sys_lstat(td, uap)
 2193         struct thread *td;
 2194         register struct lstat_args /* {
 2195                 char *path;
 2196                 struct stat *ub;
 2197         } */ *uap;
 2198 {
 2199         struct stat sb;
 2200         int error;
 2201 
 2202         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2203             UIO_USERSPACE, &sb, NULL);
 2204         if (error == 0)
 2205                 error = copyout(&sb, uap->ub, sizeof (sb));
 2206         return (error);
 2207 }
 2208 
 2209 /*
 2210  * Implementation of the NetBSD [l]stat() functions.
 2211  */
 2212 void
 2213 cvtnstat(sb, nsb)
 2214         struct stat *sb;
 2215         struct nstat *nsb;
 2216 {
 2217 
 2218         bzero(nsb, sizeof *nsb);
 2219         nsb->st_dev = sb->st_dev;
 2220         nsb->st_ino = sb->st_ino;
 2221         nsb->st_mode = sb->st_mode;
 2222         nsb->st_nlink = sb->st_nlink;
 2223         nsb->st_uid = sb->st_uid;
 2224         nsb->st_gid = sb->st_gid;
 2225         nsb->st_rdev = sb->st_rdev;
 2226         nsb->st_atim = sb->st_atim;
 2227         nsb->st_mtim = sb->st_mtim;
 2228         nsb->st_ctim = sb->st_ctim;
 2229         nsb->st_size = sb->st_size;
 2230         nsb->st_blocks = sb->st_blocks;
 2231         nsb->st_blksize = sb->st_blksize;
 2232         nsb->st_flags = sb->st_flags;
 2233         nsb->st_gen = sb->st_gen;
 2234         nsb->st_birthtim = sb->st_birthtim;
 2235 }
 2236 
 2237 #ifndef _SYS_SYSPROTO_H_
 2238 struct nstat_args {
 2239         char    *path;
 2240         struct nstat *ub;
 2241 };
 2242 #endif
 2243 int
 2244 sys_nstat(td, uap)
 2245         struct thread *td;
 2246         register struct nstat_args /* {
 2247                 char *path;
 2248                 struct nstat *ub;
 2249         } */ *uap;
 2250 {
 2251         struct stat sb;
 2252         struct nstat nsb;
 2253         int error;
 2254 
 2255         error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 2256             &sb, NULL);
 2257         if (error != 0)
 2258                 return (error);
 2259         cvtnstat(&sb, &nsb);
 2260         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2261 }
 2262 
 2263 /*
 2264  * NetBSD lstat.  Get file status; this version does not follow links.
 2265  */
 2266 #ifndef _SYS_SYSPROTO_H_
 2267 struct lstat_args {
 2268         char    *path;
 2269         struct stat *ub;
 2270 };
 2271 #endif
 2272 int
 2273 sys_nlstat(td, uap)
 2274         struct thread *td;
 2275         register struct nlstat_args /* {
 2276                 char *path;
 2277                 struct nstat *ub;
 2278         } */ *uap;
 2279 {
 2280         struct stat sb;
 2281         struct nstat nsb;
 2282         int error;
 2283 
 2284         error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 2285             UIO_USERSPACE, &sb, NULL);
 2286         if (error != 0)
 2287                 return (error);
 2288         cvtnstat(&sb, &nsb);
 2289         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2290 }
 2291 
 2292 /*
 2293  * Get configurable pathname variables.
 2294  */
 2295 #ifndef _SYS_SYSPROTO_H_
 2296 struct pathconf_args {
 2297         char    *path;
 2298         int     name;
 2299 };
 2300 #endif
 2301 int
 2302 sys_pathconf(td, uap)
 2303         struct thread *td;
 2304         register struct pathconf_args /* {
 2305                 char *path;
 2306                 int name;
 2307         } */ *uap;
 2308 {
 2309 
 2310         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2311 }
 2312 
 2313 #ifndef _SYS_SYSPROTO_H_
 2314 struct lpathconf_args {
 2315         char    *path;
 2316         int     name;
 2317 };
 2318 #endif
 2319 int
 2320 sys_lpathconf(td, uap)
 2321         struct thread *td;
 2322         register struct lpathconf_args /* {
 2323                 char *path;
 2324                 int name;
 2325         } */ *uap;
 2326 {
 2327 
 2328         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2329             NOFOLLOW));
 2330 }
 2331 
 2332 int
 2333 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2334     u_long flags)
 2335 {
 2336         struct nameidata nd;
 2337         int error;
 2338 
 2339         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2340             pathseg, path, td);
 2341         if ((error = namei(&nd)) != 0)
 2342                 return (error);
 2343         NDFREE(&nd, NDF_ONLY_PNBUF);
 2344 
 2345         error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2346         vput(nd.ni_vp);
 2347         return (error);
 2348 }
 2349 
 2350 /*
 2351  * Return target name of a symbolic link.
 2352  */
 2353 #ifndef _SYS_SYSPROTO_H_
 2354 struct readlink_args {
 2355         char    *path;
 2356         char    *buf;
 2357         size_t  count;
 2358 };
 2359 #endif
 2360 int
 2361 sys_readlink(td, uap)
 2362         struct thread *td;
 2363         register struct readlink_args /* {
 2364                 char *path;
 2365                 char *buf;
 2366                 size_t count;
 2367         } */ *uap;
 2368 {
 2369 
 2370         return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2371             uap->buf, UIO_USERSPACE, uap->count));
 2372 }
 2373 #ifndef _SYS_SYSPROTO_H_
 2374 struct readlinkat_args {
 2375         int     fd;
 2376         char    *path;
 2377         char    *buf;
 2378         size_t  bufsize;
 2379 };
 2380 #endif
 2381 int
 2382 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2383 {
 2384 
 2385         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2386             uap->buf, UIO_USERSPACE, uap->bufsize));
 2387 }
 2388 
 2389 int
 2390 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2391     char *buf, enum uio_seg bufseg, size_t count)
 2392 {
 2393         struct vnode *vp;
 2394         struct iovec aiov;
 2395         struct uio auio;
 2396         struct nameidata nd;
 2397         int error;
 2398 
 2399         if (count > IOSIZE_MAX)
 2400                 return (EINVAL);
 2401 
 2402         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2403             pathseg, path, fd, td);
 2404 
 2405         if ((error = namei(&nd)) != 0)
 2406                 return (error);
 2407         NDFREE(&nd, NDF_ONLY_PNBUF);
 2408         vp = nd.ni_vp;
 2409 #ifdef MAC
 2410         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2411         if (error != 0) {
 2412                 vput(vp);
 2413                 return (error);
 2414         }
 2415 #endif
 2416         if (vp->v_type != VLNK)
 2417                 error = EINVAL;
 2418         else {
 2419                 aiov.iov_base = buf;
 2420                 aiov.iov_len = count;
 2421                 auio.uio_iov = &aiov;
 2422                 auio.uio_iovcnt = 1;
 2423                 auio.uio_offset = 0;
 2424                 auio.uio_rw = UIO_READ;
 2425                 auio.uio_segflg = bufseg;
 2426                 auio.uio_td = td;
 2427                 auio.uio_resid = count;
 2428                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2429                 td->td_retval[0] = count - auio.uio_resid;
 2430         }
 2431         vput(vp);
 2432         return (error);
 2433 }
 2434 
 2435 /*
 2436  * Common implementation code for chflags() and fchflags().
 2437  */
 2438 static int
 2439 setfflags(td, vp, flags)
 2440         struct thread *td;
 2441         struct vnode *vp;
 2442         u_long flags;
 2443 {
 2444         struct mount *mp;
 2445         struct vattr vattr;
 2446         int error;
 2447 
 2448         /* We can't support the value matching VNOVAL. */
 2449         if (flags == VNOVAL)
 2450                 return (EOPNOTSUPP);
 2451 
 2452         /*
 2453          * Prevent non-root users from setting flags on devices.  When
 2454          * a device is reused, users can retain ownership of the device
 2455          * if they are allowed to set flags and programs assume that
 2456          * chown can't fail when done as root.
 2457          */
 2458         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2459                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2460                 if (error != 0)
 2461                         return (error);
 2462         }
 2463 
 2464         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2465                 return (error);
 2466         VATTR_NULL(&vattr);
 2467         vattr.va_flags = flags;
 2468         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2469 #ifdef MAC
 2470         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2471         if (error == 0)
 2472 #endif
 2473                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2474         VOP_UNLOCK(vp, 0);
 2475         vn_finished_write(mp);
 2476         return (error);
 2477 }
 2478 
 2479 /*
 2480  * Change flags of a file given a path name.
 2481  */
 2482 #ifndef _SYS_SYSPROTO_H_
 2483 struct chflags_args {
 2484         const char *path;
 2485         u_long  flags;
 2486 };
 2487 #endif
 2488 int
 2489 sys_chflags(td, uap)
 2490         struct thread *td;
 2491         register struct chflags_args /* {
 2492                 const char *path;
 2493                 u_long flags;
 2494         } */ *uap;
 2495 {
 2496 
 2497         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2498             uap->flags, 0));
 2499 }
 2500 
 2501 #ifndef _SYS_SYSPROTO_H_
 2502 struct chflagsat_args {
 2503         int     fd;
 2504         const char *path;
 2505         u_long  flags;
 2506         int     atflag;
 2507 }
 2508 #endif
 2509 int
 2510 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2511 {
 2512         int fd = uap->fd;
 2513         const char *path = uap->path;
 2514         u_long flags = uap->flags;
 2515         int atflag = uap->atflag;
 2516 
 2517         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2518                 return (EINVAL);
 2519 
 2520         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2521 }
 2522 
 2523 /*
 2524  * Same as chflags() but doesn't follow symlinks.
 2525  */
 2526 int
 2527 sys_lchflags(td, uap)
 2528         struct thread *td;
 2529         register struct lchflags_args /* {
 2530                 const char *path;
 2531                 u_long flags;
 2532         } */ *uap;
 2533 {
 2534 
 2535         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2536             uap->flags, AT_SYMLINK_NOFOLLOW));
 2537 }
 2538 
 2539 static int
 2540 kern_chflagsat(struct thread *td, int fd, const char *path,
 2541     enum uio_seg pathseg, u_long flags, int atflag)
 2542 {
 2543         struct nameidata nd;
 2544         cap_rights_t rights;
 2545         int error, follow;
 2546 
 2547         AUDIT_ARG_FFLAGS(flags);
 2548         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2549         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2550             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2551         if ((error = namei(&nd)) != 0)
 2552                 return (error);
 2553         NDFREE(&nd, NDF_ONLY_PNBUF);
 2554         error = setfflags(td, nd.ni_vp, flags);
 2555         vrele(nd.ni_vp);
 2556         return (error);
 2557 }
 2558 
 2559 /*
 2560  * Change flags of a file given a file descriptor.
 2561  */
 2562 #ifndef _SYS_SYSPROTO_H_
 2563 struct fchflags_args {
 2564         int     fd;
 2565         u_long  flags;
 2566 };
 2567 #endif
 2568 int
 2569 sys_fchflags(td, uap)
 2570         struct thread *td;
 2571         register struct fchflags_args /* {
 2572                 int fd;
 2573                 u_long flags;
 2574         } */ *uap;
 2575 {
 2576         struct file *fp;
 2577         cap_rights_t rights;
 2578         int error;
 2579 
 2580         AUDIT_ARG_FD(uap->fd);
 2581         AUDIT_ARG_FFLAGS(uap->flags);
 2582         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
 2583             &fp);
 2584         if (error != 0)
 2585                 return (error);
 2586 #ifdef AUDIT
 2587         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2588         AUDIT_ARG_VNODE1(fp->f_vnode);
 2589         VOP_UNLOCK(fp->f_vnode, 0);
 2590 #endif
 2591         error = setfflags(td, fp->f_vnode, uap->flags);
 2592         fdrop(fp, td);
 2593         return (error);
 2594 }
 2595 
 2596 /*
 2597  * Common implementation code for chmod(), lchmod() and fchmod().
 2598  */
 2599 int
 2600 setfmode(td, cred, vp, mode)
 2601         struct thread *td;
 2602         struct ucred *cred;
 2603         struct vnode *vp;
 2604         int mode;
 2605 {
 2606         struct mount *mp;
 2607         struct vattr vattr;
 2608         int error;
 2609 
 2610         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2611                 return (error);
 2612         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2613         VATTR_NULL(&vattr);
 2614         vattr.va_mode = mode & ALLPERMS;
 2615 #ifdef MAC
 2616         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2617         if (error == 0)
 2618 #endif
 2619                 error = VOP_SETATTR(vp, &vattr, cred);
 2620         VOP_UNLOCK(vp, 0);
 2621         vn_finished_write(mp);
 2622         return (error);
 2623 }
 2624 
 2625 /*
 2626  * Change mode of a file given path name.
 2627  */
 2628 #ifndef _SYS_SYSPROTO_H_
 2629 struct chmod_args {
 2630         char    *path;
 2631         int     mode;
 2632 };
 2633 #endif
 2634 int
 2635 sys_chmod(td, uap)
 2636         struct thread *td;
 2637         register struct chmod_args /* {
 2638                 char *path;
 2639                 int mode;
 2640         } */ *uap;
 2641 {
 2642 
 2643         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2644             uap->mode, 0));
 2645 }
 2646 
 2647 #ifndef _SYS_SYSPROTO_H_
 2648 struct fchmodat_args {
 2649         int     dirfd;
 2650         char    *path;
 2651         mode_t  mode;
 2652         int     flag;
 2653 }
 2654 #endif
 2655 int
 2656 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2657 {
 2658         int flag = uap->flag;
 2659         int fd = uap->fd;
 2660         char *path = uap->path;
 2661         mode_t mode = uap->mode;
 2662 
 2663         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2664                 return (EINVAL);
 2665 
 2666         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2667 }
 2668 
 2669 /*
 2670  * Change mode of a file given path name (don't follow links.)
 2671  */
 2672 #ifndef _SYS_SYSPROTO_H_
 2673 struct lchmod_args {
 2674         char    *path;
 2675         int     mode;
 2676 };
 2677 #endif
 2678 int
 2679 sys_lchmod(td, uap)
 2680         struct thread *td;
 2681         register struct lchmod_args /* {
 2682                 char *path;
 2683                 int mode;
 2684         } */ *uap;
 2685 {
 2686 
 2687         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2688             uap->mode, AT_SYMLINK_NOFOLLOW));
 2689 }
 2690 
 2691 int
 2692 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2693     mode_t mode, int flag)
 2694 {
 2695         struct nameidata nd;
 2696         cap_rights_t rights;
 2697         int error, follow;
 2698 
 2699         AUDIT_ARG_MODE(mode);
 2700         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2701         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2702             cap_rights_init(&rights, CAP_FCHMOD), td);
 2703         if ((error = namei(&nd)) != 0)
 2704                 return (error);
 2705         NDFREE(&nd, NDF_ONLY_PNBUF);
 2706         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2707         vrele(nd.ni_vp);
 2708         return (error);
 2709 }
 2710 
 2711 /*
 2712  * Change mode of a file given a file descriptor.
 2713  */
 2714 #ifndef _SYS_SYSPROTO_H_
 2715 struct fchmod_args {
 2716         int     fd;
 2717         int     mode;
 2718 };
 2719 #endif
 2720 int
 2721 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2722 {
 2723         struct file *fp;
 2724         cap_rights_t rights;
 2725         int error;
 2726 
 2727         AUDIT_ARG_FD(uap->fd);
 2728         AUDIT_ARG_MODE(uap->mode);
 2729 
 2730         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2731         if (error != 0)
 2732                 return (error);
 2733         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2734         fdrop(fp, td);
 2735         return (error);
 2736 }
 2737 
 2738 /*
 2739  * Common implementation for chown(), lchown(), and fchown()
 2740  */
 2741 int
 2742 setfown(td, cred, vp, uid, gid)
 2743         struct thread *td;
 2744         struct ucred *cred;
 2745         struct vnode *vp;
 2746         uid_t uid;
 2747         gid_t gid;
 2748 {
 2749         struct mount *mp;
 2750         struct vattr vattr;
 2751         int error;
 2752 
 2753         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2754                 return (error);
 2755         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2756         VATTR_NULL(&vattr);
 2757         vattr.va_uid = uid;
 2758         vattr.va_gid = gid;
 2759 #ifdef MAC
 2760         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2761             vattr.va_gid);
 2762         if (error == 0)
 2763 #endif
 2764                 error = VOP_SETATTR(vp, &vattr, cred);
 2765         VOP_UNLOCK(vp, 0);
 2766         vn_finished_write(mp);
 2767         return (error);
 2768 }
 2769 
 2770 /*
 2771  * Set ownership given a path name.
 2772  */
 2773 #ifndef _SYS_SYSPROTO_H_
 2774 struct chown_args {
 2775         char    *path;
 2776         int     uid;
 2777         int     gid;
 2778 };
 2779 #endif
 2780 int
 2781 sys_chown(td, uap)
 2782         struct thread *td;
 2783         register struct chown_args /* {
 2784                 char *path;
 2785                 int uid;
 2786                 int gid;
 2787         } */ *uap;
 2788 {
 2789 
 2790         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 2791             uap->gid, 0));
 2792 }
 2793 
 2794 #ifndef _SYS_SYSPROTO_H_
 2795 struct fchownat_args {
 2796         int fd;
 2797         const char * path;
 2798         uid_t uid;
 2799         gid_t gid;
 2800         int flag;
 2801 };
 2802 #endif
 2803 int
 2804 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2805 {
 2806         int flag;
 2807 
 2808         flag = uap->flag;
 2809         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2810                 return (EINVAL);
 2811 
 2812         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2813             uap->gid, uap->flag));
 2814 }
 2815 
 2816 int
 2817 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2818     int uid, int gid, int flag)
 2819 {
 2820         struct nameidata nd;
 2821         cap_rights_t rights;
 2822         int error, follow;
 2823 
 2824         AUDIT_ARG_OWNER(uid, gid);
 2825         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2826         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2827             cap_rights_init(&rights, CAP_FCHOWN), td);
 2828 
 2829         if ((error = namei(&nd)) != 0)
 2830                 return (error);
 2831         NDFREE(&nd, NDF_ONLY_PNBUF);
 2832         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2833         vrele(nd.ni_vp);
 2834         return (error);
 2835 }
 2836 
 2837 /*
 2838  * Set ownership given a path name, do not cross symlinks.
 2839  */
 2840 #ifndef _SYS_SYSPROTO_H_
 2841 struct lchown_args {
 2842         char    *path;
 2843         int     uid;
 2844         int     gid;
 2845 };
 2846 #endif
 2847 int
 2848 sys_lchown(td, uap)
 2849         struct thread *td;
 2850         register struct lchown_args /* {
 2851                 char *path;
 2852                 int uid;
 2853                 int gid;
 2854         } */ *uap;
 2855 {
 2856 
 2857         return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2858             uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 2859 }
 2860 
 2861 /*
 2862  * Set ownership given a file descriptor.
 2863  */
 2864 #ifndef _SYS_SYSPROTO_H_
 2865 struct fchown_args {
 2866         int     fd;
 2867         int     uid;
 2868         int     gid;
 2869 };
 2870 #endif
 2871 int
 2872 sys_fchown(td, uap)
 2873         struct thread *td;
 2874         register struct fchown_args /* {
 2875                 int fd;
 2876                 int uid;
 2877                 int gid;
 2878         } */ *uap;
 2879 {
 2880         struct file *fp;
 2881         cap_rights_t rights;
 2882         int error;
 2883 
 2884         AUDIT_ARG_FD(uap->fd);
 2885         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 2886         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 2887         if (error != 0)
 2888                 return (error);
 2889         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 2890         fdrop(fp, td);
 2891         return (error);
 2892 }
 2893 
 2894 /*
 2895  * Common implementation code for utimes(), lutimes(), and futimes().
 2896  */
 2897 static int
 2898 getutimes(usrtvp, tvpseg, tsp)
 2899         const struct timeval *usrtvp;
 2900         enum uio_seg tvpseg;
 2901         struct timespec *tsp;
 2902 {
 2903         struct timeval tv[2];
 2904         const struct timeval *tvp;
 2905         int error;
 2906 
 2907         if (usrtvp == NULL) {
 2908                 vfs_timestamp(&tsp[0]);
 2909                 tsp[1] = tsp[0];
 2910         } else {
 2911                 if (tvpseg == UIO_SYSSPACE) {
 2912                         tvp = usrtvp;
 2913                 } else {
 2914                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 2915                                 return (error);
 2916                         tvp = tv;
 2917                 }
 2918 
 2919                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 2920                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 2921                         return (EINVAL);
 2922                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 2923                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 2924         }
 2925         return (0);
 2926 }
 2927 
 2928 /*
 2929  * Common implementation code for futimens(), utimensat().
 2930  */
 2931 #define UTIMENS_NULL    0x1
 2932 #define UTIMENS_EXIT    0x2
 2933 static int
 2934 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
 2935     struct timespec *tsp, int *retflags)
 2936 {
 2937         struct timespec tsnow;
 2938         int error;
 2939 
 2940         vfs_timestamp(&tsnow);
 2941         *retflags = 0;
 2942         if (usrtsp == NULL) {
 2943                 tsp[0] = tsnow;
 2944                 tsp[1] = tsnow;
 2945                 *retflags |= UTIMENS_NULL;
 2946                 return (0);
 2947         }
 2948         if (tspseg == UIO_SYSSPACE) {
 2949                 tsp[0] = usrtsp[0];
 2950                 tsp[1] = usrtsp[1];
 2951         } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 2952                 return (error);
 2953         if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 2954                 *retflags |= UTIMENS_EXIT;
 2955         if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 2956                 *retflags |= UTIMENS_NULL;
 2957         if (tsp[0].tv_nsec == UTIME_OMIT)
 2958                 tsp[0].tv_sec = VNOVAL;
 2959         else if (tsp[0].tv_nsec == UTIME_NOW)
 2960                 tsp[0] = tsnow;
 2961         else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 2962                 return (EINVAL);
 2963         if (tsp[1].tv_nsec == UTIME_OMIT)
 2964                 tsp[1].tv_sec = VNOVAL;
 2965         else if (tsp[1].tv_nsec == UTIME_NOW)
 2966                 tsp[1] = tsnow;
 2967         else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 2968                 return (EINVAL);
 2969 
 2970         return (0);
 2971 }
 2972 
 2973 /*
 2974  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
 2975  * and utimensat().
 2976  */
 2977 static int
 2978 setutimes(td, vp, ts, numtimes, nullflag)
 2979         struct thread *td;
 2980         struct vnode *vp;
 2981         const struct timespec *ts;
 2982         int numtimes;
 2983         int nullflag;
 2984 {
 2985         struct mount *mp;
 2986         struct vattr vattr;
 2987         int error, setbirthtime;
 2988 
 2989         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2990                 return (error);
 2991         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2992         setbirthtime = 0;
 2993         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 2994             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 2995                 setbirthtime = 1;
 2996         VATTR_NULL(&vattr);
 2997         vattr.va_atime = ts[0];
 2998         vattr.va_mtime = ts[1];
 2999         if (setbirthtime)
 3000                 vattr.va_birthtime = ts[1];
 3001         if (numtimes > 2)
 3002                 vattr.va_birthtime = ts[2];
 3003         if (nullflag)
 3004                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3005 #ifdef MAC
 3006         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3007             vattr.va_mtime);
 3008 #endif
 3009         if (error == 0)
 3010                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3011         VOP_UNLOCK(vp, 0);
 3012         vn_finished_write(mp);
 3013         return (error);
 3014 }
 3015 
 3016 /*
 3017  * Set the access and modification times of a file.
 3018  */
 3019 #ifndef _SYS_SYSPROTO_H_
 3020 struct utimes_args {
 3021         char    *path;
 3022         struct  timeval *tptr;
 3023 };
 3024 #endif
 3025 int
 3026 sys_utimes(td, uap)
 3027         struct thread *td;
 3028         register struct utimes_args /* {
 3029                 char *path;
 3030                 struct timeval *tptr;
 3031         } */ *uap;
 3032 {
 3033 
 3034         return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3035             uap->tptr, UIO_USERSPACE));
 3036 }
 3037 
 3038 #ifndef _SYS_SYSPROTO_H_
 3039 struct futimesat_args {
 3040         int fd;
 3041         const char * path;
 3042         const struct timeval * times;
 3043 };
 3044 #endif
 3045 int
 3046 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3047 {
 3048 
 3049         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3050             uap->times, UIO_USERSPACE));
 3051 }
 3052 
 3053 int
 3054 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3055     struct timeval *tptr, enum uio_seg tptrseg)
 3056 {
 3057         struct nameidata nd;
 3058         struct timespec ts[2];
 3059         cap_rights_t rights;
 3060         int error;
 3061 
 3062         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3063                 return (error);
 3064         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3065             cap_rights_init(&rights, CAP_FUTIMES), td);
 3066 
 3067         if ((error = namei(&nd)) != 0)
 3068                 return (error);
 3069         NDFREE(&nd, NDF_ONLY_PNBUF);
 3070         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3071         vrele(nd.ni_vp);
 3072         return (error);
 3073 }
 3074 
 3075 /*
 3076  * Set the access and modification times of a file.
 3077  */
 3078 #ifndef _SYS_SYSPROTO_H_
 3079 struct lutimes_args {
 3080         char    *path;
 3081         struct  timeval *tptr;
 3082 };
 3083 #endif
 3084 int
 3085 sys_lutimes(td, uap)
 3086         struct thread *td;
 3087         register struct lutimes_args /* {
 3088                 char *path;
 3089                 struct timeval *tptr;
 3090         } */ *uap;
 3091 {
 3092 
 3093         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3094             UIO_USERSPACE));
 3095 }
 3096 
 3097 int
 3098 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3099     struct timeval *tptr, enum uio_seg tptrseg)
 3100 {
 3101         struct timespec ts[2];
 3102         struct nameidata nd;
 3103         int error;
 3104 
 3105         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3106                 return (error);
 3107         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3108         if ((error = namei(&nd)) != 0)
 3109                 return (error);
 3110         NDFREE(&nd, NDF_ONLY_PNBUF);
 3111         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3112         vrele(nd.ni_vp);
 3113         return (error);
 3114 }
 3115 
 3116 /*
 3117  * Set the access and modification times of a file.
 3118  */
 3119 #ifndef _SYS_SYSPROTO_H_
 3120 struct futimes_args {
 3121         int     fd;
 3122         struct  timeval *tptr;
 3123 };
 3124 #endif
 3125 int
 3126 sys_futimes(td, uap)
 3127         struct thread *td;
 3128         register struct futimes_args /* {
 3129                 int  fd;
 3130                 struct timeval *tptr;
 3131         } */ *uap;
 3132 {
 3133 
 3134         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3135 }
 3136 
 3137 int
 3138 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3139     enum uio_seg tptrseg)
 3140 {
 3141         struct timespec ts[2];
 3142         struct file *fp;
 3143         cap_rights_t rights;
 3144         int error;
 3145 
 3146         AUDIT_ARG_FD(fd);
 3147         error = getutimes(tptr, tptrseg, ts);
 3148         if (error != 0)
 3149                 return (error);
 3150         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3151         if (error != 0)
 3152                 return (error);
 3153 #ifdef AUDIT
 3154         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3155         AUDIT_ARG_VNODE1(fp->f_vnode);
 3156         VOP_UNLOCK(fp->f_vnode, 0);
 3157 #endif
 3158         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3159         fdrop(fp, td);
 3160         return (error);
 3161 }
 3162 
 3163 int
 3164 sys_futimens(struct thread *td, struct futimens_args *uap)
 3165 {
 3166 
 3167         return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 3168 }
 3169 
 3170 int
 3171 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 3172     enum uio_seg tptrseg)
 3173 {
 3174         struct timespec ts[2];
 3175         struct file *fp;
 3176         cap_rights_t rights;
 3177         int error, flags;
 3178 
 3179         AUDIT_ARG_FD(fd);
 3180         error = getutimens(tptr, tptrseg, ts, &flags);
 3181         if (error != 0)
 3182                 return (error);
 3183         if (flags & UTIMENS_EXIT)
 3184                 return (0);
 3185         error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3186         if (error != 0)
 3187                 return (error);
 3188 #ifdef AUDIT
 3189         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3190         AUDIT_ARG_VNODE1(fp->f_vnode);
 3191         VOP_UNLOCK(fp->f_vnode, 0);
 3192 #endif
 3193         error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 3194         fdrop(fp, td);
 3195         return (error);
 3196 }
 3197 
 3198 int
 3199 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 3200 {
 3201 
 3202         return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 3203             uap->times, UIO_USERSPACE, uap->flag));
 3204 }
 3205 
 3206 int
 3207 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3208     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 3209 {
 3210         struct nameidata nd;
 3211         struct timespec ts[2];
 3212         cap_rights_t rights;
 3213         int error, flags;
 3214 
 3215         if (flag & ~AT_SYMLINK_NOFOLLOW)
 3216                 return (EINVAL);
 3217 
 3218         if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 3219                 return (error);
 3220         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 3221             FOLLOW) | AUDITVNODE1, pathseg, path, fd,
 3222             cap_rights_init(&rights, CAP_FUTIMES), td);
 3223         if ((error = namei(&nd)) != 0)
 3224                 return (error);
 3225         /*
 3226          * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 3227          * POSIX states:
 3228          * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 3229          * "Search permission is denied by a component of the path prefix."
 3230          */
 3231         NDFREE(&nd, NDF_ONLY_PNBUF);
 3232         if ((flags & UTIMENS_EXIT) == 0)
 3233                 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 3234         vrele(nd.ni_vp);
 3235         return (error);
 3236 }
 3237 
 3238 /*
 3239  * Truncate a file given its path name.
 3240  */
 3241 #ifndef _SYS_SYSPROTO_H_
 3242 struct truncate_args {
 3243         char    *path;
 3244         int     pad;
 3245         off_t   length;
 3246 };
 3247 #endif
 3248 int
 3249 sys_truncate(td, uap)
 3250         struct thread *td;
 3251         register struct truncate_args /* {
 3252                 char *path;
 3253                 int pad;
 3254                 off_t length;
 3255         } */ *uap;
 3256 {
 3257 
 3258         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3259 }
 3260 
 3261 int
 3262 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3263 {
 3264         struct mount *mp;
 3265         struct vnode *vp;
 3266         void *rl_cookie;
 3267         struct vattr vattr;
 3268         struct nameidata nd;
 3269         int error;
 3270 
 3271         if (length < 0)
 3272                 return(EINVAL);
 3273         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3274         if ((error = namei(&nd)) != 0)
 3275                 return (error);
 3276         vp = nd.ni_vp;
 3277         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3278         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3279                 vn_rangelock_unlock(vp, rl_cookie);
 3280                 vrele(vp);
 3281                 return (error);
 3282         }
 3283         NDFREE(&nd, NDF_ONLY_PNBUF);
 3284         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3285         if (vp->v_type == VDIR)
 3286                 error = EISDIR;
 3287 #ifdef MAC
 3288         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3289         }
 3290 #endif
 3291         else if ((error = vn_writechk(vp)) == 0 &&
 3292             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3293                 VATTR_NULL(&vattr);
 3294                 vattr.va_size = length;
 3295                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3296         }
 3297         VOP_UNLOCK(vp, 0);
 3298         vn_finished_write(mp);
 3299         vn_rangelock_unlock(vp, rl_cookie);
 3300         vrele(vp);
 3301         return (error);
 3302 }
 3303 
 3304 #if defined(COMPAT_43)
 3305 /*
 3306  * Truncate a file given its path name.
 3307  */
 3308 #ifndef _SYS_SYSPROTO_H_
 3309 struct otruncate_args {
 3310         char    *path;
 3311         long    length;
 3312 };
 3313 #endif
 3314 int
 3315 otruncate(td, uap)
 3316         struct thread *td;
 3317         register struct otruncate_args /* {
 3318                 char *path;
 3319                 long length;
 3320         } */ *uap;
 3321 {
 3322         struct truncate_args /* {
 3323                 char *path;
 3324                 int pad;
 3325                 off_t length;
 3326         } */ nuap;
 3327 
 3328         nuap.path = uap->path;
 3329         nuap.length = uap->length;
 3330         return (sys_truncate(td, &nuap));
 3331 }
 3332 #endif /* COMPAT_43 */
 3333 
 3334 #if defined(COMPAT_FREEBSD6)
 3335 /* Versions with the pad argument */
 3336 int
 3337 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3338 {
 3339         struct truncate_args ouap;
 3340 
 3341         ouap.path = uap->path;
 3342         ouap.length = uap->length;
 3343         return (sys_truncate(td, &ouap));
 3344 }
 3345 
 3346 int
 3347 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3348 {
 3349         struct ftruncate_args ouap;
 3350 
 3351         ouap.fd = uap->fd;
 3352         ouap.length = uap->length;
 3353         return (sys_ftruncate(td, &ouap));
 3354 }
 3355 #endif
 3356 
 3357 /*
 3358  * Sync an open file.
 3359  */
 3360 #ifndef _SYS_SYSPROTO_H_
 3361 struct fsync_args {
 3362         int     fd;
 3363 };
 3364 #endif
 3365 int
 3366 sys_fsync(td, uap)
 3367         struct thread *td;
 3368         struct fsync_args /* {
 3369                 int fd;
 3370         } */ *uap;
 3371 {
 3372         struct vnode *vp;
 3373         struct mount *mp;
 3374         struct file *fp;
 3375         cap_rights_t rights;
 3376         int error, lock_flags;
 3377 
 3378         AUDIT_ARG_FD(uap->fd);
 3379         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
 3380         if (error != 0)
 3381                 return (error);
 3382         vp = fp->f_vnode;
 3383         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3384         if (error != 0)
 3385                 goto drop;
 3386         if (MNT_SHARED_WRITES(mp) ||
 3387             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3388                 lock_flags = LK_SHARED;
 3389         } else {
 3390                 lock_flags = LK_EXCLUSIVE;
 3391         }
 3392         vn_lock(vp, lock_flags | LK_RETRY);
 3393         AUDIT_ARG_VNODE1(vp);
 3394         if (vp->v_object != NULL) {
 3395                 VM_OBJECT_WLOCK(vp->v_object);
 3396                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3397                 VM_OBJECT_WUNLOCK(vp->v_object);
 3398         }
 3399         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3400 
 3401         VOP_UNLOCK(vp, 0);
 3402         vn_finished_write(mp);
 3403 drop:
 3404         fdrop(fp, td);
 3405         return (error);
 3406 }
 3407 
 3408 /*
 3409  * Rename files.  Source and destination must either both be directories, or
 3410  * both not be directories.  If target is a directory, it must be empty.
 3411  */
 3412 #ifndef _SYS_SYSPROTO_H_
 3413 struct rename_args {
 3414         char    *from;
 3415         char    *to;
 3416 };
 3417 #endif
 3418 int
 3419 sys_rename(td, uap)
 3420         struct thread *td;
 3421         register struct rename_args /* {
 3422                 char *from;
 3423                 char *to;
 3424         } */ *uap;
 3425 {
 3426 
 3427         return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 3428             uap->to, UIO_USERSPACE));
 3429 }
 3430 
 3431 #ifndef _SYS_SYSPROTO_H_
 3432 struct renameat_args {
 3433         int     oldfd;
 3434         char    *old;
 3435         int     newfd;
 3436         char    *new;
 3437 };
 3438 #endif
 3439 int
 3440 sys_renameat(struct thread *td, struct renameat_args *uap)
 3441 {
 3442 
 3443         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3444             UIO_USERSPACE));
 3445 }
 3446 
      /*
       * Rename the file named by `old' (looked up relative to oldfd) to
       * `new' (looked up relative to newfd).  Both path strings live in the
       * address space given by pathseg.
       *
       * Returns 0 on success or an errno value.  The internal value -1, set
       * when source and target resolve to the same vnode, is reported to the
       * caller as 0.
       */
 3447 int
 3448 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3449     enum uio_seg pathseg)
 3450 {
 3451         struct mount *mp = NULL;
 3452         struct vnode *tvp, *fvp, *tdvp;
 3453         struct nameidata fromnd, tond;
 3454         cap_rights_t rights;
 3455         int error;
 3456 
      /* Retry point: both lookups are redone after the V_NOWAIT failure below. */
 3457 again:
 3458         bwillwrite();
              /*
               * With MAC the source lookup locks parent and leaf so that the
               * MAC check below sees locked vnodes; they are unlocked right
               * after that check.  Without MAC only WANTPARENT is requested.
               */
 3459 #ifdef MAC
 3460         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3461             AUDITVNODE1, pathseg, old, oldfd,
 3462             cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 3463 #else
 3464         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3465             pathseg, old, oldfd,
 3466             cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 3467 #endif
 3468 
 3469         if ((error = namei(&fromnd)) != 0)
 3470                 return (error);
 3471 #ifdef MAC
              /*
               * NOTE(review): the result of this check is stored in error but
               * is overwritten by the namei(&tond) assignment below before it
               * is tested - confirm whether that is intended.
               */
 3472         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3473             fromnd.ni_vp, &fromnd.ni_cnd);
 3474         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3475         if (fromnd.ni_dvp != fromnd.ni_vp)
 3476                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3477 #endif
 3478         fvp = fromnd.ni_vp;
 3479         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3480             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3481             cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td);
 3482         if (fromnd.ni_vp->v_type == VDIR)
 3483                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3484         if ((error = namei(&tond)) != 0) {
 3485                 /* Translate error code for rename("dir1", "dir2/."). */
 3486                 if (error == EISDIR && fvp->v_type == VDIR)
 3487                         error = EINVAL;
 3488                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3489                 vrele(fromnd.ni_dvp);
 3490                 vrele(fvp);
 3491                 goto out1;
 3492         }
 3493         tdvp = tond.ni_dvp;
 3494         tvp = tond.ni_vp;
 3495         error = vn_start_write(fvp, &mp, V_NOWAIT);
 3496         if (error != 0) {
                      /*
                       * The write could not be started with V_NOWAIT: release
                       * everything both lookups returned, call
                       * vn_start_write() again with V_XSLEEP | PCATCH, and
                       * redo the lookups from the top.
                       */
 3497                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3498                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3499                 if (tvp != NULL)
 3500                         vput(tvp);
 3501                 if (tdvp == tvp)
 3502                         vrele(tdvp);
 3503                 else
 3504                         vput(tdvp);
 3505                 vrele(fromnd.ni_dvp);
 3506                 vrele(fvp);
 3507                 vrele(tond.ni_startdir);
 3508                 if (fromnd.ni_startdir != NULL)
 3509                         vrele(fromnd.ni_startdir);
 3510                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 3511                 if (error != 0)
 3512                         return (error);
 3513                 goto again;
 3514         }
              /*
               * The target exists: source and target must both be directories
               * or both be non-directories.
               */
 3515         if (tvp != NULL) {
 3516                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3517                         error = ENOTDIR;
 3518                         goto out;
 3519                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3520                         error = EISDIR;
 3521                         goto out;
 3522                 }
 3523 #ifdef CAPABILITIES
 3524                 if (newfd != AT_FDCWD) {
 3525                         /*
 3526                          * If the target already exists we require CAP_UNLINKAT
 3527                          * from 'newfd'.
 3528                          */
 3529                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3530                             cap_rights_init(&rights, CAP_UNLINKAT));
 3531                         if (error != 0)
 3532                                 goto out;
 3533                 }
 3534 #endif
 3535         }
              /* The source vnode is itself the target's parent directory. */
 3536         if (fvp == tdvp) {
 3537                 error = EINVAL;
 3538                 goto out;
 3539         }
 3540         /*
 3541          * If the source is the same as the destination (that is, if they
 3542          * are links to the same vnode), then there is nothing to do.
 3543          */
              /* -1 is an internal marker; it is turned into 0 before returning. */
 3544         if (fvp == tvp)
 3545                 error = -1;
 3546 #ifdef MAC
 3547         else
 3548                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3549                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3550 #endif
 3551 out:
              /*
               * On success only the path buffers are freed here after
               * VOP_RENAME(); the vnodes are not released on this path.
               * NOTE(review): presumably VOP_RENAME() releases them itself -
               * confirm against VOP_RENAME(9).  On failure the target and
               * source vnodes are released explicitly.
               */
 3552         if (error == 0) {
 3553                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3554                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3555                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3556                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3557         } else {
 3558                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3559                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3560                 if (tvp != NULL)
 3561                         vput(tvp);
 3562                 if (tdvp == tvp)
 3563                         vrele(tdvp);
 3564                 else
 3565                         vput(tdvp);
 3566                 vrele(fromnd.ni_dvp);
 3567                 vrele(fvp);
 3568         }
 3569         vrele(tond.ni_startdir);
 3570         vn_finished_write(mp);
      /*
       * Also reached directly when the target lookup fails; that path skips
       * the tond.ni_startdir release and vn_finished_write() above.
       */
 3571 out1:
 3572         if (fromnd.ni_startdir)
 3573                 vrele(fromnd.ni_startdir);
 3574         if (error == -1)
 3575                 return (0);
 3576         return (error);
 3577 }
 3578 
 3579 /*
 3580  * Make a directory file.
 3581  */
 3582 #ifndef _SYS_SYSPROTO_H_
 3583 struct mkdir_args {
 3584         char    *path;
 3585         int     mode;
 3586 };
 3587 #endif
 3588 int
 3589 sys_mkdir(td, uap)
 3590         struct thread *td;
 3591         register struct mkdir_args /* {
 3592                 char *path;
 3593                 int mode;
 3594         } */ *uap;
 3595 {
 3596 
 3597         return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 3598             uap->mode));
 3599 }
 3600 
 3601 #ifndef _SYS_SYSPROTO_H_
 3602 struct mkdirat_args {
 3603         int     fd;
 3604         char    *path;
 3605         mode_t  mode;
 3606 };
 3607 #endif
 3608 int
 3609 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3610 {
 3611 
 3612         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3613 }
 3614 
 3615 int
 3616 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3617     int mode)
 3618 {
 3619         struct mount *mp;
 3620         struct vnode *vp;
 3621         struct vattr vattr;
 3622         struct nameidata nd;
 3623         cap_rights_t rights;
 3624         int error;
 3625 
 3626         AUDIT_ARG_MODE(mode);
 3627 restart:
 3628         bwillwrite();
 3629         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 3630             NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 3631             td);
 3632         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3633         if ((error = namei(&nd)) != 0)
 3634                 return (error);
 3635         vp = nd.ni_vp;
 3636         if (vp != NULL) {
 3637                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3638                 /*
 3639                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3640                  * the strange behaviour of leaving the vnode unlocked
 3641                  * if the target is the same vnode as the parent.
 3642                  */
 3643                 if (vp == nd.ni_dvp)
 3644                         vrele(nd.ni_dvp);
 3645                 else
 3646                         vput(nd.ni_dvp);
 3647                 vrele(vp);
 3648                 return (EEXIST);
 3649         }
 3650         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3651                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3652                 vput(nd.ni_dvp);
 3653                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3654                         return (error);
 3655                 goto restart;
 3656         }
 3657         VATTR_NULL(&vattr);
 3658         vattr.va_type = VDIR;
 3659         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3660 #ifdef MAC
 3661         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3662             &vattr);
 3663         if (error != 0)
 3664                 goto out;
 3665 #endif
 3666         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3667 #ifdef MAC
 3668 out:
 3669 #endif
 3670         NDFREE(&nd, NDF_ONLY_PNBUF);
 3671         vput(nd.ni_dvp);
 3672         if (error == 0)
 3673                 vput(nd.ni_vp);
 3674         vn_finished_write(mp);
 3675         return (error);
 3676 }
 3677 
 3678 /*
 3679  * Remove a directory file.
 3680  */
 3681 #ifndef _SYS_SYSPROTO_H_
 3682 struct rmdir_args {
 3683         char    *path;
 3684 };
 3685 #endif
 3686 int
 3687 sys_rmdir(td, uap)
 3688         struct thread *td;
 3689         struct rmdir_args /* {
 3690                 char *path;
 3691         } */ *uap;
 3692 {
 3693 
 3694         return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE));
 3695 }
 3696 
 3697 int
 3698 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3699 {
 3700         struct mount *mp;
 3701         struct vnode *vp;
 3702         struct nameidata nd;
 3703         cap_rights_t rights;
 3704         int error;
 3705 
 3706 restart:
 3707         bwillwrite();
 3708         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3709             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3710         if ((error = namei(&nd)) != 0)
 3711                 return (error);
 3712         vp = nd.ni_vp;
 3713         if (vp->v_type != VDIR) {
 3714                 error = ENOTDIR;
 3715                 goto out;
 3716         }
 3717         /*
 3718          * No rmdir "." please.
 3719          */
 3720         if (nd.ni_dvp == vp) {
 3721                 error = EINVAL;
 3722                 goto out;
 3723         }
 3724         /*
 3725          * The root of a mounted filesystem cannot be deleted.
 3726          */
 3727         if (vp->v_vflag & VV_ROOT) {
 3728                 error = EBUSY;
 3729                 goto out;
 3730         }
 3731 #ifdef MAC
 3732         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3733             &nd.ni_cnd);
 3734         if (error != 0)
 3735                 goto out;
 3736 #endif
 3737         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3738                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3739                 vput(vp);
 3740                 if (nd.ni_dvp == vp)
 3741                         vrele(nd.ni_dvp);
 3742                 else
 3743                         vput(nd.ni_dvp);
 3744                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3745                         return (error);
 3746                 goto restart;
 3747         }
 3748         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3749         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3750         vn_finished_write(mp);
 3751 out:
 3752         NDFREE(&nd, NDF_ONLY_PNBUF);
 3753         vput(vp);
 3754         if (nd.ni_dvp == vp)
 3755                 vrele(nd.ni_dvp);
 3756         else
 3757                 vput(nd.ni_dvp);
 3758         return (error);
 3759 }
 3760 
 3761 #ifdef COMPAT_43
 3762 /*
 3763  * Read a block of directory entries in a filesystem independent format.
 3764  */
 3765 #ifndef _SYS_SYSPROTO_H_
 3766 struct ogetdirentries_args {
 3767         int     fd;
 3768         char    *buf;
 3769         u_int   count;
 3770         long    *basep;
 3771 };
 3772 #endif
 3773 int
 3774 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3775 {
 3776         long loff;
 3777         int error;
 3778 
 3779         error = kern_ogetdirentries(td, uap, &loff);
 3780         if (error == 0)
 3781                 error = copyout(&loff, uap->basep, sizeof(long));
 3782         return (error);
 3783 }
 3784 
 /*
  * Kernel-side worker for the COMPAT_43 ogetdirentries() call.
  *
  * Reads up to uap->count bytes of directory entries from the directory
  * open on uap->fd into the user buffer uap->buf.  Unless the direct path
  * below is taken, entries are first read into a kernel buffer so that the
  * d_type/d_namlen bytes of each entry can be rewritten before the data is
  * copied out.
  *
  * On success td->td_retval[0] holds the number of bytes transferred and
  * *ploff holds the directory offset at which the read started.
  *
  * Returns 0 on success; EINVAL if count exceeds 64KB or the vnode is not
  * a directory; EBADF if the file is not open for reading; EIO if an entry
  * with a zero d_reclen is found; otherwise the error from getvnode(), the
  * MAC check, VOP_READDIR() or uiomove().
  */
 3785 int
 3786 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3787     long *ploff)
 3788 {
 3789         struct vnode *vp;
 3790         struct file *fp;
 3791         struct uio auio, kuio;
 3792         struct iovec aiov, kiov;
 3793         struct dirent *dp, *edp;
 3794         cap_rights_t rights;
 3795         caddr_t dirbuf;
 3796         int error, eofflag, readcnt;
 3797         long loff;
 3798         off_t foffset;
 3799 
 3800         /* XXX arbitrary sanity limit on `count'. */
 3801         if (uap->count > 64 * 1024)
 3802                 return (EINVAL);
 3803         error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
 3804         if (error != 0)
 3805                 return (error);
 3806         if ((fp->f_flag & FREAD) == 0) {
 3807                 fdrop(fp, td);
 3808                 return (EBADF);
 3809         }
 3810         vp = fp->f_vnode;
 3811         foffset = foffset_lock(fp, 0);
              /*
               * Re-entered from the union-mount case at the bottom with vp
               * switched to the covered vnode and foffset reset to 0.
               */
 3812 unionread:
 3813         if (vp->v_type != VDIR) {
 3814                 foffset_unlock(fp, foffset, 0);
 3815                 fdrop(fp, td);
 3816                 return (EINVAL);
 3817         }
              /* Describe the caller's buffer; auio always targets user space. */
 3818         aiov.iov_base = uap->buf;
 3819         aiov.iov_len = uap->count;
 3820         auio.uio_iov = &aiov;
 3821         auio.uio_iovcnt = 1;
 3822         auio.uio_rw = UIO_READ;
 3823         auio.uio_segflg = UIO_USERSPACE;
 3824         auio.uio_td = td;
 3825         auio.uio_resid = uap->count;
 3826         vn_lock(vp, LK_SHARED | LK_RETRY);
              /* Remember where this read starts; reported through *ploff. */
 3827         loff = auio.uio_offset = foffset;
 3828 #ifdef MAC
 3829         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3830         if (error != 0) {
 3831                 VOP_UNLOCK(vp, 0);
 3832                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 3833                 fdrop(fp, td);
 3834                 return (error);
 3835         }
 3836 #endif
              /*
               * Direct path, compiled only for non-little-endian builds:
               * read straight into the user buffer with no per-entry
               * rewriting.
               * NOTE(review): mnt_maxsymlinklen <= 0 presumably identifies
               * the old on-disk directory format -- confirm.
               */
 3837 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3838                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3839                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3840                             NULL, NULL);
 3841                         foffset = auio.uio_offset;
 3842                 } else
 3843 #       endif
 3844         {
                      /*
                       * Bounce path: clone the uio, point it at a kernel
                       * buffer of uap->count bytes, read there, fix up each
                       * entry, then uiomove() the result to the user buffer.
                       */
 3845                 kuio = auio;
 3846                 kuio.uio_iov = &kiov;
 3847                 kuio.uio_segflg = UIO_SYSSPACE;
 3848                 kiov.iov_len = uap->count;
 3849                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3850                 kiov.iov_base = dirbuf;
 3851                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3852                             NULL, NULL);
 3853                 foffset = kuio.uio_offset;
 3854                 if (error == 0) {
                              /* Bytes produced by VOP_READDIR(); edp is one past them. */
 3855                         readcnt = uap->count - kuio.uio_resid;
 3856                         edp = (struct dirent *)&dirbuf[readcnt];
 3857                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3858 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3859                                         /*
 3860                                          * The expected low byte of
 3861                                          * dp->d_namlen is our dp->d_type.
 3862                                          * The high MBZ byte of dp->d_namlen
 3863                                          * is our dp->d_namlen.
 3864                                          */
 3865                                         dp->d_type = dp->d_namlen;
 3866                                         dp->d_namlen = 0;
 3867 #                               else
 3868                                         /*
 3869                                          * The dp->d_type is the high byte
 3870                                          * of the expected dp->d_namlen,
 3871                                          * so must be zero'ed.
 3872                                          */
 3873                                         dp->d_type = 0;
 3874 #                               endif
                                      /*
                                       * Step to the next record.  A zero
                                       * d_reclen would never advance dp, so
                                       * it is reported as EIO instead.
                                       */
 3875                                 if (dp->d_reclen > 0) {
 3876                                         dp = (struct dirent *)
 3877                                             ((char *)dp + dp->d_reclen);
 3878                                 } else {
 3879                                         error = EIO;
 3880                                         break;
 3881                                 }
 3882                         }
                              /*
                               * Copy out only when the walk consumed every
                               * entry, i.e. the loop did not break early.
                               */
 3883                         if (dp >= edp)
 3884                                 error = uiomove(dirbuf, readcnt, &auio);
 3885                 }
 3886                 free(dirbuf, M_TEMP);
 3887         }
 3888         if (error != 0) {
 3889                 VOP_UNLOCK(vp, 0);
 3890                 foffset_unlock(fp, foffset, 0);
 3891                 fdrop(fp, td);
 3892                 return (error);
 3893         }
              /*
               * Nothing was transferred and vp is the root of a MNT_UNION
               * mount: switch the open file over to the covered vnode, drop
               * the old one, and retry the read from offset 0.
               */
 3894         if (uap->count == auio.uio_resid &&
 3895             (vp->v_vflag & VV_ROOT) &&
 3896             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3897                 struct vnode *tvp = vp;
 3898                 vp = vp->v_mount->mnt_vnodecovered;
 3899                 VREF(vp);
 3900                 fp->f_vnode = vp;
 3901                 fp->f_data = vp;
 3902                 foffset = 0;
 3903                 vput(tvp);
 3904                 goto unionread;
 3905         }
 3906         VOP_UNLOCK(vp, 0);
 3907         foffset_unlock(fp, foffset, 0);
 3908         fdrop(fp, td);
              /* Number of bytes delivered to the user buffer. */
 3909         td->td_retval[0] = uap->count - auio.uio_resid;
              /* error is always 0 here; every failure returned earlier. */
 3910         if (error == 0)
 3911                 *ploff = loff;
 3912         return (error);
 3913 }
 3914 #endif /* COMPAT_43 */
 3915 
 3916 /*
 3917  * Read a block of directory entries in a filesystem independent format.
 3918  */
 3919 #ifndef _SYS_SYSPROTO_H_
 3920 struct getdirentries_args {
 3921         int     fd;
 3922         char    *buf;
 3923         u_int   count;
 3924         long    *basep;
 3925 };
 3926 #endif
 3927 int
 3928 sys_getdirentries(td, uap)
 3929         struct thread *td;
 3930         register struct getdirentries_args /* {
 3931                 int fd;
 3932                 char *buf;
 3933                 u_int count;
 3934                 long *basep;
 3935         } */ *uap;
 3936 {
 3937         long base;
 3938         int error;
 3939 
 3940         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 3941             NULL, UIO_USERSPACE);
 3942         if (error != 0)
 3943                 return (error);
 3944         if (uap->basep != NULL)
 3945                 error = copyout(&base, uap->basep, sizeof(long));
 3946         return (error);
 3947 }
 3948 
 /*
  * Kernel implementation behind getdirentries(2).
  *
  * Reads up to `count' bytes of directory entries from the directory open
  * on `fd' into `buf', which lives in the address space named by `bufseg'.
  *
  * On success:
  *   - *basep receives the directory offset at which the read started;
  *   - *residp, if residp is not NULL, receives the untransferred byte
  *     count (auio.uio_resid);
  *   - td->td_retval[0] receives the number of bytes transferred.
  * None of these outputs is written on failure.
  *
  * Returns 0 on success; EINVAL if count exceeds IOSIZE_MAX or the vnode is
  * not a directory; EBADF if the file is not open for reading; otherwise
  * the error from getvnode(), the MAC check or VOP_READDIR().
  */
 3949 int
 3950 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 3951     long *basep, ssize_t *residp, enum uio_seg bufseg)
 3952 {
 3953         struct vnode *vp;
 3954         struct file *fp;
 3955         struct uio auio;
 3956         struct iovec aiov;
 3957         cap_rights_t rights;
 3958         long loff;
 3959         int error, eofflag;
 3960         off_t foffset;
 3961 
 3962         AUDIT_ARG_FD(fd);
 3963         if (count > IOSIZE_MAX)
 3964                 return (EINVAL);
              /*
               * Set once, outside the retry loop: the union-mount retry
               * below is only taken when uio_resid still equals count.
               */
 3965         auio.uio_resid = count;
 3966         error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
 3967         if (error != 0)
 3968                 return (error);
 3969         if ((fp->f_flag & FREAD) == 0) {
 3970                 fdrop(fp, td);
 3971                 return (EBADF);
 3972         }
 3973         vp = fp->f_vnode;
 3974         foffset = foffset_lock(fp, 0);
              /*
               * Re-entered from the union-mount case below with vp switched
               * to the covered vnode and foffset reset to 0.
               */
 3975 unionread:
 3976         if (vp->v_type != VDIR) {
 3977                 error = EINVAL;
 3978                 goto fail;
 3979         }
 3980         aiov.iov_base = buf;
 3981         aiov.iov_len = count;
 3982         auio.uio_iov = &aiov;
 3983         auio.uio_iovcnt = 1;
 3984         auio.uio_rw = UIO_READ;
 3985         auio.uio_segflg = bufseg;
 3986         auio.uio_td = td;
 3987         vn_lock(vp, LK_SHARED | LK_RETRY);
 3988         AUDIT_ARG_VNODE1(vp);
              /* Remember where this read starts; reported through *basep. */
 3989         loff = auio.uio_offset = foffset;
              /* With MAC compiled in, VOP_READDIR() runs only if the check passes. */
 3990 #ifdef MAC
 3991         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3992         if (error == 0)
 3993 #endif
 3994                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 3995                     NULL);
 3996         foffset = auio.uio_offset;
 3997         if (error != 0) {
 3998                 VOP_UNLOCK(vp, 0);
 3999                 goto fail;
 4000         }
              /*
               * Nothing was transferred and vp is the root of a MNT_UNION
               * mount: switch the open file over to the covered vnode, drop
               * the old one, and retry the read from offset 0.
               */
 4001         if (count == auio.uio_resid &&
 4002             (vp->v_vflag & VV_ROOT) &&
 4003             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4004                 struct vnode *tvp = vp;
 4005 
 4006                 vp = vp->v_mount->mnt_vnodecovered;
 4007                 VREF(vp);
 4008                 fp->f_vnode = vp;
 4009                 fp->f_data = vp;
 4010                 foffset = 0;
 4011                 vput(tvp);
 4012                 goto unionread;
 4013         }
 4014         VOP_UNLOCK(vp, 0);
 4015         *basep = loff;
 4016         if (residp != NULL)
 4017                 *residp = auio.uio_resid;
 4018         td->td_retval[0] = count - auio.uio_resid;
              /* Shared exit for success and failure; vp is already unlocked here. */
 4019 fail:
 4020         foffset_unlock(fp, foffset, 0);
 4021         fdrop(fp, td);
 4022         return (error);
 4023 }
 4024 
 4025 #ifndef _SYS_SYSPROTO_H_
 4026 struct getdents_args {
 4027         int fd;
 4028         char *buf;
 4029         size_t count;
 4030 };
 4031 #endif
 4032 int
 4033 sys_getdents(td, uap)
 4034         struct thread *td;
 4035         register struct getdents_args /* {
 4036                 int fd;
 4037                 char *buf;
 4038                 u_int count;
 4039         } */ *uap;
 4040 {
 4041         struct getdirentries_args ap;
 4042 
 4043         ap.fd = uap->fd;
 4044         ap.buf = uap->buf;
 4045         ap.count = uap->count;
 4046         ap.basep = NULL;
 4047         return (sys_getdirentries(td, &ap));
 4048 }
 4049 
 4050 /*
 4051  * Set the mode mask for creation of filesystem nodes.
 4052  */
 4053 #ifndef _SYS_SYSPROTO_H_
 4054 struct umask_args {
 4055         int     newmask;
 4056 };
 4057 #endif
 4058 int
 4059 sys_umask(td, uap)
 4060         struct thread *td;
 4061         struct umask_args /* {
 4062                 int newmask;
 4063         } */ *uap;
 4064 {
 4065         struct filedesc *fdp;
 4066 
 4067         fdp = td->td_proc->p_fd;
 4068         FILEDESC_XLOCK(fdp);
 4069         td->td_retval[0] = fdp->fd_cmask;
 4070         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4071         FILEDESC_XUNLOCK(fdp);
 4072         return (0);
 4073 }
 4074 
 4075 /*
 4076  * Void all references to file by ripping underlying filesystem away from
 4077  * vnode.
 4078  */
 4079 #ifndef _SYS_SYSPROTO_H_
 4080 struct revoke_args {
 4081         char    *path;
 4082 };
 4083 #endif
 4084 int
 4085 sys_revoke(td, uap)
 4086         struct thread *td;
 4087         register struct revoke_args /* {
 4088                 char *path;
 4089         } */ *uap;
 4090 {
 4091         struct vnode *vp;
 4092         struct vattr vattr;
 4093         struct nameidata nd;
 4094         int error;
 4095 
 4096         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4097             uap->path, td);
 4098         if ((error = namei(&nd)) != 0)
 4099                 return (error);
 4100         vp = nd.ni_vp;
 4101         NDFREE(&nd, NDF_ONLY_PNBUF);
 4102         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4103                 error = EINVAL;
 4104                 goto out;
 4105         }
 4106 #ifdef MAC
 4107         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4108         if (error != 0)
 4109                 goto out;
 4110 #endif
 4111         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4112         if (error != 0)
 4113                 goto out;
 4114         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4115                 error = priv_check(td, PRIV_VFS_ADMIN);
 4116                 if (error != 0)
 4117                         goto out;
 4118         }
 4119         if (vcount(vp) > 1)
 4120                 VOP_REVOKE(vp, REVOKEALL);
 4121 out:
 4122         vput(vp);
 4123         return (error);
 4124 }
 4125 
 4126 /*
 4127  * Convert a user file descriptor to a kernel file entry and check that, if it
 4128  * is a capability, the correct rights are present. A reference on the file
 4129  * entry is held upon returning.
 4130  */
 4131 int
 4132 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 4133 {
 4134         struct file *fp;
 4135         int error;
 4136 
 4137         error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
 4138         if (error != 0)
 4139                 return (error);
 4140 
 4141         /*
 4142          * The file could be not of the vnode type, or it may be not
 4143          * yet fully initialized, in which case the f_vnode pointer
 4144          * may be set, but f_ops is still badfileops.  E.g.,
 4145          * devfs_open() transiently create such situation to
 4146          * facilitate csw d_fdopen().
 4147          *
 4148          * Dupfdopen() handling in kern_openat() installs the
 4149          * half-baked file into the process descriptor table, allowing
 4150          * other thread to dereference it. Guard against the race by
 4151          * checking f_ops.
 4152          */
 4153         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4154                 fdrop(fp, td);
 4155                 return (EINVAL);
 4156         }
 4157         *fpp = fp;
 4158         return (0);
 4159 }
 4160 
 4161 
 4162 /*
 4163  * Get an (NFS) file handle.
 4164  */
 4165 #ifndef _SYS_SYSPROTO_H_
 4166 struct lgetfh_args {
 4167         char    *fname;
 4168         fhandle_t *fhp;
 4169 };
 4170 #endif
 4171 int
 4172 sys_lgetfh(td, uap)
 4173         struct thread *td;
 4174         register struct lgetfh_args *uap;
 4175 {
 4176         struct nameidata nd;
 4177         fhandle_t fh;
 4178         register struct vnode *vp;
 4179         int error;
 4180 
 4181         error = priv_check(td, PRIV_VFS_GETFH);
 4182         if (error != 0)
 4183                 return (error);
 4184         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4185             uap->fname, td);
 4186         error = namei(&nd);
 4187         if (error != 0)
 4188                 return (error);
 4189         NDFREE(&nd, NDF_ONLY_PNBUF);
 4190         vp = nd.ni_vp;
 4191         bzero(&fh, sizeof(fh));
 4192         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4193         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4194         vput(vp);
 4195         if (error == 0)
 4196                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4197         return (error);
 4198 }
 4199 
 4200 #ifndef _SYS_SYSPROTO_H_
 4201 struct getfh_args {
 4202         char    *fname;
 4203         fhandle_t *fhp;
 4204 };
 4205 #endif
 4206 int
 4207 sys_getfh(td, uap)
 4208         struct thread *td;
 4209         register struct getfh_args *uap;
 4210 {
 4211         struct nameidata nd;
 4212         fhandle_t fh;
 4213         register struct vnode *vp;
 4214         int error;
 4215 
 4216         error = priv_check(td, PRIV_VFS_GETFH);
 4217         if (error != 0)
 4218                 return (error);
 4219         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4220             uap->fname, td);
 4221         error = namei(&nd);
 4222         if (error != 0)
 4223                 return (error);
 4224         NDFREE(&nd, NDF_ONLY_PNBUF);
 4225         vp = nd.ni_vp;
 4226         bzero(&fh, sizeof(fh));
 4227         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4228         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4229         vput(vp);
 4230         if (error == 0)
 4231                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4232         return (error);
 4233 }
 4234 
 4235 /*
 4236  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4237  * open descriptor.
 4238  *
 4239  * warning: do not remove the priv_check() call or this becomes one giant
 4240  * security hole.
 4241  */
 4242 #ifndef _SYS_SYSPROTO_H_
 4243 struct fhopen_args {
 4244         const struct fhandle *u_fhp;
 4245         int flags;
 4246 };
 4247 #endif
 4248 int
 4249 sys_fhopen(td, uap)
 4250         struct thread *td;
 4251         struct fhopen_args /* {
 4252                 const struct fhandle *u_fhp;
 4253                 int flags;
 4254         } */ *uap;
 4255 {
 4256         struct mount *mp;
 4257         struct vnode *vp;
 4258         struct fhandle fhp;
 4259         struct file *fp;
 4260         int fmode, error;
 4261         int indx;
 4262 
 4263         error = priv_check(td, PRIV_VFS_FHOPEN);
 4264         if (error != 0)
 4265                 return (error);
 4266         indx = -1;
 4267         fmode = FFLAGS(uap->flags);
 4268         /* why not allow a non-read/write open for our lockd? */
 4269         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4270                 return (EINVAL);
 4271         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4272         if (error != 0)
 4273                 return(error);
 4274         /* find the mount point */
 4275         mp = vfs_busyfs(&fhp.fh_fsid);
 4276         if (mp == NULL)
 4277                 return (ESTALE);
 4278         /* now give me my vnode, it gets returned to me locked */
 4279         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4280         vfs_unbusy(mp);
 4281         if (error != 0)
 4282                 return (error);
 4283 
 4284         error = falloc_noinstall(td, &fp);
 4285         if (error != 0) {
 4286                 vput(vp);
 4287                 return (error);
 4288         }
 4289         /*
 4290          * An extra reference on `fp' has been held for us by
 4291          * falloc_noinstall().
 4292          */
 4293 
 4294 #ifdef INVARIANTS
 4295         td->td_dupfd = -1;
 4296 #endif
 4297         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4298         if (error != 0) {
 4299                 KASSERT(fp->f_ops == &badfileops,
 4300                     ("VOP_OPEN in fhopen() set f_ops"));
 4301                 KASSERT(td->td_dupfd < 0,
 4302                     ("fhopen() encountered fdopen()"));
 4303 
 4304                 vput(vp);
 4305                 goto bad;
 4306         }
 4307 #ifdef INVARIANTS
 4308         td->td_dupfd = 0;
 4309 #endif
 4310         fp->f_vnode = vp;
 4311         fp->f_seqcount = 1;
 4312         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4313             &vnops);
 4314         VOP_UNLOCK(vp, 0);
 4315         if ((fmode & O_TRUNC) != 0) {
 4316                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4317                 if (error != 0)
 4318                         goto bad;
 4319         }
 4320 
 4321         error = finstall(td, fp, &indx, fmode, NULL);
 4322 bad:
 4323         fdrop(fp, td);
 4324         td->td_retval[0] = indx;
 4325         return (error);
 4326 }
 4327 
 4328 /*
 4329  * Stat an (NFS) file handle.
 4330  */
 4331 #ifndef _SYS_SYSPROTO_H_
 4332 struct fhstat_args {
 4333         struct fhandle *u_fhp;
 4334         struct stat *sb;
 4335 };
 4336 #endif
 4337 int
 4338 sys_fhstat(td, uap)
 4339         struct thread *td;
 4340         register struct fhstat_args /* {
 4341                 struct fhandle *u_fhp;
 4342                 struct stat *sb;
 4343         } */ *uap;
 4344 {
 4345         struct stat sb;
 4346         struct fhandle fh;
 4347         int error;
 4348 
 4349         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4350         if (error != 0)
 4351                 return (error);
 4352         error = kern_fhstat(td, fh, &sb);
 4353         if (error == 0)
 4354                 error = copyout(&sb, uap->sb, sizeof(sb));
 4355         return (error);
 4356 }
 4357 
 4358 int
 4359 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4360 {
 4361         struct mount *mp;
 4362         struct vnode *vp;
 4363         int error;
 4364 
 4365         error = priv_check(td, PRIV_VFS_FHSTAT);
 4366         if (error != 0)
 4367                 return (error);
 4368         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4369                 return (ESTALE);
 4370         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4371         vfs_unbusy(mp);
 4372         if (error != 0)
 4373                 return (error);
 4374         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4375         vput(vp);
 4376         return (error);
 4377 }
 4378 
 4379 /*
 4380  * Implement fstatfs() for (NFS) file handles.
 4381  */
 4382 #ifndef _SYS_SYSPROTO_H_
 4383 struct fhstatfs_args {
 4384         struct fhandle *u_fhp;
 4385         struct statfs *buf;
 4386 };
 4387 #endif
 4388 int
 4389 sys_fhstatfs(td, uap)
 4390         struct thread *td;
 4391         struct fhstatfs_args /* {
 4392                 struct fhandle *u_fhp;
 4393                 struct statfs *buf;
 4394         } */ *uap;
 4395 {
 4396         struct statfs sf;
 4397         fhandle_t fh;
 4398         int error;
 4399 
 4400         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4401         if (error != 0)
 4402                 return (error);
 4403         error = kern_fhstatfs(td, fh, &sf);
 4404         if (error != 0)
 4405                 return (error);
 4406         return (copyout(&sf, uap->buf, sizeof(sf)));
 4407 }
 4408 
 4409 int
 4410 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4411 {
 4412         struct statfs *sp;
 4413         struct mount *mp;
 4414         struct vnode *vp;
 4415         int error;
 4416 
 4417         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4418         if (error != 0)
 4419                 return (error);
 4420         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4421                 return (ESTALE);
 4422         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4423         if (error != 0) {
 4424                 vfs_unbusy(mp);
 4425                 return (error);
 4426         }
 4427         vput(vp);
 4428         error = prison_canseemount(td->td_ucred, mp);
 4429         if (error != 0)
 4430                 goto out;
 4431 #ifdef MAC
 4432         error = mac_mount_check_stat(td->td_ucred, mp);
 4433         if (error != 0)
 4434                 goto out;
 4435 #endif
 4436         /*
 4437          * Set these in case the underlying filesystem fails to do so.
 4438          */
 4439         sp = &mp->mnt_stat;
 4440         sp->f_version = STATFS_VERSION;
 4441         sp->f_namemax = NAME_MAX;
 4442         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4443         error = VFS_STATFS(mp, sp);
 4444         if (error == 0)
 4445                 *buf = *sp;
 4446 out:
 4447         vfs_unbusy(mp);
 4448         return (error);
 4449 }
 4450 
 4451 int
 4452 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4453 {
 4454         struct file *fp;
 4455         struct mount *mp;
 4456         struct vnode *vp;
 4457         cap_rights_t rights;
 4458         off_t olen, ooffset;
 4459         int error;
 4460 
 4461         if (offset < 0 || len <= 0)
 4462                 return (EINVAL);
 4463         /* Check for wrap. */
 4464         if (offset > OFF_MAX - len)
 4465                 return (EFBIG);
 4466         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4467         if (error != 0)
 4468                 return (error);
 4469         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4470                 error = ESPIPE;
 4471                 goto out;
 4472         }
 4473         if ((fp->f_flag & FWRITE) == 0) {
 4474                 error = EBADF;
 4475                 goto out;
 4476         }
 4477         if (fp->f_type != DTYPE_VNODE) {
 4478                 error = ENODEV;
 4479                 goto out;
 4480         }
 4481         vp = fp->f_vnode;
 4482         if (vp->v_type != VREG) {
 4483                 error = ENODEV;
 4484                 goto out;
 4485         }
 4486 
 4487         /* Allocating blocks may take a long time, so iterate. */
 4488         for (;;) {
 4489                 olen = len;
 4490                 ooffset = offset;
 4491 
 4492                 bwillwrite();
 4493                 mp = NULL;
 4494                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4495                 if (error != 0)
 4496                         break;
 4497                 error = vn_lock(vp, LK_EXCLUSIVE);
 4498                 if (error != 0) {
 4499                         vn_finished_write(mp);
 4500                         break;
 4501                 }
 4502 #ifdef MAC
 4503                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4504                 if (error == 0)
 4505 #endif
 4506                         error = VOP_ALLOCATE(vp, &offset, &len);
 4507                 VOP_UNLOCK(vp, 0);
 4508                 vn_finished_write(mp);
 4509 
 4510                 if (olen + ooffset != offset + len) {
 4511                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4512                             ooffset, olen, offset, len);
 4513                 }
 4514                 if (error != 0 || len == 0)
 4515                         break;
 4516                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4517                 maybe_yield();
 4518         }
 4519  out:
 4520         fdrop(fp, td);
 4521         return (error);
 4522 }
 4523 
 4524 int
 4525 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4526 {
 4527         int error;
 4528 
 4529         error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
 4530         return (kern_posix_error(td, error));
 4531 }
 4532 
 4533 /*
 4534  * Unlike madvise(2), we do not make a best effort to remember every
 4535  * possible caching hint.  Instead, we remember the last setting with
 4536  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4537  * region of any current setting.
 4538  */
 4539 int
 4540 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4541     int advice)
 4542 {
 4543         struct fadvise_info *fa, *new;
 4544         struct file *fp;
 4545         struct vnode *vp;
 4546         cap_rights_t rights;
 4547         off_t end;
 4548         int error;
 4549 
 4550         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4551                 return (EINVAL);
 4552         switch (advice) {
 4553         case POSIX_FADV_SEQUENTIAL:
 4554         case POSIX_FADV_RANDOM:
 4555         case POSIX_FADV_NOREUSE:
 4556                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4557                 break;
 4558         case POSIX_FADV_NORMAL:
 4559         case POSIX_FADV_WILLNEED:
 4560         case POSIX_FADV_DONTNEED:
 4561                 new = NULL;
 4562                 break;
 4563         default:
 4564                 return (EINVAL);
 4565         }
 4566         /* XXX: CAP_POSIX_FADVISE? */
 4567         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4568         if (error != 0)
 4569                 goto out;
 4570         if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 4571                 error = ESPIPE;
 4572                 goto out;
 4573         }
 4574         if (fp->f_type != DTYPE_VNODE) {
 4575                 error = ENODEV;
 4576                 goto out;
 4577         }
 4578         vp = fp->f_vnode;
 4579         if (vp->v_type != VREG) {
 4580                 error = ENODEV;
 4581                 goto out;
 4582         }
 4583         if (len == 0)
 4584                 end = OFF_MAX;
 4585         else
 4586                 end = offset + len - 1;
 4587         switch (advice) {
 4588         case POSIX_FADV_SEQUENTIAL:
 4589         case POSIX_FADV_RANDOM:
 4590         case POSIX_FADV_NOREUSE:
 4591                 /*
 4592                  * Try to merge any existing non-standard region with
 4593                  * this new region if possible, otherwise create a new
 4594                  * non-standard region for this request.
 4595                  */
 4596                 mtx_pool_lock(mtxpool_sleep, fp);
 4597                 fa = fp->f_advice;
 4598                 if (fa != NULL && fa->fa_advice == advice &&
 4599                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4600                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4601                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4602                         if (offset < fa->fa_start)
 4603                                 fa->fa_start = offset;
 4604                         if (end > fa->fa_end)
 4605                                 fa->fa_end = end;
 4606                 } else {
 4607                         new->fa_advice = advice;
 4608                         new->fa_start = offset;
 4609                         new->fa_end = end;
 4610                         fp->f_advice = new;
 4611                         new = fa;
 4612                 }
 4613                 mtx_pool_unlock(mtxpool_sleep, fp);
 4614                 break;
 4615         case POSIX_FADV_NORMAL:
 4616                 /*
 4617                  * If a the "normal" region overlaps with an existing
 4618                  * non-standard region, trim or remove the
 4619                  * non-standard region.
 4620                  */
 4621                 mtx_pool_lock(mtxpool_sleep, fp);
 4622                 fa = fp->f_advice;
 4623                 if (fa != NULL) {
 4624                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4625                                 new = fa;
 4626                                 fp->f_advice = NULL;
 4627                         } else if (offset <= fa->fa_start &&
 4628                             end >= fa->fa_start)
 4629                                 fa->fa_start = end + 1;
 4630                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4631                                 fa->fa_end = offset - 1;
 4632                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4633                                 /*
 4634                                  * If the "normal" region is a middle
 4635                                  * portion of the existing
 4636                                  * non-standard region, just remove
 4637                                  * the whole thing rather than picking
 4638                                  * one side or the other to
 4639                                  * preserve.
 4640                                  */
 4641                                 new = fa;
 4642                                 fp->f_advice = NULL;
 4643                         }
 4644                 }
 4645                 mtx_pool_unlock(mtxpool_sleep, fp);
 4646                 break;
 4647         case POSIX_FADV_WILLNEED:
 4648         case POSIX_FADV_DONTNEED:
 4649                 error = VOP_ADVISE(vp, offset, end, advice);
 4650                 break;
 4651         }
 4652 out:
 4653         if (fp != NULL)
 4654                 fdrop(fp, td);
 4655         free(new, M_FADVISE);
 4656         return (error);
 4657 }
 4658 
 4659 int
 4660 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4661 {
 4662         int error;
 4663 
 4664         error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 4665             uap->advice);
 4666         return (kern_posix_error(td, error));
 4667 }

Cache object: 09a84c996f65b2db1b5a437019593c3c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.