vfs_syscalls.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_syscalls.c      8.13 (Berkeley) 4/15/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.1/sys/kern/vfs_syscalls.c 301050 2016-05-31 16:55:41Z glebius $");
   39 
   40 #include "opt_capsicum.h"
   41 #include "opt_compat.h"
   42 #include "opt_kdtrace.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/capability.h>
   50 #include <sys/disk.h>
   51 #include <sys/sysent.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/namei.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/kernel.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/filio.h>
   62 #include <sys/limits.h>
   63 #include <sys/linker.h>
   64 #include <sys/rwlock.h>
   65 #include <sys/sdt.h>
   66 #include <sys/stat.h>
   67 #include <sys/sx.h>
   68 #include <sys/unistd.h>
   69 #include <sys/vnode.h>
   70 #include <sys/priv.h>
   71 #include <sys/proc.h>
   72 #include <sys/dirent.h>
   73 #include <sys/jail.h>
   74 #include <sys/syscallsubr.h>
   75 #include <sys/sysctl.h>
   76 #ifdef KTRACE
   77 #include <sys/ktrace.h>
   78 #endif
   79 
   80 #include <machine/stdarg.h>
   81 
   82 #include <security/audit/audit.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/uma.h>
   89 
   90 #include <ufs/ufs/quota.h>
   91 
   92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
   93 
   94 SDT_PROVIDER_DEFINE(vfs);
   95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
   96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
   97 
   98 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
   99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
  100 static int kern_chflags(struct thread *td, const char *path,
  101     enum uio_seg pathseg, u_long flags);
  102 static int kern_chflagsat(struct thread *td, int fd, const char *path,
  103     enum uio_seg pathseg, u_long flags, int atflag);
  104 static int setfflags(struct thread *td, struct vnode *, u_long);
  105 static int setutimes(struct thread *td, struct vnode *,
  106     const struct timespec *, int, int);
  107 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
  108     struct thread *td);
  109 
  110 /*
  111  * The module initialization routine for POSIX asynchronous I/O will
  112  * set this to the version of AIO that it implements.  (Zero means
  113  * that it is not implemented.)  This value is used here by pathconf()
  114  * and in kern_descrip.c by fpathconf().
  115  */
  116 int async_io_version;
  117 
  118 #ifdef DEBUG
  119 static int syncprt = 0;
  120 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
  121 #endif
  122 
  123 /*
  124  * Sync each mounted filesystem.
  125  */
  126 #ifndef _SYS_SYSPROTO_H_
  127 struct sync_args {
  128         int     dummy;
  129 };
  130 #endif
  131 /* ARGSUSED */
  132 int
  133 sys_sync(td, uap)
  134         struct thread *td;
  135         struct sync_args *uap;
  136 {
  137         struct mount *mp, *nmp;
  138         int save;
  139 
  140         mtx_lock(&mountlist_mtx);
  141         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  142                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  143                         nmp = TAILQ_NEXT(mp, mnt_list);
  144                         continue;
  145                 }
  146                 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
  147                     vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
  148                         save = curthread_pflags_set(TDP_SYNCIO);
  149                         vfs_msync(mp, MNT_NOWAIT);
  150                         VFS_SYNC(mp, MNT_NOWAIT);
  151                         curthread_pflags_restore(save);
  152                         vn_finished_write(mp);
  153                 }
  154                 mtx_lock(&mountlist_mtx);
  155                 nmp = TAILQ_NEXT(mp, mnt_list);
  156                 vfs_unbusy(mp);
  157         }
  158         mtx_unlock(&mountlist_mtx);
  159         return (0);
  160 }
  161 
  162 /*
  163  * Change filesystem quotas.
  164  */
  165 #ifndef _SYS_SYSPROTO_H_
  166 struct quotactl_args {
  167         char *path;
  168         int cmd;
  169         int uid;
  170         caddr_t arg;
  171 };
  172 #endif
  173 int
  174 sys_quotactl(td, uap)
  175         struct thread *td;
  176         register struct quotactl_args /* {
  177                 char *path;
  178                 int cmd;
  179                 int uid;
  180                 caddr_t arg;
  181         } */ *uap;
  182 {
  183         struct mount *mp;
  184         struct nameidata nd;
  185         int error;
  186 
  187         AUDIT_ARG_CMD(uap->cmd);
  188         AUDIT_ARG_UID(uap->uid);
  189         if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
  190                 return (EPERM);
  191         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
  192             uap->path, td);
  193         if ((error = namei(&nd)) != 0)
  194                 return (error);
  195         NDFREE(&nd, NDF_ONLY_PNBUF);
  196         mp = nd.ni_vp->v_mount;
  197         vfs_ref(mp);
  198         vput(nd.ni_vp);
  199         error = vfs_busy(mp, 0);
  200         vfs_rel(mp);
  201         if (error != 0)
  202                 return (error);
  203         error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
  204 
  205         /*
  206          * Since quota on operation typically needs to open quota
  207          * file, the Q_QUOTAON handler needs to unbusy the mount point
  208          * before calling into namei.  Otherwise, unmount might be
  209          * started between two vfs_busy() invocations (first is our,
  210          * second is from mount point cross-walk code in lookup()),
  211          * causing deadlock.
  212          *
  213          * Require that Q_QUOTAON handles the vfs_busy() reference on
  214          * its own, always returning with ubusied mount point.
  215          */
  216         if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
  217                 vfs_unbusy(mp);
  218         return (error);
  219 }
  220 
  221 /*
  222  * Used by statfs conversion routines to scale the block size up if
  223  * necessary so that all of the block counts are <= 'max_size'.  Note
  224  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  225  * value of 'n'.
  226  */
  227 void
  228 statfs_scale_blocks(struct statfs *sf, long max_size)
  229 {
  230         uint64_t count;
  231         int shift;
  232 
  233         KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
  234 
  235         /*
  236          * Attempt to scale the block counts to give a more accurate
  237          * overview to userland of the ratio of free space to used
  238          * space.  To do this, find the largest block count and compute
  239          * a divisor that lets it fit into a signed integer <= max_size.
  240          */
  241         if (sf->f_bavail < 0)
  242                 count = -sf->f_bavail;
  243         else
  244                 count = sf->f_bavail;
  245         count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
  246         if (count <= max_size)
  247                 return;
  248 
  249         count >>= flsl(max_size);
  250         shift = 0;
  251         while (count > 0) {
  252                 shift++;
  253                 count >>=1;
  254         }
  255 
  256         sf->f_bsize <<= shift;
  257         sf->f_blocks >>= shift;
  258         sf->f_bfree >>= shift;
  259         sf->f_bavail >>= shift;
  260 }
  261 
  262 /*
  263  * Get filesystem statistics.
  264  */
  265 #ifndef _SYS_SYSPROTO_H_
  266 struct statfs_args {
  267         char *path;
  268         struct statfs *buf;
  269 };
  270 #endif
  271 int
  272 sys_statfs(td, uap)
  273         struct thread *td;
  274         register struct statfs_args /* {
  275                 char *path;
  276                 struct statfs *buf;
  277         } */ *uap;
  278 {
  279         struct statfs sf;
  280         int error;
  281 
  282         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  283         if (error == 0)
  284                 error = copyout(&sf, uap->buf, sizeof(sf));
  285         return (error);
  286 }
  287 
  288 int
  289 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
  290     struct statfs *buf)
  291 {
  292         struct mount *mp;
  293         struct statfs *sp, sb;
  294         struct nameidata nd;
  295         int error;
  296 
  297         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  298             pathseg, path, td);
  299         error = namei(&nd);
  300         if (error != 0)
  301                 return (error);
  302         mp = nd.ni_vp->v_mount;
  303         vfs_ref(mp);
  304         NDFREE(&nd, NDF_ONLY_PNBUF);
  305         vput(nd.ni_vp);
  306         error = vfs_busy(mp, 0);
  307         vfs_rel(mp);
  308         if (error != 0)
  309                 return (error);
  310 #ifdef MAC
  311         error = mac_mount_check_stat(td->td_ucred, mp);
  312         if (error != 0)
  313                 goto out;
  314 #endif
  315         /*
  316          * Set these in case the underlying filesystem fails to do so.
  317          */
  318         sp = &mp->mnt_stat;
  319         sp->f_version = STATFS_VERSION;
  320         sp->f_namemax = NAME_MAX;
  321         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  322         error = VFS_STATFS(mp, sp);
  323         if (error != 0)
  324                 goto out;
  325         if (priv_check(td, PRIV_VFS_GENERATION)) {
  326                 bcopy(sp, &sb, sizeof(sb));
  327                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  328                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  329                 sp = &sb;
  330         }
  331         *buf = *sp;
  332 out:
  333         vfs_unbusy(mp);
  334         return (error);
  335 }
  336 
  337 /*
  338  * Get filesystem statistics.
  339  */
  340 #ifndef _SYS_SYSPROTO_H_
  341 struct fstatfs_args {
  342         int fd;
  343         struct statfs *buf;
  344 };
  345 #endif
  346 int
  347 sys_fstatfs(td, uap)
  348         struct thread *td;
  349         register struct fstatfs_args /* {
  350                 int fd;
  351                 struct statfs *buf;
  352         } */ *uap;
  353 {
  354         struct statfs sf;
  355         int error;
  356 
  357         error = kern_fstatfs(td, uap->fd, &sf);
  358         if (error == 0)
  359                 error = copyout(&sf, uap->buf, sizeof(sf));
  360         return (error);
  361 }
  362 
  363 int
  364 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
  365 {
  366         struct file *fp;
  367         struct mount *mp;
  368         struct statfs *sp, sb;
  369         struct vnode *vp;
  370         cap_rights_t rights;
  371         int error;
  372 
  373         AUDIT_ARG_FD(fd);
  374         error = getvnode(td->td_proc->p_fd, fd,
  375             cap_rights_init(&rights, CAP_FSTATFS), &fp);
  376         if (error != 0)
  377                 return (error);
  378         vp = fp->f_vnode;
  379         vn_lock(vp, LK_SHARED | LK_RETRY);
  380 #ifdef AUDIT
  381         AUDIT_ARG_VNODE1(vp);
  382 #endif
  383         mp = vp->v_mount;
  384         if (mp)
  385                 vfs_ref(mp);
  386         VOP_UNLOCK(vp, 0);
  387         fdrop(fp, td);
  388         if (mp == NULL) {
  389                 error = EBADF;
  390                 goto out;
  391         }
  392         error = vfs_busy(mp, 0);
  393         vfs_rel(mp);
  394         if (error != 0)
  395                 return (error);
  396 #ifdef MAC
  397         error = mac_mount_check_stat(td->td_ucred, mp);
  398         if (error != 0)
  399                 goto out;
  400 #endif
  401         /*
  402          * Set these in case the underlying filesystem fails to do so.
  403          */
  404         sp = &mp->mnt_stat;
  405         sp->f_version = STATFS_VERSION;
  406         sp->f_namemax = NAME_MAX;
  407         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  408         error = VFS_STATFS(mp, sp);
  409         if (error != 0)
  410                 goto out;
  411         if (priv_check(td, PRIV_VFS_GENERATION)) {
  412                 bcopy(sp, &sb, sizeof(sb));
  413                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  414                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  415                 sp = &sb;
  416         }
  417         *buf = *sp;
  418 out:
  419         if (mp)
  420                 vfs_unbusy(mp);
  421         return (error);
  422 }
  423 
  424 /*
  425  * Get statistics on all filesystems.
  426  */
  427 #ifndef _SYS_SYSPROTO_H_
  428 struct getfsstat_args {
  429         struct statfs *buf;
  430         long bufsize;
  431         int flags;
  432 };
  433 #endif
  434 int
  435 sys_getfsstat(td, uap)
  436         struct thread *td;
  437         register struct getfsstat_args /* {
  438                 struct statfs *buf;
  439                 long bufsize;
  440                 int flags;
  441         } */ *uap;
  442 {
  443 
  444         return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
  445             uap->flags));
  446 }
  447 
  448 /*
  449  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  450  *      The caller is responsible for freeing memory which will be allocated
  451  *      in '*buf'.
  452  */
  453 int
  454 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
  455     enum uio_seg bufseg, int flags)
  456 {
  457         struct mount *mp, *nmp;
  458         struct statfs *sfsp, *sp, sb;
  459         size_t count, maxcount;
  460         int error;
  461 
  462         maxcount = bufsize / sizeof(struct statfs);
  463         if (bufsize == 0)
  464                 sfsp = NULL;
  465         else if (bufseg == UIO_USERSPACE)
  466                 sfsp = *buf;
  467         else /* if (bufseg == UIO_SYSSPACE) */ {
  468                 count = 0;
  469                 mtx_lock(&mountlist_mtx);
  470                 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  471                         count++;
  472                 }
  473                 mtx_unlock(&mountlist_mtx);
  474                 if (maxcount > count)
  475                         maxcount = count;
  476                 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
  477                     M_WAITOK);
  478         }
  479         count = 0;
  480         mtx_lock(&mountlist_mtx);
  481         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  482                 if (prison_canseemount(td->td_ucred, mp) != 0) {
  483                         nmp = TAILQ_NEXT(mp, mnt_list);
  484                         continue;
  485                 }
  486 #ifdef MAC
  487                 if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
  488                         nmp = TAILQ_NEXT(mp, mnt_list);
  489                         continue;
  490                 }
  491 #endif
  492                 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
  493                         nmp = TAILQ_NEXT(mp, mnt_list);
  494                         continue;
  495                 }
  496                 if (sfsp && count < maxcount) {
  497                         sp = &mp->mnt_stat;
  498                         /*
  499                          * Set these in case the underlying filesystem
  500                          * fails to do so.
  501                          */
  502                         sp->f_version = STATFS_VERSION;
  503                         sp->f_namemax = NAME_MAX;
  504                         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
  505                         /*
  506                          * If MNT_NOWAIT or MNT_LAZY is specified, do not
  507                          * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
  508                          * overrides MNT_WAIT.
  509                          */
  510                         if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
  511                             (flags & MNT_WAIT)) &&
  512                             (error = VFS_STATFS(mp, sp))) {
  513                                 mtx_lock(&mountlist_mtx);
  514                                 nmp = TAILQ_NEXT(mp, mnt_list);
  515                                 vfs_unbusy(mp);
  516                                 continue;
  517                         }
  518                         if (priv_check(td, PRIV_VFS_GENERATION)) {
  519                                 bcopy(sp, &sb, sizeof(sb));
  520                                 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
  521                                 prison_enforce_statfs(td->td_ucred, mp, &sb);
  522                                 sp = &sb;
  523                         }
  524                         if (bufseg == UIO_SYSSPACE)
  525                                 bcopy(sp, sfsp, sizeof(*sp));
  526                         else /* if (bufseg == UIO_USERSPACE) */ {
  527                                 error = copyout(sp, sfsp, sizeof(*sp));
  528                                 if (error != 0) {
  529                                         vfs_unbusy(mp);
  530                                         return (error);
  531                                 }
  532                         }
  533                         sfsp++;
  534                 }
  535                 count++;
  536                 mtx_lock(&mountlist_mtx);
  537                 nmp = TAILQ_NEXT(mp, mnt_list);
  538                 vfs_unbusy(mp);
  539         }
  540         mtx_unlock(&mountlist_mtx);
  541         if (sfsp && count > maxcount)
  542                 td->td_retval[0] = maxcount;
  543         else
  544                 td->td_retval[0] = count;
  545         return (0);
  546 }
  547 
  548 #ifdef COMPAT_FREEBSD4
  549 /*
  550  * Get old format filesystem statistics.
  551  */
  552 static void cvtstatfs(struct statfs *, struct ostatfs *);
  553 
  554 #ifndef _SYS_SYSPROTO_H_
  555 struct freebsd4_statfs_args {
  556         char *path;
  557         struct ostatfs *buf;
  558 };
  559 #endif
  560 int
  561 freebsd4_statfs(td, uap)
  562         struct thread *td;
  563         struct freebsd4_statfs_args /* {
  564                 char *path;
  565                 struct ostatfs *buf;
  566         } */ *uap;
  567 {
  568         struct ostatfs osb;
  569         struct statfs sf;
  570         int error;
  571 
  572         error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
  573         if (error != 0)
  574                 return (error);
  575         cvtstatfs(&sf, &osb);
  576         return (copyout(&osb, uap->buf, sizeof(osb)));
  577 }
  578 
  579 /*
  580  * Get filesystem statistics.
  581  */
  582 #ifndef _SYS_SYSPROTO_H_
  583 struct freebsd4_fstatfs_args {
  584         int fd;
  585         struct ostatfs *buf;
  586 };
  587 #endif
  588 int
  589 freebsd4_fstatfs(td, uap)
  590         struct thread *td;
  591         struct freebsd4_fstatfs_args /* {
  592                 int fd;
  593                 struct ostatfs *buf;
  594         } */ *uap;
  595 {
  596         struct ostatfs osb;
  597         struct statfs sf;
  598         int error;
  599 
  600         error = kern_fstatfs(td, uap->fd, &sf);
  601         if (error != 0)
  602                 return (error);
  603         cvtstatfs(&sf, &osb);
  604         return (copyout(&osb, uap->buf, sizeof(osb)));
  605 }
  606 
  607 /*
  608  * Get statistics on all filesystems.
  609  */
  610 #ifndef _SYS_SYSPROTO_H_
  611 struct freebsd4_getfsstat_args {
  612         struct ostatfs *buf;
  613         long bufsize;
  614         int flags;
  615 };
  616 #endif
  617 int
  618 freebsd4_getfsstat(td, uap)
  619         struct thread *td;
  620         register struct freebsd4_getfsstat_args /* {
  621                 struct ostatfs *buf;
  622                 long bufsize;
  623                 int flags;
  624         } */ *uap;
  625 {
  626         struct statfs *buf, *sp;
  627         struct ostatfs osb;
  628         size_t count, size;
  629         int error;
  630 
  631         count = uap->bufsize / sizeof(struct ostatfs);
  632         size = count * sizeof(struct statfs);
  633         error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
  634         if (size > 0) {
  635                 count = td->td_retval[0];
  636                 sp = buf;
  637                 while (count > 0 && error == 0) {
  638                         cvtstatfs(sp, &osb);
  639                         error = copyout(&osb, uap->buf, sizeof(osb));
  640                         sp++;
  641                         uap->buf++;
  642                         count--;
  643                 }
  644                 free(buf, M_TEMP);
  645         }
  646         return (error);
  647 }
  648 
  649 /*
  650  * Implement fstatfs() for (NFS) file handles.
  651  */
  652 #ifndef _SYS_SYSPROTO_H_
  653 struct freebsd4_fhstatfs_args {
  654         struct fhandle *u_fhp;
  655         struct ostatfs *buf;
  656 };
  657 #endif
  658 int
  659 freebsd4_fhstatfs(td, uap)
  660         struct thread *td;
  661         struct freebsd4_fhstatfs_args /* {
  662                 struct fhandle *u_fhp;
  663                 struct ostatfs *buf;
  664         } */ *uap;
  665 {
  666         struct ostatfs osb;
  667         struct statfs sf;
  668         fhandle_t fh;
  669         int error;
  670 
  671         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
  672         if (error != 0)
  673                 return (error);
  674         error = kern_fhstatfs(td, fh, &sf);
  675         if (error != 0)
  676                 return (error);
  677         cvtstatfs(&sf, &osb);
  678         return (copyout(&osb, uap->buf, sizeof(osb)));
  679 }
  680 
  681 /*
  682  * Convert a new format statfs structure to an old format statfs structure.
  683  */
  684 static void
  685 cvtstatfs(nsp, osp)
  686         struct statfs *nsp;
  687         struct ostatfs *osp;
  688 {
  689 
  690         statfs_scale_blocks(nsp, LONG_MAX);
  691         bzero(osp, sizeof(*osp));
  692         osp->f_bsize = nsp->f_bsize;
  693         osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
  694         osp->f_blocks = nsp->f_blocks;
  695         osp->f_bfree = nsp->f_bfree;
  696         osp->f_bavail = nsp->f_bavail;
  697         osp->f_files = MIN(nsp->f_files, LONG_MAX);
  698         osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
  699         osp->f_owner = nsp->f_owner;
  700         osp->f_type = nsp->f_type;
  701         osp->f_flags = nsp->f_flags;
  702         osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
  703         osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
  704         osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
  705         osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
  706         strlcpy(osp->f_fstypename, nsp->f_fstypename,
  707             MIN(MFSNAMELEN, OMFSNAMELEN));
  708         strlcpy(osp->f_mntonname, nsp->f_mntonname,
  709             MIN(MNAMELEN, OMNAMELEN));
  710         strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
  711             MIN(MNAMELEN, OMNAMELEN));
  712         osp->f_fsid = nsp->f_fsid;
  713 }
  714 #endif /* COMPAT_FREEBSD4 */
  715 
  716 /*
  717  * Change current working directory to a given file descriptor.
  718  */
  719 #ifndef _SYS_SYSPROTO_H_
  720 struct fchdir_args {
  721         int     fd;
  722 };
  723 #endif
  724 int
  725 sys_fchdir(td, uap)
  726         struct thread *td;
  727         struct fchdir_args /* {
  728                 int fd;
  729         } */ *uap;
  730 {
  731         register struct filedesc *fdp = td->td_proc->p_fd;
  732         struct vnode *vp, *tdp, *vpold;
  733         struct mount *mp;
  734         struct file *fp;
  735         cap_rights_t rights;
  736         int error;
  737 
  738         AUDIT_ARG_FD(uap->fd);
  739         error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
  740             &fp);
  741         if (error != 0)
  742                 return (error);
  743         vp = fp->f_vnode;
  744         VREF(vp);
  745         fdrop(fp, td);
  746         vn_lock(vp, LK_SHARED | LK_RETRY);
  747         AUDIT_ARG_VNODE1(vp);
  748         error = change_dir(vp, td);
  749         while (!error && (mp = vp->v_mountedhere) != NULL) {
  750                 if (vfs_busy(mp, 0))
  751                         continue;
  752                 error = VFS_ROOT(mp, LK_SHARED, &tdp);
  753                 vfs_unbusy(mp);
  754                 if (error != 0)
  755                         break;
  756                 vput(vp);
  757                 vp = tdp;
  758         }
  759         if (error != 0) {
  760                 vput(vp);
  761                 return (error);
  762         }
  763         VOP_UNLOCK(vp, 0);
  764         FILEDESC_XLOCK(fdp);
  765         vpold = fdp->fd_cdir;
  766         fdp->fd_cdir = vp;
  767         FILEDESC_XUNLOCK(fdp);
  768         vrele(vpold);
  769         return (0);
  770 }
  771 
  772 /*
  773  * Change current working directory (``.'').
  774  */
  775 #ifndef _SYS_SYSPROTO_H_
  776 struct chdir_args {
  777         char    *path;
  778 };
  779 #endif
  780 int
  781 sys_chdir(td, uap)
  782         struct thread *td;
  783         struct chdir_args /* {
  784                 char *path;
  785         } */ *uap;
  786 {
  787 
  788         return (kern_chdir(td, uap->path, UIO_USERSPACE));
  789 }
  790 
  791 int
  792 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
  793 {
  794         register struct filedesc *fdp = td->td_proc->p_fd;
  795         struct nameidata nd;
  796         struct vnode *vp;
  797         int error;
  798 
  799         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  800             pathseg, path, td);
  801         if ((error = namei(&nd)) != 0)
  802                 return (error);
  803         if ((error = change_dir(nd.ni_vp, td)) != 0) {
  804                 vput(nd.ni_vp);
  805                 NDFREE(&nd, NDF_ONLY_PNBUF);
  806                 return (error);
  807         }
  808         VOP_UNLOCK(nd.ni_vp, 0);
  809         NDFREE(&nd, NDF_ONLY_PNBUF);
  810         FILEDESC_XLOCK(fdp);
  811         vp = fdp->fd_cdir;
  812         fdp->fd_cdir = nd.ni_vp;
  813         FILEDESC_XUNLOCK(fdp);
  814         vrele(vp);
  815         return (0);
  816 }
  817 
  818 /*
  819  * Helper function for raised chroot(2) security function:  Refuse if
  820  * any filedescriptors are open directories.
  821  */
  822 static int
  823 chroot_refuse_vdir_fds(fdp)
  824         struct filedesc *fdp;
  825 {
  826         struct vnode *vp;
  827         struct file *fp;
  828         int fd;
  829 
  830         FILEDESC_LOCK_ASSERT(fdp);
  831 
  832         for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
  833                 fp = fget_locked(fdp, fd);
  834                 if (fp == NULL)
  835                         continue;
  836                 if (fp->f_type == DTYPE_VNODE) {
  837                         vp = fp->f_vnode;
  838                         if (vp->v_type == VDIR)
  839                                 return (EPERM);
  840                 }
  841         }
  842         return (0);
  843 }
  844 
  845 /*
  846  * This sysctl determines if we will allow a process to chroot(2) if it
  847  * has a directory open:
  848  *      0: disallowed for all processes.
  849  *      1: allowed for processes that were not already chroot(2)'ed.
  850  *      2: allowed for all processes.
  851  */
  852 
  853 static int chroot_allow_open_directories = 1;
  854 
  855 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
  856      &chroot_allow_open_directories, 0,
  857      "Allow a process to chroot(2) if it has a directory open");
  858 
  859 /*
  860  * Change notion of root (``/'') directory.
  861  */
  862 #ifndef _SYS_SYSPROTO_H_
  863 struct chroot_args {
  864         char    *path;
  865 };
  866 #endif
  867 int
  868 sys_chroot(td, uap)
  869         struct thread *td;
  870         struct chroot_args /* {
  871                 char *path;
  872         } */ *uap;
  873 {
  874         struct nameidata nd;
  875         int error;
  876 
  877         error = priv_check(td, PRIV_VFS_CHROOT);
  878         if (error != 0)
  879                 return (error);
  880         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
  881             UIO_USERSPACE, uap->path, td);
  882         error = namei(&nd);
  883         if (error != 0)
  884                 goto error;
  885         error = change_dir(nd.ni_vp, td);
  886         if (error != 0)
  887                 goto e_vunlock;
  888 #ifdef MAC
  889         error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
  890         if (error != 0)
  891                 goto e_vunlock;
  892 #endif
  893         VOP_UNLOCK(nd.ni_vp, 0);
  894         error = change_root(nd.ni_vp, td);
  895         vrele(nd.ni_vp);
  896         NDFREE(&nd, NDF_ONLY_PNBUF);
  897         return (error);
  898 e_vunlock:
  899         vput(nd.ni_vp);
  900 error:
  901         NDFREE(&nd, NDF_ONLY_PNBUF);
  902         return (error);
  903 }
  904 
  905 /*
  906  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  907  * instance.
  908  */
  909 int
  910 change_dir(vp, td)
  911         struct vnode *vp;
  912         struct thread *td;
  913 {
  914 #ifdef MAC
  915         int error;
  916 #endif
  917 
  918         ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
  919         if (vp->v_type != VDIR)
  920                 return (ENOTDIR);
  921 #ifdef MAC
  922         error = mac_vnode_check_chdir(td->td_ucred, vp);
  923         if (error != 0)
  924                 return (error);
  925 #endif
  926         return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
  927 }
  928 
  929 /*
  930  * Common routine for kern_chroot() and jail_attach().  The caller is
  931  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
  932  * authorize this operation.
  933  */
  934 int
  935 change_root(vp, td)
  936         struct vnode *vp;
  937         struct thread *td;
  938 {
  939         struct filedesc *fdp;
  940         struct vnode *oldvp;
  941         int error;
  942 
  943         fdp = td->td_proc->p_fd;
  944         FILEDESC_XLOCK(fdp);
  945         if (chroot_allow_open_directories == 0 ||
  946             (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  947                 error = chroot_refuse_vdir_fds(fdp);
  948                 if (error != 0) {
  949                         FILEDESC_XUNLOCK(fdp);
  950                         return (error);
  951                 }
  952         }
  953         oldvp = fdp->fd_rdir;
  954         fdp->fd_rdir = vp;
  955         VREF(fdp->fd_rdir);
  956         if (!fdp->fd_jdir) {
  957                 fdp->fd_jdir = vp;
  958                 VREF(fdp->fd_jdir);
  959         }
  960         FILEDESC_XUNLOCK(fdp);
  961         vrele(oldvp);
  962         return (0);
  963 }
  964 
  965 static __inline void
  966 flags_to_rights(int flags, cap_rights_t *rightsp)
  967 {
  968 
  969         if (flags & O_EXEC) {
  970                 cap_rights_set(rightsp, CAP_FEXECVE);
  971         } else {
  972                 switch ((flags & O_ACCMODE)) {
  973                 case O_RDONLY:
  974                         cap_rights_set(rightsp, CAP_READ);
  975                         break;
  976                 case O_RDWR:
  977                         cap_rights_set(rightsp, CAP_READ);
  978                         /* FALLTHROUGH */
  979                 case O_WRONLY:
  980                         cap_rights_set(rightsp, CAP_WRITE);
  981                         if (!(flags & (O_APPEND | O_TRUNC)))
  982                                 cap_rights_set(rightsp, CAP_SEEK);
  983                         break;
  984                 }
  985         }
  986 
  987         if (flags & O_CREAT)
  988                 cap_rights_set(rightsp, CAP_CREATE);
  989 
  990         if (flags & O_TRUNC)
  991                 cap_rights_set(rightsp, CAP_FTRUNCATE);
  992 
  993         if (flags & (O_SYNC | O_FSYNC))
  994                 cap_rights_set(rightsp, CAP_FSYNC);
  995 
  996         if (flags & (O_EXLOCK | O_SHLOCK))
  997                 cap_rights_set(rightsp, CAP_FLOCK);
  998 }
  999 
 1000 /*
 1001  * Check permissions, allocate an open file structure, and call the device
 1002  * open routine if any.
 1003  */
 1004 #ifndef _SYS_SYSPROTO_H_
 1005 struct open_args {
 1006         char    *path;
 1007         int     flags;
 1008         int     mode;
 1009 };
 1010 #endif
 1011 int
 1012 sys_open(td, uap)
 1013         struct thread *td;
 1014         register struct open_args /* {
 1015                 char *path;
 1016                 int flags;
 1017                 int mode;
 1018         } */ *uap;
 1019 {
 1020 
 1021         return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
 1022 }
 1023 
 1024 #ifndef _SYS_SYSPROTO_H_
 1025 struct openat_args {
 1026         int     fd;
 1027         char    *path;
 1028         int     flag;
 1029         int     mode;
 1030 };
 1031 #endif
 1032 int
 1033 sys_openat(struct thread *td, struct openat_args *uap)
 1034 {
 1035 
 1036         return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 1037             uap->mode));
 1038 }
 1039 
 1040 int
 1041 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
 1042     int mode)
 1043 {
 1044 
 1045         return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 1046 }
 1047 
/*
 * Open the file named by path (passed to the lookup together with the
 * descriptor fd; kern_open() supplies AT_FDCWD) and install a descriptor
 * for it in the calling process.
 *
 * On success returns 0 and stores the new descriptor index in
 * td->td_retval[0].  On failure returns an errno value; the file structure
 * allocated here is released and no descriptor is left installed.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
        struct proc *p = td->td_proc;
        struct filedesc *fdp = p->p_fd;
        struct file *fp;
        struct vnode *vp;
        struct nameidata nd;
        cap_rights_t rights;
        int cmode, error, indx;

        /* -1 means "no descriptor installed yet"; see success/bad below. */
        indx = -1;

        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* XXX: audit dirfd */
        /* Rights required of the directory descriptor for this open. */
        cap_rights_init(&rights, CAP_LOOKUP);
        flags_to_rights(flags, &rights);
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.
         */
        if (flags & O_EXEC) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE) {
                return (EINVAL);
        } else {
                flags = FFLAGS(flags);
        }

        /*
         * Allocate the file descriptor, but don't install a descriptor yet.
         */
        error = falloc_noinstall(td, &fp);
        if (error != 0)
                return (error);
        /*
         * An extra reference on `fp' has been held for us by
         * falloc_noinstall().
         */
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        /* Creation mode: apply the umask and strip the sticky bit. */
        cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
            &rights, td);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open(&nd, &flags, cmode, fp);
        if (error != 0) {
                /*
                 * If the vn_open replaced the method vector, something
                 * wonderous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
                if (error == ENXIO && fp->f_ops != &badfileops)
                        goto success;

                /*
                 * Handle special fdopen() case. bleh.
                 *
                 * Don't do this for relative (capability) lookups; we don't
                 * understand exactly what would happen, and we don't think
                 * that it ever should.
                 */
                if (nd.ni_strictrelative == 0 &&
                    (error == ENODEV || error == ENXIO) &&
                    td->td_dupfd >= 0) {
                        /* On success dupfdopen() sets indx itself. */
                        error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
                            &indx);
                        if (error == 0)
                                goto success;
                }

                goto bad;
        }
        td->td_dupfd = 0;
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;

        /*
         * Store the vnode, for any f_type. Typically, the vnode use
         * count is decremented by direct call to vn_closefile() for
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;
        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
                KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
                fp->f_seqcount = 1;
                finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
                    DTYPE_VNODE, vp, &vnops);
        }

        /* The vnode is unlocked before any truncation is attempted. */
        VOP_UNLOCK(vp, 0);
        if (flags & O_TRUNC) {
                error = fo_truncate(fp, 0, td->td_ucred, td);
                if (error != 0)
                        goto bad;
        }
success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
        if (indx == -1) {
                struct filecaps *fcaps;

#ifdef CAPABILITIES
                /* Strictly relative lookups pass on the lookup's filecaps. */
                if (nd.ni_strictrelative == 1)
                        fcaps = &nd.ni_filecaps;
                else
#endif
                        fcaps = NULL;
                error = finstall(td, fp, &indx, flags, fcaps);
                /* On success finstall() consumes fcaps. */
                if (error != 0) {
                        filecaps_free(&nd.ni_filecaps);
                        goto bad;
                }
        } else {
                filecaps_free(&nd.ni_filecaps);
        }

        /*
         * Release our private reference, leaving the one associated with
         * the descriptor table intact.
         */
        fdrop(fp, td);
        td->td_retval[0] = indx;
        return (0);
bad:
        /* Every path reaching here must not have installed a descriptor. */
        KASSERT(indx == -1, ("indx=%d, should be -1", indx));
        fdrop(fp, td);
        return (error);
}
 1186 
 1187 #ifdef COMPAT_43
 1188 /*
 1189  * Create a file.
 1190  */
 1191 #ifndef _SYS_SYSPROTO_H_
 1192 struct ocreat_args {
 1193         char    *path;
 1194         int     mode;
 1195 };
 1196 #endif
 1197 int
 1198 ocreat(td, uap)
 1199         struct thread *td;
 1200         register struct ocreat_args /* {
 1201                 char *path;
 1202                 int mode;
 1203         } */ *uap;
 1204 {
 1205 
 1206         return (kern_open(td, uap->path, UIO_USERSPACE,
 1207             O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 1208 }
 1209 #endif /* COMPAT_43 */
 1210 
 1211 /*
 1212  * Create a special file.
 1213  */
 1214 #ifndef _SYS_SYSPROTO_H_
 1215 struct mknod_args {
 1216         char    *path;
 1217         int     mode;
 1218         int     dev;
 1219 };
 1220 #endif
 1221 int
 1222 sys_mknod(td, uap)
 1223         struct thread *td;
 1224         register struct mknod_args /* {
 1225                 char *path;
 1226                 int mode;
 1227                 int dev;
 1228         } */ *uap;
 1229 {
 1230 
 1231         return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
 1232 }
 1233 
 1234 #ifndef _SYS_SYSPROTO_H_
 1235 struct mknodat_args {
 1236         int     fd;
 1237         char    *path;
 1238         mode_t  mode;
 1239         dev_t   dev;
 1240 };
 1241 #endif
 1242 int
 1243 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 1244 {
 1245 
 1246         return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 1247             uap->dev));
 1248 }
 1249 
 1250 int
 1251 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
 1252     int dev)
 1253 {
 1254 
 1255         return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
 1256 }
 1257 
 1258 int
 1259 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1260     int mode, int dev)
 1261 {
 1262         struct vnode *vp;
 1263         struct mount *mp;
 1264         struct vattr vattr;
 1265         struct nameidata nd;
 1266         cap_rights_t rights;
 1267         int error, whiteout = 0;
 1268 
 1269         AUDIT_ARG_MODE(mode);
 1270         AUDIT_ARG_DEV(dev);
 1271         switch (mode & S_IFMT) {
 1272         case S_IFCHR:
 1273         case S_IFBLK:
 1274                 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 1275                 break;
 1276         case S_IFMT:
 1277                 error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 1278                 break;
 1279         case S_IFWHT:
 1280                 error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 1281                 break;
 1282         case S_IFIFO:
 1283                 if (dev == 0)
 1284                         return (kern_mkfifoat(td, fd, path, pathseg, mode));
 1285                 /* FALLTHROUGH */
 1286         default:
 1287                 error = EINVAL;
 1288                 break;
 1289         }
 1290         if (error != 0)
 1291                 return (error);
 1292 restart:
 1293         bwillwrite();
 1294         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1295             pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td);
 1296         if ((error = namei(&nd)) != 0)
 1297                 return (error);
 1298         vp = nd.ni_vp;
 1299         if (vp != NULL) {
 1300                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1301                 if (vp == nd.ni_dvp)
 1302                         vrele(nd.ni_dvp);
 1303                 else
 1304                         vput(nd.ni_dvp);
 1305                 vrele(vp);
 1306                 return (EEXIST);
 1307         } else {
 1308                 VATTR_NULL(&vattr);
 1309                 vattr.va_mode = (mode & ALLPERMS) &
 1310                     ~td->td_proc->p_fd->fd_cmask;
 1311                 vattr.va_rdev = dev;
 1312                 whiteout = 0;
 1313 
 1314                 switch (mode & S_IFMT) {
 1315                 case S_IFMT:    /* used by badsect to flag bad sectors */
 1316                         vattr.va_type = VBAD;
 1317                         break;
 1318                 case S_IFCHR:
 1319                         vattr.va_type = VCHR;
 1320                         break;
 1321                 case S_IFBLK:
 1322                         vattr.va_type = VBLK;
 1323                         break;
 1324                 case S_IFWHT:
 1325                         whiteout = 1;
 1326                         break;
 1327                 default:
 1328                         panic("kern_mknod: invalid mode");
 1329                 }
 1330         }
 1331         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1332                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1333                 vput(nd.ni_dvp);
 1334                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1335                         return (error);
 1336                 goto restart;
 1337         }
 1338 #ifdef MAC
 1339         if (error == 0 && !whiteout)
 1340                 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 1341                     &nd.ni_cnd, &vattr);
 1342 #endif
 1343         if (error == 0) {
 1344                 if (whiteout)
 1345                         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 1346                 else {
 1347                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 1348                                                 &nd.ni_cnd, &vattr);
 1349                         if (error == 0)
 1350                                 vput(nd.ni_vp);
 1351                 }
 1352         }
 1353         NDFREE(&nd, NDF_ONLY_PNBUF);
 1354         vput(nd.ni_dvp);
 1355         vn_finished_write(mp);
 1356         return (error);
 1357 }
 1358 
 1359 /*
 1360  * Create a named pipe.
 1361  */
 1362 #ifndef _SYS_SYSPROTO_H_
 1363 struct mkfifo_args {
 1364         char    *path;
 1365         int     mode;
 1366 };
 1367 #endif
 1368 int
 1369 sys_mkfifo(td, uap)
 1370         struct thread *td;
 1371         register struct mkfifo_args /* {
 1372                 char *path;
 1373                 int mode;
 1374         } */ *uap;
 1375 {
 1376 
 1377         return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
 1378 }
 1379 
 1380 #ifndef _SYS_SYSPROTO_H_
 1381 struct mkfifoat_args {
 1382         int     fd;
 1383         char    *path;
 1384         mode_t  mode;
 1385 };
 1386 #endif
 1387 int
 1388 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 1389 {
 1390 
 1391         return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 1392             uap->mode));
 1393 }
 1394 
 1395 int
 1396 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 1397 {
 1398 
 1399         return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
 1400 }
 1401 
 1402 int
 1403 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1404     int mode)
 1405 {
 1406         struct mount *mp;
 1407         struct vattr vattr;
 1408         struct nameidata nd;
 1409         cap_rights_t rights;
 1410         int error;
 1411 
 1412         AUDIT_ARG_MODE(mode);
 1413 restart:
 1414         bwillwrite();
 1415         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1416             pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td);
 1417         if ((error = namei(&nd)) != 0)
 1418                 return (error);
 1419         if (nd.ni_vp != NULL) {
 1420                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1421                 if (nd.ni_vp == nd.ni_dvp)
 1422                         vrele(nd.ni_dvp);
 1423                 else
 1424                         vput(nd.ni_dvp);
 1425                 vrele(nd.ni_vp);
 1426                 return (EEXIST);
 1427         }
 1428         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1429                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1430                 vput(nd.ni_dvp);
 1431                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1432                         return (error);
 1433                 goto restart;
 1434         }
 1435         VATTR_NULL(&vattr);
 1436         vattr.va_type = VFIFO;
 1437         vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 1438 #ifdef MAC
 1439         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1440             &vattr);
 1441         if (error != 0)
 1442                 goto out;
 1443 #endif
 1444         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1445         if (error == 0)
 1446                 vput(nd.ni_vp);
 1447 #ifdef MAC
 1448 out:
 1449 #endif
 1450         vput(nd.ni_dvp);
 1451         vn_finished_write(mp);
 1452         NDFREE(&nd, NDF_ONLY_PNBUF);
 1453         return (error);
 1454 }
 1455 
 1456 /*
 1457  * Make a hard file link.
 1458  */
 1459 #ifndef _SYS_SYSPROTO_H_
 1460 struct link_args {
 1461         char    *path;
 1462         char    *link;
 1463 };
 1464 #endif
 1465 int
 1466 sys_link(td, uap)
 1467         struct thread *td;
 1468         register struct link_args /* {
 1469                 char *path;
 1470                 char *link;
 1471         } */ *uap;
 1472 {
 1473 
 1474         return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
 1475 }
 1476 
 1477 #ifndef _SYS_SYSPROTO_H_
 1478 struct linkat_args {
 1479         int     fd1;
 1480         char    *path1;
 1481         int     fd2;
 1482         char    *path2;
 1483         int     flag;
 1484 };
 1485 #endif
 1486 int
 1487 sys_linkat(struct thread *td, struct linkat_args *uap)
 1488 {
 1489         int flag;
 1490 
 1491         flag = uap->flag;
 1492         if (flag & ~AT_SYMLINK_FOLLOW)
 1493                 return (EINVAL);
 1494 
 1495         return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 1496             UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 1497 }
 1498 
/*
 * Hard link policy knobs consulted by can_hardlink().  Both default to 0,
 * in which case can_hardlink() permits the link without examining the
 * file's ownership.  They are exported read-write under security.bsd.
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
 1509 
 1510 static int
 1511 can_hardlink(struct vnode *vp, struct ucred *cred)
 1512 {
 1513         struct vattr va;
 1514         int error;
 1515 
 1516         if (!hardlink_check_uid && !hardlink_check_gid)
 1517                 return (0);
 1518 
 1519         error = VOP_GETATTR(vp, &va, cred);
 1520         if (error != 0)
 1521                 return (error);
 1522 
 1523         if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 1524                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1525                 if (error != 0)
 1526                         return (error);
 1527         }
 1528 
 1529         if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 1530                 error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 1531                 if (error != 0)
 1532                         return (error);
 1533         }
 1534 
 1535         return (0);
 1536 }
 1537 
 1538 int
 1539 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1540 {
 1541 
 1542         return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
 1543 }
 1544 
 1545 int
 1546 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 1547     enum uio_seg segflg, int follow)
 1548 {
 1549         struct vnode *vp;
 1550         struct mount *mp;
 1551         struct nameidata nd;
 1552         cap_rights_t rights;
 1553         int error;
 1554 
 1555         bwillwrite();
 1556         NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 1557 
 1558 again:
 1559         if ((error = namei(&nd)) != 0)
 1560                 return (error);
 1561         NDFREE(&nd, NDF_ONLY_PNBUF);
 1562         vp = nd.ni_vp;
 1563         if (vp->v_type == VDIR) {
 1564                 vrele(vp);
 1565                 return (EPERM);         /* POSIX */
 1566         }
 1567         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 1568                 vrele(vp);
 1569                 return (error);
 1570         }
 1571         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2,
 1572             segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td);
 1573         if ((error = namei(&nd)) == 0) {
 1574                 if (nd.ni_vp != NULL) {
 1575                         if (nd.ni_dvp == nd.ni_vp)
 1576                                 vrele(nd.ni_dvp);
 1577                         else
 1578                                 vput(nd.ni_dvp);
 1579                         vrele(nd.ni_vp);
 1580                         error = EEXIST;
 1581                 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 1582                         /*
 1583                          * Check for cross-device links.  No need to
 1584                          * recheck vp->v_type, since it cannot change
 1585                          * for non-doomed vnode.
 1586                          */
 1587                         if (nd.ni_dvp->v_mount != vp->v_mount)
 1588                                 error = EXDEV;
 1589                         else
 1590                                 error = can_hardlink(vp, td->td_ucred);
 1591                         if (error == 0)
 1592 #ifdef MAC
 1593                                 error = mac_vnode_check_link(td->td_ucred,
 1594                                     nd.ni_dvp, vp, &nd.ni_cnd);
 1595                         if (error == 0)
 1596 #endif
 1597                                 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 1598                         VOP_UNLOCK(vp, 0);
 1599                         vput(nd.ni_dvp);
 1600                 } else {
 1601                         vput(nd.ni_dvp);
 1602                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1603                         vrele(vp);
 1604                         vn_finished_write(mp);
 1605                         goto again;
 1606                 }
 1607                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1608         }
 1609         vrele(vp);
 1610         vn_finished_write(mp);
 1611         return (error);
 1612 }
 1613 
 1614 /*
 1615  * Make a symbolic link.
 1616  */
 1617 #ifndef _SYS_SYSPROTO_H_
 1618 struct symlink_args {
 1619         char    *path;
 1620         char    *link;
 1621 };
 1622 #endif
 1623 int
 1624 sys_symlink(td, uap)
 1625         struct thread *td;
 1626         register struct symlink_args /* {
 1627                 char *path;
 1628                 char *link;
 1629         } */ *uap;
 1630 {
 1631 
 1632         return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
 1633 }
 1634 
 1635 #ifndef _SYS_SYSPROTO_H_
 1636 struct symlinkat_args {
 1637         char    *path;
 1638         int     fd;
 1639         char    *path2;
 1640 };
 1641 #endif
 1642 int
 1643 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 1644 {
 1645 
 1646         return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 1647             UIO_USERSPACE));
 1648 }
 1649 
 1650 int
 1651 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
 1652 {
 1653 
 1654         return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
 1655 }
 1656 
 1657 int
 1658 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 1659     enum uio_seg segflg)
 1660 {
 1661         struct mount *mp;
 1662         struct vattr vattr;
 1663         char *syspath;
 1664         struct nameidata nd;
 1665         int error;
 1666         cap_rights_t rights;
 1667 
 1668         if (segflg == UIO_SYSSPACE) {
 1669                 syspath = path1;
 1670         } else {
 1671                 syspath = uma_zalloc(namei_zone, M_WAITOK);
 1672                 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 1673                         goto out;
 1674         }
 1675         AUDIT_ARG_TEXT(syspath);
 1676 restart:
 1677         bwillwrite();
 1678         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 1679             segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td);
 1680         if ((error = namei(&nd)) != 0)
 1681                 goto out;
 1682         if (nd.ni_vp) {
 1683                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1684                 if (nd.ni_vp == nd.ni_dvp)
 1685                         vrele(nd.ni_dvp);
 1686                 else
 1687                         vput(nd.ni_dvp);
 1688                 vrele(nd.ni_vp);
 1689                 error = EEXIST;
 1690                 goto out;
 1691         }
 1692         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1693                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1694                 vput(nd.ni_dvp);
 1695                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1696                         goto out;
 1697                 goto restart;
 1698         }
 1699         VATTR_NULL(&vattr);
 1700         vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 1701 #ifdef MAC
 1702         vattr.va_type = VLNK;
 1703         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 1704             &vattr);
 1705         if (error != 0)
 1706                 goto out2;
 1707 #endif
 1708         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 1709         if (error == 0)
 1710                 vput(nd.ni_vp);
 1711 #ifdef MAC
 1712 out2:
 1713 #endif
 1714         NDFREE(&nd, NDF_ONLY_PNBUF);
 1715         vput(nd.ni_dvp);
 1716         vn_finished_write(mp);
 1717 out:
 1718         if (segflg != UIO_SYSSPACE)
 1719                 uma_zfree(namei_zone, syspath);
 1720         return (error);
 1721 }
 1722 
 1723 /*
 1724  * Delete a whiteout from the filesystem.
 1725  */
 1726 int
 1727 sys_undelete(td, uap)
 1728         struct thread *td;
 1729         register struct undelete_args /* {
 1730                 char *path;
 1731         } */ *uap;
 1732 {
 1733         struct mount *mp;
 1734         struct nameidata nd;
 1735         int error;
 1736 
 1737 restart:
 1738         bwillwrite();
 1739         NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 1740             UIO_USERSPACE, uap->path, td);
 1741         error = namei(&nd);
 1742         if (error != 0)
 1743                 return (error);
 1744 
 1745         if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 1746                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1747                 if (nd.ni_vp == nd.ni_dvp)
 1748                         vrele(nd.ni_dvp);
 1749                 else
 1750                         vput(nd.ni_dvp);
 1751                 if (nd.ni_vp)
 1752                         vrele(nd.ni_vp);
 1753                 return (EEXIST);
 1754         }
 1755         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1756                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1757                 vput(nd.ni_dvp);
 1758                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 1759                         return (error);
 1760                 goto restart;
 1761         }
 1762         error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 1763         NDFREE(&nd, NDF_ONLY_PNBUF);
 1764         vput(nd.ni_dvp);
 1765         vn_finished_write(mp);
 1766         return (error);
 1767 }
 1768 
 1769 /*
 1770  * Delete a name from the filesystem.
 1771  */
 1772 #ifndef _SYS_SYSPROTO_H_
 1773 struct unlink_args {
 1774         char    *path;
 1775 };
 1776 #endif
 1777 int
 1778 sys_unlink(td, uap)
 1779         struct thread *td;
 1780         struct unlink_args /* {
 1781                 char *path;
 1782         } */ *uap;
 1783 {
 1784 
 1785         return (kern_unlink(td, uap->path, UIO_USERSPACE));
 1786 }
 1787 
 1788 #ifndef _SYS_SYSPROTO_H_
 1789 struct unlinkat_args {
 1790         int     fd;
 1791         char    *path;
 1792         int     flag;
 1793 };
 1794 #endif
 1795 int
 1796 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 1797 {
 1798         int flag = uap->flag;
 1799         int fd = uap->fd;
 1800         char *path = uap->path;
 1801 
 1802         if (flag & ~AT_REMOVEDIR)
 1803                 return (EINVAL);
 1804 
 1805         if (flag & AT_REMOVEDIR)
 1806                 return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 1807         else
 1808                 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 1809 }
 1810 
 1811 int
 1812 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 1813 {
 1814 
 1815         return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 1816 }
 1817 
 1818 int
 1819 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 1820     ino_t oldinum)
 1821 {
 1822         struct mount *mp;
 1823         struct vnode *vp;
 1824         struct nameidata nd;
 1825         struct stat sb;
 1826         cap_rights_t rights;
 1827         int error;
 1828 
 1829 restart:
 1830         bwillwrite();
 1831         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 1832             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 1833         if ((error = namei(&nd)) != 0)
 1834                 return (error == EINVAL ? EPERM : error);
 1835         vp = nd.ni_vp;
 1836         if (vp->v_type == VDIR && oldinum == 0) {
 1837                 error = EPERM;          /* POSIX */
 1838         } else if (oldinum != 0 &&
 1839                   ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 1840                   sb.st_ino != oldinum) {
 1841                         error = EIDRM;  /* Identifier removed */
 1842         } else {
 1843                 /*
 1844                  * The root of a mounted filesystem cannot be deleted.
 1845                  *
 1846                  * XXX: can this only be a VDIR case?
 1847                  */
 1848                 if (vp->v_vflag & VV_ROOT)
 1849                         error = EBUSY;
 1850         }
 1851         if (error == 0) {
 1852                 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 1853                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1854                         vput(nd.ni_dvp);
 1855                         if (vp == nd.ni_dvp)
 1856                                 vrele(vp);
 1857                         else
 1858                                 vput(vp);
 1859                         if ((error = vn_start_write(NULL, &mp,
 1860                             V_XSLEEP | PCATCH)) != 0)
 1861                                 return (error);
 1862                         goto restart;
 1863                 }
 1864 #ifdef MAC
 1865                 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 1866                     &nd.ni_cnd);
 1867                 if (error != 0)
 1868                         goto out;
 1869 #endif
 1870                 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 1871                 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 1872 #ifdef MAC
 1873 out:
 1874 #endif
 1875                 vn_finished_write(mp);
 1876         }
 1877         NDFREE(&nd, NDF_ONLY_PNBUF);
 1878         vput(nd.ni_dvp);
 1879         if (vp == nd.ni_dvp)
 1880                 vrele(vp);
 1881         else
 1882                 vput(vp);
 1883         return (error);
 1884 }
 1885 
 1886 /*
 1887  * Reposition read/write file offset.
 1888  */
 1889 #ifndef _SYS_SYSPROTO_H_
 1890 struct lseek_args {
 1891         int     fd;
 1892         int     pad;
 1893         off_t   offset;
 1894         int     whence;
 1895 };
 1896 #endif
 1897 int
 1898 sys_lseek(td, uap)
 1899         struct thread *td;
 1900         register struct lseek_args /* {
 1901                 int fd;
 1902                 int pad;
 1903                 off_t offset;
 1904                 int whence;
 1905         } */ *uap;
 1906 {
 1907         struct file *fp;
 1908         cap_rights_t rights;
 1909         int error;
 1910 
 1911         AUDIT_ARG_FD(uap->fd);
 1912         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 1913         if (error != 0)
 1914                 return (error);
 1915         error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 1916             fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
 1917         fdrop(fp, td);
 1918         return (error);
 1919 }
 1920 
 1921 #if defined(COMPAT_43)
 1922 /*
 1923  * Reposition read/write file offset.
 1924  */
 1925 #ifndef _SYS_SYSPROTO_H_
 1926 struct olseek_args {
 1927         int     fd;
 1928         long    offset;
 1929         int     whence;
 1930 };
 1931 #endif
 1932 int
 1933 olseek(td, uap)
 1934         struct thread *td;
 1935         register struct olseek_args /* {
 1936                 int fd;
 1937                 long offset;
 1938                 int whence;
 1939         } */ *uap;
 1940 {
 1941         struct lseek_args /* {
 1942                 int fd;
 1943                 int pad;
 1944                 off_t offset;
 1945                 int whence;
 1946         } */ nuap;
 1947 
 1948         nuap.fd = uap->fd;
 1949         nuap.offset = uap->offset;
 1950         nuap.whence = uap->whence;
 1951         return (sys_lseek(td, &nuap));
 1952 }
 1953 #endif /* COMPAT_43 */
 1954 
 1955 /* Version with the 'pad' argument */
 1956 int
 1957 freebsd6_lseek(td, uap)
 1958         struct thread *td;
 1959         register struct freebsd6_lseek_args *uap;
 1960 {
 1961         struct lseek_args ouap;
 1962 
 1963         ouap.fd = uap->fd;
 1964         ouap.offset = uap->offset;
 1965         ouap.whence = uap->whence;
 1966         return (sys_lseek(td, &ouap));
 1967 }
 1968 
 1969 /*
 1970  * Check access permissions using passed credentials.
 1971  */
 1972 static int
 1973 vn_access(vp, user_flags, cred, td)
 1974         struct vnode    *vp;
 1975         int             user_flags;
 1976         struct ucred    *cred;
 1977         struct thread   *td;
 1978 {
 1979         accmode_t accmode;
 1980         int error;
 1981 
 1982         /* Flags == 0 means only check for existence. */
 1983         error = 0;
 1984         if (user_flags) {
 1985                 accmode = 0;
 1986                 if (user_flags & R_OK)
 1987                         accmode |= VREAD;
 1988                 if (user_flags & W_OK)
 1989                         accmode |= VWRITE;
 1990                 if (user_flags & X_OK)
 1991                         accmode |= VEXEC;
 1992 #ifdef MAC
 1993                 error = mac_vnode_check_access(cred, vp, accmode);
 1994                 if (error != 0)
 1995                         return (error);
 1996 #endif
 1997                 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 1998                         error = VOP_ACCESS(vp, accmode, cred, td);
 1999         }
 2000         return (error);
 2001 }
 2002 
 2003 /*
 2004  * Check access permissions using "real" credentials.
 2005  */
 2006 #ifndef _SYS_SYSPROTO_H_
 2007 struct access_args {
 2008         char    *path;
 2009         int     amode;
 2010 };
 2011 #endif
 2012 int
 2013 sys_access(td, uap)
 2014         struct thread *td;
 2015         register struct access_args /* {
 2016                 char *path;
 2017                 int amode;
 2018         } */ *uap;
 2019 {
 2020 
 2021         return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode));
 2022 }
 2023 
 2024 #ifndef _SYS_SYSPROTO_H_
 2025 struct faccessat_args {
 2026         int     dirfd;
 2027         char    *path;
 2028         int     amode;
 2029         int     flag;
 2030 }
 2031 #endif
 2032 int
 2033 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 2034 {
 2035 
 2036         if (uap->flag & ~AT_EACCESS)
 2037                 return (EINVAL);
 2038         return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 2039             uap->amode));
 2040 }
 2041 
 2042 int
 2043 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2044 {
 2045 
 2046         return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode));
 2047 }
 2048 
 2049 int
 2050 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2051     int flag, int amode)
 2052 {
 2053         struct ucred *cred, *tmpcred;
 2054         struct vnode *vp;
 2055         struct nameidata nd;
 2056         cap_rights_t rights;
 2057         int error;
 2058 
 2059         /*
 2060          * Create and modify a temporary credential instead of one that
 2061          * is potentially shared.
 2062          */
 2063         if (!(flag & AT_EACCESS)) {
 2064                 cred = td->td_ucred;
 2065                 tmpcred = crdup(cred);
 2066                 tmpcred->cr_uid = cred->cr_ruid;
 2067                 tmpcred->cr_groups[0] = cred->cr_rgid;
 2068                 td->td_ucred = tmpcred;
 2069         } else
 2070                 cred = tmpcred = td->td_ucred;
 2071         AUDIT_ARG_VALUE(amode);
 2072         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 2073             AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 2074             td);
 2075         if ((error = namei(&nd)) != 0)
 2076                 goto out1;
 2077         vp = nd.ni_vp;
 2078 
 2079         error = vn_access(vp, amode, tmpcred, td);
 2080         NDFREE(&nd, NDF_ONLY_PNBUF);
 2081         vput(vp);
 2082 out1:
 2083         if (!(flag & AT_EACCESS)) {
 2084                 td->td_ucred = cred;
 2085                 crfree(tmpcred);
 2086         }
 2087         return (error);
 2088 }
 2089 
 2090 /*
 2091  * Check access permissions using "effective" credentials.
 2092  */
 2093 #ifndef _SYS_SYSPROTO_H_
 2094 struct eaccess_args {
 2095         char    *path;
 2096         int     amode;
 2097 };
 2098 #endif
 2099 int
 2100 sys_eaccess(td, uap)
 2101         struct thread *td;
 2102         register struct eaccess_args /* {
 2103                 char *path;
 2104                 int amode;
 2105         } */ *uap;
 2106 {
 2107 
 2108         return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode));
 2109 }
 2110 
 2111 int
 2112 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode)
 2113 {
 2114 
 2115         return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode));
 2116 }
 2117 
 2118 #if defined(COMPAT_43)
 2119 /*
 2120  * Get file status; this version follows links.
 2121  */
 2122 #ifndef _SYS_SYSPROTO_H_
 2123 struct ostat_args {
 2124         char    *path;
 2125         struct ostat *ub;
 2126 };
 2127 #endif
 2128 int
 2129 ostat(td, uap)
 2130         struct thread *td;
 2131         register struct ostat_args /* {
 2132                 char *path;
 2133                 struct ostat *ub;
 2134         } */ *uap;
 2135 {
 2136         struct stat sb;
 2137         struct ostat osb;
 2138         int error;
 2139 
 2140         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2141         if (error != 0)
 2142                 return (error);
 2143         cvtstat(&sb, &osb);
 2144         return (copyout(&osb, uap->ub, sizeof (osb)));
 2145 }
 2146 
 2147 /*
 2148  * Get file status; this version does not follow links.
 2149  */
 2150 #ifndef _SYS_SYSPROTO_H_
 2151 struct olstat_args {
 2152         char    *path;
 2153         struct ostat *ub;
 2154 };
 2155 #endif
 2156 int
 2157 olstat(td, uap)
 2158         struct thread *td;
 2159         register struct olstat_args /* {
 2160                 char *path;
 2161                 struct ostat *ub;
 2162         } */ *uap;
 2163 {
 2164         struct stat sb;
 2165         struct ostat osb;
 2166         int error;
 2167 
 2168         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2169         if (error != 0)
 2170                 return (error);
 2171         cvtstat(&sb, &osb);
 2172         return (copyout(&osb, uap->ub, sizeof (osb)));
 2173 }
 2174 
 2175 /*
 2176  * Convert from an old to a new stat structure.
 2177  */
 2178 void
 2179 cvtstat(st, ost)
 2180         struct stat *st;
 2181         struct ostat *ost;
 2182 {
 2183 
 2184         bzero(ost, sizeof(*ost));
 2185         ost->st_dev = st->st_dev;
 2186         ost->st_ino = st->st_ino;
 2187         ost->st_mode = st->st_mode;
 2188         ost->st_nlink = st->st_nlink;
 2189         ost->st_uid = st->st_uid;
 2190         ost->st_gid = st->st_gid;
 2191         ost->st_rdev = st->st_rdev;
 2192         if (st->st_size < (quad_t)1 << 32)
 2193                 ost->st_size = st->st_size;
 2194         else
 2195                 ost->st_size = -2;
 2196         ost->st_atim = st->st_atim;
 2197         ost->st_mtim = st->st_mtim;
 2198         ost->st_ctim = st->st_ctim;
 2199         ost->st_blksize = st->st_blksize;
 2200         ost->st_blocks = st->st_blocks;
 2201         ost->st_flags = st->st_flags;
 2202         ost->st_gen = st->st_gen;
 2203 }
 2204 #endif /* COMPAT_43 */
 2205 
 2206 /*
 2207  * Get file status; this version follows links.
 2208  */
 2209 #ifndef _SYS_SYSPROTO_H_
 2210 struct stat_args {
 2211         char    *path;
 2212         struct stat *ub;
 2213 };
 2214 #endif
 2215 int
 2216 sys_stat(td, uap)
 2217         struct thread *td;
 2218         register struct stat_args /* {
 2219                 char *path;
 2220                 struct stat *ub;
 2221         } */ *uap;
 2222 {
 2223         struct stat sb;
 2224         int error;
 2225 
 2226         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2227         if (error == 0)
 2228                 error = copyout(&sb, uap->ub, sizeof (sb));
 2229         return (error);
 2230 }
 2231 
 2232 #ifndef _SYS_SYSPROTO_H_
 2233 struct fstatat_args {
 2234         int     fd;
 2235         char    *path;
 2236         struct stat     *buf;
 2237         int     flag;
 2238 }
 2239 #endif
 2240 int
 2241 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 2242 {
 2243         struct stat sb;
 2244         int error;
 2245 
 2246         error = kern_statat(td, uap->flag, uap->fd, uap->path,
 2247             UIO_USERSPACE, &sb);
 2248         if (error == 0)
 2249                 error = copyout(&sb, uap->buf, sizeof (sb));
 2250         return (error);
 2251 }
 2252 
 2253 int
 2254 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2255 {
 2256 
 2257         return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
 2258 }
 2259 
 2260 int
 2261 kern_statat(struct thread *td, int flag, int fd, char *path,
 2262     enum uio_seg pathseg, struct stat *sbp)
 2263 {
 2264 
 2265         return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
 2266 }
 2267 
 2268 int
 2269 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
 2270     enum uio_seg pathseg, struct stat *sbp,
 2271     void (*hook)(struct vnode *vp, struct stat *sbp))
 2272 {
 2273         struct nameidata nd;
 2274         struct stat sb;
 2275         cap_rights_t rights;
 2276         int error;
 2277 
 2278         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2279                 return (EINVAL);
 2280 
 2281         NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 2282             FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 2283             cap_rights_init(&rights, CAP_FSTAT), td);
 2284 
 2285         if ((error = namei(&nd)) != 0)
 2286                 return (error);
 2287         error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 2288         if (error == 0) {
 2289                 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
 2290                 if (S_ISREG(sb.st_mode))
 2291                         SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
 2292                 if (__predict_false(hook != NULL))
 2293                         hook(nd.ni_vp, &sb);
 2294         }
 2295         NDFREE(&nd, NDF_ONLY_PNBUF);
 2296         vput(nd.ni_vp);
 2297         if (error != 0)
 2298                 return (error);
 2299         *sbp = sb;
 2300 #ifdef KTRACE
 2301         if (KTRPOINT(td, KTR_STRUCT))
 2302                 ktrstat(&sb);
 2303 #endif
 2304         return (0);
 2305 }
 2306 
 2307 /*
 2308  * Get file status; this version does not follow links.
 2309  */
 2310 #ifndef _SYS_SYSPROTO_H_
 2311 struct lstat_args {
 2312         char    *path;
 2313         struct stat *ub;
 2314 };
 2315 #endif
 2316 int
 2317 sys_lstat(td, uap)
 2318         struct thread *td;
 2319         register struct lstat_args /* {
 2320                 char *path;
 2321                 struct stat *ub;
 2322         } */ *uap;
 2323 {
 2324         struct stat sb;
 2325         int error;
 2326 
 2327         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2328         if (error == 0)
 2329                 error = copyout(&sb, uap->ub, sizeof (sb));
 2330         return (error);
 2331 }
 2332 
 2333 int
 2334 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
 2335 {
 2336 
 2337         return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
 2338             sbp));
 2339 }
 2340 
 2341 /*
 2342  * Implementation of the NetBSD [l]stat() functions.
 2343  */
 2344 void
 2345 cvtnstat(sb, nsb)
 2346         struct stat *sb;
 2347         struct nstat *nsb;
 2348 {
 2349 
 2350         bzero(nsb, sizeof *nsb);
 2351         nsb->st_dev = sb->st_dev;
 2352         nsb->st_ino = sb->st_ino;
 2353         nsb->st_mode = sb->st_mode;
 2354         nsb->st_nlink = sb->st_nlink;
 2355         nsb->st_uid = sb->st_uid;
 2356         nsb->st_gid = sb->st_gid;
 2357         nsb->st_rdev = sb->st_rdev;
 2358         nsb->st_atim = sb->st_atim;
 2359         nsb->st_mtim = sb->st_mtim;
 2360         nsb->st_ctim = sb->st_ctim;
 2361         nsb->st_size = sb->st_size;
 2362         nsb->st_blocks = sb->st_blocks;
 2363         nsb->st_blksize = sb->st_blksize;
 2364         nsb->st_flags = sb->st_flags;
 2365         nsb->st_gen = sb->st_gen;
 2366         nsb->st_birthtim = sb->st_birthtim;
 2367 }
 2368 
 2369 #ifndef _SYS_SYSPROTO_H_
 2370 struct nstat_args {
 2371         char    *path;
 2372         struct nstat *ub;
 2373 };
 2374 #endif
 2375 int
 2376 sys_nstat(td, uap)
 2377         struct thread *td;
 2378         register struct nstat_args /* {
 2379                 char *path;
 2380                 struct nstat *ub;
 2381         } */ *uap;
 2382 {
 2383         struct stat sb;
 2384         struct nstat nsb;
 2385         int error;
 2386 
 2387         error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 2388         if (error != 0)
 2389                 return (error);
 2390         cvtnstat(&sb, &nsb);
 2391         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2392 }
 2393 
 2394 /*
 2395  * NetBSD lstat.  Get file status; this version does not follow links.
 2396  */
 2397 #ifndef _SYS_SYSPROTO_H_
 2398 struct lstat_args {
 2399         char    *path;
 2400         struct stat *ub;
 2401 };
 2402 #endif
 2403 int
 2404 sys_nlstat(td, uap)
 2405         struct thread *td;
 2406         register struct nlstat_args /* {
 2407                 char *path;
 2408                 struct nstat *ub;
 2409         } */ *uap;
 2410 {
 2411         struct stat sb;
 2412         struct nstat nsb;
 2413         int error;
 2414 
 2415         error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 2416         if (error != 0)
 2417                 return (error);
 2418         cvtnstat(&sb, &nsb);
 2419         return (copyout(&nsb, uap->ub, sizeof (nsb)));
 2420 }
 2421 
 2422 /*
 2423  * Get configurable pathname variables.
 2424  */
 2425 #ifndef _SYS_SYSPROTO_H_
 2426 struct pathconf_args {
 2427         char    *path;
 2428         int     name;
 2429 };
 2430 #endif
 2431 int
 2432 sys_pathconf(td, uap)
 2433         struct thread *td;
 2434         register struct pathconf_args /* {
 2435                 char *path;
 2436                 int name;
 2437         } */ *uap;
 2438 {
 2439 
 2440         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 2441 }
 2442 
 2443 #ifndef _SYS_SYSPROTO_H_
 2444 struct lpathconf_args {
 2445         char    *path;
 2446         int     name;
 2447 };
 2448 #endif
 2449 int
 2450 sys_lpathconf(td, uap)
 2451         struct thread *td;
 2452         register struct lpathconf_args /* {
 2453                 char *path;
 2454                 int name;
 2455         } */ *uap;
 2456 {
 2457 
 2458         return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 2459             NOFOLLOW));
 2460 }
 2461 
 2462 int
 2463 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
 2464     u_long flags)
 2465 {
 2466         struct nameidata nd;
 2467         int error;
 2468 
 2469         NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 2470             pathseg, path, td);
 2471         if ((error = namei(&nd)) != 0)
 2472                 return (error);
 2473         NDFREE(&nd, NDF_ONLY_PNBUF);
 2474 
 2475         /* If asynchronous I/O is available, it works for all files. */
 2476         if (name == _PC_ASYNC_IO)
 2477                 td->td_retval[0] = async_io_version;
 2478         else
 2479                 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 2480         vput(nd.ni_vp);
 2481         return (error);
 2482 }
 2483 
 2484 /*
 2485  * Return target name of a symbolic link.
 2486  */
 2487 #ifndef _SYS_SYSPROTO_H_
 2488 struct readlink_args {
 2489         char    *path;
 2490         char    *buf;
 2491         size_t  count;
 2492 };
 2493 #endif
 2494 int
 2495 sys_readlink(td, uap)
 2496         struct thread *td;
 2497         register struct readlink_args /* {
 2498                 char *path;
 2499                 char *buf;
 2500                 size_t count;
 2501         } */ *uap;
 2502 {
 2503 
 2504         return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
 2505             UIO_USERSPACE, uap->count));
 2506 }
 2507 #ifndef _SYS_SYSPROTO_H_
 2508 struct readlinkat_args {
 2509         int     fd;
 2510         char    *path;
 2511         char    *buf;
 2512         size_t  bufsize;
 2513 };
 2514 #endif
 2515 int
 2516 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 2517 {
 2518 
 2519         return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 2520             uap->buf, UIO_USERSPACE, uap->bufsize));
 2521 }
 2522 
 2523 int
 2524 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
 2525     enum uio_seg bufseg, size_t count)
 2526 {
 2527 
 2528         return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
 2529             count));
 2530 }
 2531 
 2532 int
 2533 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2534     char *buf, enum uio_seg bufseg, size_t count)
 2535 {
 2536         struct vnode *vp;
 2537         struct iovec aiov;
 2538         struct uio auio;
 2539         struct nameidata nd;
 2540         int error;
 2541 
 2542         if (count > IOSIZE_MAX)
 2543                 return (EINVAL);
 2544 
 2545         NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 2546             pathseg, path, fd, td);
 2547 
 2548         if ((error = namei(&nd)) != 0)
 2549                 return (error);
 2550         NDFREE(&nd, NDF_ONLY_PNBUF);
 2551         vp = nd.ni_vp;
 2552 #ifdef MAC
 2553         error = mac_vnode_check_readlink(td->td_ucred, vp);
 2554         if (error != 0) {
 2555                 vput(vp);
 2556                 return (error);
 2557         }
 2558 #endif
 2559         if (vp->v_type != VLNK)
 2560                 error = EINVAL;
 2561         else {
 2562                 aiov.iov_base = buf;
 2563                 aiov.iov_len = count;
 2564                 auio.uio_iov = &aiov;
 2565                 auio.uio_iovcnt = 1;
 2566                 auio.uio_offset = 0;
 2567                 auio.uio_rw = UIO_READ;
 2568                 auio.uio_segflg = bufseg;
 2569                 auio.uio_td = td;
 2570                 auio.uio_resid = count;
 2571                 error = VOP_READLINK(vp, &auio, td->td_ucred);
 2572                 td->td_retval[0] = count - auio.uio_resid;
 2573         }
 2574         vput(vp);
 2575         return (error);
 2576 }
 2577 
 2578 /*
 2579  * Common implementation code for chflags() and fchflags().
 2580  */
 2581 static int
 2582 setfflags(td, vp, flags)
 2583         struct thread *td;
 2584         struct vnode *vp;
 2585         u_long flags;
 2586 {
 2587         struct mount *mp;
 2588         struct vattr vattr;
 2589         int error;
 2590 
 2591         /* We can't support the value matching VNOVAL. */
 2592         if (flags == VNOVAL)
 2593                 return (EOPNOTSUPP);
 2594 
 2595         /*
 2596          * Prevent non-root users from setting flags on devices.  When
 2597          * a device is reused, users can retain ownership of the device
 2598          * if they are allowed to set flags and programs assume that
 2599          * chown can't fail when done as root.
 2600          */
 2601         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 2602                 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 2603                 if (error != 0)
 2604                         return (error);
 2605         }
 2606 
 2607         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2608                 return (error);
 2609         VATTR_NULL(&vattr);
 2610         vattr.va_flags = flags;
 2611         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2612 #ifdef MAC
 2613         error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 2614         if (error == 0)
 2615 #endif
 2616                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 2617         VOP_UNLOCK(vp, 0);
 2618         vn_finished_write(mp);
 2619         return (error);
 2620 }
 2621 
 2622 /*
 2623  * Change flags of a file given a path name.
 2624  */
 2625 #ifndef _SYS_SYSPROTO_H_
 2626 struct chflags_args {
 2627         const char *path;
 2628         u_long  flags;
 2629 };
 2630 #endif
 2631 int
 2632 sys_chflags(td, uap)
 2633         struct thread *td;
 2634         register struct chflags_args /* {
 2635                 const char *path;
 2636                 u_long flags;
 2637         } */ *uap;
 2638 {
 2639 
 2640         return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags));
 2641 }
 2642 
 2643 #ifndef _SYS_SYSPROTO_H_
 2644 struct chflagsat_args {
 2645         int     fd;
 2646         const char *path;
 2647         u_long  flags;
 2648         int     atflag;
 2649 }
 2650 #endif
 2651 int
 2652 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 2653 {
 2654         int fd = uap->fd;
 2655         const char *path = uap->path;
 2656         u_long flags = uap->flags;
 2657         int atflag = uap->atflag;
 2658 
 2659         if (atflag & ~AT_SYMLINK_NOFOLLOW)
 2660                 return (EINVAL);
 2661 
 2662         return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 2663 }
 2664 
 2665 static int
 2666 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg,
 2667     u_long flags)
 2668 {
 2669 
 2670         return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0));
 2671 }
 2672 
 2673 /*
 2674  * Same as chflags() but doesn't follow symlinks.
 2675  */
 2676 int
 2677 sys_lchflags(td, uap)
 2678         struct thread *td;
 2679         register struct lchflags_args /* {
 2680                 const char *path;
 2681                 u_long flags;
 2682         } */ *uap;
 2683 {
 2684 
 2685         return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2686             uap->flags, AT_SYMLINK_NOFOLLOW));
 2687 }
 2688 
 2689 static int
 2690 kern_chflagsat(struct thread *td, int fd, const char *path,
 2691     enum uio_seg pathseg, u_long flags, int atflag)
 2692 {
 2693         struct nameidata nd;
 2694         cap_rights_t rights;
 2695         int error, follow;
 2696 
 2697         AUDIT_ARG_FFLAGS(flags);
 2698         follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2699         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2700             cap_rights_init(&rights, CAP_FCHFLAGS), td);
 2701         if ((error = namei(&nd)) != 0)
 2702                 return (error);
 2703         NDFREE(&nd, NDF_ONLY_PNBUF);
 2704         error = setfflags(td, nd.ni_vp, flags);
 2705         vrele(nd.ni_vp);
 2706         return (error);
 2707 }
 2708 
 2709 /*
 2710  * Change flags of a file given a file descriptor.
 2711  */
 2712 #ifndef _SYS_SYSPROTO_H_
 2713 struct fchflags_args {
 2714         int     fd;
 2715         u_long  flags;
 2716 };
 2717 #endif
 2718 int
 2719 sys_fchflags(td, uap)
 2720         struct thread *td;
 2721         register struct fchflags_args /* {
 2722                 int fd;
 2723                 u_long flags;
 2724         } */ *uap;
 2725 {
 2726         struct file *fp;
 2727         cap_rights_t rights;
 2728         int error;
 2729 
 2730         AUDIT_ARG_FD(uap->fd);
 2731         AUDIT_ARG_FFLAGS(uap->flags);
 2732         error = getvnode(td->td_proc->p_fd, uap->fd,
 2733             cap_rights_init(&rights, CAP_FCHFLAGS), &fp);
 2734         if (error != 0)
 2735                 return (error);
 2736 #ifdef AUDIT
 2737         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 2738         AUDIT_ARG_VNODE1(fp->f_vnode);
 2739         VOP_UNLOCK(fp->f_vnode, 0);
 2740 #endif
 2741         error = setfflags(td, fp->f_vnode, uap->flags);
 2742         fdrop(fp, td);
 2743         return (error);
 2744 }
 2745 
 2746 /*
 2747  * Common implementation code for chmod(), lchmod() and fchmod().
 2748  */
 2749 int
 2750 setfmode(td, cred, vp, mode)
 2751         struct thread *td;
 2752         struct ucred *cred;
 2753         struct vnode *vp;
 2754         int mode;
 2755 {
 2756         struct mount *mp;
 2757         struct vattr vattr;
 2758         int error;
 2759 
 2760         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2761                 return (error);
 2762         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2763         VATTR_NULL(&vattr);
 2764         vattr.va_mode = mode & ALLPERMS;
 2765 #ifdef MAC
 2766         error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 2767         if (error == 0)
 2768 #endif
 2769                 error = VOP_SETATTR(vp, &vattr, cred);
 2770         VOP_UNLOCK(vp, 0);
 2771         vn_finished_write(mp);
 2772         return (error);
 2773 }
 2774 
 2775 /*
 2776  * Change mode of a file given path name.
 2777  */
 2778 #ifndef _SYS_SYSPROTO_H_
 2779 struct chmod_args {
 2780         char    *path;
 2781         int     mode;
 2782 };
 2783 #endif
 2784 int
 2785 sys_chmod(td, uap)
 2786         struct thread *td;
 2787         register struct chmod_args /* {
 2788                 char *path;
 2789                 int mode;
 2790         } */ *uap;
 2791 {
 2792 
 2793         return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
 2794 }
 2795 
 2796 #ifndef _SYS_SYSPROTO_H_
 2797 struct fchmodat_args {
 2798         int     dirfd;
 2799         char    *path;
 2800         mode_t  mode;
 2801         int     flag;
 2802 }
 2803 #endif
 2804 int
 2805 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 2806 {
 2807         int flag = uap->flag;
 2808         int fd = uap->fd;
 2809         char *path = uap->path;
 2810         mode_t mode = uap->mode;
 2811 
 2812         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2813                 return (EINVAL);
 2814 
 2815         return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 2816 }
 2817 
 2818 int
 2819 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
 2820 {
 2821 
 2822         return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
 2823 }
 2824 
 2825 /*
 2826  * Change mode of a file given path name (don't follow links.)
 2827  */
 2828 #ifndef _SYS_SYSPROTO_H_
 2829 struct lchmod_args {
 2830         char    *path;
 2831         int     mode;
 2832 };
 2833 #endif
 2834 int
 2835 sys_lchmod(td, uap)
 2836         struct thread *td;
 2837         register struct lchmod_args /* {
 2838                 char *path;
 2839                 int mode;
 2840         } */ *uap;
 2841 {
 2842 
 2843         return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 2844             uap->mode, AT_SYMLINK_NOFOLLOW));
 2845 }
 2846 
 2847 int
 2848 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2849     mode_t mode, int flag)
 2850 {
 2851         struct nameidata nd;
 2852         cap_rights_t rights;
 2853         int error, follow;
 2854 
 2855         AUDIT_ARG_MODE(mode);
 2856         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2857         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2858             cap_rights_init(&rights, CAP_FCHMOD), td);
 2859         if ((error = namei(&nd)) != 0)
 2860                 return (error);
 2861         NDFREE(&nd, NDF_ONLY_PNBUF);
 2862         error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 2863         vrele(nd.ni_vp);
 2864         return (error);
 2865 }
 2866 
 2867 /*
 2868  * Change mode of a file given a file descriptor.
 2869  */
 2870 #ifndef _SYS_SYSPROTO_H_
 2871 struct fchmod_args {
 2872         int     fd;
 2873         int     mode;
 2874 };
 2875 #endif
 2876 int
 2877 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 2878 {
 2879         struct file *fp;
 2880         cap_rights_t rights;
 2881         int error;
 2882 
 2883         AUDIT_ARG_FD(uap->fd);
 2884         AUDIT_ARG_MODE(uap->mode);
 2885 
 2886         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
 2887         if (error != 0)
 2888                 return (error);
 2889         error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 2890         fdrop(fp, td);
 2891         return (error);
 2892 }
 2893 
 2894 /*
 2895  * Common implementation for chown(), lchown(), and fchown()
 2896  */
 2897 int
 2898 setfown(td, cred, vp, uid, gid)
 2899         struct thread *td;
 2900         struct ucred *cred;
 2901         struct vnode *vp;
 2902         uid_t uid;
 2903         gid_t gid;
 2904 {
 2905         struct mount *mp;
 2906         struct vattr vattr;
 2907         int error;
 2908 
 2909         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 2910                 return (error);
 2911         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2912         VATTR_NULL(&vattr);
 2913         vattr.va_uid = uid;
 2914         vattr.va_gid = gid;
 2915 #ifdef MAC
 2916         error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 2917             vattr.va_gid);
 2918         if (error == 0)
 2919 #endif
 2920                 error = VOP_SETATTR(vp, &vattr, cred);
 2921         VOP_UNLOCK(vp, 0);
 2922         vn_finished_write(mp);
 2923         return (error);
 2924 }
 2925 
 2926 /*
 2927  * Set ownership given a path name.
 2928  */
 2929 #ifndef _SYS_SYSPROTO_H_
 2930 struct chown_args {
 2931         char    *path;
 2932         int     uid;
 2933         int     gid;
 2934 };
 2935 #endif
 2936 int
 2937 sys_chown(td, uap)
 2938         struct thread *td;
 2939         register struct chown_args /* {
 2940                 char *path;
 2941                 int uid;
 2942                 int gid;
 2943         } */ *uap;
 2944 {
 2945 
 2946         return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 2947 }
 2948 
 2949 #ifndef _SYS_SYSPROTO_H_
 2950 struct fchownat_args {
 2951         int fd;
 2952         const char * path;
 2953         uid_t uid;
 2954         gid_t gid;
 2955         int flag;
 2956 };
 2957 #endif
 2958 int
 2959 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 2960 {
 2961         int flag;
 2962 
 2963         flag = uap->flag;
 2964         if (flag & ~AT_SYMLINK_NOFOLLOW)
 2965                 return (EINVAL);
 2966 
 2967         return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 2968             uap->gid, uap->flag));
 2969 }
 2970 
 2971 int
 2972 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 2973     int gid)
 2974 {
 2975 
 2976         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
 2977 }
 2978 
 2979 int
 2980 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 2981     int uid, int gid, int flag)
 2982 {
 2983         struct nameidata nd;
 2984         cap_rights_t rights;
 2985         int error, follow;
 2986 
 2987         AUDIT_ARG_OWNER(uid, gid);
 2988         follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 2989         NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 2990             cap_rights_init(&rights, CAP_FCHOWN), td);
 2991 
 2992         if ((error = namei(&nd)) != 0)
 2993                 return (error);
 2994         NDFREE(&nd, NDF_ONLY_PNBUF);
 2995         error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 2996         vrele(nd.ni_vp);
 2997         return (error);
 2998 }
 2999 
 3000 /*
 3001  * Set ownership given a path name, do not cross symlinks.
 3002  */
 3003 #ifndef _SYS_SYSPROTO_H_
 3004 struct lchown_args {
 3005         char    *path;
 3006         int     uid;
 3007         int     gid;
 3008 };
 3009 #endif
 3010 int
 3011 sys_lchown(td, uap)
 3012         struct thread *td;
 3013         register struct lchown_args /* {
 3014                 char *path;
 3015                 int uid;
 3016                 int gid;
 3017         } */ *uap;
 3018 {
 3019 
 3020         return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
 3021 }
 3022 
 3023 int
 3024 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
 3025     int gid)
 3026 {
 3027 
 3028         return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
 3029             AT_SYMLINK_NOFOLLOW));
 3030 }
 3031 
 3032 /*
 3033  * Set ownership given a file descriptor.
 3034  */
 3035 #ifndef _SYS_SYSPROTO_H_
 3036 struct fchown_args {
 3037         int     fd;
 3038         int     uid;
 3039         int     gid;
 3040 };
 3041 #endif
 3042 int
 3043 sys_fchown(td, uap)
 3044         struct thread *td;
 3045         register struct fchown_args /* {
 3046                 int fd;
 3047                 int uid;
 3048                 int gid;
 3049         } */ *uap;
 3050 {
 3051         struct file *fp;
 3052         cap_rights_t rights;
 3053         int error;
 3054 
 3055         AUDIT_ARG_FD(uap->fd);
 3056         AUDIT_ARG_OWNER(uap->uid, uap->gid);
 3057         error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
 3058         if (error != 0)
 3059                 return (error);
 3060         error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 3061         fdrop(fp, td);
 3062         return (error);
 3063 }
 3064 
 3065 /*
 3066  * Common implementation code for utimes(), lutimes(), and futimes().
 3067  */
 3068 static int
 3069 getutimes(usrtvp, tvpseg, tsp)
 3070         const struct timeval *usrtvp;
 3071         enum uio_seg tvpseg;
 3072         struct timespec *tsp;
 3073 {
 3074         struct timeval tv[2];
 3075         const struct timeval *tvp;
 3076         int error;
 3077 
 3078         if (usrtvp == NULL) {
 3079                 vfs_timestamp(&tsp[0]);
 3080                 tsp[1] = tsp[0];
 3081         } else {
 3082                 if (tvpseg == UIO_SYSSPACE) {
 3083                         tvp = usrtvp;
 3084                 } else {
 3085                         if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 3086                                 return (error);
 3087                         tvp = tv;
 3088                 }
 3089 
 3090                 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 3091                     tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 3092                         return (EINVAL);
 3093                 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 3094                 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 3095         }
 3096         return (0);
 3097 }
 3098 
 3099 /*
 3100  * Common implementation code for utimes(), lutimes(), and futimes().
 3101  */
 3102 static int
 3103 setutimes(td, vp, ts, numtimes, nullflag)
 3104         struct thread *td;
 3105         struct vnode *vp;
 3106         const struct timespec *ts;
 3107         int numtimes;
 3108         int nullflag;
 3109 {
 3110         struct mount *mp;
 3111         struct vattr vattr;
 3112         int error, setbirthtime;
 3113 
 3114         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 3115                 return (error);
 3116         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3117         setbirthtime = 0;
 3118         if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 3119             timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 3120                 setbirthtime = 1;
 3121         VATTR_NULL(&vattr);
 3122         vattr.va_atime = ts[0];
 3123         vattr.va_mtime = ts[1];
 3124         if (setbirthtime)
 3125                 vattr.va_birthtime = ts[1];
 3126         if (numtimes > 2)
 3127                 vattr.va_birthtime = ts[2];
 3128         if (nullflag)
 3129                 vattr.va_vaflags |= VA_UTIMES_NULL;
 3130 #ifdef MAC
 3131         error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 3132             vattr.va_mtime);
 3133 #endif
 3134         if (error == 0)
 3135                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3136         VOP_UNLOCK(vp, 0);
 3137         vn_finished_write(mp);
 3138         return (error);
 3139 }
 3140 
 3141 /*
 3142  * Set the access and modification times of a file.
 3143  */
 3144 #ifndef _SYS_SYSPROTO_H_
 3145 struct utimes_args {
 3146         char    *path;
 3147         struct  timeval *tptr;
 3148 };
 3149 #endif
 3150 int
 3151 sys_utimes(td, uap)
 3152         struct thread *td;
 3153         register struct utimes_args /* {
 3154                 char *path;
 3155                 struct timeval *tptr;
 3156         } */ *uap;
 3157 {
 3158 
 3159         return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3160             UIO_USERSPACE));
 3161 }
 3162 
 3163 #ifndef _SYS_SYSPROTO_H_
 3164 struct futimesat_args {
 3165         int fd;
 3166         const char * path;
 3167         const struct timeval * times;
 3168 };
 3169 #endif
 3170 int
 3171 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 3172 {
 3173 
 3174         return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 3175             uap->times, UIO_USERSPACE));
 3176 }
 3177 
 3178 int
 3179 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 3180     struct timeval *tptr, enum uio_seg tptrseg)
 3181 {
 3182 
 3183         return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
 3184 }
 3185 
 3186 int
 3187 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
 3188     struct timeval *tptr, enum uio_seg tptrseg)
 3189 {
 3190         struct nameidata nd;
 3191         struct timespec ts[2];
 3192         cap_rights_t rights;
 3193         int error;
 3194 
 3195         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3196                 return (error);
 3197         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 3198             cap_rights_init(&rights, CAP_FUTIMES), td);
 3199 
 3200         if ((error = namei(&nd)) != 0)
 3201                 return (error);
 3202         NDFREE(&nd, NDF_ONLY_PNBUF);
 3203         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3204         vrele(nd.ni_vp);
 3205         return (error);
 3206 }
 3207 
 3208 /*
 3209  * Set the access and modification times of a file.
 3210  */
 3211 #ifndef _SYS_SYSPROTO_H_
 3212 struct lutimes_args {
 3213         char    *path;
 3214         struct  timeval *tptr;
 3215 };
 3216 #endif
 3217 int
 3218 sys_lutimes(td, uap)
 3219         struct thread *td;
 3220         register struct lutimes_args /* {
 3221                 char *path;
 3222                 struct timeval *tptr;
 3223         } */ *uap;
 3224 {
 3225 
 3226         return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 3227             UIO_USERSPACE));
 3228 }
 3229 
 3230 int
 3231 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 3232     struct timeval *tptr, enum uio_seg tptrseg)
 3233 {
 3234         struct timespec ts[2];
 3235         struct nameidata nd;
 3236         int error;
 3237 
 3238         if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 3239                 return (error);
 3240         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 3241         if ((error = namei(&nd)) != 0)
 3242                 return (error);
 3243         NDFREE(&nd, NDF_ONLY_PNBUF);
 3244         error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 3245         vrele(nd.ni_vp);
 3246         return (error);
 3247 }
 3248 
 3249 /*
 3250  * Set the access and modification times of a file.
 3251  */
 3252 #ifndef _SYS_SYSPROTO_H_
 3253 struct futimes_args {
 3254         int     fd;
 3255         struct  timeval *tptr;
 3256 };
 3257 #endif
 3258 int
 3259 sys_futimes(td, uap)
 3260         struct thread *td;
 3261         register struct futimes_args /* {
 3262                 int  fd;
 3263                 struct timeval *tptr;
 3264         } */ *uap;
 3265 {
 3266 
 3267         return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 3268 }
 3269 
 3270 int
 3271 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 3272     enum uio_seg tptrseg)
 3273 {
 3274         struct timespec ts[2];
 3275         struct file *fp;
 3276         cap_rights_t rights;
 3277         int error;
 3278 
 3279         AUDIT_ARG_FD(fd);
 3280         error = getutimes(tptr, tptrseg, ts);
 3281         if (error != 0)
 3282                 return (error);
 3283         error = getvnode(td->td_proc->p_fd, fd,
 3284             cap_rights_init(&rights, CAP_FUTIMES), &fp);
 3285         if (error != 0)
 3286                 return (error);
 3287 #ifdef AUDIT
 3288         vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 3289         AUDIT_ARG_VNODE1(fp->f_vnode);
 3290         VOP_UNLOCK(fp->f_vnode, 0);
 3291 #endif
 3292         error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 3293         fdrop(fp, td);
 3294         return (error);
 3295 }
 3296 
 3297 /*
 3298  * Truncate a file given its path name.
 3299  */
 3300 #ifndef _SYS_SYSPROTO_H_
 3301 struct truncate_args {
 3302         char    *path;
 3303         int     pad;
 3304         off_t   length;
 3305 };
 3306 #endif
 3307 int
 3308 sys_truncate(td, uap)
 3309         struct thread *td;
 3310         register struct truncate_args /* {
 3311                 char *path;
 3312                 int pad;
 3313                 off_t length;
 3314         } */ *uap;
 3315 {
 3316 
 3317         return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 3318 }
 3319 
 3320 int
 3321 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 3322 {
 3323         struct mount *mp;
 3324         struct vnode *vp;
 3325         void *rl_cookie;
 3326         struct vattr vattr;
 3327         struct nameidata nd;
 3328         int error;
 3329 
 3330         if (length < 0)
 3331                 return(EINVAL);
 3332         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 3333         if ((error = namei(&nd)) != 0)
 3334                 return (error);
 3335         vp = nd.ni_vp;
 3336         rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 3337         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 3338                 vn_rangelock_unlock(vp, rl_cookie);
 3339                 vrele(vp);
 3340                 return (error);
 3341         }
 3342         NDFREE(&nd, NDF_ONLY_PNBUF);
 3343         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 3344         if (vp->v_type == VDIR)
 3345                 error = EISDIR;
 3346 #ifdef MAC
 3347         else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 3348         }
 3349 #endif
 3350         else if ((error = vn_writechk(vp)) == 0 &&
 3351             (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 3352                 VATTR_NULL(&vattr);
 3353                 vattr.va_size = length;
 3354                 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 3355         }
 3356         VOP_UNLOCK(vp, 0);
 3357         vn_finished_write(mp);
 3358         vn_rangelock_unlock(vp, rl_cookie);
 3359         vrele(vp);
 3360         return (error);
 3361 }
 3362 
 3363 #if defined(COMPAT_43)
 3364 /*
 3365  * Truncate a file given its path name.
 3366  */
 3367 #ifndef _SYS_SYSPROTO_H_
 3368 struct otruncate_args {
 3369         char    *path;
 3370         long    length;
 3371 };
 3372 #endif
 3373 int
 3374 otruncate(td, uap)
 3375         struct thread *td;
 3376         register struct otruncate_args /* {
 3377                 char *path;
 3378                 long length;
 3379         } */ *uap;
 3380 {
 3381         struct truncate_args /* {
 3382                 char *path;
 3383                 int pad;
 3384                 off_t length;
 3385         } */ nuap;
 3386 
 3387         nuap.path = uap->path;
 3388         nuap.length = uap->length;
 3389         return (sys_truncate(td, &nuap));
 3390 }
 3391 #endif /* COMPAT_43 */
 3392 
 3393 /* Versions with the pad argument */
 3394 int
 3395 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 3396 {
 3397         struct truncate_args ouap;
 3398 
 3399         ouap.path = uap->path;
 3400         ouap.length = uap->length;
 3401         return (sys_truncate(td, &ouap));
 3402 }
 3403 
 3404 int
 3405 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 3406 {
 3407         struct ftruncate_args ouap;
 3408 
 3409         ouap.fd = uap->fd;
 3410         ouap.length = uap->length;
 3411         return (sys_ftruncate(td, &ouap));
 3412 }
 3413 
 3414 /*
 3415  * Sync an open file.
 3416  */
 3417 #ifndef _SYS_SYSPROTO_H_
 3418 struct fsync_args {
 3419         int     fd;
 3420 };
 3421 #endif
 3422 int
 3423 sys_fsync(td, uap)
 3424         struct thread *td;
 3425         struct fsync_args /* {
 3426                 int fd;
 3427         } */ *uap;
 3428 {
 3429         struct vnode *vp;
 3430         struct mount *mp;
 3431         struct file *fp;
 3432         cap_rights_t rights;
 3433         int error, lock_flags;
 3434 
 3435         AUDIT_ARG_FD(uap->fd);
 3436         error = getvnode(td->td_proc->p_fd, uap->fd,
 3437             cap_rights_init(&rights, CAP_FSYNC), &fp);
 3438         if (error != 0)
 3439                 return (error);
 3440         vp = fp->f_vnode;
 3441         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 3442         if (error != 0)
 3443                 goto drop;
 3444         if (MNT_SHARED_WRITES(mp) ||
 3445             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 3446                 lock_flags = LK_SHARED;
 3447         } else {
 3448                 lock_flags = LK_EXCLUSIVE;
 3449         }
 3450         vn_lock(vp, lock_flags | LK_RETRY);
 3451         AUDIT_ARG_VNODE1(vp);
 3452         if (vp->v_object != NULL) {
 3453                 VM_OBJECT_WLOCK(vp->v_object);
 3454                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 3455                 VM_OBJECT_WUNLOCK(vp->v_object);
 3456         }
 3457         error = VOP_FSYNC(vp, MNT_WAIT, td);
 3458 
 3459         VOP_UNLOCK(vp, 0);
 3460         vn_finished_write(mp);
 3461 drop:
 3462         fdrop(fp, td);
 3463         return (error);
 3464 }
 3465 
 3466 /*
 3467  * Rename files.  Source and destination must either both be directories, or
 3468  * both not be directories.  If target is a directory, it must be empty.
 3469  */
 3470 #ifndef _SYS_SYSPROTO_H_
 3471 struct rename_args {
 3472         char    *from;
 3473         char    *to;
 3474 };
 3475 #endif
 3476 int
 3477 sys_rename(td, uap)
 3478         struct thread *td;
 3479         register struct rename_args /* {
 3480                 char *from;
 3481                 char *to;
 3482         } */ *uap;
 3483 {
 3484 
 3485         return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
 3486 }
 3487 
 3488 #ifndef _SYS_SYSPROTO_H_
 3489 struct renameat_args {
 3490         int     oldfd;
 3491         char    *old;
 3492         int     newfd;
 3493         char    *new;
 3494 };
 3495 #endif
 3496 int
 3497 sys_renameat(struct thread *td, struct renameat_args *uap)
 3498 {
 3499 
 3500         return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 3501             UIO_USERSPACE));
 3502 }
 3503 
 3504 int
 3505 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
 3506 {
 3507 
 3508         return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
 3509 }
 3510 
      /*
       * Rename the file named by old (relative to oldfd) to new (relative to
       * newfd).
       *
       * The source is looked up with a DELETE operation and CAP_RENAMEAT, the
       * target with a RENAME operation and CAP_LINKAT.  After the type and
       * identity checks below succeed the actual work is done by VOP_RENAME().
       *
       * Returns 0 on success (including the case where source and target are
       * the same vnode), EINVAL when the source is the target's parent
       * directory or when the target lookup fails with EISDIR for a directory
       * source, ENOTDIR/EISDIR on a directory/non-directory mismatch, or the
       * first error from the lookups, MAC checks, vn_start_write(), the
       * capability check or VOP_RENAME().
       */
 3511 int
 3512 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 3513     enum uio_seg pathseg)
 3514 {
 3515         struct mount *mp = NULL;
 3516         struct vnode *tvp, *fvp, *tdvp;
 3517         struct nameidata fromnd, tond;
 3518         cap_rights_t rights;
 3519         int error;
 3520 
 3521         bwillwrite();
      /*
       * With MAC the source parent and leaf are requested locked
       * (LOCKPARENT | LOCKLEAF) and are unlocked again right after
       * mac_vnode_check_rename_from(); without MAC only WANTPARENT is used.
       */
 3522 #ifdef MAC
 3523         NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 3524             AUDITVNODE1, pathseg, old, oldfd,
 3525             cap_rights_init(&rights, CAP_RENAMEAT), td);
 3526 #else
 3527         NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 3528             pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td);
 3529 #endif
 3530 
 3531         if ((error = namei(&fromnd)) != 0)
 3532                 return (error);
 3533 #ifdef MAC
 3534         error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 3535             fromnd.ni_vp, &fromnd.ni_cnd);
 3536         VOP_UNLOCK(fromnd.ni_dvp, 0);
 3537         if (fromnd.ni_dvp != fromnd.ni_vp)
 3538                 VOP_UNLOCK(fromnd.ni_vp, 0);
 3539 #endif
 3540         fvp = fromnd.ni_vp;
              /* error is 0 here unless the MAC check above failed. */
 3541         if (error == 0)
 3542                 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
 3543         if (error != 0) {
                      /* Drop the source parent and leaf references and bail out. */
 3544                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3545                 vrele(fromnd.ni_dvp);
 3546                 vrele(fvp);
 3547                 goto out1;
 3548         }
 3549         NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 3550             SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 3551             cap_rights_init(&rights, CAP_LINKAT), td);
              /* A directory source marks the target component WILLBEDIR. */
 3552         if (fromnd.ni_vp->v_type == VDIR)
 3553                 tond.ni_cnd.cn_flags |= WILLBEDIR;
 3554         if ((error = namei(&tond)) != 0) {
 3555                 /* Translate error code for rename("dir1", "dir2/."). */
 3556                 if (error == EISDIR && fvp->v_type == VDIR)
 3557                         error = EINVAL;
 3558                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3559                 vrele(fromnd.ni_dvp);
 3560                 vrele(fvp);
 3561                 vn_finished_write(mp);
 3562                 goto out1;
 3563         }
 3564         tdvp = tond.ni_dvp;
 3565         tvp = tond.ni_vp;
              /* An existing target must match the source's directory-ness. */
 3566         if (tvp != NULL) {
 3567                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 3568                         error = ENOTDIR;
 3569                         goto out;
 3570                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 3571                         error = EISDIR;
 3572                         goto out;
 3573                 }
 3574 #ifdef CAPABILITIES
 3575                 if (newfd != AT_FDCWD) {
 3576                         /*
 3577                          * If the target already exists we require CAP_UNLINKAT
 3578                          * from 'newfd'.
 3579                          */
 3580                         error = cap_check(&tond.ni_filecaps.fc_rights,
 3581                             cap_rights_init(&rights, CAP_UNLINKAT));
 3582                         if (error != 0)
 3583                                 goto out;
 3584                 }
 3585 #endif
 3586         }
              /* The source vnode is itself the target's parent directory. */
 3587         if (fvp == tdvp) {
 3588                 error = EINVAL;
 3589                 goto out;
 3590         }
 3591         /*
 3592          * If the source is the same as the destination (that is, if they
 3593          * are links to the same vnode), then there is nothing to do.
 3594          */
              /*
               * -1 is an internal marker: it takes the cleanup branch below
               * instead of calling VOP_RENAME() and is turned into 0 at out1.
               */
 3595         if (fvp == tvp)
 3596                 error = -1;
 3597 #ifdef MAC
 3598         else
 3599                 error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 3600                     tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 3601 #endif
 3602 out:
 3603         if (error == 0) {
                      /*
                       * Only the path name buffers are freed on this path; the
                       * four vnodes are not released here (presumably
                       * VOP_RENAME() disposes of them -- confirm against
                       * VOP_RENAME(9)).
                       */
 3604                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 3605                     tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 3606                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3607                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3608         } else {
                      /*
                       * VOP_RENAME() was not called: release the target leaf and
                       * parent (vput, or vrele for the parent when it is the
                       * same vnode as the leaf) and drop the source references.
                       */
 3609                 NDFREE(&fromnd, NDF_ONLY_PNBUF);
 3610                 NDFREE(&tond, NDF_ONLY_PNBUF);
 3611                 if (tvp != NULL)
 3612                         vput(tvp);
 3613                 if (tdvp == tvp)
 3614                         vrele(tdvp);
 3615                 else
 3616                         vput(tdvp);
 3617                 vrele(fromnd.ni_dvp);
 3618                 vrele(fvp);
 3619         }
 3620         vrele(tond.ni_startdir);
 3621         vn_finished_write(mp);
 3622 out1:
              /* Both lookups used SAVESTART; release the source start directory. */
 3623         if (fromnd.ni_startdir)
 3624                 vrele(fromnd.ni_startdir);
 3625         if (error == -1)
 3626                 return (0);
 3627         return (error);
 3628 }
 3629 
 3630 /*
 3631  * Make a directory file.
 3632  */
 3633 #ifndef _SYS_SYSPROTO_H_
 3634 struct mkdir_args {
 3635         char    *path;
 3636         int     mode;
 3637 };
 3638 #endif
 3639 int
 3640 sys_mkdir(td, uap)
 3641         struct thread *td;
 3642         register struct mkdir_args /* {
 3643                 char *path;
 3644                 int mode;
 3645         } */ *uap;
 3646 {
 3647 
 3648         return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
 3649 }
 3650 
 3651 #ifndef _SYS_SYSPROTO_H_
 3652 struct mkdirat_args {
 3653         int     fd;
 3654         char    *path;
 3655         mode_t  mode;
 3656 };
 3657 #endif
 3658 int
 3659 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 3660 {
 3661 
 3662         return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 3663 }
 3664 
 3665 int
 3666 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
 3667 {
 3668 
 3669         return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
 3670 }
 3671 
 3672 int
 3673 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
 3674     int mode)
 3675 {
 3676         struct mount *mp;
 3677         struct vnode *vp;
 3678         struct vattr vattr;
 3679         struct nameidata nd;
 3680         cap_rights_t rights;
 3681         int error;
 3682 
 3683         AUDIT_ARG_MODE(mode);
 3684 restart:
 3685         bwillwrite();
 3686         NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
 3687             segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td);
 3688         nd.ni_cnd.cn_flags |= WILLBEDIR;
 3689         if ((error = namei(&nd)) != 0)
 3690                 return (error);
 3691         vp = nd.ni_vp;
 3692         if (vp != NULL) {
 3693                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3694                 /*
 3695                  * XXX namei called with LOCKPARENT but not LOCKLEAF has
 3696                  * the strange behaviour of leaving the vnode unlocked
 3697                  * if the target is the same vnode as the parent.
 3698                  */
 3699                 if (vp == nd.ni_dvp)
 3700                         vrele(nd.ni_dvp);
 3701                 else
 3702                         vput(nd.ni_dvp);
 3703                 vrele(vp);
 3704                 return (EEXIST);
 3705         }
 3706         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3707                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3708                 vput(nd.ni_dvp);
 3709                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3710                         return (error);
 3711                 goto restart;
 3712         }
 3713         VATTR_NULL(&vattr);
 3714         vattr.va_type = VDIR;
 3715         vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 3716 #ifdef MAC
 3717         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 3718             &vattr);
 3719         if (error != 0)
 3720                 goto out;
 3721 #endif
 3722         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 3723 #ifdef MAC
 3724 out:
 3725 #endif
 3726         NDFREE(&nd, NDF_ONLY_PNBUF);
 3727         vput(nd.ni_dvp);
 3728         if (error == 0)
 3729                 vput(nd.ni_vp);
 3730         vn_finished_write(mp);
 3731         return (error);
 3732 }
 3733 
 3734 /*
 3735  * Remove a directory file.
 3736  */
 3737 #ifndef _SYS_SYSPROTO_H_
 3738 struct rmdir_args {
 3739         char    *path;
 3740 };
 3741 #endif
 3742 int
 3743 sys_rmdir(td, uap)
 3744         struct thread *td;
 3745         struct rmdir_args /* {
 3746                 char *path;
 3747         } */ *uap;
 3748 {
 3749 
 3750         return (kern_rmdir(td, uap->path, UIO_USERSPACE));
 3751 }
 3752 
 3753 int
 3754 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
 3755 {
 3756 
 3757         return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
 3758 }
 3759 
 3760 int
 3761 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 3762 {
 3763         struct mount *mp;
 3764         struct vnode *vp;
 3765         struct nameidata nd;
 3766         cap_rights_t rights;
 3767         int error;
 3768 
 3769 restart:
 3770         bwillwrite();
 3771         NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 3772             pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 3773         if ((error = namei(&nd)) != 0)
 3774                 return (error);
 3775         vp = nd.ni_vp;
 3776         if (vp->v_type != VDIR) {
 3777                 error = ENOTDIR;
 3778                 goto out;
 3779         }
 3780         /*
 3781          * No rmdir "." please.
 3782          */
 3783         if (nd.ni_dvp == vp) {
 3784                 error = EINVAL;
 3785                 goto out;
 3786         }
 3787         /*
 3788          * The root of a mounted filesystem cannot be deleted.
 3789          */
 3790         if (vp->v_vflag & VV_ROOT) {
 3791                 error = EBUSY;
 3792                 goto out;
 3793         }
 3794 #ifdef MAC
 3795         error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 3796             &nd.ni_cnd);
 3797         if (error != 0)
 3798                 goto out;
 3799 #endif
 3800         if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 3801                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3802                 vput(vp);
 3803                 if (nd.ni_dvp == vp)
 3804                         vrele(nd.ni_dvp);
 3805                 else
 3806                         vput(nd.ni_dvp);
 3807                 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 3808                         return (error);
 3809                 goto restart;
 3810         }
 3811         vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 3812         error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3813         vn_finished_write(mp);
 3814 out:
 3815         NDFREE(&nd, NDF_ONLY_PNBUF);
 3816         vput(vp);
 3817         if (nd.ni_dvp == vp)
 3818                 vrele(nd.ni_dvp);
 3819         else
 3820                 vput(nd.ni_dvp);
 3821         return (error);
 3822 }
 3823 
 3824 #ifdef COMPAT_43
 3825 /*
 3826  * Read a block of directory entries in a filesystem independent format.
 3827  */
 3828 #ifndef _SYS_SYSPROTO_H_
 3829 struct ogetdirentries_args {
 3830         int     fd;
 3831         char    *buf;
 3832         u_int   count;
 3833         long    *basep;
 3834 };
 3835 #endif
 3836 int
 3837 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 3838 {
 3839         long loff;
 3840         int error;
 3841 
 3842         error = kern_ogetdirentries(td, uap, &loff);
 3843         if (error == 0)
 3844                 error = copyout(&loff, uap->basep, sizeof(long));
 3845         return (error);
 3846 }
 3847 
 3848 int
 3849 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 3850     long *ploff)
 3851 {
 3852         struct vnode *vp;
 3853         struct file *fp;
 3854         struct uio auio, kuio;
 3855         struct iovec aiov, kiov;
 3856         struct dirent *dp, *edp;
 3857         cap_rights_t rights;
 3858         caddr_t dirbuf;
 3859         int error, eofflag, readcnt;
 3860         long loff;
 3861         off_t foffset;
 3862 
 3863         /* XXX arbitrary sanity limit on `count'. */
 3864         if (uap->count > 64 * 1024)
 3865                 return (EINVAL);
 3866         error = getvnode(td->td_proc->p_fd, uap->fd,
 3867             cap_rights_init(&rights, CAP_READ), &fp);
 3868         if (error != 0)
 3869                 return (error);
 3870         if ((fp->f_flag & FREAD) == 0) {
 3871                 fdrop(fp, td);
 3872                 return (EBADF);
 3873         }
 3874         vp = fp->f_vnode;
 3875         foffset = foffset_lock(fp, 0);
 3876 unionread:
 3877         if (vp->v_type != VDIR) {
 3878                 foffset_unlock(fp, foffset, 0);
 3879                 fdrop(fp, td);
 3880                 return (EINVAL);
 3881         }
 3882         aiov.iov_base = uap->buf;
 3883         aiov.iov_len = uap->count;
 3884         auio.uio_iov = &aiov;
 3885         auio.uio_iovcnt = 1;
 3886         auio.uio_rw = UIO_READ;
 3887         auio.uio_segflg = UIO_USERSPACE;
 3888         auio.uio_td = td;
 3889         auio.uio_resid = uap->count;
 3890         vn_lock(vp, LK_SHARED | LK_RETRY);
 3891         loff = auio.uio_offset = foffset;
 3892 #ifdef MAC
 3893         error = mac_vnode_check_readdir(td->td_ucred, vp);
 3894         if (error != 0) {
 3895                 VOP_UNLOCK(vp, 0);
 3896                 foffset_unlock(fp, foffset, FOF_NOUPDATE);
 3897                 fdrop(fp, td);
 3898                 return (error);
 3899         }
 3900 #endif
 3901 #       if (BYTE_ORDER != LITTLE_ENDIAN)
 3902                 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 3903                         error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
 3904                             NULL, NULL);
 3905                         foffset = auio.uio_offset;
 3906                 } else
 3907 #       endif
 3908         {
 3909                 kuio = auio;
 3910                 kuio.uio_iov = &kiov;
 3911                 kuio.uio_segflg = UIO_SYSSPACE;
 3912                 kiov.iov_len = uap->count;
 3913                 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
 3914                 kiov.iov_base = dirbuf;
 3915                 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
 3916                             NULL, NULL);
 3917                 foffset = kuio.uio_offset;
 3918                 if (error == 0) {
 3919                         readcnt = uap->count - kuio.uio_resid;
 3920                         edp = (struct dirent *)&dirbuf[readcnt];
 3921                         for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 3922 #                               if (BYTE_ORDER == LITTLE_ENDIAN)
 3923                                         /*
 3924                                          * The expected low byte of
 3925                                          * dp->d_namlen is our dp->d_type.
 3926                                          * The high MBZ byte of dp->d_namlen
 3927                                          * is our dp->d_namlen.
 3928                                          */
 3929                                         dp->d_type = dp->d_namlen;
 3930                                         dp->d_namlen = 0;
 3931 #                               else
 3932                                         /*
 3933                                          * The dp->d_type is the high byte
 3934                                          * of the expected dp->d_namlen,
 3935                                          * so must be zero'ed.
 3936                                          */
 3937                                         dp->d_type = 0;
 3938 #                               endif
 3939                                 if (dp->d_reclen > 0) {
 3940                                         dp = (struct dirent *)
 3941                                             ((char *)dp + dp->d_reclen);
 3942                                 } else {
 3943                                         error = EIO;
 3944                                         break;
 3945                                 }
 3946                         }
 3947                         if (dp >= edp)
 3948                                 error = uiomove(dirbuf, readcnt, &auio);
 3949                 }
 3950                 free(dirbuf, M_TEMP);
 3951         }
 3952         if (error != 0) {
 3953                 VOP_UNLOCK(vp, 0);
 3954                 foffset_unlock(fp, foffset, 0);
 3955                 fdrop(fp, td);
 3956                 return (error);
 3957         }
 3958         if (uap->count == auio.uio_resid &&
 3959             (vp->v_vflag & VV_ROOT) &&
 3960             (vp->v_mount->mnt_flag & MNT_UNION)) {
 3961                 struct vnode *tvp = vp;
 3962                 vp = vp->v_mount->mnt_vnodecovered;
 3963                 VREF(vp);
 3964                 fp->f_vnode = vp;
 3965                 fp->f_data = vp;
 3966                 foffset = 0;
 3967                 vput(tvp);
 3968                 goto unionread;
 3969         }
 3970         VOP_UNLOCK(vp, 0);
 3971         foffset_unlock(fp, foffset, 0);
 3972         fdrop(fp, td);
 3973         td->td_retval[0] = uap->count - auio.uio_resid;
 3974         if (error == 0)
 3975                 *ploff = loff;
 3976         return (error);
 3977 }
 3978 #endif /* COMPAT_43 */
 3979 
 3980 /*
 3981  * Read a block of directory entries in a filesystem independent format.
 3982  */
 3983 #ifndef _SYS_SYSPROTO_H_
 3984 struct getdirentries_args {
 3985         int     fd;
 3986         char    *buf;
 3987         u_int   count;
 3988         long    *basep;
 3989 };
 3990 #endif
 3991 int
 3992 sys_getdirentries(td, uap)
 3993         struct thread *td;
 3994         register struct getdirentries_args /* {
 3995                 int fd;
 3996                 char *buf;
 3997                 u_int count;
 3998                 long *basep;
 3999         } */ *uap;
 4000 {
 4001         long base;
 4002         int error;
 4003 
 4004         error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 4005             NULL, UIO_USERSPACE);
 4006         if (error != 0)
 4007                 return (error);
 4008         if (uap->basep != NULL)
 4009                 error = copyout(&base, uap->basep, sizeof(long));
 4010         return (error);
 4011 }
 4012 
 4013 int
 4014 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
 4015     long *basep, ssize_t *residp, enum uio_seg bufseg)
 4016 {
 4017         struct vnode *vp;
 4018         struct file *fp;
 4019         struct uio auio;
 4020         struct iovec aiov;
 4021         cap_rights_t rights;
 4022         long loff;
 4023         int error, eofflag;
 4024         off_t foffset;
 4025 
 4026         AUDIT_ARG_FD(fd);
 4027         if (count > IOSIZE_MAX)
 4028                 return (EINVAL);
 4029         auio.uio_resid = count;
 4030         error = getvnode(td->td_proc->p_fd, fd,
 4031             cap_rights_init(&rights, CAP_READ), &fp);
 4032         if (error != 0)
 4033                 return (error);
 4034         if ((fp->f_flag & FREAD) == 0) {
 4035                 fdrop(fp, td);
 4036                 return (EBADF);
 4037         }
 4038         vp = fp->f_vnode;
 4039         foffset = foffset_lock(fp, 0);
 4040 unionread:
 4041         if (vp->v_type != VDIR) {
 4042                 error = EINVAL;
 4043                 goto fail;
 4044         }
 4045         aiov.iov_base = buf;
 4046         aiov.iov_len = count;
 4047         auio.uio_iov = &aiov;
 4048         auio.uio_iovcnt = 1;
 4049         auio.uio_rw = UIO_READ;
 4050         auio.uio_segflg = bufseg;
 4051         auio.uio_td = td;
 4052         vn_lock(vp, LK_SHARED | LK_RETRY);
 4053         AUDIT_ARG_VNODE1(vp);
 4054         loff = auio.uio_offset = foffset;
 4055 #ifdef MAC
 4056         error = mac_vnode_check_readdir(td->td_ucred, vp);
 4057         if (error == 0)
 4058 #endif
 4059                 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 4060                     NULL);
 4061         foffset = auio.uio_offset;
 4062         if (error != 0) {
 4063                 VOP_UNLOCK(vp, 0);
 4064                 goto fail;
 4065         }
 4066         if (count == auio.uio_resid &&
 4067             (vp->v_vflag & VV_ROOT) &&
 4068             (vp->v_mount->mnt_flag & MNT_UNION)) {
 4069                 struct vnode *tvp = vp;
 4070 
 4071                 vp = vp->v_mount->mnt_vnodecovered;
 4072                 VREF(vp);
 4073                 fp->f_vnode = vp;
 4074                 fp->f_data = vp;
 4075                 foffset = 0;
 4076                 vput(tvp);
 4077                 goto unionread;
 4078         }
 4079         VOP_UNLOCK(vp, 0);
 4080         *basep = loff;
 4081         if (residp != NULL)
 4082                 *residp = auio.uio_resid;
 4083         td->td_retval[0] = count - auio.uio_resid;
 4084 fail:
 4085         foffset_unlock(fp, foffset, 0);
 4086         fdrop(fp, td);
 4087         return (error);
 4088 }
 4089 
 4090 #ifndef _SYS_SYSPROTO_H_
 4091 struct getdents_args {
 4092         int fd;
 4093         char *buf;
 4094         size_t count;
 4095 };
 4096 #endif
 4097 int
 4098 sys_getdents(td, uap)
 4099         struct thread *td;
 4100         register struct getdents_args /* {
 4101                 int fd;
 4102                 char *buf;
 4103                 u_int count;
 4104         } */ *uap;
 4105 {
 4106         struct getdirentries_args ap;
 4107 
 4108         ap.fd = uap->fd;
 4109         ap.buf = uap->buf;
 4110         ap.count = uap->count;
 4111         ap.basep = NULL;
 4112         return (sys_getdirentries(td, &ap));
 4113 }
 4114 
 4115 /*
 4116  * Set the mode mask for creation of filesystem nodes.
 4117  */
 4118 #ifndef _SYS_SYSPROTO_H_
 4119 struct umask_args {
 4120         int     newmask;
 4121 };
 4122 #endif
 4123 int
 4124 sys_umask(td, uap)
 4125         struct thread *td;
 4126         struct umask_args /* {
 4127                 int newmask;
 4128         } */ *uap;
 4129 {
 4130         register struct filedesc *fdp;
 4131 
 4132         FILEDESC_XLOCK(td->td_proc->p_fd);
 4133         fdp = td->td_proc->p_fd;
 4134         td->td_retval[0] = fdp->fd_cmask;
 4135         fdp->fd_cmask = uap->newmask & ALLPERMS;
 4136         FILEDESC_XUNLOCK(td->td_proc->p_fd);
 4137         return (0);
 4138 }
 4139 
 4140 /*
 4141  * Void all references to file by ripping underlying filesystem away from
 4142  * vnode.
 4143  */
 4144 #ifndef _SYS_SYSPROTO_H_
 4145 struct revoke_args {
 4146         char    *path;
 4147 };
 4148 #endif
 4149 int
 4150 sys_revoke(td, uap)
 4151         struct thread *td;
 4152         register struct revoke_args /* {
 4153                 char *path;
 4154         } */ *uap;
 4155 {
 4156         struct vnode *vp;
 4157         struct vattr vattr;
 4158         struct nameidata nd;
 4159         int error;
 4160 
 4161         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4162             uap->path, td);
 4163         if ((error = namei(&nd)) != 0)
 4164                 return (error);
 4165         vp = nd.ni_vp;
 4166         NDFREE(&nd, NDF_ONLY_PNBUF);
 4167         if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 4168                 error = EINVAL;
 4169                 goto out;
 4170         }
 4171 #ifdef MAC
 4172         error = mac_vnode_check_revoke(td->td_ucred, vp);
 4173         if (error != 0)
 4174                 goto out;
 4175 #endif
 4176         error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 4177         if (error != 0)
 4178                 goto out;
 4179         if (td->td_ucred->cr_uid != vattr.va_uid) {
 4180                 error = priv_check(td, PRIV_VFS_ADMIN);
 4181                 if (error != 0)
 4182                         goto out;
 4183         }
 4184         if (vcount(vp) > 1)
 4185                 VOP_REVOKE(vp, REVOKEALL);
 4186 out:
 4187         vput(vp);
 4188         return (error);
 4189 }
 4190 
 4191 /*
 4192  * Convert a user file descriptor to a kernel file entry and check that, if it
 4193  * is a capability, the correct rights are present. A reference on the file
 4194  * entry is held upon returning.
 4195  */
 4196 int
 4197 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp)
 4198 {
 4199         struct file *fp;
 4200         int error;
 4201 
 4202         error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL);
 4203         if (error != 0)
 4204                 return (error);
 4205 
 4206         /*
 4207          * The file could be not of the vnode type, or it may be not
 4208          * yet fully initialized, in which case the f_vnode pointer
 4209          * may be set, but f_ops is still badfileops.  E.g.,
 4210          * devfs_open() transiently create such situation to
 4211          * facilitate csw d_fdopen().
 4212          *
 4213          * Dupfdopen() handling in kern_openat() installs the
 4214          * half-baked file into the process descriptor table, allowing
 4215          * other thread to dereference it. Guard against the race by
 4216          * checking f_ops.
 4217          */
 4218         if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 4219                 fdrop(fp, curthread);
 4220                 return (EINVAL);
 4221         }
 4222         *fpp = fp;
 4223         return (0);
 4224 }
 4225 
 4226 
 4227 /*
 4228  * Get an (NFS) file handle.
 4229  */
 4230 #ifndef _SYS_SYSPROTO_H_
 4231 struct lgetfh_args {
 4232         char    *fname;
 4233         fhandle_t *fhp;
 4234 };
 4235 #endif
 4236 int
 4237 sys_lgetfh(td, uap)
 4238         struct thread *td;
 4239         register struct lgetfh_args *uap;
 4240 {
 4241         struct nameidata nd;
 4242         fhandle_t fh;
 4243         register struct vnode *vp;
 4244         int error;
 4245 
 4246         error = priv_check(td, PRIV_VFS_GETFH);
 4247         if (error != 0)
 4248                 return (error);
 4249         NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4250             uap->fname, td);
 4251         error = namei(&nd);
 4252         if (error != 0)
 4253                 return (error);
 4254         NDFREE(&nd, NDF_ONLY_PNBUF);
 4255         vp = nd.ni_vp;
 4256         bzero(&fh, sizeof(fh));
 4257         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4258         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4259         vput(vp);
 4260         if (error == 0)
 4261                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4262         return (error);
 4263 }
 4264 
 4265 #ifndef _SYS_SYSPROTO_H_
 4266 struct getfh_args {
 4267         char    *fname;
 4268         fhandle_t *fhp;
 4269 };
 4270 #endif
 4271 int
 4272 sys_getfh(td, uap)
 4273         struct thread *td;
 4274         register struct getfh_args *uap;
 4275 {
 4276         struct nameidata nd;
 4277         fhandle_t fh;
 4278         register struct vnode *vp;
 4279         int error;
 4280 
 4281         error = priv_check(td, PRIV_VFS_GETFH);
 4282         if (error != 0)
 4283                 return (error);
 4284         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 4285             uap->fname, td);
 4286         error = namei(&nd);
 4287         if (error != 0)
 4288                 return (error);
 4289         NDFREE(&nd, NDF_ONLY_PNBUF);
 4290         vp = nd.ni_vp;
 4291         bzero(&fh, sizeof(fh));
 4292         fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 4293         error = VOP_VPTOFH(vp, &fh.fh_fid);
 4294         vput(vp);
 4295         if (error == 0)
 4296                 error = copyout(&fh, uap->fhp, sizeof (fh));
 4297         return (error);
 4298 }
 4299 
 4300 /*
 4301  * syscall for the rpc.lockd to use to translate a NFS file handle into an
 4302  * open descriptor.
 4303  *
 4304  * warning: do not remove the priv_check() call or this becomes one giant
 4305  * security hole.
 4306  */
 4307 #ifndef _SYS_SYSPROTO_H_
 4308 struct fhopen_args {
 4309         const struct fhandle *u_fhp;
 4310         int flags;
 4311 };
 4312 #endif
 4313 int
 4314 sys_fhopen(td, uap)
 4315         struct thread *td;
 4316         struct fhopen_args /* {
 4317                 const struct fhandle *u_fhp;
 4318                 int flags;
 4319         } */ *uap;
 4320 {
 4321         struct mount *mp;
 4322         struct vnode *vp;
 4323         struct fhandle fhp;
 4324         struct file *fp;
 4325         int fmode, error;
 4326         int indx;
 4327 
 4328         error = priv_check(td, PRIV_VFS_FHOPEN);
 4329         if (error != 0)
 4330                 return (error);
 4331         indx = -1;
 4332         fmode = FFLAGS(uap->flags);
 4333         /* why not allow a non-read/write open for our lockd? */
 4334         if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 4335                 return (EINVAL);
 4336         error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 4337         if (error != 0)
 4338                 return(error);
 4339         /* find the mount point */
 4340         mp = vfs_busyfs(&fhp.fh_fsid);
 4341         if (mp == NULL)
 4342                 return (ESTALE);
 4343         /* now give me my vnode, it gets returned to me locked */
 4344         error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 4345         vfs_unbusy(mp);
 4346         if (error != 0)
 4347                 return (error);
 4348 
 4349         error = falloc_noinstall(td, &fp);
 4350         if (error != 0) {
 4351                 vput(vp);
 4352                 return (error);
 4353         }
 4354         /*
 4355          * An extra reference on `fp' has been held for us by
 4356          * falloc_noinstall().
 4357          */
 4358 
 4359 #ifdef INVARIANTS
 4360         td->td_dupfd = -1;
 4361 #endif
 4362         error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 4363         if (error != 0) {
 4364                 KASSERT(fp->f_ops == &badfileops,
 4365                     ("VOP_OPEN in fhopen() set f_ops"));
 4366                 KASSERT(td->td_dupfd < 0,
 4367                     ("fhopen() encountered fdopen()"));
 4368 
 4369                 vput(vp);
 4370                 goto bad;
 4371         }
 4372 #ifdef INVARIANTS
 4373         td->td_dupfd = 0;
 4374 #endif
 4375         fp->f_vnode = vp;
 4376         fp->f_seqcount = 1;
 4377         finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 4378             &vnops);
 4379         VOP_UNLOCK(vp, 0);
 4380         if ((fmode & O_TRUNC) != 0) {
 4381                 error = fo_truncate(fp, 0, td->td_ucred, td);
 4382                 if (error != 0)
 4383                         goto bad;
 4384         }
 4385 
 4386         error = finstall(td, fp, &indx, fmode, NULL);
 4387 bad:
 4388         fdrop(fp, td);
 4389         td->td_retval[0] = indx;
 4390         return (error);
 4391 }
 4392 
 4393 /*
 4394  * Stat an (NFS) file handle.
 4395  */
 4396 #ifndef _SYS_SYSPROTO_H_
 4397 struct fhstat_args {
 4398         struct fhandle *u_fhp;
 4399         struct stat *sb;
 4400 };
 4401 #endif
 4402 int
 4403 sys_fhstat(td, uap)
 4404         struct thread *td;
 4405         register struct fhstat_args /* {
 4406                 struct fhandle *u_fhp;
 4407                 struct stat *sb;
 4408         } */ *uap;
 4409 {
 4410         struct stat sb;
 4411         struct fhandle fh;
 4412         int error;
 4413 
 4414         error = copyin(uap->u_fhp, &fh, sizeof(fh));
 4415         if (error != 0)
 4416                 return (error);
 4417         error = kern_fhstat(td, fh, &sb);
 4418         if (error == 0)
 4419                 error = copyout(&sb, uap->sb, sizeof(sb));
 4420         return (error);
 4421 }
 4422 
 4423 int
 4424 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 4425 {
 4426         struct mount *mp;
 4427         struct vnode *vp;
 4428         int error;
 4429 
 4430         error = priv_check(td, PRIV_VFS_FHSTAT);
 4431         if (error != 0)
 4432                 return (error);
 4433         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4434                 return (ESTALE);
 4435         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4436         vfs_unbusy(mp);
 4437         if (error != 0)
 4438                 return (error);
 4439         error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 4440         vput(vp);
 4441         return (error);
 4442 }
 4443 
 4444 /*
 4445  * Implement fstatfs() for (NFS) file handles.
 4446  */
 4447 #ifndef _SYS_SYSPROTO_H_
 4448 struct fhstatfs_args {
 4449         struct fhandle *u_fhp;
 4450         struct statfs *buf;
 4451 };
 4452 #endif
 4453 int
 4454 sys_fhstatfs(td, uap)
 4455         struct thread *td;
 4456         struct fhstatfs_args /* {
 4457                 struct fhandle *u_fhp;
 4458                 struct statfs *buf;
 4459         } */ *uap;
 4460 {
 4461         struct statfs sf;
 4462         fhandle_t fh;
 4463         int error;
 4464 
 4465         error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 4466         if (error != 0)
 4467                 return (error);
 4468         error = kern_fhstatfs(td, fh, &sf);
 4469         if (error != 0)
 4470                 return (error);
 4471         return (copyout(&sf, uap->buf, sizeof(sf)));
 4472 }
 4473 
 4474 int
 4475 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 4476 {
 4477         struct statfs *sp;
 4478         struct mount *mp;
 4479         struct vnode *vp;
 4480         int error;
 4481 
 4482         error = priv_check(td, PRIV_VFS_FHSTATFS);
 4483         if (error != 0)
 4484                 return (error);
 4485         if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 4486                 return (ESTALE);
 4487         error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 4488         if (error != 0) {
 4489                 vfs_unbusy(mp);
 4490                 return (error);
 4491         }
 4492         vput(vp);
 4493         error = prison_canseemount(td->td_ucred, mp);
 4494         if (error != 0)
 4495                 goto out;
 4496 #ifdef MAC
 4497         error = mac_mount_check_stat(td->td_ucred, mp);
 4498         if (error != 0)
 4499                 goto out;
 4500 #endif
 4501         /*
 4502          * Set these in case the underlying filesystem fails to do so.
 4503          */
 4504         sp = &mp->mnt_stat;
 4505         sp->f_version = STATFS_VERSION;
 4506         sp->f_namemax = NAME_MAX;
 4507         sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 4508         error = VFS_STATFS(mp, sp);
 4509         if (error == 0)
 4510                 *buf = *sp;
 4511 out:
 4512         vfs_unbusy(mp);
 4513         return (error);
 4514 }
 4515 
 4516 int
 4517 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 4518 {
 4519         struct file *fp;
 4520         struct mount *mp;
 4521         struct vnode *vp;
 4522         cap_rights_t rights;
 4523         off_t olen, ooffset;
 4524         int error;
 4525 
 4526         fp = NULL;
 4527         error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 4528         if (error != 0)
 4529                 goto out;
 4530 
 4531         switch (fp->f_type) {
 4532         case DTYPE_VNODE:
 4533                 break;
 4534         case DTYPE_PIPE:
 4535         case DTYPE_FIFO:
 4536                 error = ESPIPE;
 4537                 goto out;
 4538         default:
 4539                 error = ENODEV;
 4540                 goto out;
 4541         }
 4542         if ((fp->f_flag & FWRITE) == 0) {
 4543                 error = EBADF;
 4544                 goto out;
 4545         }
 4546         vp = fp->f_vnode;
 4547         if (vp->v_type != VREG) {
 4548                 error = ENODEV;
 4549                 goto out;
 4550         }
 4551         if (offset < 0 || len <= 0) {
 4552                 error = EINVAL;
 4553                 goto out;
 4554         }
 4555         /* Check for wrap. */
 4556         if (offset > OFF_MAX - len) {
 4557                 error = EFBIG;
 4558                 goto out;
 4559         }
 4560 
 4561         /* Allocating blocks may take a long time, so iterate. */
 4562         for (;;) {
 4563                 olen = len;
 4564                 ooffset = offset;
 4565 
 4566                 bwillwrite();
 4567                 mp = NULL;
 4568                 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 4569                 if (error != 0)
 4570                         break;
 4571                 error = vn_lock(vp, LK_EXCLUSIVE);
 4572                 if (error != 0) {
 4573                         vn_finished_write(mp);
 4574                         break;
 4575                 }
 4576 #ifdef MAC
 4577                 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 4578                 if (error == 0)
 4579 #endif
 4580                         error = VOP_ALLOCATE(vp, &offset, &len);
 4581                 VOP_UNLOCK(vp, 0);
 4582                 vn_finished_write(mp);
 4583 
 4584                 if (olen + ooffset != offset + len) {
 4585                         panic("offset + len changed from %jx/%jx to %jx/%jx",
 4586                             ooffset, olen, offset, len);
 4587                 }
 4588                 if (error != 0 || len == 0)
 4589                         break;
 4590                 KASSERT(olen > len, ("Iteration did not make progress?"));
 4591                 maybe_yield();
 4592         }
 4593  out:
 4594         if (fp != NULL)
 4595                 fdrop(fp, td);
 4596         return (error);
 4597 }
 4598 
 4599 int
 4600 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 4601 {
 4602 
 4603         td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
 4604             uap->len);
 4605         return (0);
 4606 }
 4607 
 4608 /*
 4609  * Unlike madvise(2), we do not make a best effort to remember every
 4610  * possible caching hint.  Instead, we remember the last setting with
 4611  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 4612  * region of any current setting.
 4613  */
 4614 int
 4615 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 4616     int advice)
 4617 {
 4618         struct fadvise_info *fa, *new;
 4619         struct file *fp;
 4620         struct vnode *vp;
 4621         cap_rights_t rights;
 4622         off_t end;
 4623         int error;
 4624 
 4625         if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 4626                 return (EINVAL);
 4627         switch (advice) {
 4628         case POSIX_FADV_SEQUENTIAL:
 4629         case POSIX_FADV_RANDOM:
 4630         case POSIX_FADV_NOREUSE:
 4631                 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 4632                 break;
 4633         case POSIX_FADV_NORMAL:
 4634         case POSIX_FADV_WILLNEED:
 4635         case POSIX_FADV_DONTNEED:
 4636                 new = NULL;
 4637                 break;
 4638         default:
 4639                 return (EINVAL);
 4640         }
 4641         /* XXX: CAP_POSIX_FADVISE? */
 4642         error = fget(td, fd, cap_rights_init(&rights), &fp);
 4643         if (error != 0)
 4644                 goto out;
 4645 
 4646         switch (fp->f_type) {
 4647         case DTYPE_VNODE:
 4648                 break;
 4649         case DTYPE_PIPE:
 4650         case DTYPE_FIFO:
 4651                 error = ESPIPE;
 4652                 goto out;
 4653         default:
 4654                 error = ENODEV;
 4655                 goto out;
 4656         }
 4657         vp = fp->f_vnode;
 4658         if (vp->v_type != VREG) {
 4659                 error = ENODEV;
 4660                 goto out;
 4661         }
 4662         if (len == 0)
 4663                 end = OFF_MAX;
 4664         else
 4665                 end = offset + len - 1;
 4666         switch (advice) {
 4667         case POSIX_FADV_SEQUENTIAL:
 4668         case POSIX_FADV_RANDOM:
 4669         case POSIX_FADV_NOREUSE:
 4670                 /*
 4671                  * Try to merge any existing non-standard region with
 4672                  * this new region if possible, otherwise create a new
 4673                  * non-standard region for this request.
 4674                  */
 4675                 mtx_pool_lock(mtxpool_sleep, fp);
 4676                 fa = fp->f_advice;
 4677                 if (fa != NULL && fa->fa_advice == advice &&
 4678                     ((fa->fa_start <= end && fa->fa_end >= offset) ||
 4679                     (end != OFF_MAX && fa->fa_start == end + 1) ||
 4680                     (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 4681                         if (offset < fa->fa_start)
 4682                                 fa->fa_start = offset;
 4683                         if (end > fa->fa_end)
 4684                                 fa->fa_end = end;
 4685                 } else {
 4686                         new->fa_advice = advice;
 4687                         new->fa_start = offset;
 4688                         new->fa_end = end;
 4689                         new->fa_prevstart = 0;
 4690                         new->fa_prevend = 0;
 4691                         fp->f_advice = new;
 4692                         new = fa;
 4693                 }
 4694                 mtx_pool_unlock(mtxpool_sleep, fp);
 4695                 break;
 4696         case POSIX_FADV_NORMAL:
 4697                 /*
 4698                  * If a the "normal" region overlaps with an existing
 4699                  * non-standard region, trim or remove the
 4700                  * non-standard region.
 4701                  */
 4702                 mtx_pool_lock(mtxpool_sleep, fp);
 4703                 fa = fp->f_advice;
 4704                 if (fa != NULL) {
 4705                         if (offset <= fa->fa_start && end >= fa->fa_end) {
 4706                                 new = fa;
 4707                                 fp->f_advice = NULL;
 4708                         } else if (offset <= fa->fa_start &&
 4709                             end >= fa->fa_start)
 4710                                 fa->fa_start = end + 1;
 4711                         else if (offset <= fa->fa_end && end >= fa->fa_end)
 4712                                 fa->fa_end = offset - 1;
 4713                         else if (offset >= fa->fa_start && end <= fa->fa_end) {
 4714                                 /*
 4715                                  * If the "normal" region is a middle
 4716                                  * portion of the existing
 4717                                  * non-standard region, just remove
 4718                                  * the whole thing rather than picking
 4719                                  * one side or the other to
 4720                                  * preserve.
 4721                                  */
 4722                                 new = fa;
 4723                                 fp->f_advice = NULL;
 4724                         }
 4725                 }
 4726                 mtx_pool_unlock(mtxpool_sleep, fp);
 4727                 break;
 4728         case POSIX_FADV_WILLNEED:
 4729         case POSIX_FADV_DONTNEED:
 4730                 error = VOP_ADVISE(vp, offset, end, advice);
 4731                 break;
 4732         }
 4733 out:
 4734         if (fp != NULL)
 4735                 fdrop(fp, td);
 4736         free(new, M_FADVISE);
 4737         return (error);
 4738 }
 4739 
 4740 int
 4741 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 4742 {
 4743 
 4744         td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
 4745             uap->len, uap->advice);
 4746         return (0);
 4747 }
Cache object: cf50187e7213c4d996835beec50bacae
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_syscalls.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_syscalls.c