The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_vnops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1993, 1995
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD: releng/12.0/sys/ufs/ufs/ufs_vnops.c 341085 2018-11-27 17:58:25Z markj $");
   41 
   42 #include "opt_quota.h"
   43 #include "opt_suiddir.h"
   44 #include "opt_ufs.h"
   45 #include "opt_ffs.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/malloc.h>
   50 #include <sys/namei.h>
   51 #include <sys/kernel.h>
   52 #include <sys/fcntl.h>
   53 #include <sys/filio.h>
   54 #include <sys/stat.h>
   55 #include <sys/bio.h>
   56 #include <sys/buf.h>
   57 #include <sys/mount.h>
   58 #include <sys/priv.h>
   59 #include <sys/refcount.h>
   60 #include <sys/unistd.h>
   61 #include <sys/vnode.h>
   62 #include <sys/dirent.h>
   63 #include <sys/lockf.h>
   64 #include <sys/conf.h>
   65 #include <sys/acl.h>
   66 
   67 #include <security/mac/mac_framework.h>
   68 
   69 #include <sys/file.h>           /* XXX */
   70 
   71 #include <vm/vm.h>
   72 #include <vm/vm_extern.h>
   73 
   74 #include <ufs/ufs/acl.h>
   75 #include <ufs/ufs/extattr.h>
   76 #include <ufs/ufs/quota.h>
   77 #include <ufs/ufs/inode.h>
   78 #include <ufs/ufs/dir.h>
   79 #include <ufs/ufs/ufsmount.h>
   80 #include <ufs/ufs/ufs_extern.h>
   81 #ifdef UFS_DIRHASH
   82 #include <ufs/ufs/dirhash.h>
   83 #endif
   84 #ifdef UFS_GJOURNAL
   85 #include <ufs/ufs/gjournal.h>
   86 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS");
   87 #endif
   88 
   89 #ifdef QUOTA
   90 FEATURE(ufs_quota, "UFS disk quotas support");
   91 FEATURE(ufs_quota64, "64bit UFS disk quotas support");
   92 #endif
   93 
   94 #ifdef SUIDDIR
   95 FEATURE(suiddir,
   96     "Give all new files in directory the same ownership as the directory");
   97 #endif
   98 
   99 
  100 #include <ufs/ffs/ffs_extern.h>
  101 
      /*
       * Forward declarations for the file-local (static) vnode operation
       * handlers and helper routines of this file.
       */
  102 static vop_accessx_t    ufs_accessx;
  103 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
  104 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
  105 static vop_close_t      ufs_close;
  106 static vop_create_t     ufs_create;
  107 static vop_getattr_t    ufs_getattr;
  108 static vop_ioctl_t      ufs_ioctl;
  109 static vop_link_t       ufs_link;
  110 static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *);
  111 static vop_markatime_t  ufs_markatime;
  112 static vop_mkdir_t      ufs_mkdir;
  113 static vop_mknod_t      ufs_mknod;
  114 static vop_open_t       ufs_open;
  115 static vop_pathconf_t   ufs_pathconf;
  116 static vop_print_t      ufs_print;
  117 static vop_readlink_t   ufs_readlink;
  118 static vop_remove_t     ufs_remove;
  119 static vop_rename_t     ufs_rename;
  120 static vop_rmdir_t      ufs_rmdir;
  121 static vop_setattr_t    ufs_setattr;
  122 static vop_strategy_t   ufs_strategy;
  123 static vop_symlink_t    ufs_symlink;
  124 static vop_whiteout_t   ufs_whiteout;
  125 static vop_close_t      ufsfifo_close;
  126 static vop_kqfilter_t   ufsfifo_kqfilter;
  127 
      /* Read-only (CTLFLAG_RD) sysctl node "ufs" declared under _vfs. */
  128 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
  129 
  130 /*
  131  * A virgin directory (no blushing please).
       *
       * Initial contents for a directory: two entries named "." and "..".
       * The values 12 and DIRBLKSIZ - 12 add up to DIRBLKSIZ, so the pair
       * presumably fills exactly one directory block (the field meanings
       * come from struct dirtemplate, which is declared elsewhere; confirm
       * there).  omastertemplate holds the same pair for struct
       * odirtemplate, whose initializer carries no DT_DIR values.
  132  */
  133 static struct dirtemplate mastertemplate = {
  134         0, 12, DT_DIR, 1, ".",
  135         0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
  136 };
  137 static struct odirtemplate omastertemplate = {
  138         0, 12, 1, ".",
  139         0, DIRBLKSIZ - 12, 2, ".."
  140 };
  141 
      /*
       * Apply the pending IN_ACCESS, IN_UPDATE and IN_CHANGE marks on vp's
       * inode to its atime, mtime and ctime fields, using a single timestamp
       * obtained from vfs_timestamp(), then clear those three marks.
       *
       * The vnode interlock must already be held; this is asserted on entry.
       * If UFS_RDONLY(ip) is true the marks are cleared without updating any
       * time field.
       */
  142 static void
  143 ufs_itimes_locked(struct vnode *vp)
  144 {
  145         struct inode *ip;
  146         struct timespec ts;
  147 
  148         ASSERT_VI_LOCKED(vp, __func__);
  149 
  150         ip = VTOI(vp);
  151         if (UFS_RDONLY(ip))
  152                 goto out;
              /* Nothing pending: leave the inode untouched. */
  153         if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
  154                 return;
  155 
              /*
               * Pick the dirty mark: IN_LAZYMOD for block/character devices
               * when DOINGSOFTDEP(vp) is false; IN_MODIFIED when the mount is
               * neither suspended nor suspending, or when a change/update is
               * pending; otherwise IN_LAZYACCESS for an access-only mark.
               */
  156         if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
  157                 ip->i_flag |= IN_LAZYMOD;
  158         else if (((vp->v_mount->mnt_kern_flag &
  159                     (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
  160                     (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
  161                 ip->i_flag |= IN_MODIFIED;
  162         else if (ip->i_flag & IN_ACCESS)
  163                 ip->i_flag |= IN_LAZYACCESS;
  164         vfs_timestamp(&ts);
  165         if (ip->i_flag & IN_ACCESS) {
  166                 DIP_SET(ip, i_atime, ts.tv_sec);
  167                 DIP_SET(ip, i_atimensec, ts.tv_nsec);
  168         }
  169         if (ip->i_flag & IN_UPDATE) {
  170                 DIP_SET(ip, i_mtime, ts.tv_sec);
  171                 DIP_SET(ip, i_mtimensec, ts.tv_nsec);
  172         }
  173         if (ip->i_flag & IN_CHANGE) {
  174                 DIP_SET(ip, i_ctime, ts.tv_sec);
  175                 DIP_SET(ip, i_ctimensec, ts.tv_nsec);
                      /* A change also advances the inode's i_modrev by one. */
  176                 DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
  177         }
  178 
  179  out:
  180         ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
  181 }
  182 
      /*
       * Wrapper around ufs_itimes_locked() for callers that do not hold the
       * vnode interlock: it takes the interlock, applies the pending time
       * marks, and releases the interlock again.
       */
  183 void
  184 ufs_itimes(struct vnode *vp)
  185 {
  186 
  187         VI_LOCK(vp);
  188         ufs_itimes_locked(vp);
  189         VI_UNLOCK(vp);
  190 }
  191 
  192 /*
  193  * Create a regular file
       *
       * The inode is created by ufs_makeinode() with a mode built from
       * a_vap->va_type and a_vap->va_mode.  On success the new vnode is
       * entered into the name cache if MAKEENTRY is set in a_cnp->cn_flags.
       * Returns 0, or the error reported by ufs_makeinode().
  194  */
  195 static int
  196 ufs_create(ap)
  197         struct vop_create_args /* {
  198                 struct vnode *a_dvp;
  199                 struct vnode **a_vpp;
  200                 struct componentname *a_cnp;
  201                 struct vattr *a_vap;
  202         } */ *ap;
  203 {
  204         int error;
  205 
  206         error =
  207             ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
  208             ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create");
  209         if (error != 0)
  210                 return (error);
  211         if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
  212                 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
  213         return (0);
  214 }
  215 
  216 /*
  217  * Mknod vnode call
       *
       * Creates the inode with ufs_makeinode(), marks it for access, change
       * and update time stamping, and stores a_vap->va_rdev in it when that
       * field is not VNOVAL.  The freshly created vnode is then discarded
       * (vgone/vput) and looked up again by inode number through VFS_VGET().
       *
       * Returns 0 with *a_vpp set to the re-fetched vnode.  If ufs_makeinode()
       * fails its error is returned; if VFS_VGET() fails *a_vpp is set to
       * NULL and that error is returned.
  218  */
  219 /* ARGSUSED */
  220 static int
  221 ufs_mknod(ap)
  222         struct vop_mknod_args /* {
  223                 struct vnode *a_dvp;
  224                 struct vnode **a_vpp;
  225                 struct componentname *a_cnp;
  226                 struct vattr *a_vap;
  227         } */ *ap;
  228 {
  229         struct vattr *vap = ap->a_vap;
  230         struct vnode **vpp = ap->a_vpp;
  231         struct inode *ip;
  232         ino_t ino;
  233         int error;
  234 
  235         error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
  236             ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod");
  237         if (error)
  238                 return (error);
  239         ip = VTOI(*vpp);
  240         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
  241         if (vap->va_rdev != VNOVAL) {
  242                 /*
  243                  * Want to be able to use this to make badblock
  244                  * inodes, so don't truncate the dev number.
  245                  */
  246                 DIP_SET(ip, i_rdev, vap->va_rdev);
  247         }
  248         /*
  249          * Remove inode, then reload it through VFS_VGET so it is
  250          * checked to see if it is an alias of an existing entry in
  251          * the inode cache.  XXX I don't believe this is necessary now.
  252          */
  253         (*vpp)->v_type = VNON;
  254         ino = ip->i_number;     /* Save this before vgone() invalidates ip. */
  255         vgone(*vpp);
  256         vput(*vpp);
  257         error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
  258         if (error) {
                      /* Do not hand the discarded vnode back to the caller. */
  259                 *vpp = NULL;
  260                 return (error);
  261         }
  262         return (0);
  263 }
  264 
  265 /*
  266  * Open called.
       *
       * Returns EOPNOTSUPP for character and block device vnodes, and EPERM
       * when the inode has APPEND set but a_mode requests FWRITE without
       * O_APPEND.  Otherwise vnode_create_vobject() is called with the
       * current inode size and 0 is returned.
  267  */
  268 /* ARGSUSED */
  269 static int
  270 ufs_open(struct vop_open_args *ap)
  271 {
  272         struct vnode *vp = ap->a_vp;
  273         struct inode *ip;
  274 
  275         if (vp->v_type == VCHR || vp->v_type == VBLK)
  276                 return (EOPNOTSUPP);
  277 
  278         ip = VTOI(vp);
  279         /*
  280          * Files marked append-only must be opened for appending.
  281          */
  282         if ((ip->i_flags & APPEND) &&
  283             (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
  284                 return (EPERM);
  285         vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
  286         return (0);
  287 }
  288 
  289 /*
  290  * Close called.
  291  *
  292  * Update the times on the inode, but only while the vnode's v_usecount
       * is greater than 1; the count is read and the times are updated under
       * the vnode interlock.  Always returns 0.
  293  */
  294 /* ARGSUSED */
  295 static int
  296 ufs_close(ap)
  297         struct vop_close_args /* {
  298                 struct vnode *a_vp;
  299                 int  a_fflag;
  300                 struct ucred *a_cred;
  301                 struct thread *a_td;
  302         } */ *ap;
  303 {
  304         struct vnode *vp = ap->a_vp;
  305         int usecount;
  306 
  307         VI_LOCK(vp);
  308         usecount = vp->v_usecount;
  309         if (usecount > 1)
  310                 ufs_itimes_locked(vp);
  311         VI_UNLOCK(vp);
  312         return (0);
  313 }
  314 
      /*
       * Check whether a_cred may access a_vp with the rights in a_accmode.
       *
       * Order of checks:
       *  1. A request containing VMODIFY_PERMS bits on a directory, symlink
       *     or regular file of a read-only mount fails with EROFS.
       *  2. A request containing VMODIFY_PERMS bits other than VADMIN_PERMS
       *     on an inode flagged IMMUTABLE or SF_SNAPSHOT fails with EPERM.
       *  3. With UFS_ACL compiled in and MNT_ACLS or MNT_NFS4ACLS set on the
       *     mount, the decision is made from the object's NFSv4 or POSIX.1e
       *     ACL; if the ACL cannot be retrieved, the plain vaccess() check
       *     on mode, uid and gid is used instead.
       *  4. Otherwise vaccess() is applied to the inode's mode, uid and gid.
       *
       * Returns 0 or an errno value from the checks above.
       */
  315 static int
  316 ufs_accessx(ap)
  317         struct vop_accessx_args /* {
  318                 struct vnode *a_vp;
  319                 accmode_t a_accmode;
  320                 struct ucred *a_cred;
  321                 struct thread *a_td;
  322         } */ *ap;
  323 {
  324         struct vnode *vp = ap->a_vp;
  325         struct inode *ip = VTOI(vp);
  326         accmode_t accmode = ap->a_accmode;
  327         int error;
  328 #ifdef UFS_ACL
  329         struct acl *acl;
  330         acl_type_t type;
  331 #endif
  332 
  333         /*
  334          * Disallow write attempts on read-only filesystems;
  335          * unless the file is a socket, fifo, or a block or
  336          * character device resident on the filesystem.
  337          */
  338         if (accmode & VMODIFY_PERMS) {
  339                 switch (vp->v_type) {
  340                 case VDIR:
  341                 case VLNK:
  342                 case VREG:
  343                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  344                                 return (EROFS);
  345 #ifdef QUOTA
  346                         /*
  347                          * Inode is accounted in the quotas only if struct
  348                          * dquot is attached to it. VOP_ACCESS() is called
  349                          * from vn_open_cred() and provides a convenient
  350                          * point to call getinoquota().  The lock mode is
  351                          * exclusive when the file is opening for write.
  352                          */
  353                         if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
  354                                 error = getinoquota(ip);
  355                                 if (error != 0)
  356                                         return (error);
  357                         }
  358 #endif
  359                         break;
  360                 default:
  361                         break;
  362                 }
  363         }
  364 
  365         /*
  366          * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
  367          * permits the owner of the file to remove the IMMUTABLE flag.
  368          */
  369         if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
  370             (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
  371                 return (EPERM);
  372 
  373 #ifdef UFS_ACL
  374         if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
                      /* NFSv4 ACLs take precedence when both flags are set. */
  375                 if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
  376                         type = ACL_TYPE_NFS4;
  377                 else
  378                         type = ACL_TYPE_ACCESS;
  379 
  380                 acl = acl_alloc(M_WAITOK);
  381                 if (type == ACL_TYPE_NFS4)
  382                         error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
  383                 else
  384                         error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
  385                 switch (error) {
  386                 case 0:
  387                         if (type == ACL_TYPE_NFS4) {
  388                                 error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
  389                                     ip->i_gid, acl, accmode, ap->a_cred, NULL);
  390                         } else {
                                      /*
                                       * The POSIX.1e check runs on the
                                       * accmode produced by
                                       * vfs_unixify_accmode().
                                       */
  391                                 error = vfs_unixify_accmode(&accmode);
  392                                 if (error == 0)
  393                                         error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
  394                                             ip->i_gid, acl, accmode, ap->a_cred, NULL);
  395                         }
  396                         break;
  397                 default:
                              /* EOPNOTSUPP is not logged; other errors are. */
  398                         if (error != EOPNOTSUPP)
  399                                 printf(
  400 "ufs_accessx(): Error retrieving ACL on object (%d).\n",
  401                                     error);
  402                         /*
  403                          * XXX: Fall back until debugged.  Should
  404                          * eventually possibly log an error, and return
  405                          * EPERM for safety.
  406                          */
  407                         error = vfs_unixify_accmode(&accmode);
  408                         if (error == 0)
  409                                 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
  410                                     ip->i_gid, accmode, ap->a_cred, NULL);
  411                 }
                      /* The temporary ACL is released on every path. */
  412                 acl_free(acl);
  413 
  414                 return (error);
  415         }
  416 #endif /* UFS_ACL */
  417         error = vfs_unixify_accmode(&accmode);
  418         if (error == 0)
  419                 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
  420                     accmode, ap->a_cred, NULL);
  421         return (error);
  422 }
  423 
  424 /* ARGSUSED */
      /*
       * Fill *a_vap with the attributes of a_vp.
       *
       * Pending time marks are applied first through ufs_itimes_locked(), and
       * the access time is copied while the vnode interlock is still held;
       * the remaining fields are copied after the interlock is released.
       * UFS1 and UFS2 inodes are read through i_din1 and i_din2
       * respectively; the birth time is filled in only for UFS2.
       * Always returns 0.
       */
  425 static int
  426 ufs_getattr(ap)
  427         struct vop_getattr_args /* {
  428                 struct vnode *a_vp;
  429                 struct vattr *a_vap;
  430                 struct ucred *a_cred;
  431         } */ *ap;
  432 {
  433         struct vnode *vp = ap->a_vp;
  434         struct inode *ip = VTOI(vp);
  435         struct vattr *vap = ap->a_vap;
  436 
  437         VI_LOCK(vp);
  438         ufs_itimes_locked(vp);
  439         if (I_IS_UFS1(ip)) {
  440                 vap->va_atime.tv_sec = ip->i_din1->di_atime;
  441                 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
  442         } else {
  443                 vap->va_atime.tv_sec = ip->i_din2->di_atime;
  444                 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
  445         }
  446         VI_UNLOCK(vp);
  447         /*
  448          * Copy from inode table
  449          */
  450         vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev);
  451         vap->va_fileid = ip->i_number;
              /* Only the non-IFMT bits are reported in va_mode. */
  452         vap->va_mode = ip->i_mode & ~IFMT;
  453         vap->va_nlink = ip->i_effnlink;
  454         vap->va_uid = ip->i_uid;
  455         vap->va_gid = ip->i_gid;
  456         if (I_IS_UFS1(ip)) {
  457                 vap->va_rdev = ip->i_din1->di_rdev;
  458                 vap->va_size = ip->i_din1->di_size;
  459                 vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
  460                 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
  461                 vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
  462                 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
  463                 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
  464                 vap->va_filerev = ip->i_din1->di_modrev;
  465         } else {
  466                 vap->va_rdev = ip->i_din2->di_rdev;
  467                 vap->va_size = ip->i_din2->di_size;
  468                 vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
  469                 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
  470                 vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
  471                 vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
  472                 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
  473                 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
  474                 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
  475                 vap->va_filerev = ip->i_din2->di_modrev;
  476         }
  477         vap->va_flags = ip->i_flags;
  478         vap->va_gen = ip->i_gen;
  479         vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
              /* The vnode type is derived from the mode bits of the inode. */
  480         vap->va_type = IFTOVT(ip->i_mode);
  481         return (0);
  482 }
  483 
  484 /*
  485  * Set attribute vnode op. called from several syscalls
       *
       * Fields of a_vap left at VNOVAL are ignored.  The others are applied
       * in this order: file flags, owner/group (ufs_chown), size
       * (UFS_TRUNCATE), access/modification/birth times, and mode
       * (ufs_chmod).  A request that sets va_type, va_nlink, va_fsid,
       * va_fileid, va_blocksize, va_rdev, va_bytes or va_gen is rejected
       * with EINVAL.  Every group checks MNT_RDONLY and returns EROFS before
       * modifying anything (for size, only for symlinks and regular files).
       *
       * Returns 0 or the first error encountered; groups handled before the
       * failing one have already been applied.
  486  */
  487 static int
  488 ufs_setattr(ap)
  489         struct vop_setattr_args /* {
  490                 struct vnode *a_vp;
  491                 struct vattr *a_vap;
  492                 struct ucred *a_cred;
  493         } */ *ap;
  494 {
  495         struct vattr *vap = ap->a_vap;
  496         struct vnode *vp = ap->a_vp;
  497         struct inode *ip = VTOI(vp);
  498         struct ucred *cred = ap->a_cred;
  499         struct thread *td = curthread;
  500         int error;
  501 
  502         /*
  503          * Check for unsettable attributes.
  504          */
  505         if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
  506             (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
  507             (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
  508             ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
  509                 return (EINVAL);
  510         }
  511         if (vap->va_flags != VNOVAL) {
                      /* Any flag bit outside the list below is unsupported. */
  512                 if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
  513                     SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
  514                     UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
  515                     UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
  516                     UF_SPARSE | UF_SYSTEM)) != 0)
  517                         return (EOPNOTSUPP);
  518                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  519                         return (EROFS);
  520                 /*
  521                  * Callers may only modify the file flags on objects they
  522                  * have VADMIN rights for.
  523                  */
  524                 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
  525                         return (error);
  526                 /*
  527                  * Unprivileged processes are not permitted to unset system
  528                  * flags, or modify flags if any system flags are set.
  529                  * Privileged non-jail processes may not modify system flags
  530                  * if securelevel > 0 and any existing system flags are set.
  531                  * Privileged jail processes behave like privileged non-jail
  532                  * processes if the PR_ALLOW_CHFLAGS permission bit is set;
  533                  * otherwise, they behave like unprivileged processes.
  534                  */
                      /* priv_check_cred() returning 0 is the privileged branch. */
  535                 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
  536                         if (ip->i_flags &
  537                             (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
  538                                 error = securelevel_gt(cred, 0);
  539                                 if (error)
  540                                         return (error);
  541                         }
  542                         /* The snapshot flag cannot be toggled. */
  543                         if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
  544                                 return (EPERM);
  545                 } else {
  546                         if (ip->i_flags &
  547                             (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
  548                             ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
  549                                 return (EPERM);
  550                 }
  551                 ip->i_flags = vap->va_flags;
  552                 DIP_SET(ip, i_flags, vap->va_flags);
  553                 ip->i_flag |= IN_CHANGE;
  554                 error = UFS_UPDATE(vp, 0);
                      /*
                       * NOTE(review): the UFS_UPDATE() result is returned only
                       * when the new flags include IMMUTABLE or APPEND;
                       * otherwise it is overwritten by the steps below --
                       * confirm this is intended.
                       */
  555                 if (ip->i_flags & (IMMUTABLE | APPEND))
  556                         return (error);
  557         }
  558         /*
  559          * If immutable or append, no one can change any of its attributes
  560          * except the ones already handled (in some cases, file flags
  561          * including the immutability flags themselves for the superuser).
  562          */
  563         if (ip->i_flags & (IMMUTABLE | APPEND))
  564                 return (EPERM);
  565         /*
  566          * Go through the fields and update iff not VNOVAL.
  567          */
  568         if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
  569                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  570                         return (EROFS);
  571                 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
  572                     td)) != 0)
  573                         return (error);
  574         }
  575         if (vap->va_size != VNOVAL) {
  576                 /*
  577                  * XXX most of the following special cases should be in
  578                  * callers instead of in N filesystems.  The VDIR check
  579                  * mostly already is.
  580                  */
  581                 switch (vp->v_type) {
  582                 case VDIR:
  583                         return (EISDIR);
  584                 case VLNK:
  585                 case VREG:
  586                         /*
  587                          * Truncation should have an effect in these cases.
  588                          * Disallow it if the filesystem is read-only or
  589                          * the file is being snapshotted.
  590                          */
  591                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  592                                 return (EROFS);
  593                         if ((ip->i_flags & SF_SNAPSHOT) != 0)
  594                                 return (EPERM);
  595                         break;
  596                 default:
  597                         /*
  598                          * According to POSIX, the result is unspecified
  599                          * for file types other than regular files,
  600                          * directories and shared memory objects.  We
  601                          * don't support shared memory objects in the file
  602                          * system, and have dubious support for truncating
  603                          * symlinks.  Just ignore the request in other cases.
                               *
                               * Note that this returns from the whole function,
                               * so any time or mode fields in the same request
                               * are not processed.
  604                          */
  605                         return (0);
  606                 }
                      /* IO_SYNC is added only when the caller set VA_SYNC. */
  607                 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL |
  608                     ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0),
  609                     cred)) != 0)
  610                         return (error);
  611         }
  612         if (vap->va_atime.tv_sec != VNOVAL ||
  613             vap->va_mtime.tv_sec != VNOVAL ||
  614             vap->va_birthtime.tv_sec != VNOVAL) {
  615                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  616                         return (EROFS);
  617                 if ((ip->i_flags & SF_SNAPSHOT) != 0)
  618                         return (EPERM);
  619                 error = vn_utimes_perm(vp, vap, cred, td);
  620                 if (error != 0)
  621                         return (error);
  622                 ip->i_flag |= IN_CHANGE | IN_MODIFIED;
                      /*
                       * An explicitly supplied time replaces any pending
                       * automatic update of that field, so the matching
                       * IN_ACCESS / IN_UPDATE mark is cleared first.
                       */
  623                 if (vap->va_atime.tv_sec != VNOVAL) {
  624                         ip->i_flag &= ~IN_ACCESS;
  625                         DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
  626                         DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
  627                 }
  628                 if (vap->va_mtime.tv_sec != VNOVAL) {
  629                         ip->i_flag &= ~IN_UPDATE;
  630                         DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
  631                         DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
  632                 }
                      /* The birth time is stored only for UFS2 inodes. */
  633                 if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) {
  634                         ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
  635                         ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
  636                 }
  637                 error = UFS_UPDATE(vp, 0);
  638                 if (error)
  639                         return (error);
  640         }
  641         error = 0;
  642         if (vap->va_mode != (mode_t)VNOVAL) {
  643                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  644                         return (EROFS);
                      /*
                       * On an inode flagged SF_SNAPSHOT, a mode containing any
                       * write or execute bit is refused.
                       */
  645                 if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
  646                    (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
  647                         return (EPERM);
  648                 error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
  649         }
  650         return (error);
  651 }
  652 
  653 #ifdef UFS_ACL
      /*
       * Bring vp's NFSv4 ACL in line with a new mode: read the current ACL,
       * pass it with the given mode and owner id to
       * acl_nfs4_sync_acl_from_mode(), and write the result back.
       *
       * The temporary ACL is freed on every path.  Returns 0, or the error
       * from reading or writing the ACL.  The cred argument is not used in
       * this function.
       */
  654 static int
  655 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
  656     int file_owner_id, struct ucred *cred, struct thread *td)
  657 {
  658         int error;
  659         struct acl *aclp;
  660 
  661         aclp = acl_alloc(M_WAITOK);
  662         error = ufs_getacl_nfs4_internal(vp, aclp, td);
  663         /*
  664          * We don't have to handle EOPNOTSUPP here, as the filesystem claims
  665          * it supports ACLs.
  666          */
  667         if (error)
  668                 goto out;
  669 
  670         acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
  671         error = ufs_setacl_nfs4_internal(vp, aclp, td);
  672 
  673 out:
  674         acl_free(aclp);
  675         return (error);
  676 }
  677 #endif /* UFS_ACL */
  678 
  679 /*
  680  * Mark this file's access time for update for vfs_mark_atime().  This
  681  * is called from execve() and mmap().
       *
       * Only the IN_ACCESS mark is set here, under the vnode interlock; the
       * access time field itself is written when ufs_itimes_locked() next
       * processes the inode.  Always returns 0.
  682  */
  683 static int
  684 ufs_markatime(ap)
  685         struct vop_markatime_args /* {
  686                 struct vnode *a_vp;
  687         } */ *ap;
  688 {
  689         struct vnode *vp = ap->a_vp;
  690         struct inode *ip = VTOI(vp);
  691 
  692         VI_LOCK(vp);
  693         ip->i_flag |= IN_ACCESS;
  694         VI_UNLOCK(vp);
  695         /*
  696          * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there.
  697          */
  698         return (0);
  699 }
  700 
  701 /*
  702  * Change the mode on a file.
  703  * Inode must be locked before calling.
       *
       * Only the ALLPERMS bits of mode are applied.  Returns 0 on success, the
       * error from the VWRITE_ACL access check, EFTYPE when the sticky bit is
       * requested on a non-directory without PRIV_VFS_STICKYFILE, or the
       * error from the setgid/setuid privilege checks, the NFSv4 ACL update
       * or UFS_UPDATE().
  704  */
  705 static int
  706 ufs_chmod(vp, mode, cred, td)
  707         struct vnode *vp;
  708         int mode;
  709         struct ucred *cred;
  710         struct thread *td;
  711 {
  712         struct inode *ip = VTOI(vp);
  713         int error;
  714 
  715         /*
  716          * To modify the permissions on a file, the caller must pass
  717          * the VWRITE_ACL access check for that file.
  718          */
  719         if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
  720                 return (error);
  721         /*
  722          * Privileged processes may set the sticky bit on non-directories,
  723          * as well as set the setgid bit on a file with a group that the
  724          * process is not a member of.  Both of these are allowed in
  725          * jail(8).
  726          */
  727         if (vp->v_type != VDIR && (mode & S_ISTXT)) {
  728                 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
  729                         return (EFTYPE);
  730         }
  731         if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
  732                 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
  733                 if (error)
  734                         return (error);
  735         }
  736 
  737         /*
  738          * Deny setting setuid if we are not the file owner.
  739          */
  740         if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
  741                 error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
  742                 if (error)
  743                         return (error);
  744         }
  745 
              /*
               * Replace the ALLPERMS bits only; every other bit of i_mode is
               * preserved.  At this point error is 0 from the access check.
               */
  746         ip->i_mode &= ~ALLPERMS;
  747         ip->i_mode |= (mode & ALLPERMS);
  748         DIP_SET(ip, i_mode, ip->i_mode);
  749         ip->i_flag |= IN_CHANGE;
  750 #ifdef UFS_ACL
  751         if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
  752                 error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
  753 #endif
              /*
               * Write the inode out only if the ACL step (when compiled in)
               * succeeded and the change mark is still set.
               */
  754         if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
  755                 error = UFS_UPDATE(vp, 0);
  756 
  757         return (error);
  758 }
  759 
  760 /*
  761  * Perform chown operation on inode ip;
  762  * inode must be locked prior to call.
  763  */
  764 static int
  765 ufs_chown(vp, uid, gid, cred, td)
  766         struct vnode *vp;
  767         uid_t uid;
  768         gid_t gid;
  769         struct ucred *cred;
  770         struct thread *td;
  771 {
  772         struct inode *ip = VTOI(vp);
  773         uid_t ouid;
  774         gid_t ogid;
  775         int error = 0;
  776 #ifdef QUOTA
  777         int i;
  778         ufs2_daddr_t change;
  779 #endif
  780 
  781         if (uid == (uid_t)VNOVAL)
  782                 uid = ip->i_uid;
  783         if (gid == (gid_t)VNOVAL)
  784                 gid = ip->i_gid;
  785         /*
  786          * To modify the ownership of a file, must possess VADMIN for that
  787          * file.
  788          */
  789         if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
  790                 return (error);
  791         /*
  792          * To change the owner of a file, or change the group of a file to a
  793          * group of which we are not a member, the caller must have
  794          * privilege.
  795          */
  796         if (((uid != ip->i_uid && uid != cred->cr_uid) || 
  797             (gid != ip->i_gid && !groupmember(gid, cred))) &&
  798             (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
  799                 return (error);
  800         ogid = ip->i_gid;
  801         ouid = ip->i_uid;
  802 #ifdef QUOTA
  803         if ((error = getinoquota(ip)) != 0)
  804                 return (error);
  805         if (ouid == uid) {
  806                 dqrele(vp, ip->i_dquot[USRQUOTA]);
  807                 ip->i_dquot[USRQUOTA] = NODQUOT;
  808         }
  809         if (ogid == gid) {
  810                 dqrele(vp, ip->i_dquot[GRPQUOTA]);
  811                 ip->i_dquot[GRPQUOTA] = NODQUOT;
  812         }
  813         change = DIP(ip, i_blocks);
  814         (void) chkdq(ip, -change, cred, CHOWN);
  815         (void) chkiq(ip, -1, cred, CHOWN);
  816         for (i = 0; i < MAXQUOTAS; i++) {
  817                 dqrele(vp, ip->i_dquot[i]);
  818                 ip->i_dquot[i] = NODQUOT;
  819         }
  820 #endif
  821         ip->i_gid = gid;
  822         DIP_SET(ip, i_gid, gid);
  823         ip->i_uid = uid;
  824         DIP_SET(ip, i_uid, uid);
  825 #ifdef QUOTA
  826         if ((error = getinoquota(ip)) == 0) {
  827                 if (ouid == uid) {
  828                         dqrele(vp, ip->i_dquot[USRQUOTA]);
  829                         ip->i_dquot[USRQUOTA] = NODQUOT;
  830                 }
  831                 if (ogid == gid) {
  832                         dqrele(vp, ip->i_dquot[GRPQUOTA]);
  833                         ip->i_dquot[GRPQUOTA] = NODQUOT;
  834                 }
  835                 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
  836                         if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
  837                                 goto good;
  838                         else
  839                                 (void) chkdq(ip, -change, cred, CHOWN|FORCE);
  840                 }
  841                 for (i = 0; i < MAXQUOTAS; i++) {
  842                         dqrele(vp, ip->i_dquot[i]);
  843                         ip->i_dquot[i] = NODQUOT;
  844                 }
  845         }
  846         ip->i_gid = ogid;
  847         DIP_SET(ip, i_gid, ogid);
  848         ip->i_uid = ouid;
  849         DIP_SET(ip, i_uid, ouid);
  850         if (getinoquota(ip) == 0) {
  851                 if (ouid == uid) {
  852                         dqrele(vp, ip->i_dquot[USRQUOTA]);
  853                         ip->i_dquot[USRQUOTA] = NODQUOT;
  854                 }
  855                 if (ogid == gid) {
  856                         dqrele(vp, ip->i_dquot[GRPQUOTA]);
  857                         ip->i_dquot[GRPQUOTA] = NODQUOT;
  858                 }
  859                 (void) chkdq(ip, change, cred, FORCE|CHOWN);
  860                 (void) chkiq(ip, 1, cred, FORCE|CHOWN);
  861                 (void) getinoquota(ip);
  862         }
  863         return (error);
  864 good:
  865         if (getinoquota(ip))
  866                 panic("ufs_chown: lost quota");
  867 #endif /* QUOTA */
  868         ip->i_flag |= IN_CHANGE;
  869         if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
  870                 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
  871                         ip->i_mode &= ~(ISUID | ISGID);
  872                         DIP_SET(ip, i_mode, ip->i_mode);
  873                 }
  874         }
  875         error = UFS_UPDATE(vp, 0);
  876         return (error);
  877 }
  878 
  879 static int
  880 ufs_remove(ap)
  881         struct vop_remove_args /* {
  882                 struct vnode *a_dvp;
  883                 struct vnode *a_vp;
  884                 struct componentname *a_cnp;
  885         } */ *ap;
  886 {
  887         struct inode *ip;
  888         struct vnode *vp = ap->a_vp;
  889         struct vnode *dvp = ap->a_dvp;
  890         int error;
  891         struct thread *td;
  892 
  893         td = curthread;
  894         ip = VTOI(vp);
  895         if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
  896             (VTOI(dvp)->i_flags & APPEND)) {
  897                 error = EPERM;
  898                 goto out;
  899         }
  900 #ifdef UFS_GJOURNAL
  901         ufs_gjournal_orphan(vp);
  902 #endif
  903         error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
  904         if (ip->i_nlink <= 0)
  905                 vp->v_vflag |= VV_NOSYNC;
  906         if ((ip->i_flags & SF_SNAPSHOT) != 0) {
  907                 /*
  908                  * Avoid deadlock where another thread is trying to
  909                  * update the inodeblock for dvp and is waiting on
  910                  * snaplk.  Temporary unlock the vnode lock for the
  911                  * unlinked file and sync the directory.  This should
  912                  * allow vput() of the directory to not block later on
  913                  * while holding the snapshot vnode locked, assuming
  914                  * that the directory hasn't been unlinked too.
  915                  */
  916                 VOP_UNLOCK(vp, 0);
  917                 (void) VOP_FSYNC(dvp, MNT_WAIT, td);
  918                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  919         }
  920 out:
  921         return (error);
  922 }
  923 
  924 static void
  925 print_bad_link_count(const char *funcname, struct vnode *dvp)
  926 {
  927         struct inode *dip;
  928 
  929         dip = VTOI(dvp);
  930         uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n",
  931             funcname, dip->i_effnlink, (intmax_t)dip->i_number,
  932             dvp->v_mount->mnt_stat.f_mntonname);
  933 }
  934 
  935 /*
  936  * link vnode call
  937  */
  938 static int
  939 ufs_link(ap)
  940         struct vop_link_args /* {
  941                 struct vnode *a_tdvp;
  942                 struct vnode *a_vp;
  943                 struct componentname *a_cnp;
  944         } */ *ap;
  945 {
  946         struct vnode *vp = ap->a_vp;
  947         struct vnode *tdvp = ap->a_tdvp;
  948         struct componentname *cnp = ap->a_cnp;
  949         struct inode *ip;
  950         struct direct newdir;
  951         int error;
  952 
  953 #ifdef INVARIANTS
  954         if ((cnp->cn_flags & HASBUF) == 0)
  955                 panic("ufs_link: no name");
  956 #endif
  957         if (VTOI(tdvp)->i_effnlink < 2) {
  958                 print_bad_link_count("ufs_link", tdvp);
  959                 error = EINVAL;
  960                 goto out;
  961         }
  962         ip = VTOI(vp);
  963         if (ip->i_nlink >= UFS_LINK_MAX) {
  964                 error = EMLINK;
  965                 goto out;
  966         }
  967         /*
  968          * The file may have been removed after namei droped the original
  969          * lock.
  970          */
  971         if (ip->i_effnlink == 0) {
  972                 error = ENOENT;
  973                 goto out;
  974         }
  975         if (ip->i_flags & (IMMUTABLE | APPEND)) {
  976                 error = EPERM;
  977                 goto out;
  978         }
  979         ip->i_effnlink++;
  980         ip->i_nlink++;
  981         DIP_SET(ip, i_nlink, ip->i_nlink);
  982         ip->i_flag |= IN_CHANGE;
  983         if (DOINGSOFTDEP(vp))
  984                 softdep_setup_link(VTOI(tdvp), ip);
  985         error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp));
  986         if (!error) {
  987                 ufs_makedirentry(ip, cnp, &newdir);
  988                 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
  989         }
  990 
  991         if (error) {
  992                 ip->i_effnlink--;
  993                 ip->i_nlink--;
  994                 DIP_SET(ip, i_nlink, ip->i_nlink);
  995                 ip->i_flag |= IN_CHANGE;
  996                 if (DOINGSOFTDEP(vp))
  997                         softdep_revert_link(VTOI(tdvp), ip);
  998         }
  999 out:
 1000         return (error);
 1001 }
 1002 
 1003 /*
 1004  * whiteout vnode call
 1005  */
 1006 static int
 1007 ufs_whiteout(ap)
 1008         struct vop_whiteout_args /* {
 1009                 struct vnode *a_dvp;
 1010                 struct componentname *a_cnp;
 1011                 int a_flags;
 1012         } */ *ap;
 1013 {
 1014         struct vnode *dvp = ap->a_dvp;
 1015         struct componentname *cnp = ap->a_cnp;
 1016         struct direct newdir;
 1017         int error = 0;
 1018 
 1019         switch (ap->a_flags) {
 1020         case LOOKUP:
 1021                 /* 4.4 format directories support whiteout operations */
 1022                 if (dvp->v_mount->mnt_maxsymlinklen > 0)
 1023                         return (0);
 1024                 return (EOPNOTSUPP);
 1025 
 1026         case CREATE:
 1027                 /* create a new directory whiteout */
 1028 #ifdef INVARIANTS
 1029                 if ((cnp->cn_flags & SAVENAME) == 0)
 1030                         panic("ufs_whiteout: missing name");
 1031                 if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 1032                         panic("ufs_whiteout: old format filesystem");
 1033 #endif
 1034 
 1035                 newdir.d_ino = UFS_WINO;
 1036                 newdir.d_namlen = cnp->cn_namelen;
 1037                 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 1038                 newdir.d_type = DT_WHT;
 1039                 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
 1040                 break;
 1041 
 1042         case DELETE:
 1043                 /* remove an existing directory whiteout */
 1044 #ifdef INVARIANTS
 1045                 if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 1046                         panic("ufs_whiteout: old format filesystem");
 1047 #endif
 1048 
 1049                 cnp->cn_flags &= ~DOWHITEOUT;
 1050                 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
 1051                 break;
 1052         default:
 1053                 panic("ufs_whiteout: unknown op");
 1054         }
 1055         return (error);
 1056 }
 1057 
 1058 static volatile int rename_restarts;	/* bumped with atomic_add_int() by ufs_rename() before each jump back to its relock label */
 1059 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
 1060     __DEVOLATILE(int *, &rename_restarts), 0,
 1061     "Times rename had to restart due to lock contention");
 1062 
 1063 /*
 1064  * Rename system call.
 1065  *      rename("foo", "bar");
 1066  * is essentially
 1067  *      unlink("bar");
 1068  *      link("foo", "bar");
 1069  *      unlink("foo");
 1070  * but ``atomically''.  Can't do full commit without saving state in the
 1071  * inode on disk which isn't feasible at this time.  Best we can do is
 1072  * always guarantee the target exists.
 1073  *
 1074  * Basic algorithm is:
 1075  *
 1076  * 1) Bump link count on source while we're linking it to the
 1077  *    target.  This also ensures the inode won't be deleted out
 1078  *    from underneath us while we work (it may be truncated by
 1079  *    a concurrent `trunc' or `open' for creation).
 1080  * 2) Link source to destination.  If destination already exists,
 1081  *    delete it first.
 1082  * 3) Unlink source reference to inode if still around. If a
 1083  *    directory was moved and the parent of the destination
 1084  *    is different from the source, patch the ".." entry in the
 1085  *    directory.
 1086  */
 1087 static int
 1088 ufs_rename(ap)
 1089         struct vop_rename_args  /* {
 1090                 struct vnode *a_fdvp;
 1091                 struct vnode *a_fvp;
 1092                 struct componentname *a_fcnp;
 1093                 struct vnode *a_tdvp;
 1094                 struct vnode *a_tvp;
 1095                 struct componentname *a_tcnp;
 1096         } */ *ap;
 1097 {
 1098         struct vnode *tvp = ap->a_tvp;
 1099         struct vnode *tdvp = ap->a_tdvp;
 1100         struct vnode *fvp = ap->a_fvp;
 1101         struct vnode *fdvp = ap->a_fdvp;
 1102         struct vnode *nvp;
 1103         struct componentname *tcnp = ap->a_tcnp;
 1104         struct componentname *fcnp = ap->a_fcnp;
 1105         struct thread *td = fcnp->cn_thread;
 1106         struct inode *fip, *tip, *tdp, *fdp;
 1107         struct direct newdir;
 1108         off_t endoff;
 1109         int doingdirectory, newparent;
 1110         int error = 0;
 1111         struct mount *mp;
 1112         ino_t ino;
 1113 
 1114 #ifdef INVARIANTS
 1115         if ((tcnp->cn_flags & HASBUF) == 0 ||
 1116             (fcnp->cn_flags & HASBUF) == 0)
 1117                 panic("ufs_rename: no name");
 1118 #endif
 1119         endoff = 0;
 1120         mp = tdvp->v_mount;
 1121         VOP_UNLOCK(tdvp, 0);
 1122         if (tvp && tvp != tdvp)
 1123                 VOP_UNLOCK(tvp, 0);
 1124         /*
 1125          * Check for cross-device rename.
 1126          */
 1127         if ((fvp->v_mount != tdvp->v_mount) ||
 1128             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1129                 error = EXDEV;
 1130                 mp = NULL;
 1131                 goto releout;
 1132         }
 1133 relock:
 1134         /* 
 1135          * We need to acquire 2 to 4 locks depending on whether tvp is NULL
 1136          * and fdvp and tdvp are the same directory.  Subsequently we need
 1137          * to double-check all paths and in the directory rename case we
 1138          * need to verify that we are not creating a directory loop.  To
 1139          * handle this we acquire all but fdvp using non-blocking
 1140          * acquisitions.  If we fail to acquire any lock in the path we will
 1141          * drop all held locks, acquire the new lock in a blocking fashion,
 1142          * and then release it and restart the rename.  This acquire/release
 1143          * step ensures that we do not spin on a lock waiting for release.
 1144          */
 1145         error = vn_lock(fdvp, LK_EXCLUSIVE);
 1146         if (error)
 1147                 goto releout;
 1148         if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 1149                 VOP_UNLOCK(fdvp, 0);
 1150                 error = vn_lock(tdvp, LK_EXCLUSIVE);
 1151                 if (error)
 1152                         goto releout;
 1153                 VOP_UNLOCK(tdvp, 0);
 1154                 atomic_add_int(&rename_restarts, 1);
 1155                 goto relock;
 1156         }
 1157         /*
 1158          * Re-resolve fvp to be certain it still exists and fetch the
 1159          * correct vnode.
 1160          */
 1161         error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 1162         if (error) {
 1163                 VOP_UNLOCK(fdvp, 0);
 1164                 VOP_UNLOCK(tdvp, 0);
 1165                 goto releout;
 1166         }
 1167         error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 1168         if (error) {
 1169                 VOP_UNLOCK(fdvp, 0);
 1170                 VOP_UNLOCK(tdvp, 0);
 1171                 if (error != EBUSY)
 1172                         goto releout;
 1173                 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 1174                 if (error != 0)
 1175                         goto releout;
 1176                 VOP_UNLOCK(nvp, 0);
 1177                 vrele(fvp);
 1178                 fvp = nvp;
 1179                 atomic_add_int(&rename_restarts, 1);
 1180                 goto relock;
 1181         }
 1182         vrele(fvp);
 1183         fvp = nvp;
 1184         /*
 1185          * Re-resolve tvp and acquire the vnode lock if present.
 1186          */
 1187         error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
 1188         if (error != 0 && error != EJUSTRETURN) {
 1189                 VOP_UNLOCK(fdvp, 0);
 1190                 VOP_UNLOCK(tdvp, 0);
 1191                 VOP_UNLOCK(fvp, 0);
 1192                 goto releout;
 1193         }
 1194         /*
 1195          * If tvp disappeared we just carry on.
 1196          */
 1197         if (error == EJUSTRETURN && tvp != NULL) {
 1198                 vrele(tvp);
 1199                 tvp = NULL;
 1200         }
 1201         /*
 1202          * Get the tvp ino if the lookup succeeded.  We may have to restart
 1203          * if the non-blocking acquire fails.
 1204          */
 1205         if (error == 0) {
 1206                 nvp = NULL;
 1207                 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 1208                 if (tvp)
 1209                         vrele(tvp);
 1210                 tvp = nvp;
 1211                 if (error) {
 1212                         VOP_UNLOCK(fdvp, 0);
 1213                         VOP_UNLOCK(tdvp, 0);
 1214                         VOP_UNLOCK(fvp, 0);
 1215                         if (error != EBUSY)
 1216                                 goto releout;
 1217                         error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 1218                         if (error != 0)
 1219                                 goto releout;
 1220                         vput(nvp);
 1221                         atomic_add_int(&rename_restarts, 1);
 1222                         goto relock;
 1223                 }
 1224         }
 1225         fdp = VTOI(fdvp);
 1226         fip = VTOI(fvp);
 1227         tdp = VTOI(tdvp);
 1228         tip = NULL;
 1229         if (tvp)
 1230                 tip = VTOI(tvp);
 1231         if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 1232             (VTOI(tdvp)->i_flags & APPEND))) {
 1233                 error = EPERM;
 1234                 goto unlockout;
 1235         }
 1236         /*
 1237          * Renaming a file to itself has no effect.  The upper layers should
 1238          * not call us in that case.  However, things could change after
 1239          * we drop the locks above.
 1240          */
 1241         if (fvp == tvp) {
 1242                 error = 0;
 1243                 goto unlockout;
 1244         }
 1245         doingdirectory = 0;
 1246         newparent = 0;
 1247         ino = fip->i_number;
 1248         if (fip->i_nlink >= UFS_LINK_MAX) {
 1249                 error = EMLINK;
 1250                 goto unlockout;
 1251         }
 1252         if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 1253             || (fdp->i_flags & APPEND)) {
 1254                 error = EPERM;
 1255                 goto unlockout;
 1256         }
 1257         if ((fip->i_mode & IFMT) == IFDIR) {
 1258                 /*
 1259                  * Avoid ".", "..", and aliases of "." for obvious reasons.
 1260                  */
 1261                 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 1262                     fdp == fip ||
 1263                     (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 1264                         error = EINVAL;
 1265                         goto unlockout;
 1266                 }
 1267                 if (fdp->i_number != tdp->i_number)
 1268                         newparent = tdp->i_number;
 1269                 doingdirectory = 1;
 1270         }
 1271         if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
 1272             (tvp != NULL && tvp->v_type == VDIR &&
 1273             tvp->v_mountedhere != NULL)) {
 1274                 error = EXDEV;
 1275                 goto unlockout;
 1276         }
 1277 
 1278         /*
 1279          * If ".." must be changed (ie the directory gets a new
 1280          * parent) then the source directory must not be in the
 1281          * directory hierarchy above the target, as this would
 1282          * orphan everything below the source directory. Also
 1283          * the user must have write permission in the source so
 1284          * as to be able to change "..".
 1285          */
 1286         if (doingdirectory && newparent) {
 1287                 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 1288                 if (error)
 1289                         goto unlockout;
 1290                 error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
 1291                     &ino);
 1292                 /*
 1293                  * We encountered a lock that we have to wait for.  Unlock
 1294                  * everything else and VGET before restarting.
 1295                  */
 1296                 if (ino) {
 1297                         VOP_UNLOCK(fdvp, 0);
 1298                         VOP_UNLOCK(fvp, 0);
 1299                         VOP_UNLOCK(tdvp, 0);
 1300                         if (tvp)
 1301                                 VOP_UNLOCK(tvp, 0);
 1302                         error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
 1303                         if (error == 0)
 1304                                 vput(nvp);
 1305                         atomic_add_int(&rename_restarts, 1);
 1306                         goto relock;
 1307                 }
 1308                 if (error)
 1309                         goto unlockout;
 1310                 if ((tcnp->cn_flags & SAVESTART) == 0)
 1311                         panic("ufs_rename: lost to startdir");
 1312         }
 1313         if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
 1314             tdp->i_effnlink == 0)
 1315                 panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 1316 
 1317         /*
 1318          * 1) Bump link count while we're moving stuff
 1319          *    around.  If we crash somewhere before
 1320          *    completing our work, the link count
 1321          *    may be wrong, but correctable.
 1322          */
 1323         fip->i_effnlink++;
 1324         fip->i_nlink++;
 1325         DIP_SET(fip, i_nlink, fip->i_nlink);
 1326         fip->i_flag |= IN_CHANGE;
 1327         if (DOINGSOFTDEP(fvp))
 1328                 softdep_setup_link(tdp, fip);
 1329         error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp));
 1330         if (error)
 1331                 goto bad;
 1332 
 1333         /*
 1334          * 2) If target doesn't exist, link the target
 1335          *    to the source and unlink the source.
 1336          *    Otherwise, rewrite the target directory
 1337          *    entry to reference the source inode and
 1338          *    expunge the original entry's existence.
 1339          */
 1340         if (tip == NULL) {
 1341                 if (ITODEV(tdp) != ITODEV(fip))
 1342                         panic("ufs_rename: EXDEV");
 1343                 if (doingdirectory && newparent) {
 1344                         /*
 1345                          * Account for ".." in new directory.
 1346                          * When source and destination have the same
 1347                          * parent we don't adjust the link count.  The
 1348                          * actual link modification is completed when
 1349                          * .. is rewritten below.
 1350                          */
 1351                         if (tdp->i_nlink >= UFS_LINK_MAX) {
 1352                                 error = EMLINK;
 1353                                 goto bad;
 1354                         }
 1355                 }
 1356                 ufs_makedirentry(fip, tcnp, &newdir);
 1357                 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
 1358                 if (error)
 1359                         goto bad;
 1360                 /* Setup tdvp for directory compaction if needed. */
 1361                 if (tdp->i_count && tdp->i_endoff &&
 1362                     tdp->i_endoff < tdp->i_size)
 1363                         endoff = tdp->i_endoff;
 1364         } else {
 1365                 if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip))
 1366                         panic("ufs_rename: EXDEV");
 1367                 /*
 1368                  * Short circuit rename(foo, foo).
 1369                  */
 1370                 if (tip->i_number == fip->i_number)
 1371                         panic("ufs_rename: same file");
 1372                 /*
 1373                  * If the parent directory is "sticky", then the caller
 1374                  * must possess VADMIN for the parent directory, or the
 1375                  * destination of the rename.  This implements append-only
 1376                  * directories.
 1377                  */
 1378                 if ((tdp->i_mode & S_ISTXT) &&
 1379                     VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
 1380                     VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
 1381                         error = EPERM;
 1382                         goto bad;
 1383                 }
 1384                 /*
 1385                  * Target must be empty if a directory and have no links
 1386                  * to it. Also, ensure source and target are compatible
 1387                  * (both directories, or both not directories).
 1388                  */
 1389                 if ((tip->i_mode & IFMT) == IFDIR) {
 1390                         if ((tip->i_effnlink > 2) ||
 1391                             !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
 1392                                 error = ENOTEMPTY;
 1393                                 goto bad;
 1394                         }
 1395                         if (!doingdirectory) {
 1396                                 error = ENOTDIR;
 1397                                 goto bad;
 1398                         }
 1399                         cache_purge(tdvp);
 1400                 } else if (doingdirectory) {
 1401                         error = EISDIR;
 1402                         goto bad;
 1403                 }
 1404                 if (doingdirectory) {
 1405                         if (!newparent) {
 1406                                 tdp->i_effnlink--;
 1407                                 if (DOINGSOFTDEP(tdvp))
 1408                                         softdep_change_linkcnt(tdp);
 1409                         }
 1410                         tip->i_effnlink--;
 1411                         if (DOINGSOFTDEP(tvp))
 1412                                 softdep_change_linkcnt(tip);
 1413                 }
 1414                 error = ufs_dirrewrite(tdp, tip, fip->i_number,
 1415                     IFTODT(fip->i_mode),
 1416                     (doingdirectory && newparent) ? newparent : doingdirectory);
 1417                 if (error) {
 1418                         if (doingdirectory) {
 1419                                 if (!newparent) {
 1420                                         tdp->i_effnlink++;
 1421                                         if (DOINGSOFTDEP(tdvp))
 1422                                                 softdep_change_linkcnt(tdp);
 1423                                 }
 1424                                 tip->i_effnlink++;
 1425                                 if (DOINGSOFTDEP(tvp))
 1426                                         softdep_change_linkcnt(tip);
 1427                         }
 1428                 }
 1429                 if (doingdirectory && !DOINGSOFTDEP(tvp)) {
 1430                         /*
 1431                          * The only stuff left in the directory is "."
 1432                          * and "..". The "." reference is inconsequential
 1433                          * since we are quashing it. We have removed the "."
 1434                          * reference and the reference in the parent directory,
 1435                          * but there may be other hard links. The soft
 1436                          * dependency code will arrange to do these operations
 1437                          * after the parent directory entry has been deleted on
 1438                          * disk, so when running with that code we avoid doing
 1439                          * them now.
 1440                          */
 1441                         if (!newparent) {
 1442                                 tdp->i_nlink--;
 1443                                 DIP_SET(tdp, i_nlink, tdp->i_nlink);
 1444                                 tdp->i_flag |= IN_CHANGE;
 1445                         }
 1446                         tip->i_nlink--;
 1447                         DIP_SET(tip, i_nlink, tip->i_nlink);
 1448                         tip->i_flag |= IN_CHANGE;
 1449                 }
 1450         }
 1451 
 1452         /*
 1453          * 3) Unlink the source.  We have to resolve the path again to
 1454          * fixup the directory offset and count for ufs_dirremove.
 1455          */
 1456         if (fdvp == tdvp) {
 1457                 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 1458                 if (error)
 1459                         panic("ufs_rename: from entry went away!");
 1460                 if (ino != fip->i_number)
 1461                         panic("ufs_rename: ino mismatch %ju != %ju\n",
 1462                             (uintmax_t)ino, (uintmax_t)fip->i_number);
 1463         }
 1464         /*
 1465          * If the source is a directory with a
 1466          * new parent, the link count of the old
 1467          * parent directory must be decremented
 1468          * and ".." set to point to the new parent.
 1469          */
 1470         if (doingdirectory && newparent) {
 1471                 /*
 1472                  * If tip exists we simply use its link, otherwise we must
 1473                  * add a new one.
 1474                  */
 1475                 if (tip == NULL) {
 1476                         tdp->i_effnlink++;
 1477                         tdp->i_nlink++;
 1478                         DIP_SET(tdp, i_nlink, tdp->i_nlink);
 1479                         tdp->i_flag |= IN_CHANGE;
 1480                         if (DOINGSOFTDEP(tdvp))
 1481                                 softdep_setup_dotdot_link(tdp, fip);
 1482                         error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) &&
 1483                             !DOINGASYNC(tdvp));
 1484                         /* Don't go to bad here as the new link exists. */
 1485                         if (error)
 1486                                 goto unlockout;
 1487                 } else if (DOINGSUJ(tdvp))
 1488                         /* Journal must account for each new link. */
 1489                         softdep_setup_dotdot_link(tdp, fip);
 1490                 fip->i_offset = mastertemplate.dot_reclen;
 1491                 ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
 1492                 cache_purge(fdvp);
 1493         }
 1494         error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
 1495         /*
 1496          * The kern_renameat() looks up the fvp using the DELETE flag, which
 1497          * causes the removal of the name cache entry for fvp.
 1498          * As the relookup of the fvp is done in two steps:
 1499          * ufs_lookup_ino() and then VFS_VGET(), another thread might do a
 1500          * normal lookup of the from name just before the VFS_VGET() call,
 1501          * causing the cache entry to be re-instantiated.
 1502          *
 1503          * The same issue also applies to tvp if it exists as
 1504          * otherwise we may have a stale name cache entry for the new
 1505          * name that references the old i-node if it has other links
 1506          * or open file descriptors.
 1507          */
 1508         cache_purge(fvp);
 1509         if (tvp)
 1510                 cache_purge(tvp);
 1511         cache_purge_negative(tdvp);
 1512 
 1513 unlockout:
 1514         vput(fdvp);
 1515         vput(fvp);
 1516         if (tvp)
 1517                 vput(tvp);
 1518         /*
 1519          * If compaction or fsync was requested do it now that other locks
 1520          * are no longer needed.
 1521          */
 1522         if (error == 0 && endoff != 0) {
 1523                 error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
 1524                     (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
 1525                 if (error != 0)
 1526                         vn_printf(tdvp,
 1527                             "ufs_rename: failed to truncate, error %d\n",
 1528                             error);
 1529 #ifdef UFS_DIRHASH
 1530                 else if (tdp->i_dirhash != NULL)
 1531                         ufsdirhash_dirtrunc(tdp, endoff);
 1532 #endif
 1533                 /*
 1534                  * Even if the directory compaction failed, rename was
 1535                  * successful.  Do not propagate a UFS_TRUNCATE() error
 1536                  * to the caller.
 1537                  */
 1538                 error = 0;
 1539         }
 1540         if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
 1541                 error = VOP_FSYNC(tdvp, MNT_WAIT, td);
 1542         vput(tdvp);
 1543         return (error);
 1544 
 1545 bad:
 1546         fip->i_effnlink--;
 1547         fip->i_nlink--;
 1548         DIP_SET(fip, i_nlink, fip->i_nlink);
 1549         fip->i_flag |= IN_CHANGE;
 1550         if (DOINGSOFTDEP(fvp))
 1551                 softdep_revert_link(tdp, fip);
 1552         goto unlockout;
 1553 
 1554 releout:
 1555         vrele(fdvp);
 1556         vrele(fvp);
 1557         vrele(tdvp);
 1558         if (tvp)
 1559                 vrele(tvp);
 1560 
 1561         return (error);
 1562 }
 1563 
 1564 #ifdef UFS_ACL
 1565 static int
 1566 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
 1567     mode_t dmode, struct ucred *cred, struct thread *td)
 1568 {
 1569         int error;
 1570         struct inode *ip = VTOI(tvp);
 1571         struct acl *dacl, *acl;
 1572 
 1573         acl = acl_alloc(M_WAITOK);
 1574         dacl = acl_alloc(M_WAITOK);
 1575 
 1576         /*
 1577          * Retrieve default ACL from parent, if any.
 1578          */
 1579         error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 1580         switch (error) {
 1581         case 0:
 1582                 /*
 1583                  * Retrieved a default ACL, so merge mode and ACL if
 1584                  * necessary.  If the ACL is empty, fall through to
 1585                  * the "not defined or available" case.
 1586                  */
 1587                 if (acl->acl_cnt != 0) {
 1588                         dmode = acl_posix1e_newfilemode(dmode, acl);
 1589                         ip->i_mode = dmode;
 1590                         DIP_SET(ip, i_mode, dmode);
 1591                         *dacl = *acl;
 1592                         ufs_sync_acl_from_inode(ip, acl);
 1593                         break;
 1594                 }
 1595                 /* FALLTHROUGH */
 1596 
 1597         case EOPNOTSUPP:
 1598                 /*
 1599                  * Just use the mode as-is.
 1600                  */
 1601                 ip->i_mode = dmode;
 1602                 DIP_SET(ip, i_mode, dmode);
 1603                 error = 0;
 1604                 goto out;
 1605         
 1606         default:
 1607                 goto out;
 1608         }
 1609 
 1610         /*
 1611          * XXX: If we abort now, will Soft Updates notify the extattr
 1612          * code that the EAs for the file need to be released?
 1613          */
 1614         error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 1615         if (error == 0)
 1616                 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
 1617         switch (error) {
 1618         case 0:
 1619                 break;
 1620 
 1621         case EOPNOTSUPP:
 1622                 /*
 1623                  * XXX: This should not happen, as EOPNOTSUPP above
 1624                  * was supposed to free acl.
 1625                  */
 1626                 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
 1627                 /*
 1628                 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
 1629                  */
 1630                 break;
 1631 
 1632         default:
 1633                 goto out;
 1634         }
 1635 
 1636 out:
 1637         acl_free(acl);
 1638         acl_free(dacl);
 1639 
 1640         return (error);
 1641 }
 1642 
 1643 static int
 1644 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
 1645     mode_t mode, struct ucred *cred, struct thread *td)
 1646 {
 1647         int error;
 1648         struct inode *ip = VTOI(tvp);
 1649         struct acl *acl;
 1650 
 1651         acl = acl_alloc(M_WAITOK);
 1652 
 1653         /*
 1654          * Retrieve default ACL for parent, if any.
 1655          */
 1656         error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 1657         switch (error) {
 1658         case 0:
 1659                 /*
 1660                  * Retrieved a default ACL, so merge mode and ACL if
 1661                  * necessary.
 1662                  */
 1663                 if (acl->acl_cnt != 0) {
 1664                         /*
 1665                          * Two possible ways for default ACL to not
 1666                          * be present.  First, the EA can be
 1667                          * undefined, or second, the default ACL can
 1668                          * be blank.  If it's blank, fall through to
 1669                          * the it's not defined case.
 1670                          */
 1671                         mode = acl_posix1e_newfilemode(mode, acl);
 1672                         ip->i_mode = mode;
 1673                         DIP_SET(ip, i_mode, mode);
 1674                         ufs_sync_acl_from_inode(ip, acl);
 1675                         break;
 1676                 }
 1677                 /* FALLTHROUGH */
 1678 
 1679         case EOPNOTSUPP:
 1680                 /*
 1681                  * Just use the mode as-is.
 1682                  */
 1683                 ip->i_mode = mode;
 1684                 DIP_SET(ip, i_mode, mode);
 1685                 error = 0;
 1686                 goto out;
 1687 
 1688         default:
 1689                 goto out;
 1690         }
 1691 
 1692         /*
 1693          * XXX: If we abort now, will Soft Updates notify the extattr
 1694          * code that the EAs for the file need to be released?
 1695          */
 1696         error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 1697         switch (error) {
 1698         case 0:
 1699                 break;
 1700 
 1701         case EOPNOTSUPP:
 1702                 /*
 1703                  * XXX: This should not happen, as EOPNOTSUPP above was
 1704                  * supposed to free acl.
 1705                  */
 1706                 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 1707                     "but no VOP_SETACL()\n");
 1708                 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 1709                     "but no VOP_SETACL()"); */
 1710                 break;
 1711 
 1712         default:
 1713                 goto out;
 1714         }
 1715 
 1716 out:
 1717         acl_free(acl);
 1718 
 1719         return (error);
 1720 }
 1721 
 1722 static int
 1723 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
 1724     mode_t child_mode, struct ucred *cred, struct thread *td)
 1725 {
 1726         int error;
 1727         struct acl *parent_aclp, *child_aclp;
 1728 
 1729         parent_aclp = acl_alloc(M_WAITOK);
 1730         child_aclp = acl_alloc(M_WAITOK | M_ZERO);
 1731 
 1732         error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
 1733         if (error)
 1734                 goto out;
 1735         acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
 1736             child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
 1737         error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
 1738         if (error)
 1739                 goto out;
 1740 out:
 1741         acl_free(parent_aclp);
 1742         acl_free(child_aclp);
 1743 
 1744         return (error);
 1745 }
 1746 #endif
 1747 
 1748 /*
 1749  * Mkdir system call
       *
       * Allocates an inode for the new directory, initializes its first
       * block with "." and ".." entries, and then enters the new name in
       * the parent directory a_dvp.  On success the new vnode is stored
       * in *a_vpp and 0 is returned.  On failure an errno value is
       * returned: EMLINK if the parent's link count has reached
       * UFS_LINK_MAX, EINVAL if the parent's effective link count is
       * below 2, or whatever error a later step reported.  On every
       * failure after the inode has been allocated, the new vnode is
       * released with vput() before returning.
 1750  */
 1751 static int
 1752 ufs_mkdir(ap)
 1753         struct vop_mkdir_args /* {
 1754                 struct vnode *a_dvp;
 1755                 struct vnode **a_vpp;
 1756                 struct componentname *a_cnp;
 1757                 struct vattr *a_vap;
 1758         } */ *ap;
 1759 {
 1760         struct vnode *dvp = ap->a_dvp;
 1761         struct vattr *vap = ap->a_vap;
 1762         struct componentname *cnp = ap->a_cnp;
 1763         struct inode *ip, *dp;
 1764         struct vnode *tvp;
 1765         struct buf *bp;
 1766         struct dirtemplate dirtemplate, *dtp;
 1767         struct direct newdir;
 1768         int error, dmode;
 1769         long blkoff;
 1770 
 1771 #ifdef INVARIANTS
 1772         if ((cnp->cn_flags & HASBUF) == 0)
 1773                 panic("ufs_mkdir: no name");
 1774 #endif
 1775         dp = VTOI(dvp);
              /* The new directory's ".." will add a link to the parent. */
 1776         if (dp->i_nlink >= UFS_LINK_MAX) {
 1777                 error = EMLINK;
 1778                 goto out;
 1779         }
              /* Keep only the permission bits and mark the mode as a directory. */
 1780         dmode = vap->va_mode & 0777;
 1781         dmode |= IFDIR;
 1782         /*
 1783          * Must simulate part of ufs_makeinode here to acquire the inode,
 1784          * but not have it entered in the parent directory. The entry is
 1785          * made later after writing "." and ".." entries.
 1786          */
 1787         if (dp->i_effnlink < 2) {
 1788                 print_bad_link_count("ufs_mkdir", dvp);
 1789                 error = EINVAL;
 1790                 goto out;
 1791         }
 1792         error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
 1793         if (error)
 1794                 goto out;
 1795         ip = VTOI(tvp);
              /* The new directory takes the group of its parent. */
 1796         ip->i_gid = dp->i_gid;
 1797         DIP_SET(ip, i_gid, dp->i_gid);
 1798 #ifdef SUIDDIR
 1799         {
 1800 #ifdef QUOTA
 1801                 struct ucred ucred, *ucp;
 1802                 gid_t ucred_group;
 1803                 ucp = cnp->cn_cred;
 1804 #endif
 1805                 /*
 1806                  * If we are hacking owners here, (only do this where told to)
 1807                  * and we are not giving it TO root, (would subvert quotas)
 1808                  * then go ahead and give it to the other user.
 1809                  * The new directory also inherits the SUID bit.
 1810                  * If user's UID and dir UID are the same,
 1811                  * 'give it away' so that the SUID is still forced on.
 1812                  */
 1813                 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 1814                     (dp->i_mode & ISUID) && dp->i_uid) {
 1815                         dmode |= ISUID;
 1816                         ip->i_uid = dp->i_uid;
 1817                         DIP_SET(ip, i_uid, dp->i_uid);
 1818 #ifdef QUOTA
 1819                         if (dp->i_uid != cnp->cn_cred->cr_uid) {
 1820                                 /*
 1821                                  * Make sure the correct user gets charged
 1822                                  * for the space.
 1823                                  * Make a dummy credential for the victim.
 1824                                  * XXX This seems to never be accessed out of
 1825                                  * our context so a stack variable is ok.
 1826                                  */
 1827                                 refcount_init(&ucred.cr_ref, 1);
 1828                                 ucred.cr_uid = ip->i_uid;
 1829                                 ucred.cr_ngroups = 1;
 1830                                 ucred.cr_groups = &ucred_group;
 1831                                 ucred.cr_groups[0] = dp->i_gid;
 1832                                 ucp = &ucred;
 1833                         }
 1834 #endif
 1835                 } else {
 1836                         ip->i_uid = cnp->cn_cred->cr_uid;
 1837                         DIP_SET(ip, i_uid, ip->i_uid);
 1838                 }
 1839 #ifdef QUOTA
                      /*
                       * Quota setup failed.  The parent's link count has not
                       * been bumped yet, so release the new inode and return
                       * directly instead of unwinding through "bad".
                       */
 1840                 if ((error = getinoquota(ip)) ||
 1841                     (error = chkiq(ip, 1, ucp, 0))) {
 1842                         if (DOINGSOFTDEP(tvp))
 1843                                 softdep_revert_link(dp, ip);
 1844                         UFS_VFREE(tvp, ip->i_number, dmode);
 1845                         vput(tvp);
 1846                         return (error);
 1847                 }
 1848 #endif
 1849         }
 1850 #else   /* !SUIDDIR */
 1851         ip->i_uid = cnp->cn_cred->cr_uid;
 1852         DIP_SET(ip, i_uid, ip->i_uid);
 1853 #ifdef QUOTA
              /*
               * Quota setup failed.  The parent's link count has not been
               * bumped yet, so release the new inode and return directly
               * instead of unwinding through "bad".
               */
 1854         if ((error = getinoquota(ip)) ||
 1855             (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 1856                 if (DOINGSOFTDEP(tvp))
 1857                         softdep_revert_link(dp, ip);
 1858                 UFS_VFREE(tvp, ip->i_number, dmode);
 1859                 vput(tvp);
 1860                 return (error);
 1861         }
 1862 #endif
 1863 #endif  /* !SUIDDIR */
 1864         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 1865         ip->i_mode = dmode;
 1866         DIP_SET(ip, i_mode, dmode);
 1867         tvp->v_type = VDIR;     /* Rest init'd in getnewvnode(). */
              /*
               * The new directory starts with a link count of two
               * (presumably its name in the parent plus its own ".").
               */
 1868         ip->i_effnlink = 2;
 1869         ip->i_nlink = 2;
 1870         DIP_SET(ip, i_nlink, 2);
 1871 
              /* When the lookup flagged a whiteout, mark the new directory opaque. */
 1872         if (cnp->cn_flags & ISWHITEOUT) {
 1873                 ip->i_flags |= UF_OPAQUE;
 1874                 DIP_SET(ip, i_flags, ip->i_flags);
 1875         }
 1876 
 1877         /*
 1878          * Bump link count in parent directory to reflect work done below.
 1879          * Should be done before reference is created so cleanup is
 1880          * possible if we crash.
 1881          */
 1882         dp->i_effnlink++;
 1883         dp->i_nlink++;
 1884         DIP_SET(dp, i_nlink, dp->i_nlink);
 1885         dp->i_flag |= IN_CHANGE;
 1886         if (DOINGSOFTDEP(dvp))
 1887                 softdep_setup_mkdir(dp, ip);
              /* The update is synchronous only without soft updates and async. */
 1888         error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp));
 1889         if (error)
 1890                 goto bad;
 1891 #ifdef MAC
 1892         if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 1893                 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 1894                     dvp, tvp, cnp);
 1895                 if (error)
 1896                         goto bad;
 1897         }
 1898 #endif
 1899 #ifdef UFS_ACL
              /* Inherit ACLs from the parent according to the mount's ACL flavor. */
 1900         if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 1901                 error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
 1902                     cnp->cn_cred, cnp->cn_thread);
 1903                 if (error)
 1904                         goto bad;
 1905         } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 1906                 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
 1907                     cnp->cn_cred, cnp->cn_thread);
 1908                 if (error)
 1909                         goto bad;
 1910         }
 1911 #endif /* UFS_ACL */
 1912 
 1913         /*
 1914          * Initialize directory with "." and ".." from static template.
 1915          */
 1916         if (dvp->v_mount->mnt_maxsymlinklen > 0)
 1917                 dtp = &mastertemplate;
 1918         else
 1919                 dtp = (struct dirtemplate *)&omastertemplate;
 1920         dirtemplate = *dtp;
 1921         dirtemplate.dot_ino = ip->i_number;
 1922         dirtemplate.dotdot_ino = dp->i_number;
 1923         vnode_pager_setsize(tvp, DIRBLKSIZ);
 1924         if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
 1925             BA_CLRBUF, &bp)) != 0)
 1926                 goto bad;
 1927         ip->i_size = DIRBLKSIZ;
 1928         DIP_SET(ip, i_size, DIRBLKSIZ);
 1929         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1930         bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
 1931         if (DOINGSOFTDEP(tvp)) {
 1932                 /*
 1933                  * Ensure that the entire newly allocated block is a
 1934                  * valid directory so that future growth within the
 1935                  * block does not have to ensure that the block is
 1936                  * written before the inode.
 1937                  */
 1938                 blkoff = DIRBLKSIZ;
 1939                 while (blkoff < bp->b_bcount) {
 1940                         ((struct direct *)
 1941                            (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
 1942                         blkoff += DIRBLKSIZ;
 1943                 }
 1944         }
 1945         if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) &&
 1946             !DOINGASYNC(tvp))) != 0) {
                      /* Push the buffer out, ignoring the result, before unwinding. */
 1947                 (void)bwrite(bp);
 1948                 goto bad;
 1949         }
 1950         /*
 1951          * Directory set up, now install its entry in the parent directory.
 1952          *
 1953          * If we are not doing soft dependencies, then we must write out the
 1954          * buffer containing the new directory body before entering the new 
 1955          * name in the parent. If we are doing soft dependencies, then the
 1956          * buffer containing the new directory body will be passed to and
 1957          * released in the soft dependency code after the code has attached
 1958          * an appropriate ordering dependency to the buffer which ensures that
 1959          * the buffer is written before the new name is written in the parent.
 1960          */
 1961         if (DOINGASYNC(dvp))
 1962                 bdwrite(bp);
 1963         else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
 1964                 goto bad;
 1965         ufs_makedirentry(ip, cnp, &newdir);
 1966         error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
 1967         
              /*
               * Reached on success as well as on failure; the value of
               * error selects between handing back the vnode and unwinding.
               */
 1968 bad:
 1969         if (error == 0) {
 1970                 *ap->a_vpp = tvp;
 1971         } else {
                      /* Undo the link count bump made on the parent above. */
 1972                 dp->i_effnlink--;
 1973                 dp->i_nlink--;
 1974                 DIP_SET(dp, i_nlink, dp->i_nlink);
 1975                 dp->i_flag |= IN_CHANGE;
 1976                 /*
 1977                  * No need to do an explicit VOP_TRUNCATE here, vrele will
 1978                  * do this for us because we set the link count to 0.
 1979                  */
 1980                 ip->i_effnlink = 0;
 1981                 ip->i_nlink = 0;
 1982                 DIP_SET(ip, i_nlink, 0);
 1983                 ip->i_flag |= IN_CHANGE;
 1984                 if (DOINGSOFTDEP(tvp))
 1985                         softdep_revert_mkdir(dp, ip);
 1986 
 1987                 vput(tvp);
 1988         }
 1989 out:
 1990         return (error);
 1991 }
 1992 
 1993 /*
 1994  * Rmdir system call.
       *
       * Removes the entry for directory a_vp from its parent a_dvp.
       * Returns 0 on success.  Fails with EINVAL if the parent's
       * effective link count is 2 or less or if something is mounted on
       * a_vp, ENOTEMPTY if ufs_dirempty() reports that entries remain,
       * EPERM if the parent is append-only or a_vp carries NOUNLINK,
       * IMMUTABLE or APPEND, or with the error reported by
       * ufs_dirremove() or UFS_UPDATE().
 1995  */
 1996 static int
 1997 ufs_rmdir(ap)
 1998         struct vop_rmdir_args /* {
 1999                 struct vnode *a_dvp;
 2000                 struct vnode *a_vp;
 2001                 struct componentname *a_cnp;
 2002         } */ *ap;
 2003 {
 2004         struct vnode *vp = ap->a_vp;
 2005         struct vnode *dvp = ap->a_dvp;
 2006         struct componentname *cnp = ap->a_cnp;
 2007         struct inode *ip, *dp;
 2008         int error;
 2009 
 2010         ip = VTOI(vp);
 2011         dp = VTOI(dvp);
 2012 
 2013         /*
 2014          * Do not remove a directory that is in the process of being renamed.
 2015          * Verify the directory is empty (and valid). Rmdir ".." will not be
 2016          * valid since ".." will contain a reference to the current directory
 2017          * and thus be non-empty. Do not allow the removal of mounted on
 2018          * directories (this can happen when an NFS exported filesystem
 2019          * tries to remove a locally mounted on directory).
 2020          */
 2021         error = 0;
              /*
               * A parent effective link count of 2 or less is rejected
               * (presumably because a parent that still holds a
               * subdirectory should have at least three links); a count
               * of exactly 2 is additionally reported.
               */
 2022         if (dp->i_effnlink <= 2) {
 2023                 if (dp->i_effnlink == 2)
 2024                         print_bad_link_count("ufs_rmdir", dvp);
 2025                 error = EINVAL;
 2026                 goto out;
 2027         }
 2028         if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 2029                 error = ENOTEMPTY;
 2030                 goto out;
 2031         }
 2032         if ((dp->i_flags & APPEND)
 2033             || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 2034                 error = EPERM;
 2035                 goto out;
 2036         }
 2037         if (vp->v_mountedhere != 0) {
 2038                 error = EINVAL;
 2039                 goto out;
 2040         }
 2041 #ifdef UFS_GJOURNAL
 2042         ufs_gjournal_orphan(vp);
 2043 #endif
 2044         /*
 2045          * Delete reference to directory before purging
 2046          * inode.  If we crash in between, the directory
 2047          * will be reattached to lost+found.
 2048          */
 2049         dp->i_effnlink--;
 2050         ip->i_effnlink--;
 2051         if (DOINGSOFTDEP(vp))
 2052                 softdep_setup_rmdir(dp, ip);
 2053         error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
 2054         if (error) {
                      /*
                       * Removal failed: restore both effective link counts
                       * and let the soft updates code revert its setup.
                       */
 2055                 dp->i_effnlink++;
 2056                 ip->i_effnlink++;
 2057                 if (DOINGSOFTDEP(vp))
 2058                         softdep_revert_rmdir(dp, ip);
 2059                 goto out;
 2060         }
 2061         cache_purge(dvp);
 2062         /*
 2063          * The only stuff left in the directory is "." and "..". The "."
 2064          * reference is inconsequential since we are quashing it. The soft
 2065          * dependency code will arrange to do these operations after
 2066          * the parent directory entry has been deleted on disk, so
 2067          * when running with that code we avoid doing them now.
 2068          */
 2069         if (!DOINGSOFTDEP(vp)) {
 2070                 dp->i_nlink--;
 2071                 DIP_SET(dp, i_nlink, dp->i_nlink);
 2072                 dp->i_flag |= IN_CHANGE;
                      /* On this path the result of UFS_UPDATE() is what gets returned. */
 2073                 error = UFS_UPDATE(dvp, 0);
 2074                 ip->i_nlink--;
 2075                 DIP_SET(ip, i_nlink, ip->i_nlink);
 2076                 ip->i_flag |= IN_CHANGE;
 2077         }
 2078         cache_purge(vp);
 2079 #ifdef UFS_DIRHASH
 2080         /* Kill any active hash; i_effnlink == 0, so it will not come back. */
 2081         if (ip->i_dirhash != NULL)
 2082                 ufsdirhash_free(ip);
 2083 #endif
 2084 out:
 2085         return (error);
 2086 }
 2087 
 2088 /*
 2089  * symlink -- make a symbolic link
 2090  */
 2091 static int
 2092 ufs_symlink(ap)
 2093         struct vop_symlink_args /* {
 2094                 struct vnode *a_dvp;
 2095                 struct vnode **a_vpp;
 2096                 struct componentname *a_cnp;
 2097                 struct vattr *a_vap;
 2098                 char *a_target;
 2099         } */ *ap;
 2100 {
 2101         struct vnode *vp, **vpp = ap->a_vpp;
 2102         struct inode *ip;
 2103         int len, error;
 2104 
 2105         error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 2106             vpp, ap->a_cnp, "ufs_symlink");
 2107         if (error)
 2108                 return (error);
 2109         vp = *vpp;
 2110         len = strlen(ap->a_target);
 2111         if (len < vp->v_mount->mnt_maxsymlinklen) {
 2112                 ip = VTOI(vp);
 2113                 bcopy(ap->a_target, SHORTLINK(ip), len);
 2114                 ip->i_size = len;
 2115                 DIP_SET(ip, i_size, len);
 2116                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
 2117                 error = UFS_UPDATE(vp, 0);
 2118         } else
 2119                 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
 2120                     UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 2121                     ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 2122         if (error)
 2123                 vput(vp);
 2124         return (error);
 2125 }
 2126 
 2127 /*
 2128  * Vnode op for reading directories.
 2129  */
 2130 int
 2131 ufs_readdir(ap)
 2132         struct vop_readdir_args /* {
 2133                 struct vnode *a_vp;
 2134                 struct uio *a_uio;
 2135                 struct ucred *a_cred;
 2136                 int *a_eofflag;
 2137                 int *a_ncookies;
 2138                 u_long **a_cookies;
 2139         } */ *ap;
 2140 {
 2141         struct vnode *vp = ap->a_vp;
 2142         struct uio *uio = ap->a_uio;
 2143         struct buf *bp;
 2144         struct inode *ip;
 2145         struct direct *dp, *edp;
 2146         u_long *cookies;
 2147         struct dirent dstdp;
 2148         off_t offset, startoffset;
 2149         size_t readcnt, skipcnt;
 2150         ssize_t startresid;
 2151         u_int ncookies;
 2152         int error;
 2153 
 2154         if (uio->uio_offset < 0)
 2155                 return (EINVAL);
 2156         ip = VTOI(vp);
 2157         if (ip->i_effnlink == 0)
 2158                 return (0);
 2159         if (ap->a_ncookies != NULL) {
 2160                 if (uio->uio_resid < 0)
 2161                         ncookies = 0;
 2162                 else
 2163                         ncookies = uio->uio_resid;
 2164                 if (uio->uio_offset >= ip->i_size)
 2165                         ncookies = 0;
 2166                 else if (ip->i_size - uio->uio_offset < ncookies)
 2167                         ncookies = ip->i_size - uio->uio_offset;
 2168                 ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
 2169                 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 2170                 *ap->a_ncookies = ncookies;
 2171                 *ap->a_cookies = cookies;
 2172         } else {
 2173                 ncookies = 0;
 2174                 cookies = NULL;
 2175         }
 2176         offset = startoffset = uio->uio_offset;
 2177         startresid = uio->uio_resid;
 2178         error = 0;
 2179         while (error == 0 && uio->uio_resid > 0 &&
 2180             uio->uio_offset < ip->i_size) {
 2181                 error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp);
 2182                 if (error)
 2183                         break;
 2184                 if (bp->b_offset + bp->b_bcount > ip->i_size)
 2185                         readcnt = ip->i_size - bp->b_offset;
 2186                 else
 2187                         readcnt = bp->b_bcount;
 2188                 skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
 2189                     ~(size_t)(DIRBLKSIZ - 1);
 2190                 offset = bp->b_offset + skipcnt;
 2191                 dp = (struct direct *)&bp->b_data[skipcnt];
 2192                 edp = (struct direct *)&bp->b_data[readcnt];
 2193                 while (error == 0 && uio->uio_resid > 0 && dp < edp) {
 2194                         if (dp->d_reclen <= offsetof(struct direct, d_name) ||
 2195                             (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
 2196                                 error = EIO;
 2197                                 break;
 2198                         }
 2199 #if BYTE_ORDER == LITTLE_ENDIAN
 2200                         /* Old filesystem format. */
 2201                         if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 2202                                 dstdp.d_namlen = dp->d_type;
 2203                                 dstdp.d_type = dp->d_namlen;
 2204                         } else
 2205 #endif
 2206                         {
 2207                                 dstdp.d_namlen = dp->d_namlen;
 2208                                 dstdp.d_type = dp->d_type;
 2209                         }
 2210                         if (offsetof(struct direct, d_name) + dstdp.d_namlen >
 2211                             dp->d_reclen) {
 2212                                 error = EIO;
 2213                                 break;
 2214                         }
 2215                         if (offset < startoffset || dp->d_ino == 0)
 2216                                 goto nextentry;
 2217                         dstdp.d_fileno = dp->d_ino;
 2218                         dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
 2219                         bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 2220                         dirent_terminate(&dstdp);
 2221                         if (dstdp.d_reclen > uio->uio_resid) {
 2222                                 if (uio->uio_resid == startresid)
 2223                                         error = EINVAL;
 2224                                 else
 2225                                         error = EJUSTRETURN;
 2226                                 break;
 2227                         }
 2228                         /* Advance dp. */
 2229                         error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
 2230                         if (error)
 2231                                 break;
 2232                         if (cookies != NULL) {
 2233                                 KASSERT(ncookies > 0,
 2234                                     ("ufs_readdir: cookies buffer too small"));
 2235                                 *cookies = offset + dp->d_reclen;
 2236                                 cookies++;
 2237                                 ncookies--;
 2238                         }
 2239 nextentry:
 2240                         offset += dp->d_reclen;
 2241                         dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
 2242                 }
 2243                 bqrelse(bp);
 2244                 uio->uio_offset = offset;
 2245         }
 2246         /* We need to correct uio_offset. */
 2247         uio->uio_offset = offset;
 2248         if (error == EJUSTRETURN)
 2249                 error = 0;
 2250         if (ap->a_ncookies != NULL) {
 2251                 if (error == 0) {
 2252                         ap->a_ncookies -= ncookies;
 2253                 } else {
 2254                         free(*ap->a_cookies, M_TEMP);
 2255                         *ap->a_ncookies = 0;
 2256                         *ap->a_cookies = NULL;
 2257                 }
 2258         }
 2259         if (error == 0 && ap->a_eofflag)
 2260                 *ap->a_eofflag = ip->i_size <= uio->uio_offset;
 2261         return (error);
 2262 }
 2263 
 2264 /*
 2265  * Return target name of a symbolic link
 2266  */
 2267 static int
 2268 ufs_readlink(ap)
 2269         struct vop_readlink_args /* {
 2270                 struct vnode *a_vp;
 2271                 struct uio *a_uio;
 2272                 struct ucred *a_cred;
 2273         } */ *ap;
 2274 {
 2275         struct vnode *vp = ap->a_vp;
 2276         struct inode *ip = VTOI(vp);
 2277         doff_t isize;
 2278 
 2279         isize = ip->i_size;
 2280         if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
 2281             DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
 2282                 return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
 2283         }
 2284         return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 2285 }
 2286 
 2287 /*
 2288  * Calculate the logical to physical mapping if not done already,
 2289  * then call the device strategy routine.
 2290  *
 2291  * In order to be able to swap to a file, the ufs_bmaparray() operation may not
 2292  * deadlock on memory.  See ufs_bmap() for details.
 2293  */
 2294 static int
 2295 ufs_strategy(ap)
 2296         struct vop_strategy_args /* {
 2297                 struct vnode *a_vp;
 2298                 struct buf *a_bp;
 2299         } */ *ap;
 2300 {
 2301         struct buf *bp = ap->a_bp;
 2302         struct vnode *vp = ap->a_vp;
 2303         ufs2_daddr_t blkno;
 2304         int error;
 2305 
 2306         if (bp->b_blkno == bp->b_lblkno) {
 2307                 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
 2308                 bp->b_blkno = blkno;
 2309                 if (error) {
 2310                         bp->b_error = error;
 2311                         bp->b_ioflags |= BIO_ERROR;
 2312                         bufdone(bp);
 2313                         return (0);
 2314                 }
 2315                 if ((long)bp->b_blkno == -1)
 2316                         vfs_bio_clrbuf(bp);
 2317         }
 2318         if ((long)bp->b_blkno == -1) {
 2319                 bufdone(bp);
 2320                 return (0);
 2321         }
 2322         bp->b_iooffset = dbtob(bp->b_blkno);
 2323         BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp);
 2324         return (0);
 2325 }
 2326 
 2327 /*
 2328  * Print out the contents of an inode.
 2329  */
 2330 static int
 2331 ufs_print(ap)
 2332         struct vop_print_args /* {
 2333                 struct vnode *a_vp;
 2334         } */ *ap;
 2335 {
 2336         struct vnode *vp = ap->a_vp;
 2337         struct inode *ip = VTOI(vp);
 2338 
 2339         printf("\tino %lu, on dev %s", (u_long)ip->i_number,
 2340             devtoname(ITODEV(ip)));
 2341         if (vp->v_type == VFIFO)
 2342                 fifo_printinfo(vp);
 2343         printf("\n");
 2344         return (0);
 2345 }
 2346 
 2347 /*
 2348  * Close wrapper for fifos.
 2349  *
 2350  * Update the times on the inode then do device close.
 2351  */
 2352 static int
 2353 ufsfifo_close(ap)
 2354         struct vop_close_args /* {
 2355                 struct vnode *a_vp;
 2356                 int  a_fflag;
 2357                 struct ucred *a_cred;
 2358                 struct thread *a_td;
 2359         } */ *ap;
 2360 {
 2361         struct vnode *vp = ap->a_vp;
 2362         int usecount;
 2363 
 2364         VI_LOCK(vp);
 2365         usecount = vp->v_usecount;
 2366         if (usecount > 1)
 2367                 ufs_itimes_locked(vp);
 2368         VI_UNLOCK(vp);
 2369         return (fifo_specops.vop_close(ap));
 2370 }
 2371 
 2372 /*
 2373  * Kqfilter wrapper for fifos.
 2374  *
 2375  * Fall through to ufs kqfilter routines if needed 
 2376  */
 2377 static int
 2378 ufsfifo_kqfilter(ap)
 2379         struct vop_kqfilter_args *ap;
 2380 {
 2381         int error;
 2382 
 2383         error = fifo_specops.vop_kqfilter(ap);
 2384         if (error)
 2385                 error = vfs_kqfilter(ap);
 2386         return (error);
 2387 }
 2388 
 2389 /*
 2390  * Return POSIX pathconf information applicable to ufs filesystems.
 2391  */
 2392 static int
 2393 ufs_pathconf(ap)
 2394         struct vop_pathconf_args /* {
 2395                 struct vnode *a_vp;
 2396                 int a_name;
 2397                 int *a_retval;
 2398         } */ *ap;
 2399 {
 2400         int error;
 2401 
 2402         error = 0;
 2403         switch (ap->a_name) {
 2404         case _PC_LINK_MAX:
 2405                 *ap->a_retval = UFS_LINK_MAX;
 2406                 break;
 2407         case _PC_NAME_MAX:
 2408                 *ap->a_retval = UFS_MAXNAMLEN;
 2409                 break;
 2410         case _PC_PIPE_BUF:
 2411                 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
 2412                         *ap->a_retval = PIPE_BUF;
 2413                 else
 2414                         error = EINVAL;
 2415                 break;
 2416         case _PC_CHOWN_RESTRICTED:
 2417                 *ap->a_retval = 1;
 2418                 break;
 2419         case _PC_NO_TRUNC:
 2420                 *ap->a_retval = 1;
 2421                 break;
 2422         case _PC_ACL_EXTENDED:
 2423 #ifdef UFS_ACL
 2424                 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 2425                         *ap->a_retval = 1;
 2426                 else
 2427                         *ap->a_retval = 0;
 2428 #else
 2429                 *ap->a_retval = 0;
 2430 #endif
 2431                 break;
 2432 
 2433         case _PC_ACL_NFS4:
 2434 #ifdef UFS_ACL
 2435                 if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 2436                         *ap->a_retval = 1;
 2437                 else
 2438                         *ap->a_retval = 0;
 2439 #else
 2440                 *ap->a_retval = 0;
 2441 #endif
 2442                 break;
 2443 
 2444         case _PC_ACL_PATH_MAX:
 2445 #ifdef UFS_ACL
 2446                 if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
 2447                         *ap->a_retval = ACL_MAX_ENTRIES;
 2448                 else
 2449                         *ap->a_retval = 3;
 2450 #else
 2451                 *ap->a_retval = 3;
 2452 #endif
 2453                 break;
 2454         case _PC_MAC_PRESENT:
 2455 #ifdef MAC
 2456                 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
 2457                         *ap->a_retval = 1;
 2458                 else
 2459                         *ap->a_retval = 0;
 2460 #else
 2461                 *ap->a_retval = 0;
 2462 #endif
 2463                 break;
 2464         case _PC_MIN_HOLE_SIZE:
 2465                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 2466                 break;
 2467         case _PC_PRIO_IO:
 2468                 *ap->a_retval = 0;
 2469                 break;
 2470         case _PC_SYNC_IO:
 2471                 *ap->a_retval = 0;
 2472                 break;
 2473         case _PC_ALLOC_SIZE_MIN:
 2474                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 2475                 break;
 2476         case _PC_FILESIZEBITS:
 2477                 *ap->a_retval = 64;
 2478                 break;
 2479         case _PC_REC_INCR_XFER_SIZE:
 2480                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 2481                 break;
 2482         case _PC_REC_MAX_XFER_SIZE:
 2483                 *ap->a_retval = -1; /* means ``unlimited'' */
 2484                 break;
 2485         case _PC_REC_MIN_XFER_SIZE:
 2486                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 2487                 break;
 2488         case _PC_REC_XFER_ALIGN:
 2489                 *ap->a_retval = PAGE_SIZE;
 2490                 break;
 2491         case _PC_SYMLINK_MAX:
 2492                 *ap->a_retval = MAXPATHLEN;
 2493                 break;
 2494 
 2495         default:
 2496                 error = vop_stdpathconf(ap);
 2497                 break;
 2498         }
 2499         return (error);
 2500 }
 2501 
 2502 /*
 2503  * Initialize the vnode associated with a new inode, handle aliased
 2504  * vnodes.
 2505  */
 2506 int
 2507 ufs_vinit(mntp, fifoops, vpp)
 2508         struct mount *mntp;
 2509         struct vop_vector *fifoops;
 2510         struct vnode **vpp;
 2511 {
 2512         struct inode *ip;
 2513         struct vnode *vp;
 2514 
 2515         vp = *vpp;
 2516         ip = VTOI(vp);
 2517         vp->v_type = IFTOVT(ip->i_mode);
 2518         if (vp->v_type == VFIFO)
 2519                 vp->v_op = fifoops;
 2520         ASSERT_VOP_LOCKED(vp, "ufs_vinit");
 2521         if (ip->i_number == UFS_ROOTINO)
 2522                 vp->v_vflag |= VV_ROOT;
 2523         *vpp = vp;
 2524         return (0);
 2525 }
 2526 
 2527 /*
 2528  * Allocate a new inode.
 2529  * Vnode dvp must be locked.
 2530  */
 2531 static int
 2532 ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
 2533         int mode;
 2534         struct vnode *dvp;
 2535         struct vnode **vpp;
 2536         struct componentname *cnp;
 2537         const char *callfunc;
 2538 {
 2539         struct inode *ip, *pdir;
 2540         struct direct newdir;
 2541         struct vnode *tvp;
 2542         int error;
 2543 
 2544         pdir = VTOI(dvp);
 2545 #ifdef INVARIANTS
 2546         if ((cnp->cn_flags & HASBUF) == 0)
 2547                 panic("%s: no name", callfunc);
 2548 #endif
 2549         *vpp = NULL;
 2550         if ((mode & IFMT) == 0)
 2551                 mode |= IFREG;
 2552 
 2553         if (pdir->i_effnlink < 2) {
 2554                 print_bad_link_count(callfunc, dvp);
 2555                 return (EINVAL);
 2556         }
 2557         error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
 2558         if (error)
 2559                 return (error);
 2560         ip = VTOI(tvp);
 2561         ip->i_gid = pdir->i_gid;
 2562         DIP_SET(ip, i_gid, pdir->i_gid);
 2563 #ifdef SUIDDIR
 2564         {
 2565 #ifdef QUOTA
 2566                 struct ucred ucred, *ucp;
 2567                 gid_t ucred_group;
 2568                 ucp = cnp->cn_cred;
 2569 #endif
 2570                 /*
 2571                  * If we are not the owner of the directory,
 2572                  * and we are hacking owners here, (only do this where told to)
 2573                  * and we are not giving it TO root, (would subvert quotas)
 2574                  * then go ahead and give it to the other user.
 2575                  * Note that this drops off the execute bits for security.
 2576                  */
 2577                 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 2578                     (pdir->i_mode & ISUID) &&
 2579                     (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
 2580                         ip->i_uid = pdir->i_uid;
 2581                         DIP_SET(ip, i_uid, ip->i_uid);
 2582                         mode &= ~07111;
 2583 #ifdef QUOTA
 2584                         /*
 2585                          * Make sure the correct user gets charged
 2586                          * for the space.
 2587                          * Quickly knock up a dummy credential for the victim.
 2588                          * XXX This seems to never be accessed out of our
 2589                          * context so a stack variable is ok.
 2590                          */
 2591                         refcount_init(&ucred.cr_ref, 1);
 2592                         ucred.cr_uid = ip->i_uid;
 2593                         ucred.cr_ngroups = 1;
 2594                         ucred.cr_groups = &ucred_group;
 2595                         ucred.cr_groups[0] = pdir->i_gid;
 2596                         ucp = &ucred;
 2597 #endif
 2598                 } else {
 2599                         ip->i_uid = cnp->cn_cred->cr_uid;
 2600                         DIP_SET(ip, i_uid, ip->i_uid);
 2601                 }
 2602 
 2603 #ifdef QUOTA
 2604                 if ((error = getinoquota(ip)) ||
 2605                     (error = chkiq(ip, 1, ucp, 0))) {
 2606                         if (DOINGSOFTDEP(tvp))
 2607                                 softdep_revert_link(pdir, ip);
 2608                         UFS_VFREE(tvp, ip->i_number, mode);
 2609                         vput(tvp);
 2610                         return (error);
 2611                 }
 2612 #endif
 2613         }
 2614 #else   /* !SUIDDIR */
 2615         ip->i_uid = cnp->cn_cred->cr_uid;
 2616         DIP_SET(ip, i_uid, ip->i_uid);
 2617 #ifdef QUOTA
 2618         if ((error = getinoquota(ip)) ||
 2619             (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 2620                 if (DOINGSOFTDEP(tvp))
 2621                         softdep_revert_link(pdir, ip);
 2622                 UFS_VFREE(tvp, ip->i_number, mode);
 2623                 vput(tvp);
 2624                 return (error);
 2625         }
 2626 #endif
 2627 #endif  /* !SUIDDIR */
 2628         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 2629         ip->i_mode = mode;
 2630         DIP_SET(ip, i_mode, mode);
 2631         tvp->v_type = IFTOVT(mode);     /* Rest init'd in getnewvnode(). */
 2632         ip->i_effnlink = 1;
 2633         ip->i_nlink = 1;
 2634         DIP_SET(ip, i_nlink, 1);
 2635         if (DOINGSOFTDEP(tvp))
 2636                 softdep_setup_create(VTOI(dvp), ip);
 2637         if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
 2638             priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
 2639                 ip->i_mode &= ~ISGID;
 2640                 DIP_SET(ip, i_mode, ip->i_mode);
 2641         }
 2642 
 2643         if (cnp->cn_flags & ISWHITEOUT) {
 2644                 ip->i_flags |= UF_OPAQUE;
 2645                 DIP_SET(ip, i_flags, ip->i_flags);
 2646         }
 2647 
 2648         /*
 2649          * Make sure inode goes to disk before directory entry.
 2650          */
 2651         error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp));
 2652         if (error)
 2653                 goto bad;
 2654 #ifdef MAC
 2655         if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 2656                 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 2657                     dvp, tvp, cnp);
 2658                 if (error)
 2659                         goto bad;
 2660         }
 2661 #endif
 2662 #ifdef UFS_ACL
 2663         if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 2664                 error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
 2665                     cnp->cn_cred, cnp->cn_thread);
 2666                 if (error)
 2667                         goto bad;
 2668         } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 2669                 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
 2670                     cnp->cn_cred, cnp->cn_thread);
 2671                 if (error)
 2672                         goto bad;
 2673         }
 2674 #endif /* !UFS_ACL */
 2675         ufs_makedirentry(ip, cnp, &newdir);
 2676         error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
 2677         if (error)
 2678                 goto bad;
 2679         *vpp = tvp;
 2680         return (0);
 2681 
 2682 bad:
 2683         /*
 2684          * Write error occurred trying to update the inode
 2685          * or the directory so must deallocate the inode.
 2686          */
 2687         ip->i_effnlink = 0;
 2688         ip->i_nlink = 0;
 2689         DIP_SET(ip, i_nlink, 0);
 2690         ip->i_flag |= IN_CHANGE;
 2691         if (DOINGSOFTDEP(tvp))
 2692                 softdep_revert_create(VTOI(dvp), ip);
 2693         vput(tvp);
 2694         return (error);
 2695 }
 2696 
 2697 static int
 2698 ufs_ioctl(struct vop_ioctl_args *ap)
 2699 {
 2700 
 2701         switch (ap->a_command) {
 2702         case FIOSEEKDATA:
 2703         case FIOSEEKHOLE:
 2704                 return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
 2705                     (off_t *)ap->a_data, ap->a_cred));
 2706         default:
 2707                 return (ENOTTY);
 2708         }
 2709 }
 2710 
 2711 /* Global vfs data structures for ufs. */
 2712 struct vop_vector ufs_vnodeops = {
 2713         .vop_default =          &default_vnodeops,
 2714         .vop_fsync =            VOP_PANIC,
 2715         .vop_read =             VOP_PANIC,
 2716         .vop_reallocblks =      VOP_PANIC,
 2717         .vop_write =            VOP_PANIC,
 2718         .vop_accessx =          ufs_accessx,
 2719         .vop_bmap =             ufs_bmap,
 2720         .vop_cachedlookup =     ufs_lookup,
 2721         .vop_close =            ufs_close,
 2722         .vop_create =           ufs_create,
 2723         .vop_getattr =          ufs_getattr,
 2724         .vop_inactive =         ufs_inactive,
 2725         .vop_ioctl =            ufs_ioctl,
 2726         .vop_link =             ufs_link,
 2727         .vop_lookup =           vfs_cache_lookup,
 2728         .vop_markatime =        ufs_markatime,
 2729         .vop_mkdir =            ufs_mkdir,
 2730         .vop_mknod =            ufs_mknod,
 2731         .vop_open =             ufs_open,
 2732         .vop_pathconf =         ufs_pathconf,
 2733         .vop_poll =             vop_stdpoll,
 2734         .vop_print =            ufs_print,
 2735         .vop_readdir =          ufs_readdir,
 2736         .vop_readlink =         ufs_readlink,
 2737         .vop_reclaim =          ufs_reclaim,
 2738         .vop_remove =           ufs_remove,
 2739         .vop_rename =           ufs_rename,
 2740         .vop_rmdir =            ufs_rmdir,
 2741         .vop_setattr =          ufs_setattr,
 2742 #ifdef MAC
 2743         .vop_setlabel =         vop_stdsetlabel_ea,
 2744 #endif
 2745         .vop_strategy =         ufs_strategy,
 2746         .vop_symlink =          ufs_symlink,
 2747         .vop_whiteout =         ufs_whiteout,
 2748 #ifdef UFS_EXTATTR
 2749         .vop_getextattr =       ufs_getextattr,
 2750         .vop_deleteextattr =    ufs_deleteextattr,
 2751         .vop_setextattr =       ufs_setextattr,
 2752 #endif
 2753 #ifdef UFS_ACL
 2754         .vop_getacl =           ufs_getacl,
 2755         .vop_setacl =           ufs_setacl,
 2756         .vop_aclcheck =         ufs_aclcheck,
 2757 #endif
 2758 };
 2759 
 2760 struct vop_vector ufs_fifoops = {
 2761         .vop_default =          &fifo_specops,
 2762         .vop_fsync =            VOP_PANIC,
 2763         .vop_accessx =          ufs_accessx,
 2764         .vop_close =            ufsfifo_close,
 2765         .vop_getattr =          ufs_getattr,
 2766         .vop_inactive =         ufs_inactive,
 2767         .vop_kqfilter =         ufsfifo_kqfilter,
 2768         .vop_markatime =        ufs_markatime,
 2769         .vop_pathconf =         ufs_pathconf,
 2770         .vop_print =            ufs_print,
 2771         .vop_read =             VOP_PANIC,
 2772         .vop_reclaim =          ufs_reclaim,
 2773         .vop_setattr =          ufs_setattr,
 2774 #ifdef MAC
 2775         .vop_setlabel =         vop_stdsetlabel_ea,
 2776 #endif
 2777         .vop_write =            VOP_PANIC,
 2778 #ifdef UFS_EXTATTR
 2779         .vop_getextattr =       ufs_getextattr,
 2780         .vop_deleteextattr =    ufs_deleteextattr,
 2781         .vop_setextattr =       ufs_setextattr,
 2782 #endif
 2783 #ifdef UFS_ACL
 2784         .vop_getacl =           ufs_getacl,
 2785         .vop_setacl =           ufs_setacl,
 2786         .vop_aclcheck =         ufs_aclcheck,
 2787 #endif
 2788 };

Cache object: 90ab551c544dff09c2202b3750c66356


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.