The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_vnops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.0/sys/ufs/ufs/ufs_vnops.c 227402 2011-11-09 18:51:41Z pho $");
   39 
   40 #include "opt_quota.h"
   41 #include "opt_suiddir.h"
   42 #include "opt_ufs.h"
   43 #include "opt_ffs.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/malloc.h>
   48 #include <sys/namei.h>
   49 #include <sys/kernel.h>
   50 #include <sys/fcntl.h>
   51 #include <sys/stat.h>
   52 #include <sys/bio.h>
   53 #include <sys/buf.h>
   54 #include <sys/mount.h>
   55 #include <sys/priv.h>
   56 #include <sys/refcount.h>
   57 #include <sys/unistd.h>
   58 #include <sys/vnode.h>
   59 #include <sys/dirent.h>
   60 #include <sys/lockf.h>
   61 #include <sys/conf.h>
   62 #include <sys/acl.h>
   63 
   64 #include <security/mac/mac_framework.h>
   65 
   66 #include <sys/file.h>           /* XXX */
   67 
   68 #include <vm/vm.h>
   69 #include <vm/vm_extern.h>
   70 
   71 #include <fs/fifofs/fifo.h>
   72 
   73 #include <ufs/ufs/acl.h>
   74 #include <ufs/ufs/extattr.h>
   75 #include <ufs/ufs/quota.h>
   76 #include <ufs/ufs/inode.h>
   77 #include <ufs/ufs/dir.h>
   78 #include <ufs/ufs/ufsmount.h>
   79 #include <ufs/ufs/ufs_extern.h>
   80 #ifdef UFS_DIRHASH
   81 #include <ufs/ufs/dirhash.h>
   82 #endif
   83 #ifdef UFS_GJOURNAL
   84 #include <ufs/ufs/gjournal.h>
   85 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS");
   86 #endif
   87 
   88 #ifdef QUOTA
   89 FEATURE(ufs_quota, "UFS disk quotas support");
   90 FEATURE(ufs_quota64, "64bit UFS disk quotas support");
   91 #endif
   92 
   93 #ifdef SUIDDIR
   94 FEATURE(suiddir,
   95     "Give all new files in directory the same ownership as the directory");
   96 #endif
   97 
   98 
   99 #include <ufs/ffs/ffs_extern.h>
  100 
      /*
       * Forward declarations for the file-local (static) routines of this
       * file.  The entries declared through a vop_*_t type are presumably
       * function typedefs for the corresponding vnode operation -- confirm
       * against the header that defines them.  ufs_chmod, ufs_chown and
       * ufs_makeinode are plain helpers with explicit prototypes.
       */
  101 static vop_accessx_t    ufs_accessx;
  102 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
  103 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
  104 static vop_close_t      ufs_close;
  105 static vop_create_t     ufs_create;
  106 static vop_getattr_t    ufs_getattr;
  107 static vop_link_t       ufs_link;
  108 static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
  109 static vop_markatime_t  ufs_markatime;
  110 static vop_mkdir_t      ufs_mkdir;
  111 static vop_mknod_t      ufs_mknod;
  112 static vop_open_t       ufs_open;
  113 static vop_pathconf_t   ufs_pathconf;
  114 static vop_print_t      ufs_print;
  115 static vop_readlink_t   ufs_readlink;
  116 static vop_remove_t     ufs_remove;
  117 static vop_rename_t     ufs_rename;
  118 static vop_rmdir_t      ufs_rmdir;
  119 static vop_setattr_t    ufs_setattr;
  120 static vop_strategy_t   ufs_strategy;
  121 static vop_symlink_t    ufs_symlink;
  122 static vop_whiteout_t   ufs_whiteout;
  123 static vop_close_t      ufsfifo_close;
  124 static vop_kqfilter_t   ufsfifo_kqfilter;
  125 static vop_pathconf_t   ufsfifo_pathconf;
  126 
      /* Declare the "ufs" sysctl node beneath _vfs, described as "UFS filesystem". */
  127 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
  128 
  129 /*
  130  * A virgin directory (no blushing please).
       *
       * Both templates are initialised positionally with two entries, "."
       * and "..".  The first entry is given the value 12 and the second
       * DIRBLKSIZ - 12, so the two together account for DIRBLKSIZ; these
       * look like record lengths, and the trailing 1 and 2 like the name
       * lengths of "." and ".." -- NOTE(review): confirm against the
       * struct dirtemplate / struct odirtemplate declarations, which are
       * not visible here.  The leading 0 in each entry is presumably an
       * inode number filled in by the user of the template.
       * omastertemplate differs only in having no DT_DIR member.
  131  */
  132 static struct dirtemplate mastertemplate = {
  133         0, 12, DT_DIR, 1, ".",
  134         0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
  135 };
  136 static struct odirtemplate omastertemplate = {
  137         0, 12, 1, ".",
  138         0, DIRBLKSIZ - 12, 2, ".."
  139 };
  140 
  141 static void
  142 ufs_itimes_locked(struct vnode *vp)
  143 {
  144         struct inode *ip;
  145         struct timespec ts;
  146 
  147         ASSERT_VI_LOCKED(vp, __func__);
  148 
  149         ip = VTOI(vp);
  150         if (UFS_RDONLY(ip))
  151                 goto out;
  152         if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
  153                 return;
  154 
  155         if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
  156                 ip->i_flag |= IN_LAZYMOD;
  157         else if (((vp->v_mount->mnt_kern_flag &
  158                     (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
  159                     (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
  160                 ip->i_flag |= IN_MODIFIED;
  161         else if (ip->i_flag & IN_ACCESS)
  162                 ip->i_flag |= IN_LAZYACCESS;
  163         vfs_timestamp(&ts);
  164         if (ip->i_flag & IN_ACCESS) {
  165                 DIP_SET(ip, i_atime, ts.tv_sec);
  166                 DIP_SET(ip, i_atimensec, ts.tv_nsec);
  167         }
  168         if (ip->i_flag & IN_UPDATE) {
  169                 DIP_SET(ip, i_mtime, ts.tv_sec);
  170                 DIP_SET(ip, i_mtimensec, ts.tv_nsec);
  171         }
  172         if (ip->i_flag & IN_CHANGE) {
  173                 DIP_SET(ip, i_ctime, ts.tv_sec);
  174                 DIP_SET(ip, i_ctimensec, ts.tv_nsec);
  175                 DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
  176         }
  177 
  178  out:
  179         ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
  180 }
  181 
  182 void
  183 ufs_itimes(struct vnode *vp)
  184 {
  185 
  186         VI_LOCK(vp);
  187         ufs_itimes_locked(vp);
  188         VI_UNLOCK(vp);
  189 }
  190 
  191 /*
  192  * Create a regular file
  193  */
  194 static int
  195 ufs_create(ap)
  196         struct vop_create_args /* {
  197                 struct vnode *a_dvp;
  198                 struct vnode **a_vpp;
  199                 struct componentname *a_cnp;
  200                 struct vattr *a_vap;
  201         } */ *ap;
  202 {
  203         int error;
  204 
  205         error =
  206             ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
  207             ap->a_dvp, ap->a_vpp, ap->a_cnp);
  208         if (error)
  209                 return (error);
  210         return (0);
  211 }
  212 
  213 /*
  214  * Mknod vnode call
  215  */
  216 /* ARGSUSED */
  217 static int
  218 ufs_mknod(ap)
  219         struct vop_mknod_args /* {
  220                 struct vnode *a_dvp;
  221                 struct vnode **a_vpp;
  222                 struct componentname *a_cnp;
  223                 struct vattr *a_vap;
  224         } */ *ap;
  225 {
  226         struct vattr *vap = ap->a_vap;
  227         struct vnode **vpp = ap->a_vpp;
  228         struct inode *ip;
  229         ino_t ino;
  230         int error;
  231 
  232         error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
  233             ap->a_dvp, vpp, ap->a_cnp);
  234         if (error)
  235                 return (error);
  236         ip = VTOI(*vpp);
  237         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
  238         if (vap->va_rdev != VNOVAL) {
  239                 /*
  240                  * Want to be able to use this to make badblock
  241                  * inodes, so don't truncate the dev number.
  242                  */
  243                 DIP_SET(ip, i_rdev, vap->va_rdev);
  244         }
  245         /*
  246          * Remove inode, then reload it through VFS_VGET so it is
  247          * checked to see if it is an alias of an existing entry in
  248          * the inode cache.  XXX I don't believe this is necessary now.
  249          */
  250         (*vpp)->v_type = VNON;
  251         ino = ip->i_number;     /* Save this before vgone() invalidates ip. */
  252         vgone(*vpp);
  253         vput(*vpp);
  254         error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
  255         if (error) {
  256                 *vpp = NULL;
  257                 return (error);
  258         }
  259         return (0);
  260 }
  261 
  262 /*
  263  * Open called.
  264  */
  265 /* ARGSUSED */
  266 static int
  267 ufs_open(struct vop_open_args *ap)
  268 {
  269         struct vnode *vp = ap->a_vp;
  270         struct inode *ip;
  271 
  272         if (vp->v_type == VCHR || vp->v_type == VBLK)
  273                 return (EOPNOTSUPP);
  274 
  275         ip = VTOI(vp);
  276         /*
  277          * Files marked append-only must be opened for appending.
  278          */
  279         if ((ip->i_flags & APPEND) &&
  280             (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
  281                 return (EPERM);
  282         vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
  283         return (0);
  284 }
  285 
  286 /*
  287  * Close called.
  288  *
  289  * Update the times on the inode.
  290  */
  291 /* ARGSUSED */
  292 static int
  293 ufs_close(ap)
  294         struct vop_close_args /* {
  295                 struct vnode *a_vp;
  296                 int  a_fflag;
  297                 struct ucred *a_cred;
  298                 struct thread *a_td;
  299         } */ *ap;
  300 {
  301         struct vnode *vp = ap->a_vp;
  302         int usecount;
  303 
  304         VI_LOCK(vp);
  305         usecount = vp->v_usecount;
  306         if (usecount > 1)
  307                 ufs_itimes_locked(vp);
  308         VI_UNLOCK(vp);
  309         return (0);
  310 }
  311 
  312 static int
  313 ufs_accessx(ap)
  314         struct vop_accessx_args /* {
  315                 struct vnode *a_vp;
  316                 accmode_t a_accmode;
  317                 struct ucred *a_cred;
  318                 struct thread *a_td;
  319         } */ *ap;
  320 {
  321         struct vnode *vp = ap->a_vp;
  322         struct inode *ip = VTOI(vp);
  323         accmode_t accmode = ap->a_accmode;
  324         int error;
  325 #ifdef QUOTA
  326         int relocked;
  327 #endif
  328 #ifdef UFS_ACL
  329         struct acl *acl;
  330         acl_type_t type;
  331 #endif
  332 
  333         /*
  334          * Disallow write attempts on read-only filesystems;
  335          * unless the file is a socket, fifo, or a block or
  336          * character device resident on the filesystem.
  337          */
  338         if (accmode & VMODIFY_PERMS) {
  339                 switch (vp->v_type) {
  340                 case VDIR:
  341                 case VLNK:
  342                 case VREG:
  343                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  344                                 return (EROFS);
  345 #ifdef QUOTA
  346                         /*
  347                          * Inode is accounted in the quotas only if struct
  348                          * dquot is attached to it. VOP_ACCESS() is called
  349                          * from vn_open_cred() and provides a convenient
  350                          * point to call getinoquota().
  351                          */
  352                         if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
  353 
  354                                 /*
  355                                  * Upgrade vnode lock, since getinoquota()
  356                                  * requires exclusive lock to modify inode.
  357                                  */
  358                                 relocked = 1;
  359                                 vhold(vp);
  360                                 vn_lock(vp, LK_UPGRADE | LK_RETRY);
  361                                 VI_LOCK(vp);
  362                                 if (vp->v_iflag & VI_DOOMED) {
  363                                         vdropl(vp);
  364                                         error = ENOENT;
  365                                         goto relock;
  366                                 }
  367                                 vdropl(vp);
  368                         } else
  369                                 relocked = 0;
  370                         error = getinoquota(ip);
  371 relock:
  372                         if (relocked)
  373                                 vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
  374                         if (error != 0)
  375                                 return (error);
  376 #endif
  377                         break;
  378                 default:
  379                         break;
  380                 }
  381         }
  382 
  383         /*
  384          * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
  385          * is here, because without it, * it would be impossible for the owner
  386          * to remove the IMMUTABLE flag.
  387          */
  388         if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
  389             (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
  390                 return (EPERM);
  391 
  392 #ifdef UFS_ACL
  393         if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
  394                 if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
  395                         type = ACL_TYPE_NFS4;
  396                 else
  397                         type = ACL_TYPE_ACCESS;
  398 
  399                 acl = acl_alloc(M_WAITOK);
  400                 if (type == ACL_TYPE_NFS4)
  401                         error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
  402                 else
  403                         error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
  404                 switch (error) {
  405                 case 0:
  406                         if (type == ACL_TYPE_NFS4) {
  407                                 error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
  408                                     ip->i_gid, acl, accmode, ap->a_cred, NULL);
  409                         } else {
  410                                 error = vfs_unixify_accmode(&accmode);
  411                                 if (error == 0)
  412                                         error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
  413                                             ip->i_gid, acl, accmode, ap->a_cred, NULL);
  414                         }
  415                         break;
  416                 default:
  417                         if (error != EOPNOTSUPP)
  418                                 printf(
  419 "ufs_accessx(): Error retrieving ACL on object (%d).\n",
  420                                     error);
  421                         /*
  422                          * XXX: Fall back until debugged.  Should
  423                          * eventually possibly log an error, and return
  424                          * EPERM for safety.
  425                          */
  426                         error = vfs_unixify_accmode(&accmode);
  427                         if (error == 0)
  428                                 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
  429                                     ip->i_gid, accmode, ap->a_cred, NULL);
  430                 }
  431                 acl_free(acl);
  432 
  433                 return (error);
  434         }
  435 #endif /* !UFS_ACL */
  436         error = vfs_unixify_accmode(&accmode);
  437         if (error == 0)
  438                 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
  439                     accmode, ap->a_cred, NULL);
  440         return (error);
  441 }
  442 
  443 /* ARGSUSED */
  444 static int
  445 ufs_getattr(ap)
  446         struct vop_getattr_args /* {
  447                 struct vnode *a_vp;
  448                 struct vattr *a_vap;
  449                 struct ucred *a_cred;
  450         } */ *ap;
  451 {
  452         struct vnode *vp = ap->a_vp;
  453         struct inode *ip = VTOI(vp);
  454         struct vattr *vap = ap->a_vap;
  455 
  456         VI_LOCK(vp);
  457         ufs_itimes_locked(vp);
  458         if (ip->i_ump->um_fstype == UFS1) {
  459                 vap->va_atime.tv_sec = ip->i_din1->di_atime;
  460                 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
  461         } else {
  462                 vap->va_atime.tv_sec = ip->i_din2->di_atime;
  463                 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
  464         }
  465         VI_UNLOCK(vp);
  466         /*
  467          * Copy from inode table
  468          */
  469         vap->va_fsid = dev2udev(ip->i_dev);
  470         vap->va_fileid = ip->i_number;
  471         vap->va_mode = ip->i_mode & ~IFMT;
  472         vap->va_nlink = ip->i_effnlink;
  473         vap->va_uid = ip->i_uid;
  474         vap->va_gid = ip->i_gid;
  475         if (ip->i_ump->um_fstype == UFS1) {
  476                 vap->va_rdev = ip->i_din1->di_rdev;
  477                 vap->va_size = ip->i_din1->di_size;
  478                 vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
  479                 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
  480                 vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
  481                 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
  482                 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
  483                 vap->va_filerev = ip->i_din1->di_modrev;
  484         } else {
  485                 vap->va_rdev = ip->i_din2->di_rdev;
  486                 vap->va_size = ip->i_din2->di_size;
  487                 vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
  488                 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
  489                 vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
  490                 vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
  491                 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
  492                 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
  493                 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
  494                 vap->va_filerev = ip->i_din2->di_modrev;
  495         }
  496         vap->va_flags = ip->i_flags;
  497         vap->va_gen = ip->i_gen;
  498         vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
  499         vap->va_type = IFTOVT(ip->i_mode);
  500         return (0);
  501 }
  502 
  503 /*
  504  * Set attribute vnode op. called from several syscalls
  505  */
  506 static int
  507 ufs_setattr(ap)
  508         struct vop_setattr_args /* {
  509                 struct vnode *a_vp;
  510                 struct vattr *a_vap;
  511                 struct ucred *a_cred;
  512         } */ *ap;
  513 {
  514         struct vattr *vap = ap->a_vap;
  515         struct vnode *vp = ap->a_vp;
  516         struct inode *ip = VTOI(vp);
  517         struct ucred *cred = ap->a_cred;
  518         struct thread *td = curthread;
  519         int error;
  520 
  521         /*
  522          * Check for unsettable attributes.
  523          */
  524         if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
  525             (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
  526             (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
  527             ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
  528                 return (EINVAL);
  529         }
  530         if (vap->va_flags != VNOVAL) {
  531                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  532                         return (EROFS);
  533                 /*
  534                  * Callers may only modify the file flags on objects they
  535                  * have VADMIN rights for.
  536                  */
  537                 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
  538                         return (error);
  539                 /*
  540                  * Unprivileged processes are not permitted to unset system
  541                  * flags, or modify flags if any system flags are set.
  542                  * Privileged non-jail processes may not modify system flags
  543                  * if securelevel > 0 and any existing system flags are set.
  544                  * Privileged jail processes behave like privileged non-jail
  545                  * processes if the security.jail.chflags_allowed sysctl is
  546                  * is non-zero; otherwise, they behave like unprivileged
  547                  * processes.
  548                  */
  549                 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
  550                         if (ip->i_flags
  551                             & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
  552                                 error = securelevel_gt(cred, 0);
  553                                 if (error)
  554                                         return (error);
  555                         }
  556                         /* Snapshot flag cannot be set or cleared */
  557                         if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
  558                              (ip->i_flags & SF_SNAPSHOT) == 0) ||
  559                             ((vap->va_flags & SF_SNAPSHOT) == 0 &&
  560                              (ip->i_flags & SF_SNAPSHOT) != 0))
  561                                 return (EPERM);
  562                         ip->i_flags = vap->va_flags;
  563                         DIP_SET(ip, i_flags, vap->va_flags);
  564                 } else {
  565                         if (ip->i_flags
  566                             & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
  567                             (vap->va_flags & UF_SETTABLE) != vap->va_flags)
  568                                 return (EPERM);
  569                         ip->i_flags &= SF_SETTABLE;
  570                         ip->i_flags |= (vap->va_flags & UF_SETTABLE);
  571                         DIP_SET(ip, i_flags, ip->i_flags);
  572                 }
  573                 ip->i_flag |= IN_CHANGE;
  574                 if (vap->va_flags & (IMMUTABLE | APPEND))
  575                         return (0);
  576         }
  577         if (ip->i_flags & (IMMUTABLE | APPEND))
  578                 return (EPERM);
  579         /*
  580          * Go through the fields and update iff not VNOVAL.
  581          */
  582         if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
  583                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  584                         return (EROFS);
  585                 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
  586                     td)) != 0)
  587                         return (error);
  588         }
  589         if (vap->va_size != VNOVAL) {
  590                 /*
  591                  * XXX most of the following special cases should be in
  592                  * callers instead of in N filesystems.  The VDIR check
  593                  * mostly already is.
  594                  */
  595                 switch (vp->v_type) {
  596                 case VDIR:
  597                         return (EISDIR);
  598                 case VLNK:
  599                 case VREG:
  600                         /*
  601                          * Truncation should have an effect in these cases.
  602                          * Disallow it if the filesystem is read-only or
  603                          * the file is being snapshotted.
  604                          */
  605                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  606                                 return (EROFS);
  607                         if ((ip->i_flags & SF_SNAPSHOT) != 0)
  608                                 return (EPERM);
  609                         break;
  610                 default:
  611                         /*
  612                          * According to POSIX, the result is unspecified
  613                          * for file types other than regular files,
  614                          * directories and shared memory objects.  We
  615                          * don't support shared memory objects in the file
  616                          * system, and have dubious support for truncating
  617                          * symlinks.  Just ignore the request in other cases.
  618                          */
  619                         return (0);
  620                 }
  621                 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
  622                     cred, td)) != 0)
  623                         return (error);
  624         }
  625         if (vap->va_atime.tv_sec != VNOVAL ||
  626             vap->va_mtime.tv_sec != VNOVAL ||
  627             vap->va_birthtime.tv_sec != VNOVAL) {
  628                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  629                         return (EROFS);
  630                 if ((ip->i_flags & SF_SNAPSHOT) != 0)
  631                         return (EPERM);
  632                 /*
  633                  * From utimes(2):
  634                  * If times is NULL, ... The caller must be the owner of
  635                  * the file, have permission to write the file, or be the
  636                  * super-user.
  637                  * If times is non-NULL, ... The caller must be the owner of
  638                  * the file or be the super-user.
  639                  *
  640                  * Possibly for historical reasons, try to use VADMIN in
  641                  * preference to VWRITE for a NULL timestamp.  This means we
  642                  * will return EACCES in preference to EPERM if neither
  643                  * check succeeds.
  644                  */
  645                 if (vap->va_vaflags & VA_UTIMES_NULL) {
  646                         /*
  647                          * NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes
  648                          *
  649                          * "A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
  650                          * will be allowed to set the times [..] to the current
  651                          * server time."
  652                          *
  653                          * XXX: Calling it four times seems a little excessive.
  654                          */
  655                         error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
  656                         if (error)
  657                                 error = VOP_ACCESS(vp, VWRITE, cred, td);
  658                 } else
  659                         error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
  660                 if (error)
  661                         return (error);
  662                 if (vap->va_atime.tv_sec != VNOVAL)
  663                         ip->i_flag |= IN_ACCESS;
  664                 if (vap->va_mtime.tv_sec != VNOVAL)
  665                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
  666                 if (vap->va_birthtime.tv_sec != VNOVAL &&
  667                     ip->i_ump->um_fstype == UFS2)
  668                         ip->i_flag |= IN_MODIFIED;
  669                 ufs_itimes(vp);
  670                 if (vap->va_atime.tv_sec != VNOVAL) {
  671                         DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
  672                         DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
  673                 }
  674                 if (vap->va_mtime.tv_sec != VNOVAL) {
  675                         DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
  676                         DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
  677                 }
  678                 if (vap->va_birthtime.tv_sec != VNOVAL &&
  679                     ip->i_ump->um_fstype == UFS2) {
  680                         ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
  681                         ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
  682                 }
  683                 error = UFS_UPDATE(vp, 0);
  684                 if (error)
  685                         return (error);
  686         }
  687         error = 0;
  688         if (vap->va_mode != (mode_t)VNOVAL) {
  689                 if (vp->v_mount->mnt_flag & MNT_RDONLY)
  690                         return (EROFS);
  691                 if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
  692                    (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
  693                         return (EPERM);
  694                 error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
  695         }
  696         return (error);
  697 }
  698 
  699 #ifdef UFS_ACL
  700 static int
  701 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
  702     int file_owner_id, struct ucred *cred, struct thread *td)
  703 {
  704         int error;
  705         struct acl *aclp;
  706 
  707         aclp = acl_alloc(M_WAITOK);
  708         error = ufs_getacl_nfs4_internal(vp, aclp, td);
  709         /*
  710          * We don't have to handle EOPNOTSUPP here, as the filesystem claims
  711          * it supports ACLs.
  712          */
  713         if (error)
  714                 goto out;
  715 
  716         acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
  717         error = ufs_setacl_nfs4_internal(vp, aclp, td);
  718 
  719 out:
  720         acl_free(aclp);
  721         return (error);
  722 }
  723 #endif /* UFS_ACL */
  724 
  725 /*
  726  * Mark this file's access time for update for vfs_mark_atime().  This
  727  * is called from execve() and mmap().
  728  */
  729 static int
  730 ufs_markatime(ap)
  731         struct vop_markatime_args /* {
  732                 struct vnode *a_vp;
  733         } */ *ap;
  734 {
  735         struct vnode *vp = ap->a_vp;
  736         struct inode *ip = VTOI(vp);
  737 
  738         VI_LOCK(vp);
  739         ip->i_flag |= IN_ACCESS;
  740         VI_UNLOCK(vp);
  741         return (0);
  742 }
  743 
  744 /*
  745  * Change the mode on a file.
  746  * Inode must be locked before calling.
  747  */
  748 static int
  749 ufs_chmod(vp, mode, cred, td)
  750         struct vnode *vp;
  751         int mode;
  752         struct ucred *cred;
  753         struct thread *td;
  754 {
  755         struct inode *ip = VTOI(vp);
  756         int error;
  757 
  758         /*
  759          * To modify the permissions on a file, must possess VADMIN
  760          * for that file.
  761          */
  762         if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
  763                 return (error);
  764         /*
  765          * Privileged processes may set the sticky bit on non-directories,
  766          * as well as set the setgid bit on a file with a group that the
  767          * process is not a member of.  Both of these are allowed in
  768          * jail(8).
  769          */
  770         if (vp->v_type != VDIR && (mode & S_ISTXT)) {
  771                 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
  772                         return (EFTYPE);
  773         }
  774         if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
  775                 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
  776                 if (error)
  777                         return (error);
  778         }
  779 
  780         /*
  781          * Deny setting setuid if we are not the file owner.
  782          */
  783         if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
  784                 error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
  785                 if (error)
  786                         return (error);
  787         }
  788 
  789         ip->i_mode &= ~ALLPERMS;
  790         ip->i_mode |= (mode & ALLPERMS);
  791         DIP_SET(ip, i_mode, ip->i_mode);
  792         ip->i_flag |= IN_CHANGE;
  793 #ifdef UFS_ACL
  794         if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
  795                 error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
  796 #endif
  797         return (error);
  798 }
  799 
  800 /*
  801  * Perform chown operation on inode ip;
  802  * inode must be locked prior to call.
  803  */
  804 static int
  805 ufs_chown(vp, uid, gid, cred, td)
  806         struct vnode *vp;
  807         uid_t uid;
  808         gid_t gid;
  809         struct ucred *cred;
  810         struct thread *td;
  811 {
  812         struct inode *ip = VTOI(vp);
  813         uid_t ouid;
  814         gid_t ogid;
  815         int error = 0;
  816 #ifdef QUOTA
  817         int i;
  818         ufs2_daddr_t change;
  819 #endif
  820 
  821         if (uid == (uid_t)VNOVAL)
  822                 uid = ip->i_uid;
  823         if (gid == (gid_t)VNOVAL)
  824                 gid = ip->i_gid;
  825         /*
  826          * To modify the ownership of a file, must possess VADMIN for that
  827          * file.
  828          */
  829         if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
  830                 return (error);
  831         /*
  832          * To change the owner of a file, or change the group of a file to a
  833          * group of which we are not a member, the caller must have
  834          * privilege.
  835          */
  836         if (((uid != ip->i_uid && uid != cred->cr_uid) || 
  837             (gid != ip->i_gid && !groupmember(gid, cred))) &&
  838             (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
  839                 return (error);
  840         ogid = ip->i_gid;
  841         ouid = ip->i_uid;
  842 #ifdef QUOTA
  843         if ((error = getinoquota(ip)) != 0)
  844                 return (error);
  845         if (ouid == uid) {
  846                 dqrele(vp, ip->i_dquot[USRQUOTA]);
  847                 ip->i_dquot[USRQUOTA] = NODQUOT;
  848         }
  849         if (ogid == gid) {
  850                 dqrele(vp, ip->i_dquot[GRPQUOTA]);
  851                 ip->i_dquot[GRPQUOTA] = NODQUOT;
  852         }
  853         change = DIP(ip, i_blocks);
  854         (void) chkdq(ip, -change, cred, CHOWN);
  855         (void) chkiq(ip, -1, cred, CHOWN);
  856         for (i = 0; i < MAXQUOTAS; i++) {
  857                 dqrele(vp, ip->i_dquot[i]);
  858                 ip->i_dquot[i] = NODQUOT;
  859         }
  860 #endif
  861         ip->i_gid = gid;
  862         DIP_SET(ip, i_gid, gid);
  863         ip->i_uid = uid;
  864         DIP_SET(ip, i_uid, uid);
  865 #ifdef QUOTA
  866         if ((error = getinoquota(ip)) == 0) {
  867                 if (ouid == uid) {
  868                         dqrele(vp, ip->i_dquot[USRQUOTA]);
  869                         ip->i_dquot[USRQUOTA] = NODQUOT;
  870                 }
  871                 if (ogid == gid) {
  872                         dqrele(vp, ip->i_dquot[GRPQUOTA]);
  873                         ip->i_dquot[GRPQUOTA] = NODQUOT;
  874                 }
  875                 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
  876                         if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
  877                                 goto good;
  878                         else
  879                                 (void) chkdq(ip, -change, cred, CHOWN|FORCE);
  880                 }
  881                 for (i = 0; i < MAXQUOTAS; i++) {
  882                         dqrele(vp, ip->i_dquot[i]);
  883                         ip->i_dquot[i] = NODQUOT;
  884                 }
  885         }
  886         ip->i_gid = ogid;
  887         DIP_SET(ip, i_gid, ogid);
  888         ip->i_uid = ouid;
  889         DIP_SET(ip, i_uid, ouid);
  890         if (getinoquota(ip) == 0) {
  891                 if (ouid == uid) {
  892                         dqrele(vp, ip->i_dquot[USRQUOTA]);
  893                         ip->i_dquot[USRQUOTA] = NODQUOT;
  894                 }
  895                 if (ogid == gid) {
  896                         dqrele(vp, ip->i_dquot[GRPQUOTA]);
  897                         ip->i_dquot[GRPQUOTA] = NODQUOT;
  898                 }
  899                 (void) chkdq(ip, change, cred, FORCE|CHOWN);
  900                 (void) chkiq(ip, 1, cred, FORCE|CHOWN);
  901                 (void) getinoquota(ip);
  902         }
  903         return (error);
  904 good:
  905         if (getinoquota(ip))
  906                 panic("ufs_chown: lost quota");
  907 #endif /* QUOTA */
  908         ip->i_flag |= IN_CHANGE;
  909         if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
  910                 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
  911                         ip->i_mode &= ~(ISUID | ISGID);
  912                         DIP_SET(ip, i_mode, ip->i_mode);
  913                 }
  914         }
  915         return (0);
  916 }
  917 
  918 static int
  919 ufs_remove(ap)
  920         struct vop_remove_args /* {
  921                 struct vnode *a_dvp;
  922                 struct vnode *a_vp;
  923                 struct componentname *a_cnp;
  924         } */ *ap;
  925 {
  926         struct inode *ip;
  927         struct vnode *vp = ap->a_vp;
  928         struct vnode *dvp = ap->a_dvp;
  929         int error;
  930         struct thread *td;
  931 
  932         td = curthread;
  933         ip = VTOI(vp);
  934         if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
  935             (VTOI(dvp)->i_flags & APPEND)) {
  936                 error = EPERM;
  937                 goto out;
  938         }
  939 #ifdef UFS_GJOURNAL
  940         ufs_gjournal_orphan(vp);
  941 #endif
  942         error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
  943         if (ip->i_nlink <= 0)
  944                 vp->v_vflag |= VV_NOSYNC;
  945         if ((ip->i_flags & SF_SNAPSHOT) != 0) {
  946                 /*
  947                  * Avoid deadlock where another thread is trying to
  948                  * update the inodeblock for dvp and is waiting on
  949                  * snaplk.  Temporary unlock the vnode lock for the
  950                  * unlinked file and sync the directory.  This should
  951                  * allow vput() of the directory to not block later on
  952                  * while holding the snapshot vnode locked, assuming
  953                  * that the directory hasn't been unlinked too.
  954                  */
  955                 VOP_UNLOCK(vp, 0);
  956                 (void) VOP_FSYNC(dvp, MNT_WAIT, td);
  957                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  958         }
  959 out:
  960         return (error);
  961 }
  962 
  963 /*
  964  * link vnode call
  965  */
  966 static int
  967 ufs_link(ap)
  968         struct vop_link_args /* {
  969                 struct vnode *a_tdvp;
  970                 struct vnode *a_vp;
  971                 struct componentname *a_cnp;
  972         } */ *ap;
  973 {
  974         struct vnode *vp = ap->a_vp;
  975         struct vnode *tdvp = ap->a_tdvp;
  976         struct componentname *cnp = ap->a_cnp;
  977         struct inode *ip;
  978         struct direct newdir;
  979         int error;
  980 
  981 #ifdef INVARIANTS
  982         if ((cnp->cn_flags & HASBUF) == 0)
  983                 panic("ufs_link: no name");
  984 #endif
  985         if (tdvp->v_mount != vp->v_mount) {
  986                 error = EXDEV;
  987                 goto out;
  988         }
  989         if (VTOI(tdvp)->i_effnlink < 2)
  990                 panic("ufs_link: Bad link count %d on parent",
  991                     VTOI(tdvp)->i_effnlink);
  992         ip = VTOI(vp);
  993         if ((nlink_t)ip->i_nlink >= LINK_MAX) {
  994                 error = EMLINK;
  995                 goto out;
  996         }
  997         if (ip->i_flags & (IMMUTABLE | APPEND)) {
  998                 error = EPERM;
  999                 goto out;
 1000         }
 1001         ip->i_effnlink++;
 1002         ip->i_nlink++;
 1003         DIP_SET(ip, i_nlink, ip->i_nlink);
 1004         ip->i_flag |= IN_CHANGE;
 1005         if (DOINGSOFTDEP(vp))
 1006                 softdep_setup_link(VTOI(tdvp), ip);
 1007         error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
 1008         if (!error) {
 1009                 ufs_makedirentry(ip, cnp, &newdir);
 1010                 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
 1011         }
 1012 
 1013         if (error) {
 1014                 ip->i_effnlink--;
 1015                 ip->i_nlink--;
 1016                 DIP_SET(ip, i_nlink, ip->i_nlink);
 1017                 ip->i_flag |= IN_CHANGE;
 1018                 if (DOINGSOFTDEP(vp))
 1019                         softdep_revert_link(VTOI(tdvp), ip);
 1020         }
 1021 out:
 1022         return (error);
 1023 }
 1024 
 1025 /*
 1026  * whiteout vnode call
 1027  */
 1028 static int
 1029 ufs_whiteout(ap)
 1030         struct vop_whiteout_args /* {
 1031                 struct vnode *a_dvp;
 1032                 struct componentname *a_cnp;
 1033                 int a_flags;
 1034         } */ *ap;
 1035 {
 1036         struct vnode *dvp = ap->a_dvp;
 1037         struct componentname *cnp = ap->a_cnp;
 1038         struct direct newdir;
 1039         int error = 0;
 1040 
 1041         switch (ap->a_flags) {
 1042         case LOOKUP:
 1043                 /* 4.4 format directories support whiteout operations */
 1044                 if (dvp->v_mount->mnt_maxsymlinklen > 0)
 1045                         return (0);
 1046                 return (EOPNOTSUPP);
 1047 
 1048         case CREATE:
 1049                 /* create a new directory whiteout */
 1050 #ifdef INVARIANTS
 1051                 if ((cnp->cn_flags & SAVENAME) == 0)
 1052                         panic("ufs_whiteout: missing name");
 1053                 if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 1054                         panic("ufs_whiteout: old format filesystem");
 1055 #endif
 1056 
 1057                 newdir.d_ino = WINO;
 1058                 newdir.d_namlen = cnp->cn_namelen;
 1059                 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 1060                 newdir.d_type = DT_WHT;
 1061                 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
 1062                 break;
 1063 
 1064         case DELETE:
 1065                 /* remove an existing directory whiteout */
 1066 #ifdef INVARIANTS
 1067                 if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 1068                         panic("ufs_whiteout: old format filesystem");
 1069 #endif
 1070 
 1071                 cnp->cn_flags &= ~DOWHITEOUT;
 1072                 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
 1073                 break;
 1074         default:
 1075                 panic("ufs_whiteout: unknown op");
 1076         }
 1077         return (error);
 1078 }
 1079 
 1080 static volatile int rename_restarts;	/* bumped by ufs_rename() before each "goto relock" */
 1081 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
 1082     __DEVOLATILE(int *, &rename_restarts), 0,
 1083     "Times rename had to restart due to lock contention");
 1084 
 1085 /*
 1086  * Rename system call.
 1087  *      rename("foo", "bar");
 1088  * is essentially
 1089  *      unlink("bar");
 1090  *      link("foo", "bar");
 1091  *      unlink("foo");
 1092  * but ``atomically''.  Can't do full commit without saving state in the
 1093  * inode on disk which isn't feasible at this time.  Best we can do is
 1094  * always guarantee the target exists.
 1095  *
 1096  * Basic algorithm is:
 1097  *
 1098  * 1) Bump link count on source while we're linking it to the
 1099  *    target.  This also ensures the inode won't be deleted out
 1100  *    from underneath us while we work (it may be truncated by
 1101  *    a concurrent `trunc' or `open' for creation).
 1102  * 2) Link source to destination.  If destination already exists,
 1103  *    delete it first.
 1104  * 3) Unlink source reference to inode if still around. If a
 1105  *    directory was moved and the parent of the destination
 1106  *    is different from the source, patch the ".." entry in the
 1107  *    directory.
 1108  */
 1109 static int
 1110 ufs_rename(ap)
 1111         struct vop_rename_args  /* {
 1112                 struct vnode *a_fdvp;
 1113                 struct vnode *a_fvp;
 1114                 struct componentname *a_fcnp;
 1115                 struct vnode *a_tdvp;
 1116                 struct vnode *a_tvp;
 1117                 struct componentname *a_tcnp;
 1118         } */ *ap;
 1119 {
 1120         struct vnode *tvp = ap->a_tvp;
 1121         struct vnode *tdvp = ap->a_tdvp;
 1122         struct vnode *fvp = ap->a_fvp;
 1123         struct vnode *fdvp = ap->a_fdvp;
 1124         struct vnode *nvp;
 1125         struct componentname *tcnp = ap->a_tcnp;
 1126         struct componentname *fcnp = ap->a_fcnp;
 1127         struct thread *td = fcnp->cn_thread;
 1128         struct inode *fip, *tip, *tdp, *fdp;
 1129         struct direct newdir;
 1130         off_t endoff;
 1131         int doingdirectory, newparent;
 1132         int error = 0;
 1133         struct mount *mp;
 1134         ino_t ino;
 1135 
 1136 #ifdef INVARIANTS
 1137         if ((tcnp->cn_flags & HASBUF) == 0 ||
 1138             (fcnp->cn_flags & HASBUF) == 0)
 1139                 panic("ufs_rename: no name");
 1140 #endif
 1141         endoff = 0;
 1142         mp = tdvp->v_mount;
 1143         VOP_UNLOCK(tdvp, 0);
 1144         if (tvp && tvp != tdvp)
 1145                 VOP_UNLOCK(tvp, 0);
 1146         /*
 1147          * Check for cross-device rename.
 1148          */
 1149         if ((fvp->v_mount != tdvp->v_mount) ||
 1150             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1151                 error = EXDEV;
 1152                 mp = NULL;
 1153                 goto releout;
 1154         }
 1155         error = vfs_busy(mp, 0);
 1156         if (error) {
 1157                 mp = NULL;
 1158                 goto releout;
 1159         }
 1160 relock:
 1161         /* 
 1162          * We need to acquire 2 to 4 locks depending on whether tvp is NULL
 1163          * and fdvp and tdvp are the same directory.  Subsequently we need
 1164          * to double-check all paths and in the directory rename case we
 1165          * need to verify that we are not creating a directory loop.  To
 1166          * handle this we acquire all but fdvp using non-blocking
 1167          * acquisitions.  If we fail to acquire any lock in the path we will
 1168          * drop all held locks, acquire the new lock in a blocking fashion,
 1169          * and then release it and restart the rename.  This acquire/release
 1170          * step ensures that we do not spin on a lock waiting for release.
 1171          */
 1172         error = vn_lock(fdvp, LK_EXCLUSIVE);
 1173         if (error)
 1174                 goto releout;
 1175         if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 1176                 VOP_UNLOCK(fdvp, 0);
 1177                 error = vn_lock(tdvp, LK_EXCLUSIVE);
 1178                 if (error)
 1179                         goto releout;
 1180                 VOP_UNLOCK(tdvp, 0);
 1181                 atomic_add_int(&rename_restarts, 1);
 1182                 goto relock;
 1183         }
 1184         /*
 1185          * Re-resolve fvp to be certain it still exists and fetch the
 1186          * correct vnode.
 1187          */
 1188         error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 1189         if (error) {
 1190                 VOP_UNLOCK(fdvp, 0);
 1191                 VOP_UNLOCK(tdvp, 0);
 1192                 goto releout;
 1193         }
 1194         error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 1195         if (error) {
 1196                 VOP_UNLOCK(fdvp, 0);
 1197                 VOP_UNLOCK(tdvp, 0);
 1198                 if (error != EBUSY)
 1199                         goto releout;
 1200                 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 1201                 if (error != 0)
 1202                         goto releout;
 1203                 VOP_UNLOCK(nvp, 0);
 1204                 vrele(fvp);
 1205                 fvp = nvp;
 1206                 atomic_add_int(&rename_restarts, 1);
 1207                 goto relock;
 1208         }
 1209         vrele(fvp);
 1210         fvp = nvp;
 1211         /*
 1212          * Re-resolve tvp and acquire the vnode lock if present.
 1213          */
 1214         error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
 1215         if (error != 0 && error != EJUSTRETURN) {
 1216                 VOP_UNLOCK(fdvp, 0);
 1217                 VOP_UNLOCK(tdvp, 0);
 1218                 VOP_UNLOCK(fvp, 0);
 1219                 goto releout;
 1220         }
 1221         /*
 1222          * If tvp disappeared we just carry on.
 1223          */
 1224         if (error == EJUSTRETURN && tvp != NULL) {
 1225                 vrele(tvp);
 1226                 tvp = NULL;
 1227         }
 1228         /*
 1229          * Get the tvp ino if the lookup succeeded.  We may have to restart
 1230          * if the non-blocking acquire fails.
 1231          */
 1232         if (error == 0) {
 1233                 nvp = NULL;
 1234                 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 1235                 if (tvp)
 1236                         vrele(tvp);
 1237                 tvp = nvp;
 1238                 if (error) {
 1239                         VOP_UNLOCK(fdvp, 0);
 1240                         VOP_UNLOCK(tdvp, 0);
 1241                         VOP_UNLOCK(fvp, 0);
 1242                         if (error != EBUSY)
 1243                                 goto releout;
 1244                         error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 1245                         if (error != 0)
 1246                                 goto releout;
 1247                         VOP_UNLOCK(nvp, 0);
 1248                         atomic_add_int(&rename_restarts, 1);
 1249                         goto relock;
 1250                 }
 1251         }
 1252         fdp = VTOI(fdvp);
 1253         fip = VTOI(fvp);
 1254         tdp = VTOI(tdvp);
 1255         tip = NULL;
 1256         if (tvp)
 1257                 tip = VTOI(tvp);
 1258         if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 1259             (VTOI(tdvp)->i_flags & APPEND))) {
 1260                 error = EPERM;
 1261                 goto unlockout;
 1262         }
 1263         /*
 1264          * Renaming a file to itself has no effect.  The upper layers should
 1265          * not call us in that case.  However, things could change after
 1266          * we drop the locks above.
 1267          */
 1268         if (fvp == tvp) {
 1269                 error = 0;
 1270                 goto unlockout;
 1271         }
 1272         doingdirectory = 0;
 1273         newparent = 0;
 1274         ino = fip->i_number;
 1275         if (fip->i_nlink >= LINK_MAX) {
 1276                 error = EMLINK;
 1277                 goto unlockout;
 1278         }
 1279         if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 1280             || (fdp->i_flags & APPEND)) {
 1281                 error = EPERM;
 1282                 goto unlockout;
 1283         }
 1284         if ((fip->i_mode & IFMT) == IFDIR) {
 1285                 /*
 1286                  * Avoid ".", "..", and aliases of "." for obvious reasons.
 1287                  */
 1288                 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 1289                     fdp == fip ||
 1290                     (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 1291                         error = EINVAL;
 1292                         goto unlockout;
 1293                 }
 1294                 if (fdp->i_number != tdp->i_number)
 1295                         newparent = tdp->i_number;
 1296                 doingdirectory = 1;
 1297         }
 1298         if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
 1299             (tvp != NULL && tvp->v_type == VDIR &&
 1300             tvp->v_mountedhere != NULL)) {
 1301                 error = EXDEV;
 1302                 goto unlockout;
 1303         }
 1304 
 1305         /*
 1306          * If ".." must be changed (ie the directory gets a new
 1307          * parent) then the source directory must not be in the
 1308          * directory hierarchy above the target, as this would
 1309          * orphan everything below the source directory. Also
 1310          * the user must have write permission in the source so
 1311          * as to be able to change "..".
 1312          */
 1313         if (doingdirectory && newparent) {
 1314                 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 1315                 if (error)
 1316                         goto unlockout;
 1317                 error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
 1318                     &ino);
 1319                 /*
 1320                  * We encountered a lock that we have to wait for.  Unlock
 1321                  * everything else and VGET before restarting.
 1322                  */
 1323                 if (ino) {
 1324                         VOP_UNLOCK(fdvp, 0);
 1325                         VOP_UNLOCK(fvp, 0);
 1326                         VOP_UNLOCK(tdvp, 0);
 1327                         if (tvp)
 1328                                 VOP_UNLOCK(tvp, 0);
 1329                         error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
 1330                         if (error == 0)
 1331                                 vput(nvp);
 1332                         atomic_add_int(&rename_restarts, 1);
 1333                         goto relock;
 1334                 }
 1335                 if (error)
 1336                         goto unlockout;
 1337                 if ((tcnp->cn_flags & SAVESTART) == 0)
 1338                         panic("ufs_rename: lost to startdir");
 1339         }
 1340         if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
 1341             tdp->i_effnlink == 0)
 1342                 panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 1343 
 1344         /*
 1345          * 1) Bump link count while we're moving stuff
 1346          *    around.  If we crash somewhere before
 1347          *    completing our work, the link count
 1348          *    may be wrong, but correctable.
 1349          */
 1350         fip->i_effnlink++;
 1351         fip->i_nlink++;
 1352         DIP_SET(fip, i_nlink, fip->i_nlink);
 1353         fip->i_flag |= IN_CHANGE;
 1354         if (DOINGSOFTDEP(fvp))
 1355                 softdep_setup_link(tdp, fip);
 1356         error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
 1357         if (error)
 1358                 goto bad;
 1359 
 1360         /*
 1361          * 2) If target doesn't exist, link the target
 1362          *    to the source and unlink the source.
 1363          *    Otherwise, rewrite the target directory
 1364          *    entry to reference the source inode and
 1365          *    expunge the original entry's existence.
 1366          */
 1367         if (tip == NULL) {
 1368                 if (tdp->i_dev != fip->i_dev)
 1369                         panic("ufs_rename: EXDEV");
 1370                 if (doingdirectory && newparent) {
 1371                         /*
 1372                          * Account for ".." in new directory.
 1373                          * When source and destination have the same
 1374                          * parent we don't adjust the link count.  The
 1375                          * actual link modification is completed when
 1376                          * .. is rewritten below.
 1377                          */
 1378                         if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
 1379                                 error = EMLINK;
 1380                                 goto bad;
 1381                         }
 1382                 }
 1383                 ufs_makedirentry(fip, tcnp, &newdir);
 1384                 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
 1385                 if (error)
 1386                         goto bad;
 1387                 /* Setup tdvp for directory compaction if needed. */
 1388                 if (tdp->i_count && tdp->i_endoff &&
 1389                     tdp->i_endoff < tdp->i_size)
 1390                         endoff = tdp->i_endoff;
 1391         } else {
 1392                 if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
 1393                         panic("ufs_rename: EXDEV");
 1394                 /*
 1395                  * Short circuit rename(foo, foo).
 1396                  */
 1397                 if (tip->i_number == fip->i_number)
 1398                         panic("ufs_rename: same file");
 1399                 /*
 1400                  * If the parent directory is "sticky", then the caller
 1401                  * must possess VADMIN for the parent directory, or the
 1402                  * destination of the rename.  This implements append-only
 1403                  * directories.
 1404                  */
 1405                 if ((tdp->i_mode & S_ISTXT) &&
 1406                     VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
 1407                     VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
 1408                         error = EPERM;
 1409                         goto bad;
 1410                 }
 1411                 /*
 1412                  * Target must be empty if a directory and have no links
 1413                  * to it. Also, ensure source and target are compatible
 1414                  * (both directories, or both not directories).
 1415                  */
 1416                 if ((tip->i_mode & IFMT) == IFDIR) {
 1417                         if ((tip->i_effnlink > 2) ||
 1418                             !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
 1419                                 error = ENOTEMPTY;
 1420                                 goto bad;
 1421                         }
 1422                         if (!doingdirectory) {
 1423                                 error = ENOTDIR;
 1424                                 goto bad;
 1425                         }
 1426                         cache_purge(tdvp);
 1427                 } else if (doingdirectory) {
 1428                         error = EISDIR;
 1429                         goto bad;
 1430                 }
 1431                 if (doingdirectory) {
 1432                         if (!newparent) {
 1433                                 tdp->i_effnlink--;
 1434                                 if (DOINGSOFTDEP(tdvp))
 1435                                         softdep_change_linkcnt(tdp);
 1436                         }
 1437                         tip->i_effnlink--;
 1438                         if (DOINGSOFTDEP(tvp))
 1439                                 softdep_change_linkcnt(tip);
 1440                 }
 1441                 error = ufs_dirrewrite(tdp, tip, fip->i_number,
 1442                     IFTODT(fip->i_mode),
 1443                     (doingdirectory && newparent) ? newparent : doingdirectory);
 1444                 if (error) {
 1445                         if (doingdirectory) {
 1446                                 if (!newparent) {
 1447                                         tdp->i_effnlink++;
 1448                                         if (DOINGSOFTDEP(tdvp))
 1449                                                 softdep_change_linkcnt(tdp);
 1450                                 }
 1451                                 tip->i_effnlink++;
 1452                                 if (DOINGSOFTDEP(tvp))
 1453                                         softdep_change_linkcnt(tip);
 1454                         }
 1455                 }
 1456                 if (doingdirectory && !DOINGSOFTDEP(tvp)) {
 1457                         /*
 1458                          * The only stuff left in the directory is "."
 1459                          * and "..". The "." reference is inconsequential
 1460                          * since we are quashing it. We have removed the "."
 1461                          * reference and the reference in the parent directory,
 1462                          * but there may be other hard links. The soft
 1463                          * dependency code will arrange to do these operations
 1464                          * after the parent directory entry has been deleted on
 1465                          * disk, so when running with that code we avoid doing
 1466                          * them now.
 1467                          */
 1468                         if (!newparent) {
 1469                                 tdp->i_nlink--;
 1470                                 DIP_SET(tdp, i_nlink, tdp->i_nlink);
 1471                                 tdp->i_flag |= IN_CHANGE;
 1472                         }
 1473                         tip->i_nlink--;
 1474                         DIP_SET(tip, i_nlink, tip->i_nlink);
 1475                         tip->i_flag |= IN_CHANGE;
 1476                 }
 1477         }
 1478 
 1479         /*
 1480          * 3) Unlink the source.  We have to resolve the path again to
 1481          * fixup the directory offset and count for ufs_dirremove.
 1482          */
 1483         if (fdvp == tdvp) {
 1484                 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 1485                 if (error)
 1486                         panic("ufs_rename: from entry went away!");
 1487                 if (ino != fip->i_number)
 1488                         panic("ufs_rename: ino mismatch %d != %d\n", ino,
 1489                             fip->i_number);
 1490         }
 1491         /*
 1492          * If the source is a directory with a
 1493          * new parent, the link count of the old
 1494          * parent directory must be decremented
 1495          * and ".." set to point to the new parent.
 1496          */
 1497         if (doingdirectory && newparent) {
 1498                 /*
 1499                  * If tip exists we simply use its link, otherwise we must
 1500                  * add a new one.
 1501                  */
 1502                 if (tip == NULL) {
 1503                         tdp->i_effnlink++;
 1504                         tdp->i_nlink++;
 1505                         DIP_SET(tdp, i_nlink, tdp->i_nlink);
 1506                         tdp->i_flag |= IN_CHANGE;
 1507                         if (DOINGSOFTDEP(tdvp))
 1508                                 softdep_setup_dotdot_link(tdp, fip);
 1509                         error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
 1510                                                    DOINGASYNC(tdvp)));
 1511                         /* Don't go to bad here as the new link exists. */
 1512                         if (error)
 1513                                 goto unlockout;
 1514                 } else if (DOINGSUJ(tdvp))
 1515                         /* Journal must account for each new link. */
 1516                         softdep_setup_dotdot_link(tdp, fip);
 1517                 fip->i_offset = mastertemplate.dot_reclen;
 1518                 ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
 1519                 cache_purge(fdvp);
 1520         }
 1521         error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
 1522         /*
 1523          * The kern_renameat() looks up the fvp using the DELETE flag, which
 1524          * causes the removal of the name cache entry for fvp.
 1525          * As the relookup of the fvp is done in two steps:
 1526          * ufs_lookup_ino() and then VFS_VGET(), another thread might do a
 1527          * normal lookup of the from name just before the VFS_VGET() call,
 1528          * causing the cache entry to be re-instantiated.
 1529          */
 1530         cache_purge(fvp);
 1531 
 1532 unlockout:
 1533         vput(fdvp);
 1534         vput(fvp);
 1535         if (tvp)
 1536                 vput(tvp);
 1537         /*
 1538          * If compaction or fsync was requested do it now that other locks
 1539          * are no longer needed.
 1540          */
 1541         if (error == 0 && endoff != 0) {
 1542 #ifdef UFS_DIRHASH
 1543                 if (tdp->i_dirhash != NULL)
 1544                         ufsdirhash_dirtrunc(tdp, endoff);
 1545 #endif
 1546                 UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
 1547                     td);
 1548         }
 1549         if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
 1550                 error = VOP_FSYNC(tdvp, MNT_WAIT, td);
 1551         vput(tdvp);
 1552         if (mp)
 1553                 vfs_unbusy(mp);
 1554         return (error);
 1555 
 1556 bad:
 1557         fip->i_effnlink--;
 1558         fip->i_nlink--;
 1559         DIP_SET(fip, i_nlink, fip->i_nlink);
 1560         fip->i_flag |= IN_CHANGE;
 1561         if (DOINGSOFTDEP(fvp))
 1562                 softdep_revert_link(tdp, fip);
 1563         goto unlockout;
 1564 
 1565 releout:
 1566         vrele(fdvp);
 1567         vrele(fvp);
 1568         vrele(tdvp);
 1569         if (tvp)
 1570                 vrele(tvp);
 1571         if (mp)
 1572                 vfs_unbusy(mp);
 1573 
 1574         return (error);
 1575 }
 1576 
 1577 #ifdef UFS_ACL
 1578 static int
 1579 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
 1580     mode_t dmode, struct ucred *cred, struct thread *td)
 1581 {
 1582         int error;
 1583         struct inode *ip = VTOI(tvp);
 1584         struct acl *dacl, *acl;
 1585 
 1586         acl = acl_alloc(M_WAITOK);
 1587         dacl = acl_alloc(M_WAITOK);
 1588 
 1589         /*
 1590          * Retrieve default ACL from parent, if any.
 1591          */
 1592         error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 1593         switch (error) {
 1594         case 0:
 1595                 /*
 1596                  * Retrieved a default ACL, so merge mode and ACL if
 1597                  * necessary.  If the ACL is empty, fall through to
 1598                  * the "not defined or available" case.
 1599                  */
 1600                 if (acl->acl_cnt != 0) {
 1601                         dmode = acl_posix1e_newfilemode(dmode, acl);
 1602                         ip->i_mode = dmode;
 1603                         DIP_SET(ip, i_mode, dmode);
 1604                         *dacl = *acl;
 1605                         ufs_sync_acl_from_inode(ip, acl);
 1606                         break;
 1607                 }
 1608                 /* FALLTHROUGH */
 1609 
 1610         case EOPNOTSUPP:
 1611                 /*
 1612                  * Just use the mode as-is.
 1613                  */
 1614                 ip->i_mode = dmode;
 1615                 DIP_SET(ip, i_mode, dmode);
 1616                 error = 0;
 1617                 goto out;
 1618         
 1619         default:
 1620                 goto out;
 1621         }
 1622 
 1623         /*
 1624          * XXX: If we abort now, will Soft Updates notify the extattr
 1625          * code that the EAs for the file need to be released?
 1626          */
 1627         error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 1628         if (error == 0)
 1629                 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
 1630         switch (error) {
 1631         case 0:
 1632                 break;
 1633 
 1634         case EOPNOTSUPP:
 1635                 /*
 1636                  * XXX: This should not happen, as EOPNOTSUPP above
 1637                  * was supposed to free acl.
 1638                  */
 1639                 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
 1640                 /*
 1641                 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
 1642                  */
 1643                 break;
 1644 
 1645         default:
 1646                 goto out;
 1647         }
 1648 
 1649 out:
 1650         acl_free(acl);
 1651         acl_free(dacl);
 1652 
 1653         return (error);
 1654 }
 1655 
 1656 static int
 1657 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
 1658     mode_t mode, struct ucred *cred, struct thread *td)
 1659 {
 1660         int error;
 1661         struct inode *ip = VTOI(tvp);
 1662         struct acl *acl;
 1663 
 1664         acl = acl_alloc(M_WAITOK);
 1665 
 1666         /*
 1667          * Retrieve default ACL for parent, if any.
 1668          */
 1669         error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 1670         switch (error) {
 1671         case 0:
 1672                 /*
 1673                  * Retrieved a default ACL, so merge mode and ACL if
 1674                  * necessary.
 1675                  */
 1676                 if (acl->acl_cnt != 0) {
 1677                         /*
 1678                          * Two possible ways for default ACL to not
 1679                          * be present.  First, the EA can be
 1680                          * undefined, or second, the default ACL can
 1681                          * be blank.  If it's blank, fall through to
 1682                          * the it's not defined case.
 1683                          */
 1684                         mode = acl_posix1e_newfilemode(mode, acl);
 1685                         ip->i_mode = mode;
 1686                         DIP_SET(ip, i_mode, mode);
 1687                         ufs_sync_acl_from_inode(ip, acl);
 1688                         break;
 1689                 }
 1690                 /* FALLTHROUGH */
 1691 
 1692         case EOPNOTSUPP:
 1693                 /*
 1694                  * Just use the mode as-is.
 1695                  */
 1696                 ip->i_mode = mode;
 1697                 DIP_SET(ip, i_mode, mode);
 1698                 error = 0;
 1699                 goto out;
 1700 
 1701         default:
 1702                 goto out;
 1703         }
 1704 
 1705         /*
 1706          * XXX: If we abort now, will Soft Updates notify the extattr
 1707          * code that the EAs for the file need to be released?
 1708          */
 1709         error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 1710         switch (error) {
 1711         case 0:
 1712                 break;
 1713 
 1714         case EOPNOTSUPP:
 1715                 /*
 1716                  * XXX: This should not happen, as EOPNOTSUPP above was
 1717                  * supposed to free acl.
 1718                  */
 1719                 printf("ufs_makeinode: VOP_GETACL() but no "
 1720                     "VOP_SETACL()\n");
 1721                 /* panic("ufs_makeinode: VOP_GETACL() but no "
 1722                     "VOP_SETACL()"); */
 1723                 break;
 1724 
 1725         default:
 1726                 goto out;
 1727         }
 1728 
 1729 out:
 1730         acl_free(acl);
 1731 
 1732         return (error);
 1733 }
 1734 
 1735 static int
 1736 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
 1737     mode_t child_mode, struct ucred *cred, struct thread *td)
 1738 {
 1739         int error;
 1740         struct acl *parent_aclp, *child_aclp;
 1741 
 1742         parent_aclp = acl_alloc(M_WAITOK);
 1743         child_aclp = acl_alloc(M_WAITOK | M_ZERO);
 1744 
 1745         error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
 1746         if (error)
 1747                 goto out;
 1748         acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
 1749             child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
 1750         error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
 1751         if (error)
 1752                 goto out;
 1753 out:
 1754         acl_free(parent_aclp);
 1755         acl_free(child_aclp);
 1756 
 1757         return (error);
 1758 }
 1759 #endif
 1760 
 1761 /*
 1762  * Mkdir system call
 1763  */
 1764 static int
 1765 ufs_mkdir(ap)
 1766         struct vop_mkdir_args /* {
 1767                 struct vnode *a_dvp;
 1768                 struct vnode **a_vpp;
 1769                 struct componentname *a_cnp;
 1770                 struct vattr *a_vap;
 1771         } */ *ap;
 1772 {
 1773         struct vnode *dvp = ap->a_dvp;
 1774         struct vattr *vap = ap->a_vap;
 1775         struct componentname *cnp = ap->a_cnp;
 1776         struct inode *ip, *dp;
 1777         struct vnode *tvp;
 1778         struct buf *bp;
 1779         struct dirtemplate dirtemplate, *dtp;
 1780         struct direct newdir;
 1781         int error, dmode;
 1782         long blkoff;
 1783 
 1784 #ifdef INVARIANTS
 1785         if ((cnp->cn_flags & HASBUF) == 0)
 1786                 panic("ufs_mkdir: no name");
 1787 #endif
 1788         dp = VTOI(dvp);
 1789         if ((nlink_t)dp->i_nlink >= LINK_MAX) {
 1790                 error = EMLINK;
 1791                 goto out;
 1792         }
 1793         dmode = vap->va_mode & 0777;
 1794         dmode |= IFDIR;
 1795         /*
 1796          * Must simulate part of ufs_makeinode here to acquire the inode,
 1797          * but not have it entered in the parent directory. The entry is
 1798          * made later after writing "." and ".." entries.
 1799          */
 1800         error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
 1801         if (error)
 1802                 goto out;
 1803         ip = VTOI(tvp);
 1804         ip->i_gid = dp->i_gid;
 1805         DIP_SET(ip, i_gid, dp->i_gid);
 1806 #ifdef SUIDDIR
 1807         {
 1808 #ifdef QUOTA
 1809                 struct ucred ucred, *ucp;
 1810                 gid_t ucred_group;
 1811                 ucp = cnp->cn_cred;
 1812 #endif
 1813                 /*
 1814                  * If we are hacking owners here, (only do this where told to)
 1815                  * and we are not giving it TO root, (would subvert quotas)
 1816                  * then go ahead and give it to the other user.
 1817                  * The new directory also inherits the SUID bit.
 1818                  * If user's UID and dir UID are the same,
 1819                  * 'give it away' so that the SUID is still forced on.
 1820                  */
 1821                 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 1822                     (dp->i_mode & ISUID) && dp->i_uid) {
 1823                         dmode |= ISUID;
 1824                         ip->i_uid = dp->i_uid;
 1825                         DIP_SET(ip, i_uid, dp->i_uid);
 1826 #ifdef QUOTA
 1827                         if (dp->i_uid != cnp->cn_cred->cr_uid) {
 1828                                 /*
 1829                                  * Make sure the correct user gets charged
 1830                                  * for the space.
 1831                                  * Make a dummy credential for the victim.
 1832                                  * XXX This seems to never be accessed out of
 1833                                  * our context so a stack variable is ok.
 1834                                  */
 1835                                 refcount_init(&ucred.cr_ref, 1);
 1836                                 ucred.cr_uid = ip->i_uid;
 1837                                 ucred.cr_ngroups = 1;
 1838                                 ucred.cr_groups = &ucred_group;
 1839                                 ucred.cr_groups[0] = dp->i_gid;
 1840                                 ucp = &ucred;
 1841                         }
 1842 #endif
 1843                 } else {
 1844                         ip->i_uid = cnp->cn_cred->cr_uid;
 1845                         DIP_SET(ip, i_uid, ip->i_uid);
 1846                 }
 1847 #ifdef QUOTA
 1848                 if ((error = getinoquota(ip)) ||
 1849                     (error = chkiq(ip, 1, ucp, 0))) {
 1850                         if (DOINGSOFTDEP(tvp))
 1851                                 softdep_revert_link(dp, ip);
 1852                         UFS_VFREE(tvp, ip->i_number, dmode);
 1853                         vput(tvp);
 1854                         return (error);
 1855                 }
 1856 #endif
 1857         }
 1858 #else   /* !SUIDDIR */
 1859         ip->i_uid = cnp->cn_cred->cr_uid;
 1860         DIP_SET(ip, i_uid, ip->i_uid);
 1861 #ifdef QUOTA
 1862         if ((error = getinoquota(ip)) ||
 1863             (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 1864                 if (DOINGSOFTDEP(tvp))
 1865                         softdep_revert_link(dp, ip);
 1866                 UFS_VFREE(tvp, ip->i_number, dmode);
 1867                 vput(tvp);
 1868                 return (error);
 1869         }
 1870 #endif
 1871 #endif  /* !SUIDDIR */
 1872         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 1873         ip->i_mode = dmode;
 1874         DIP_SET(ip, i_mode, dmode);
 1875         tvp->v_type = VDIR;     /* Rest init'd in getnewvnode(). */
 1876         ip->i_effnlink = 2;
 1877         ip->i_nlink = 2;
 1878         DIP_SET(ip, i_nlink, 2);
 1879 
 1880         if (cnp->cn_flags & ISWHITEOUT) {
 1881                 ip->i_flags |= UF_OPAQUE;
 1882                 DIP_SET(ip, i_flags, ip->i_flags);
 1883         }
 1884 
 1885         /*
 1886          * Bump link count in parent directory to reflect work done below.
 1887          * Should be done before reference is created so cleanup is
 1888          * possible if we crash.
 1889          */
 1890         dp->i_effnlink++;
 1891         dp->i_nlink++;
 1892         DIP_SET(dp, i_nlink, dp->i_nlink);
 1893         dp->i_flag |= IN_CHANGE;
 1894         if (DOINGSOFTDEP(dvp))
 1895                 softdep_setup_mkdir(dp, ip);
 1896         error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
 1897         if (error)
 1898                 goto bad;
 1899 #ifdef MAC
 1900         if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 1901                 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 1902                     dvp, tvp, cnp);
 1903                 if (error)
 1904                         goto bad;
 1905         }
 1906 #endif
 1907 #ifdef UFS_ACL
 1908         if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 1909                 error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
 1910                     cnp->cn_cred, cnp->cn_thread);
 1911                 if (error)
 1912                         goto bad;
 1913         } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 1914                 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
 1915                     cnp->cn_cred, cnp->cn_thread);
 1916                 if (error)
 1917                         goto bad;
 1918         }
 1919 #endif /* !UFS_ACL */
 1920 
 1921         /*
 1922          * Initialize directory with "." and ".." from static template.
 1923          */
 1924         if (dvp->v_mount->mnt_maxsymlinklen > 0)
 1925                 dtp = &mastertemplate;
 1926         else
 1927                 dtp = (struct dirtemplate *)&omastertemplate;
 1928         dirtemplate = *dtp;
 1929         dirtemplate.dot_ino = ip->i_number;
 1930         dirtemplate.dotdot_ino = dp->i_number;
 1931         if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
 1932             BA_CLRBUF, &bp)) != 0)
 1933                 goto bad;
 1934         ip->i_size = DIRBLKSIZ;
 1935         DIP_SET(ip, i_size, DIRBLKSIZ);
 1936         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1937         vnode_pager_setsize(tvp, (u_long)ip->i_size);
 1938         bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
 1939         if (DOINGSOFTDEP(tvp)) {
 1940                 /*
 1941                  * Ensure that the entire newly allocated block is a
 1942                  * valid directory so that future growth within the
 1943                  * block does not have to ensure that the block is
 1944                  * written before the inode.
 1945                  */
 1946                 blkoff = DIRBLKSIZ;
 1947                 while (blkoff < bp->b_bcount) {
 1948                         ((struct direct *)
 1949                            (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
 1950                         blkoff += DIRBLKSIZ;
 1951                 }
 1952         }
 1953         if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
 1954                                        DOINGASYNC(tvp)))) != 0) {
 1955                 (void)bwrite(bp);
 1956                 goto bad;
 1957         }
 1958         /*
 1959          * Directory set up, now install its entry in the parent directory.
 1960          *
 1961          * If we are not doing soft dependencies, then we must write out the
 1962          * buffer containing the new directory body before entering the new 
 1963          * name in the parent. If we are doing soft dependencies, then the
 1964          * buffer containing the new directory body will be passed to and
 1965          * released in the soft dependency code after the code has attached
 1966          * an appropriate ordering dependency to the buffer which ensures that
 1967          * the buffer is written before the new name is written in the parent.
 1968          */
 1969         if (DOINGASYNC(dvp))
 1970                 bdwrite(bp);
 1971         else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
 1972                 goto bad;
 1973         ufs_makedirentry(ip, cnp, &newdir);
 1974         error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
 1975         
 1976 bad:
 1977         if (error == 0) {
 1978                 *ap->a_vpp = tvp;
 1979         } else {
 1980                 dp->i_effnlink--;
 1981                 dp->i_nlink--;
 1982                 DIP_SET(dp, i_nlink, dp->i_nlink);
 1983                 dp->i_flag |= IN_CHANGE;
 1984                 /*
 1985                  * No need to do an explicit VOP_TRUNCATE here, vrele will
 1986                  * do this for us because we set the link count to 0.
 1987                  */
 1988                 ip->i_effnlink = 0;
 1989                 ip->i_nlink = 0;
 1990                 DIP_SET(ip, i_nlink, 0);
 1991                 ip->i_flag |= IN_CHANGE;
 1992                 if (DOINGSOFTDEP(tvp))
 1993                         softdep_revert_mkdir(dp, ip);
 1994 
 1995                 vput(tvp);
 1996         }
 1997 out:
 1998         return (error);
 1999 }
 2000 
 2001 /*
 2002  * Rmdir system call.
 2003  */
 2004 static int
 2005 ufs_rmdir(ap)
 2006         struct vop_rmdir_args /* {
 2007                 struct vnode *a_dvp;
 2008                 struct vnode *a_vp;
 2009                 struct componentname *a_cnp;
 2010         } */ *ap;
 2011 {
 2012         struct vnode *vp = ap->a_vp;
 2013         struct vnode *dvp = ap->a_dvp;
 2014         struct componentname *cnp = ap->a_cnp;
 2015         struct inode *ip, *dp;
 2016         int error;
 2017 
 2018         ip = VTOI(vp);
 2019         dp = VTOI(dvp);
 2020 
 2021         /*
 2022          * Do not remove a directory that is in the process of being renamed.
 2023          * Verify the directory is empty (and valid). Rmdir ".." will not be
 2024          * valid since ".." will contain a reference to the current directory
 2025          * and thus be non-empty. Do not allow the removal of mounted on
 2026          * directories (this can happen when an NFS exported filesystem
 2027          * tries to remove a locally mounted on directory).
 2028          */
 2029         error = 0;
 2030         if (ip->i_effnlink < 2) {
 2031                 error = EINVAL;
 2032                 goto out;
 2033         }
 2034         if (dp->i_effnlink < 3)
 2035                 panic("ufs_dirrem: Bad link count %d on parent",
 2036                     dp->i_effnlink);
 2037         if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 2038                 error = ENOTEMPTY;
 2039                 goto out;
 2040         }
 2041         if ((dp->i_flags & APPEND)
 2042             || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 2043                 error = EPERM;
 2044                 goto out;
 2045         }
 2046         if (vp->v_mountedhere != 0) {
 2047                 error = EINVAL;
 2048                 goto out;
 2049         }
 2050 #ifdef UFS_GJOURNAL
 2051         ufs_gjournal_orphan(vp);
 2052 #endif
 2053         /*
 2054          * Delete reference to directory before purging
 2055          * inode.  If we crash in between, the directory
 2056          * will be reattached to lost+found,
 2057          */
 2058         dp->i_effnlink--;
 2059         ip->i_effnlink--;
 2060         if (DOINGSOFTDEP(vp))
 2061                 softdep_setup_rmdir(dp, ip);
 2062         error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
 2063         if (error) {
 2064                 dp->i_effnlink++;
 2065                 ip->i_effnlink++;
 2066                 if (DOINGSOFTDEP(vp))
 2067                         softdep_revert_rmdir(dp, ip);
 2068                 goto out;
 2069         }
 2070         cache_purge(dvp);
 2071         /*
 2072          * The only stuff left in the directory is "." and "..". The "."
 2073          * reference is inconsequential since we are quashing it. The soft
 2074          * dependency code will arrange to do these operations after
 2075          * the parent directory entry has been deleted on disk, so
 2076          * when running with that code we avoid doing them now.
 2077          */
 2078         if (!DOINGSOFTDEP(vp)) {
 2079                 dp->i_nlink--;
 2080                 DIP_SET(dp, i_nlink, dp->i_nlink);
 2081                 dp->i_flag |= IN_CHANGE;
 2082                 ip->i_nlink--;
 2083                 DIP_SET(ip, i_nlink, ip->i_nlink);
 2084                 ip->i_flag |= IN_CHANGE;
 2085         }
 2086         cache_purge(vp);
 2087 #ifdef UFS_DIRHASH
 2088         /* Kill any active hash; i_effnlink == 0, so it will not come back. */
 2089         if (ip->i_dirhash != NULL)
 2090                 ufsdirhash_free(ip);
 2091 #endif
 2092 out:
 2093         return (error);
 2094 }
 2095 
 2096 /*
 2097  * symlink -- make a symbolic link
 2098  */
 2099 static int
 2100 ufs_symlink(ap)
 2101         struct vop_symlink_args /* {
 2102                 struct vnode *a_dvp;
 2103                 struct vnode **a_vpp;
 2104                 struct componentname *a_cnp;
 2105                 struct vattr *a_vap;
 2106                 char *a_target;
 2107         } */ *ap;
 2108 {
 2109         struct vnode *vp, **vpp = ap->a_vpp;
 2110         struct inode *ip;
 2111         int len, error;
 2112 
 2113         error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 2114             vpp, ap->a_cnp);
 2115         if (error)
 2116                 return (error);
 2117         vp = *vpp;
 2118         len = strlen(ap->a_target);
 2119         if (len < vp->v_mount->mnt_maxsymlinklen) {
 2120                 ip = VTOI(vp);
 2121                 bcopy(ap->a_target, SHORTLINK(ip), len);
 2122                 ip->i_size = len;
 2123                 DIP_SET(ip, i_size, len);
 2124                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
 2125         } else
 2126                 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
 2127                     UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 2128                     ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 2129         if (error)
 2130                 vput(vp);
 2131         return (error);
 2132 }
 2133 
 2134 /*
 2135  * Vnode op for reading directories.
 2136  *
 2137  * The routine below assumes that the on-disk format of a directory
 2138  * is the same as that defined by <sys/dirent.h>. If the on-disk
 2139  * format changes, then it will be necessary to do a conversion
 2140  * from the on-disk format that read returns to the format defined
 2141  * by <sys/dirent.h>.
 2142  */
 2143 int
 2144 ufs_readdir(ap)
 2145         struct vop_readdir_args /* {
 2146                 struct vnode *a_vp;
 2147                 struct uio *a_uio;
 2148                 struct ucred *a_cred;
 2149                 int *a_eofflag;
 2150                 int *a_ncookies;
 2151                 u_long **a_cookies;
 2152         } */ *ap;
 2153 {
 2154         struct uio *uio = ap->a_uio;
 2155         struct inode *ip;
 2156         int error;
 2157         size_t count, lost;
 2158         off_t off;
 2159 
 2160         if (ap->a_ncookies != NULL)
 2161                 /*
 2162                  * Ensure that the block is aligned.  The caller can use
 2163                  * the cookies to determine where in the block to start.
 2164                  */
 2165                 uio->uio_offset &= ~(DIRBLKSIZ - 1);
 2166         ip = VTOI(ap->a_vp);
 2167         if (ip->i_effnlink == 0)
 2168                 return (0);
 2169         off = uio->uio_offset;
 2170         count = uio->uio_resid;
 2171         /* Make sure we don't return partial entries. */
 2172         if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
 2173                 return (EINVAL);
 2174         count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
 2175         lost = uio->uio_resid - count;
 2176         uio->uio_resid = count;
 2177         uio->uio_iov->iov_len = count;
 2178 #       if (BYTE_ORDER == LITTLE_ENDIAN)
 2179                 if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
 2180                         error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
 2181                 } else {
 2182                         struct dirent *dp, *edp;
 2183                         struct uio auio;
 2184                         struct iovec aiov;
 2185                         caddr_t dirbuf;
 2186                         int readcnt;
 2187                         u_char tmp;
 2188 
 2189                         auio = *uio;
 2190                         auio.uio_iov = &aiov;
 2191                         auio.uio_iovcnt = 1;
 2192                         auio.uio_segflg = UIO_SYSSPACE;
 2193                         aiov.iov_len = count;
 2194                         dirbuf = malloc(count, M_TEMP, M_WAITOK);
 2195                         aiov.iov_base = dirbuf;
 2196                         error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
 2197                         if (error == 0) {
 2198                                 readcnt = count - auio.uio_resid;
 2199                                 edp = (struct dirent *)&dirbuf[readcnt];
 2200                                 for (dp = (struct dirent *)dirbuf; dp < edp; ) {
 2201                                         tmp = dp->d_namlen;
 2202                                         dp->d_namlen = dp->d_type;
 2203                                         dp->d_type = tmp;
 2204                                         if (dp->d_reclen > 0) {
 2205                                                 dp = (struct dirent *)
 2206                                                     ((char *)dp + dp->d_reclen);
 2207                                         } else {
 2208                                                 error = EIO;
 2209                                                 break;
 2210                                         }
 2211                                 }
 2212                                 if (dp >= edp)
 2213                                         error = uiomove(dirbuf, readcnt, uio);
 2214                         }
 2215                         free(dirbuf, M_TEMP);
 2216                 }
 2217 #       else
 2218                 error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
 2219 #       endif
 2220         if (!error && ap->a_ncookies != NULL) {
 2221                 struct dirent* dpStart;
 2222                 struct dirent* dpEnd;
 2223                 struct dirent* dp;
 2224                 int ncookies;
 2225                 u_long *cookies;
 2226                 u_long *cookiep;
 2227 
 2228                 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
 2229                         panic("ufs_readdir: unexpected uio from NFS server");
 2230                 dpStart = (struct dirent *)
 2231                     ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
 2232                 dpEnd = (struct dirent *) uio->uio_iov->iov_base;
 2233                 for (dp = dpStart, ncookies = 0;
 2234                      dp < dpEnd;
 2235                      dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
 2236                         ncookies++;
 2237                 cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
 2238                     M_WAITOK);
 2239                 for (dp = dpStart, cookiep = cookies;
 2240                      dp < dpEnd;
 2241                      dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
 2242                         off += dp->d_reclen;
 2243                         *cookiep++ = (u_long) off;
 2244                 }
 2245                 *ap->a_ncookies = ncookies;
 2246                 *ap->a_cookies = cookies;
 2247         }
 2248         uio->uio_resid += lost;
 2249         if (ap->a_eofflag)
 2250             *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
 2251         return (error);
 2252 }
 2253 
 2254 /*
 2255  * Return target name of a symbolic link
 2256  */
 2257 static int
 2258 ufs_readlink(ap)
 2259         struct vop_readlink_args /* {
 2260                 struct vnode *a_vp;
 2261                 struct uio *a_uio;
 2262                 struct ucred *a_cred;
 2263         } */ *ap;
 2264 {
 2265         struct vnode *vp = ap->a_vp;
 2266         struct inode *ip = VTOI(vp);
 2267         doff_t isize;
 2268 
 2269         isize = ip->i_size;
 2270         if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
 2271             DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
 2272                 return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
 2273         }
 2274         return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 2275 }
 2276 
 2277 /*
 2278  * Calculate the logical to physical mapping if not done already,
 2279  * then call the device strategy routine.
 2280  *
 2281  * In order to be able to swap to a file, the ufs_bmaparray() operation may not
 2282  * deadlock on memory.  See ufs_bmap() for details.
 2283  */
 2284 static int
 2285 ufs_strategy(ap)
 2286         struct vop_strategy_args /* {
 2287                 struct vnode *a_vp;
 2288                 struct buf *a_bp;
 2289         } */ *ap;
 2290 {
 2291         struct buf *bp = ap->a_bp;
 2292         struct vnode *vp = ap->a_vp;
 2293         struct bufobj *bo;
 2294         struct inode *ip;
 2295         ufs2_daddr_t blkno;
 2296         int error;
 2297 
 2298         ip = VTOI(vp);
 2299         if (bp->b_blkno == bp->b_lblkno) {
 2300                 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
 2301                 bp->b_blkno = blkno;
 2302                 if (error) {
 2303                         bp->b_error = error;
 2304                         bp->b_ioflags |= BIO_ERROR;
 2305                         bufdone(bp);
 2306                         return (0);
 2307                 }
 2308                 if ((long)bp->b_blkno == -1)
 2309                         vfs_bio_clrbuf(bp);
 2310         }
 2311         if ((long)bp->b_blkno == -1) {
 2312                 bufdone(bp);
 2313                 return (0);
 2314         }
 2315         bp->b_iooffset = dbtob(bp->b_blkno);
 2316         bo = ip->i_umbufobj;
 2317         BO_STRATEGY(bo, bp);
 2318         return (0);
 2319 }
 2320 
 2321 /*
 2322  * Print out the contents of an inode.
 2323  */
 2324 static int
 2325 ufs_print(ap)
 2326         struct vop_print_args /* {
 2327                 struct vnode *a_vp;
 2328         } */ *ap;
 2329 {
 2330         struct vnode *vp = ap->a_vp;
 2331         struct inode *ip = VTOI(vp);
 2332 
 2333         printf("\tino %lu, on dev %s", (u_long)ip->i_number,
 2334             devtoname(ip->i_dev));
 2335         if (vp->v_type == VFIFO)
 2336                 fifo_printinfo(vp);
 2337         printf("\n");
 2338         return (0);
 2339 }
 2340 
 2341 /*
 2342  * Close wrapper for fifos.
 2343  *
 2344  * Update the times on the inode then do device close.
 2345  */
 2346 static int
 2347 ufsfifo_close(ap)
 2348         struct vop_close_args /* {
 2349                 struct vnode *a_vp;
 2350                 int  a_fflag;
 2351                 struct ucred *a_cred;
 2352                 struct thread *a_td;
 2353         } */ *ap;
 2354 {
 2355         struct vnode *vp = ap->a_vp;
 2356         int usecount;
 2357 
 2358         VI_LOCK(vp);
 2359         usecount = vp->v_usecount;
 2360         if (usecount > 1)
 2361                 ufs_itimes_locked(vp);
 2362         VI_UNLOCK(vp);
 2363         return (fifo_specops.vop_close(ap));
 2364 }
 2365 
 2366 /*
 2367  * Kqfilter wrapper for fifos.
 2368  *
 2369  * Fall through to ufs kqfilter routines if needed 
 2370  */
 2371 static int
 2372 ufsfifo_kqfilter(ap)
 2373         struct vop_kqfilter_args *ap;
 2374 {
 2375         int error;
 2376 
 2377         error = fifo_specops.vop_kqfilter(ap);
 2378         if (error)
 2379                 error = vfs_kqfilter(ap);
 2380         return (error);
 2381 }
 2382 
 2383 /*
 2384  * Return POSIX pathconf information applicable to fifos.
 2385  */
 2386 static int
 2387 ufsfifo_pathconf(ap)
 2388         struct vop_pathconf_args /* {
 2389                 struct vnode *a_vp;
 2390                 int a_name;
 2391                 int *a_retval;
 2392         } */ *ap;
 2393 {
 2394 
 2395         switch (ap->a_name) {
 2396         case _PC_ACL_EXTENDED:
 2397         case _PC_ACL_NFS4:
 2398         case _PC_ACL_PATH_MAX:
 2399         case _PC_MAC_PRESENT:
 2400                 return (ufs_pathconf(ap));
 2401         default:
 2402                 return (fifo_specops.vop_pathconf(ap));
 2403         }
 2404         /* NOTREACHED */
 2405 }
 2406 
 2407 /*
 2408  * Return POSIX pathconf information applicable to ufs filesystems.
 2409  */
 2410 static int
 2411 ufs_pathconf(ap)
 2412         struct vop_pathconf_args /* {
 2413                 struct vnode *a_vp;
 2414                 int a_name;
 2415                 int *a_retval;
 2416         } */ *ap;
 2417 {
 2418         int error;
 2419 
 2420         error = 0;
 2421         switch (ap->a_name) {
 2422         case _PC_LINK_MAX:
 2423                 *ap->a_retval = LINK_MAX;
 2424                 break;
 2425         case _PC_NAME_MAX:
 2426                 *ap->a_retval = NAME_MAX;
 2427                 break;
 2428         case _PC_PATH_MAX:
 2429                 *ap->a_retval = PATH_MAX;
 2430                 break;
 2431         case _PC_PIPE_BUF:
 2432                 *ap->a_retval = PIPE_BUF;
 2433                 break;
 2434         case _PC_CHOWN_RESTRICTED:
 2435                 *ap->a_retval = 1;
 2436                 break;
 2437         case _PC_NO_TRUNC:
 2438                 *ap->a_retval = 1;
 2439                 break;
 2440         case _PC_ACL_EXTENDED:
 2441 #ifdef UFS_ACL
 2442                 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 2443                         *ap->a_retval = 1;
 2444                 else
 2445                         *ap->a_retval = 0;
 2446 #else
 2447                 *ap->a_retval = 0;
 2448 #endif
 2449                 break;
 2450 
 2451         case _PC_ACL_NFS4:
 2452 #ifdef UFS_ACL
 2453                 if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 2454                         *ap->a_retval = 1;
 2455                 else
 2456                         *ap->a_retval = 0;
 2457 #else
 2458                 *ap->a_retval = 0;
 2459 #endif
 2460                 break;
 2461 
 2462         case _PC_ACL_PATH_MAX:
 2463 #ifdef UFS_ACL
 2464                 if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
 2465                         *ap->a_retval = ACL_MAX_ENTRIES;
 2466                 else
 2467                         *ap->a_retval = 3;
 2468 #else
 2469                 *ap->a_retval = 3;
 2470 #endif
 2471                 break;
 2472         case _PC_MAC_PRESENT:
 2473 #ifdef MAC
 2474                 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
 2475                         *ap->a_retval = 1;
 2476                 else
 2477                         *ap->a_retval = 0;
 2478 #else
 2479                 *ap->a_retval = 0;
 2480 #endif
 2481                 break;
 2482         case _PC_ASYNC_IO:
 2483                 /* _PC_ASYNC_IO should have been handled by upper layers. */
 2484                 KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 2485                 error = EINVAL;
 2486                 break;
 2487         case _PC_PRIO_IO:
 2488                 *ap->a_retval = 0;
 2489                 break;
 2490         case _PC_SYNC_IO:
 2491                 *ap->a_retval = 0;
 2492                 break;
 2493         case _PC_ALLOC_SIZE_MIN:
 2494                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 2495                 break;
 2496         case _PC_FILESIZEBITS:
 2497                 *ap->a_retval = 64;
 2498                 break;
 2499         case _PC_REC_INCR_XFER_SIZE:
 2500                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 2501                 break;
 2502         case _PC_REC_MAX_XFER_SIZE:
 2503                 *ap->a_retval = -1; /* means ``unlimited'' */
 2504                 break;
 2505         case _PC_REC_MIN_XFER_SIZE:
 2506                 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 2507                 break;
 2508         case _PC_REC_XFER_ALIGN:
 2509                 *ap->a_retval = PAGE_SIZE;
 2510                 break;
 2511         case _PC_SYMLINK_MAX:
 2512                 *ap->a_retval = MAXPATHLEN;
 2513                 break;
 2514 
 2515         default:
 2516                 error = EINVAL;
 2517                 break;
 2518         }
 2519         return (error);
 2520 }
 2521 
 2522 /*
 2523  * Initialize the vnode associated with a new inode, handle aliased
 2524  * vnodes.
 2525  */
 2526 int
 2527 ufs_vinit(mntp, fifoops, vpp)
 2528         struct mount *mntp;
 2529         struct vop_vector *fifoops;
 2530         struct vnode **vpp;
 2531 {
 2532         struct inode *ip;
 2533         struct vnode *vp;
 2534 
 2535         vp = *vpp;
 2536         ip = VTOI(vp);
 2537         vp->v_type = IFTOVT(ip->i_mode);
 2538         if (vp->v_type == VFIFO)
 2539                 vp->v_op = fifoops;
 2540         ASSERT_VOP_LOCKED(vp, "ufs_vinit");
 2541         if (ip->i_number == ROOTINO)
 2542                 vp->v_vflag |= VV_ROOT;
 2543         *vpp = vp;
 2544         return (0);
 2545 }
 2546 
 2547 /*
 2548  * Allocate a new inode.
 2549  * Vnode dvp must be locked.
 2550  */
 2551 static int
 2552 ufs_makeinode(mode, dvp, vpp, cnp)
 2553         int mode;
 2554         struct vnode *dvp;
 2555         struct vnode **vpp;
 2556         struct componentname *cnp;
 2557 {
 2558         struct inode *ip, *pdir;
 2559         struct direct newdir;
 2560         struct vnode *tvp;
 2561         int error;
 2562 
 2563         pdir = VTOI(dvp);
 2564 #ifdef INVARIANTS
 2565         if ((cnp->cn_flags & HASBUF) == 0)
 2566                 panic("ufs_makeinode: no name");
 2567 #endif
 2568         *vpp = NULL;
 2569         if ((mode & IFMT) == 0)
 2570                 mode |= IFREG;
 2571 
 2572         if (VTOI(dvp)->i_effnlink < 2)
 2573                 panic("ufs_makeinode: Bad link count %d on parent",
 2574                     VTOI(dvp)->i_effnlink);
 2575         error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
 2576         if (error)
 2577                 return (error);
 2578         ip = VTOI(tvp);
 2579         ip->i_gid = pdir->i_gid;
 2580         DIP_SET(ip, i_gid, pdir->i_gid);
 2581 #ifdef SUIDDIR
 2582         {
 2583 #ifdef QUOTA
 2584                 struct ucred ucred, *ucp;
 2585                 gid_t ucred_group;
 2586                 ucp = cnp->cn_cred;
 2587 #endif
 2588                 /*
 2589                  * If we are not the owner of the directory,
 2590                  * and we are hacking owners here, (only do this where told to)
 2591                  * and we are not giving it TO root, (would subvert quotas)
 2592                  * then go ahead and give it to the other user.
 2593                  * Note that this drops off the execute bits for security.
 2594                  */
 2595                 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 2596                     (pdir->i_mode & ISUID) &&
 2597                     (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
 2598                         ip->i_uid = pdir->i_uid;
 2599                         DIP_SET(ip, i_uid, ip->i_uid);
 2600                         mode &= ~07111;
 2601 #ifdef QUOTA
 2602                         /*
 2603                          * Make sure the correct user gets charged
 2604                          * for the space.
 2605                          * Quickly knock up a dummy credential for the victim.
 2606                          * XXX This seems to never be accessed out of our
 2607                          * context so a stack variable is ok.
 2608                          */
 2609                         refcount_init(&ucred.cr_ref, 1);
 2610                         ucred.cr_uid = ip->i_uid;
 2611                         ucred.cr_ngroups = 1;
 2612                         ucred.cr_groups = &ucred_group;
 2613                         ucred.cr_groups[0] = pdir->i_gid;
 2614                         ucp = &ucred;
 2615 #endif
 2616                 } else {
 2617                         ip->i_uid = cnp->cn_cred->cr_uid;
 2618                         DIP_SET(ip, i_uid, ip->i_uid);
 2619                 }
 2620 
 2621 #ifdef QUOTA
 2622                 if ((error = getinoquota(ip)) ||
 2623                     (error = chkiq(ip, 1, ucp, 0))) {
 2624                         if (DOINGSOFTDEP(tvp))
 2625                                 softdep_revert_link(pdir, ip);
 2626                         UFS_VFREE(tvp, ip->i_number, mode);
 2627                         vput(tvp);
 2628                         return (error);
 2629                 }
 2630 #endif
 2631         }
 2632 #else   /* !SUIDDIR */
 2633         ip->i_uid = cnp->cn_cred->cr_uid;
 2634         DIP_SET(ip, i_uid, ip->i_uid);
 2635 #ifdef QUOTA
 2636         if ((error = getinoquota(ip)) ||
 2637             (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 2638                 if (DOINGSOFTDEP(tvp))
 2639                         softdep_revert_link(pdir, ip);
 2640                 UFS_VFREE(tvp, ip->i_number, mode);
 2641                 vput(tvp);
 2642                 return (error);
 2643         }
 2644 #endif
 2645 #endif  /* !SUIDDIR */
 2646         ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 2647         ip->i_mode = mode;
 2648         DIP_SET(ip, i_mode, mode);
 2649         tvp->v_type = IFTOVT(mode);     /* Rest init'd in getnewvnode(). */
 2650         ip->i_effnlink = 1;
 2651         ip->i_nlink = 1;
 2652         DIP_SET(ip, i_nlink, 1);
 2653         if (DOINGSOFTDEP(tvp))
 2654                 softdep_setup_create(VTOI(dvp), ip);
 2655         if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
 2656             priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
 2657                 ip->i_mode &= ~ISGID;
 2658                 DIP_SET(ip, i_mode, ip->i_mode);
 2659         }
 2660 
 2661         if (cnp->cn_flags & ISWHITEOUT) {
 2662                 ip->i_flags |= UF_OPAQUE;
 2663                 DIP_SET(ip, i_flags, ip->i_flags);
 2664         }
 2665 
 2666         /*
 2667          * Make sure inode goes to disk before directory entry.
 2668          */
 2669         error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
 2670         if (error)
 2671                 goto bad;
 2672 #ifdef MAC
 2673         if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 2674                 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 2675                     dvp, tvp, cnp);
 2676                 if (error)
 2677                         goto bad;
 2678         }
 2679 #endif
 2680 #ifdef UFS_ACL
 2681         if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 2682                 error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
 2683                     cnp->cn_cred, cnp->cn_thread);
 2684                 if (error)
 2685                         goto bad;
 2686         } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 2687                 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
 2688                     cnp->cn_cred, cnp->cn_thread);
 2689                 if (error)
 2690                         goto bad;
 2691         }
 2692 #endif /* !UFS_ACL */
 2693         ufs_makedirentry(ip, cnp, &newdir);
 2694         error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
 2695         if (error)
 2696                 goto bad;
 2697         *vpp = tvp;
 2698         return (0);
 2699 
 2700 bad:
 2701         /*
 2702          * Write error occurred trying to update the inode
 2703          * or the directory so must deallocate the inode.
 2704          */
 2705         ip->i_effnlink = 0;
 2706         ip->i_nlink = 0;
 2707         DIP_SET(ip, i_nlink, 0);
 2708         ip->i_flag |= IN_CHANGE;
 2709         if (DOINGSOFTDEP(tvp))
 2710                 softdep_revert_create(VTOI(dvp), ip);
 2711         vput(tvp);
 2712         return (error);
 2713 }
 2714 
 2715 /* Global vfs data structures for ufs. */
 2716 struct vop_vector ufs_vnodeops = {
 2717         .vop_default =          &default_vnodeops,
 2718         .vop_fsync =            VOP_PANIC,
 2719         .vop_read =             VOP_PANIC,
 2720         .vop_reallocblks =      VOP_PANIC,
 2721         .vop_write =            VOP_PANIC,
 2722         .vop_accessx =          ufs_accessx,
 2723         .vop_bmap =             ufs_bmap,
 2724         .vop_cachedlookup =     ufs_lookup,
 2725         .vop_close =            ufs_close,
 2726         .vop_create =           ufs_create,
 2727         .vop_getattr =          ufs_getattr,
 2728         .vop_inactive =         ufs_inactive,
 2729         .vop_link =             ufs_link,
 2730         .vop_lookup =           vfs_cache_lookup,
 2731         .vop_markatime =        ufs_markatime,
 2732         .vop_mkdir =            ufs_mkdir,
 2733         .vop_mknod =            ufs_mknod,
 2734         .vop_open =             ufs_open,
 2735         .vop_pathconf =         ufs_pathconf,
 2736         .vop_poll =             vop_stdpoll,
 2737         .vop_print =            ufs_print,
 2738         .vop_readdir =          ufs_readdir,
 2739         .vop_readlink =         ufs_readlink,
 2740         .vop_reclaim =          ufs_reclaim,
 2741         .vop_remove =           ufs_remove,
 2742         .vop_rename =           ufs_rename,
 2743         .vop_rmdir =            ufs_rmdir,
 2744         .vop_setattr =          ufs_setattr,
 2745 #ifdef MAC
 2746         .vop_setlabel =         vop_stdsetlabel_ea,
 2747 #endif
 2748         .vop_strategy =         ufs_strategy,
 2749         .vop_symlink =          ufs_symlink,
 2750         .vop_whiteout =         ufs_whiteout,
 2751 #ifdef UFS_EXTATTR
 2752         .vop_getextattr =       ufs_getextattr,
 2753         .vop_deleteextattr =    ufs_deleteextattr,
 2754         .vop_setextattr =       ufs_setextattr,
 2755 #endif
 2756 #ifdef UFS_ACL
 2757         .vop_getacl =           ufs_getacl,
 2758         .vop_setacl =           ufs_setacl,
 2759         .vop_aclcheck =         ufs_aclcheck,
 2760 #endif
 2761 };
 2762 
 2763 struct vop_vector ufs_fifoops = {
 2764         .vop_default =          &fifo_specops,
 2765         .vop_fsync =            VOP_PANIC,
 2766         .vop_accessx =          ufs_accessx,
 2767         .vop_close =            ufsfifo_close,
 2768         .vop_getattr =          ufs_getattr,
 2769         .vop_inactive =         ufs_inactive,
 2770         .vop_kqfilter =         ufsfifo_kqfilter,
 2771         .vop_markatime =        ufs_markatime,
 2772         .vop_pathconf =         ufsfifo_pathconf,
 2773         .vop_print =            ufs_print,
 2774         .vop_read =             VOP_PANIC,
 2775         .vop_reclaim =          ufs_reclaim,
 2776         .vop_setattr =          ufs_setattr,
 2777 #ifdef MAC
 2778         .vop_setlabel =         vop_stdsetlabel_ea,
 2779 #endif
 2780         .vop_write =            VOP_PANIC,
 2781 #ifdef UFS_EXTATTR
 2782         .vop_getextattr =       ufs_getextattr,
 2783         .vop_deleteextattr =    ufs_deleteextattr,
 2784         .vop_setextattr =       ufs_setextattr,
 2785 #endif
 2786 #ifdef UFS_ACL
 2787         .vop_getacl =           ufs_getacl,
 2788         .vop_setacl =           ufs_setacl,
 2789         .vop_aclcheck =         ufs_aclcheck,
 2790 #endif
 2791 };

Cache object: 3a6fa3615d757613f8e68f783006b08c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.