union_subr.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1994 Jan-Simon Pendry
    5  * Copyright (c) 1994
    6  *      The Regents of the University of California.  All rights reserved.
    7  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
    8  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * Jan-Simon Pendry.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      @(#)union_subr.c        8.20 (Berkeley) 5/20/95
   38  * $FreeBSD$
   39  */
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/kernel.h>
   44 #include <sys/ktr.h>
   45 #include <sys/lock.h>
   46 #include <sys/mutex.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mount.h>
   49 #include <sys/namei.h>
   50 #include <sys/proc.h>
   51 #include <sys/vnode.h>
   52 #include <sys/dirent.h>
   53 #include <sys/fcntl.h>
   54 #include <sys/filedesc.h>
   55 #include <sys/stat.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/taskqueue.h>
   58 #include <sys/resourcevar.h>
   59 
   60 #include <machine/atomic.h>
   61 
   62 #include <security/mac/mac_framework.h>
   63 
   64 #include <vm/uma.h>
   65 
   66 #include <fs/unionfs/union.h>
   67 
   68 #define NUNIONFSNODECACHE 16
   69 #define UNIONFSHASHMASK (NUNIONFSNODECACHE - 1)
   70 
   71 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
   72 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
   73 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
   74 
   75 static struct task unionfs_deferred_rele_task;
   76 static struct mtx unionfs_deferred_rele_lock;
   77 static STAILQ_HEAD(, unionfs_node) unionfs_deferred_rele_list =
   78     STAILQ_HEAD_INITIALIZER(unionfs_deferred_rele_list);
   79 static TASKQUEUE_DEFINE_THREAD(unionfs_rele);
   80 
   81 unsigned int unionfs_ndeferred = 0;
   82 SYSCTL_UINT(_vfs, OID_AUTO, unionfs_ndeferred, CTLFLAG_RD,
   83     &unionfs_ndeferred, 0, "unionfs deferred vnode release");
   84 
   85 static void unionfs_deferred_rele(void *, int);
   86 
   87 /*
   88  * Initialize
   89  */
   90 int 
   91 unionfs_init(struct vfsconf *vfsp)
   92 {
   93         UNIONFSDEBUG("unionfs_init\n"); /* printed during system boot */
   94         TASK_INIT(&unionfs_deferred_rele_task, 0, unionfs_deferred_rele, NULL);
   95         mtx_init(&unionfs_deferred_rele_lock, "uniondefr", NULL, MTX_DEF); 
   96         return (0);
   97 }
   98 
   99 /*
  100  * Uninitialize
  101  */
  102 int 
  103 unionfs_uninit(struct vfsconf *vfsp)
  104 {
  105         taskqueue_quiesce(taskqueue_unionfs_rele);
  106         taskqueue_free(taskqueue_unionfs_rele);
  107         mtx_destroy(&unionfs_deferred_rele_lock);
  108         return (0);
  109 }
  110 
  111 static void
  112 unionfs_deferred_rele(void *arg __unused, int pending __unused)
  113 {
  114         STAILQ_HEAD(, unionfs_node) local_rele_list;
  115         struct unionfs_node *unp, *tunp;
  116         unsigned int ndeferred;
  117 
  118         ndeferred = 0;
  119         STAILQ_INIT(&local_rele_list);
  120         mtx_lock(&unionfs_deferred_rele_lock);
  121         STAILQ_CONCAT(&local_rele_list, &unionfs_deferred_rele_list);
  122         mtx_unlock(&unionfs_deferred_rele_lock);
  123         STAILQ_FOREACH_SAFE(unp, &local_rele_list, un_rele, tunp) {
  124                 ++ndeferred;
  125                 MPASS(unp->un_dvp != NULL);
  126                 vrele(unp->un_dvp);
  127                 free(unp, M_UNIONFSNODE);
  128         }
  129 
  130         /* We expect this function to be single-threaded, thus no atomic */
  131         unionfs_ndeferred += ndeferred;
  132 }
  133 
  134 static struct unionfs_node_hashhead *
  135 unionfs_get_hashhead(struct vnode *dvp, struct vnode *lookup)
  136 {
  137         struct unionfs_node *unp;
  138 
  139         unp = VTOUNIONFS(dvp);
  140 
  141         return (&(unp->un_hashtbl[vfs_hash_index(lookup) & UNIONFSHASHMASK]));
  142 }
  143 
  144 /*
  145  * Attempt to lookup a cached unionfs vnode by upper/lower vp
  146  * from dvp, with dvp's interlock held.
  147  */
  148 static struct vnode *
  149 unionfs_get_cached_vnode_locked(struct vnode *lookup, struct vnode *dvp)
  150 {
  151         struct unionfs_node *unp;
  152         struct unionfs_node_hashhead *hd;
  153         struct vnode *vp;
  154 
  155         hd = unionfs_get_hashhead(dvp, lookup);
  156 
  157         LIST_FOREACH(unp, hd, un_hash) {
  158                 if (unp->un_uppervp == lookup ||
  159                     unp->un_lowervp == lookup) {
  160                         vp = UNIONFSTOV(unp);
  161                         VI_LOCK_FLAGS(vp, MTX_DUPOK);
  162                         vp->v_iflag &= ~VI_OWEINACT;
  163                         if (VN_IS_DOOMED(vp) ||
  164                             ((vp->v_iflag & VI_DOINGINACT) != 0)) {
  165                                 VI_UNLOCK(vp);
  166                                 vp = NULLVP;
  167                         } else {
  168                                 vrefl(vp);
  169                                 VI_UNLOCK(vp);
  170                         }
  171                         return (vp);
  172                 }
  173         }
  174 
  175         return (NULLVP);
  176 }
  177 
  178 
  179 /*
  180  * Get the cached vnode.
  181  */
  182 static struct vnode *
  183 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
  184     struct vnode *dvp)
  185 {
  186         struct vnode *vp;
  187 
  188         vp = NULLVP;
  189         VI_LOCK(dvp);
  190         if (uvp != NULLVP)
  191                 vp = unionfs_get_cached_vnode_locked(uvp, dvp);
  192         else if (lvp != NULLVP)
  193                 vp = unionfs_get_cached_vnode_locked(lvp, dvp);
  194         VI_UNLOCK(dvp);
  195 
  196         return (vp);
  197 }
  198 
  199 /*
  200  * Add the new vnode into cache.
  201  */
  202 static struct vnode *
  203 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
  204     struct vnode *dvp)
  205 {
  206         struct unionfs_node_hashhead *hd;
  207         struct vnode *vp;
  208 
  209         ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
  210         ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
  211         KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR,
  212             ("%s: v_type != VDIR", __func__));
  213         KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR,
  214             ("%s: v_type != VDIR", __func__));
  215 
  216         vp = NULLVP;
  217         VI_LOCK(dvp);
  218         if (uncp->un_uppervp != NULL)
  219                 vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp);
  220         else if (uncp->un_lowervp != NULL)
  221                 vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp);
  222         if (vp == NULLVP) {
  223                 hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ?
  224                     uncp->un_uppervp : uncp->un_lowervp));
  225                 LIST_INSERT_HEAD(hd, uncp, un_hash);
  226         }
  227         VI_UNLOCK(dvp);
  228 
  229         return (vp);
  230 }
  231 
  232 /*
  233  * Remove the vnode.
  234  */
  235 static void
  236 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
  237 {
  238         KASSERT(unp != NULL, ("%s: null node", __func__));
  239         KASSERT(dvp != NULLVP,
  240             ("%s: null parent vnode", __func__));
  241 
  242         VI_LOCK(dvp);
  243         if (unp->un_hash.le_prev != NULL) {
  244                 LIST_REMOVE(unp, un_hash);
  245                 unp->un_hash.le_next = NULL;
  246                 unp->un_hash.le_prev = NULL;
  247         }
  248         VI_UNLOCK(dvp);
  249 }
  250 
  251 /*
  252  * Common cleanup handling for unionfs_nodeget
  253  * Upper, lower, and parent directory vnodes are expected to be referenced by
  254  * the caller.  Upper and lower vnodes, if non-NULL, are also expected to be
  255  * exclusively locked by the caller.
  256  * This function will return with the caller's locks and references undone.
  257  */
  258 static void
  259 unionfs_nodeget_cleanup(struct vnode *vp, struct unionfs_node *unp)
  260 {
  261 
  262         /*
  263          * Lock and reset the default vnode lock; vgone() expects a locked
  264          * vnode, and we're going to reset the vnode ops.
  265          */
  266         lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
  267 
  268         /*
  269          * Clear out private data and reset the vnode ops to avoid use of
  270          * unionfs vnode ops on a partially constructed vnode.
  271          */
  272         VI_LOCK(vp);
  273         vp->v_data = NULL;
  274         vp->v_vnlock = &vp->v_lock;
  275         vp->v_op = &dead_vnodeops;
  276         VI_UNLOCK(vp);
  277         vgone(vp);
  278         vput(vp);
  279 
  280         if (unp->un_dvp != NULLVP)
  281                 vrele(unp->un_dvp);
  282         if (unp->un_uppervp != NULLVP)
  283                 vput(unp->un_uppervp);
  284         if (unp->un_lowervp != NULLVP)
  285                 vput(unp->un_lowervp);
  286         if (unp->un_hashtbl != NULL)
  287                 hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
  288         free(unp->un_path, M_UNIONFSPATH);
  289         free(unp, M_UNIONFSNODE);
  290 }
  291 
  292 /*
  293  * Make a new or get existing unionfs node.
  294  * 
  295  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
  296  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
  297  * you should not lock plurality simultaneously.
  298  */
  299 int
  300 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
  301     struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp,
  302     struct componentname *cnp)
  303 {
  304         char           *path;
  305         struct unionfs_mount *ump;
  306         struct unionfs_node *unp;
  307         struct vnode   *vp;
  308         u_long          hashmask;
  309         int             error;
  310         int             lkflags;
  311         enum vtype      vt;
  312 
  313         error = 0;
  314         ump = MOUNTTOUNIONFSMOUNT(mp);
  315         lkflags = (cnp ? cnp->cn_lkflags : 0);
  316         path = (cnp ? cnp->cn_nameptr : NULL);
  317         *vpp = NULLVP;
  318 
  319         if (uppervp == NULLVP && lowervp == NULLVP)
  320                 panic("%s: upper and lower is null", __func__);
  321 
  322         vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
  323 
  324         /* If it has no ISLASTCN flag, path check is skipped. */
  325         if (cnp && !(cnp->cn_flags & ISLASTCN))
  326                 path = NULL;
  327 
  328         /* check the cache */
  329         if (dvp != NULLVP && vt == VDIR) {
  330                 vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp);
  331                 if (vp != NULLVP) {
  332                         *vpp = vp;
  333                         goto unionfs_nodeget_out;
  334                 }
  335         }
  336 
  337         unp = malloc(sizeof(struct unionfs_node),
  338             M_UNIONFSNODE, M_WAITOK | M_ZERO);
  339 
  340         error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
  341         if (error != 0) {
  342                 free(unp, M_UNIONFSNODE);
  343                 return (error);
  344         }
  345         if (dvp != NULLVP)
  346                 vref(dvp);
  347         if (uppervp != NULLVP)
  348                 vref(uppervp);
  349         if (lowervp != NULLVP)
  350                 vref(lowervp);
  351 
  352         if (vt == VDIR) {
  353                 unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
  354                     &hashmask);
  355                 KASSERT(hashmask == UNIONFSHASHMASK,
  356                     ("unexpected unionfs hash mask 0x%lx", hashmask));
  357         }
  358 
  359         unp->un_vnode = vp;
  360         unp->un_uppervp = uppervp;
  361         unp->un_lowervp = lowervp;
  362         unp->un_dvp = dvp;
  363         if (uppervp != NULLVP)
  364                 vp->v_vnlock = uppervp->v_vnlock;
  365         else
  366                 vp->v_vnlock = lowervp->v_vnlock;
  367 
  368         if (path != NULL) {
  369                 unp->un_path = malloc(cnp->cn_namelen + 1,
  370                     M_UNIONFSPATH, M_WAITOK | M_ZERO);
  371                 bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
  372                 unp->un_path[cnp->cn_namelen] = '\0';
  373                 unp->un_pathlen = cnp->cn_namelen;
  374         }
  375         vp->v_type = vt;
  376         vp->v_data = unp;
  377 
  378         /*
  379          * TODO: This is an imperfect check, as there's no guarantee that
  380          * the underlying filesystems will always return vnode pointers
  381          * for the root inodes that match our cached values.  To reduce
  382          * the likelihood of failure, for example in the case where either
  383          * vnode has been forcibly doomed, we check both pointers and set
  384          * VV_ROOT if either matches.
  385          */
  386         if (ump->um_uppervp == uppervp || ump->um_lowervp == lowervp)
  387                 vp->v_vflag |= VV_ROOT;
  388         KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
  389             ("%s: NULL dvp for non-root vp %p", __func__, vp));
  390 
  391         vn_lock_pair(lowervp, false, uppervp, false); 
  392         error = insmntque1(vp, mp);
  393         if (error != 0) {
  394                 unionfs_nodeget_cleanup(vp, unp);
  395                 return (error);
  396         }
  397         if (lowervp != NULL && VN_IS_DOOMED(lowervp)) {
  398                 vput(lowervp);
  399                 unp->un_lowervp = NULL;
  400         }
  401         if (uppervp != NULL && VN_IS_DOOMED(uppervp)) {
  402                 vput(uppervp);
  403                 unp->un_uppervp = NULL;
  404         }
  405         if (unp->un_lowervp == NULL && unp->un_uppervp == NULL) {
  406                 unionfs_nodeget_cleanup(vp, unp);
  407                 return (ENOENT);
  408         }
  409 
  410         vn_set_state(vp, VSTATE_CONSTRUCTED);
  411 
  412         if (dvp != NULLVP && vt == VDIR)
  413                 *vpp = unionfs_ins_cached_vnode(unp, dvp);
  414         if (*vpp != NULLVP) {
  415                 unionfs_nodeget_cleanup(vp, unp);
  416                 vp = *vpp;
  417         } else {
  418                 if (uppervp != NULL)
  419                         VOP_UNLOCK(uppervp);
  420                 if (lowervp != NULL)
  421                         VOP_UNLOCK(lowervp);
  422                 *vpp = vp;
  423         }
  424 
  425 unionfs_nodeget_out:
  426         if (lkflags & LK_TYPE_MASK)
  427                 vn_lock(vp, lkflags | LK_RETRY);
  428 
  429         return (0);
  430 }
  431 
  432 /*
  433  * Clean up the unionfs node.
  434  */
  435 void
  436 unionfs_noderem(struct vnode *vp)
  437 {
  438         struct unionfs_node *unp, *unp_t1, *unp_t2;
  439         struct unionfs_node_hashhead *hd;
  440         struct unionfs_node_status *unsp, *unsp_tmp;
  441         struct vnode   *lvp;
  442         struct vnode   *uvp;
  443         struct vnode   *dvp;
  444         int             count;
  445         int             writerefs;
  446 
  447         /*
  448          * The root vnode lock may be recursed during unmount, because
  449          * it may share the same lock as the unionfs mount's covered vnode,
  450          * which is locked across VFS_UNMOUNT().  This lock will then be
  451          * recursively taken during the vflush() issued by unionfs_unmount().
  452          * But we still only need to lock the unionfs lock once, because only
  453          * one of those lock operations was taken against a unionfs vnode and
  454          * will be undone against a unionfs vnode.
  455          */
  456         KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0,
  457             ("%s: vnode %p locked recursively", __func__, vp));
  458         if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
  459                 panic("%s: failed to acquire lock for vnode lock", __func__);
  460 
  461         /*
  462          * Use the interlock to protect the clearing of v_data to
  463          * prevent faults in unionfs_lock().
  464          */
  465         VI_LOCK(vp);
  466         unp = VTOUNIONFS(vp);
  467         lvp = unp->un_lowervp;
  468         uvp = unp->un_uppervp;
  469         dvp = unp->un_dvp;
  470         unp->un_lowervp = unp->un_uppervp = NULLVP;
  471         vp->v_vnlock = &(vp->v_lock);
  472         vp->v_data = NULL;
  473         vp->v_object = NULL;
  474         if (unp->un_hashtbl != NULL) {
  475                 /*
  476                  * Clear out any cached child vnodes.  This should only
  477                  * be necessary during forced unmount, when the vnode may
  478                  * be reclaimed with a non-zero use count.  Otherwise the
  479                  * reference held by each child should prevent reclamation.
  480                  */
  481                 for (count = 0; count <= UNIONFSHASHMASK; count++) {
  482                         hd = unp->un_hashtbl + count;
  483                         LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
  484                                 LIST_REMOVE(unp_t1, un_hash);
  485                                 unp_t1->un_hash.le_next = NULL;
  486                                 unp_t1->un_hash.le_prev = NULL;
  487                         }
  488                 }
  489         }
  490         VI_UNLOCK(vp);
  491 
  492         writerefs = atomic_load_int(&vp->v_writecount);
  493         VNASSERT(writerefs >= 0, vp,
  494             ("%s: write count %d, unexpected text ref", __func__, writerefs));
  495         /*
  496          * If we were opened for write, we leased the write reference
  497          * to the lower vnode.  If this is a reclamation due to the
  498          * forced unmount, undo the reference now.
  499          */
  500         if (writerefs > 0) {
  501                 VNASSERT(uvp != NULL, vp,
  502                     ("%s: write reference without upper vnode", __func__));
  503                 VOP_ADD_WRITECOUNT(uvp, -writerefs);
  504         }
  505         if (lvp != NULLVP)
  506                 VOP_UNLOCK(lvp);
  507         if (uvp != NULLVP)
  508                 VOP_UNLOCK(uvp);
  509 
  510         if (dvp != NULLVP)
  511                 unionfs_rem_cached_vnode(unp, dvp);
  512 
  513         if (lvp != NULLVP)
  514                 vrele(lvp);
  515         if (uvp != NULLVP)
  516                 vrele(uvp);
  517         if (unp->un_path != NULL) {
  518                 free(unp->un_path, M_UNIONFSPATH);
  519                 unp->un_path = NULL;
  520                 unp->un_pathlen = 0;
  521         }
  522 
  523         if (unp->un_hashtbl != NULL) {
  524                 hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
  525         }
  526 
  527         LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
  528                 LIST_REMOVE(unsp, uns_list);
  529                 free(unsp, M_TEMP);
  530         }
  531         if (dvp != NULLVP) {
  532                 mtx_lock(&unionfs_deferred_rele_lock);
  533                 STAILQ_INSERT_TAIL(&unionfs_deferred_rele_list, unp, un_rele);
  534                 mtx_unlock(&unionfs_deferred_rele_lock);
  535                 taskqueue_enqueue(taskqueue_unionfs_rele,
  536                     &unionfs_deferred_rele_task);
  537         } else
  538                 free(unp, M_UNIONFSNODE);
  539 }
  540 
  541 /*
  542  * Get the unionfs node status object for the vnode corresponding to unp,
  543  * for the process that owns td.  Allocate a new status object if one
  544  * does not already exist.
  545  */
  546 void
  547 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
  548     struct unionfs_node_status **unspp)
  549 {
  550         struct unionfs_node_status *unsp;
  551         pid_t pid;
  552 
  553         pid = td->td_proc->p_pid;
  554 
  555         KASSERT(NULL != unspp, ("%s: NULL status", __func__));
  556         ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
  557 
  558         LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
  559                 if (unsp->uns_pid == pid) {
  560                         *unspp = unsp;
  561                         return;
  562                 }
  563         }
  564 
  565         /* create a new unionfs node status */
  566         unsp = malloc(sizeof(struct unionfs_node_status),
  567             M_TEMP, M_WAITOK | M_ZERO);
  568 
  569         unsp->uns_pid = pid;
  570         LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
  571 
  572         *unspp = unsp;
  573 }
  574 
  575 /*
  576  * Remove the unionfs node status, if you can.
  577  * You need exclusive lock this vnode.
  578  */
  579 void
  580 unionfs_tryrem_node_status(struct unionfs_node *unp,
  581     struct unionfs_node_status *unsp)
  582 {
  583         KASSERT(NULL != unsp, ("%s: NULL status", __func__));
  584         ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
  585 
  586         if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
  587                 return;
  588 
  589         LIST_REMOVE(unsp, uns_list);
  590         free(unsp, M_TEMP);
  591 }
  592 
  593 /*
  594  * Create upper node attr.
  595  */
  596 void
  597 unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva,
  598     struct vattr *uva, struct thread *td)
  599 {
  600         VATTR_NULL(uva);
  601         uva->va_type = lva->va_type;
  602         uva->va_atime = lva->va_atime;
  603         uva->va_mtime = lva->va_mtime;
  604         uva->va_ctime = lva->va_ctime;
  605 
  606         switch (ump->um_copymode) {
  607         case UNIONFS_TRANSPARENT:
  608                 uva->va_mode = lva->va_mode;
  609                 uva->va_uid = lva->va_uid;
  610                 uva->va_gid = lva->va_gid;
  611                 break;
  612         case UNIONFS_MASQUERADE:
  613                 if (ump->um_uid == lva->va_uid) {
  614                         uva->va_mode = lva->va_mode & 077077;
  615                         uva->va_mode |= (lva->va_type == VDIR ?
  616                             ump->um_udir : ump->um_ufile) & 0700;
  617                         uva->va_uid = lva->va_uid;
  618                         uva->va_gid = lva->va_gid;
  619                 } else {
  620                         uva->va_mode = (lva->va_type == VDIR ?
  621                             ump->um_udir : ump->um_ufile);
  622                         uva->va_uid = ump->um_uid;
  623                         uva->va_gid = ump->um_gid;
  624                 }
  625                 break;
  626         default:                /* UNIONFS_TRADITIONAL */
  627                 uva->va_mode = 0777 & ~td->td_proc->p_pd->pd_cmask;
  628                 uva->va_uid = ump->um_uid;
  629                 uva->va_gid = ump->um_gid;
  630                 break;
  631         }
  632 }
  633 
  634 /*
  635  * Create upper node attr.
  636  */
  637 int
  638 unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp,
  639     struct vattr *uva, struct ucred *cred, struct thread *td)
  640 {
  641         struct vattr    lva;
  642         int             error;
  643 
  644         if ((error = VOP_GETATTR(lvp, &lva, cred)))
  645                 return (error);
  646 
  647         unionfs_create_uppervattr_core(ump, &lva, uva, td);
  648 
  649         return (error);
  650 }
  651 
  652 /*
  653  * relookup
  654  * 
  655  * dvp should be locked on entry and will be locked on return.
  656  * 
  657  * If an error is returned, *vpp will be invalid, otherwise it will hold a
  658  * locked, referenced vnode. If *vpp == dvp then remember that only one
  659  * LK_EXCLUSIVE lock is held.
  660  */
  661 int
  662 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
  663     struct componentname *cnp, struct componentname *cn, struct thread *td,
  664     char *path, int pathlen, u_long nameiop)
  665 {
  666         int error;
  667         bool refstart;
  668 
  669         cn->cn_namelen = pathlen;
  670         cn->cn_pnbuf = path;
  671         cn->cn_nameiop = nameiop;
  672         cn->cn_flags = (LOCKPARENT | LOCKLEAF | ISLASTCN);
  673         cn->cn_lkflags = LK_EXCLUSIVE;
  674         cn->cn_cred = cnp->cn_cred;
  675         cn->cn_nameptr = cn->cn_pnbuf;
  676 
  677         refstart = false;
  678         if (nameiop == DELETE) {
  679                 cn->cn_flags |= (cnp->cn_flags & DOWHITEOUT);
  680         } else if (nameiop == RENAME) {
  681                 refstart = true;
  682         } else if (nameiop == CREATE) {
  683                 cn->cn_flags |= NOCACHE;
  684         }
  685 
  686         vref(dvp);
  687         VOP_UNLOCK(dvp);
  688 
  689         if ((error = vfs_relookup(dvp, vpp, cn, refstart))) {
  690                 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
  691         } else
  692                 vrele(dvp);
  693 
  694         KASSERT(cn->cn_pnbuf == path, ("%s: cn_pnbuf changed", __func__));
  695 
  696         return (error);
  697 }
  698 
  699 /*
  700  * relookup for CREATE namei operation.
  701  *
  702  * dvp is unionfs vnode. dvp should be locked.
  703  *
  704  * If it called 'unionfs_copyfile' function by unionfs_link etc,
  705  * VOP_LOOKUP information is broken.
  706  * So it need relookup in order to create link etc.
  707  */
  708 int
  709 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
  710     struct thread *td)
  711 {
  712         struct vnode *udvp;
  713         struct vnode *vp;
  714         struct componentname cn;
  715         int error;
  716 
  717         udvp = UNIONFSVPTOUPPERVP(dvp);
  718         vp = NULLVP;
  719 
  720         error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
  721             cnp->cn_namelen, CREATE);
  722         if (error)
  723                 return (error);
  724 
  725         if (vp != NULLVP) {
  726                 if (udvp == vp)
  727                         vrele(vp);
  728                 else
  729                         vput(vp);
  730 
  731                 error = EEXIST;
  732         }
  733 
  734         return (error);
  735 }
  736 
  737 /*
  738  * relookup for DELETE namei operation.
  739  *
  740  * dvp is unionfs vnode. dvp should be locked.
  741  */
  742 int
  743 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
  744     struct thread *td)
  745 {
  746         struct vnode *udvp;
  747         struct vnode *vp;
  748         struct componentname cn;
  749         int error;
  750 
  751         udvp = UNIONFSVPTOUPPERVP(dvp);
  752         vp = NULLVP;
  753 
  754         error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
  755             cnp->cn_namelen, DELETE);
  756         if (error)
  757                 return (error);
  758 
  759         if (vp == NULLVP)
  760                 error = ENOENT;
  761         else {
  762                 if (udvp == vp)
  763                         vrele(vp);
  764                 else
  765                         vput(vp);
  766         }
  767 
  768         return (error);
  769 }
  770 
  771 /*
  772  * relookup for RENAME namei operation.
  773  *
  774  * dvp is unionfs vnode. dvp should be locked.
  775  */
  776 int
  777 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
  778     struct thread *td)
  779 {
  780         struct vnode *udvp;
  781         struct vnode *vp;
  782         struct componentname cn;
  783         int error;
  784 
  785         udvp = UNIONFSVPTOUPPERVP(dvp);
  786         vp = NULLVP;
  787 
  788         error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
  789             cnp->cn_namelen, RENAME);
  790         if (error)
  791                 return (error);
  792 
  793         if (vp != NULLVP) {
  794                 if (udvp == vp)
  795                         vrele(vp);
  796                 else
  797                         vput(vp);
  798         }
  799 
  800         return (error);
  801 }
  802 
  803 /*
  804  * Update the unionfs_node.
  805  * 
  806  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
  807  * uvp's lock and lower's lock will be unlocked.
  808  */
  809 static void
  810 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
  811     struct thread *td)
  812 {
  813         struct unionfs_node_hashhead *hd;
  814         struct vnode   *vp;
  815         struct vnode   *lvp;
  816         struct vnode   *dvp;
  817         unsigned        count, lockrec;
  818 
  819         vp = UNIONFSTOV(unp);
  820         lvp = unp->un_lowervp;
  821         ASSERT_VOP_ELOCKED(lvp, __func__);
  822         ASSERT_VOP_ELOCKED(uvp, __func__);
  823         dvp = unp->un_dvp;
  824 
  825         VNASSERT(vp->v_writecount == 0, vp,
  826             ("%s: non-zero writecount", __func__));
  827         /*
  828          * Update the upper vnode's lock state to match the lower vnode,
  829          * and then switch the unionfs vnode's lock to the upper vnode.
  830          */
  831         lockrec = lvp->v_vnlock->lk_recurse;
  832         for (count = 0; count < lockrec; count++)
  833                 vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
  834         VI_LOCK(vp);
  835         unp->un_uppervp = uvp;
  836         vp->v_vnlock = uvp->v_vnlock;
  837         VI_UNLOCK(vp);
  838 
  839         /*
  840          * Re-cache the unionfs vnode against the upper vnode
  841          */
  842         if (dvp != NULLVP && vp->v_type == VDIR) {
  843                 VI_LOCK(dvp);
  844                 if (unp->un_hash.le_prev != NULL) {
  845                         LIST_REMOVE(unp, un_hash);
  846                         hd = unionfs_get_hashhead(dvp, uvp);
  847                         LIST_INSERT_HEAD(hd, unp, un_hash);
  848                 }
  849                 VI_UNLOCK(unp->un_dvp);
  850         }
  851 }
  852 
  853 /*
  854  * Create a new shadow dir.
  855  * 
  856  * udvp should be locked on entry and will be locked on return.
  857  * 
  858  * If no error returned, unp will be updated.
  859  */
  860 int
  861 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
  862     struct unionfs_node *unp, struct componentname *cnp, struct thread *td)
  863 {
  864         struct vnode   *lvp;
  865         struct vnode   *uvp;
  866         struct vattr    va;
  867         struct vattr    lva;
  868         struct nameidata nd;
  869         struct mount   *mp;
  870         struct ucred   *cred;
  871         struct ucred   *credbk;
  872         struct uidinfo *rootinfo;
  873         int             error;
  874 
  875         if (unp->un_uppervp != NULLVP)
  876                 return (EEXIST);
  877 
  878         lvp = unp->un_lowervp;
  879         uvp = NULLVP;
  880         credbk = cnp->cn_cred;
  881 
  882         /* Authority change to root */
  883         rootinfo = uifind((uid_t)0);
  884         cred = crdup(cnp->cn_cred);
  885         /*
  886          * The calls to chgproccnt() are needed to compensate for change_ruid()
  887          * calling chgproccnt().
  888          */
  889         chgproccnt(cred->cr_ruidinfo, 1, 0);
  890         change_euid(cred, rootinfo);
  891         change_ruid(cred, rootinfo);
  892         change_svuid(cred, (uid_t)0);
  893         uifree(rootinfo);
  894         cnp->cn_cred = cred;
  895 
  896         memset(&nd.ni_cnd, 0, sizeof(struct componentname));
  897         NDPREINIT(&nd);
  898 
  899         if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
  900                 goto unionfs_mkshadowdir_abort;
  901 
  902         if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td,
  903             cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
  904                 goto unionfs_mkshadowdir_abort;
  905         if (uvp != NULLVP) {
  906                 if (udvp == uvp)
  907                         vrele(uvp);
  908                 else
  909                         vput(uvp);
  910 
  911                 error = EEXIST;
  912                 goto unionfs_mkshadowdir_abort;
  913         }
  914 
  915         if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
  916                 goto unionfs_mkshadowdir_abort;
  917         unionfs_create_uppervattr_core(ump, &lva, &va, td);
  918 
  919         error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va);
  920 
  921         if (!error) {
  922                 unionfs_node_update(unp, uvp, td);
  923 
  924                 /*
  925                  * XXX The bug which cannot set uid/gid was corrected.
  926                  * Ignore errors.
  927                  */
  928                 va.va_type = VNON;
  929                 VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
  930         }
  931         vn_finished_write(mp);
  932 
  933 unionfs_mkshadowdir_abort:
  934         cnp->cn_cred = credbk;
  935         chgproccnt(cred->cr_ruidinfo, -1, 0);
  936         crfree(cred);
  937 
  938         return (error);
  939 }
  940 
  941 /*
  942  * Create a new whiteout.
  943  * 
  944  * dvp should be locked on entry and will be locked on return.
  945  */
  946 int
  947 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
  948     struct thread *td, char *path, int pathlen)
  949 {
  950         struct vnode   *wvp;
  951         struct nameidata nd;
  952         struct mount   *mp;
  953         int             error;
  954 
  955         wvp = NULLVP;
  956         NDPREINIT(&nd);
  957         if ((error = unionfs_relookup(dvp, &wvp, cnp, &nd.ni_cnd, td, path,
  958             pathlen, CREATE))) {
  959                 return (error);
  960         }
  961         if (wvp != NULLVP) {
  962                 if (dvp == wvp)
  963                         vrele(wvp);
  964                 else
  965                         vput(wvp);
  966 
  967                 return (EEXIST);
  968         }
  969 
  970         if ((error = vn_start_write(dvp, &mp, V_WAIT | V_PCATCH)))
  971                 goto unionfs_mkwhiteout_free_out;
  972         error = VOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE);
  973 
  974         vn_finished_write(mp);
  975 
  976 unionfs_mkwhiteout_free_out:
  977         return (error);
  978 }
  979 
  980 /*
  981  * Create a new vnode for create a new shadow file.
  982  * 
  983  * If an error is returned, *vpp will be invalid, otherwise it will hold a
  984  * locked, referenced and opened vnode.
  985  * 
  986  * unp is never updated.
  987  */
  988 static int
  989 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
  990     struct unionfs_node *unp, struct vattr *uvap, struct thread *td)
  991 {
  992         struct unionfs_mount *ump;
  993         struct vnode   *vp;
  994         struct vnode   *lvp;
  995         struct ucred   *cred;
  996         struct vattr    lva;
  997         struct nameidata nd;
  998         int             fmode;
  999         int             error;
 1000 
 1001         ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
 1002         vp = NULLVP;
 1003         lvp = unp->un_lowervp;
 1004         cred = td->td_ucred;
 1005         fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
 1006         error = 0;
 1007 
 1008         if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
 1009                 return (error);
 1010         unionfs_create_uppervattr_core(ump, &lva, uvap, td);
 1011 
 1012         if (unp->un_path == NULL)
 1013                 panic("%s: NULL un_path", __func__);
 1014 
 1015         nd.ni_cnd.cn_namelen = unp->un_pathlen;
 1016         nd.ni_cnd.cn_pnbuf = unp->un_path;
 1017         nd.ni_cnd.cn_nameiop = CREATE;
 1018         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | ISLASTCN;
 1019         nd.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
 1020         nd.ni_cnd.cn_cred = cred;
 1021         nd.ni_cnd.cn_nameptr = nd.ni_cnd.cn_pnbuf;
 1022         NDPREINIT(&nd);
 1023 
 1024         vref(udvp);
 1025         if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd, false)) != 0)
 1026                 goto unionfs_vn_create_on_upper_free_out2;
 1027         vrele(udvp);
 1028 
 1029         if (vp != NULLVP) {
 1030                 if (vp == udvp)
 1031                         vrele(vp);
 1032                 else
 1033                         vput(vp);
 1034                 error = EEXIST;
 1035                 goto unionfs_vn_create_on_upper_free_out1;
 1036         }
 1037 
 1038         if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0)
 1039                 goto unionfs_vn_create_on_upper_free_out1;
 1040 
 1041         if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
 1042                 vput(vp);
 1043                 goto unionfs_vn_create_on_upper_free_out1;
 1044         }
 1045         error = VOP_ADD_WRITECOUNT(vp, 1);
 1046         CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
 1047             __func__, vp, vp->v_writecount);
 1048         if (error == 0) {
 1049                 *vpp = vp;
 1050         } else {
 1051                 VOP_CLOSE(vp, fmode, cred, td);
 1052         }
 1053 
 1054 unionfs_vn_create_on_upper_free_out1:
 1055         VOP_UNLOCK(udvp);
 1056 
 1057 unionfs_vn_create_on_upper_free_out2:
 1058         KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path,
 1059             ("%s: cn_pnbuf changed", __func__));
 1060 
 1061         return (error);
 1062 }
 1063 
 1064 /*
 1065  * Copy from lvp to uvp.
 1066  * 
 1067  * lvp and uvp should be locked and opened on entry and will be locked and
 1068  * opened on return.
 1069  */
 1070 static int
 1071 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
 1072     struct ucred *cred, struct thread *td)
 1073 {
 1074         char           *buf;
 1075         struct uio      uio;
 1076         struct iovec    iov;
 1077         off_t           offset;
 1078         int             count;
 1079         int             error;
 1080         int             bufoffset;
 1081 
 1082         error = 0;
 1083         memset(&uio, 0, sizeof(uio));
 1084 
 1085         uio.uio_td = td;
 1086         uio.uio_segflg = UIO_SYSSPACE;
 1087         uio.uio_offset = 0;
 1088 
 1089         buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
 1090 
 1091         while (error == 0) {
 1092                 offset = uio.uio_offset;
 1093 
 1094                 uio.uio_iov = &iov;
 1095                 uio.uio_iovcnt = 1;
 1096                 iov.iov_base = buf;
 1097                 iov.iov_len = MAXBSIZE;
 1098                 uio.uio_resid = iov.iov_len;
 1099                 uio.uio_rw = UIO_READ;
 1100 
 1101                 if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
 1102                         break;
 1103                 if ((count = MAXBSIZE - uio.uio_resid) == 0)
 1104                         break;
 1105 
 1106                 bufoffset = 0;
 1107                 while (bufoffset < count) {
 1108                         uio.uio_iov = &iov;
 1109                         uio.uio_iovcnt = 1;
 1110                         iov.iov_base = buf + bufoffset;
 1111                         iov.iov_len = count - bufoffset;
 1112                         uio.uio_offset = offset + bufoffset;
 1113                         uio.uio_resid = iov.iov_len;
 1114                         uio.uio_rw = UIO_WRITE;
 1115 
 1116                         if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
 1117                                 break;
 1118 
 1119                         bufoffset += (count - bufoffset) - uio.uio_resid;
 1120                 }
 1121 
 1122                 uio.uio_offset = offset + bufoffset;
 1123         }
 1124 
 1125         free(buf, M_TEMP);
 1126 
 1127         return (error);
 1128 }
 1129 
 1130 /*
 1131  * Copy file from lower to upper.
 1132  * 
 1133  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
 1134  * docopy.
 1135  * 
 1136  * If no error returned, unp will be updated.
 1137  */
 1138 int
 1139 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
 1140     struct thread *td)
 1141 {
 1142         struct mount   *mp;
 1143         struct vnode   *udvp;
 1144         struct vnode   *lvp;
 1145         struct vnode   *uvp;
 1146         struct vattr    uva;
 1147         int             error;
 1148 
 1149         lvp = unp->un_lowervp;
 1150         uvp = NULLVP;
 1151 
 1152         if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
 1153                 return (EROFS);
 1154         if (unp->un_dvp == NULLVP)
 1155                 return (EINVAL);
 1156         if (unp->un_uppervp != NULLVP)
 1157                 return (EEXIST);
 1158         udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
 1159         if (udvp == NULLVP)
 1160                 return (EROFS);
 1161         if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
 1162                 return (EROFS);
 1163 
 1164         error = VOP_ACCESS(lvp, VREAD, cred, td);
 1165         if (error != 0)
 1166                 return (error);
 1167 
 1168         if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)) != 0)
 1169                 return (error);
 1170         error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
 1171         if (error != 0) {
 1172                 vn_finished_write(mp);
 1173                 return (error);
 1174         }
 1175 
 1176         if (docopy != 0) {
 1177                 error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
 1178                 if (error == 0) {
 1179                         error = unionfs_copyfile_core(lvp, uvp, cred, td);
 1180                         VOP_CLOSE(lvp, FREAD, cred, td);
 1181                 }
 1182         }
 1183         VOP_CLOSE(uvp, FWRITE, cred, td);
 1184         VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
 1185         CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
 1186             __func__, uvp, uvp->v_writecount);
 1187 
 1188         vn_finished_write(mp);
 1189 
 1190         if (error == 0) {
 1191                 /* Reset the attributes. Ignore errors. */
 1192                 uva.va_type = VNON;
 1193                 VOP_SETATTR(uvp, &uva, cred);
 1194         }
 1195 
 1196         unionfs_node_update(unp, uvp, td);
 1197 
 1198         return (error);
 1199 }
 1200 
 1201 /*
 1202  * It checks whether vp can rmdir. (check empty)
 1203  *
 1204  * vp is unionfs vnode.
 1205  * vp should be locked.
 1206  */
 1207 int
 1208 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
 1209 {
 1210         struct vnode   *uvp;
 1211         struct vnode   *lvp;
 1212         struct vnode   *tvp;
 1213         struct dirent  *dp;
 1214         struct dirent  *edp;
 1215         struct componentname cn;
 1216         struct iovec    iov;
 1217         struct uio      uio;
 1218         struct vattr    va;
 1219         int             error;
 1220         int             eofflag;
 1221         int             lookuperr;
 1222 
 1223         /*
 1224          * The size of buf needs to be larger than DIRBLKSIZ.
 1225          */
 1226         char            buf[256 * 6];
 1227 
 1228         ASSERT_VOP_ELOCKED(vp, __func__);
 1229 
 1230         eofflag = 0;
 1231         uvp = UNIONFSVPTOUPPERVP(vp);
 1232         lvp = UNIONFSVPTOLOWERVP(vp);
 1233 
 1234         /* check opaque */
 1235         if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
 1236                 return (error);
 1237         if (va.va_flags & OPAQUE)
 1238                 return (0);
 1239 
 1240         /* open vnode */
 1241 #ifdef MAC
 1242         if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
 1243                 return (error);
 1244 #endif
 1245         if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
 1246                 return (error);
 1247         if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
 1248                 return (error);
 1249 
 1250         uio.uio_rw = UIO_READ;
 1251         uio.uio_segflg = UIO_SYSSPACE;
 1252         uio.uio_td = td;
 1253         uio.uio_offset = 0;
 1254 
 1255 #ifdef MAC
 1256         error = mac_vnode_check_readdir(td->td_ucred, lvp);
 1257 #endif
 1258         while (!error && !eofflag) {
 1259                 iov.iov_base = buf;
 1260                 iov.iov_len = sizeof(buf);
 1261                 uio.uio_iov = &iov;
 1262                 uio.uio_iovcnt = 1;
 1263                 uio.uio_resid = iov.iov_len;
 1264 
 1265                 error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
 1266                 if (error != 0)
 1267                         break;
 1268                 KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf),
 1269                     ("%s: empty read from lower FS", __func__));
 1270 
 1271                 edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
 1272                 for (dp = (struct dirent*)buf; !error && dp < edp;
 1273                      dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
 1274                         if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
 1275                             (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 1276                             (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
 1277                                 continue;
 1278 
 1279                         cn.cn_namelen = dp->d_namlen;
 1280                         cn.cn_pnbuf = NULL;
 1281                         cn.cn_nameptr = dp->d_name;
 1282                         cn.cn_nameiop = LOOKUP;
 1283                         cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
 1284                         cn.cn_lkflags = LK_EXCLUSIVE;
 1285                         cn.cn_cred = cred;
 1286 
 1287                         /*
 1288                          * check entry in lower.
 1289                          * Sometimes, readdir function returns
 1290                          * wrong entry.
 1291                          */
 1292                         lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
 1293 
 1294                         if (!lookuperr)
 1295                                 vput(tvp);
 1296                         else
 1297                                 continue; /* skip entry */
 1298 
 1299                         /*
 1300                          * check entry
 1301                          * If it has no exist/whiteout entry in upper,
 1302                          * directory is not empty.
 1303                          */
 1304                         cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
 1305                         lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
 1306 
 1307                         if (!lookuperr)
 1308                                 vput(tvp);
 1309 
 1310                         /* ignore exist or whiteout entry */
 1311                         if (!lookuperr ||
 1312                             (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
 1313                                 continue;
 1314 
 1315                         error = ENOTEMPTY;
 1316                 }
 1317         }
 1318 
 1319         /* close vnode */
 1320         VOP_CLOSE(vp, FREAD, cred, td);
 1321 
 1322         return (error);
 1323 }
 1324
Cache object: 232bc9dc2b1e33928f4d864b6641c1db
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/fs/unionfs/union_subr.c

FreeBSD/Linux Kernel Cross Reference
sys/fs/unionfs/union_subr.c