/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 * ...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 *
 * $FreeBSD$
 */

/*
 * Null Layer
 *
 * (See mount_nullfs(8) for more information.)
 *
 * The null layer duplicates a portion of the filesystem
 * name space under a new name.  In this respect, it is
 * similar to the loopback filesystem.  It differs from
 * the loopback fs in two respects: it is implemented using
 * stackable layer techniques, and its "null-node"s stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer has two purposes.  First, it serves as a demonstration
 * of layering by providing a layer which does nothing.  (It actually
 * does everything the loopback filesystem does, which is slightly
 * more than nothing.)  Second, the null layer can serve as a prototype
 * layer.  Since it provides all the necessary layer framework,
 * new filesystem layers can be created very easily by starting
 * with a null layer.
 *
 * The remainder of this comment examines the null layer as a basis
 * for constructing new layers.
 *
 *
 * INSTANTIATING NEW NULL LAYERS
 *
 * New null layers are created with mount_nullfs(8).
 * Mount_nullfs(8) takes two arguments, the pathname
 * of the lower vfs (target-pn) and the pathname where the null
 * layer will appear in the namespace (alias-pn).  After
 * the null layer is put into place, the contents
 * of the target-pn subtree will be aliased under alias-pn.
 *
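 * For example (a hypothetical invocation; the alias path is arbitrary):
 *
 *	mount_nullfs /usr/src /mnt/src
 *
 * makes the /usr/src tree appear a second time under /mnt/src.
 *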
 *
 * OPERATION OF A NULL LAYER
 *
 * The null layer is the minimum filesystem layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  The majority of its activity centers
 * on the bypass routine, through which nearly all vnode operations
 * pass.
 *
 * The bypass routine accepts arbitrary vnode operations for
 * handling by the lower layer.  It begins by examining vnode
 * operation arguments and replacing any null-nodes by their
 * lower-layer equivalents.  It then invokes the operation
 * on the lower layer.  Finally, it replaces the null-nodes
 * in the arguments and, if a vnode is returned by the operation,
 * stacks a null-node on top of the returned vnode.
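 *
 * In outline, the bypass routine behaves like the following sketch
 * (pseudocode only; see null_bypass() below for the real thing):
 *
 *	map null-node vnode arguments to their lower vnodes;
 *	error = VCALL(ap);	(invoke the op on the lower layer)
 *	restore the original (upper) vnode arguments;
 *	if (error == 0 && the op returned a vnode)
 *		wrap it with a null-node via null_nodeget();
 *	return (error);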
 *
 * Although bypass handles most operations, vop_getattr, vop_lock,
 * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
 * bypassed.  Vop_getattr must change the fsid being returned.
 * Vop_lock and vop_unlock must handle any locking for the
 * current vnode as well as pass the lock request down.
 * Vop_inactive and vop_reclaim are not bypassed so that
 * they can handle freeing null-layer specific data.  Vop_print
 * is not bypassed to avoid excessive debugging information.
 * Also, certain vnode operations change the locking state within
 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 * and symlink).  Ideally these operations should not change the
 * lock state, but should be changed to let the caller of the
 * function unlock them.  Otherwise, all intermediate vnode layers
 * (such as union, umapfs, etc.) must catch these functions to do
 * the necessary locking at their layer.
 *
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the null layer with a lower layer,
 * in effect stacking two VFSes.  Vnode stacks are instead
 * created on demand as files are accessed.
 *
 * The initial mount creates a single vnode stack for the
 * root of the new null layer.  All other vnode stacks
 * are created as a result of vnode operations on
 * this or other null vnode stacks.
 *
 * New vnode stacks come into existence as a result of
 * an operation which returns a vnode.
 * The bypass routine stacks a null-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with
 *	"mount_nullfs /usr/include /dev/layer/null".
 * Changing directory to /dev/layer/null will assign
 * the root null-node (which was created when the null layer was mounted).
 * Now consider opening "sys".  A vop_lookup would be
 * done on the root null-node.  This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new filesystem layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables, as shown below.
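 *
 * For instance (a hypothetical rename of the layer to "newfs"):
 *
 *	sed -e 's/null/newfs/g' -e 's/NULL/NEWFS/g' \
 *	    null_vnops.c > newfs_vnops.c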
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping the vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.  Both
 * techniques are sketched below.
 *
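 * As a hypothetical illustration (the names below are assumed and do
 * not appear elsewhere in this file), a derived layer could implement
 * an operation either way:
 *
 *	static int
 *	example_getattr(struct vop_getattr_args *ap)
 *	{
 *		int error;
 *
 *		(First technique: reuse the bypass routine, which maps
 *		the vnode arguments for us, then adjust the result.)
 *		error = null_bypass((struct vop_generic_args *)ap);
 *		if (error == 0)
 *			ap->a_vap->va_fsid =
 *			    ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 *		return (error);
 *	}
 *
 *	static int
 *	example_fsync(struct vop_fsync_args *ap)
 *	{
 *		(Second technique: map the vnode argument by hand and
 *		invoke the lower-layer operation directly.)
 *		return (VOP_FSYNC(NULLVPTOLOWERVP(ap->a_vp),
 *		    ap->a_waitfor, ap->a_td));
 *	}
 *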
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>

#include <fs/nullfs/null.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>

static int null_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
	&null_bug_bypass, 0, "");
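
/*
 * The knob above is exported as debug.nullfs_bug_bypass and can be
 * toggled at runtime, for example:
 *
 *	sysctl debug.nullfs_bug_bypass=1
 */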

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side effects here.  This is not of concern in Sun-derived systems
 * since there are no such side effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(struct vop_generic_args *ap)
{
	struct vnode **this_vp_p;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnode *lvp;
	struct vnodeop_desc *descp = ap->a_desc;
	int error, i, reles;

	if (null_bug_bypass)
		printf("null_bypass: %s\n", descp->vdesc_name);

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("null_bypass: no vp's in map");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p = VOPARG_OFFSETTO(struct vnode **,
		    descp->vdesc_vp_offsets[i], ap);

		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map the first vp or
		 * vclean fails.)
		 */
		if (i != 0 && (*this_vp_p == NULLVP ||
		    (*this_vp_p)->v_op != &null_vnodeops)) {
			old_vps[i] = NULLVP;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);

			/*
			 * The upper vnode reference to the lower
			 * vnode is the only reference that keeps our
			 * pointer to the lower vnode alive.  If the lower
			 * vnode is relocked during the VOP call, the
			 * upper vnode might become unlocked and
			 * reclaimed, which invalidates our reference.
			 * Add a transient hold around the VOP call.
			 */
			vhold(*this_vp_p);

			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				vref(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	if (vps_p[0] != NULL && *vps_p[0] != NULL) {
		error = VCALL(ap);
	} else {
		printf("null_bypass: no map for %s\n", descp->vdesc_name);
		error = EINVAL;
	}

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i] != NULL) {
			lvp = *(vps_p[i]);

			/*
			 * Get rid of the transient hold on lvp.
			 * If lowervp was unlocked during the VOP
			 * operation, the nullfs upper vnode could have
			 * been reclaimed, which changes its v_vnlock
			 * back to the private v_lock.  In this case we
			 * must move lock ownership from the lower to
			 * the upper (reclaimed) vnode.
			 */
			if (lvp != NULLVP) {
				if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE &&
				    old_vps[i]->v_vnlock != lvp->v_vnlock) {
					VOP_UNLOCK(lvp);
					VOP_LOCK(old_vps[i], LK_EXCLUSIVE |
					    LK_RETRY);
				}
				vdrop(lvp);
			}

			*(vps_p[i]) = old_vps[i];
#if 0
			if (reles & VDESC_VP0_WILLUNLOCK)
				VOP_UNLOCK(*(vps_p[i]), 0);
#endif
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp.
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && error == 0) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		vppp = VOPARG_OFFSETTO(struct vnode ***,
		    descp->vdesc_vpp_offset, ap);
		if (*vppp != NULL)
			error = null_nodeget(old_vps[0]->v_mount, **vppp,
			    *vppp);
	}

	return (error);
}

static int
null_add_writecount(struct vop_add_writecount_args *ap)
{
	struct vnode *lvp, *vp;
	int error;

	vp = ap->a_vp;
	lvp = NULLVPTOLOWERVP(vp);
	VI_LOCK(vp);
	/* text refs are bypassed to lowervp */
	VNASSERT(vp->v_writecount >= 0, vp, ("wrong null writecount"));
	VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
	    ("wrong writecount inc %d", ap->a_inc));
	error = VOP_ADD_WRITECOUNT(lvp, ap->a_inc);
	if (error == 0)
		vp->v_writecount += ap->a_inc;
	VI_UNLOCK(vp);
	return (error);
}

/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	int flags = cnp->cn_flags;
	struct vnode *vp, *ldvp, *lvp;
	struct mount *mp;
	int error;

	mp = dvp->v_mount;
	if ((flags & ISLASTCN) != 0 && (mp->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	/*
	 * Although it is possible to call null_bypass(), we make
	 * a direct call here to reduce overhead.
	 */
	ldvp = NULLVPTOLOWERVP(dvp);
	vp = lvp = NULL;

	/*
	 * Renames in the lower mounts might create an inconsistent
	 * configuration where the lower vnode is moved out of the
	 * directory tree remounted by our null mount.  Do not try to
	 * handle it fancily; just avoid VOP_LOOKUP() with the DOTDOT
	 * name, which cannot be handled by the VOP, at least when
	 * passing over the lower root.
	 */
	if ((ldvp->v_vflag & VV_ROOT) != 0 && (flags & ISDOTDOT) != 0) {
		KASSERT((dvp->v_vflag & VV_ROOT) == 0,
		    ("ldvp %p fl %#x dvp %p fl %#x flags %#x",
		    ldvp, ldvp->v_vflag, dvp, dvp->v_vflag, flags));
		return (ENOENT);
	}

	/*
	 * Hold ldvp.  The reference on it, owned by dvp, is lost in
	 * case of dvp reclamation, and we need ldvp to move our lock
	 * from ldvp to dvp.
	 */
	vhold(ldvp);

	error = VOP_LOOKUP(ldvp, &lvp, cnp);

	/*
	 * VOP_LOOKUP() on the lower vnode may unlock ldvp, which allows
	 * dvp to be reclaimed due to the shared v_vnlock.  Check for the
	 * doomed state and return an error.
	 */
	if (VN_IS_DOOMED(dvp)) {
		if (error == 0 || error == EJUSTRETURN) {
			if (lvp != NULL)
				vput(lvp);
			error = ENOENT;
		}

		/*
		 * If vgone() reclaimed dvp before curthread
		 * relocked ldvp, the locks of dvp and ldvp are no
		 * longer shared.  In this case, the relock of ldvp in
		 * the lower fs VOP_LOOKUP() does not restore the locking
		 * state of dvp.  Compensate for this by unlocking
		 * ldvp and locking dvp, which is also correct if the
		 * locks are still shared.
		 */
		VOP_UNLOCK(ldvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	}
	vdrop(ldvp);

	if (error == EJUSTRETURN && (flags & ISLASTCN) != 0 &&
	    (mp->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
		if (ldvp == lvp) {
			*ap->a_vpp = dvp;
			VREF(dvp);
			vrele(lvp);
		} else {
			error = null_nodeget(mp, lvp, &vp);
			if (error == 0)
				*ap->a_vpp = vp;
		}
	}
	return (error);
}

static int
null_open(struct vop_open_args *ap)
{
	int retval;
	struct vnode *vp, *ldvp;

	vp = ap->a_vp;
	ldvp = NULLVPTOLOWERVP(vp);
	retval = null_bypass(&ap->a_gen);
	if (retval == 0) {
		vp->v_object = ldvp->v_object;
		if ((vn_irflag_read(ldvp) & VIRF_PGREAD) != 0) {
			MPASS(vp->v_object != NULL);
			if ((vn_irflag_read(vp) & VIRF_PGREAD) == 0) {
				vn_irflag_set_cond(vp, VIRF_PGREAD);
			}
		}
	}
	return (retval);
}

/*
 * Setattr call.  Disallow write attempts if the layer is mounted read-only.
 */
static int
null_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;

	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			if (vap->va_flags != VNOVAL)
				return (EOPNOTSUPP);
			return (0);
		case VREG:
		case VLNK:
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We handle stat and getattr only to change the fsid.
 */
static int
null_stat(struct vop_stat_args *ap)
{
	int error;

	if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
		return (error);

	ap->a_sb->st_dev = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return (0);
}

static int
null_getattr(struct vop_getattr_args *ap)
{
	int error;

	if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
		return (error);

	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return (0);
}

/*
 * Handled here to disallow write access if this layer is mounted read-only.
 */
static int
null_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;

	/*
	 * Disallow write attempts on read-only layers,
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the filesystem.
	 */
	if (accmode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			break;
		}
	}
	return (null_bypass((struct vop_generic_args *)ap));
}

static int
null_accessx(struct vop_accessx_args *ap)
{
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;

	/*
	 * Disallow write attempts on read-only layers,
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the filesystem.
	 */
	if (accmode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			break;
		}
	}
	return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * Increasing the refcount of the lower vnode is needed at least for the
 * case when the lower FS is NFS, to perform a sillyrename if the file is
 * in use.  Unfortunately, v_usecount is incremented in many places in
 * the kernel and, as such, there may be races that result in
 * the NFS client doing an extraneous silly rename, but that seems
 * preferable to not doing a silly rename when it is needed.
 */
static int
null_remove(struct vop_remove_args *ap)
{
	int retval, vreleit;
	struct vnode *lvp, *vp;

	vp = ap->a_vp;
	if (vrefcnt(vp) > 1) {
		lvp = NULLVPTOLOWERVP(vp);
		VREF(lvp);
		vreleit = 1;
	} else
		vreleit = 0;
	VTONULL(vp)->null_flags |= NULLV_DROP;
	retval = null_bypass(&ap->a_gen);
	if (vreleit != 0)
		vrele(lvp);
	return (retval);
}

/*
 * We handle this to prevent moving files from the null FS into the
 * lower FS.  It is not clear why this is disallowed; possibly it
 * should be permitted.
 */
static int
null_rename(struct vop_rename_args *ap)
{
	struct vnode *fdvp, *fvp, *tdvp, *tvp;
	struct vnode *lfdvp, *lfvp, *ltdvp, *ltvp;
	struct null_node *fdnn, *fnn, *tdnn, *tnn;
	int error;

	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tvp = ap->a_tvp;
	lfdvp = NULL;

	/* Check for cross-device rename. */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto upper_err;
	}

	VI_LOCK(fdvp);
	fdnn = VTONULL(fdvp);
	if (fdnn == NULL) {	/* fdvp is not locked, can be doomed */
		VI_UNLOCK(fdvp);
		error = ENOENT;
		goto upper_err;
	}
	lfdvp = fdnn->null_lowervp;
	vref(lfdvp);
	VI_UNLOCK(fdvp);

	VI_LOCK(fvp);
	fnn = VTONULL(fvp);
	if (fnn == NULL) {
		VI_UNLOCK(fvp);
		error = ENOENT;
		goto upper_err;
	}
	lfvp = fnn->null_lowervp;
	vref(lfvp);
	VI_UNLOCK(fvp);

	tdnn = VTONULL(tdvp);
	ltdvp = tdnn->null_lowervp;
	vref(ltdvp);

	if (tvp != NULL) {
		tnn = VTONULL(tvp);
		ltvp = tnn->null_lowervp;
		vref(ltvp);
		tnn->null_flags |= NULLV_DROP;
	} else {
		ltvp = NULL;
	}

	error = VOP_RENAME(lfdvp, lfvp, ap->a_fcnp, ltdvp, ltvp, ap->a_tcnp);
	vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
	return (error);

upper_err:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	if (lfdvp != NULL)
		vrele(lfdvp);
	vrele(fdvp);
	vrele(fvp);
	return (error);
}

static int
null_rmdir(struct vop_rmdir_args *ap)
{

	VTONULL(ap->a_vp)->null_flags |= NULLV_DROP;
	return (null_bypass(&ap->a_gen));
}

/*
 * We need to process our own vnode lock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_lock(struct vop_lock1_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int flags;
	struct null_node *nn;
	struct vnode *lvp;
	int error;

	if ((ap->a_flags & LK_INTERLOCK) == 0)
		VI_LOCK(vp);
	else
		ap->a_flags &= ~LK_INTERLOCK;
	flags = ap->a_flags;
	nn = VTONULL(vp);
	/*
	 * If we're still active we must ask the lower layer to
	 * lock as ffs has special lock considerations in its
	 * vop lock.
	 */
	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
		/*
		 * We have to hold the vnode here to solve a potential
		 * reclaim race.  If we're forcibly vgone'd while we
		 * still have refs, a thread could be sleeping inside
		 * the lowervp's vop_lock routine.  When we vgone we will
		 * drop our last ref to the lowervp, which would allow it
		 * to be reclaimed.  The lowervp could then be recycled,
		 * in which case it is not legal to be sleeping in its VOP.
		 * We prevent it from being recycled by holding the vnode
		 * here.
		 */
		vholdnz(lvp);
		VI_UNLOCK(vp);
		error = VOP_LOCK(lvp, flags);

		/*
		 * We might have slept to get the lock and someone might
		 * have cleaned our vnode already, switching the vnode
		 * lock from the one in lowervp to the v_lock in our own
		 * vnode structure.  Handle this case by reacquiring the
		 * correct lock in the requested mode.
		 */
		if (VTONULL(vp) == NULL && error == 0) {
			ap->a_flags &= ~LK_TYPE_MASK;
			switch (flags & LK_TYPE_MASK) {
			case LK_SHARED:
				ap->a_flags |= LK_SHARED;
				break;
			case LK_UPGRADE:
			case LK_EXCLUSIVE:
				ap->a_flags |= LK_EXCLUSIVE;
				break;
			default:
				panic("Unsupported lock request %d\n",
				    ap->a_flags);
			}
			VOP_UNLOCK(lvp);
			error = vop_stdlock(ap);
		}
		vdrop(lvp);
	} else {
		VI_UNLOCK(vp);
		error = vop_stdlock(ap);
	}

	return (error);
}

/*
 * We need to process our own vnode unlock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_unlock(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct null_node *nn;
	struct vnode *lvp;
	int error;

	nn = VTONULL(vp);
	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
		vholdnz(lvp);
		error = VOP_UNLOCK(lvp);
		vdrop(lvp);
	} else {
		error = vop_stdunlock(ap);
	}

	return (error);
}

/*
 * Do not allow the VOP_INACTIVE to be passed to the lower layer,
 * since the reference count on the lower vnode is not related to
 * ours.
 */
static int
null_want_recycle(struct vnode *vp)
{
	struct vnode *lvp;
	struct null_node *xp;
	struct mount *mp;
	struct null_mount *xmp;

	xp = VTONULL(vp);
	lvp = NULLVPTOLOWERVP(vp);
	mp = vp->v_mount;
	xmp = MOUNTTONULLMOUNT(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
	    (xp->null_flags & NULLV_DROP) != 0 ||
	    (lvp->v_vflag & VV_NOSYNC) != 0) {
		/*
		 * If this is the last reference and caching of the
		 * nullfs vnodes is not enabled, or the lower vnode is
		 * deleted, then free up the vnode so as not to tie up
		 * the lower vnodes.
		 */
		return (1);
	}
	return (0);
}

static int
null_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	if (null_want_recycle(vp)) {
		vp->v_object = NULL;
		vrecycle(vp);
	}
	return (0);
}

static int
null_need_inactive(struct vop_need_inactive_args *ap)
{

	return (null_want_recycle(ap->a_vp) || vn_need_pageq_flush(ap->a_vp));
}

/*
 * Now, the nullfs vnode and, due to the shared lock, the lower
 * vnode, are exclusively locked, and we shall destroy the null vnode.
 */
static int
null_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct null_node *xp;
	struct vnode *lowervp;

	vp = ap->a_vp;
	xp = VTONULL(vp);
	lowervp = xp->null_lowervp;

	KASSERT(lowervp != NULL && vp->v_vnlock != &vp->v_lock,
	    ("Reclaiming incomplete null vnode %p", vp));

	null_hashrem(xp);
	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in null_lock().
	 */
	lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
	VI_LOCK(vp);
	vp->v_data = NULL;
	vp->v_object = NULL;
	vp->v_vnlock = &vp->v_lock;

	/*
	 * If we were opened for write, we leased the write reference
	 * to the lower vnode.  If this is a reclamation due to a
	 * forced unmount, undo the reference now.
	 */
	if (vp->v_writecount > 0)
		VOP_ADD_WRITECOUNT(lowervp, -vp->v_writecount);
	else if (vp->v_writecount < 0)
		vp->v_writecount = 0;

	VI_UNLOCK(vp);

	if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
		vunref(lowervp);
	else
		vput(lowervp);
	free(xp, M_NULLFSNODE);

	return (0);
}

static int
null_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;

	printf("\tvp=%p, lowervp=%p\n", vp, VTONULL(vp)->null_lowervp);
	return (0);
}

/* ARGSUSED */
static int
null_getwritemount(struct vop_getwritemount_args *ap)
{
	struct null_node *xp;
	struct vnode *lowervp;
	struct vnode *vp;

	vp = ap->a_vp;
	VI_LOCK(vp);
	xp = VTONULL(vp);
	if (xp && (lowervp = xp->null_lowervp)) {
		vholdnz(lowervp);
		VI_UNLOCK(vp);
		VOP_GETWRITEMOUNT(lowervp, ap->a_mpp);
		vdrop(lowervp);
	} else {
		VI_UNLOCK(vp);
		*(ap->a_mpp) = NULL;
	}
	return (0);
}

static int
null_vptofh(struct vop_vptofh_args *ap)
{
	struct vnode *lvp;

	lvp = NULLVPTOLOWERVP(ap->a_vp);
	return (VOP_VPTOFH(lvp, ap->a_fhp));
}

static int
null_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct vnode *lvp, *ldvp;
	struct mount *mp;
	int error, locked;

	locked = VOP_ISLOCKED(vp);
	lvp = NULLVPTOLOWERVP(vp);
	mp = vp->v_mount;
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error != 0)
		return (error);
	vhold(lvp);
	VOP_UNLOCK(vp);	/* vp is held by vn_vptocnp_locked that called us */
	ldvp = lvp;
	vref(lvp);
	error = vn_vptocnp(&ldvp, ap->a_buf, ap->a_buflen);
	vdrop(lvp);
	if (error != 0) {
		vn_lock(vp, locked | LK_RETRY);
		vfs_unbusy(mp);
		return (ENOENT);
	}

	error = vn_lock(ldvp, LK_SHARED);
	if (error != 0) {
		vrele(ldvp);
		vn_lock(vp, locked | LK_RETRY);
		vfs_unbusy(mp);
		return (ENOENT);
	}
	error = null_nodeget(mp, ldvp, dvp);
	if (error == 0) {
#ifdef DIAGNOSTIC
		NULLVPTOLOWERVP(*dvp);
#endif
		VOP_UNLOCK(*dvp);	/* keep reference on *dvp */
	}
	vn_lock(vp, locked | LK_RETRY);
	vfs_unbusy(mp);
	return (error);
}

static int
null_read_pgcache(struct vop_read_pgcache_args *ap)
{
	struct vnode *lvp, *vp;
	struct null_node *xp;
	int error;

	vp = ap->a_vp;
	VI_LOCK(vp);
	xp = VTONULL(vp);
	if (xp == NULL) {
		VI_UNLOCK(vp);
		return (EJUSTRETURN);
	}
	lvp = xp->null_lowervp;
	vref(lvp);
	VI_UNLOCK(vp);
	error = VOP_READ_PGCACHE(lvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	vrele(lvp);
	return (error);
}

static int
null_advlock(struct vop_advlock_args *ap)
{
	struct vnode *lvp, *vp;
	struct null_node *xp;
	int error;

	vp = ap->a_vp;
	VI_LOCK(vp);
	xp = VTONULL(vp);
	if (xp == NULL) {
		VI_UNLOCK(vp);
		return (EBADF);
	}
	lvp = xp->null_lowervp;
	vref(lvp);
	VI_UNLOCK(vp);
	error = VOP_ADVLOCK(lvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags);
	vrele(lvp);
	return (error);
}

/*
 * Avoid the standard bypass, since the lower dvp and vp could no
 * longer be valid after vput().
 */
static int
null_vput_pair(struct vop_vput_pair_args *ap)
{
	struct mount *mp;
	struct vnode *dvp, *ldvp, *lvp, *vp, *vp1, **vpp;
	int error, res;

	dvp = ap->a_dvp;
	ldvp = NULLVPTOLOWERVP(dvp);
	vref(ldvp);

	vpp = ap->a_vpp;
	vp = NULL;
	lvp = NULL;
	mp = NULL;
	if (vpp != NULL)
		vp = *vpp;
	if (vp != NULL) {
		lvp = NULLVPTOLOWERVP(vp);
		vref(lvp);
		if (!ap->a_unlock_vp) {
			vhold(vp);
			vhold(lvp);
			mp = vp->v_mount;
			vfs_ref(mp);
		}
	}

	res = VOP_VPUT_PAIR(ldvp, lvp != NULL ? &lvp : NULL, true);
	if (vp != NULL && ap->a_unlock_vp)
		vrele(vp);
	vrele(dvp);

	if (vp == NULL || ap->a_unlock_vp)
		return (res);

	/* lvp has been unlocked and vp might be reclaimed */
	VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_data == NULL && vfs_busy(mp, MBF_NOWAIT) == 0) {
		vput(vp);
		vget(lvp, LK_EXCLUSIVE | LK_RETRY);
		if (VN_IS_DOOMED(lvp)) {
			vput(lvp);
			vget(vp, LK_EXCLUSIVE | LK_RETRY);
		} else {
			error = null_nodeget(mp, lvp, &vp1);
			if (error == 0) {
				*vpp = vp1;
			} else {
				vget(vp, LK_EXCLUSIVE | LK_RETRY);
			}
		}
		vfs_unbusy(mp);
	}
	vdrop(lvp);
	vdrop(vp);
	vfs_rel(mp);

	return (res);
}

/*
 * Global vfs data structures
 */
struct vop_vector null_vnodeops = {
	.vop_bypass =		null_bypass,
	.vop_access =		null_access,
	.vop_accessx =		null_accessx,
	.vop_advlock =		null_advlock,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_bmap =		VOP_EOPNOTSUPP,
	.vop_stat =		null_stat,
	.vop_getattr =		null_getattr,
	.vop_getwritemount =	null_getwritemount,
	.vop_inactive =		null_inactive,
	.vop_need_inactive =	null_need_inactive,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		null_lock,
	.vop_lookup =		null_lookup,
	.vop_open =		null_open,
	.vop_print =		null_print,
	.vop_read_pgcache =	null_read_pgcache,
	.vop_reclaim =		null_reclaim,
	.vop_remove =		null_remove,
	.vop_rename =		null_rename,
	.vop_rmdir =		null_rmdir,
	.vop_setattr =		null_setattr,
	.vop_strategy =		VOP_EOPNOTSUPP,
	.vop_unlock =		null_unlock,
	.vop_vptocnp =		null_vptocnp,
	.vop_vptofh =		null_vptofh,
	.vop_add_writecount =	null_add_writecount,
	.vop_vput_pair =	null_vput_pair,
};
VFS_VOP_VECTOR_REGISTER(null_vnodeops);