The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
   24  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
   25  */
   26 
   27 /*
   28  * ZFS control directory (a.k.a. ".zfs")
   29  *
   30  * This directory provides a common location for all ZFS meta-objects.
   31  * Currently, this is only the 'snapshot' directory, but this may expand in the
   32  * future.  The elements are built using the GFS primitives, as the hierarchy
   33  * does not actually exist on disk.
   34  *
   35  * For 'snapshot', we don't want to have all snapshots always mounted, because
   36  * this would take up a huge amount of space in /etc/mnttab.  We have three
   37  * types of objects:
   38  *
   39  *      ctldir ------> snapshotdir -------> snapshot
   40  *                                             |
   41  *                                             |
   42  *                                             V
   43  *                                         mounted fs
   44  *
   45  * The 'snapshot' node contains just enough information to lookup '..' and act
   46  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
   47  * perform an automount of the underlying filesystem and return the
   48  * corresponding vnode.
   49  *
   50  * All mounts are handled automatically by the kernel, but unmounts are
   51  * (currently) handled from user land.  The main reason is that there is no
   52  * reliable way to auto-unmount the filesystem when it's "no longer in use".
   53  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
   54  * unmounts any snapshots within the snapshot directory.
   55  *
   56  * The '.zfs', '.zfs/snapshot', and all directories created under
   57  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
   58  * share the same vfs_t as the head filesystem (what '.zfs' lives under).
   59  *
   60  * File systems mounted ontop of the GFS nodes '.zfs/snapshot/<snapname>'
   61  * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
   62  * However, vnodes within these mounted on file systems have their v_vfsp
   63  * fields set to the head filesystem to make NFS happy (see
   64  * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
   65  * so that it cannot be freed until all snapshots have been unmounted.
   66  */
   67 
   68 #include <sys/types.h>
   69 #include <sys/param.h>
   70 #include <sys/libkern.h>
   71 #include <sys/dirent.h>
   72 #include <sys/zfs_context.h>
   73 #include <sys/zfs_ctldir.h>
   74 #include <sys/zfs_ioctl.h>
   75 #include <sys/zfs_vfsops.h>
   76 #include <sys/namei.h>
   77 #include <sys/stat.h>
   78 #include <sys/dmu.h>
   79 #include <sys/dsl_dataset.h>
   80 #include <sys/dsl_destroy.h>
   81 #include <sys/dsl_deleg.h>
   82 #include <sys/mount.h>
   83 #include <sys/zap.h>
   84 #include <sys/sysproto.h>
   85 
   86 #include "zfs_namecheck.h"
   87 
   88 #include <sys/kernel.h>
   89 #include <sys/ccompat.h>
   90 
   91 /* Common access mode for all virtual directories under the ctldir */
   92 const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
   93     S_IROTH | S_IXOTH;
   94 
   95 /*
   96  * "Synthetic" filesystem implementation.
   97  */
   98 
   99 /*
  100  * Assert that A implies B.
  101  */
  102 #define KASSERT_IMPLY(A, B, msg)        KASSERT(!(A) || (B), (msg));
  103 
  104 static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
  105 
  106 typedef struct sfs_node {
  107         char            sn_name[ZFS_MAX_DATASET_NAME_LEN];
  108         uint64_t        sn_parent_id;
  109         uint64_t        sn_id;
  110 } sfs_node_t;
  111 
  112 /*
  113  * Check the parent's ID as well as the node's to account for a chance
  114  * that IDs originating from different domains (snapshot IDs, artificial
  115  * IDs, znode IDs) may clash.
  116  */
  117 static int
  118 sfs_compare_ids(struct vnode *vp, void *arg)
  119 {
  120         sfs_node_t *n1 = vp->v_data;
  121         sfs_node_t *n2 = arg;
  122         bool equal;
  123 
  124         equal = n1->sn_id == n2->sn_id &&
  125             n1->sn_parent_id == n2->sn_parent_id;
  126 
  127         /* Zero means equality. */
  128         return (!equal);
  129 }
  130 
/*
 * Look up an existing sfs vnode on mount 'mp' by its (parent_id, id) pair.
 * The low 32 bits of the id serve as the hash key; sfs_compare_ids()
 * resolves collisions.  On success *vpp holds the vnode, or NULL if no
 * matching vnode is cached.
 */
static int
sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
    uint64_t id, struct vnode **vpp)
{
	sfs_node_t search;
	int err;

	search.sn_id = id;
	search.sn_parent_id = parent_id;
	err = vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp,
	    sfs_compare_ids, &search);
	return (err);
}
  144 
/*
 * Insert 'vp' into the per-mount vfs hash under the low 32 bits of 'id'.
 * The vnode's own sfs node (v_data) is used as the comparison argument so
 * a concurrent insert of the same (parent_id, id) is detected; in that
 * case *vpp returns the already-inserted vnode instead.
 */
static int
sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
    uint64_t id, struct vnode **vpp)
{
	int err;

	KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
	err = vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp,
	    sfs_compare_ids, vp->v_data);
	return (err);
}
  156 
/* Remove 'vp' from the vfs hash so it can no longer be looked up. */
static void
sfs_vnode_remove(struct vnode *vp)
{
	vfs_hash_remove(vp);
}
  162 
  163 typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
  164 
/*
 * Get or create the sfs vnode identified by (parent_id, id) on mount 'mp'.
 * 'setup' initializes a freshly allocated vnode (v_data, v_type, lock
 * flags) before it becomes visible in the vfs hash.  On success the vnode
 * is returned locked in *vpp.
 */
static int
sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
    const char *tag, struct vop_vector *vops,
    sfs_vnode_setup_fn setup, void *arg,
    struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	/* Fast path: the vnode may already be cached in the vfs hash. */
	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	/* Allocate a new vnode/inode. */
	error = getnewvnode(tag, mp, vops, &vp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Exclusively lock the vnode while it's being constructed.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		/* NOTE(review): insmntque() appears to dispose of the vnode
		 * on failure, hence no explicit release here -- verify. */
		*vpp = NULL;
		return (error);
	}

	setup(vp, arg);

	/*
	 * Publish the vnode in the hash.  If a concurrent thread won the
	 * race and inserted an equivalent node first, *vpp returns that
	 * vnode and we report it instead of ours.
	 */
	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	*vpp = vp;
	return (0);
}
  210 
/* Dump an sfs node's identity to the console (VOP_PRINT backend). */
static void
sfs_print_node(sfs_node_t *node)
{
	printf("\tname = %s\n", node->sn_name);
	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
}
  218 
/*
 * Allocate and initialize an sfs node.  'size' may exceed sizeof(sfs_node_t)
 * so callers can embed the node as the first member of a larger structure
 * (see zfsctl_root_t).  Sleeps until memory is available (M_WAITOK);
 * never returns NULL.
 */
static sfs_node_t *
sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
{
	struct sfs_node *node;

	KASSERT(strlen(name) < sizeof (node->sn_name),
	    ("sfs node name is too long"));
	KASSERT(size >= sizeof (*node), ("sfs node size is too small"));
	node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
	strlcpy(node->sn_name, name, sizeof (node->sn_name));
	node->sn_parent_id = parent_id;
	node->sn_id = id;

	return (node);
}
  234 
/* Release an sfs node allocated by sfs_alloc_node(). */
static void
sfs_destroy_node(sfs_node_t *node)
{
	free(node, M_SFSNODES);
}
  240 
/*
 * Detach an sfs vnode: remove it from the vfs hash and clear v_data.
 * Returns the former v_data pointer; the node itself is not freed here --
 * ownership stays with the caller (e.g. the ctldir keeps the root and
 * snapdir nodes until zfsctl_destroy()).
 */
static void *
sfs_reclaim_vnode(vnode_t *vp)
{
	void *data;

	sfs_vnode_remove(vp);
	data = vp->v_data;
	vp->v_data = NULL;
	return (data);
}
  251 
/*
 * Emit the synthetic "." and ".." entries for an sfs directory.  'id' is
 * the directory's own inode number, 'parent_id' its parent's.  Directory
 * offsets are multiples of sizeof(struct dirent); on success *offp (if
 * non-NULL) is set just past the two fixed entries, where caller-specific
 * entries begin.
 */
static int
sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
    zfs_uio_t *uio, off_t *offp)
{
	struct dirent entry;
	int error;

	/* Reset ncookies for subsequent use of vfs_read_dirent. */
	if (ap->a_ncookies != NULL)
		*ap->a_ncookies = 0;

	/* Need room for at least one full-size entry. */
	if (zfs_uio_resid(uio) < sizeof (entry))
		return (SET_ERROR(EINVAL));

	if (zfs_uio_offset(uio) < 0)
		return (SET_ERROR(EINVAL));
	/* Offset 0: emit ".". */
	if (zfs_uio_offset(uio) == 0) {
		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '\0';
		entry.d_namlen = 1;
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
		if (error != 0)
			return (SET_ERROR(error));
	}

	/* An offset inside the first entry is not on a valid boundary. */
	if (zfs_uio_offset(uio) < sizeof (entry))
		return (SET_ERROR(EINVAL));
	/* Offset sizeof(entry): emit "..". */
	if (zfs_uio_offset(uio) == sizeof (entry)) {
		entry.d_fileno = parent_id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '.';
		entry.d_name[2] = '\0';
		entry.d_namlen = 2;
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
		if (error != 0)
			return (SET_ERROR(error));
	}

	if (offp != NULL)
		*offp = 2 * sizeof (entry);
	return (0);
}
  299 
  300 
  301 /*
  302  * .zfs inode namespace
  303  *
  304  * We need to generate unique inode numbers for all files and directories
  305  * within the .zfs pseudo-filesystem.  We use the following scheme:
  306  *
  307  *      ENTRY                   ZFSCTL_INODE
  308  *      .zfs                    1
  309  *      .zfs/snapshot           2
  310  *      .zfs/snapshot/<snap>    objectid(snap)
  311  */
  312 #define ZFSCTL_INO_SNAP(id)     (id)
  313 
  314 static struct vop_vector zfsctl_ops_root;
  315 static struct vop_vector zfsctl_ops_snapdir;
  316 static struct vop_vector zfsctl_ops_snapshot;
  317 
/* Module init hook; the FreeBSD ctldir needs no global state. */
void
zfsctl_init(void)
{
}

/* Module teardown hook; nothing to release. */
void
zfsctl_fini(void)
{
}
  327 
  328 boolean_t
  329 zfsctl_is_node(vnode_t *vp)
  330 {
  331         return (vn_matchops(vp, zfsctl_ops_root) ||
  332             vn_matchops(vp, zfsctl_ops_snapdir) ||
  333             vn_matchops(vp, zfsctl_ops_snapshot));
  334 
  335 }
  336 
/* State for the '.zfs' directory itself. */
typedef struct zfsctl_root {
	sfs_node_t	node;		/* base sfs node; must stay first, the
					 * struct is cast to/from sfs_node_t */
	sfs_node_t	*snapdir;	/* the '.zfs/snapshot' child node */
	timestruc_t	cmtime;		/* fixed c/mtime, taken from the
					 * dataset root's creation time */
} zfsctl_root_t;
  342 
  343 
/*
 * Create the '.zfs' directory: allocate the root and 'snapshot' sfs nodes
 * and record the dataset root's creation time to report as the ctldir's
 * c/mtime.  Must be called before any ctldir vnodes exist (z_ctldir NULL).
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	zfsctl_root_t *dot_zfs;
	sfs_node_t *snapdir;
	vnode_t *rvp;
	uint64_t crtime[2];

	ASSERT3P(zfsvfs->z_ctldir, ==, NULL);

	snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT,
	    ZFSCTL_INO_SNAPDIR);
	dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0,
	    ZFSCTL_INO_ROOT);
	dot_zfs->snapdir = snapdir;

	/* Borrow the filesystem root's creation time for the ctldir. */
	VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp));
	VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
	    &crtime, sizeof (crtime)));
	ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
	vput(rvp);

	zfsvfs->z_ctldir = dot_zfs;
}
  371 
/*
 * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
 * The nodes must not have any associated vnodes by now as they should be
 * vflush-ed.
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
	/* Free the child first, then the root node that points at it. */
	sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
	sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
	zfsvfs->z_ctldir = NULL;
}
  384 
/*
 * vn_vget_ino_gen() callback that yields the head filesystem's root vnode
 * (used to resolve '..' from '.zfs').
 */
static int
zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	return (VFS_ROOT(mp, flags, vpp));
}
  391 
/*
 * Common sfs_vgetx() setup callback: mark the vnode as a shared-lockable
 * directory and attach the sfs node passed in 'arg' as v_data.  The vnode
 * must be exclusively locked (it is still being constructed).
 */
static void
zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
{
	ASSERT_VOP_ELOCKED(vp, __func__);

	/* We support shared locking. */
	VN_LOCK_ASHARE(vp);
	vp->v_type = VDIR;
	vp->v_data = arg;
}
  402 
/*
 * Get (or create) the vnode for '.zfs' itself, backed by the persistent
 * zfsctl_root_t stored in the zfsvfs.
 */
static int
zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	void *node;
	int err;

	node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir;
	err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
	    zfsctl_common_vnode_setup, node, vpp);
	return (err);
}
  415 
/*
 * Get (or create) the vnode for '.zfs/snapshot', backed by the persistent
 * snapdir sfs node hanging off the zfsctl root.
 */
static int
zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	void *node;
	int err;

	node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir->snapdir;
	err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
	    &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
	return (err);
}
  428 
/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Add a hold to the vnode and return it.
 */
int
zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
{
	int error;

	error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
	return (error);
}
  441 
  442 /*
  443  * Common open routine.  Disallow any write access.
  444  */
  445 static int
  446 zfsctl_common_open(struct vop_open_args *ap)
  447 {
  448         int flags = ap->a_mode;
  449 
  450         if (flags & FWRITE)
  451                 return (SET_ERROR(EACCES));
  452 
  453         return (0);
  454 }
  455 
/*
 * Common close routine.  Nothing to do here.
 */
static int
zfsctl_common_close(struct vop_close_args *ap)
{
	(void) ap;
	return (0);
}
  465 
  466 /*
  467  * Common access routine.  Disallow writes.
  468  */
  469 static int
  470 zfsctl_common_access(struct vop_access_args *ap)
  471 {
  472         accmode_t accmode = ap->a_accmode;
  473 
  474         if (accmode & VWRITE)
  475                 return (SET_ERROR(EACCES));
  476         return (0);
  477 }
  478 
/*
 * Common getattr function.  Fill in the attributes shared by every ctldir
 * vnode: root ownership, the fixed read/execute mode, the fsid of the head
 * filesystem, and the node's inode number.  Time-related fields other than
 * atime are left for the caller to set.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
	timestruc_t	now;
	sfs_node_t *node;

	node = vp->v_data;

	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_rdev = 0;
	/*
	 * We are a purely virtual object, so we have no
	 * blocksize or allocated blocks.
	 */
	vap->va_blksize = 0;
	vap->va_nblocks = 0;
	vap->va_gen = 0;
	vn_fsid(vp, vap);
	vap->va_mode = zfsctl_ctldir_mode;
	vap->va_type = VDIR;
	/*
	 * We live in the now (for atime).
	 */
	gethrestime(&now);
	vap->va_atime = now;
	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_flags = 0;

	vap->va_nodeid = node->sn_id;

	/* At least '.' and '..'. */
	vap->va_nlink = 2;
}
  516 
#ifndef _OPENSOLARIS_SYS_VNODE_H_
struct vop_fid_args {
	struct vnode *a_vp;
	struct fid *a_fid;
};
#endif

/*
 * Build an NFS file handle for a ctldir vnode: a short zfid carrying the
 * node's id as the object number, byte-serialized little-endian, with a
 * zero generation.
 */
static int
zfsctl_common_fid(struct vop_fid_args *ap)
{
	vnode_t		*vp = ap->a_vp;
	fid_t		*fidp = (void *)ap->a_fid;
	sfs_node_t	*node = vp->v_data;
	uint64_t	object = node->sn_id;
	zfid_short_t	*zfid;
	int		i;

	zfid = (zfid_short_t *)fidp;
	zfid->zf_len = SHORT_FID_LEN;

	/* Serialize the object id one byte at a time, LSB first. */
	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs nodes always have a generation number of 0 */
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	return (0);
}
  546 
#ifndef _SYS_SYSPROTO_H_
struct vop_reclaim_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_RECLAIM for root/snapdir vnodes: detach from the vfs hash and drop
 * v_data.  The sfs node is intentionally not freed -- it lives until
 * zfsctl_destroy().
 */
static int
zfsctl_common_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t *vp = ap->a_vp;

	(void) sfs_reclaim_vnode(vp);
	return (0);
}
  562 
#ifndef _SYS_SYSPROTO_H_
struct vop_print_args {
	struct vnode *a_vp;
};
#endif

/* VOP_PRINT: dump the sfs node behind the vnode for debugging. */
static int
zfsctl_common_print(struct vop_print_args *ap)
{
	sfs_print_node(ap->a_vp->v_data);
	return (0);
}
  575 
#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * Get root directory attributes.  All time fields report the head
 * filesystem's creation time recorded in zfsctl_create().
 */
static int
zfsctl_root_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	zfsctl_root_t *node = vp->v_data;

	zfsctl_common_getattr(vp, vap);
	vap->va_ctime = node->cmtime;
	vap->va_mtime = vap->va_ctime;
	vap->va_birthtime = vap->va_ctime;
	vap->va_nlink += 1; /* snapdir */
	vap->va_size = vap->va_nlink;
	return (0);
}
  602 
/*
 * Handle a lookup of "." that requests a lock type different from the one
 * currently held on the directory vnode: take a reference, then upgrade or
 * downgrade the lock as needed.
 */
static int
zfsctl_relock_dot(vnode_t *dvp, int ltype)
{
	vref(dvp);
	if (ltype != VOP_ISLOCKED(dvp)) {
		if (ltype == LK_EXCLUSIVE)
			vn_lock(dvp, LK_UPGRADE | LK_RETRY);
		else /* if (ltype == LK_SHARED) */
			vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

		/*
		 * The relock may have left us with a reclaimed vnode
		 * (LK_UPGRADE can drop the lock transiently).
		 */
		if (VN_IS_DOOMED(dvp)) {
			vrele(dvp);
			return (SET_ERROR(ENOENT));
		}
	}
	return (0);
}
  625 
  626 /*
  627  * Special case the handling of "..".
  628  */
  629 static int
  630 zfsctl_root_lookup(struct vop_lookup_args *ap)
  631 {
  632         struct componentname *cnp = ap->a_cnp;
  633         vnode_t *dvp = ap->a_dvp;
  634         vnode_t **vpp = ap->a_vpp;
  635         int flags = ap->a_cnp->cn_flags;
  636         int lkflags = ap->a_cnp->cn_lkflags;
  637         int nameiop = ap->a_cnp->cn_nameiop;
  638         int err;
  639 
  640         ASSERT3S(dvp->v_type, ==, VDIR);
  641 
  642         if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
  643                 return (SET_ERROR(ENOTSUP));
  644 
  645         if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
  646                 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
  647                 if (err == 0)
  648                         *vpp = dvp;
  649         } else if ((flags & ISDOTDOT) != 0) {
  650                 err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
  651                     lkflags, vpp);
  652         } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
  653                 err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
  654         } else {
  655                 err = SET_ERROR(ENOENT);
  656         }
  657         if (err != 0)
  658                 *vpp = NULL;
  659         return (err);
  660 }
  661 
/*
 * VOP_READDIR for '.zfs': after the common "." and ".." entries, the only
 * real entry is "snapshot".  ENAMETOOLONG from vfs_read_dirent indicates
 * the caller's buffer is full and is treated as a short (successful) read.
 */
static int
zfsctl_root_readdir(struct vop_readdir_args *ap)
{
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_root_t *node = vp->v_data;
	zfs_uio_t uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	zfs_uio_init(&uio, ap->a_uio);

	ASSERT3S(vp->v_type, ==, VDIR);

	/* '.' points at ourselves, '..' at the head filesystem's root. */
	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
	    &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}
	/* Any offset other than "just past the dots" is invalid here. */
	if (zfs_uio_offset(&uio) != dots_offset)
		return (SET_ERROR(EINVAL));

	_Static_assert(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name),
	    "node->snapdir->sn_name too big for entry.d_name");
	entry.d_fileno = node->snapdir->sn_id;
	entry.d_type = DT_DIR;
	strcpy(entry.d_name, node->snapdir->sn_name);
	entry.d_namlen = strlen(entry.d_name);
	entry.d_reclen = sizeof (entry);
	error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
	if (error != 0) {
		if (error == ENAMETOOLONG)
			error = 0;
		return (SET_ERROR(error));
	}
	if (eofp != NULL)
		*eofp = 1;
	return (0);
}
  705 
/*
 * VOP_VPTOCNP for '.zfs': reverse-translate the vnode to its name by
 * copying ".zfs" (including the NUL, per sizeof of the 4-byte array) into
 * the tail of the caller's buffer and returning the head filesystem's
 * root as the parent directory.
 */
static int
zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
{
	static const char dotzfs_name[4] = ".zfs";
	vnode_t *dvp;
	int error;

	if (*ap->a_buflen < sizeof (dotzfs_name))
		return (SET_ERROR(ENOMEM));

	error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
	    LK_SHARED, &dvp);
	if (error != 0)
		return (SET_ERROR(error));

	/* The parent vnode is returned held but unlocked. */
	VOP_UNLOCK1(dvp);
	*ap->a_vpp = dvp;
	*ap->a_buflen -= sizeof (dotzfs_name);
	memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name));
	return (0);
}
  727 
/*
 * VOP_PATHCONF for ctldir vnodes.
 */
static int
zfsctl_common_pathconf(struct vop_pathconf_args *ap)
{
	/*
	 * We care about ACL variables so that user land utilities like ls
	 * can display them correctly.  Since the ctldir's st_dev is set to be
	 * the same as the parent dataset, we must support all variables that
	 * it supports.
	 */
	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX);
		return (0);

	case _PC_FILESIZEBITS:
		*ap->a_retval = 64;
		return (0);

	case _PC_MIN_HOLE_SIZE:
		*ap->a_retval = (int)SPA_MINBLOCKSIZE;
		return (0);

	case _PC_ACL_EXTENDED:
		*ap->a_retval = 0;
		return (0);

	case _PC_ACL_NFS4:
		*ap->a_retval = 1;
		return (0);

	case _PC_ACL_PATH_MAX:
		*ap->a_retval = ACL_MAX_ENTRIES;
		return (0);

	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);

	default:
		/* Everything else falls back to the generic handler. */
		return (vop_stdpathconf(ap));
	}
}
  770 
  771 /*
  772  * Returns a trivial ACL
  773  */
  774 static int
  775 zfsctl_common_getacl(struct vop_getacl_args *ap)
  776 {
  777         int i;
  778 
  779         if (ap->a_type != ACL_TYPE_NFS4)
  780                 return (EINVAL);
  781 
  782         acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0);
  783         /*
  784          * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify
  785          * attributes.  That is not the case for the ctldir, so we must clear
  786          * those bits.  We also must clear ACL_READ_NAMED_ATTRS, because xattrs
  787          * aren't supported by the ctldir.
  788          */
  789         for (i = 0; i < ap->a_aclp->acl_cnt; i++) {
  790                 struct acl_entry *entry;
  791                 entry = &(ap->a_aclp->acl_entry[i]);
  792                 entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER |
  793                     ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS |
  794                     ACL_READ_NAMED_ATTRS);
  795         }
  796 
  797         return (0);
  798 }
  799 
/* Vnode operations for the '.zfs' directory itself. */
static struct vop_vector zfsctl_ops_root = {
	.vop_default =	&default_vnodeops,
#if __FreeBSD_version >= 1300121
	/* No lockless lookup through the ctldir. */
	.vop_fplookup_vexec = VOP_EAGAIN,
#endif
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_ioctl =	VOP_EINVAL,
	.vop_getattr =	zfsctl_root_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_root_readdir,
	.vop_lookup =	zfsctl_root_lookup,
	.vop_inactive = VOP_NULL,
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
	.vop_vptocnp =	zfsctl_root_vptocnp,
	.vop_pathconf = zfsctl_common_pathconf,
	.vop_getacl =	zfsctl_common_getacl,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root);
  824 
/*
 * Build the full dataset name of a snapshot ("pool/fs@name") into 'zname'.
 * 'len' is the size of the destination buffer; the check reserves room for
 * the '@' separator and the trailing NUL.
 */
static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	dmu_objset_name(os, zname);
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}
  837 
/*
 * Resolve a snapshot name (relative to vp's dataset) to its object id.
 */
static int
zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
	int err;

	err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
	return (err);
}
  847 
/*
 * Given a vnode get a root vnode of a filesystem mounted on top of
 * the vnode, if any.  The root vnode is referenced and locked.
 * If no filesystem is mounted then the original vnode remains referenced
 * and locked (signalled by EJUSTRETURN).  If any error happens the
 * original vnode is unlocked and released.
 */
static int
zfsctl_mounted_here(vnode_t **vpp, int flags)
{
	struct mount *mp;
	int err;

	ASSERT_VOP_LOCKED(*vpp, __func__);
	ASSERT3S((*vpp)->v_type, ==, VDIR);

	if ((mp = (*vpp)->v_mountedhere) != NULL) {
		/* Busy the mount to keep it from going away under us. */
		err = vfs_busy(mp, 0);
		KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
		KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
		/* Trade the covered vnode for the covering fs's root. */
		vput(*vpp);
		err = VFS_ROOT(mp, flags, vpp);
		vfs_unbusy(mp);
		return (err);
	}
	return (EJUSTRETURN);
}
  875 
/* Argument bundle for zfsctl_snapshot_vnode_setup(). */
typedef struct {
	const char *snap_name;	/* snapshot component name */
	uint64_t    snap_id;	/* snapshot id; becomes the node/inode id */
} snapshot_setup_arg_t;
  880 
/*
 * sfs_vgetx() setup callback for snapshot mountpoint vnodes: allocate a
 * per-snapshot sfs node parented at the snapdir, then apply the common
 * directory setup.
 */
static void
zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
{
	snapshot_setup_arg_t *ssa = arg;
	sfs_node_t *node;

	ASSERT_VOP_ELOCKED(vp, __func__);

	node = sfs_alloc_node(sizeof (sfs_node_t),
	    ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
	zfsctl_common_vnode_setup(vp, node);

	/* We have to support recursive locking. */
	VN_LOCK_AREC(vp);
}
  896 
  897 /*
  898  * Lookup entry point for the 'snapshot' directory.  Try to open the
  899  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
  900  * Perform a mount of the associated dataset on top of the vnode.
  901  * There are four possibilities:
  902  * - the snapshot node and vnode do not exist
  903  * - the snapshot vnode is covered by the mounted snapshot
  904  * - the snapshot vnode is not covered yet, the mount operation is in progress
  905  * - the snapshot vnode is not covered, because the snapshot has been unmounted
  906  * The last two states are transient and should be relatively short-lived.
  907  */
static int
zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
{
	vnode_t *dvp = ap->a_dvp;
	vnode_t **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	char name[NAME_MAX + 1];
	char fullname[ZFS_MAX_DATASET_NAME_LEN];
	char *mountpoint;
	size_t mountpoint_len;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	uint64_t snap_id;
	int nameiop = cnp->cn_nameiop;
	int lkflags = cnp->cn_lkflags;
	int flags = cnp->cn_flags;
	int err;

	ASSERT3S(dvp->v_type, ==, VDIR);

	/* Entries under .zfs/snapshot cannot be created/renamed/removed. */
	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
		return (SET_ERROR(ENOTSUP));

	/* "." — relock and return the snapdir itself. */
	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
		if (err == 0)
			*vpp = dvp;
		return (err);
	}
	/* ".." — the parent is the .zfs root directory. */
	if (flags & ISDOTDOT) {
		err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
		    vpp);
		return (err);
	}

	if (cnp->cn_namelen >= sizeof (name))
		return (SET_ERROR(ENAMETOOLONG));

	strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
	err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
	if (err != 0)
		return (SET_ERROR(ENOENT));

	/*
	 * Loop until one of three things happens: a fresh vnode is created
	 * (returned exclusively locked), the snapshot turns out to be
	 * already mounted on the vnode, or we win an exclusive-lock upgrade
	 * on an uncovered vnode and can proceed to (re-)mount it.
	 */
	for (;;) {
		snapshot_setup_arg_t ssa;

		ssa.snap_name = name;
		ssa.snap_id = snap_id;
		err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
		    snap_id, "zfs", &zfsctl_ops_snapshot,
		    zfsctl_snapshot_vnode_setup, &ssa, vpp);
		if (err != 0)
			return (err);

		/* Check if a new vnode has just been created. */
		if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
			break;

		/*
		 * Check if a snapshot is already mounted on top of the vnode.
		 */
		err = zfsctl_mounted_here(vpp, lkflags);
		if (err != EJUSTRETURN)
			return (err);

		/*
		 * If the vnode is not covered, then either the mount operation
		 * is in progress or the snapshot has already been unmounted
		 * but the vnode hasn't been inactivated and reclaimed yet.
		 * We can try to re-use the vnode in the latter case.
		 */
		VI_LOCK(*vpp);
		if (((*vpp)->v_iflag & VI_MOUNT) == 0) {
			VI_UNLOCK(*vpp);
			/*
			 * Upgrade to exclusive lock in order to:
			 * - avoid race conditions
			 * - satisfy the contract of mount_snapshot()
			 */
			err = VOP_LOCK(*vpp, LK_TRYUPGRADE);
			if (err == 0)
				break;
		} else {
			VI_UNLOCK(*vpp);
		}

		/*
		 * In this state we can loop on uncontested locks and starve
		 * the thread doing the lengthy, non-trivial mount operation.
		 * So, yield to prevent that from happening.
		 */
		vput(*vpp);
		kern_yield(PRI_USER);
	}

	VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname));

	/* Build the mountpoint path: <mnt>/.zfs/snapshot/<name>. */
	mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
	    strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
	(void) snprintf(mountpoint, mountpoint_len,
	    "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
	    dvp->v_vfsp->mnt_stat.f_mntonname, name);

	err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
	kmem_free(mountpoint, mountpoint_len);
	if (err == 0) {
		/*
		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
		 *
		 * This is where we lie about our v_vfsp in order to
		 * make .zfs/snapshot/<snapname> accessible over NFS
		 * without requiring manual mounts of <snapname>.
		 */
		ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;

		/* Clear the root flag (set via VFS_ROOT) as well. */
		(*vpp)->v_vflag &= ~VV_ROOT;
	}

	if (err != 0)
		*vpp = NULL;
	return (err);
}
 1032 
/*
 * Readdir for .zfs/snapshot: emits "." and "..", then one directory
 * entry per snapshot of the dataset.  The uio offset (minus the dots)
 * doubles as the snapshot-list iteration cookie.
 */
static int
zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfs_uio_t uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	zfs_uio_init(&uio, ap->a_uio);

	ASSERT3S(vp->v_type, ==, VDIR);

	/* Emit the dot entries; dots_offset is where snapshots start. */
	error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap,
	    &uio, &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);
	for (;;) {
		uint64_t cookie;
		uint64_t id;

		cookie = zfs_uio_offset(&uio) - dots_offset;

		/* Pool config lock is required around snapshot iteration. */
		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
		    snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
			/* ENOENT means we ran out of snapshots — clean EOF. */
			if (error == ENOENT) {
				if (eofp != NULL)
					*eofp = 1;
				error = 0;
			}
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}

		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		strcpy(entry.d_name, snapname);
		entry.d_namlen = strlen(entry.d_name);
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
		if (error != 0) {
			/* ENAMETOOLONG here means the caller's buffer is full. */
			if (error == ENAMETOOLONG)
				error = 0;
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(error));
		}
		zfs_uio_setoffset(&uio, cookie + dots_offset);
	}
	__builtin_unreachable();
}
 1095 
 1096 static int
 1097 zfsctl_snapdir_getattr(struct vop_getattr_args *ap)
 1098 {
 1099         vnode_t *vp = ap->a_vp;
 1100         vattr_t *vap = ap->a_vap;
 1101         zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 1102         dsl_dataset_t *ds;
 1103         uint64_t snap_count;
 1104         int err;
 1105 
 1106         if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
 1107                 return (err);
 1108         ds = dmu_objset_ds(zfsvfs->z_os);
 1109         zfsctl_common_getattr(vp, vap);
 1110         vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
 1111         vap->va_mtime = vap->va_ctime;
 1112         vap->va_birthtime = vap->va_ctime;
 1113         if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
 1114                 err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
 1115                     dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
 1116                 if (err != 0) {
 1117                         zfs_exit(zfsvfs, FTAG);
 1118                         return (err);
 1119                 }
 1120                 vap->va_nlink += snap_count;
 1121         }
 1122         vap->va_size = vap->va_nlink;
 1123 
 1124         zfs_exit(zfsvfs, FTAG);
 1125         return (0);
 1126 }
 1127 
/* Vnode operations vector for the .zfs/snapshot directory. */
static struct vop_vector zfsctl_ops_snapdir = {
	.vop_default =	&default_vnodeops,
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec = VOP_EAGAIN,
#endif
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_getattr =	zfsctl_snapdir_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_snapdir_readdir,
	.vop_lookup =	zfsctl_snapdir_lookup,
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
	.vop_pathconf = zfsctl_common_pathconf,
	.vop_getacl =	zfsctl_common_getacl,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir);
 1149 
 1150 
 1151 static int
 1152 zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
 1153 {
 1154         vnode_t *vp = ap->a_vp;
 1155 
 1156         VERIFY3S(vrecycle(vp), ==, 1);
 1157         return (0);
 1158 }
 1159 
 1160 static int
 1161 zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap)
 1162 {
 1163         vnode_t *vp = ap->a_vp;
 1164         void *data = vp->v_data;
 1165 
 1166         sfs_reclaim_vnode(vp);
 1167         sfs_destroy_node(data);
 1168         return (0);
 1169 }
 1170 
/*
 * Reverse lookup (vnode-to-name) for a snapshot mountpoint vnode:
 * returns the .zfs/snapshot directory as the parent and prepends the
 * snapshot's name to the caller's path buffer.
 */
static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
{
	struct mount *mp;
	vnode_t *dvp;
	vnode_t *vp;
	sfs_node_t *node;
	size_t len;
	int locked;
	int error;

	vp = ap->a_vp;
	node = vp->v_data;
	len = strlen(node->sn_name);
	if (*ap->a_buflen < len)
		return (SET_ERROR(ENOMEM));

	/*
	 * Prevent unmounting of the snapshot while the vnode lock
	 * is not held.  That is not strictly required, but allows
	 * us to assert that an uncovered snapshot vnode is never
	 * "leaked".
	 */
	mp = vp->v_mountedhere;
	if (mp == NULL)
		return (SET_ERROR(ENOENT));
	error = vfs_busy(mp, 0);
	KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));

	/*
	 * We can vput the vnode as we can now depend on the reference owned
	 * by the busied mp.  But we also need to hold the vnode, because
	 * the reference may go after vfs_unbusy() which has to be called
	 * before we can lock the vnode again.
	 */
	locked = VOP_ISLOCKED(vp);
#if __FreeBSD_version >= 1300045
	enum vgetstate vs = vget_prep(vp);
#else
	vhold(vp);
#endif
	vput(vp);

	/* Look up .zfs/snapshot, our parent. */
	error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
	if (error == 0) {
		VOP_UNLOCK1(dvp);
		*ap->a_vpp = dvp;
		/* Prepend our name into the caller's path buffer. */
		*ap->a_buflen -= len;
		memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len);
	}
	vfs_unbusy(mp);
	/* Re-acquire the vnode with its original lock state. */
#if __FreeBSD_version >= 1300045
	vget_finish(vp, locked | LK_RETRY, vs);
#else
	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
#endif
	return (error);
}
 1230 
/*
 * These VP's should never see the light of day.  They should always
 * be covered.  They exist only to serve as mountpoints for snapshot
 * datasets, so everything except locking, reclamation, and reverse
 * lookup is deliberately left unimplemented (.vop_default = NULL).
 */
static struct vop_vector zfsctl_ops_snapshot = {
	.vop_default =		NULL, /* ensure very restricted access */
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec =	VOP_EAGAIN,
#endif
	.vop_inactive =		zfsctl_snapshot_inactive,
#if __FreeBSD_version >= 1300045
	.vop_need_inactive = vop_stdneed_inactive,
#endif
	.vop_reclaim =		zfsctl_snapshot_reclaim,
	.vop_vptocnp =		zfsctl_snapshot_vptocnp,
	.vop_lock1 =		vop_stdlock,
	.vop_unlock =		vop_stdunlock,
	.vop_islocked =		vop_stdislocked,
	.vop_advlockpurge =	vop_stdadvlockpurge, /* called by vgone */
	.vop_print =		zfsctl_common_print,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot);
 1256 
 1257 int
 1258 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
 1259 {
 1260         zfsvfs_t *zfsvfs __unused = vfsp->vfs_data;
 1261         vnode_t *vp;
 1262         int error;
 1263 
 1264         ASSERT3P(zfsvfs->z_ctldir, !=, NULL);
 1265         *zfsvfsp = NULL;
 1266         error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
 1267             ZFSCTL_INO_SNAPDIR, objsetid, &vp);
 1268         if (error == 0 && vp != NULL) {
 1269                 /*
 1270                  * XXX Probably need to at least reference, if not busy, the mp.
 1271                  */
 1272                 if (vp->v_mountedhere != NULL)
 1273                         *zfsvfsp = vp->v_mountedhere->mnt_data;
 1274                 vput(vp);
 1275         }
 1276         if (*zfsvfsp == NULL)
 1277                 return (SET_ERROR(EINVAL));
 1278         return (0);
 1279 }
 1280 
 1281 /*
 1282  * Unmount any snapshots for the given filesystem.  This is called from
 1283  * zfs_umount() - if we have a ctldir, then go through and unmount all the
 1284  * snapshots.
 1285  */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct mount *mp;
	vnode_t *vp;
	uint64_t cookie;
	int error;

	ASSERT3P(zfsvfs->z_ctldir, !=, NULL);

	cookie = 0;
	/* Walk every snapshot of this dataset. */
	for (;;) {
		uint64_t id;

		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
		    snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
			/* ENOENT means the snapshot list is exhausted. */
			if (error == ENOENT)
				error = 0;
			break;
		}

		for (;;) {
			error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
			    ZFSCTL_INO_SNAPDIR, id, &vp);
			if (error != 0 || vp == NULL)
				break;

			mp = vp->v_mountedhere;

			/*
			 * v_mountedhere being NULL means that the
			 * (uncovered) vnode is in a transient state
			 * (mounting or unmounting), so loop until it
			 * settles down.
			 */
			if (mp != NULL)
				break;
			vput(vp);
		}
		if (error != 0)
			break;
		if (vp == NULL)
			continue;	/* no mountpoint, nothing to do */

		/*
		 * The mount-point vnode is kept locked to avoid spurious EBUSY
		 * from a concurrent umount.
		 * The vnode lock must have recursive locking enabled.
		 */
		vfs_ref(mp);
		error = dounmount(mp, fflags, curthread);
		KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
		    ("extra references after unmount"));
		vput(vp);
		if (error != 0)
			break;
	}
	KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
	    ("force unmounting failed"));
	return (error);
}
 1352 
/*
 * Force-unmount the filesystem mounted for the named snapshot, if any.
 * Non-snapshot names and unmounted snapshots are silently ignored.
 */
int
zfsctl_snapshot_unmount(const char *snapname, int flags __unused)
{
	vfs_t *vfsp = NULL;
	zfsvfs_t *zfsvfs = NULL;

	/* Only snapshot names ("fs@snap") are of interest here. */
	if (strchr(snapname, '@') == NULL)
		return (0);

	/* Failure just means the snapshot is not mounted — not an error. */
	int err = getzfsvfs(snapname, &zfsvfs);
	if (err != 0) {
		ASSERT3P(zfsvfs, ==, NULL);
		return (0);
	}
	vfsp = zfsvfs->z_vfs;

	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));

	/*
	 * Swap the busy hold (presumably taken by getzfsvfs() — hence
	 * the unbusy below) for a plain reference, which is what
	 * dounmount() expects.
	 */
	vfs_ref(vfsp);
	vfs_unbusy(vfsp);
	return (dounmount(vfsp, MS_FORCE, curthread));
}

Cache object: baf6824aa79e9895b8414bbc4cfad4d5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.