The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
   24  * All rights reserved.
   25  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
   26  * Copyright (c) 2014 Integros [integros.com]
   27  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
   28  */
   29 
   30 /* Portions Copyright 2010 Robert Milkowski */
   31 
   32 #include <sys/types.h>
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kernel.h>
   36 #include <sys/sysmacros.h>
   37 #include <sys/kmem.h>
   38 #include <sys/acl.h>
   39 #include <sys/vnode.h>
   40 #include <sys/vfs.h>
   41 #include <sys/mntent.h>
   42 #include <sys/mount.h>
   43 #include <sys/cmn_err.h>
   44 #include <sys/zfs_znode.h>
   45 #include <sys/zfs_vnops.h>
   46 #include <sys/zfs_dir.h>
   47 #include <sys/zil.h>
   48 #include <sys/fs/zfs.h>
   49 #include <sys/dmu.h>
   50 #include <sys/dsl_prop.h>
   51 #include <sys/dsl_dataset.h>
   52 #include <sys/dsl_deleg.h>
   53 #include <sys/spa.h>
   54 #include <sys/zap.h>
   55 #include <sys/sa.h>
   56 #include <sys/sa_impl.h>
   57 #include <sys/policy.h>
   58 #include <sys/atomic.h>
   59 #include <sys/zfs_ioctl.h>
   60 #include <sys/zfs_ctldir.h>
   61 #include <sys/zfs_fuid.h>
   62 #include <sys/sunddi.h>
   63 #include <sys/dmu_objset.h>
   64 #include <sys/dsl_dir.h>
   65 #include <sys/jail.h>
   66 #include <sys/osd.h>
   67 #include <ufs/ufs/quota.h>
   68 #include <sys/zfs_quota.h>
   69 
   70 #include "zfs_comutil.h"
   71 
   72 #ifndef MNTK_VMSETSIZE_BUG
   73 #define MNTK_VMSETSIZE_BUG      0
   74 #endif
   75 #ifndef MNTK_NOMSYNC
   76 #define MNTK_NOMSYNC    8
   77 #endif
   78 
/* Debug mutex, initialised through MTX_SYSINIT as an MTX_DEF mutex. */
struct mtx zfs_debug_mtx;
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);

/* Parent sysctl node (vfs.zfs) for the knobs declared below. */
SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");

/* vfs.zfs.super_owner: read-write integer, defaults to 0. */
int zfs_super_owner;
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
        "File system owners can perform privileged operation on file systems");

/* vfs.zfs.debug: read-write integer that is also a tunable (CTLFLAG_RWTUN). */
int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
        "Debug level");

/* ZFS-specific jail parameters. */
struct zfs_jailparam {
        int mount_snapshot;
};

/* Default parameter set: snapshot mounting disabled. */
static struct zfs_jailparam zfs_jailparam0 = {
        .mount_snapshot = 0,
};

/*
 * NOTE(review): presumably the osd(9) slot under which per-jail
 * zfs_jailparam data is stored; the code that assigns and uses it is
 * outside this excerpt.
 */
static int zfs_jailparam_slot;

/* Jail parameter node "zfs" and its integer "mount_snapshot" parameter. */
SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
        "Allow mounting snapshots in the .zfs directory for unjailed datasets");

/*
 * Read-only sysctls under vfs.zfs.version exporting the compile-time
 * ACL, SPA and ZPL version constants.
 */
SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
        "ZFS_ACL_VERSION");
static int zfs_version_spa = SPA_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
        "SPA_VERSION");
static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
        "ZPL_VERSION");
  116 
/*
 * Forward declarations of the VFS entry points installed in zfs_vfsops.
 * The zfs_quotactl() and zfs_checkexp() signatures depend on
 * __FreeBSD_version.
 */
#if __FreeBSD_version >= 1400018
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
    bool *mp_busy);
#else
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
#endif
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
static int zfs_sync(vfs_t *vfsp, int waitfor);
#if __FreeBSD_version >= 1300098
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors);
#else
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors);
#endif
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
static void zfs_freevfs(vfs_t *vfsp);
  138 
/*
 * VFS operations vector for ZFS.  From __FreeBSD_version 1300049 on,
 * root lookups go through vfs_cache_root() with zfs_root() installed as
 * the cachedroot hook; older kernels call zfs_root() directly.
 */
struct vfsops zfs_vfsops = {
        .vfs_mount =            zfs_mount,
        .vfs_unmount =          zfs_umount,
#if __FreeBSD_version >= 1300049
        .vfs_root =             vfs_cache_root,
        .vfs_cachedroot = zfs_root,
#else
        .vfs_root =             zfs_root,
#endif
        .vfs_statfs =           zfs_statfs,
        .vfs_vget =             zfs_vget,
        .vfs_sync =             zfs_sync,
        .vfs_checkexp =         zfs_checkexp,
        .vfs_fhtovp =           zfs_fhtovp,
        .vfs_quotactl =         zfs_quotactl,
};

/* Register the "zfs" file system type with the VFCF_JAIL and VFCF_DELEGADMIN flags. */
VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
  157 
/*
 * Count of active ZFS file systems.  The count is kept so that the
 * module is not unloaded after a forced unmount (umount -f).
 * NOTE(review): the code that updates and checks this counter is
 * outside this excerpt.
 */
static uint32_t zfs_active_fs_count = 0;
  164 
  165 int
  166 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
  167     char *setpoint)
  168 {
  169         int error;
  170         zfsvfs_t *zfvp;
  171         vfs_t *vfsp;
  172         objset_t *os;
  173         uint64_t tmp = *val;
  174 
  175         error = dmu_objset_from_ds(ds, &os);
  176         if (error != 0)
  177                 return (error);
  178 
  179         error = getzfsvfs_impl(os, &zfvp);
  180         if (error != 0)
  181                 return (error);
  182         if (zfvp == NULL)
  183                 return (ENOENT);
  184         vfsp = zfvp->z_vfs;
  185         switch (zfs_prop) {
  186         case ZFS_PROP_ATIME:
  187                 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
  188                         tmp = 0;
  189                 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
  190                         tmp = 1;
  191                 break;
  192         case ZFS_PROP_DEVICES:
  193                 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
  194                         tmp = 0;
  195                 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
  196                         tmp = 1;
  197                 break;
  198         case ZFS_PROP_EXEC:
  199                 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
  200                         tmp = 0;
  201                 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
  202                         tmp = 1;
  203                 break;
  204         case ZFS_PROP_SETUID:
  205                 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
  206                         tmp = 0;
  207                 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
  208                         tmp = 1;
  209                 break;
  210         case ZFS_PROP_READONLY:
  211                 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
  212                         tmp = 0;
  213                 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
  214                         tmp = 1;
  215                 break;
  216         case ZFS_PROP_XATTR:
  217                 if (zfvp->z_flags & ZSB_XATTR)
  218                         tmp = zfvp->z_xattr;
  219                 break;
  220         case ZFS_PROP_NBMAND:
  221                 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
  222                         tmp = 0;
  223                 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
  224                         tmp = 1;
  225                 break;
  226         default:
  227                 vfs_unbusy(vfsp);
  228                 return (ENOENT);
  229         }
  230 
  231         vfs_unbusy(vfsp);
  232         if (tmp != *val) {
  233                 (void) strcpy(setpoint, "temporary");
  234                 *val = tmp;
  235         }
  236         return (0);
  237 }
  238 
  239 static int
  240 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
  241 {
  242         int error = 0;
  243         char buf[32];
  244         uint64_t usedobj, quotaobj;
  245         uint64_t quota, used = 0;
  246         timespec_t now;
  247 
  248         usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
  249         quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
  250 
  251         if (quotaobj == 0 || zfsvfs->z_replay) {
  252                 error = ENOENT;
  253                 goto done;
  254         }
  255         (void) sprintf(buf, "%llx", (longlong_t)id);
  256         if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
  257             buf, sizeof (quota), 1, &quota)) != 0) {
  258                 dprintf("%s(%d): quotaobj lookup failed\n",
  259                     __FUNCTION__, __LINE__);
  260                 goto done;
  261         }
  262         /*
  263          * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit".
  264          * So we set them to be the same.
  265          */
  266         dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
  267         error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
  268         if (error && error != ENOENT) {
  269                 dprintf("%s(%d):  usedobj failed; %d\n",
  270                     __FUNCTION__, __LINE__, error);
  271                 goto done;
  272         }
  273         dqp->dqb_curblocks = btodb(used);
  274         dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
  275         vfs_timestamp(&now);
  276         /*
  277          * Setting this to 0 causes FreeBSD quota(8) to print
  278          * the number of days since the epoch, which isn't
  279          * particularly useful.
  280          */
  281         dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
  282 done:
  283         return (error);
  284 }
  285 
  286 static int
  287 #if __FreeBSD_version >= 1400018
  288 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
  289 #else
  290 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
  291 #endif
  292 {
  293         zfsvfs_t *zfsvfs = vfsp->vfs_data;
  294         struct thread *td;
  295         int cmd, type, error = 0;
  296         int bitsize;
  297         zfs_userquota_prop_t quota_type;
  298         struct dqblk64 dqblk = { 0 };
  299 
  300         td = curthread;
  301         cmd = cmds >> SUBCMDSHIFT;
  302         type = cmds & SUBCMDMASK;
  303 
  304         if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
  305                 return (error);
  306         if (id == -1) {
  307                 switch (type) {
  308                 case USRQUOTA:
  309                         id = td->td_ucred->cr_ruid;
  310                         break;
  311                 case GRPQUOTA:
  312                         id = td->td_ucred->cr_rgid;
  313                         break;
  314                 default:
  315                         error = EINVAL;
  316 #if __FreeBSD_version < 1400018
  317                         if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
  318                                 vfs_unbusy(vfsp);
  319 #endif
  320                         goto done;
  321                 }
  322         }
  323         /*
  324          * Map BSD type to:
  325          * ZFS_PROP_USERUSED,
  326          * ZFS_PROP_USERQUOTA,
  327          * ZFS_PROP_GROUPUSED,
  328          * ZFS_PROP_GROUPQUOTA
  329          */
  330         switch (cmd) {
  331         case Q_SETQUOTA:
  332         case Q_SETQUOTA32:
  333                 if (type == USRQUOTA)
  334                         quota_type = ZFS_PROP_USERQUOTA;
  335                 else if (type == GRPQUOTA)
  336                         quota_type = ZFS_PROP_GROUPQUOTA;
  337                 else
  338                         error = EINVAL;
  339                 break;
  340         case Q_GETQUOTA:
  341         case Q_GETQUOTA32:
  342                 if (type == USRQUOTA)
  343                         quota_type = ZFS_PROP_USERUSED;
  344                 else if (type == GRPQUOTA)
  345                         quota_type = ZFS_PROP_GROUPUSED;
  346                 else
  347                         error = EINVAL;
  348                 break;
  349         }
  350 
  351         /*
  352          * Depending on the cmd, we may need to get
  353          * the ruid and domain (see fuidstr_to_sid?),
  354          * the fuid (how?), or other information.
  355          * Create fuid using zfs_fuid_create(zfsvfs, id,
  356          * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
  357          * I think I can use just the id?
  358          *
  359          * Look at zfs_id_overquota() to look up a quota.
  360          * zap_lookup(something, quotaobj, fuidstring,
  361          *     sizeof (long long), 1, &quota)
  362          *
  363          * See zfs_set_userquota() to set a quota.
  364          */
  365         if ((uint32_t)type >= MAXQUOTAS) {
  366                 error = EINVAL;
  367                 goto done;
  368         }
  369 
  370         switch (cmd) {
  371         case Q_GETQUOTASIZE:
  372                 bitsize = 64;
  373                 error = copyout(&bitsize, arg, sizeof (int));
  374                 break;
  375         case Q_QUOTAON:
  376                 // As far as I can tell, you can't turn quotas on or off on zfs
  377                 error = 0;
  378 #if __FreeBSD_version < 1400018
  379                 vfs_unbusy(vfsp);
  380 #endif
  381                 break;
  382         case Q_QUOTAOFF:
  383                 error = ENOTSUP;
  384 #if __FreeBSD_version < 1400018
  385                 vfs_unbusy(vfsp);
  386 #endif
  387                 break;
  388         case Q_SETQUOTA:
  389                 error = copyin(arg, &dqblk, sizeof (dqblk));
  390                 if (error == 0)
  391                         error = zfs_set_userquota(zfsvfs, quota_type,
  392                             "", id, dbtob(dqblk.dqb_bhardlimit));
  393                 break;
  394         case Q_GETQUOTA:
  395                 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
  396                 if (error == 0)
  397                         error = copyout(&dqblk, arg, sizeof (dqblk));
  398                 break;
  399         default:
  400                 error = EINVAL;
  401                 break;
  402         }
  403 done:
  404         zfs_exit(zfsvfs, FTAG);
  405         return (error);
  406 }
  407 
  408 
  409 boolean_t
  410 zfs_is_readonly(zfsvfs_t *zfsvfs)
  411 {
  412         return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
  413 }
  414 
  415 static int
  416 zfs_sync(vfs_t *vfsp, int waitfor)
  417 {
  418 
  419         /*
  420          * Data integrity is job one.  We don't want a compromised kernel
  421          * writing to the storage pool, so we never sync during panic.
  422          */
  423         if (panicstr)
  424                 return (0);
  425 
  426         /*
  427          * Ignore the system syncher.  ZFS already commits async data
  428          * at zfs_txg_timeout intervals.
  429          */
  430         if (waitfor == MNT_LAZY)
  431                 return (0);
  432 
  433         if (vfsp != NULL) {
  434                 /*
  435                  * Sync a specific filesystem.
  436                  */
  437                 zfsvfs_t *zfsvfs = vfsp->vfs_data;
  438                 dsl_pool_t *dp;
  439                 int error;
  440 
  441                 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
  442                         return (error);
  443                 dp = dmu_objset_pool(zfsvfs->z_os);
  444 
  445                 /*
  446                  * If the system is shutting down, then skip any
  447                  * filesystems which may exist on a suspended pool.
  448                  */
  449                 if (rebooting && spa_suspended(dp->dp_spa)) {
  450                         zfs_exit(zfsvfs, FTAG);
  451                         return (0);
  452                 }
  453 
  454                 if (zfsvfs->z_log != NULL)
  455                         zil_commit(zfsvfs->z_log, 0);
  456 
  457                 zfs_exit(zfsvfs, FTAG);
  458         } else {
  459                 /*
  460                  * Sync all ZFS filesystems.  This is what happens when you
  461                  * run sync(8).  Unlike other filesystems, ZFS honors the
  462                  * request by waiting for all pools to commit all dirty data.
  463                  */
  464                 spa_sync_allpools();
  465         }
  466 
  467         return (0);
  468 }
  469 
  470 static void
  471 atime_changed_cb(void *arg, uint64_t newval)
  472 {
  473         zfsvfs_t *zfsvfs = arg;
  474 
  475         if (newval == TRUE) {
  476                 zfsvfs->z_atime = TRUE;
  477                 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
  478                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
  479                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
  480         } else {
  481                 zfsvfs->z_atime = FALSE;
  482                 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
  483                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
  484                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
  485         }
  486 }
  487 
  488 static void
  489 xattr_changed_cb(void *arg, uint64_t newval)
  490 {
  491         zfsvfs_t *zfsvfs = arg;
  492 
  493         if (newval == ZFS_XATTR_OFF) {
  494                 zfsvfs->z_flags &= ~ZSB_XATTR;
  495         } else {
  496                 zfsvfs->z_flags |= ZSB_XATTR;
  497 
  498                 if (newval == ZFS_XATTR_SA)
  499                         zfsvfs->z_xattr_sa = B_TRUE;
  500                 else
  501                         zfsvfs->z_xattr_sa = B_FALSE;
  502         }
  503 }
  504 
  505 static void
  506 blksz_changed_cb(void *arg, uint64_t newval)
  507 {
  508         zfsvfs_t *zfsvfs = arg;
  509         ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
  510         ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
  511         ASSERT(ISP2(newval));
  512 
  513         zfsvfs->z_max_blksz = newval;
  514         zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
  515 }
  516 
  517 static void
  518 readonly_changed_cb(void *arg, uint64_t newval)
  519 {
  520         zfsvfs_t *zfsvfs = arg;
  521 
  522         if (newval) {
  523                 /* XXX locking on vfs_flag? */
  524                 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
  525                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
  526                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
  527         } else {
  528                 /* XXX locking on vfs_flag? */
  529                 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
  530                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
  531                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
  532         }
  533 }
  534 
  535 static void
  536 setuid_changed_cb(void *arg, uint64_t newval)
  537 {
  538         zfsvfs_t *zfsvfs = arg;
  539 
  540         if (newval == FALSE) {
  541                 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
  542                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
  543                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
  544         } else {
  545                 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
  546                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
  547                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
  548         }
  549 }
  550 
  551 static void
  552 exec_changed_cb(void *arg, uint64_t newval)
  553 {
  554         zfsvfs_t *zfsvfs = arg;
  555 
  556         if (newval == FALSE) {
  557                 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
  558                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
  559                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
  560         } else {
  561                 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
  562                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
  563                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
  564         }
  565 }
  566 
  567 /*
  568  * The nbmand mount option can be changed at mount time.
  569  * We can't allow it to be toggled on live file systems or incorrect
  570  * behavior may be seen from cifs clients
  571  *
  572  * This property isn't registered via dsl_prop_register(), but this callback
  573  * will be called when a file system is first mounted
  574  */
  575 static void
  576 nbmand_changed_cb(void *arg, uint64_t newval)
  577 {
  578         zfsvfs_t *zfsvfs = arg;
  579         if (newval == FALSE) {
  580                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
  581                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
  582         } else {
  583                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
  584                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
  585         }
  586 }
  587 
  588 static void
  589 snapdir_changed_cb(void *arg, uint64_t newval)
  590 {
  591         zfsvfs_t *zfsvfs = arg;
  592 
  593         zfsvfs->z_show_ctldir = newval;
  594 }
  595 
  596 static void
  597 acl_mode_changed_cb(void *arg, uint64_t newval)
  598 {
  599         zfsvfs_t *zfsvfs = arg;
  600 
  601         zfsvfs->z_acl_mode = newval;
  602 }
  603 
  604 static void
  605 acl_inherit_changed_cb(void *arg, uint64_t newval)
  606 {
  607         zfsvfs_t *zfsvfs = arg;
  608 
  609         zfsvfs->z_acl_inherit = newval;
  610 }
  611 
  612 static void
  613 acl_type_changed_cb(void *arg, uint64_t newval)
  614 {
  615         zfsvfs_t *zfsvfs = arg;
  616 
  617         zfsvfs->z_acl_type = newval;
  618 }
  619 
  620 static int
  621 zfs_register_callbacks(vfs_t *vfsp)
  622 {
  623         struct dsl_dataset *ds = NULL;
  624         objset_t *os = NULL;
  625         zfsvfs_t *zfsvfs = NULL;
  626         uint64_t nbmand;
  627         boolean_t readonly = B_FALSE;
  628         boolean_t do_readonly = B_FALSE;
  629         boolean_t setuid = B_FALSE;
  630         boolean_t do_setuid = B_FALSE;
  631         boolean_t exec = B_FALSE;
  632         boolean_t do_exec = B_FALSE;
  633         boolean_t xattr = B_FALSE;
  634         boolean_t atime = B_FALSE;
  635         boolean_t do_atime = B_FALSE;
  636         boolean_t do_xattr = B_FALSE;
  637         int error = 0;
  638 
  639         ASSERT3P(vfsp, !=, NULL);
  640         zfsvfs = vfsp->vfs_data;
  641         ASSERT3P(zfsvfs, !=, NULL);
  642         os = zfsvfs->z_os;
  643 
  644         /*
  645          * This function can be called for a snapshot when we update snapshot's
  646          * mount point, which isn't really supported.
  647          */
  648         if (dmu_objset_is_snapshot(os))
  649                 return (EOPNOTSUPP);
  650 
  651         /*
  652          * The act of registering our callbacks will destroy any mount
  653          * options we may have.  In order to enable temporary overrides
  654          * of mount options, we stash away the current values and
  655          * restore them after we register the callbacks.
  656          */
  657         if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
  658             !spa_writeable(dmu_objset_spa(os))) {
  659                 readonly = B_TRUE;
  660                 do_readonly = B_TRUE;
  661         } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
  662                 readonly = B_FALSE;
  663                 do_readonly = B_TRUE;
  664         }
  665         if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
  666                 setuid = B_FALSE;
  667                 do_setuid = B_TRUE;
  668         } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
  669                 setuid = B_TRUE;
  670                 do_setuid = B_TRUE;
  671         }
  672         if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
  673                 exec = B_FALSE;
  674                 do_exec = B_TRUE;
  675         } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
  676                 exec = B_TRUE;
  677                 do_exec = B_TRUE;
  678         }
  679         if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
  680                 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
  681                 do_xattr = B_TRUE;
  682         } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
  683                 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
  684                 do_xattr = B_TRUE;
  685         } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
  686                 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
  687                 do_xattr = B_TRUE;
  688         } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
  689                 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
  690                 do_xattr = B_TRUE;
  691         }
  692         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
  693                 atime = B_FALSE;
  694                 do_atime = B_TRUE;
  695         } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
  696                 atime = B_TRUE;
  697                 do_atime = B_TRUE;
  698         }
  699 
  700         /*
  701          * We need to enter pool configuration here, so that we can use
  702          * dsl_prop_get_int_ds() to handle the special nbmand property below.
  703          * dsl_prop_get_integer() can not be used, because it has to acquire
  704          * spa_namespace_lock and we can not do that because we already hold
  705          * z_teardown_lock.  The problem is that spa_write_cachefile() is called
  706          * with spa_namespace_lock held and the function calls ZFS vnode
  707          * operations to write the cache file and thus z_teardown_lock is
  708          * acquired after spa_namespace_lock.
  709          */
  710         ds = dmu_objset_ds(os);
  711         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
  712 
  713         /*
  714          * nbmand is a special property.  It can only be changed at
  715          * mount time.
  716          *
  717          * This is weird, but it is documented to only be changeable
  718          * at mount time.
  719          */
  720         if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
  721                 nbmand = B_FALSE;
  722         } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
  723                 nbmand = B_TRUE;
  724         } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
  725                 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
  726                 return (error);
  727         }
  728 
  729         /*
  730          * Register property callbacks.
  731          *
  732          * It would probably be fine to just check for i/o error from
  733          * the first prop_register(), but I guess I like to go
  734          * overboard...
  735          */
  736         error = dsl_prop_register(ds,
  737             zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
  738         error = error ? error : dsl_prop_register(ds,
  739             zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
  740         error = error ? error : dsl_prop_register(ds,
  741             zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
  742         error = error ? error : dsl_prop_register(ds,
  743             zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
  744         error = error ? error : dsl_prop_register(ds,
  745             zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
  746         error = error ? error : dsl_prop_register(ds,
  747             zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
  748         error = error ? error : dsl_prop_register(ds,
  749             zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
  750         error = error ? error : dsl_prop_register(ds,
  751             zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
  752         error = error ? error : dsl_prop_register(ds,
  753             zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
  754         error = error ? error : dsl_prop_register(ds,
  755             zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
  756             zfsvfs);
  757         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
  758         if (error)
  759                 goto unregister;
  760 
  761         /*
  762          * Invoke our callbacks to restore temporary mount options.
  763          */
  764         if (do_readonly)
  765                 readonly_changed_cb(zfsvfs, readonly);
  766         if (do_setuid)
  767                 setuid_changed_cb(zfsvfs, setuid);
  768         if (do_exec)
  769                 exec_changed_cb(zfsvfs, exec);
  770         if (do_xattr)
  771                 xattr_changed_cb(zfsvfs, xattr);
  772         if (do_atime)
  773                 atime_changed_cb(zfsvfs, atime);
  774 
  775         nbmand_changed_cb(zfsvfs, nbmand);
  776 
  777         return (0);
  778 
  779 unregister:
  780         dsl_prop_unregister_all(ds, zfsvfs);
  781         return (error);
  782 }
  783 
  784 /*
  785  * Associate this zfsvfs with the given objset, which must be owned.
  786  * This will cache a bunch of on-disk state from the objset in the
  787  * zfsvfs.
  788  */
  789 static int
  790 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
  791 {
  792         int error;
  793         uint64_t val;
  794 
  795         zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
  796         zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
  797         zfsvfs->z_os = os;
  798 
  799         error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
  800         if (error != 0)
  801                 return (error);
  802         if (zfsvfs->z_version >
  803             zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
  804                 (void) printf("Can't mount a version %lld file system "
  805                     "on a version %lld pool\n. Pool must be upgraded to mount "
  806                     "this file system.", (u_longlong_t)zfsvfs->z_version,
  807                     (u_longlong_t)spa_version(dmu_objset_spa(os)));
  808                 return (SET_ERROR(ENOTSUP));
  809         }
  810         error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
  811         if (error != 0)
  812                 return (error);
  813         zfsvfs->z_norm = (int)val;
  814 
  815         error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
  816         if (error != 0)
  817                 return (error);
  818         zfsvfs->z_utf8 = (val != 0);
  819 
  820         error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
  821         if (error != 0)
  822                 return (error);
  823         zfsvfs->z_case = (uint_t)val;
  824 
  825         error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
  826         if (error != 0)
  827                 return (error);
  828         zfsvfs->z_acl_type = (uint_t)val;
  829 
  830         /*
  831          * Fold case on file systems that are always or sometimes case
  832          * insensitive.
  833          */
  834         if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
  835             zfsvfs->z_case == ZFS_CASE_MIXED)
  836                 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
  837 
  838         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
  839         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
  840 
  841         uint64_t sa_obj = 0;
  842         if (zfsvfs->z_use_sa) {
  843                 /* should either have both of these objects or none */
  844                 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
  845                     &sa_obj);
  846                 if (error != 0)
  847                         return (error);
  848 
  849                 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
  850                 if (error == 0 && val == ZFS_XATTR_SA)
  851                         zfsvfs->z_xattr_sa = B_TRUE;
  852         }
  853 
  854         error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
  855             &zfsvfs->z_attr_table);
  856         if (error != 0)
  857                 return (error);
  858 
  859         if (zfsvfs->z_version >= ZPL_VERSION_SA)
  860                 sa_register_update_callback(os, zfs_sa_upgrade);
  861 
  862         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
  863             &zfsvfs->z_root);
  864         if (error != 0)
  865                 return (error);
  866         ASSERT3U(zfsvfs->z_root, !=, 0);
  867 
  868         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
  869             &zfsvfs->z_unlinkedobj);
  870         if (error != 0)
  871                 return (error);
  872 
  873         error = zap_lookup(os, MASTER_NODE_OBJ,
  874             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
  875             8, 1, &zfsvfs->z_userquota_obj);
  876         if (error == ENOENT)
  877                 zfsvfs->z_userquota_obj = 0;
  878         else if (error != 0)
  879                 return (error);
  880 
  881         error = zap_lookup(os, MASTER_NODE_OBJ,
  882             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
  883             8, 1, &zfsvfs->z_groupquota_obj);
  884         if (error == ENOENT)
  885                 zfsvfs->z_groupquota_obj = 0;
  886         else if (error != 0)
  887                 return (error);
  888 
  889         error = zap_lookup(os, MASTER_NODE_OBJ,
  890             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
  891             8, 1, &zfsvfs->z_projectquota_obj);
  892         if (error == ENOENT)
  893                 zfsvfs->z_projectquota_obj = 0;
  894         else if (error != 0)
  895                 return (error);
  896 
  897         error = zap_lookup(os, MASTER_NODE_OBJ,
  898             zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
  899             8, 1, &zfsvfs->z_userobjquota_obj);
  900         if (error == ENOENT)
  901                 zfsvfs->z_userobjquota_obj = 0;
  902         else if (error != 0)
  903                 return (error);
  904 
  905         error = zap_lookup(os, MASTER_NODE_OBJ,
  906             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
  907             8, 1, &zfsvfs->z_groupobjquota_obj);
  908         if (error == ENOENT)
  909                 zfsvfs->z_groupobjquota_obj = 0;
  910         else if (error != 0)
  911                 return (error);
  912 
  913         error = zap_lookup(os, MASTER_NODE_OBJ,
  914             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
  915             8, 1, &zfsvfs->z_projectobjquota_obj);
  916         if (error == ENOENT)
  917                 zfsvfs->z_projectobjquota_obj = 0;
  918         else if (error != 0)
  919                 return (error);
  920 
  921         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
  922             &zfsvfs->z_fuid_obj);
  923         if (error == ENOENT)
  924                 zfsvfs->z_fuid_obj = 0;
  925         else if (error != 0)
  926                 return (error);
  927 
  928         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
  929             &zfsvfs->z_shares_dir);
  930         if (error == ENOENT)
  931                 zfsvfs->z_shares_dir = 0;
  932         else if (error != 0)
  933                 return (error);
  934 
  935         /*
  936          * Only use the name cache if we are looking for a
  937          * name on a file system that does not require normalization
  938          * or case folding.  We can also look there if we happen to be
  939          * on a non-normalizing, mixed sensitivity file system IF we
  940          * are looking for the exact name (which is always the case on
  941          * FreeBSD).
  942          */
  943         zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
  944             ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
  945             !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
  946 
  947         return (0);
  948 }
  949 
  950 taskq_t *zfsvfs_taskq;
  951 
  952 static void
  953 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
  954 {
  955 
  956         zfs_unlinked_drain((zfsvfs_t *)context);
  957 }
  958 
  959 int
  960 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
  961 {
  962         objset_t *os;
  963         zfsvfs_t *zfsvfs;
  964         int error;
  965         boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
  966 
  967         /*
  968          * XXX: Fix struct statfs so this isn't necessary!
  969          *
  970          * The 'osname' is used as the filesystem's special node, which means
  971          * it must fit in statfs.f_mntfromname, or else it can't be
  972          * enumerated, so libzfs_mnttab_find() returns NULL, which causes
  973          * 'zfs unmount' to think it's not mounted when it is.
  974          */
  975         if (strlen(osname) >= MNAMELEN)
  976                 return (SET_ERROR(ENAMETOOLONG));
  977 
  978         zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
  979 
  980         error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
  981             &os);
  982         if (error != 0) {
  983                 kmem_free(zfsvfs, sizeof (zfsvfs_t));
  984                 return (error);
  985         }
  986 
  987         error = zfsvfs_create_impl(zfvp, zfsvfs, os);
  988 
  989         return (error);
  990 }
  991 
  992 
  993 int
  994 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
  995 {
  996         int error;
  997 
  998         zfsvfs->z_vfs = NULL;
  999         zfsvfs->z_parent = zfsvfs;
 1000 
 1001         mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 1002         mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 1003         list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 1004             offsetof(znode_t, z_link_node));
 1005         TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
 1006             zfsvfs_task_unlinked_drain, zfsvfs);
 1007         ZFS_TEARDOWN_INIT(zfsvfs);
 1008         ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
 1009         rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 1010         for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 1011                 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 1012 
 1013         error = zfsvfs_init(zfsvfs, os);
 1014         if (error != 0) {
 1015                 dmu_objset_disown(os, B_TRUE, zfsvfs);
 1016                 *zfvp = NULL;
 1017                 kmem_free(zfsvfs, sizeof (zfsvfs_t));
 1018                 return (error);
 1019         }
 1020 
 1021         *zfvp = zfsvfs;
 1022         return (0);
 1023 }
 1024 
 1025 static int
 1026 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 1027 {
 1028         int error;
 1029 
 1030         /*
 1031          * Check for a bad on-disk format version now since we
 1032          * lied about owning the dataset readonly before.
 1033          */
 1034         if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 1035             dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
 1036                 return (SET_ERROR(EROFS));
 1037 
 1038         error = zfs_register_callbacks(zfsvfs->z_vfs);
 1039         if (error)
 1040                 return (error);
 1041 
 1042         /*
 1043          * If we are not mounting (ie: online recv), then we don't
 1044          * have to worry about replaying the log as we blocked all
 1045          * operations out since we closed the ZIL.
 1046          */
 1047         if (mounting) {
 1048                 boolean_t readonly;
 1049 
 1050                 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
 1051                 error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
 1052                 if (error)
 1053                         return (error);
 1054                 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
 1055                     &zfsvfs->z_kstat.dk_zil_sums);
 1056 
 1057                 /*
 1058                  * During replay we remove the read only flag to
 1059                  * allow replays to succeed.
 1060                  */
 1061                 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
 1062                 if (readonly != 0) {
 1063                         zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
 1064                 } else {
 1065                         dsl_dir_t *dd;
 1066                         zap_stats_t zs;
 1067 
 1068                         if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
 1069                             &zs) == 0) {
 1070                                 dataset_kstats_update_nunlinks_kstat(
 1071                                     &zfsvfs->z_kstat, zs.zs_num_entries);
 1072                                 dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
 1073                                     "num_entries in unlinked set: %llu",
 1074                                     (u_longlong_t)zs.zs_num_entries);
 1075                         }
 1076 
 1077                         zfs_unlinked_drain(zfsvfs);
 1078                         dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
 1079                         dd->dd_activity_cancelled = B_FALSE;
 1080                 }
 1081 
 1082                 /*
 1083                  * Parse and replay the intent log.
 1084                  *
 1085                  * Because of ziltest, this must be done after
 1086                  * zfs_unlinked_drain().  (Further note: ziltest
 1087                  * doesn't use readonly mounts, where
 1088                  * zfs_unlinked_drain() isn't called.)  This is because
 1089                  * ziltest causes spa_sync() to think it's committed,
 1090                  * but actually it is not, so the intent log contains
 1091                  * many txg's worth of changes.
 1092                  *
 1093                  * In particular, if object N is in the unlinked set in
 1094                  * the last txg to actually sync, then it could be
 1095                  * actually freed in a later txg and then reallocated
 1096                  * in a yet later txg.  This would write a "create
 1097                  * object N" record to the intent log.  Normally, this
 1098                  * would be fine because the spa_sync() would have
 1099                  * written out the fact that object N is free, before
 1100                  * we could write the "create object N" intent log
 1101                  * record.
 1102                  *
 1103                  * But when we are in ziltest mode, we advance the "open
 1104                  * txg" without actually spa_sync()-ing the changes to
 1105                  * disk.  So we would see that object N is still
 1106                  * allocated and in the unlinked set, and there is an
 1107                  * intent log record saying to allocate it.
 1108                  */
 1109                 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
 1110                         if (zil_replay_disable) {
 1111                                 zil_destroy(zfsvfs->z_log, B_FALSE);
 1112                         } else {
 1113                                 boolean_t use_nc = zfsvfs->z_use_namecache;
 1114                                 zfsvfs->z_use_namecache = B_FALSE;
 1115                                 zfsvfs->z_replay = B_TRUE;
 1116                                 zil_replay(zfsvfs->z_os, zfsvfs,
 1117                                     zfs_replay_vector);
 1118                                 zfsvfs->z_replay = B_FALSE;
 1119                                 zfsvfs->z_use_namecache = use_nc;
 1120                         }
 1121                 }
 1122 
 1123                 /* restore readonly bit */
 1124                 if (readonly != 0)
 1125                         zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
 1126         } else {
 1127                 ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
 1128                 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
 1129                     &zfsvfs->z_kstat.dk_zil_sums);
 1130         }
 1131 
 1132         /*
 1133          * Set the objset user_ptr to track its zfsvfs.
 1134          */
 1135         mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 1136         dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 1137         mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 1138 
 1139         return (0);
 1140 }
 1141 
 1142 void
 1143 zfsvfs_free(zfsvfs_t *zfsvfs)
 1144 {
 1145         int i;
 1146 
 1147         zfs_fuid_destroy(zfsvfs);
 1148 
 1149         mutex_destroy(&zfsvfs->z_znodes_lock);
 1150         mutex_destroy(&zfsvfs->z_lock);
 1151         ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
 1152         list_destroy(&zfsvfs->z_all_znodes);
 1153         ZFS_TEARDOWN_DESTROY(zfsvfs);
 1154         ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
 1155         rw_destroy(&zfsvfs->z_fuid_lock);
 1156         for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 1157                 mutex_destroy(&zfsvfs->z_hold_mtx[i]);
 1158         dataset_kstats_destroy(&zfsvfs->z_kstat);
 1159         kmem_free(zfsvfs, sizeof (zfsvfs_t));
 1160 }
 1161 
 1162 static void
 1163 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
 1164 {
 1165         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
 1166         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 1167 }
 1168 
 1169 static int
 1170 zfs_domount(vfs_t *vfsp, char *osname)
 1171 {
 1172         uint64_t recordsize, fsid_guid;
 1173         int error = 0;
 1174         zfsvfs_t *zfsvfs;
 1175 
 1176         ASSERT3P(vfsp, !=, NULL);
 1177         ASSERT3P(osname, !=, NULL);
 1178 
 1179         error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
 1180         if (error)
 1181                 return (error);
 1182         zfsvfs->z_vfs = vfsp;
 1183 
 1184         if ((error = dsl_prop_get_integer(osname,
 1185             "recordsize", &recordsize, NULL)))
 1186                 goto out;
 1187         zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
 1188         zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
 1189 
 1190         vfsp->vfs_data = zfsvfs;
 1191         vfsp->mnt_flag |= MNT_LOCAL;
 1192         vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
 1193         vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
 1194         vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
 1195         /*
 1196          * This can cause a loss of coherence between ARC and page cache
 1197          * on ZoF - unclear if the problem is in FreeBSD or ZoF
 1198          */
 1199         vfsp->mnt_kern_flag |= MNTK_NO_IOPF;    /* vn_io_fault can be used */
 1200         vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
 1201         vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
 1202 
 1203 #if defined(_KERNEL) && !defined(KMEM_DEBUG)
 1204         vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
 1205 #endif
 1206         /*
 1207          * The fsid is 64 bits, composed of an 8-bit fs type, which
 1208          * separates our fsid from any other filesystem types, and a
 1209          * 56-bit objset unique ID.  The objset unique ID is unique to
 1210          * all objsets open on this system, provided by unique_create().
 1211          * The 8-bit fs type must be put in the low bits of fsid[1]
 1212          * because that's where other Solaris filesystems put it.
 1213          */
 1214         fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
 1215         ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
 1216         vfsp->vfs_fsid.val[0] = fsid_guid;
 1217         vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
 1218             (vfsp->mnt_vfc->vfc_typenum & 0xFF);
 1219 
 1220         /*
 1221          * Set features for file system.
 1222          */
 1223         zfs_set_fuid_feature(zfsvfs);
 1224 
 1225         if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
 1226                 uint64_t pval;
 1227 
 1228                 atime_changed_cb(zfsvfs, B_FALSE);
 1229                 readonly_changed_cb(zfsvfs, B_TRUE);
 1230                 if ((error = dsl_prop_get_integer(osname,
 1231                     "xattr", &pval, NULL)))
 1232                         goto out;
 1233                 xattr_changed_cb(zfsvfs, pval);
 1234                 if ((error = dsl_prop_get_integer(osname,
 1235                     "acltype", &pval, NULL)))
 1236                         goto out;
 1237                 acl_type_changed_cb(zfsvfs, pval);
 1238                 zfsvfs->z_issnap = B_TRUE;
 1239                 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
 1240 
 1241                 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 1242                 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 1243                 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 1244         } else {
 1245                 if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
 1246                         goto out;
 1247         }
 1248 
 1249         vfs_mountedfrom(vfsp, osname);
 1250 
 1251         if (!zfsvfs->z_issnap)
 1252                 zfsctl_create(zfsvfs);
 1253 out:
 1254         if (error) {
 1255                 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 1256                 zfsvfs_free(zfsvfs);
 1257         } else {
 1258                 atomic_inc_32(&zfs_active_fs_count);
 1259         }
 1260 
 1261         return (error);
 1262 }
 1263 
 1264 static void
 1265 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
 1266 {
 1267         objset_t *os = zfsvfs->z_os;
 1268 
 1269         if (!dmu_objset_is_snapshot(os))
 1270                 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
 1271 }
 1272 
 1273 static int
 1274 getpoolname(const char *osname, char *poolname)
 1275 {
 1276         char *p;
 1277 
 1278         p = strchr(osname, '/');
 1279         if (p == NULL) {
 1280                 if (strlen(osname) >= MAXNAMELEN)
 1281                         return (ENAMETOOLONG);
 1282                 (void) strcpy(poolname, osname);
 1283         } else {
 1284                 if (p - osname >= MAXNAMELEN)
 1285                         return (ENAMETOOLONG);
 1286                 (void) strlcpy(poolname, osname, p - osname + 1);
 1287         }
 1288         return (0);
 1289 }
 1290 
 1291 static void
 1292 fetch_osname_options(char *name, bool *checkpointrewind)
 1293 {
 1294 
 1295         if (name[0] == '!') {
 1296                 *checkpointrewind = true;
 1297                 memmove(name, name + 1, strlen(name));
 1298         } else {
 1299                 *checkpointrewind = false;
 1300         }
 1301 }
 1302 
 1303 static int
 1304 zfs_mount(vfs_t *vfsp)
 1305 {
 1306         kthread_t       *td = curthread;
 1307         vnode_t         *mvp = vfsp->mnt_vnodecovered;
 1308         cred_t          *cr = td->td_ucred;
 1309         char            *osname;
 1310         int             error = 0;
 1311         int             canwrite;
 1312         bool            checkpointrewind, isctlsnap = false;
 1313 
 1314         if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
 1315                 return (SET_ERROR(EINVAL));
 1316 
 1317         /*
 1318          * If full-owner-access is enabled and delegated administration is
 1319          * turned on, we must set nosuid.
 1320          */
 1321         if (zfs_super_owner &&
 1322             dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
 1323                 secpolicy_fs_mount_clearopts(cr, vfsp);
 1324         }
 1325 
 1326         fetch_osname_options(osname, &checkpointrewind);
 1327         isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
 1328             strchr(osname, '@') != NULL);
 1329 
 1330         /*
 1331          * Check for mount privilege?
 1332          *
 1333          * If we don't have privilege then see if
 1334          * we have local permission to allow it
 1335          */
 1336         error = secpolicy_fs_mount(cr, mvp, vfsp);
 1337         if (error && isctlsnap) {
 1338                 secpolicy_fs_mount_clearopts(cr, vfsp);
 1339         } else if (error) {
 1340                 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
 1341                         goto out;
 1342 
 1343                 if (!(vfsp->vfs_flag & MS_REMOUNT)) {
 1344                         vattr_t         vattr;
 1345 
 1346                         /*
 1347                          * Make sure user is the owner of the mount point
 1348                          * or has sufficient privileges.
 1349                          */
 1350 
 1351                         vattr.va_mask = AT_UID;
 1352 
 1353                         vn_lock(mvp, LK_SHARED | LK_RETRY);
 1354                         if (VOP_GETATTR(mvp, &vattr, cr)) {
 1355                                 VOP_UNLOCK1(mvp);
 1356                                 goto out;
 1357                         }
 1358 
 1359                         if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
 1360                             VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
 1361                                 VOP_UNLOCK1(mvp);
 1362                                 goto out;
 1363                         }
 1364                         VOP_UNLOCK1(mvp);
 1365                 }
 1366 
 1367                 secpolicy_fs_mount_clearopts(cr, vfsp);
 1368         }
 1369 
 1370         /*
 1371          * Refuse to mount a filesystem if we are in a local zone and the
 1372          * dataset is not visible.
 1373          */
 1374         if (!INGLOBALZONE(curproc) &&
 1375             (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
 1376                 boolean_t mount_snapshot = B_FALSE;
 1377 
 1378                 /*
 1379                  * Snapshots may be mounted in .zfs for unjailed datasets
 1380                  * if allowed by the jail param zfs.mount_snapshot.
 1381                  */
 1382                 if (isctlsnap) {
 1383                         struct prison *pr;
 1384                         struct zfs_jailparam *zjp;
 1385 
 1386                         pr = curthread->td_ucred->cr_prison;
 1387                         mtx_lock(&pr->pr_mtx);
 1388                         zjp = osd_jail_get(pr, zfs_jailparam_slot);
 1389                         mtx_unlock(&pr->pr_mtx);
 1390                         if (zjp && zjp->mount_snapshot)
 1391                                 mount_snapshot = B_TRUE;
 1392                 }
 1393                 if (!mount_snapshot) {
 1394                         error = SET_ERROR(EPERM);
 1395                         goto out;
 1396                 }
 1397         }
 1398 
 1399         vfsp->vfs_flag |= MNT_NFS4ACLS;
 1400 
 1401         /*
 1402          * When doing a remount, we simply refresh our temporary properties
 1403          * according to those options set in the current VFS options.
 1404          */
 1405         if (vfsp->vfs_flag & MS_REMOUNT) {
 1406                 zfsvfs_t *zfsvfs = vfsp->vfs_data;
 1407 
 1408                 /*
 1409                  * Refresh mount options with z_teardown_lock blocking I/O while
 1410                  * the filesystem is in an inconsistent state.
 1411                  * The lock also serializes this code with filesystem
 1412                  * manipulations between entry to zfs_suspend_fs() and return
 1413                  * from zfs_resume_fs().
 1414                  */
 1415                 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 1416                 zfs_unregister_callbacks(zfsvfs);
 1417                 error = zfs_register_callbacks(vfsp);
 1418                 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 1419                 goto out;
 1420         }
 1421 
 1422         /* Initial root mount: try hard to import the requested root pool. */
 1423         if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
 1424             (vfsp->vfs_flag & MNT_UPDATE) == 0) {
 1425                 char pname[MAXNAMELEN];
 1426 
 1427                 error = getpoolname(osname, pname);
 1428                 if (error == 0)
 1429                         error = spa_import_rootpool(pname, checkpointrewind);
 1430                 if (error)
 1431                         goto out;
 1432         }
 1433         DROP_GIANT();
 1434         error = zfs_domount(vfsp, osname);
 1435         PICKUP_GIANT();
 1436 
 1437 out:
 1438         return (error);
 1439 }
 1440 
 1441 static int
 1442 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 1443 {
 1444         zfsvfs_t *zfsvfs = vfsp->vfs_data;
 1445         uint64_t refdbytes, availbytes, usedobjs, availobjs;
 1446         int error;
 1447 
 1448         statp->f_version = STATFS_VERSION;
 1449 
 1450         if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
 1451                 return (error);
 1452 
 1453         dmu_objset_space(zfsvfs->z_os,
 1454             &refdbytes, &availbytes, &usedobjs, &availobjs);
 1455 
 1456         /*
 1457          * The underlying storage pool actually uses multiple block sizes.
 1458          * We report the fragsize as the smallest block size we support,
 1459          * and we report our blocksize as the filesystem's maximum blocksize.
 1460          */
 1461         statp->f_bsize = SPA_MINBLOCKSIZE;
 1462         statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
 1463 
 1464         /*
 1465          * The following report "total" blocks of various kinds in the
 1466          * file system, but reported in terms of f_frsize - the
 1467          * "fragment" size.
 1468          */
 1469 
 1470         statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
 1471         statp->f_bfree = availbytes / statp->f_bsize;
 1472         statp->f_bavail = statp->f_bfree; /* no root reservation */
 1473 
 1474         /*
 1475          * statvfs() should really be called statufs(), because it assumes
 1476          * static metadata.  ZFS doesn't preallocate files, so the best
 1477          * we can do is report the max that could possibly fit in f_files,
 1478          * and that minus the number actually used in f_ffree.
 1479          * For f_ffree, report the smaller of the number of object available
 1480          * and the number of blocks (each object will take at least a block).
 1481          */
 1482         statp->f_ffree = MIN(availobjs, statp->f_bfree);
 1483         statp->f_files = statp->f_ffree + usedobjs;
 1484 
 1485         /*
 1486          * We're a zfs filesystem.
 1487          */
 1488         strlcpy(statp->f_fstypename, "zfs",
 1489             sizeof (statp->f_fstypename));
 1490 
 1491         strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
 1492             sizeof (statp->f_mntfromname));
 1493         strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
 1494             sizeof (statp->f_mntonname));
 1495 
 1496         statp->f_namemax = MAXNAMELEN - 1;
 1497 
 1498         zfs_exit(zfsvfs, FTAG);
 1499         return (0);
 1500 }
 1501 
 1502 static int
 1503 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 1504 {
 1505         zfsvfs_t *zfsvfs = vfsp->vfs_data;
 1506         znode_t *rootzp;
 1507         int error;
 1508 
 1509         if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
 1510                 return (error);
 1511 
 1512         error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 1513         if (error == 0)
 1514                 *vpp = ZTOV(rootzp);
 1515 
 1516         zfs_exit(zfsvfs, FTAG);
 1517 
 1518         if (error == 0) {
 1519                 error = vn_lock(*vpp, flags);
 1520                 if (error != 0) {
 1521                         VN_RELE(*vpp);
 1522                         *vpp = NULL;
 1523                 }
 1524         }
 1525         return (error);
 1526 }
 1527 
 1528 /*
 1529  * Teardown the zfsvfs::z_os.
 1530  *
 1531  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 1532  * and 'z_teardown_inactive_lock' held.
 1533  */
 1534 static int
 1535 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 1536 {
 1537         znode_t *zp;
 1538         dsl_dir_t *dd;
 1539 
 1540         /*
 1541          * If someone has not already unmounted this file system,
 1542          * drain the zrele_taskq to ensure all active references to the
 1543          * zfsvfs_t have been handled only then can it be safely destroyed.
 1544          */
 1545         if (zfsvfs->z_os) {
 1546                 /*
 1547                  * If we're unmounting we have to wait for the list to
 1548                  * drain completely.
 1549                  *
 1550                  * If we're not unmounting there's no guarantee the list
 1551                  * will drain completely, but zreles run from the taskq
 1552                  * may add the parents of dir-based xattrs to the taskq
 1553                  * so we want to wait for these.
 1554                  *
 1555                  * We can safely read z_nr_znodes without locking because the
 1556                  * VFS has already blocked operations which add to the
 1557                  * z_all_znodes list and thus increment z_nr_znodes.
 1558                  */
 1559                 int round = 0;
 1560                 while (zfsvfs->z_nr_znodes > 0) {
 1561                         taskq_wait_outstanding(dsl_pool_zrele_taskq(
 1562                             dmu_objset_pool(zfsvfs->z_os)), 0);
 1563                         if (++round > 1 && !unmounting)
 1564                                 break;
 1565                 }
 1566         }
 1567         ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 1568 
 1569         if (!unmounting) {
 1570                 /*
 1571                  * We purge the parent filesystem's vfsp as the parent
 1572                  * filesystem and all of its snapshots have their vnode's
 1573                  * v_vfsp set to the parent's filesystem's vfsp.  Note,
 1574                  * 'z_parent' is self referential for non-snapshots.
 1575                  */
 1576 #ifdef FREEBSD_NAMECACHE
 1577 #if __FreeBSD_version >= 1300117
 1578                 cache_purgevfs(zfsvfs->z_parent->z_vfs);
 1579 #else
 1580                 cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
 1581 #endif
 1582 #endif
 1583         }
 1584 
 1585         /*
 1586          * Close the zil. NB: Can't close the zil while zfs_inactive
 1587          * threads are blocked as zil_close can call zfs_inactive.
 1588          */
 1589         if (zfsvfs->z_log) {
 1590                 zil_close(zfsvfs->z_log);
 1591                 zfsvfs->z_log = NULL;
 1592         }
 1593 
 1594         ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);
 1595 
 1596         /*
 1597          * If we are not unmounting (ie: online recv) and someone already
 1598          * unmounted this file system while we were doing the switcheroo,
 1599          * or a reopen of z_os failed then just bail out now.
 1600          */
 1601         if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 1602                 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 1603                 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 1604                 return (SET_ERROR(EIO));
 1605         }
 1606 
 1607         /*
 1608          * At this point there are no vops active, and any new vops will
 1609          * fail with EIO since we have z_teardown_lock for writer (only
 1610          * relevant for forced unmount).
 1611          *
 1612          * Release all holds on dbufs.
 1613          */
 1614         mutex_enter(&zfsvfs->z_znodes_lock);
 1615         for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
 1616             zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 1617                 if (zp->z_sa_hdl != NULL) {
 1618                         zfs_znode_dmu_fini(zp);
 1619                 }
 1620         }
 1621         mutex_exit(&zfsvfs->z_znodes_lock);
 1622 
 1623         /*
 1624          * If we are unmounting, set the unmounted flag and let new vops
 1625          * unblock.  zfs_inactive will have the unmounted behavior, and all
 1626          * other vops will fail with EIO.
 1627          */
 1628         if (unmounting) {
 1629                 zfsvfs->z_unmounted = B_TRUE;
 1630                 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 1631                 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 1632         }
 1633 
 1634         /*
 1635          * z_os will be NULL if there was an error in attempting to reopen
 1636          * zfsvfs, so just return as the properties had already been
 1637          * unregistered and cached data had been evicted before.
 1638          */
 1639         if (zfsvfs->z_os == NULL)
 1640                 return (0);
 1641 
 1642         /*
 1643          * Unregister properties.
 1644          */
 1645         zfs_unregister_callbacks(zfsvfs);
 1646 
 1647         /*
 1648          * Evict cached data
 1649          */
 1650         if (!zfs_is_readonly(zfsvfs))
 1651                 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
 1652         dmu_objset_evict_dbufs(zfsvfs->z_os);
 1653         dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
 1654         dsl_dir_cancel_waiters(dd);
 1655 
 1656         return (0);
 1657 }
 1658 
 1659 static int
 1660 zfs_umount(vfs_t *vfsp, int fflag)
 1661 {
 1662         kthread_t *td = curthread;
 1663         zfsvfs_t *zfsvfs = vfsp->vfs_data;
 1664         objset_t *os;
 1665         cred_t *cr = td->td_ucred;
 1666         int ret;
 1667 
 1668         ret = secpolicy_fs_unmount(cr, vfsp);
 1669         if (ret) {
 1670                 if (dsl_deleg_access((char *)vfsp->vfs_resource,
 1671                     ZFS_DELEG_PERM_MOUNT, cr))
 1672                         return (ret);
 1673         }
 1674 
 1675         /*
 1676          * Unmount any snapshots mounted under .zfs before unmounting the
 1677          * dataset itself.
 1678          */
 1679         if (zfsvfs->z_ctldir != NULL) {
 1680                 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
 1681                         return (ret);
 1682         }
 1683 
 1684         if (fflag & MS_FORCE) {
 1685                 /*
 1686                  * Mark file system as unmounted before calling
 1687                  * vflush(FORCECLOSE). This way we ensure no future vnops
 1688                  * will be called and risk operating on DOOMED vnodes.
 1689                  */
 1690                 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 1691                 zfsvfs->z_unmounted = B_TRUE;
 1692                 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 1693         }
 1694 
 1695         /*
 1696          * Flush all the files.
 1697          */
 1698         ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
 1699         if (ret != 0)
 1700                 return (ret);
 1701         while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
 1702             &zfsvfs->z_unlinked_drain_task, NULL) != 0)
 1703                 taskqueue_drain(zfsvfs_taskq->tq_queue,
 1704                     &zfsvfs->z_unlinked_drain_task);
 1705 
 1706         VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
 1707         os = zfsvfs->z_os;
 1708 
 1709         /*
 1710          * z_os will be NULL if there was an error in
 1711          * attempting to reopen zfsvfs.
 1712          */
 1713         if (os != NULL) {
 1714                 /*
 1715                  * Unset the objset user_ptr.
 1716                  */
 1717                 mutex_enter(&os->os_user_ptr_lock);
 1718                 dmu_objset_set_user(os, NULL);
 1719                 mutex_exit(&os->os_user_ptr_lock);
 1720 
 1721                 /*
 1722                  * Finally release the objset
 1723                  */
 1724                 dmu_objset_disown(os, B_TRUE, zfsvfs);
 1725         }
 1726 
 1727         /*
 1728          * We can now safely destroy the '.zfs' directory node.
 1729          */
 1730         if (zfsvfs->z_ctldir != NULL)
 1731                 zfsctl_destroy(zfsvfs);
 1732         zfs_freevfs(vfsp);
 1733 
 1734         return (0);
 1735 }
 1736 
 1737 static int
 1738 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
 1739 {
 1740         zfsvfs_t        *zfsvfs = vfsp->vfs_data;
 1741         znode_t         *zp;
 1742         int             err;
 1743 
 1744         /*
 1745          * zfs_zget() can't operate on virtual entries like .zfs/ or
 1746          * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
 1747          * This will make NFS to switch to LOOKUP instead of using VGET.
 1748          */
 1749         if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
 1750             (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
 1751                 return (EOPNOTSUPP);
 1752 
 1753         if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
 1754                 return (err);
 1755         err = zfs_zget(zfsvfs, ino, &zp);
 1756         if (err == 0 && zp->z_unlinked) {
 1757                 vrele(ZTOV(zp));
 1758                 err = EINVAL;
 1759         }
 1760         if (err == 0)
 1761                 *vpp = ZTOV(zp);
 1762         zfs_exit(zfsvfs, FTAG);
 1763         if (err == 0) {
 1764                 err = vn_lock(*vpp, flags);
 1765                 if (err != 0)
 1766                         vrele(*vpp);
 1767         }
 1768         if (err != 0)
 1769                 *vpp = NULL;
 1770         return (err);
 1771 }
 1772 
 1773 static int
 1774 #if __FreeBSD_version >= 1300098
 1775 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
 1776     struct ucred **credanonp, int *numsecflavors, int *secflavors)
 1777 #else
 1778 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
 1779     struct ucred **credanonp, int *numsecflavors, int **secflavors)
 1780 #endif
 1781 {
 1782         zfsvfs_t *zfsvfs = vfsp->vfs_data;
 1783 
 1784         /*
 1785          * If this is regular file system vfsp is the same as
 1786          * zfsvfs->z_parent->z_vfs, but if it is snapshot,
 1787          * zfsvfs->z_parent->z_vfs represents parent file system
 1788          * which we have to use here, because only this file system
 1789          * has mnt_export configured.
 1790          */
 1791         return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
 1792             credanonp, numsecflavors, secflavors));
 1793 }
 1794 
 1795 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN,
 1796         "struct fid bigger than SHORT_FID_LEN");
 1797 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN,
 1798         "struct fid bigger than LONG_FID_LEN");
 1799 
 1800 static int
 1801 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 1802 {
 1803         struct componentname cn;
 1804         zfsvfs_t        *zfsvfs = vfsp->vfs_data;
 1805         znode_t         *zp;
 1806         vnode_t         *dvp;
 1807         uint64_t        object = 0;
 1808         uint64_t        fid_gen = 0;
 1809         uint64_t        setgen = 0;
 1810         uint64_t        gen_mask;
 1811         uint64_t        zp_gen;
 1812         int             i, err;
 1813 
 1814         *vpp = NULL;
 1815 
 1816         if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
 1817                 return (err);
 1818 
 1819         /*
 1820          * On FreeBSD we can get snapshot's mount point or its parent file
 1821          * system mount point depending if snapshot is already mounted or not.
 1822          */
 1823         if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
 1824                 zfid_long_t     *zlfid = (zfid_long_t *)fidp;
 1825                 uint64_t        objsetid = 0;
 1826 
 1827                 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
 1828                         objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
 1829 
 1830                 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 1831                         setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
 1832 
 1833                 zfs_exit(zfsvfs, FTAG);
 1834 
 1835                 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
 1836                 if (err)
 1837                         return (SET_ERROR(EINVAL));
 1838                 if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
 1839                         return (err);
 1840         }
 1841 
 1842         if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
 1843                 zfid_short_t    *zfid = (zfid_short_t *)fidp;
 1844 
 1845                 for (i = 0; i < sizeof (zfid->zf_object); i++)
 1846                         object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
 1847 
 1848                 for (i = 0; i < sizeof (zfid->zf_gen); i++)
 1849                         fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
 1850         } else {
 1851                 zfs_exit(zfsvfs, FTAG);
 1852                 return (SET_ERROR(EINVAL));
 1853         }
 1854 
 1855         if (fidp->fid_len == LONG_FID_LEN && setgen != 0) {
 1856                 zfs_exit(zfsvfs, FTAG);
 1857                 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
 1858                     (u_longlong_t)fid_gen, (u_longlong_t)setgen);
 1859                 return (SET_ERROR(EINVAL));
 1860         }
 1861 
 1862         /*
 1863          * A zero fid_gen means we are in .zfs or the .zfs/snapshot
 1864          * directory tree. If the object == zfsvfs->z_shares_dir, then
 1865          * we are in the .zfs/shares directory tree.
 1866          */
 1867         if ((fid_gen == 0 &&
 1868             (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
 1869             (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
 1870                 zfs_exit(zfsvfs, FTAG);
 1871                 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
 1872                 if (object == ZFSCTL_INO_SNAPDIR) {
 1873                         cn.cn_nameptr = "snapshot";
 1874                         cn.cn_namelen = strlen(cn.cn_nameptr);
 1875                         cn.cn_nameiop = LOOKUP;
 1876                         cn.cn_flags = ISLASTCN | LOCKLEAF;
 1877                         cn.cn_lkflags = flags;
 1878                         VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 1879                         vput(dvp);
 1880                 } else if (object == zfsvfs->z_shares_dir) {
 1881                         /*
 1882                          * XXX This branch must not be taken,
 1883                          * if it is, then the lookup below will
 1884                          * explode.
 1885                          */
 1886                         cn.cn_nameptr = "shares";
 1887                         cn.cn_namelen = strlen(cn.cn_nameptr);
 1888                         cn.cn_nameiop = LOOKUP;
 1889                         cn.cn_flags = ISLASTCN;
 1890                         cn.cn_lkflags = flags;
 1891                         VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 1892                         vput(dvp);
 1893                 } else {
 1894                         *vpp = dvp;
 1895                 }
 1896                 return (err);
 1897         }
 1898 
 1899         gen_mask = -1ULL >> (64 - 8 * i);
 1900 
 1901         dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
 1902             (u_longlong_t)fid_gen,
 1903             (u_longlong_t)gen_mask);
 1904         if ((err = zfs_zget(zfsvfs, object, &zp))) {
 1905                 zfs_exit(zfsvfs, FTAG);
 1906                 return (err);
 1907         }
 1908         (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
 1909             sizeof (uint64_t));
 1910         zp_gen = zp_gen & gen_mask;
 1911         if (zp_gen == 0)
 1912                 zp_gen = 1;
 1913         if (zp->z_unlinked || zp_gen != fid_gen) {
 1914                 dprintf("znode gen (%llu) != fid gen (%llu)\n",
 1915                     (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
 1916                 vrele(ZTOV(zp));
 1917                 zfs_exit(zfsvfs, FTAG);
 1918                 return (SET_ERROR(EINVAL));
 1919         }
 1920 
 1921         *vpp = ZTOV(zp);
 1922         zfs_exit(zfsvfs, FTAG);
 1923         err = vn_lock(*vpp, flags);
 1924         if (err == 0)
 1925                 vnode_create_vobject(*vpp, zp->z_size, curthread);
 1926         else
 1927                 *vpp = NULL;
 1928         return (err);
 1929 }
 1930 
 1931 /*
 1932  * Block out VOPs and close zfsvfs_t::z_os
 1933  *
 1934  * Note, if successful, then we return with the 'z_teardown_lock' and
 1935  * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 1936  * dataset and objset intact so that they can be atomically handed off during
 1937  * a subsequent rollback or recv operation and the resume thereafter.
 1938  */
 1939 int
 1940 zfs_suspend_fs(zfsvfs_t *zfsvfs)
 1941 {
 1942         int error;
 1943 
 1944         if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
 1945                 return (error);
 1946 
 1947         return (0);
 1948 }
 1949 
 1950 /*
 1951  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 1952  * is an invariant across any of the operations that can be performed while the
 1953  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
 1954  * are the same: the relevant objset and associated dataset are owned by
 1955  * zfsvfs, held, and long held on entry.
 1956  */
 1957 int
 1958 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 1959 {
 1960         int err;
 1961         znode_t *zp;
 1962 
 1963         ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
 1964         ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
 1965 
 1966         /*
 1967          * We already own this, so just update the objset_t, as the one we
 1968          * had before may have been evicted.
 1969          */
 1970         objset_t *os;
 1971         VERIFY3P(ds->ds_owner, ==, zfsvfs);
 1972         VERIFY(dsl_dataset_long_held(ds));
 1973         dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
 1974         dsl_pool_config_enter(dp, FTAG);
 1975         VERIFY0(dmu_objset_from_ds(ds, &os));
 1976         dsl_pool_config_exit(dp, FTAG);
 1977 
 1978         err = zfsvfs_init(zfsvfs, os);
 1979         if (err != 0)
 1980                 goto bail;
 1981 
 1982         ds->ds_dir->dd_activity_cancelled = B_FALSE;
 1983         VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));
 1984 
 1985         zfs_set_fuid_feature(zfsvfs);
 1986 
 1987         /*
 1988          * Attempt to re-establish all the active znodes with
 1989          * their dbufs.  If a zfs_rezget() fails, then we'll let
 1990          * any potential callers discover that via zfs_enter_verify_zp
 1991          * when they try to use their znode.
 1992          */
 1993         mutex_enter(&zfsvfs->z_znodes_lock);
 1994         for (zp = list_head(&zfsvfs->z_all_znodes); zp;
 1995             zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 1996                 (void) zfs_rezget(zp);
 1997         }
 1998         mutex_exit(&zfsvfs->z_znodes_lock);
 1999 
 2000 bail:
 2001         /* release the VOPs */
 2002         ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 2003         ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 2004 
 2005         if (err) {
 2006                 /*
 2007                  * Since we couldn't setup the sa framework, try to force
 2008                  * unmount this file system.
 2009                  */
 2010                 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
 2011                         vfs_ref(zfsvfs->z_vfs);
 2012                         (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
 2013                 }
 2014         }
 2015         return (err);
 2016 }
 2017 
 2018 static void
 2019 zfs_freevfs(vfs_t *vfsp)
 2020 {
 2021         zfsvfs_t *zfsvfs = vfsp->vfs_data;
 2022 
 2023         zfsvfs_free(zfsvfs);
 2024 
 2025         atomic_dec_32(&zfs_active_fs_count);
 2026 }
 2027 
 2028 #ifdef __i386__
 2029 static int desiredvnodes_backup;
 2030 #include <sys/vmmeter.h>
 2031 
 2032 
 2033 #include <vm/vm_page.h>
 2034 #include <vm/vm_object.h>
 2035 #include <vm/vm_kern.h>
 2036 #include <vm/vm_map.h>
 2037 #endif
 2038 
 2039 static void
 2040 zfs_vnodes_adjust(void)
 2041 {
 2042 #ifdef __i386__
 2043         int newdesiredvnodes;
 2044 
 2045         desiredvnodes_backup = desiredvnodes;
 2046 
 2047         /*
 2048          * We calculate newdesiredvnodes the same way it is done in
 2049          * vntblinit(). If it is equal to desiredvnodes, it means that
 2050          * it wasn't tuned by the administrator and we can tune it down.
 2051          */
 2052         newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
 2053             vm_kmem_size / (5 * (sizeof (struct vm_object) +
 2054             sizeof (struct vnode))));
 2055         if (newdesiredvnodes == desiredvnodes)
 2056                 desiredvnodes = (3 * newdesiredvnodes) / 4;
 2057 #endif
 2058 }
 2059 
 2060 static void
 2061 zfs_vnodes_adjust_back(void)
 2062 {
 2063 
 2064 #ifdef __i386__
 2065         desiredvnodes = desiredvnodes_backup;
 2066 #endif
 2067 }
 2068 
 2069 void
 2070 zfs_init(void)
 2071 {
 2072 
 2073         printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
 2074 
 2075         /*
 2076          * Initialize .zfs directory structures
 2077          */
 2078         zfsctl_init();
 2079 
 2080         /*
 2081          * Initialize znode cache, vnode ops, etc...
 2082          */
 2083         zfs_znode_init();
 2084 
 2085         /*
 2086          * Reduce number of vnodes. Originally number of vnodes is calculated
 2087          * with UFS inode in mind. We reduce it here, because it's too big for
 2088          * ZFS/i386.
 2089          */
 2090         zfs_vnodes_adjust();
 2091 
 2092         dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
 2093 
 2094         zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
 2095 }
 2096 
 2097 void
 2098 zfs_fini(void)
 2099 {
 2100         taskq_destroy(zfsvfs_taskq);
 2101         zfsctl_fini();
 2102         zfs_znode_fini();
 2103         zfs_vnodes_adjust_back();
 2104 }
 2105 
 2106 int
 2107 zfs_busy(void)
 2108 {
 2109         return (zfs_active_fs_count != 0);
 2110 }
 2111 
 2112 /*
 2113  * Release VOPs and unmount a suspended filesystem.
 2114  */
 2115 int
 2116 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 2117 {
 2118         ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
 2119         ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
 2120 
 2121         /*
 2122          * We already own this, so just hold and rele it to update the
 2123          * objset_t, as the one we had before may have been evicted.
 2124          */
 2125         objset_t *os;
 2126         VERIFY3P(ds->ds_owner, ==, zfsvfs);
 2127         VERIFY(dsl_dataset_long_held(ds));
 2128         dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
 2129         dsl_pool_config_enter(dp, FTAG);
 2130         VERIFY0(dmu_objset_from_ds(ds, &os));
 2131         dsl_pool_config_exit(dp, FTAG);
 2132         zfsvfs->z_os = os;
 2133 
 2134         /* release the VOPs */
 2135         ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 2136         ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 2137 
 2138         /*
 2139          * Try to force unmount this file system.
 2140          */
 2141         (void) zfs_umount(zfsvfs->z_vfs, 0);
 2142         zfsvfs->z_unmounted = B_TRUE;
 2143         return (0);
 2144 }
 2145 
 2146 int
 2147 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 2148 {
 2149         int error;
 2150         objset_t *os = zfsvfs->z_os;
 2151         dmu_tx_t *tx;
 2152 
 2153         if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
 2154                 return (SET_ERROR(EINVAL));
 2155 
 2156         if (newvers < zfsvfs->z_version)
 2157                 return (SET_ERROR(EINVAL));
 2158 
 2159         if (zfs_spa_version_map(newvers) >
 2160             spa_version(dmu_objset_spa(zfsvfs->z_os)))
 2161                 return (SET_ERROR(ENOTSUP));
 2162 
 2163         tx = dmu_tx_create(os);
 2164         dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
 2165         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 2166                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 2167                     ZFS_SA_ATTRS);
 2168                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 2169         }
 2170         error = dmu_tx_assign(tx, TXG_WAIT);
 2171         if (error) {
 2172                 dmu_tx_abort(tx);
 2173                 return (error);
 2174         }
 2175 
 2176         error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 2177             8, 1, &newvers, tx);
 2178 
 2179         if (error) {
 2180                 dmu_tx_commit(tx);
 2181                 return (error);
 2182         }
 2183 
 2184         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 2185                 uint64_t sa_obj;
 2186 
 2187                 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
 2188                     SPA_VERSION_SA);
 2189                 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
 2190                     DMU_OT_NONE, 0, tx);
 2191 
 2192                 error = zap_add(os, MASTER_NODE_OBJ,
 2193                     ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 2194                 ASSERT0(error);
 2195 
 2196                 VERIFY0(sa_set_sa_object(os, sa_obj));
 2197                 sa_register_update_callback(os, zfs_sa_upgrade);
 2198         }
 2199 
 2200         spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
 2201             "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
 2202             (uintmax_t)newvers);
 2203         dmu_tx_commit(tx);
 2204 
 2205         zfsvfs->z_version = newvers;
 2206         os->os_version = newvers;
 2207 
 2208         zfs_set_fuid_feature(zfsvfs);
 2209 
 2210         return (0);
 2211 }
 2212 
 2213 /*
 2214  * Read a property stored within the master node.
 2215  */
 2216 int
 2217 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 2218 {
 2219         uint64_t *cached_copy = NULL;
 2220 
 2221         /*
 2222          * Figure out where in the objset_t the cached copy would live, if it
 2223          * is available for the requested property.
 2224          */
 2225         if (os != NULL) {
 2226                 switch (prop) {
 2227                 case ZFS_PROP_VERSION:
 2228                         cached_copy = &os->os_version;
 2229                         break;
 2230                 case ZFS_PROP_NORMALIZE:
 2231                         cached_copy = &os->os_normalization;
 2232                         break;
 2233                 case ZFS_PROP_UTF8ONLY:
 2234                         cached_copy = &os->os_utf8only;
 2235                         break;
 2236                 case ZFS_PROP_CASE:
 2237                         cached_copy = &os->os_casesensitivity;
 2238                         break;
 2239                 default:
 2240                         break;
 2241                 }
 2242         }
 2243         if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
 2244                 *value = *cached_copy;
 2245                 return (0);
 2246         }
 2247 
 2248         /*
 2249          * If the property wasn't cached, look up the file system's value for
 2250          * the property. For the version property, we look up a slightly
 2251          * different string.
 2252          */
 2253         const char *pname;
 2254         int error = ENOENT;
 2255         if (prop == ZFS_PROP_VERSION) {
 2256                 pname = ZPL_VERSION_STR;
 2257         } else {
 2258                 pname = zfs_prop_to_name(prop);
 2259         }
 2260 
 2261         if (os != NULL) {
 2262                 ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
 2263                 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
 2264         }
 2265 
 2266         if (error == ENOENT) {
 2267                 /* No value set, use the default value */
 2268                 switch (prop) {
 2269                 case ZFS_PROP_VERSION:
 2270                         *value = ZPL_VERSION;
 2271                         break;
 2272                 case ZFS_PROP_NORMALIZE:
 2273                 case ZFS_PROP_UTF8ONLY:
 2274                         *value = 0;
 2275                         break;
 2276                 case ZFS_PROP_CASE:
 2277                         *value = ZFS_CASE_SENSITIVE;
 2278                         break;
 2279                 case ZFS_PROP_ACLTYPE:
 2280                         *value = ZFS_ACLTYPE_NFSV4;
 2281                         break;
 2282                 default:
 2283                         return (error);
 2284                 }
 2285                 error = 0;
 2286         }
 2287 
 2288         /*
 2289          * If one of the methods for getting the property value above worked,
 2290          * copy it into the objset_t's cache.
 2291          */
 2292         if (error == 0 && cached_copy != NULL) {
 2293                 *cached_copy = *value;
 2294         }
 2295 
 2296         return (error);
 2297 }
 2298 
 2299 /*
 2300  * Return true if the corresponding vfs's unmounted flag is set.
 2301  * Otherwise return false.
 2302  * If this function returns true we know VFS unmount has been initiated.
 2303  */
 2304 boolean_t
 2305 zfs_get_vfs_flag_unmounted(objset_t *os)
 2306 {
 2307         zfsvfs_t *zfvp;
 2308         boolean_t unmounted = B_FALSE;
 2309 
 2310         ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);
 2311 
 2312         mutex_enter(&os->os_user_ptr_lock);
 2313         zfvp = dmu_objset_get_user(os);
 2314         if (zfvp != NULL && zfvp->z_vfs != NULL &&
 2315             (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
 2316                 unmounted = B_TRUE;
 2317         mutex_exit(&os->os_user_ptr_lock);
 2318 
 2319         return (unmounted);
 2320 }
 2321 
 2322 #ifdef _KERNEL
 2323 void
 2324 zfsvfs_update_fromname(const char *oldname, const char *newname)
 2325 {
 2326         char tmpbuf[MAXPATHLEN];
 2327         struct mount *mp;
 2328         char *fromname;
 2329         size_t oldlen;
 2330 
 2331         oldlen = strlen(oldname);
 2332 
 2333         mtx_lock(&mountlist_mtx);
 2334         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 2335                 fromname = mp->mnt_stat.f_mntfromname;
 2336                 if (strcmp(fromname, oldname) == 0) {
 2337                         (void) strlcpy(fromname, newname,
 2338                             sizeof (mp->mnt_stat.f_mntfromname));
 2339                         continue;
 2340                 }
 2341                 if (strncmp(fromname, oldname, oldlen) == 0 &&
 2342                     (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
 2343                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
 2344                             newname, fromname + oldlen);
 2345                         (void) strlcpy(fromname, tmpbuf,
 2346                             sizeof (mp->mnt_stat.f_mntfromname));
 2347                         continue;
 2348                 }
 2349         }
 2350         mtx_unlock(&mountlist_mtx);
 2351 }
 2352 #endif
 2353 
 2354 /*
 2355  * Find a prison with ZFS info.
 2356  * Return the ZFS info and the (locked) prison.
 2357  */
 2358 static struct zfs_jailparam *
 2359 zfs_jailparam_find(struct prison *spr, struct prison **prp)
 2360 {
 2361         struct prison *pr;
 2362         struct zfs_jailparam *zjp;
 2363 
 2364         for (pr = spr; ; pr = pr->pr_parent) {
 2365                 mtx_lock(&pr->pr_mtx);
 2366                 if (pr == &prison0) {
 2367                         zjp = &zfs_jailparam0;
 2368                         break;
 2369                 }
 2370                 zjp = osd_jail_get(pr, zfs_jailparam_slot);
 2371                 if (zjp != NULL)
 2372                         break;
 2373                 mtx_unlock(&pr->pr_mtx);
 2374         }
 2375         *prp = pr;
 2376 
 2377         return (zjp);
 2378 }
 2379 
 2380 /*
 2381  * Ensure a prison has its own ZFS info.  If zjpp is non-null, point it to the
 2382  * ZFS info and lock the prison.
 2383  */
 2384 static void
 2385 zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
 2386 {
 2387         struct prison *ppr;
 2388         struct zfs_jailparam *zjp, *nzjp;
 2389         void **rsv;
 2390 
 2391         /* If this prison already has ZFS info, return that. */
 2392         zjp = zfs_jailparam_find(pr, &ppr);
 2393         if (ppr == pr)
 2394                 goto done;
 2395 
 2396         /*
 2397          * Allocate a new info record.  Then check again, in case something
 2398          * changed during the allocation.
 2399          */
 2400         mtx_unlock(&ppr->pr_mtx);
 2401         nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
 2402         rsv = osd_reserve(zfs_jailparam_slot);
 2403         zjp = zfs_jailparam_find(pr, &ppr);
 2404         if (ppr == pr) {
 2405                 free(nzjp, M_PRISON);
 2406                 osd_free_reserved(rsv);
 2407                 goto done;
 2408         }
 2409         /* Inherit the initial values from the ancestor. */
 2410         mtx_lock(&pr->pr_mtx);
 2411         (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
 2412         (void) memcpy(nzjp, zjp, sizeof (*zjp));
 2413         zjp = nzjp;
 2414         mtx_unlock(&ppr->pr_mtx);
 2415 done:
 2416         if (zjpp != NULL)
 2417                 *zjpp = zjp;
 2418         else
 2419                 mtx_unlock(&pr->pr_mtx);
 2420 }
 2421 
 2422 /*
 2423  * Jail OSD methods for ZFS VFS info.
 2424  */
 2425 static int
 2426 zfs_jailparam_create(void *obj, void *data)
 2427 {
 2428         struct prison *pr = obj;
 2429         struct vfsoptlist *opts = data;
 2430         int jsys;
 2431 
 2432         if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 &&
 2433             jsys == JAIL_SYS_INHERIT)
 2434                 return (0);
 2435         /*
 2436          * Inherit a prison's initial values from its parent
 2437          * (different from JAIL_SYS_INHERIT which also inherits changes).
 2438          */
 2439         zfs_jailparam_alloc(pr, NULL);
 2440         return (0);
 2441 }
 2442 
 2443 static int
 2444 zfs_jailparam_get(void *obj, void *data)
 2445 {
 2446         struct prison *ppr, *pr = obj;
 2447         struct vfsoptlist *opts = data;
 2448         struct zfs_jailparam *zjp;
 2449         int jsys, error;
 2450 
 2451         zjp = zfs_jailparam_find(pr, &ppr);
 2452         jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
 2453         error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys));
 2454         if (error != 0 && error != ENOENT)
 2455                 goto done;
 2456         if (jsys == JAIL_SYS_NEW) {
 2457                 error = vfs_setopt(opts, "zfs.mount_snapshot",
 2458                     &zjp->mount_snapshot, sizeof (zjp->mount_snapshot));
 2459                 if (error != 0 && error != ENOENT)
 2460                         goto done;
 2461         } else {
 2462                 /*
 2463                  * If this prison is inheriting its ZFS info, report
 2464                  * empty/zero parameters.
 2465                  */
 2466                 static int mount_snapshot = 0;
 2467 
 2468                 error = vfs_setopt(opts, "zfs.mount_snapshot",
 2469                     &mount_snapshot, sizeof (mount_snapshot));
 2470                 if (error != 0 && error != ENOENT)
 2471                         goto done;
 2472         }
 2473         error = 0;
 2474 done:
 2475         mtx_unlock(&ppr->pr_mtx);
 2476         return (error);
 2477 }
 2478 
 2479 static int
 2480 zfs_jailparam_set(void *obj, void *data)
 2481 {
 2482         struct prison *pr = obj;
 2483         struct prison *ppr;
 2484         struct vfsoptlist *opts = data;
 2485         int error, jsys, mount_snapshot;
 2486 
 2487         /* Set the parameters, which should be correct. */
 2488         error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
 2489         if (error == ENOENT)
 2490                 jsys = -1;
 2491         error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
 2492             sizeof (mount_snapshot));
 2493         if (error == ENOENT)
 2494                 mount_snapshot = -1;
 2495         else
 2496                 jsys = JAIL_SYS_NEW;
 2497         if (jsys == JAIL_SYS_NEW) {
 2498                 /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
 2499                 struct zfs_jailparam *zjp;
 2500 
 2501                 /*
 2502                  * A child jail cannot have more permissions than its parent
 2503                  */
 2504                 if (pr->pr_parent != &prison0) {
 2505                         zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
 2506                         mtx_unlock(&ppr->pr_mtx);
 2507                         if (zjp->mount_snapshot < mount_snapshot) {
 2508                                 return (EPERM);
 2509                         }
 2510                 }
 2511                 zfs_jailparam_alloc(pr, &zjp);
 2512                 if (mount_snapshot != -1)
 2513                         zjp->mount_snapshot = mount_snapshot;
 2514                 mtx_unlock(&pr->pr_mtx);
 2515         } else {
 2516                 /* "zfs=inherit": inherit the parent's ZFS info. */
 2517                 mtx_lock(&pr->pr_mtx);
 2518                 osd_jail_del(pr, zfs_jailparam_slot);
 2519                 mtx_unlock(&pr->pr_mtx);
 2520         }
 2521         return (0);
 2522 }
 2523 
 2524 static int
 2525 zfs_jailparam_check(void *obj __unused, void *data)
 2526 {
 2527         struct vfsoptlist *opts = data;
 2528         int error, jsys, mount_snapshot;
 2529 
 2530         /* Check that the parameters are correct. */
 2531         error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
 2532         if (error != ENOENT) {
 2533                 if (error != 0)
 2534                         return (error);
 2535                 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
 2536                         return (EINVAL);
 2537         }
 2538         error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
 2539             sizeof (mount_snapshot));
 2540         if (error != ENOENT) {
 2541                 if (error != 0)
 2542                         return (error);
 2543                 if (mount_snapshot != 0 && mount_snapshot != 1)
 2544                         return (EINVAL);
 2545         }
 2546         return (0);
 2547 }
 2548 
 2549 static void
 2550 zfs_jailparam_destroy(void *data)
 2551 {
 2552 
 2553         free(data, M_PRISON);
 2554 }
 2555 
 2556 static void
 2557 zfs_jailparam_sysinit(void *arg __unused)
 2558 {
 2559         struct prison *pr;
 2560         osd_method_t  methods[PR_MAXMETHOD] = {
 2561                 [PR_METHOD_CREATE] = zfs_jailparam_create,
 2562                 [PR_METHOD_GET] = zfs_jailparam_get,
 2563                 [PR_METHOD_SET] = zfs_jailparam_set,
 2564                 [PR_METHOD_CHECK] = zfs_jailparam_check,
 2565         };
 2566 
 2567         zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods);
 2568         /* Copy the defaults to any existing prisons. */
 2569         sx_slock(&allprison_lock);
 2570         TAILQ_FOREACH(pr, &allprison, pr_list)
 2571                 zfs_jailparam_alloc(pr, NULL);
 2572         sx_sunlock(&allprison_lock);
 2573 }
 2574 
 2575 static void
 2576 zfs_jailparam_sysuninit(void *arg __unused)
 2577 {
 2578 
 2579         osd_jail_deregister(zfs_jailparam_slot);
 2580 }
 2581 
 /*
  * Hook the jail-parameter setup and teardown routines into the
  * SI_SUB_DRIVERS subsystem with SI_ORDER_ANY ordering.
  */
 SYSINIT(zfs_jailparam_sysinit,
     SI_SUB_DRIVERS, SI_ORDER_ANY, zfs_jailparam_sysinit, NULL);
 SYSUNINIT(zfs_jailparam_sysuninit,
     SI_SUB_DRIVERS, SI_ORDER_ANY, zfs_jailparam_sysuninit, NULL);

Cache object: 844f055c460f6372edd1e5616ee31fd6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.