The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/zfs_ioctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   24  * Portions Copyright 2011 Martin Matuska
   25  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
   26  * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
   27  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
   28  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
   29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
   30  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
   31  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
   32  * Copyright (c) 2013 Steven Hartland. All rights reserved.
   33  * Copyright (c) 2014 Integros [integros.com]
   34  * Copyright 2016 Toomas Soome <tsoome@me.com>
   35  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
   36  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
   37  * Copyright 2017 RackTop Systems.
   38  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
   39  * Copyright (c) 2019 Datto Inc.
   40  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
   41  * Copyright (c) 2019, 2021, Klara Inc.
   42  * Copyright (c) 2019, Allan Jude
   43  */
   44 
   45 /*
   46  * ZFS ioctls.
   47  *
   48  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
   49  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
   50  *
   51  * There are two ways that we handle ioctls: the legacy way where almost
   52  * all of the logic is in the ioctl callback, and the new way where most
   53  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
   54  *
   55  * Non-legacy ioctls should be registered by calling
   56  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
   57  * from userland by lzc_ioctl().
   58  *
   59  * The registration arguments are as follows:
   60  *
   61  * const char *name
   62  *   The name of the ioctl.  This is used for history logging.  If the
   63  *   ioctl returns successfully (the callback returns 0), and allow_log
   64  *   is true, then a history log entry will be recorded with the input &
   65  *   output nvlists.  The log entry can be printed with "zpool history -i".
   66  *
   67  * zfs_ioc_t ioc
   68  *   The ioctl request number, which userland will pass to ioctl(2).
   69  *   We want newer versions of libzfs and libzfs_core to run against
   70  *   existing zfs kernel modules (i.e. a deferred reboot after an update).
   71  *   Therefore the ioctl numbers cannot change from release to release.
   72  *
   73  * zfs_secpolicy_func_t *secpolicy
   74  *   This function will be called before the zfs_ioc_func_t, to
   75  *   determine if this operation is permitted.  It should return EPERM
   76  *   on failure, and 0 on success.  Checks include determining if the
   77  *   dataset is visible in this zone, and if the user has either all
   78  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
   79  *   to do this operation on this dataset with "zfs allow".
   80  *
   81  * zfs_ioc_namecheck_t namecheck
   82  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
   83  *   name, a dataset name, or nothing.  If the name is not well-formed,
   84  *   the ioctl will fail and the callback will not be called.
   85  *   Therefore, the callback can assume that the name is well-formed
   86  *   (e.g. is null-terminated, doesn't have more than one '@' character,
   87  *   doesn't have invalid characters).
   88  *
   89  * zfs_ioc_poolcheck_t pool_check
   90  *   This specifies requirements on the pool state.  If the pool does
   91  *   not meet them (is suspended or is readonly), the ioctl will fail
   92  *   and the callback will not be called.  If any checks are specified
   93  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
   94  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
   95  *   POOL_CHECK_READONLY).
   96  *
   97  * zfs_ioc_key_t *nvl_keys
   98  *  The list of expected/allowable innvl input keys. This list is used
   99  *  to validate the nvlist input to the ioctl.
  100  *
  101  * boolean_t smush_outnvlist
  102  *   If smush_outnvlist is true, then the output is presumed to be a
  103  *   list of errors, and it will be "smushed" down to fit into the
  104  *   caller's buffer, by removing some entries and replacing them with a
  105  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  106  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  107  *   outnvlist does not fit into the userland-provided buffer, then the
  108  *   ioctl will fail with ENOMEM.
  109  *
  110  * zfs_ioc_func_t *func
  111  *   The callback function that will perform the operation.
  112  *
  113  *   The callback should return 0 on success, or an error number on
  114  *   failure.  If the function fails, the userland ioctl will return -1,
  115  *   and errno will be set to the callback's return value.  The callback
  116  *   will be called with the following arguments:
  117  *
  118  *   const char *name
  119  *     The name of the pool or dataset to operate on, from
  120  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
  121  *     expected type (pool, dataset, or none).
  122  *
  123  *   nvlist_t *innvl
  124  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
  125  *     NULL if no input nvlist was provided.  Changes to this nvlist are
  126  *     ignored.  If the input nvlist could not be deserialized, the
  127  *     ioctl will fail and the callback will not be called.
  128  *
  129  *   nvlist_t *outnvl
  130  *     The output nvlist, initially empty.  The callback can fill it in,
  131  *     and it will be returned to userland by serializing it into
  132  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
  133  *     fails (e.g. because the caller didn't supply a large enough
  134  *     buffer), then the overall ioctl will fail.  See the
  135  *     'smush_outnvlist' argument above for additional behaviors.
  136  *
  137  *     There are two typical uses of the output nvlist:
  138  *       - To return state, e.g. property values.  In this case,
  139  *         smush_outnvlist should be false.  If the buffer was not large
  140  *         enough, the caller will reallocate a larger buffer and try
  141  *         the ioctl again.
  142  *
  143  *       - To return multiple errors from an ioctl which makes on-disk
  144  *         changes.  In this case, smush_outnvlist should be true.
  145  *         Ioctls which make on-disk modifications should generally not
  146  *         use the outnvl if they succeed, because the caller can not
  147  *         distinguish between the operation failing, and
  148  *         deserialization failing.
  149  *
  150  * IOCTL Interface Errors
  151  *
  152  * The following ioctl input errors can be returned:
  153  *   ZFS_ERR_IOC_CMD_UNAVAIL    the ioctl number is not supported by kernel
  154  *   ZFS_ERR_IOC_ARG_UNAVAIL    an input argument is not supported by kernel
  155  *   ZFS_ERR_IOC_ARG_REQUIRED   a required input argument is missing
  156  *   ZFS_ERR_IOC_ARG_BADTYPE    an input argument has an invalid type
  157  */
  158 
  159 #include <sys/types.h>
  160 #include <sys/param.h>
  161 #include <sys/errno.h>
  162 #include <sys/uio_impl.h>
  163 #include <sys/file.h>
  164 #include <sys/kmem.h>
  165 #include <sys/cmn_err.h>
  166 #include <sys/stat.h>
  167 #include <sys/zfs_ioctl.h>
  168 #include <sys/zfs_quota.h>
  169 #include <sys/zfs_vfsops.h>
  170 #include <sys/zfs_znode.h>
  171 #include <sys/zap.h>
  172 #include <sys/spa.h>
  173 #include <sys/spa_impl.h>
  174 #include <sys/vdev.h>
  175 #include <sys/vdev_impl.h>
  176 #include <sys/dmu.h>
  177 #include <sys/dsl_dir.h>
  178 #include <sys/dsl_dataset.h>
  179 #include <sys/dsl_prop.h>
  180 #include <sys/dsl_deleg.h>
  181 #include <sys/dmu_objset.h>
  182 #include <sys/dmu_impl.h>
  183 #include <sys/dmu_redact.h>
  184 #include <sys/dmu_tx.h>
  185 #include <sys/sunddi.h>
  186 #include <sys/policy.h>
  187 #include <sys/zone.h>
  188 #include <sys/nvpair.h>
  189 #include <sys/pathname.h>
  190 #include <sys/fs/zfs.h>
  191 #include <sys/zfs_ctldir.h>
  192 #include <sys/zfs_dir.h>
  193 #include <sys/zfs_onexit.h>
  194 #include <sys/zvol.h>
  195 #include <sys/dsl_scan.h>
  196 #include <sys/fm/util.h>
  197 #include <sys/dsl_crypt.h>
  198 #include <sys/rrwlock.h>
  199 #include <sys/zfs_file.h>
  200 
  201 #include <sys/dmu_recv.h>
  202 #include <sys/dmu_send.h>
  203 #include <sys/dmu_recv.h>
  204 #include <sys/dsl_destroy.h>
  205 #include <sys/dsl_bookmark.h>
  206 #include <sys/dsl_userhold.h>
  207 #include <sys/zfeature.h>
  208 #include <sys/zcp.h>
  209 #include <sys/zio_checksum.h>
  210 #include <sys/vdev_removal.h>
  211 #include <sys/vdev_impl.h>
  212 #include <sys/vdev_initialize.h>
  213 #include <sys/vdev_trim.h>
  214 
  215 #include "zfs_namecheck.h"
  216 #include "zfs_prop.h"
  217 #include "zfs_deleg.h"
  218 #include "zfs_comutil.h"
  219 
  220 #include <sys/lua/lua.h>
  221 #include <sys/lua/lauxlib.h>
  222 #include <sys/zfs_ioctl_impl.h>
  223 
  224 kmutex_t zfsdev_state_lock;
  225 static zfsdev_state_t *zfsdev_state_list;
  226 
  227 /*
  228  * Limit maximum nvlist size.  We don't want users passing in insane values
  229  * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
  230  * Defaults to 0=auto which is handled by platform code.
  231  */
  232 uint64_t zfs_max_nvlist_src_size = 0;
  233 
  234 /*
  235  * When logging the output nvlist of an ioctl in the on-disk history, limit
  236  * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
  237  * This applies primarily to zfs_ioc_channel_program().
  238  */
  239 static uint64_t zfs_history_output_max = 1024 * 1024;
  240 
  241 uint_t zfs_fsyncer_key;
  242 uint_t zfs_allow_log_key;
  243 
  244 /* DATA_TYPE_ANY is used when zkey_type can vary. */
  245 #define DATA_TYPE_ANY   DATA_TYPE_UNKNOWN
  246 
  247 typedef struct zfs_ioc_vec {
  248         zfs_ioc_legacy_func_t   *zvec_legacy_func;
  249         zfs_ioc_func_t          *zvec_func;
  250         zfs_secpolicy_func_t    *zvec_secpolicy;
  251         zfs_ioc_namecheck_t     zvec_namecheck;
  252         boolean_t               zvec_allow_log;
  253         zfs_ioc_poolcheck_t     zvec_pool_check;
  254         boolean_t               zvec_smush_outnvlist;
  255         const char              *zvec_name;
  256         const zfs_ioc_key_t     *zvec_nvl_keys;
  257         size_t                  zvec_nvl_key_count;
  258 } zfs_ioc_vec_t;
  259 
  260 /* This array is indexed by zfs_userquota_prop_t */
  261 static const char *userquota_perms[] = {
  262         ZFS_DELEG_PERM_USERUSED,
  263         ZFS_DELEG_PERM_USERQUOTA,
  264         ZFS_DELEG_PERM_GROUPUSED,
  265         ZFS_DELEG_PERM_GROUPQUOTA,
  266         ZFS_DELEG_PERM_USEROBJUSED,
  267         ZFS_DELEG_PERM_USEROBJQUOTA,
  268         ZFS_DELEG_PERM_GROUPOBJUSED,
  269         ZFS_DELEG_PERM_GROUPOBJQUOTA,
  270         ZFS_DELEG_PERM_PROJECTUSED,
  271         ZFS_DELEG_PERM_PROJECTQUOTA,
  272         ZFS_DELEG_PERM_PROJECTOBJUSED,
  273         ZFS_DELEG_PERM_PROJECTOBJQUOTA,
  274 };
  275 
  276 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
  277 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
  278 static int zfs_check_settable(const char *name, nvpair_t *property,
  279     cred_t *cr);
  280 static int zfs_check_clearable(const char *dataset, nvlist_t *props,
  281     nvlist_t **errors);
  282 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
  283     boolean_t *);
  284 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
  285 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
  286 
  287 static void
  288 history_str_free(char *buf)
  289 {
  290         kmem_free(buf, HIS_MAX_RECORD_LEN);
  291 }
  292 
  293 static char *
  294 history_str_get(zfs_cmd_t *zc)
  295 {
  296         char *buf;
  297 
  298         if (zc->zc_history == 0)
  299                 return (NULL);
  300 
  301         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
  302         if (copyinstr((void *)(uintptr_t)zc->zc_history,
  303             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
  304                 history_str_free(buf);
  305                 return (NULL);
  306         }
  307 
  308         buf[HIS_MAX_RECORD_LEN -1] = '\0';
  309 
  310         return (buf);
  311 }
  312 
  313 /*
  314  * Return non-zero if the spa version is less than requested version.
  315  */
  316 static int
  317 zfs_earlier_version(const char *name, int version)
  318 {
  319         spa_t *spa;
  320 
  321         if (spa_open(name, &spa, FTAG) == 0) {
  322                 if (spa_version(spa) < version) {
  323                         spa_close(spa, FTAG);
  324                         return (1);
  325                 }
  326                 spa_close(spa, FTAG);
  327         }
  328         return (0);
  329 }
  330 
  331 /*
  332  * Return TRUE if the ZPL version is less than requested version.
  333  */
  334 static boolean_t
  335 zpl_earlier_version(const char *name, int version)
  336 {
  337         objset_t *os;
  338         boolean_t rc = B_TRUE;
  339 
  340         if (dmu_objset_hold(name, FTAG, &os) == 0) {
  341                 uint64_t zplversion;
  342 
  343                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
  344                         dmu_objset_rele(os, FTAG);
  345                         return (B_TRUE);
  346                 }
  347                 /* XXX reading from non-owned objset */
  348                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
  349                         rc = zplversion < version;
  350                 dmu_objset_rele(os, FTAG);
  351         }
  352         return (rc);
  353 }
  354 
  355 static void
  356 zfs_log_history(zfs_cmd_t *zc)
  357 {
  358         spa_t *spa;
  359         char *buf;
  360 
  361         if ((buf = history_str_get(zc)) == NULL)
  362                 return;
  363 
  364         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
  365                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
  366                         (void) spa_history_log(spa, buf);
  367                 spa_close(spa, FTAG);
  368         }
  369         history_str_free(buf);
  370 }
  371 
  372 /*
  373  * Policy for top-level read operations (list pools).  Requires no privileges,
  374  * and can be used in the local zone, as there is no associated dataset.
  375  */
  376 static int
  377 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  378 {
  379         (void) zc, (void) innvl, (void) cr;
  380         return (0);
  381 }
  382 
  383 /*
  384  * Policy for dataset read operations (list children, get statistics).  Requires
  385  * no privileges, but must be visible in the local zone.
  386  */
  387 static int
  388 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  389 {
  390         (void) innvl, (void) cr;
  391         if (INGLOBALZONE(curproc) ||
  392             zone_dataset_visible(zc->zc_name, NULL))
  393                 return (0);
  394 
  395         return (SET_ERROR(ENOENT));
  396 }
  397 
  398 static int
  399 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
  400 {
  401         int writable = 1;
  402 
  403         /*
  404          * The dataset must be visible by this zone -- check this first
  405          * so they don't see EPERM on something they shouldn't know about.
  406          */
  407         if (!INGLOBALZONE(curproc) &&
  408             !zone_dataset_visible(dataset, &writable))
  409                 return (SET_ERROR(ENOENT));
  410 
  411         if (INGLOBALZONE(curproc)) {
  412                 /*
  413                  * If the fs is zoned, only root can access it from the
  414                  * global zone.
  415                  */
  416                 if (secpolicy_zfs(cr) && zoned)
  417                         return (SET_ERROR(EPERM));
  418         } else {
  419                 /*
  420                  * If we are in a local zone, the 'zoned' property must be set.
  421                  */
  422                 if (!zoned)
  423                         return (SET_ERROR(EPERM));
  424 
  425                 /* must be writable by this zone */
  426                 if (!writable)
  427                         return (SET_ERROR(EPERM));
  428         }
  429         return (0);
  430 }
  431 
  432 static int
  433 zfs_dozonecheck(const char *dataset, cred_t *cr)
  434 {
  435         uint64_t zoned;
  436 
  437         if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
  438             &zoned, NULL))
  439                 return (SET_ERROR(ENOENT));
  440 
  441         return (zfs_dozonecheck_impl(dataset, zoned, cr));
  442 }
  443 
  444 static int
  445 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
  446 {
  447         uint64_t zoned;
  448 
  449         if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
  450                 return (SET_ERROR(ENOENT));
  451 
  452         return (zfs_dozonecheck_impl(dataset, zoned, cr));
  453 }
  454 
  455 static int
  456 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
  457     const char *perm, cred_t *cr)
  458 {
  459         int error;
  460 
  461         error = zfs_dozonecheck_ds(name, ds, cr);
  462         if (error == 0) {
  463                 error = secpolicy_zfs(cr);
  464                 if (error != 0)
  465                         error = dsl_deleg_access_impl(ds, perm, cr);
  466         }
  467         return (error);
  468 }
  469 
  470 static int
  471 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
  472 {
  473         int error;
  474         dsl_dataset_t *ds;
  475         dsl_pool_t *dp;
  476 
  477         /*
  478          * First do a quick check for root in the global zone, which
  479          * is allowed to do all write_perms.  This ensures that zfs_ioc_*
  480          * will get to handle nonexistent datasets.
  481          */
  482         if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
  483                 return (0);
  484 
  485         error = dsl_pool_hold(name, FTAG, &dp);
  486         if (error != 0)
  487                 return (error);
  488 
  489         error = dsl_dataset_hold(dp, name, FTAG, &ds);
  490         if (error != 0) {
  491                 dsl_pool_rele(dp, FTAG);
  492                 return (error);
  493         }
  494 
  495         error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
  496 
  497         dsl_dataset_rele(ds, FTAG);
  498         dsl_pool_rele(dp, FTAG);
  499         return (error);
  500 }
  501 
  502 /*
  503  * Policy for setting the security label property.
  504  *
  505  * Returns 0 for success, non-zero for access and other errors.
  506  */
  507 static int
  508 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
  509 {
  510 #ifdef HAVE_MLSLABEL
  511         char            ds_hexsl[MAXNAMELEN];
  512         bslabel_t       ds_sl, new_sl;
  513         boolean_t       new_default = FALSE;
  514         uint64_t        zoned;
  515         int             needed_priv = -1;
  516         int             error;
  517 
  518         /* First get the existing dataset label. */
  519         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
  520             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
  521         if (error != 0)
  522                 return (SET_ERROR(EPERM));
  523 
  524         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
  525                 new_default = TRUE;
  526 
  527         /* The label must be translatable */
  528         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
  529                 return (SET_ERROR(EINVAL));
  530 
  531         /*
  532          * In a non-global zone, disallow attempts to set a label that
  533          * doesn't match that of the zone; otherwise no other checks
  534          * are needed.
  535          */
  536         if (!INGLOBALZONE(curproc)) {
  537                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
  538                         return (SET_ERROR(EPERM));
  539                 return (0);
  540         }
  541 
  542         /*
  543          * For global-zone datasets (i.e., those whose zoned property is
  544          * "off", verify that the specified new label is valid for the
  545          * global zone.
  546          */
  547         if (dsl_prop_get_integer(name,
  548             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
  549                 return (SET_ERROR(EPERM));
  550         if (!zoned) {
  551                 if (zfs_check_global_label(name, strval) != 0)
  552                         return (SET_ERROR(EPERM));
  553         }
  554 
  555         /*
  556          * If the existing dataset label is nondefault, check if the
  557          * dataset is mounted (label cannot be changed while mounted).
  558          * Get the zfsvfs_t; if there isn't one, then the dataset isn't
  559          * mounted (or isn't a dataset, doesn't exist, ...).
  560          */
  561         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
  562                 objset_t *os;
  563                 static const char *setsl_tag = "setsl_tag";
  564 
  565                 /*
  566                  * Try to own the dataset; abort if there is any error,
  567                  * (e.g., already mounted, in use, or other error).
  568                  */
  569                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
  570                     setsl_tag, &os);
  571                 if (error != 0)
  572                         return (SET_ERROR(EPERM));
  573 
  574                 dmu_objset_disown(os, B_TRUE, setsl_tag);
  575 
  576                 if (new_default) {
  577                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
  578                         goto out_check;
  579                 }
  580 
  581                 if (hexstr_to_label(strval, &new_sl) != 0)
  582                         return (SET_ERROR(EPERM));
  583 
  584                 if (blstrictdom(&ds_sl, &new_sl))
  585                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
  586                 else if (blstrictdom(&new_sl, &ds_sl))
  587                         needed_priv = PRIV_FILE_UPGRADE_SL;
  588         } else {
  589                 /* dataset currently has a default label */
  590                 if (!new_default)
  591                         needed_priv = PRIV_FILE_UPGRADE_SL;
  592         }
  593 
  594 out_check:
  595         if (needed_priv != -1)
  596                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
  597         return (0);
  598 #else
  599         return (SET_ERROR(ENOTSUP));
  600 #endif /* HAVE_MLSLABEL */
  601 }
  602 
  603 static int
  604 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
  605     cred_t *cr)
  606 {
  607         char *strval;
  608 
  609         /*
  610          * Check permissions for special properties.
  611          */
  612         switch (prop) {
  613         default:
  614                 break;
  615         case ZFS_PROP_ZONED:
  616                 /*
  617                  * Disallow setting of 'zoned' from within a local zone.
  618                  */
  619                 if (!INGLOBALZONE(curproc))
  620                         return (SET_ERROR(EPERM));
  621                 break;
  622 
  623         case ZFS_PROP_QUOTA:
  624         case ZFS_PROP_FILESYSTEM_LIMIT:
  625         case ZFS_PROP_SNAPSHOT_LIMIT:
  626                 if (!INGLOBALZONE(curproc)) {
  627                         uint64_t zoned;
  628                         char setpoint[ZFS_MAX_DATASET_NAME_LEN];
  629                         /*
  630                          * Unprivileged users are allowed to modify the
  631                          * limit on things *under* (ie. contained by)
  632                          * the thing they own.
  633                          */
  634                         if (dsl_prop_get_integer(dsname,
  635                             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
  636                                 return (SET_ERROR(EPERM));
  637                         if (!zoned || strlen(dsname) <= strlen(setpoint))
  638                                 return (SET_ERROR(EPERM));
  639                 }
  640                 break;
  641 
  642         case ZFS_PROP_MLSLABEL:
  643                 if (!is_system_labeled())
  644                         return (SET_ERROR(EPERM));
  645 
  646                 if (nvpair_value_string(propval, &strval) == 0) {
  647                         int err;
  648 
  649                         err = zfs_set_slabel_policy(dsname, strval, CRED());
  650                         if (err != 0)
  651                                 return (err);
  652                 }
  653                 break;
  654         }
  655 
  656         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
  657 }
  658 
  659 static int
  660 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  661 {
  662         /*
  663          * permission to set permissions will be evaluated later in
  664          * dsl_deleg_can_allow()
  665          */
  666         (void) innvl;
  667         return (zfs_dozonecheck(zc->zc_name, cr));
  668 }
  669 
  670 static int
  671 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  672 {
  673         (void) innvl;
  674         return (zfs_secpolicy_write_perms(zc->zc_name,
  675             ZFS_DELEG_PERM_ROLLBACK, cr));
  676 }
  677 
  678 static int
  679 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  680 {
  681         (void) innvl;
  682         dsl_pool_t *dp;
  683         dsl_dataset_t *ds;
  684         const char *cp;
  685         int error;
  686 
  687         /*
  688          * Generate the current snapshot name from the given objsetid, then
  689          * use that name for the secpolicy/zone checks.
  690          */
  691         cp = strchr(zc->zc_name, '@');
  692         if (cp == NULL)
  693                 return (SET_ERROR(EINVAL));
  694         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
  695         if (error != 0)
  696                 return (error);
  697 
  698         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
  699         if (error != 0) {
  700                 dsl_pool_rele(dp, FTAG);
  701                 return (error);
  702         }
  703 
  704         dsl_dataset_name(ds, zc->zc_name);
  705 
  706         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
  707             ZFS_DELEG_PERM_SEND, cr);
  708         dsl_dataset_rele(ds, FTAG);
  709         dsl_pool_rele(dp, FTAG);
  710 
  711         return (error);
  712 }
  713 
  714 static int
  715 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  716 {
  717         (void) innvl;
  718         return (zfs_secpolicy_write_perms(zc->zc_name,
  719             ZFS_DELEG_PERM_SEND, cr));
  720 }
  721 
  722 static int
  723 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  724 {
  725         (void) zc, (void) innvl, (void) cr;
  726         return (SET_ERROR(ENOTSUP));
  727 }
  728 
  729 static int
  730 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  731 {
  732         (void) zc, (void) innvl, (void) cr;
  733         return (SET_ERROR(ENOTSUP));
  734 }
  735 
  736 static int
  737 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
  738 {
  739         char *cp;
  740 
  741         /*
  742          * Remove the @bla or /bla from the end of the name to get the parent.
  743          */
  744         (void) strlcpy(parent, datasetname, parentsize);
  745         cp = strrchr(parent, '@');
  746         if (cp != NULL) {
  747                 cp[0] = '\0';
  748         } else {
  749                 cp = strrchr(parent, '/');
  750                 if (cp == NULL)
  751                         return (SET_ERROR(ENOENT));
  752                 cp[0] = '\0';
  753         }
  754 
  755         return (0);
  756 }
  757 
  758 int
  759 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
  760 {
  761         int error;
  762 
  763         if ((error = zfs_secpolicy_write_perms(name,
  764             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
  765                 return (error);
  766 
  767         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
  768 }
  769 
  770 static int
  771 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  772 {
  773         (void) innvl;
  774         return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
  775 }
  776 
  777 /*
  778  * Destroying snapshots with delegated permissions requires
  779  * descendant mount and destroy permissions.
  780  */
  781 static int
  782 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  783 {
  784         (void) zc;
  785         nvlist_t *snaps;
  786         nvpair_t *pair, *nextpair;
  787         int error = 0;
  788 
  789         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
  790 
  791         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
  792             pair = nextpair) {
  793                 nextpair = nvlist_next_nvpair(snaps, pair);
  794                 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
  795                 if (error == ENOENT) {
  796                         /*
  797                          * Ignore any snapshots that don't exist (we consider
  798                          * them "already destroyed").  Remove the name from the
  799                          * nvl here in case the snapshot is created between
  800                          * now and when we try to destroy it (in which case
  801                          * we don't want to destroy it since we haven't
  802                          * checked for permission).
  803                          */
  804                         fnvlist_remove_nvpair(snaps, pair);
  805                         error = 0;
  806                 }
  807                 if (error != 0)
  808                         break;
  809         }
  810 
  811         return (error);
  812 }
  813 
  814 int
  815 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
  816 {
  817         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
  818         int     error;
  819 
  820         if ((error = zfs_secpolicy_write_perms(from,
  821             ZFS_DELEG_PERM_RENAME, cr)) != 0)
  822                 return (error);
  823 
  824         if ((error = zfs_secpolicy_write_perms(from,
  825             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
  826                 return (error);
  827 
  828         if ((error = zfs_get_parent(to, parentname,
  829             sizeof (parentname))) != 0)
  830                 return (error);
  831 
  832         if ((error = zfs_secpolicy_write_perms(parentname,
  833             ZFS_DELEG_PERM_CREATE, cr)) != 0)
  834                 return (error);
  835 
  836         if ((error = zfs_secpolicy_write_perms(parentname,
  837             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
  838                 return (error);
  839 
  840         return (error);
  841 }
  842 
  843 static int
  844 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  845 {
  846         (void) innvl;
  847         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
  848 }
  849 
  850 static int
  851 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  852 {
  853         (void) innvl;
  854         dsl_pool_t *dp;
  855         dsl_dataset_t *clone;
  856         int error;
  857 
  858         error = zfs_secpolicy_write_perms(zc->zc_name,
  859             ZFS_DELEG_PERM_PROMOTE, cr);
  860         if (error != 0)
  861                 return (error);
  862 
  863         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
  864         if (error != 0)
  865                 return (error);
  866 
  867         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
  868 
  869         if (error == 0) {
  870                 char parentname[ZFS_MAX_DATASET_NAME_LEN];
  871                 dsl_dataset_t *origin = NULL;
  872                 dsl_dir_t *dd;
  873                 dd = clone->ds_dir;
  874 
  875                 error = dsl_dataset_hold_obj(dd->dd_pool,
  876                     dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
  877                 if (error != 0) {
  878                         dsl_dataset_rele(clone, FTAG);
  879                         dsl_pool_rele(dp, FTAG);
  880                         return (error);
  881                 }
  882 
  883                 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
  884                     ZFS_DELEG_PERM_MOUNT, cr);
  885 
  886                 dsl_dataset_name(origin, parentname);
  887                 if (error == 0) {
  888                         error = zfs_secpolicy_write_perms_ds(parentname, origin,
  889                             ZFS_DELEG_PERM_PROMOTE, cr);
  890                 }
  891                 dsl_dataset_rele(clone, FTAG);
  892                 dsl_dataset_rele(origin, FTAG);
  893         }
  894         dsl_pool_rele(dp, FTAG);
  895         return (error);
  896 }
  897 
  898 static int
  899 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  900 {
  901         (void) innvl;
  902         int error;
  903 
  904         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
  905             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
  906                 return (error);
  907 
  908         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
  909             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
  910                 return (error);
  911 
  912         return (zfs_secpolicy_write_perms(zc->zc_name,
  913             ZFS_DELEG_PERM_CREATE, cr));
  914 }
  915 
  916 int
  917 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
  918 {
  919         return (zfs_secpolicy_write_perms(name,
  920             ZFS_DELEG_PERM_SNAPSHOT, cr));
  921 }
  922 
  923 /*
  924  * Check for permission to create each snapshot in the nvlist.
  925  */
  926 static int
  927 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  928 {
  929         (void) zc;
  930         nvlist_t *snaps;
  931         int error = 0;
  932         nvpair_t *pair;
  933 
  934         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
  935 
  936         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
  937             pair = nvlist_next_nvpair(snaps, pair)) {
  938                 char *name = nvpair_name(pair);
  939                 char *atp = strchr(name, '@');
  940 
  941                 if (atp == NULL) {
  942                         error = SET_ERROR(EINVAL);
  943                         break;
  944                 }
  945                 *atp = '\0';
  946                 error = zfs_secpolicy_snapshot_perms(name, cr);
  947                 *atp = '@';
  948                 if (error != 0)
  949                         break;
  950         }
  951         return (error);
  952 }
  953 
  954 /*
  955  * Check for permission to create each bookmark in the nvlist.
  956  */
  957 static int
  958 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  959 {
  960         (void) zc;
  961         int error = 0;
  962 
  963         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
  964             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
  965                 char *name = nvpair_name(pair);
  966                 char *hashp = strchr(name, '#');
  967 
  968                 if (hashp == NULL) {
  969                         error = SET_ERROR(EINVAL);
  970                         break;
  971                 }
  972                 *hashp = '\0';
  973                 error = zfs_secpolicy_write_perms(name,
  974                     ZFS_DELEG_PERM_BOOKMARK, cr);
  975                 *hashp = '#';
  976                 if (error != 0)
  977                         break;
  978         }
  979         return (error);
  980 }
  981 
  982 static int
  983 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
  984 {
  985         (void) zc;
  986         nvpair_t *pair, *nextpair;
  987         int error = 0;
  988 
  989         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
  990             pair = nextpair) {
  991                 char *name = nvpair_name(pair);
  992                 char *hashp = strchr(name, '#');
  993                 nextpair = nvlist_next_nvpair(innvl, pair);
  994 
  995                 if (hashp == NULL) {
  996                         error = SET_ERROR(EINVAL);
  997                         break;
  998                 }
  999 
 1000                 *hashp = '\0';
 1001                 error = zfs_secpolicy_write_perms(name,
 1002                     ZFS_DELEG_PERM_DESTROY, cr);
 1003                 *hashp = '#';
 1004                 if (error == ENOENT) {
 1005                         /*
 1006                          * Ignore any filesystems that don't exist (we consider
 1007                          * their bookmarks "already destroyed").  Remove
 1008                          * the name from the nvl here in case the filesystem
 1009                          * is created between now and when we try to destroy
 1010                          * the bookmark (in which case we don't want to
 1011                          * destroy it since we haven't checked for permission).
 1012                          */
 1013                         fnvlist_remove_nvpair(innvl, pair);
 1014                         error = 0;
 1015                 }
 1016                 if (error != 0)
 1017                         break;
 1018         }
 1019 
 1020         return (error);
 1021 }
 1022 
 1023 static int
 1024 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1025 {
 1026         (void) zc, (void) innvl, (void) cr;
 1027         /*
 1028          * Even root must have a proper TSD so that we know what pool
 1029          * to log to.
 1030          */
 1031         if (tsd_get(zfs_allow_log_key) == NULL)
 1032                 return (SET_ERROR(EPERM));
 1033         return (0);
 1034 }
 1035 
 1036 static int
 1037 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1038 {
 1039         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
 1040         int     error;
 1041         char    *origin;
 1042 
 1043         if ((error = zfs_get_parent(zc->zc_name, parentname,
 1044             sizeof (parentname))) != 0)
 1045                 return (error);
 1046 
 1047         if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
 1048             (error = zfs_secpolicy_write_perms(origin,
 1049             ZFS_DELEG_PERM_CLONE, cr)) != 0)
 1050                 return (error);
 1051 
 1052         if ((error = zfs_secpolicy_write_perms(parentname,
 1053             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 1054                 return (error);
 1055 
 1056         return (zfs_secpolicy_write_perms(parentname,
 1057             ZFS_DELEG_PERM_MOUNT, cr));
 1058 }
 1059 
 1060 /*
 1061  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
 1062  * SYS_CONFIG privilege, which is not available in a local zone.
 1063  */
 1064 int
 1065 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1066 {
 1067         (void) zc, (void) innvl;
 1068 
 1069         if (secpolicy_sys_config(cr, B_FALSE) != 0)
 1070                 return (SET_ERROR(EPERM));
 1071 
 1072         return (0);
 1073 }
 1074 
 1075 /*
 1076  * Policy for object to name lookups.
 1077  */
 1078 static int
 1079 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1080 {
 1081         (void) innvl;
 1082         int error;
 1083 
 1084         if (secpolicy_sys_config(cr, B_FALSE) == 0)
 1085                 return (0);
 1086 
 1087         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
 1088         return (error);
 1089 }
 1090 
 1091 /*
 1092  * Policy for fault injection.  Requires all privileges.
 1093  */
 1094 static int
 1095 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1096 {
 1097         (void) zc, (void) innvl;
 1098         return (secpolicy_zinject(cr));
 1099 }
 1100 
 1101 static int
 1102 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1103 {
 1104         (void) innvl;
 1105         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 1106 
 1107         if (prop == ZPROP_USERPROP) {
 1108                 if (!zfs_prop_user(zc->zc_value))
 1109                         return (SET_ERROR(EINVAL));
 1110                 return (zfs_secpolicy_write_perms(zc->zc_name,
 1111                     ZFS_DELEG_PERM_USERPROP, cr));
 1112         } else {
 1113                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
 1114                     NULL, cr));
 1115         }
 1116 }
 1117 
 1118 static int
 1119 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1120 {
 1121         int err = zfs_secpolicy_read(zc, innvl, cr);
 1122         if (err)
 1123                 return (err);
 1124 
 1125         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 1126                 return (SET_ERROR(EINVAL));
 1127 
 1128         if (zc->zc_value[0] == 0) {
 1129                 /*
 1130                  * They are asking about a posix uid/gid.  If it's
 1131                  * themself, allow it.
 1132                  */
 1133                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
 1134                     zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
 1135                     zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
 1136                     zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
 1137                         if (zc->zc_guid == crgetuid(cr))
 1138                                 return (0);
 1139                 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
 1140                     zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
 1141                     zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
 1142                     zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
 1143                         if (groupmember(zc->zc_guid, cr))
 1144                                 return (0);
 1145                 }
 1146                 /* else is for project quota/used */
 1147         }
 1148 
 1149         return (zfs_secpolicy_write_perms(zc->zc_name,
 1150             userquota_perms[zc->zc_objset_type], cr));
 1151 }
 1152 
 1153 static int
 1154 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1155 {
 1156         int err = zfs_secpolicy_read(zc, innvl, cr);
 1157         if (err)
 1158                 return (err);
 1159 
 1160         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 1161                 return (SET_ERROR(EINVAL));
 1162 
 1163         return (zfs_secpolicy_write_perms(zc->zc_name,
 1164             userquota_perms[zc->zc_objset_type], cr));
 1165 }
 1166 
 1167 static int
 1168 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1169 {
 1170         (void) innvl;
 1171         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
 1172             NULL, cr));
 1173 }
 1174 
 1175 static int
 1176 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1177 {
 1178         (void) zc;
 1179         nvpair_t *pair;
 1180         nvlist_t *holds;
 1181         int error;
 1182 
 1183         holds = fnvlist_lookup_nvlist(innvl, "holds");
 1184 
 1185         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 1186             pair = nvlist_next_nvpair(holds, pair)) {
 1187                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
 1188                 error = dmu_fsname(nvpair_name(pair), fsname);
 1189                 if (error != 0)
 1190                         return (error);
 1191                 error = zfs_secpolicy_write_perms(fsname,
 1192                     ZFS_DELEG_PERM_HOLD, cr);
 1193                 if (error != 0)
 1194                         return (error);
 1195         }
 1196         return (0);
 1197 }
 1198 
 1199 static int
 1200 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1201 {
 1202         (void) zc;
 1203         nvpair_t *pair;
 1204         int error;
 1205 
 1206         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
 1207             pair = nvlist_next_nvpair(innvl, pair)) {
 1208                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
 1209                 error = dmu_fsname(nvpair_name(pair), fsname);
 1210                 if (error != 0)
 1211                         return (error);
 1212                 error = zfs_secpolicy_write_perms(fsname,
 1213                     ZFS_DELEG_PERM_RELEASE, cr);
 1214                 if (error != 0)
 1215                         return (error);
 1216         }
 1217         return (0);
 1218 }
 1219 
 1220 /*
 1221  * Policy for allowing temporary snapshots to be taken or released
 1222  */
 1223 static int
 1224 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1225 {
 1226         /*
 1227          * A temporary snapshot is the same as a snapshot,
 1228          * hold, destroy and release all rolled into one.
 1229          * Delegated diff alone is sufficient that we allow this.
 1230          */
 1231         int error;
 1232 
 1233         if (zfs_secpolicy_write_perms(zc->zc_name,
 1234             ZFS_DELEG_PERM_DIFF, cr) == 0)
 1235                 return (0);
 1236 
 1237         error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
 1238 
 1239         if (innvl != NULL) {
 1240                 if (error == 0)
 1241                         error = zfs_secpolicy_hold(zc, innvl, cr);
 1242                 if (error == 0)
 1243                         error = zfs_secpolicy_release(zc, innvl, cr);
 1244                 if (error == 0)
 1245                         error = zfs_secpolicy_destroy(zc, innvl, cr);
 1246         }
 1247         return (error);
 1248 }
 1249 
 1250 static int
 1251 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1252 {
 1253         return (zfs_secpolicy_write_perms(zc->zc_name,
 1254             ZFS_DELEG_PERM_LOAD_KEY, cr));
 1255 }
 1256 
 1257 static int
 1258 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 1259 {
 1260         return (zfs_secpolicy_write_perms(zc->zc_name,
 1261             ZFS_DELEG_PERM_CHANGE_KEY, cr));
 1262 }
 1263 
 1264 /*
 1265  * Returns the nvlist as specified by the user in the zfs_cmd_t.
 1266  */
 1267 static int
 1268 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
 1269 {
 1270         char *packed;
 1271         int error;
 1272         nvlist_t *list = NULL;
 1273 
 1274         /*
 1275          * Read in and unpack the user-supplied nvlist.
 1276          */
 1277         if (size == 0)
 1278                 return (SET_ERROR(EINVAL));
 1279 
 1280         packed = vmem_alloc(size, KM_SLEEP);
 1281 
 1282         if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
 1283                 vmem_free(packed, size);
 1284                 return (SET_ERROR(EFAULT));
 1285         }
 1286 
 1287         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
 1288                 vmem_free(packed, size);
 1289                 return (error);
 1290         }
 1291 
 1292         vmem_free(packed, size);
 1293 
 1294         *nvp = list;
 1295         return (0);
 1296 }
 1297 
 1298 /*
 1299  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
 1300  * Entries will be removed from the end of the nvlist, and one int32 entry
 1301  * named "N_MORE_ERRORS" will be added indicating how many entries were
 1302  * removed.
 1303  */
 1304 static int
 1305 nvlist_smush(nvlist_t *errors, size_t max)
 1306 {
 1307         size_t size;
 1308 
 1309         size = fnvlist_size(errors);
 1310 
 1311         if (size > max) {
 1312                 nvpair_t *more_errors;
 1313                 int n = 0;
 1314 
 1315                 if (max < 1024)
 1316                         return (SET_ERROR(ENOMEM));
 1317 
 1318                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
 1319                 more_errors = nvlist_prev_nvpair(errors, NULL);
 1320 
 1321                 do {
 1322                         nvpair_t *pair = nvlist_prev_nvpair(errors,
 1323                             more_errors);
 1324                         fnvlist_remove_nvpair(errors, pair);
 1325                         n++;
 1326                         size = fnvlist_size(errors);
 1327                 } while (size > max);
 1328 
 1329                 fnvlist_remove_nvpair(errors, more_errors);
 1330                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
 1331                 ASSERT3U(fnvlist_size(errors), <=, max);
 1332         }
 1333 
 1334         return (0);
 1335 }
 1336 
 1337 static int
 1338 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
 1339 {
 1340         char *packed = NULL;
 1341         int error = 0;
 1342         size_t size;
 1343 
 1344         size = fnvlist_size(nvl);
 1345 
 1346         if (size > zc->zc_nvlist_dst_size) {
 1347                 error = SET_ERROR(ENOMEM);
 1348         } else {
 1349                 packed = fnvlist_pack(nvl, &size);
 1350                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
 1351                     size, zc->zc_iflags) != 0)
 1352                         error = SET_ERROR(EFAULT);
 1353                 fnvlist_pack_free(packed, size);
 1354         }
 1355 
 1356         zc->zc_nvlist_dst_size = size;
 1357         zc->zc_nvlist_dst_filled = B_TRUE;
 1358         return (error);
 1359 }
 1360 
 1361 int
 1362 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
 1363 {
 1364         int error = 0;
 1365         if (dmu_objset_type(os) != DMU_OST_ZFS) {
 1366                 return (SET_ERROR(EINVAL));
 1367         }
 1368 
 1369         mutex_enter(&os->os_user_ptr_lock);
 1370         *zfvp = dmu_objset_get_user(os);
 1371         /* bump s_active only when non-zero to prevent umount race */
 1372         error = zfs_vfs_ref(zfvp);
 1373         mutex_exit(&os->os_user_ptr_lock);
 1374         return (error);
 1375 }
 1376 
 1377 int
 1378 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
 1379 {
 1380         objset_t *os;
 1381         int error;
 1382 
 1383         error = dmu_objset_hold(dsname, FTAG, &os);
 1384         if (error != 0)
 1385                 return (error);
 1386 
 1387         error = getzfsvfs_impl(os, zfvp);
 1388         dmu_objset_rele(os, FTAG);
 1389         return (error);
 1390 }
 1391 
 1392 /*
 1393  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
 1394  * case its z_sb will be NULL, and it will be opened as the owner.
 1395  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
 1396  * which prevents all inode ops from running.
 1397  */
 1398 static int
 1399 zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
 1400     boolean_t writer)
 1401 {
 1402         int error = 0;
 1403 
 1404         if (getzfsvfs(name, zfvp) != 0)
 1405                 error = zfsvfs_create(name, B_FALSE, zfvp);
 1406         if (error == 0) {
 1407                 if (writer)
 1408                         ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
 1409                 else
 1410                         ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
 1411                 if ((*zfvp)->z_unmounted) {
 1412                         /*
 1413                          * XXX we could probably try again, since the unmounting
 1414                          * thread should be just about to disassociate the
 1415                          * objset from the zfsvfs.
 1416                          */
 1417                         ZFS_TEARDOWN_EXIT(*zfvp, tag);
 1418                         return (SET_ERROR(EBUSY));
 1419                 }
 1420         }
 1421         return (error);
 1422 }
 1423 
 1424 static void
 1425 zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
 1426 {
 1427         ZFS_TEARDOWN_EXIT(zfsvfs, tag);
 1428 
 1429         if (zfs_vfs_held(zfsvfs)) {
 1430                 zfs_vfs_rele(zfsvfs);
 1431         } else {
 1432                 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 1433                 zfsvfs_free(zfsvfs);
 1434         }
 1435 }
 1436 
 1437 static int
 1438 zfs_ioc_pool_create(zfs_cmd_t *zc)
 1439 {
 1440         int error;
 1441         nvlist_t *config, *props = NULL;
 1442         nvlist_t *rootprops = NULL;
 1443         nvlist_t *zplprops = NULL;
 1444         dsl_crypto_params_t *dcp = NULL;
 1445         const char *spa_name = zc->zc_name;
 1446         boolean_t unload_wkey = B_TRUE;
 1447 
 1448         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1449             zc->zc_iflags, &config)))
 1450                 return (error);
 1451 
 1452         if (zc->zc_nvlist_src_size != 0 && (error =
 1453             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 1454             zc->zc_iflags, &props))) {
 1455                 nvlist_free(config);
 1456                 return (error);
 1457         }
 1458 
 1459         if (props) {
 1460                 nvlist_t *nvl = NULL;
 1461                 nvlist_t *hidden_args = NULL;
 1462                 uint64_t version = SPA_VERSION;
 1463                 char *tname;
 1464 
 1465                 (void) nvlist_lookup_uint64(props,
 1466                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
 1467                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
 1468                         error = SET_ERROR(EINVAL);
 1469                         goto pool_props_bad;
 1470                 }
 1471                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
 1472                 if (nvl) {
 1473                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
 1474                         if (error != 0)
 1475                                 goto pool_props_bad;
 1476                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
 1477                 }
 1478 
 1479                 (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
 1480                     &hidden_args);
 1481                 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
 1482                     rootprops, hidden_args, &dcp);
 1483                 if (error != 0)
 1484                         goto pool_props_bad;
 1485                 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
 1486 
 1487                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 1488                 error = zfs_fill_zplprops_root(version, rootprops,
 1489                     zplprops, NULL);
 1490                 if (error != 0)
 1491                         goto pool_props_bad;
 1492 
 1493                 if (nvlist_lookup_string(props,
 1494                     zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
 1495                         spa_name = tname;
 1496         }
 1497 
 1498         error = spa_create(zc->zc_name, config, props, zplprops, dcp);
 1499 
 1500         /*
 1501          * Set the remaining root properties
 1502          */
 1503         if (!error && (error = zfs_set_prop_nvlist(spa_name,
 1504             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
 1505                 (void) spa_destroy(spa_name);
 1506                 unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
 1507         }
 1508 
 1509 pool_props_bad:
 1510         nvlist_free(rootprops);
 1511         nvlist_free(zplprops);
 1512         nvlist_free(config);
 1513         nvlist_free(props);
 1514         dsl_crypto_params_free(dcp, unload_wkey && !!error);
 1515 
 1516         return (error);
 1517 }
 1518 
 1519 static int
 1520 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
 1521 {
 1522         int error;
 1523         zfs_log_history(zc);
 1524         error = spa_destroy(zc->zc_name);
 1525 
 1526         return (error);
 1527 }
 1528 
 1529 static int
 1530 zfs_ioc_pool_import(zfs_cmd_t *zc)
 1531 {
 1532         nvlist_t *config, *props = NULL;
 1533         uint64_t guid;
 1534         int error;
 1535 
 1536         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1537             zc->zc_iflags, &config)) != 0)
 1538                 return (error);
 1539 
 1540         if (zc->zc_nvlist_src_size != 0 && (error =
 1541             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 1542             zc->zc_iflags, &props))) {
 1543                 nvlist_free(config);
 1544                 return (error);
 1545         }
 1546 
 1547         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
 1548             guid != zc->zc_guid)
 1549                 error = SET_ERROR(EINVAL);
 1550         else
 1551                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
 1552 
 1553         if (zc->zc_nvlist_dst != 0) {
 1554                 int err;
 1555 
 1556                 if ((err = put_nvlist(zc, config)) != 0)
 1557                         error = err;
 1558         }
 1559 
 1560         nvlist_free(config);
 1561         nvlist_free(props);
 1562 
 1563         return (error);
 1564 }
 1565 
 1566 static int
 1567 zfs_ioc_pool_export(zfs_cmd_t *zc)
 1568 {
 1569         int error;
 1570         boolean_t force = (boolean_t)zc->zc_cookie;
 1571         boolean_t hardforce = (boolean_t)zc->zc_guid;
 1572 
 1573         zfs_log_history(zc);
 1574         error = spa_export(zc->zc_name, NULL, force, hardforce);
 1575 
 1576         return (error);
 1577 }
 1578 
 1579 static int
 1580 zfs_ioc_pool_configs(zfs_cmd_t *zc)
 1581 {
 1582         nvlist_t *configs;
 1583         int error;
 1584 
 1585         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
 1586                 return (SET_ERROR(EEXIST));
 1587 
 1588         error = put_nvlist(zc, configs);
 1589 
 1590         nvlist_free(configs);
 1591 
 1592         return (error);
 1593 }
 1594 
 1595 /*
 1596  * inputs:
 1597  * zc_name              name of the pool
 1598  *
 1599  * outputs:
 1600  * zc_cookie            real errno
 1601  * zc_nvlist_dst        config nvlist
 1602  * zc_nvlist_dst_size   size of config nvlist
 1603  */
 1604 static int
 1605 zfs_ioc_pool_stats(zfs_cmd_t *zc)
 1606 {
 1607         nvlist_t *config;
 1608         int error;
 1609         int ret = 0;
 1610 
 1611         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
 1612             sizeof (zc->zc_value));
 1613 
 1614         if (config != NULL) {
 1615                 ret = put_nvlist(zc, config);
 1616                 nvlist_free(config);
 1617 
 1618                 /*
 1619                  * The config may be present even if 'error' is non-zero.
 1620                  * In this case we return success, and preserve the real errno
 1621                  * in 'zc_cookie'.
 1622                  */
 1623                 zc->zc_cookie = error;
 1624         } else {
 1625                 ret = error;
 1626         }
 1627 
 1628         return (ret);
 1629 }
 1630 
 1631 /*
 1632  * Try to import the given pool, returning pool stats as appropriate so that
 1633  * user land knows which devices are available and overall pool health.
 1634  */
 1635 static int
 1636 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
 1637 {
 1638         nvlist_t *tryconfig, *config = NULL;
 1639         int error;
 1640 
 1641         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1642             zc->zc_iflags, &tryconfig)) != 0)
 1643                 return (error);
 1644 
 1645         config = spa_tryimport(tryconfig);
 1646 
 1647         nvlist_free(tryconfig);
 1648 
 1649         if (config == NULL)
 1650                 return (SET_ERROR(EINVAL));
 1651 
 1652         error = put_nvlist(zc, config);
 1653         nvlist_free(config);
 1654 
 1655         return (error);
 1656 }
 1657 
 1658 /*
 1659  * inputs:
 1660  * zc_name              name of the pool
 1661  * zc_cookie            scan func (pool_scan_func_t)
 1662  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
 1663  */
 1664 static int
 1665 zfs_ioc_pool_scan(zfs_cmd_t *zc)
 1666 {
 1667         spa_t *spa;
 1668         int error;
 1669 
 1670         if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
 1671                 return (SET_ERROR(EINVAL));
 1672 
 1673         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1674                 return (error);
 1675 
 1676         if (zc->zc_flags == POOL_SCRUB_PAUSE)
 1677                 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
 1678         else if (zc->zc_cookie == POOL_SCAN_NONE)
 1679                 error = spa_scan_stop(spa);
 1680         else
 1681                 error = spa_scan(spa, zc->zc_cookie);
 1682 
 1683         spa_close(spa, FTAG);
 1684 
 1685         return (error);
 1686 }
 1687 
 1688 static int
 1689 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
 1690 {
 1691         spa_t *spa;
 1692         int error;
 1693 
 1694         error = spa_open(zc->zc_name, &spa, FTAG);
 1695         if (error == 0) {
 1696                 spa_freeze(spa);
 1697                 spa_close(spa, FTAG);
 1698         }
 1699         return (error);
 1700 }
 1701 
 1702 static int
 1703 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
 1704 {
 1705         spa_t *spa;
 1706         int error;
 1707 
 1708         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1709                 return (error);
 1710 
 1711         if (zc->zc_cookie < spa_version(spa) ||
 1712             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
 1713                 spa_close(spa, FTAG);
 1714                 return (SET_ERROR(EINVAL));
 1715         }
 1716 
 1717         spa_upgrade(spa, zc->zc_cookie);
 1718         spa_close(spa, FTAG);
 1719 
 1720         return (error);
 1721 }
 1722 
 1723 static int
 1724 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
 1725 {
 1726         spa_t *spa;
 1727         char *hist_buf;
 1728         uint64_t size;
 1729         int error;
 1730 
 1731         if ((size = zc->zc_history_len) == 0)
 1732                 return (SET_ERROR(EINVAL));
 1733 
 1734         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1735                 return (error);
 1736 
 1737         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 1738                 spa_close(spa, FTAG);
 1739                 return (SET_ERROR(ENOTSUP));
 1740         }
 1741 
 1742         hist_buf = vmem_alloc(size, KM_SLEEP);
 1743         if ((error = spa_history_get(spa, &zc->zc_history_offset,
 1744             &zc->zc_history_len, hist_buf)) == 0) {
 1745                 error = ddi_copyout(hist_buf,
 1746                     (void *)(uintptr_t)zc->zc_history,
 1747                     zc->zc_history_len, zc->zc_iflags);
 1748         }
 1749 
 1750         spa_close(spa, FTAG);
 1751         vmem_free(hist_buf, size);
 1752         return (error);
 1753 }
 1754 
 1755 static int
 1756 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
 1757 {
 1758         spa_t *spa;
 1759         int error;
 1760 
 1761         error = spa_open(zc->zc_name, &spa, FTAG);
 1762         if (error == 0) {
 1763                 error = spa_change_guid(spa);
 1764                 spa_close(spa, FTAG);
 1765         }
 1766         return (error);
 1767 }
 1768 
 1769 static int
 1770 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
 1771 {
 1772         return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
 1773 }
 1774 
 1775 /*
 1776  * inputs:
 1777  * zc_name              name of filesystem
 1778  * zc_obj               object to find
 1779  *
 1780  * outputs:
 1781  * zc_value             name of object
 1782  */
 1783 static int
 1784 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 1785 {
 1786         objset_t *os;
 1787         int error;
 1788 
 1789         /* XXX reading from objset not owned */
 1790         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
 1791             FTAG, &os)) != 0)
 1792                 return (error);
 1793         if (dmu_objset_type(os) != DMU_OST_ZFS) {
 1794                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
 1795                 return (SET_ERROR(EINVAL));
 1796         }
 1797         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 1798             sizeof (zc->zc_value));
 1799         dmu_objset_rele_flags(os, B_TRUE, FTAG);
 1800 
 1801         return (error);
 1802 }
 1803 
 1804 /*
 1805  * inputs:
 1806  * zc_name              name of filesystem
 1807  * zc_obj               object to find
 1808  *
 1809  * outputs:
 1810  * zc_stat              stats on object
 1811  * zc_value             path to object
 1812  */
 1813 static int
 1814 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
 1815 {
 1816         objset_t *os;
 1817         int error;
 1818 
 1819         /* XXX reading from objset not owned */
 1820         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
 1821             FTAG, &os)) != 0)
 1822                 return (error);
 1823         if (dmu_objset_type(os) != DMU_OST_ZFS) {
 1824                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
 1825                 return (SET_ERROR(EINVAL));
 1826         }
 1827         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
 1828             sizeof (zc->zc_value));
 1829         dmu_objset_rele_flags(os, B_TRUE, FTAG);
 1830 
 1831         return (error);
 1832 }
 1833 
 1834 static int
 1835 zfs_ioc_vdev_add(zfs_cmd_t *zc)
 1836 {
 1837         spa_t *spa;
 1838         int error;
 1839         nvlist_t *config;
 1840 
 1841         error = spa_open(zc->zc_name, &spa, FTAG);
 1842         if (error != 0)
 1843                 return (error);
 1844 
 1845         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1846             zc->zc_iflags, &config);
 1847         if (error == 0) {
 1848                 error = spa_vdev_add(spa, config);
 1849                 nvlist_free(config);
 1850         }
 1851         spa_close(spa, FTAG);
 1852         return (error);
 1853 }
 1854 
 1855 /*
 1856  * inputs:
 1857  * zc_name              name of the pool
 1858  * zc_guid              guid of vdev to remove
 1859  * zc_cookie            cancel removal
 1860  */
 1861 static int
 1862 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
 1863 {
 1864         spa_t *spa;
 1865         int error;
 1866 
 1867         error = spa_open(zc->zc_name, &spa, FTAG);
 1868         if (error != 0)
 1869                 return (error);
 1870         if (zc->zc_cookie != 0) {
 1871                 error = spa_vdev_remove_cancel(spa);
 1872         } else {
 1873                 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
 1874         }
 1875         spa_close(spa, FTAG);
 1876         return (error);
 1877 }
 1878 
 1879 static int
 1880 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
 1881 {
 1882         spa_t *spa;
 1883         int error;
 1884         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
 1885 
 1886         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1887                 return (error);
 1888         switch (zc->zc_cookie) {
 1889         case VDEV_STATE_ONLINE:
 1890                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
 1891                 break;
 1892 
 1893         case VDEV_STATE_OFFLINE:
 1894                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
 1895                 break;
 1896 
 1897         case VDEV_STATE_FAULTED:
 1898                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
 1899                     zc->zc_obj != VDEV_AUX_EXTERNAL &&
 1900                     zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
 1901                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 1902 
 1903                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
 1904                 break;
 1905 
 1906         case VDEV_STATE_DEGRADED:
 1907                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
 1908                     zc->zc_obj != VDEV_AUX_EXTERNAL)
 1909                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 1910 
 1911                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
 1912                 break;
 1913 
 1914         case VDEV_STATE_REMOVED:
 1915                 error = vdev_remove_wanted(spa, zc->zc_guid);
 1916                 break;
 1917 
 1918         default:
 1919                 error = SET_ERROR(EINVAL);
 1920         }
 1921         zc->zc_cookie = newstate;
 1922         spa_close(spa, FTAG);
 1923         return (error);
 1924 }
 1925 
 1926 static int
 1927 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
 1928 {
 1929         spa_t *spa;
 1930         nvlist_t *config;
 1931         int replacing = zc->zc_cookie;
 1932         int rebuild = zc->zc_simple;
 1933         int error;
 1934 
 1935         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1936                 return (error);
 1937 
 1938         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1939             zc->zc_iflags, &config)) == 0) {
 1940                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
 1941                     rebuild);
 1942                 nvlist_free(config);
 1943         }
 1944 
 1945         spa_close(spa, FTAG);
 1946         return (error);
 1947 }
 1948 
 1949 static int
 1950 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
 1951 {
 1952         spa_t *spa;
 1953         int error;
 1954 
 1955         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1956                 return (error);
 1957 
 1958         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
 1959 
 1960         spa_close(spa, FTAG);
 1961         return (error);
 1962 }
 1963 
 1964 static int
 1965 zfs_ioc_vdev_split(zfs_cmd_t *zc)
 1966 {
 1967         spa_t *spa;
 1968         nvlist_t *config, *props = NULL;
 1969         int error;
 1970         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
 1971 
 1972         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 1973                 return (error);
 1974 
 1975         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 1976             zc->zc_iflags, &config))) {
 1977                 spa_close(spa, FTAG);
 1978                 return (error);
 1979         }
 1980 
 1981         if (zc->zc_nvlist_src_size != 0 && (error =
 1982             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 1983             zc->zc_iflags, &props))) {
 1984                 spa_close(spa, FTAG);
 1985                 nvlist_free(config);
 1986                 return (error);
 1987         }
 1988 
 1989         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
 1990 
 1991         spa_close(spa, FTAG);
 1992 
 1993         nvlist_free(config);
 1994         nvlist_free(props);
 1995 
 1996         return (error);
 1997 }
 1998 
 1999 static int
 2000 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
 2001 {
 2002         spa_t *spa;
 2003         const char *path = zc->zc_value;
 2004         uint64_t guid = zc->zc_guid;
 2005         int error;
 2006 
 2007         error = spa_open(zc->zc_name, &spa, FTAG);
 2008         if (error != 0)
 2009                 return (error);
 2010 
 2011         error = spa_vdev_setpath(spa, guid, path);
 2012         spa_close(spa, FTAG);
 2013         return (error);
 2014 }
 2015 
 2016 static int
 2017 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
 2018 {
 2019         spa_t *spa;
 2020         const char *fru = zc->zc_value;
 2021         uint64_t guid = zc->zc_guid;
 2022         int error;
 2023 
 2024         error = spa_open(zc->zc_name, &spa, FTAG);
 2025         if (error != 0)
 2026                 return (error);
 2027 
 2028         error = spa_vdev_setfru(spa, guid, fru);
 2029         spa_close(spa, FTAG);
 2030         return (error);
 2031 }
 2032 
 2033 static int
 2034 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
 2035 {
 2036         int error = 0;
 2037         nvlist_t *nv;
 2038 
 2039         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 2040 
 2041         if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
 2042             (error = dsl_prop_get_all(os, &nv)) == 0) {
 2043                 dmu_objset_stats(os, nv);
 2044                 /*
 2045                  * NB: zvol_get_stats() will read the objset contents,
 2046                  * which we aren't supposed to do with a
 2047                  * DS_MODE_USER hold, because it could be
 2048                  * inconsistent.  So this is a bit of a workaround...
 2049                  * XXX reading without owning
 2050                  */
 2051                 if (!zc->zc_objset_stats.dds_inconsistent &&
 2052                     dmu_objset_type(os) == DMU_OST_ZVOL) {
 2053                         error = zvol_get_stats(os, nv);
 2054                         if (error == EIO) {
 2055                                 nvlist_free(nv);
 2056                                 return (error);
 2057                         }
 2058                         VERIFY0(error);
 2059                 }
 2060                 if (error == 0)
 2061                         error = put_nvlist(zc, nv);
 2062                 nvlist_free(nv);
 2063         }
 2064 
 2065         return (error);
 2066 }
 2067 
 2068 /*
 2069  * inputs:
 2070  * zc_name              name of filesystem
 2071  * zc_nvlist_dst_size   size of buffer for property nvlist
 2072  *
 2073  * outputs:
 2074  * zc_objset_stats      stats
 2075  * zc_nvlist_dst        property nvlist
 2076  * zc_nvlist_dst_size   size of property nvlist
 2077  */
 2078 static int
 2079 zfs_ioc_objset_stats(zfs_cmd_t *zc)
 2080 {
 2081         objset_t *os;
 2082         int error;
 2083 
 2084         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 2085         if (error == 0) {
 2086                 error = zfs_ioc_objset_stats_impl(zc, os);
 2087                 dmu_objset_rele(os, FTAG);
 2088         }
 2089 
 2090         return (error);
 2091 }
 2092 
 2093 /*
 2094  * inputs:
 2095  * zc_name              name of filesystem
 2096  * zc_nvlist_dst_size   size of buffer for property nvlist
 2097  *
 2098  * outputs:
 2099  * zc_nvlist_dst        received property nvlist
 2100  * zc_nvlist_dst_size   size of received property nvlist
 2101  *
 2102  * Gets received properties (distinct from local properties on or after
 2103  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
 2104  * local property values.
 2105  */
 2106 static int
 2107 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
 2108 {
 2109         int error = 0;
 2110         nvlist_t *nv;
 2111 
 2112         /*
 2113          * Without this check, we would return local property values if the
 2114          * caller has not already received properties on or after
 2115          * SPA_VERSION_RECVD_PROPS.
 2116          */
 2117         if (!dsl_prop_get_hasrecvd(zc->zc_name))
 2118                 return (SET_ERROR(ENOTSUP));
 2119 
 2120         if (zc->zc_nvlist_dst != 0 &&
 2121             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
 2122                 error = put_nvlist(zc, nv);
 2123                 nvlist_free(nv);
 2124         }
 2125 
 2126         return (error);
 2127 }
 2128 
 2129 static int
 2130 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
 2131 {
 2132         uint64_t value;
 2133         int error;
 2134 
 2135         /*
 2136          * zfs_get_zplprop() will either find a value or give us
 2137          * the default value (if there is one).
 2138          */
 2139         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
 2140                 return (error);
 2141         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
 2142         return (0);
 2143 }
 2144 
 2145 /*
 2146  * inputs:
 2147  * zc_name              name of filesystem
 2148  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
 2149  *
 2150  * outputs:
 2151  * zc_nvlist_dst        zpl property nvlist
 2152  * zc_nvlist_dst_size   size of zpl property nvlist
 2153  */
 2154 static int
 2155 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
 2156 {
 2157         objset_t *os;
 2158         int err;
 2159 
 2160         /* XXX reading without owning */
 2161         if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
 2162                 return (err);
 2163 
 2164         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 2165 
 2166         /*
 2167          * NB: nvl_add_zplprop() will read the objset contents,
 2168          * which we aren't supposed to do with a DS_MODE_USER
 2169          * hold, because it could be inconsistent.
 2170          */
 2171         if (zc->zc_nvlist_dst != 0 &&
 2172             !zc->zc_objset_stats.dds_inconsistent &&
 2173             dmu_objset_type(os) == DMU_OST_ZFS) {
 2174                 nvlist_t *nv;
 2175 
 2176                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 2177                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
 2178                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
 2179                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
 2180                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
 2181                         err = put_nvlist(zc, nv);
 2182                 nvlist_free(nv);
 2183         } else {
 2184                 err = SET_ERROR(ENOENT);
 2185         }
 2186         dmu_objset_rele(os, FTAG);
 2187         return (err);
 2188 }
 2189 
 2190 /*
 2191  * inputs:
 2192  * zc_name              name of filesystem
 2193  * zc_cookie            zap cursor
 2194  * zc_nvlist_dst_size   size of buffer for property nvlist
 2195  *
 2196  * outputs:
 2197  * zc_name              name of next filesystem
 2198  * zc_cookie            zap cursor
 2199  * zc_objset_stats      stats
 2200  * zc_nvlist_dst        property nvlist
 2201  * zc_nvlist_dst_size   size of property nvlist
 2202  */
 2203 static int
 2204 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
 2205 {
 2206         objset_t *os;
 2207         int error;
 2208         char *p;
 2209         size_t orig_len = strlen(zc->zc_name);
 2210 
 2211 top:
 2212         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
 2213                 if (error == ENOENT)
 2214                         error = SET_ERROR(ESRCH);
 2215                 return (error);
 2216         }
 2217 
 2218         p = strrchr(zc->zc_name, '/');
 2219         if (p == NULL || p[1] != '\0')
 2220                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
 2221         p = zc->zc_name + strlen(zc->zc_name);
 2222 
 2223         do {
 2224                 error = dmu_dir_list_next(os,
 2225                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
 2226                     NULL, &zc->zc_cookie);
 2227                 if (error == ENOENT)
 2228                         error = SET_ERROR(ESRCH);
 2229         } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
 2230         dmu_objset_rele(os, FTAG);
 2231 
 2232         /*
 2233          * If it's an internal dataset (ie. with a '$' in its name),
 2234          * don't try to get stats for it, otherwise we'll return ENOENT.
 2235          */
 2236         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
 2237                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 2238                 if (error == ENOENT) {
 2239                         /* We lost a race with destroy, get the next one. */
 2240                         zc->zc_name[orig_len] = '\0';
 2241                         goto top;
 2242                 }
 2243         }
 2244         return (error);
 2245 }
 2246 
 2247 /*
 2248  * inputs:
 2249  * zc_name              name of filesystem
 2250  * zc_cookie            zap cursor
 2251  * zc_nvlist_src        iteration range nvlist
 2252  * zc_nvlist_src_size   size of iteration range nvlist
 2253  *
 2254  * outputs:
 2255  * zc_name              name of next snapshot
 2256  * zc_objset_stats      stats
 2257  * zc_nvlist_dst        property nvlist
 2258  * zc_nvlist_dst_size   size of property nvlist
 2259  */
 2260 static int
 2261 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
 2262 {
 2263         int error;
 2264         objset_t *os, *ossnap;
 2265         dsl_dataset_t *ds;
 2266         uint64_t min_txg = 0, max_txg = 0;
 2267 
 2268         if (zc->zc_nvlist_src_size != 0) {
 2269                 nvlist_t *props = NULL;
 2270                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 2271                     zc->zc_iflags, &props);
 2272                 if (error != 0)
 2273                         return (error);
 2274                 (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
 2275                     &min_txg);
 2276                 (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
 2277                     &max_txg);
 2278                 nvlist_free(props);
 2279         }
 2280 
 2281         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 2282         if (error != 0) {
 2283                 return (error == ENOENT ? SET_ERROR(ESRCH) : error);
 2284         }
 2285 
 2286         /*
 2287          * A dataset name of maximum length cannot have any snapshots,
 2288          * so exit immediately.
 2289          */
 2290         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
 2291             ZFS_MAX_DATASET_NAME_LEN) {
 2292                 dmu_objset_rele(os, FTAG);
 2293                 return (SET_ERROR(ESRCH));
 2294         }
 2295 
 2296         while (error == 0) {
 2297                 if (issig(JUSTLOOKING) && issig(FORREAL)) {
 2298                         error = SET_ERROR(EINTR);
 2299                         break;
 2300                 }
 2301 
 2302                 error = dmu_snapshot_list_next(os,
 2303                     sizeof (zc->zc_name) - strlen(zc->zc_name),
 2304                     zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
 2305                     &zc->zc_cookie, NULL);
 2306                 if (error == ENOENT) {
 2307                         error = SET_ERROR(ESRCH);
 2308                         break;
 2309                 } else if (error != 0) {
 2310                         break;
 2311                 }
 2312 
 2313                 error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
 2314                     FTAG, &ds);
 2315                 if (error != 0)
 2316                         break;
 2317 
 2318                 if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
 2319                     (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
 2320                         dsl_dataset_rele(ds, FTAG);
 2321                         /* undo snapshot name append */
 2322                         *(strchr(zc->zc_name, '@') + 1) = '\0';
 2323                         /* skip snapshot */
 2324                         continue;
 2325                 }
 2326 
 2327                 if (zc->zc_simple) {
 2328                         dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
 2329                         dsl_dataset_rele(ds, FTAG);
 2330                         break;
 2331                 }
 2332 
 2333                 if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
 2334                         dsl_dataset_rele(ds, FTAG);
 2335                         break;
 2336                 }
 2337                 if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
 2338                         dsl_dataset_rele(ds, FTAG);
 2339                         break;
 2340                 }
 2341                 dsl_dataset_rele(ds, FTAG);
 2342                 break;
 2343         }
 2344 
 2345         dmu_objset_rele(os, FTAG);
 2346         /* if we failed, undo the @ that we tacked on to zc_name */
 2347         if (error != 0)
 2348                 *strchr(zc->zc_name, '@') = '\0';
 2349         return (error);
 2350 }
 2351 
 2352 static int
 2353 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
 2354 {
 2355         const char *propname = nvpair_name(pair);
 2356         uint64_t *valary;
 2357         unsigned int vallen;
 2358         const char *dash, *domain;
 2359         zfs_userquota_prop_t type;
 2360         uint64_t rid;
 2361         uint64_t quota;
 2362         zfsvfs_t *zfsvfs;
 2363         int err;
 2364 
 2365         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 2366                 nvlist_t *attrs;
 2367                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 2368                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 2369                     &pair) != 0)
 2370                         return (SET_ERROR(EINVAL));
 2371         }
 2372 
 2373         /*
 2374          * A correctly constructed propname is encoded as
 2375          * userquota@<rid>-<domain>.
 2376          */
 2377         if ((dash = strchr(propname, '-')) == NULL ||
 2378             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
 2379             vallen != 3)
 2380                 return (SET_ERROR(EINVAL));
 2381 
 2382         domain = dash + 1;
 2383         type = valary[0];
 2384         rid = valary[1];
 2385         quota = valary[2];
 2386 
 2387         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
 2388         if (err == 0) {
 2389                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
 2390                 zfsvfs_rele(zfsvfs, FTAG);
 2391         }
 2392 
 2393         return (err);
 2394 }
 2395 
 2396 /*
 2397  * If the named property is one that has a special function to set its value,
 2398  * return 0 on success and a positive error code on failure; otherwise if it is
 2399  * not one of the special properties handled by this function, return -1.
 2400  *
 2401  * XXX: It would be better for callers of the property interface if we handled
 2402  * these special cases in dsl_prop.c (in the dsl layer).
 2403  */
 2404 static int
 2405 zfs_prop_set_special(const char *dsname, zprop_source_t source,
 2406     nvpair_t *pair)
 2407 {
 2408         const char *propname = nvpair_name(pair);
 2409         zfs_prop_t prop = zfs_name_to_prop(propname);
 2410         uint64_t intval = 0;
 2411         const char *strval = NULL;
 2412         int err = -1;
 2413 
 2414         if (prop == ZPROP_USERPROP) {
 2415                 if (zfs_prop_userquota(propname))
 2416                         return (zfs_prop_set_userquota(dsname, pair));
 2417                 return (-1);
 2418         }
 2419 
 2420         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 2421                 nvlist_t *attrs;
 2422                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 2423                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 2424                     &pair) == 0);
 2425         }
 2426 
 2427         /* all special properties are numeric except for keylocation */
 2428         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
 2429                 strval = fnvpair_value_string(pair);
 2430         } else {
 2431                 intval = fnvpair_value_uint64(pair);
 2432         }
 2433 
 2434         switch (prop) {
 2435         case ZFS_PROP_QUOTA:
 2436                 err = dsl_dir_set_quota(dsname, source, intval);
 2437                 break;
 2438         case ZFS_PROP_REFQUOTA:
 2439                 err = dsl_dataset_set_refquota(dsname, source, intval);
 2440                 break;
 2441         case ZFS_PROP_FILESYSTEM_LIMIT:
 2442         case ZFS_PROP_SNAPSHOT_LIMIT:
 2443                 if (intval == UINT64_MAX) {
 2444                         /* clearing the limit, just do it */
 2445                         err = 0;
 2446                 } else {
 2447                         err = dsl_dir_activate_fs_ss_limit(dsname);
 2448                 }
 2449                 /*
 2450                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
 2451                  * default path to set the value in the nvlist.
 2452                  */
 2453                 if (err == 0)
 2454                         err = -1;
 2455                 break;
 2456         case ZFS_PROP_KEYLOCATION:
 2457                 err = dsl_crypto_can_set_keylocation(dsname, strval);
 2458 
 2459                 /*
 2460                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
 2461                  * default path to set the value in the nvlist.
 2462                  */
 2463                 if (err == 0)
 2464                         err = -1;
 2465                 break;
 2466         case ZFS_PROP_RESERVATION:
 2467                 err = dsl_dir_set_reservation(dsname, source, intval);
 2468                 break;
 2469         case ZFS_PROP_REFRESERVATION:
 2470                 err = dsl_dataset_set_refreservation(dsname, source, intval);
 2471                 break;
 2472         case ZFS_PROP_COMPRESSION:
 2473                 err = dsl_dataset_set_compression(dsname, source, intval);
 2474                 /*
 2475                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
 2476                  * default path to set the value in the nvlist.
 2477                  */
 2478                 if (err == 0)
 2479                         err = -1;
 2480                 break;
 2481         case ZFS_PROP_VOLSIZE:
 2482                 err = zvol_set_volsize(dsname, intval);
 2483                 break;
 2484         case ZFS_PROP_SNAPDEV:
 2485                 err = zvol_set_snapdev(dsname, source, intval);
 2486                 break;
 2487         case ZFS_PROP_VOLMODE:
 2488                 err = zvol_set_volmode(dsname, source, intval);
 2489                 break;
 2490         case ZFS_PROP_VERSION:
 2491         {
 2492                 zfsvfs_t *zfsvfs;
 2493 
 2494                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
 2495                         break;
 2496 
 2497                 err = zfs_set_version(zfsvfs, intval);
 2498                 zfsvfs_rele(zfsvfs, FTAG);
 2499 
 2500                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
 2501                         zfs_cmd_t *zc;
 2502 
 2503                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 2504                         (void) strlcpy(zc->zc_name, dsname,
 2505                             sizeof (zc->zc_name));
 2506                         (void) zfs_ioc_userspace_upgrade(zc);
 2507                         (void) zfs_ioc_id_quota_upgrade(zc);
 2508                         kmem_free(zc, sizeof (zfs_cmd_t));
 2509                 }
 2510                 break;
 2511         }
 2512         default:
 2513                 err = -1;
 2514         }
 2515 
 2516         return (err);
 2517 }
 2518 
 2519 static boolean_t
 2520 zfs_is_namespace_prop(zfs_prop_t prop)
 2521 {
 2522         switch (prop) {
 2523 
 2524         case ZFS_PROP_ATIME:
 2525         case ZFS_PROP_RELATIME:
 2526         case ZFS_PROP_DEVICES:
 2527         case ZFS_PROP_EXEC:
 2528         case ZFS_PROP_SETUID:
 2529         case ZFS_PROP_READONLY:
 2530         case ZFS_PROP_XATTR:
 2531         case ZFS_PROP_NBMAND:
 2532                 return (B_TRUE);
 2533 
 2534         default:
 2535                 return (B_FALSE);
 2536         }
 2537 }
 2538 
 2539 /*
 2540  * This function is best effort. If it fails to set any of the given properties,
 2541  * it continues to set as many as it can and returns the last error
 2542  * encountered. If the caller provides a non-NULL errlist, it will be filled in
 2543  * with the list of names of all the properties that failed along with the
 2544  * corresponding error numbers.
 2545  *
 2546  * If every property is set successfully, zero is returned and errlist is not
 2547  * modified.
 2548  */
 2549 int
 2550 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
 2551     nvlist_t *errlist)
 2552 {
 2553         nvpair_t *pair;
 2554         nvpair_t *propval;
 2555         int rv = 0;
 2556         int err;
 2557         uint64_t intval;
 2558         const char *strval;
 2559         boolean_t should_update_mount_cache = B_FALSE;
 2560 
 2561         nvlist_t *genericnvl = fnvlist_alloc();
 2562         nvlist_t *retrynvl = fnvlist_alloc();
 2563 retry:
 2564         pair = NULL;
 2565         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
 2566                 const char *propname = nvpair_name(pair);
 2567                 zfs_prop_t prop = zfs_name_to_prop(propname);
 2568                 err = 0;
 2569 
 2570                 /* decode the property value */
 2571                 propval = pair;
 2572                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 2573                         nvlist_t *attrs;
 2574                         attrs = fnvpair_value_nvlist(pair);
 2575                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 2576                             &propval) != 0)
 2577                                 err = SET_ERROR(EINVAL);
 2578                 }
 2579 
 2580                 /* Validate value type */
 2581                 if (err == 0 && source == ZPROP_SRC_INHERITED) {
 2582                         /* inherited properties are expected to be booleans */
 2583                         if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
 2584                                 err = SET_ERROR(EINVAL);
 2585                 } else if (err == 0 && prop == ZPROP_USERPROP) {
 2586                         if (zfs_prop_user(propname)) {
 2587                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
 2588                                         err = SET_ERROR(EINVAL);
 2589                         } else if (zfs_prop_userquota(propname)) {
 2590                                 if (nvpair_type(propval) !=
 2591                                     DATA_TYPE_UINT64_ARRAY)
 2592                                         err = SET_ERROR(EINVAL);
 2593                         } else {
 2594                                 err = SET_ERROR(EINVAL);
 2595                         }
 2596                 } else if (err == 0) {
 2597                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
 2598                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
 2599                                         err = SET_ERROR(EINVAL);
 2600                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
 2601                                 const char *unused;
 2602 
 2603                                 intval = fnvpair_value_uint64(propval);
 2604 
 2605                                 switch (zfs_prop_get_type(prop)) {
 2606                                 case PROP_TYPE_NUMBER:
 2607                                         break;
 2608                                 case PROP_TYPE_STRING:
 2609                                         err = SET_ERROR(EINVAL);
 2610                                         break;
 2611                                 case PROP_TYPE_INDEX:
 2612                                         if (zfs_prop_index_to_string(prop,
 2613                                             intval, &unused) != 0)
 2614                                                 err =
 2615                                                     SET_ERROR(ZFS_ERR_BADPROP);
 2616                                         break;
 2617                                 default:
 2618                                         cmn_err(CE_PANIC,
 2619                                             "unknown property type");
 2620                                 }
 2621                         } else {
 2622                                 err = SET_ERROR(EINVAL);
 2623                         }
 2624                 }
 2625 
 2626                 /* Validate permissions */
 2627                 if (err == 0)
 2628                         err = zfs_check_settable(dsname, pair, CRED());
 2629 
 2630                 if (err == 0) {
 2631                         if (source == ZPROP_SRC_INHERITED)
 2632                                 err = -1; /* does not need special handling */
 2633                         else
 2634                                 err = zfs_prop_set_special(dsname, source,
 2635                                     pair);
 2636                         if (err == -1) {
 2637                                 /*
 2638                                  * For better performance we build up a list of
 2639                                  * properties to set in a single transaction.
 2640                                  */
 2641                                 err = nvlist_add_nvpair(genericnvl, pair);
 2642                         } else if (err != 0 && nvl != retrynvl) {
 2643                                 /*
 2644                                  * This may be a spurious error caused by
 2645                                  * receiving quota and reservation out of order.
 2646                                  * Try again in a second pass.
 2647                                  */
 2648                                 err = nvlist_add_nvpair(retrynvl, pair);
 2649                         }
 2650                 }
 2651 
 2652                 if (err != 0) {
 2653                         if (errlist != NULL)
 2654                                 fnvlist_add_int32(errlist, propname, err);
 2655                         rv = err;
 2656                 }
 2657 
 2658                 if (zfs_is_namespace_prop(prop))
 2659                         should_update_mount_cache = B_TRUE;
 2660         }
 2661 
 2662         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
 2663                 nvl = retrynvl;
 2664                 goto retry;
 2665         }
 2666 
 2667         if (nvlist_empty(genericnvl))
 2668                 goto out;
 2669 
 2670         /*
 2671          * Try to set them all in one batch.
 2672          */
 2673         err = dsl_props_set(dsname, source, genericnvl);
 2674         if (err == 0)
 2675                 goto out;
 2676 
 2677         /*
 2678          * If batching fails, we still want to set as many properties as we
 2679          * can, so try setting them individually.
 2680          */
 2681         pair = NULL;
 2682         while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
 2683                 const char *propname = nvpair_name(pair);
 2684 
 2685                 propval = pair;
 2686                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 2687                         nvlist_t *attrs;
 2688                         attrs = fnvpair_value_nvlist(pair);
 2689                         propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
 2690                 }
 2691 
 2692                 if (nvpair_type(propval) == DATA_TYPE_STRING) {
 2693                         strval = fnvpair_value_string(propval);
 2694                         err = dsl_prop_set_string(dsname, propname,
 2695                             source, strval);
 2696                 } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
 2697                         err = dsl_prop_inherit(dsname, propname, source);
 2698                 } else {
 2699                         intval = fnvpair_value_uint64(propval);
 2700                         err = dsl_prop_set_int(dsname, propname, source,
 2701                             intval);
 2702                 }
 2703 
 2704                 if (err != 0) {
 2705                         if (errlist != NULL) {
 2706                                 fnvlist_add_int32(errlist, propname, err);
 2707                         }
 2708                         rv = err;
 2709                 }
 2710         }
 2711 
 2712 out:
 2713         if (should_update_mount_cache)
 2714                 zfs_ioctl_update_mount_cache(dsname);
 2715 
 2716         nvlist_free(genericnvl);
 2717         nvlist_free(retrynvl);
 2718 
 2719         return (rv);
 2720 }
 2721 
 2722 /*
 2723  * Check that all the properties are valid user properties.
 2724  */
 2725 static int
 2726 zfs_check_userprops(nvlist_t *nvl)
 2727 {
 2728         nvpair_t *pair = NULL;
 2729 
 2730         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
 2731                 const char *propname = nvpair_name(pair);
 2732 
 2733                 if (!zfs_prop_user(propname) ||
 2734                     nvpair_type(pair) != DATA_TYPE_STRING)
 2735                         return (SET_ERROR(EINVAL));
 2736 
 2737                 if (strlen(propname) >= ZAP_MAXNAMELEN)
 2738                         return (SET_ERROR(ENAMETOOLONG));
 2739 
 2740                 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
 2741                         return (SET_ERROR(E2BIG));
 2742         }
 2743         return (0);
 2744 }
 2745 
 2746 static void
 2747 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
 2748 {
 2749         nvpair_t *pair;
 2750 
 2751         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 2752 
 2753         pair = NULL;
 2754         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
 2755                 if (nvlist_exists(skipped, nvpair_name(pair)))
 2756                         continue;
 2757 
 2758                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
 2759         }
 2760 }
 2761 
 2762 static int
 2763 clear_received_props(const char *dsname, nvlist_t *props,
 2764     nvlist_t *skipped)
 2765 {
 2766         int err = 0;
 2767         nvlist_t *cleared_props = NULL;
 2768         props_skip(props, skipped, &cleared_props);
 2769         if (!nvlist_empty(cleared_props)) {
 2770                 /*
 2771                  * Acts on local properties until the dataset has received
 2772                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
 2773                  */
 2774                 zprop_source_t flags = (ZPROP_SRC_NONE |
 2775                     (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
 2776                 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
 2777         }
 2778         nvlist_free(cleared_props);
 2779         return (err);
 2780 }
 2781 
 2782 /*
 2783  * inputs:
 2784  * zc_name              name of filesystem
 2785  * zc_value             name of property to set
 2786  * zc_nvlist_src{_size} nvlist of properties to apply
 2787  * zc_cookie            received properties flag
 2788  *
 2789  * outputs:
 2790  * zc_nvlist_dst{_size} error for each unapplied received property
 2791  */
 2792 static int
 2793 zfs_ioc_set_prop(zfs_cmd_t *zc)
 2794 {
 2795         nvlist_t *nvl;
 2796         boolean_t received = zc->zc_cookie;
 2797         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
 2798             ZPROP_SRC_LOCAL);
 2799         nvlist_t *errors;
 2800         int error;
 2801 
 2802         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 2803             zc->zc_iflags, &nvl)) != 0)
 2804                 return (error);
 2805 
 2806         if (received) {
 2807                 nvlist_t *origprops;
 2808 
 2809                 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
 2810                         (void) clear_received_props(zc->zc_name,
 2811                             origprops, nvl);
 2812                         nvlist_free(origprops);
 2813                 }
 2814 
 2815                 error = dsl_prop_set_hasrecvd(zc->zc_name);
 2816         }
 2817 
 2818         errors = fnvlist_alloc();
 2819         if (error == 0)
 2820                 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
 2821 
 2822         if (zc->zc_nvlist_dst != 0 && errors != NULL) {
 2823                 (void) put_nvlist(zc, errors);
 2824         }
 2825 
 2826         nvlist_free(errors);
 2827         nvlist_free(nvl);
 2828         return (error);
 2829 }
 2830 
 2831 /*
 2832  * inputs:
 2833  * zc_name              name of filesystem
 2834  * zc_value             name of property to inherit
 2835  * zc_cookie            revert to received value if TRUE
 2836  *
 2837  * outputs:             none
 2838  */
 2839 static int
 2840 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
 2841 {
 2842         const char *propname = zc->zc_value;
 2843         zfs_prop_t prop = zfs_name_to_prop(propname);
 2844         boolean_t received = zc->zc_cookie;
 2845         zprop_source_t source = (received
 2846             ? ZPROP_SRC_NONE            /* revert to received value, if any */
 2847             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
 2848         nvlist_t *dummy;
 2849         nvpair_t *pair;
 2850         zprop_type_t type;
 2851         int err;
 2852 
 2853         if (!received) {
 2854                 /*
 2855                  * Only check this in the non-received case. We want to allow
 2856                  * 'inherit -S' to revert non-inheritable properties like quota
 2857                  * and reservation to the received or default values even though
 2858                  * they are not considered inheritable.
 2859                  */
 2860                 if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
 2861                         return (SET_ERROR(EINVAL));
 2862         }
 2863 
 2864         if (prop == ZPROP_USERPROP) {
 2865                 if (!zfs_prop_user(propname))
 2866                         return (SET_ERROR(EINVAL));
 2867 
 2868                 type = PROP_TYPE_STRING;
 2869         } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
 2870                 return (SET_ERROR(EINVAL));
 2871         } else {
 2872                 type = zfs_prop_get_type(prop);
 2873         }
 2874 
 2875         /*
 2876          * zfs_prop_set_special() expects properties in the form of an
 2877          * nvpair with type info.
 2878          */
 2879         dummy = fnvlist_alloc();
 2880 
 2881         switch (type) {
 2882         case PROP_TYPE_STRING:
 2883                 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
 2884                 break;
 2885         case PROP_TYPE_NUMBER:
 2886         case PROP_TYPE_INDEX:
 2887                 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
 2888                 break;
 2889         default:
 2890                 err = SET_ERROR(EINVAL);
 2891                 goto errout;
 2892         }
 2893 
 2894         pair = nvlist_next_nvpair(dummy, NULL);
 2895         if (pair == NULL) {
 2896                 err = SET_ERROR(EINVAL);
 2897         } else {
 2898                 err = zfs_prop_set_special(zc->zc_name, source, pair);
 2899                 if (err == -1) /* property is not "special", needs handling */
 2900                         err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
 2901                             source);
 2902         }
 2903 
 2904 errout:
 2905         nvlist_free(dummy);
 2906         return (err);
 2907 }
 2908 
 2909 static int
 2910 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
 2911 {
 2912         nvlist_t *props;
 2913         spa_t *spa;
 2914         int error;
 2915         nvpair_t *pair;
 2916 
 2917         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 2918             zc->zc_iflags, &props)))
 2919                 return (error);
 2920 
 2921         /*
 2922          * If the only property is the configfile, then just do a spa_lookup()
 2923          * to handle the faulted case.
 2924          */
 2925         pair = nvlist_next_nvpair(props, NULL);
 2926         if (pair != NULL && strcmp(nvpair_name(pair),
 2927             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
 2928             nvlist_next_nvpair(props, pair) == NULL) {
 2929                 mutex_enter(&spa_namespace_lock);
 2930                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
 2931                         spa_configfile_set(spa, props, B_FALSE);
 2932                         spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
 2933                 }
 2934                 mutex_exit(&spa_namespace_lock);
 2935                 if (spa != NULL) {
 2936                         nvlist_free(props);
 2937                         return (0);
 2938                 }
 2939         }
 2940 
 2941         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
 2942                 nvlist_free(props);
 2943                 return (error);
 2944         }
 2945 
 2946         error = spa_prop_set(spa, props);
 2947 
 2948         nvlist_free(props);
 2949         spa_close(spa, FTAG);
 2950 
 2951         return (error);
 2952 }
 2953 
 2954 static int
 2955 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
 2956 {
 2957         spa_t *spa;
 2958         int error;
 2959         nvlist_t *nvp = NULL;
 2960 
 2961         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
 2962                 /*
 2963                  * If the pool is faulted, there may be properties we can still
 2964                  * get (such as altroot and cachefile), so attempt to get them
 2965                  * anyway.
 2966                  */
 2967                 mutex_enter(&spa_namespace_lock);
 2968                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
 2969                         error = spa_prop_get(spa, &nvp);
 2970                 mutex_exit(&spa_namespace_lock);
 2971         } else {
 2972                 error = spa_prop_get(spa, &nvp);
 2973                 spa_close(spa, FTAG);
 2974         }
 2975 
 2976         if (error == 0 && zc->zc_nvlist_dst != 0)
 2977                 error = put_nvlist(zc, nvp);
 2978         else
 2979                 error = SET_ERROR(EFAULT);
 2980 
 2981         nvlist_free(nvp);
 2982         return (error);
 2983 }
 2984 
 2985 /*
 2986  * innvl: {
 2987  *     "vdevprops_set_vdev" -> guid
 2988  *     "vdevprops_set_props" -> { prop -> value }
 2989  * }
 2990  *
 2991  * outnvl: propname -> error code (int32)
 2992  */
 2993 static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
 2994         {ZPOOL_VDEV_PROPS_SET_VDEV,     DATA_TYPE_UINT64,       0},
 2995         {ZPOOL_VDEV_PROPS_SET_PROPS,    DATA_TYPE_NVLIST,       0}
 2996 };
 2997 
 2998 static int
 2999 zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3000 {
 3001         spa_t *spa;
 3002         int error;
 3003         vdev_t *vd;
 3004         uint64_t vdev_guid;
 3005 
 3006         /* Early validation */
 3007         if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
 3008             &vdev_guid) != 0)
 3009                 return (SET_ERROR(EINVAL));
 3010 
 3011         if (outnvl == NULL)
 3012                 return (SET_ERROR(EINVAL));
 3013 
 3014         if ((error = spa_open(poolname, &spa, FTAG)) != 0)
 3015                 return (error);
 3016 
 3017         ASSERT(spa_writeable(spa));
 3018 
 3019         if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
 3020                 spa_close(spa, FTAG);
 3021                 return (SET_ERROR(ENOENT));
 3022         }
 3023 
 3024         error = vdev_prop_set(vd, innvl, outnvl);
 3025 
 3026         spa_close(spa, FTAG);
 3027 
 3028         return (error);
 3029 }
 3030 
 3031 /*
 3032  * innvl: {
 3033  *     "vdevprops_get_vdev" -> guid
 3034  *     (optional) "vdevprops_get_props" -> { propname -> propid }
 3035  * }
 3036  *
 3037  * outnvl: propname -> value
 3038  */
 3039 static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
 3040         {ZPOOL_VDEV_PROPS_GET_VDEV,     DATA_TYPE_UINT64,       0},
 3041         {ZPOOL_VDEV_PROPS_GET_PROPS,    DATA_TYPE_NVLIST,       ZK_OPTIONAL}
 3042 };
 3043 
 3044 static int
 3045 zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3046 {
 3047         spa_t *spa;
 3048         int error;
 3049         vdev_t *vd;
 3050         uint64_t vdev_guid;
 3051 
 3052         /* Early validation */
 3053         if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
 3054             &vdev_guid) != 0)
 3055                 return (SET_ERROR(EINVAL));
 3056 
 3057         if (outnvl == NULL)
 3058                 return (SET_ERROR(EINVAL));
 3059 
 3060         if ((error = spa_open(poolname, &spa, FTAG)) != 0)
 3061                 return (error);
 3062 
 3063         if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
 3064                 spa_close(spa, FTAG);
 3065                 return (SET_ERROR(ENOENT));
 3066         }
 3067 
 3068         error = vdev_prop_get(vd, innvl, outnvl);
 3069 
 3070         spa_close(spa, FTAG);
 3071 
 3072         return (error);
 3073 }
 3074 
 3075 /*
 3076  * inputs:
 3077  * zc_name              name of filesystem
 3078  * zc_nvlist_src{_size} nvlist of delegated permissions
 3079  * zc_perm_action       allow/unallow flag
 3080  *
 3081  * outputs:             none
 3082  */
 3083 static int
 3084 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
 3085 {
 3086         int error;
 3087         nvlist_t *fsaclnv = NULL;
 3088 
 3089         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 3090             zc->zc_iflags, &fsaclnv)) != 0)
 3091                 return (error);
 3092 
 3093         /*
 3094          * Verify nvlist is constructed correctly
 3095          */
 3096         if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
 3097                 nvlist_free(fsaclnv);
 3098                 return (SET_ERROR(EINVAL));
 3099         }
 3100 
 3101         /*
 3102          * If we don't have PRIV_SYS_MOUNT, then validate
 3103          * that user is allowed to hand out each permission in
 3104          * the nvlist(s)
 3105          */
 3106 
 3107         error = secpolicy_zfs(CRED());
 3108         if (error != 0) {
 3109                 if (zc->zc_perm_action == B_FALSE) {
 3110                         error = dsl_deleg_can_allow(zc->zc_name,
 3111                             fsaclnv, CRED());
 3112                 } else {
 3113                         error = dsl_deleg_can_unallow(zc->zc_name,
 3114                             fsaclnv, CRED());
 3115                 }
 3116         }
 3117 
 3118         if (error == 0)
 3119                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
 3120 
 3121         nvlist_free(fsaclnv);
 3122         return (error);
 3123 }
 3124 
 3125 /*
 3126  * inputs:
 3127  * zc_name              name of filesystem
 3128  *
 3129  * outputs:
 3130  * zc_nvlist_src{_size} nvlist of delegated permissions
 3131  */
 3132 static int
 3133 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 3134 {
 3135         nvlist_t *nvp;
 3136         int error;
 3137 
 3138         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
 3139                 error = put_nvlist(zc, nvp);
 3140                 nvlist_free(nvp);
 3141         }
 3142 
 3143         return (error);
 3144 }
 3145 
 3146 static void
 3147 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 3148 {
 3149         zfs_creat_t *zct = arg;
 3150 
 3151         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
 3152 }
 3153 
 3154 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
 3155 
 3156 /*
 3157  * inputs:
 3158  * os                   parent objset pointer (NULL if root fs)
 3159  * fuids_ok             fuids allowed in this version of the spa?
 3160  * sa_ok                SAs allowed in this version of the spa?
 3161  * createprops          list of properties requested by creator
 3162  *
 3163  * outputs:
 3164  * zplprops     values for the zplprops we attach to the master node object
 3165  * is_ci        true if requested file system will be purely case-insensitive
 3166  *
 3167  * Determine the settings for utf8only, normalization and
 3168  * casesensitivity.  Specific values may have been requested by the
 3169  * creator and/or we can inherit values from the parent dataset.  If
 3170  * the file system is of too early a vintage, a creator can not
 3171  * request settings for these properties, even if the requested
 3172  * setting is the default value.  We don't actually want to create dsl
 3173  * properties for these, so remove them from the source nvlist after
 3174  * processing.
 3175  */
 3176 static int
 3177 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
 3178     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
 3179     nvlist_t *zplprops, boolean_t *is_ci)
 3180 {
 3181         uint64_t sense = ZFS_PROP_UNDEFINED;
 3182         uint64_t norm = ZFS_PROP_UNDEFINED;
 3183         uint64_t u8 = ZFS_PROP_UNDEFINED;
 3184         int error;
 3185 
 3186         ASSERT(zplprops != NULL);
 3187 
 3188         /* parent dataset must be a filesystem */
 3189         if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
 3190                 return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
 3191 
 3192         /*
 3193          * Pull out creator prop choices, if any.
 3194          */
 3195         if (createprops) {
 3196                 (void) nvlist_lookup_uint64(createprops,
 3197                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
 3198                 (void) nvlist_lookup_uint64(createprops,
 3199                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
 3200                 (void) nvlist_remove_all(createprops,
 3201                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
 3202                 (void) nvlist_lookup_uint64(createprops,
 3203                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
 3204                 (void) nvlist_remove_all(createprops,
 3205                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
 3206                 (void) nvlist_lookup_uint64(createprops,
 3207                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
 3208                 (void) nvlist_remove_all(createprops,
 3209                     zfs_prop_to_name(ZFS_PROP_CASE));
 3210         }
 3211 
 3212         /*
 3213          * If the zpl version requested is whacky or the file system
 3214          * or pool is version is too "young" to support normalization
 3215          * and the creator tried to set a value for one of the props,
 3216          * error out.
 3217          */
 3218         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
 3219             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
 3220             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
 3221             (zplver < ZPL_VERSION_NORMALIZATION &&
 3222             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
 3223             sense != ZFS_PROP_UNDEFINED)))
 3224                 return (SET_ERROR(ENOTSUP));
 3225 
 3226         /*
 3227          * Put the version in the zplprops
 3228          */
 3229         VERIFY(nvlist_add_uint64(zplprops,
 3230             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
 3231 
 3232         if (norm == ZFS_PROP_UNDEFINED &&
 3233             (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
 3234                 return (error);
 3235         VERIFY(nvlist_add_uint64(zplprops,
 3236             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
 3237 
 3238         /*
 3239          * If we're normalizing, names must always be valid UTF-8 strings.
 3240          */
 3241         if (norm)
 3242                 u8 = 1;
 3243         if (u8 == ZFS_PROP_UNDEFINED &&
 3244             (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
 3245                 return (error);
 3246         VERIFY(nvlist_add_uint64(zplprops,
 3247             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
 3248 
 3249         if (sense == ZFS_PROP_UNDEFINED &&
 3250             (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
 3251                 return (error);
 3252         VERIFY(nvlist_add_uint64(zplprops,
 3253             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
 3254 
 3255         if (is_ci)
 3256                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
 3257 
 3258         return (0);
 3259 }
 3260 
 3261 static int
 3262 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
 3263     nvlist_t *zplprops, boolean_t *is_ci)
 3264 {
 3265         boolean_t fuids_ok, sa_ok;
 3266         uint64_t zplver = ZPL_VERSION;
 3267         objset_t *os = NULL;
 3268         char parentname[ZFS_MAX_DATASET_NAME_LEN];
 3269         spa_t *spa;
 3270         uint64_t spa_vers;
 3271         int error;
 3272 
 3273         zfs_get_parent(dataset, parentname, sizeof (parentname));
 3274 
 3275         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
 3276                 return (error);
 3277 
 3278         spa_vers = spa_version(spa);
 3279         spa_close(spa, FTAG);
 3280 
 3281         zplver = zfs_zpl_version_map(spa_vers);
 3282         fuids_ok = (zplver >= ZPL_VERSION_FUID);
 3283         sa_ok = (zplver >= ZPL_VERSION_SA);
 3284 
 3285         /*
 3286          * Open parent object set so we can inherit zplprop values.
 3287          */
 3288         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
 3289                 return (error);
 3290 
 3291         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
 3292             zplprops, is_ci);
 3293         dmu_objset_rele(os, FTAG);
 3294         return (error);
 3295 }
 3296 
 3297 static int
 3298 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
 3299     nvlist_t *zplprops, boolean_t *is_ci)
 3300 {
 3301         boolean_t fuids_ok;
 3302         boolean_t sa_ok;
 3303         uint64_t zplver = ZPL_VERSION;
 3304         int error;
 3305 
 3306         zplver = zfs_zpl_version_map(spa_vers);
 3307         fuids_ok = (zplver >= ZPL_VERSION_FUID);
 3308         sa_ok = (zplver >= ZPL_VERSION_SA);
 3309 
 3310         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
 3311             createprops, zplprops, is_ci);
 3312         return (error);
 3313 }
 3314 
 3315 /*
 3316  * innvl: {
 3317  *     "type" -> dmu_objset_type_t (int32)
 3318  *     (optional) "props" -> { prop -> value }
 3319  *     (optional) "hidden_args" -> { "wkeydata" -> value }
 3320  *         raw uint8_t array of encryption wrapping key data (32 bytes)
 3321  * }
 3322  *
 3323  * outnvl: propname -> error code (int32)
 3324  */
 3325 
 3326 static const zfs_ioc_key_t zfs_keys_create[] = {
 3327         {"type",        DATA_TYPE_INT32,        0},
 3328         {"props",       DATA_TYPE_NVLIST,       ZK_OPTIONAL},
 3329         {"hidden_args", DATA_TYPE_NVLIST,       ZK_OPTIONAL},
 3330 };
 3331 
 3332 static int
 3333 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 3334 {
 3335         int error = 0;
 3336         zfs_creat_t zct = { 0 };
 3337         nvlist_t *nvprops = NULL;
 3338         nvlist_t *hidden_args = NULL;
 3339         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
 3340         dmu_objset_type_t type;
 3341         boolean_t is_insensitive = B_FALSE;
 3342         dsl_crypto_params_t *dcp = NULL;
 3343 
 3344         type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
 3345         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 3346         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 3347 
 3348         switch (type) {
 3349         case DMU_OST_ZFS:
 3350                 cbfunc = zfs_create_cb;
 3351                 break;
 3352 
 3353         case DMU_OST_ZVOL:
 3354                 cbfunc = zvol_create_cb;
 3355                 break;
 3356 
 3357         default:
 3358                 cbfunc = NULL;
 3359                 break;
 3360         }
 3361         if (strchr(fsname, '@') ||
 3362             strchr(fsname, '%'))
 3363                 return (SET_ERROR(EINVAL));
 3364 
 3365         zct.zct_props = nvprops;
 3366 
 3367         if (cbfunc == NULL)
 3368                 return (SET_ERROR(EINVAL));
 3369 
 3370         if (type == DMU_OST_ZVOL) {
 3371                 uint64_t volsize, volblocksize;
 3372 
 3373                 if (nvprops == NULL)
 3374                         return (SET_ERROR(EINVAL));
 3375                 if (nvlist_lookup_uint64(nvprops,
 3376                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
 3377                         return (SET_ERROR(EINVAL));
 3378 
 3379                 if ((error = nvlist_lookup_uint64(nvprops,
 3380                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
 3381                     &volblocksize)) != 0 && error != ENOENT)
 3382                         return (SET_ERROR(EINVAL));
 3383 
 3384                 if (error != 0)
 3385                         volblocksize = zfs_prop_default_numeric(
 3386                             ZFS_PROP_VOLBLOCKSIZE);
 3387 
 3388                 if ((error = zvol_check_volblocksize(fsname,
 3389                     volblocksize)) != 0 ||
 3390                     (error = zvol_check_volsize(volsize,
 3391                     volblocksize)) != 0)
 3392                         return (error);
 3393         } else if (type == DMU_OST_ZFS) {
 3394                 int error;
 3395 
 3396                 /*
 3397                  * We have to have normalization and
 3398                  * case-folding flags correct when we do the
 3399                  * file system creation, so go figure them out
 3400                  * now.
 3401                  */
 3402                 VERIFY(nvlist_alloc(&zct.zct_zplprops,
 3403                     NV_UNIQUE_NAME, KM_SLEEP) == 0);
 3404                 error = zfs_fill_zplprops(fsname, nvprops,
 3405                     zct.zct_zplprops, &is_insensitive);
 3406                 if (error != 0) {
 3407                         nvlist_free(zct.zct_zplprops);
 3408                         return (error);
 3409                 }
 3410         }
 3411 
 3412         error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
 3413             hidden_args, &dcp);
 3414         if (error != 0) {
 3415                 nvlist_free(zct.zct_zplprops);
 3416                 return (error);
 3417         }
 3418 
 3419         error = dmu_objset_create(fsname, type,
 3420             is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
 3421 
 3422         nvlist_free(zct.zct_zplprops);
 3423         dsl_crypto_params_free(dcp, !!error);
 3424 
 3425         /*
 3426          * It would be nice to do this atomically.
 3427          */
 3428         if (error == 0) {
 3429                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
 3430                     nvprops, outnvl);
 3431                 if (error != 0) {
 3432                         spa_t *spa;
 3433                         int error2;
 3434 
 3435                         /*
 3436                          * Volumes will return EBUSY and cannot be destroyed
 3437                          * until all asynchronous minor handling (e.g. from
 3438                          * setting the volmode property) has completed. Wait for
 3439                          * the spa_zvol_taskq to drain then retry.
 3440                          */
 3441                         error2 = dsl_destroy_head(fsname);
 3442                         while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
 3443                                 error2 = spa_open(fsname, &spa, FTAG);
 3444                                 if (error2 == 0) {
 3445                                         taskq_wait(spa->spa_zvol_taskq);
 3446                                         spa_close(spa, FTAG);
 3447                                 }
 3448                                 error2 = dsl_destroy_head(fsname);
 3449                         }
 3450                 }
 3451         }
 3452         return (error);
 3453 }
 3454 
 3455 /*
 3456  * innvl: {
 3457  *     "origin" -> name of origin snapshot
 3458  *     (optional) "props" -> { prop -> value }
 3459  *     (optional) "hidden_args" -> { "wkeydata" -> value }
 3460  *         raw uint8_t array of encryption wrapping key data (32 bytes)
 3461  * }
 3462  *
 3463  * outputs:
 3464  * outnvl: propname -> error code (int32)
 3465  */
 3466 static const zfs_ioc_key_t zfs_keys_clone[] = {
 3467         {"origin",      DATA_TYPE_STRING,       0},
 3468         {"props",       DATA_TYPE_NVLIST,       ZK_OPTIONAL},
 3469         {"hidden_args", DATA_TYPE_NVLIST,       ZK_OPTIONAL},
 3470 };
 3471 
 3472 static int
 3473 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 3474 {
 3475         int error = 0;
 3476         nvlist_t *nvprops = NULL;
 3477         const char *origin_name;
 3478 
 3479         origin_name = fnvlist_lookup_string(innvl, "origin");
 3480         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 3481 
 3482         if (strchr(fsname, '@') ||
 3483             strchr(fsname, '%'))
 3484                 return (SET_ERROR(EINVAL));
 3485 
 3486         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
 3487                 return (SET_ERROR(EINVAL));
 3488 
 3489         error = dmu_objset_clone(fsname, origin_name);
 3490 
 3491         /*
 3492          * It would be nice to do this atomically.
 3493          */
 3494         if (error == 0) {
 3495                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
 3496                     nvprops, outnvl);
 3497                 if (error != 0)
 3498                         (void) dsl_destroy_head(fsname);
 3499         }
 3500         return (error);
 3501 }
 3502 
 3503 static const zfs_ioc_key_t zfs_keys_remap[] = {
 3504         /* no nvl keys */
 3505 };
 3506 
 3507 static int
 3508 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 3509 {
 3510         /* This IOCTL is no longer supported. */
 3511         (void) fsname, (void) innvl, (void) outnvl;
 3512         return (0);
 3513 }
 3514 
 3515 /*
 3516  * innvl: {
 3517  *     "snaps" -> { snapshot1, snapshot2 }
 3518  *     (optional) "props" -> { prop -> value (string) }
 3519  * }
 3520  *
 3521  * outnvl: snapshot -> error code (int32)
 3522  */
 3523 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
 3524         {"snaps",       DATA_TYPE_NVLIST,       0},
 3525         {"props",       DATA_TYPE_NVLIST,       ZK_OPTIONAL},
 3526 };
 3527 
 3528 static int
 3529 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3530 {
 3531         nvlist_t *snaps;
 3532         nvlist_t *props = NULL;
 3533         int error, poollen;
 3534         nvpair_t *pair;
 3535 
 3536         (void) nvlist_lookup_nvlist(innvl, "props", &props);
 3537         if (!nvlist_empty(props) &&
 3538             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
 3539                 return (SET_ERROR(ENOTSUP));
 3540         if ((error = zfs_check_userprops(props)) != 0)
 3541                 return (error);
 3542 
 3543         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 3544         poollen = strlen(poolname);
 3545         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 3546             pair = nvlist_next_nvpair(snaps, pair)) {
 3547                 const char *name = nvpair_name(pair);
 3548                 char *cp = strchr(name, '@');
 3549 
 3550                 /*
 3551                  * The snap name must contain an @, and the part after it must
 3552                  * contain only valid characters.
 3553                  */
 3554                 if (cp == NULL ||
 3555                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
 3556                         return (SET_ERROR(EINVAL));
 3557 
 3558                 /*
 3559                  * The snap must be in the specified pool.
 3560                  */
 3561                 if (strncmp(name, poolname, poollen) != 0 ||
 3562                     (name[poollen] != '/' && name[poollen] != '@'))
 3563                         return (SET_ERROR(EXDEV));
 3564 
 3565                 /*
 3566                  * Check for permission to set the properties on the fs.
 3567                  */
 3568                 if (!nvlist_empty(props)) {
 3569                         *cp = '\0';
 3570                         error = zfs_secpolicy_write_perms(name,
 3571                             ZFS_DELEG_PERM_USERPROP, CRED());
 3572                         *cp = '@';
 3573                         if (error != 0)
 3574                                 return (error);
 3575                 }
 3576 
 3577                 /* This must be the only snap of this fs. */
 3578                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
 3579                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
 3580                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
 3581                             == 0) {
 3582                                 return (SET_ERROR(EXDEV));
 3583                         }
 3584                 }
 3585         }
 3586 
 3587         error = dsl_dataset_snapshot(snaps, props, outnvl);
 3588 
 3589         return (error);
 3590 }
 3591 
 3592 /*
 3593  * innvl: "message" -> string
 3594  */
 3595 static const zfs_ioc_key_t zfs_keys_log_history[] = {
 3596         {"message",     DATA_TYPE_STRING,       0},
 3597 };
 3598 
 3599 static int
 3600 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 3601 {
 3602         (void) unused, (void) outnvl;
 3603         const char *message;
 3604         char *poolname;
 3605         spa_t *spa;
 3606         int error;
 3607 
 3608         /*
 3609          * The poolname in the ioctl is not set, we get it from the TSD,
 3610          * which was set at the end of the last successful ioctl that allows
 3611          * logging.  The secpolicy func already checked that it is set.
 3612          * Only one log ioctl is allowed after each successful ioctl, so
 3613          * we clear the TSD here.
 3614          */
 3615         poolname = tsd_get(zfs_allow_log_key);
 3616         if (poolname == NULL)
 3617                 return (SET_ERROR(EINVAL));
 3618         (void) tsd_set(zfs_allow_log_key, NULL);
 3619         error = spa_open(poolname, &spa, FTAG);
 3620         kmem_strfree(poolname);
 3621         if (error != 0)
 3622                 return (error);
 3623 
 3624         message = fnvlist_lookup_string(innvl, "message");
 3625 
 3626         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 3627                 spa_close(spa, FTAG);
 3628                 return (SET_ERROR(ENOTSUP));
 3629         }
 3630 
 3631         error = spa_history_log(spa, message);
 3632         spa_close(spa, FTAG);
 3633         return (error);
 3634 }
 3635 
 3636 /*
 3637  * This ioctl is used to set the bootenv configuration on the current
 3638  * pool. This configuration is stored in the second padding area of the label,
 3639  * and it is used by the bootloader(s) to store the bootloader and/or system
 3640  * specific data.
 3641  * The data is stored as nvlist data stream, and is protected by
 3642  * an embedded checksum.
 3643  * The version can have two possible values:
 3644  * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
 3645  * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
 3646  */
 3647 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
 3648         {"version",     DATA_TYPE_UINT64,       0},
 3649         {"<keys>",      DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
 3650 };
 3651 
 3652 static int
 3653 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 3654 {
 3655         int error;
 3656         spa_t *spa;
 3657 
 3658         if ((error = spa_open(name, &spa, FTAG)) != 0)
 3659                 return (error);
 3660         spa_vdev_state_enter(spa, SCL_ALL);
 3661         error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
 3662         (void) spa_vdev_state_exit(spa, NULL, 0);
 3663         spa_close(spa, FTAG);
 3664         return (error);
 3665 }
 3666 
 3667 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
 3668         /* no nvl keys */
 3669 };
 3670 
 3671 static int
 3672 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 3673 {
 3674         spa_t *spa;
 3675         int error;
 3676 
 3677         if ((error = spa_open(name, &spa, FTAG)) != 0)
 3678                 return (error);
 3679         spa_vdev_state_enter(spa, SCL_ALL);
 3680         error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
 3681         (void) spa_vdev_state_exit(spa, NULL, 0);
 3682         spa_close(spa, FTAG);
 3683         return (error);
 3684 }
 3685 
 3686 /*
 3687  * The dp_config_rwlock must not be held when calling this, because the
 3688  * unmount may need to write out data.
 3689  *
 3690  * This function is best-effort.  Callers must deal gracefully if it
 3691  * remains mounted (or is remounted after this call).
 3692  *
 3693  * Returns 0 if the argument is not a snapshot, or it is not currently a
 3694  * filesystem, or we were able to unmount it.  Returns error code otherwise.
 3695  */
 3696 void
 3697 zfs_unmount_snap(const char *snapname)
 3698 {
 3699         if (strchr(snapname, '@') == NULL)
 3700                 return;
 3701 
 3702         (void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
 3703 }
 3704 
 3705 static int
 3706 zfs_unmount_snap_cb(const char *snapname, void *arg)
 3707 {
 3708         (void) arg;
 3709         zfs_unmount_snap(snapname);
 3710         return (0);
 3711 }
 3712 
 3713 /*
 3714  * When a clone is destroyed, its origin may also need to be destroyed,
 3715  * in which case it must be unmounted.  This routine will do that unmount
 3716  * if necessary.
 3717  */
 3718 void
 3719 zfs_destroy_unmount_origin(const char *fsname)
 3720 {
 3721         int error;
 3722         objset_t *os;
 3723         dsl_dataset_t *ds;
 3724 
 3725         error = dmu_objset_hold(fsname, FTAG, &os);
 3726         if (error != 0)
 3727                 return;
 3728         ds = dmu_objset_ds(os);
 3729         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
 3730                 char originname[ZFS_MAX_DATASET_NAME_LEN];
 3731                 dsl_dataset_name(ds->ds_prev, originname);
 3732                 dmu_objset_rele(os, FTAG);
 3733                 zfs_unmount_snap(originname);
 3734         } else {
 3735                 dmu_objset_rele(os, FTAG);
 3736         }
 3737 }
 3738 
 3739 /*
 3740  * innvl: {
 3741  *     "snaps" -> { snapshot1, snapshot2 }
 3742  *     (optional boolean) "defer"
 3743  * }
 3744  *
 3745  * outnvl: snapshot -> error code (int32)
 3746  */
 3747 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
 3748         {"snaps",       DATA_TYPE_NVLIST,       0},
 3749         {"defer",       DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
 3750 };
 3751 
 3752 static int
 3753 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3754 {
 3755         int poollen;
 3756         nvlist_t *snaps;
 3757         nvpair_t *pair;
 3758         boolean_t defer;
 3759         spa_t *spa;
 3760 
 3761         snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 3762         defer = nvlist_exists(innvl, "defer");
 3763 
 3764         poollen = strlen(poolname);
 3765         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 3766             pair = nvlist_next_nvpair(snaps, pair)) {
 3767                 const char *name = nvpair_name(pair);
 3768 
 3769                 /*
 3770                  * The snap must be in the specified pool to prevent the
 3771                  * invalid removal of zvol minors below.
 3772                  */
 3773                 if (strncmp(name, poolname, poollen) != 0 ||
 3774                     (name[poollen] != '/' && name[poollen] != '@'))
 3775                         return (SET_ERROR(EXDEV));
 3776 
 3777                 zfs_unmount_snap(nvpair_name(pair));
 3778                 if (spa_open(name, &spa, FTAG) == 0) {
 3779                         zvol_remove_minors(spa, name, B_TRUE);
 3780                         spa_close(spa, FTAG);
 3781                 }
 3782         }
 3783 
 3784         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
 3785 }
 3786 
 3787 /*
 3788  * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
 3789  * All bookmarks and snapshots must be in the same pool.
 3790  * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
 3791  *
 3792  * innvl: {
 3793  *     new_bookmark1 -> existing_snapshot,
 3794  *     new_bookmark2 -> existing_bookmark,
 3795  * }
 3796  *
 3797  * outnvl: bookmark -> error code (int32)
 3798  *
 3799  */
 3800 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
 3801         {"<bookmark>...",       DATA_TYPE_STRING,       ZK_WILDCARDLIST},
 3802 };
 3803 
 3804 static int
 3805 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3806 {
 3807         (void) poolname;
 3808         return (dsl_bookmark_create(innvl, outnvl));
 3809 }
 3810 
 3811 /*
 3812  * innvl: {
 3813  *     property 1, property 2, ...
 3814  * }
 3815  *
 3816  * outnvl: {
 3817  *     bookmark name 1 -> { property 1, property 2, ... },
 3818  *     bookmark name 2 -> { property 1, property 2, ... }
 3819  * }
 3820  *
 3821  */
 3822 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
 3823         {"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
 3824 };
 3825 
 3826 static int
 3827 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 3828 {
 3829         return (dsl_get_bookmarks(fsname, innvl, outnvl));
 3830 }
 3831 
 3832 /*
 3833  * innvl is not used.
 3834  *
 3835  * outnvl: {
 3836  *     property 1, property 2, ...
 3837  * }
 3838  *
 3839  */
 3840 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
 3841         /* no nvl keys */
 3842 };
 3843 
 3844 static int
 3845 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
 3846     nvlist_t *outnvl)
 3847 {
 3848         (void) innvl;
 3849         char fsname[ZFS_MAX_DATASET_NAME_LEN];
 3850         char *bmname;
 3851 
 3852         bmname = strchr(bookmark, '#');
 3853         if (bmname == NULL)
 3854                 return (SET_ERROR(EINVAL));
 3855         bmname++;
 3856 
 3857         (void) strlcpy(fsname, bookmark, sizeof (fsname));
 3858         *(strchr(fsname, '#')) = '\0';
 3859 
 3860         return (dsl_get_bookmark_props(fsname, bmname, outnvl));
 3861 }
 3862 
 3863 /*
 3864  * innvl: {
 3865  *     bookmark name 1, bookmark name 2
 3866  * }
 3867  *
 3868  * outnvl: bookmark -> error code (int32)
 3869  *
 3870  */
 3871 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
 3872         {"<bookmark>...",       DATA_TYPE_BOOLEAN,      ZK_WILDCARDLIST},
 3873 };
 3874 
 3875 static int
 3876 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
 3877     nvlist_t *outnvl)
 3878 {
 3879         int error, poollen;
 3880 
 3881         poollen = strlen(poolname);
 3882         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
 3883             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
 3884                 const char *name = nvpair_name(pair);
 3885                 const char *cp = strchr(name, '#');
 3886 
 3887                 /*
 3888                  * The bookmark name must contain an #, and the part after it
 3889                  * must contain only valid characters.
 3890                  */
 3891                 if (cp == NULL ||
 3892                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
 3893                         return (SET_ERROR(EINVAL));
 3894 
 3895                 /*
 3896                  * The bookmark must be in the specified pool.
 3897                  */
 3898                 if (strncmp(name, poolname, poollen) != 0 ||
 3899                     (name[poollen] != '/' && name[poollen] != '#'))
 3900                         return (SET_ERROR(EXDEV));
 3901         }
 3902 
 3903         error = dsl_bookmark_destroy(innvl, outnvl);
 3904         return (error);
 3905 }
 3906 
 3907 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
 3908         {"program",     DATA_TYPE_STRING,               0},
 3909         {"arg",         DATA_TYPE_ANY,                  0},
 3910         {"sync",        DATA_TYPE_BOOLEAN_VALUE,        ZK_OPTIONAL},
 3911         {"instrlimit",  DATA_TYPE_UINT64,               ZK_OPTIONAL},
 3912         {"memlimit",    DATA_TYPE_UINT64,               ZK_OPTIONAL},
 3913 };
 3914 
 3915 static int
 3916 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
 3917     nvlist_t *outnvl)
 3918 {
 3919         char *program;
 3920         uint64_t instrlimit, memlimit;
 3921         boolean_t sync_flag;
 3922         nvpair_t *nvarg = NULL;
 3923 
 3924         program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
 3925         if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
 3926                 sync_flag = B_TRUE;
 3927         }
 3928         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
 3929                 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
 3930         }
 3931         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
 3932                 memlimit = ZCP_DEFAULT_MEMLIMIT;
 3933         }
 3934         nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
 3935 
 3936         if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
 3937                 return (SET_ERROR(EINVAL));
 3938         if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
 3939                 return (SET_ERROR(EINVAL));
 3940 
 3941         return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
 3942             nvarg, outnvl));
 3943 }
 3944 
 3945 /*
 3946  * innvl: unused
 3947  * outnvl: empty
 3948  */
 3949 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
 3950         /* no nvl keys */
 3951 };
 3952 
 3953 static int
 3954 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 3955 {
 3956         (void) innvl, (void) outnvl;
 3957         return (spa_checkpoint(poolname));
 3958 }
 3959 
 3960 /*
 3961  * innvl: unused
 3962  * outnvl: empty
 3963  */
 3964 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
 3965         /* no nvl keys */
 3966 };
 3967 
 3968 static int
 3969 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
 3970     nvlist_t *outnvl)
 3971 {
 3972         (void) innvl, (void) outnvl;
 3973         return (spa_checkpoint_discard(poolname));
 3974 }
 3975 
 3976 /*
 3977  * inputs:
 3978  * zc_name              name of dataset to destroy
 3979  * zc_defer_destroy     mark for deferred destroy
 3980  *
 3981  * outputs:             none
 3982  */
 3983 static int
 3984 zfs_ioc_destroy(zfs_cmd_t *zc)
 3985 {
 3986         objset_t *os;
 3987         dmu_objset_type_t ost;
 3988         int err;
 3989 
 3990         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
 3991         if (err != 0)
 3992                 return (err);
 3993         ost = dmu_objset_type(os);
 3994         dmu_objset_rele(os, FTAG);
 3995 
 3996         if (ost == DMU_OST_ZFS)
 3997                 zfs_unmount_snap(zc->zc_name);
 3998 
 3999         if (strchr(zc->zc_name, '@')) {
 4000                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
 4001         } else {
 4002                 err = dsl_destroy_head(zc->zc_name);
 4003                 if (err == EEXIST) {
 4004                         /*
 4005                          * It is possible that the given DS may have
 4006                          * hidden child (%recv) datasets - "leftovers"
 4007                          * resulting from the previously interrupted
 4008                          * 'zfs receive'.
 4009                          *
 4010                          * 6 extra bytes for /%recv
 4011                          */
 4012                         char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
 4013 
 4014                         if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
 4015                             zc->zc_name, recv_clone_name) >=
 4016                             sizeof (namebuf))
 4017                                 return (SET_ERROR(EINVAL));
 4018 
 4019                         /*
 4020                          * Try to remove the hidden child (%recv) and after
 4021                          * that try to remove the target dataset.
 4022                          * If the hidden child (%recv) does not exist
 4023                          * the original error (EEXIST) will be returned
 4024                          */
 4025                         err = dsl_destroy_head(namebuf);
 4026                         if (err == 0)
 4027                                 err = dsl_destroy_head(zc->zc_name);
 4028                         else if (err == ENOENT)
 4029                                 err = SET_ERROR(EEXIST);
 4030                 }
 4031         }
 4032 
 4033         return (err);
 4034 }
 4035 
 4036 /*
 4037  * innvl: {
 4038  *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
 4039  *     "initialize_vdevs": { -> guids to initialize (nvlist)
 4040  *         "vdev_path_1": vdev_guid_1, (uint64),
 4041  *         "vdev_path_2": vdev_guid_2, (uint64),
 4042  *         ...
 4043  *     },
 4044  * }
 4045  *
 4046  * outnvl: {
 4047  *     "initialize_vdevs": { -> initialization errors (nvlist)
 4048  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
 4049  *         "vdev_path_2": errno, ... (uint64)
 4050  *         ...
 4051  *     }
 4052  * }
 4053  *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
 4056  */
 4057 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
 4058         {ZPOOL_INITIALIZE_COMMAND,      DATA_TYPE_UINT64,       0},
 4059         {ZPOOL_INITIALIZE_VDEVS,        DATA_TYPE_NVLIST,       0}
 4060 };
 4061 
 4062 static int
 4063 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 4064 {
 4065         uint64_t cmd_type;
 4066         if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
 4067             &cmd_type) != 0) {
 4068                 return (SET_ERROR(EINVAL));
 4069         }
 4070 
 4071         if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
 4072             cmd_type == POOL_INITIALIZE_START ||
 4073             cmd_type == POOL_INITIALIZE_SUSPEND)) {
 4074                 return (SET_ERROR(EINVAL));
 4075         }
 4076 
 4077         nvlist_t *vdev_guids;
 4078         if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
 4079             &vdev_guids) != 0) {
 4080                 return (SET_ERROR(EINVAL));
 4081         }
 4082 
 4083         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
 4084             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
 4085                 uint64_t vdev_guid;
 4086                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
 4087                         return (SET_ERROR(EINVAL));
 4088                 }
 4089         }
 4090 
 4091         spa_t *spa;
 4092         int error = spa_open(poolname, &spa, FTAG);
 4093         if (error != 0)
 4094                 return (error);
 4095 
 4096         nvlist_t *vdev_errlist = fnvlist_alloc();
 4097         int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
 4098             vdev_errlist);
 4099 
 4100         if (fnvlist_size(vdev_errlist) > 0) {
 4101                 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
 4102                     vdev_errlist);
 4103         }
 4104         fnvlist_free(vdev_errlist);
 4105 
 4106         spa_close(spa, FTAG);
 4107         return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
 4108 }
 4109 
 4110 /*
 4111  * innvl: {
 4112  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
 4113  *     "trim_vdevs": { -> guids to TRIM (nvlist)
 4114  *         "vdev_path_1": vdev_guid_1, (uint64),
 4115  *         "vdev_path_2": vdev_guid_2, (uint64),
 4116  *         ...
 4117  *     },
 4118  *     "trim_rate" -> Target TRIM rate in bytes/sec.
 4119  *     "trim_secure" -> Set to request a secure TRIM.
 4120  * }
 4121  *
 4122  * outnvl: {
 4123  *     "trim_vdevs": { -> TRIM errors (nvlist)
 4124  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
 4125  *         "vdev_path_2": errno, ... (uint64)
 4126  *         ...
 4127  *     }
 4128  * }
 4129  *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
 4132  */
 4133 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
 4134         {ZPOOL_TRIM_COMMAND,    DATA_TYPE_UINT64,               0},
 4135         {ZPOOL_TRIM_VDEVS,      DATA_TYPE_NVLIST,               0},
 4136         {ZPOOL_TRIM_RATE,       DATA_TYPE_UINT64,               ZK_OPTIONAL},
 4137         {ZPOOL_TRIM_SECURE,     DATA_TYPE_BOOLEAN_VALUE,        ZK_OPTIONAL},
 4138 };
 4139 
 4140 static int
 4141 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 4142 {
 4143         uint64_t cmd_type;
 4144         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
 4145                 return (SET_ERROR(EINVAL));
 4146 
 4147         if (!(cmd_type == POOL_TRIM_CANCEL ||
 4148             cmd_type == POOL_TRIM_START ||
 4149             cmd_type == POOL_TRIM_SUSPEND)) {
 4150                 return (SET_ERROR(EINVAL));
 4151         }
 4152 
 4153         nvlist_t *vdev_guids;
 4154         if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
 4155                 return (SET_ERROR(EINVAL));
 4156 
 4157         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
 4158             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
 4159                 uint64_t vdev_guid;
 4160                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
 4161                         return (SET_ERROR(EINVAL));
 4162                 }
 4163         }
 4164 
 4165         /* Optional, defaults to maximum rate when not provided */
 4166         uint64_t rate;
 4167         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
 4168                 rate = 0;
 4169 
 4170         /* Optional, defaults to standard TRIM when not provided */
 4171         boolean_t secure;
 4172         if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
 4173             &secure) != 0) {
 4174                 secure = B_FALSE;
 4175         }
 4176 
 4177         spa_t *spa;
 4178         int error = spa_open(poolname, &spa, FTAG);
 4179         if (error != 0)
 4180                 return (error);
 4181 
 4182         nvlist_t *vdev_errlist = fnvlist_alloc();
 4183         int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
 4184             rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
 4185 
 4186         if (fnvlist_size(vdev_errlist) > 0)
 4187                 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
 4188 
 4189         fnvlist_free(vdev_errlist);
 4190 
 4191         spa_close(spa, FTAG);
 4192         return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
 4193 }
 4194 
 4195 /*
 4196  * This ioctl waits for activity of a particular type to complete. If there is
 4197  * no activity of that type in progress, it returns immediately, and the
 4198  * returned value "waited" is false. If there is activity in progress, and no
 4199  * tag is passed in, the ioctl blocks until all activity of that type is
 4200  * complete, and then returns with "waited" set to true.
 4201  *
 4202  * If a tag is provided, it identifies a particular instance of an activity to
 4203  * wait for. Currently, this is only valid for use with 'initialize', because
 4204  * that is the only activity for which there can be multiple instances running
 4205  * concurrently. In the case of 'initialize', the tag corresponds to the guid of
 4206  * the vdev on which to wait.
 4207  *
 4208  * If a thread waiting in the ioctl receives a signal, the call will return
 4209  * immediately, and the return value will be EINTR.
 4210  *
 4211  * innvl: {
 4212  *     "wait_activity" -> int32_t
 4213  *     (optional) "wait_tag" -> uint64_t
 4214  * }
 4215  *
 4216  * outnvl: "waited" -> boolean_t
 4217  */
 4218 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
 4219         {ZPOOL_WAIT_ACTIVITY,   DATA_TYPE_INT32,                0},
 4220         {ZPOOL_WAIT_TAG,        DATA_TYPE_UINT64,               ZK_OPTIONAL},
 4221 };
 4222 
 4223 static int
 4224 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 4225 {
 4226         int32_t activity;
 4227         uint64_t tag;
 4228         boolean_t waited;
 4229         int error;
 4230 
 4231         if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
 4232                 return (EINVAL);
 4233 
 4234         if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
 4235                 error = spa_wait_tag(name, activity, tag, &waited);
 4236         else
 4237                 error = spa_wait(name, activity, &waited);
 4238 
 4239         if (error == 0)
 4240                 fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
 4241 
 4242         return (error);
 4243 }
 4244 
 4245 /*
 4246  * This ioctl waits for activity of a particular type to complete. If there is
 4247  * no activity of that type in progress, it returns immediately, and the
 4248  * returned value "waited" is false. If there is activity in progress, the
 4249  * ioctl blocks until all activity of that type is complete, and then
 4250  * returns with "waited" set to true.
 4251  *
 4252  * If a thread waiting in the ioctl receives a signal, the call will return
 4253  * immediately, and the return value will be EINTR.
 4254  *
 4255  * innvl: {
 4256  *     "wait_activity" -> int32_t
 4257  * }
 4258  *
 4259  * outnvl: "waited" -> boolean_t
 4260  */
 4261 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
 4262         {ZFS_WAIT_ACTIVITY,     DATA_TYPE_INT32,                0},
 4263 };
 4264 
 4265 static int
 4266 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 4267 {
 4268         int32_t activity;
 4269         boolean_t waited = B_FALSE;
 4270         int error;
 4271         dsl_pool_t *dp;
 4272         dsl_dir_t *dd;
 4273         dsl_dataset_t *ds;
 4274 
 4275         if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
 4276                 return (SET_ERROR(EINVAL));
 4277 
 4278         if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
 4279                 return (SET_ERROR(EINVAL));
 4280 
 4281         if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
 4282                 return (error);
 4283 
 4284         if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
 4285                 dsl_pool_rele(dp, FTAG);
 4286                 return (error);
 4287         }
 4288 
 4289         dd = ds->ds_dir;
 4290         mutex_enter(&dd->dd_activity_lock);
 4291         dd->dd_activity_waiters++;
 4292 
 4293         /*
 4294          * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
 4295          * aren't evicted while we're waiting. Normally this is prevented by
 4296          * holding the pool, but we can't do that while we're waiting since
 4297          * that would prevent TXGs from syncing out. Some of the functionality
 4298          * of long-holds (e.g. preventing deletion) is unnecessary for this
 4299          * case, since we would cancel the waiters before proceeding with a
 4300          * deletion. An alternative mechanism for keeping the dataset around
 4301          * could be developed but this is simpler.
 4302          */
 4303         dsl_dataset_long_hold(ds, FTAG);
 4304         dsl_pool_rele(dp, FTAG);
 4305 
 4306         error = dsl_dir_wait(dd, ds, activity, &waited);
 4307 
 4308         dsl_dataset_long_rele(ds, FTAG);
 4309         dd->dd_activity_waiters--;
 4310         if (dd->dd_activity_waiters == 0)
 4311                 cv_signal(&dd->dd_activity_cv);
 4312         mutex_exit(&dd->dd_activity_lock);
 4313 
 4314         dsl_dataset_rele(ds, FTAG);
 4315 
 4316         if (error == 0)
 4317                 fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
 4318 
 4319         return (error);
 4320 }
 4321 
 4322 /*
 4323  * fsname is name of dataset to rollback (to most recent snapshot)
 4324  *
 4325  * innvl: {
 4326  *     (optional) "target" -> name of expected target snapshot
 4327  * }
 4328  * outnvl: "target" -> name of most recent snapshot
 4329  */
 4330 static const zfs_ioc_key_t zfs_keys_rollback[] = {
 4331         {"target",      DATA_TYPE_STRING,       ZK_OPTIONAL},
 4332 };
 4333 
 4334 static int
 4335 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 4336 {
 4337         zfsvfs_t *zfsvfs;
 4338         zvol_state_handle_t *zv;
 4339         char *target = NULL;
 4340         int error;
 4341 
 4342         (void) nvlist_lookup_string(innvl, "target", &target);
 4343         if (target != NULL) {
 4344                 const char *cp = strchr(target, '@');
 4345 
 4346                 /*
 4347                  * The snap name must contain an @, and the part after it must
 4348                  * contain only valid characters.
 4349                  */
 4350                 if (cp == NULL ||
 4351                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
 4352                         return (SET_ERROR(EINVAL));
 4353         }
 4354 
 4355         if (getzfsvfs(fsname, &zfsvfs) == 0) {
 4356                 dsl_dataset_t *ds;
 4357 
 4358                 ds = dmu_objset_ds(zfsvfs->z_os);
 4359                 error = zfs_suspend_fs(zfsvfs);
 4360                 if (error == 0) {
 4361                         int resume_err;
 4362 
 4363                         error = dsl_dataset_rollback(fsname, target, zfsvfs,
 4364                             outnvl);
 4365                         resume_err = zfs_resume_fs(zfsvfs, ds);
 4366                         error = error ? error : resume_err;
 4367                 }
 4368                 zfs_vfs_rele(zfsvfs);
 4369         } else if ((zv = zvol_suspend(fsname)) != NULL) {
 4370                 error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
 4371                     outnvl);
 4372                 zvol_resume(zv);
 4373         } else {
 4374                 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
 4375         }
 4376         return (error);
 4377 }
 4378 
 4379 static int
 4380 recursive_unmount(const char *fsname, void *arg)
 4381 {
 4382         const char *snapname = arg;
 4383         char *fullname;
 4384 
 4385         fullname = kmem_asprintf("%s@%s", fsname, snapname);
 4386         zfs_unmount_snap(fullname);
 4387         kmem_strfree(fullname);
 4388 
 4389         return (0);
 4390 }
 4391 
 4392 /*
 4393  *
 4394  * snapname is the snapshot to redact.
 4395  * innvl: {
 4396  *     "bookname" -> (string)
 4397  *         shortname of the redaction bookmark to generate
 4398  *     "snapnv" -> (nvlist, values ignored)
 4399  *         snapshots to redact snapname with respect to
 4400  * }
 4401  *
 4402  * outnvl is unused
 4403  */
 4404 
 4405 static const zfs_ioc_key_t zfs_keys_redact[] = {
 4406         {"bookname",            DATA_TYPE_STRING,       0},
 4407         {"snapnv",              DATA_TYPE_NVLIST,       0},
 4408 };
 4409 
 4410 static int
 4411 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 4412 {
 4413         (void) outnvl;
 4414         nvlist_t *redactnvl = NULL;
 4415         char *redactbook = NULL;
 4416 
 4417         if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
 4418                 return (SET_ERROR(EINVAL));
 4419         if (fnvlist_num_pairs(redactnvl) == 0)
 4420                 return (SET_ERROR(ENXIO));
 4421         if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
 4422                 return (SET_ERROR(EINVAL));
 4423 
 4424         return (dmu_redact_snap(snapname, redactnvl, redactbook));
 4425 }
 4426 
 4427 /*
 4428  * inputs:
 4429  * zc_name      old name of dataset
 4430  * zc_value     new name of dataset
 4431  * zc_cookie    recursive flag (only valid for snapshots)
 4432  *
 4433  * outputs:     none
 4434  */
 4435 static int
 4436 zfs_ioc_rename(zfs_cmd_t *zc)
 4437 {
 4438         objset_t *os;
 4439         dmu_objset_type_t ost;
 4440         boolean_t recursive = zc->zc_cookie & 1;
 4441         boolean_t nounmount = !!(zc->zc_cookie & 2);
 4442         char *at;
 4443         int err;
 4444 
 4445         /* "zfs rename" from and to ...%recv datasets should both fail */
 4446         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 4447         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
 4448         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
 4449             dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 4450             strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
 4451                 return (SET_ERROR(EINVAL));
 4452 
 4453         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
 4454         if (err != 0)
 4455                 return (err);
 4456         ost = dmu_objset_type(os);
 4457         dmu_objset_rele(os, FTAG);
 4458 
 4459         at = strchr(zc->zc_name, '@');
 4460         if (at != NULL) {
 4461                 /* snaps must be in same fs */
 4462                 int error;
 4463 
 4464                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
 4465                         return (SET_ERROR(EXDEV));
 4466                 *at = '\0';
 4467                 if (ost == DMU_OST_ZFS && !nounmount) {
 4468                         error = dmu_objset_find(zc->zc_name,
 4469                             recursive_unmount, at + 1,
 4470                             recursive ? DS_FIND_CHILDREN : 0);
 4471                         if (error != 0) {
 4472                                 *at = '@';
 4473                                 return (error);
 4474                         }
 4475                 }
 4476                 error = dsl_dataset_rename_snapshot(zc->zc_name,
 4477                     at + 1, strchr(zc->zc_value, '@') + 1, recursive);
 4478                 *at = '@';
 4479 
 4480                 return (error);
 4481         } else {
 4482                 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
 4483         }
 4484 }
 4485 
 4486 static int
 4487 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 4488 {
 4489         const char *propname = nvpair_name(pair);
 4490         boolean_t issnap = (strchr(dsname, '@') != NULL);
 4491         zfs_prop_t prop = zfs_name_to_prop(propname);
 4492         uint64_t intval, compval;
 4493         int err;
 4494 
 4495         if (prop == ZPROP_USERPROP) {
 4496                 if (zfs_prop_user(propname)) {
 4497                         if ((err = zfs_secpolicy_write_perms(dsname,
 4498                             ZFS_DELEG_PERM_USERPROP, cr)))
 4499                                 return (err);
 4500                         return (0);
 4501                 }
 4502 
 4503                 if (!issnap && zfs_prop_userquota(propname)) {
 4504                         const char *perm = NULL;
 4505                         const char *uq_prefix =
 4506                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
 4507                         const char *gq_prefix =
 4508                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
 4509                         const char *uiq_prefix =
 4510                             zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
 4511                         const char *giq_prefix =
 4512                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
 4513                         const char *pq_prefix =
 4514                             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
 4515                         const char *piq_prefix = zfs_userquota_prop_prefixes[\
 4516                             ZFS_PROP_PROJECTOBJQUOTA];
 4517 
 4518                         if (strncmp(propname, uq_prefix,
 4519                             strlen(uq_prefix)) == 0) {
 4520                                 perm = ZFS_DELEG_PERM_USERQUOTA;
 4521                         } else if (strncmp(propname, uiq_prefix,
 4522                             strlen(uiq_prefix)) == 0) {
 4523                                 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
 4524                         } else if (strncmp(propname, gq_prefix,
 4525                             strlen(gq_prefix)) == 0) {
 4526                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
 4527                         } else if (strncmp(propname, giq_prefix,
 4528                             strlen(giq_prefix)) == 0) {
 4529                                 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
 4530                         } else if (strncmp(propname, pq_prefix,
 4531                             strlen(pq_prefix)) == 0) {
 4532                                 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
 4533                         } else if (strncmp(propname, piq_prefix,
 4534                             strlen(piq_prefix)) == 0) {
 4535                                 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
 4536                         } else {
 4537                                 /* {USER|GROUP|PROJECT}USED are read-only */
 4538                                 return (SET_ERROR(EINVAL));
 4539                         }
 4540 
 4541                         if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
 4542                                 return (err);
 4543                         return (0);
 4544                 }
 4545 
 4546                 return (SET_ERROR(EINVAL));
 4547         }
 4548 
 4549         if (issnap)
 4550                 return (SET_ERROR(EINVAL));
 4551 
 4552         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 4553                 /*
 4554                  * dsl_prop_get_all_impl() returns properties in this
 4555                  * format.
 4556                  */
 4557                 nvlist_t *attrs;
 4558                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 4559                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 4560                     &pair) == 0);
 4561         }
 4562 
 4563         /*
 4564          * Check that this value is valid for this pool version
 4565          */
 4566         switch (prop) {
 4567         case ZFS_PROP_COMPRESSION:
 4568                 /*
 4569                  * If the user specified gzip compression, make sure
 4570                  * the SPA supports it. We ignore any errors here since
 4571                  * we'll catch them later.
 4572                  */
 4573                 if (nvpair_value_uint64(pair, &intval) == 0) {
 4574                         compval = ZIO_COMPRESS_ALGO(intval);
 4575                         if (compval >= ZIO_COMPRESS_GZIP_1 &&
 4576                             compval <= ZIO_COMPRESS_GZIP_9 &&
 4577                             zfs_earlier_version(dsname,
 4578                             SPA_VERSION_GZIP_COMPRESSION)) {
 4579                                 return (SET_ERROR(ENOTSUP));
 4580                         }
 4581 
 4582                         if (compval == ZIO_COMPRESS_ZLE &&
 4583                             zfs_earlier_version(dsname,
 4584                             SPA_VERSION_ZLE_COMPRESSION))
 4585                                 return (SET_ERROR(ENOTSUP));
 4586 
 4587                         if (compval == ZIO_COMPRESS_LZ4) {
 4588                                 spa_t *spa;
 4589 
 4590                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 4591                                         return (err);
 4592 
 4593                                 if (!spa_feature_is_enabled(spa,
 4594                                     SPA_FEATURE_LZ4_COMPRESS)) {
 4595                                         spa_close(spa, FTAG);
 4596                                         return (SET_ERROR(ENOTSUP));
 4597                                 }
 4598                                 spa_close(spa, FTAG);
 4599                         }
 4600 
 4601                         if (compval == ZIO_COMPRESS_ZSTD) {
 4602                                 spa_t *spa;
 4603 
 4604                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 4605                                         return (err);
 4606 
 4607                                 if (!spa_feature_is_enabled(spa,
 4608                                     SPA_FEATURE_ZSTD_COMPRESS)) {
 4609                                         spa_close(spa, FTAG);
 4610                                         return (SET_ERROR(ENOTSUP));
 4611                                 }
 4612                                 spa_close(spa, FTAG);
 4613                         }
 4614                 }
 4615                 break;
 4616 
 4617         case ZFS_PROP_COPIES:
 4618                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
 4619                         return (SET_ERROR(ENOTSUP));
 4620                 break;
 4621 
 4622         case ZFS_PROP_VOLBLOCKSIZE:
 4623         case ZFS_PROP_RECORDSIZE:
 4624                 /* Record sizes above 128k need the feature to be enabled */
 4625                 if (nvpair_value_uint64(pair, &intval) == 0 &&
 4626                     intval > SPA_OLD_MAXBLOCKSIZE) {
 4627                         spa_t *spa;
 4628 
 4629                         /*
 4630                          * We don't allow setting the property above 1MB,
 4631                          * unless the tunable has been changed.
 4632                          */
 4633                         if (intval > zfs_max_recordsize ||
 4634                             intval > SPA_MAXBLOCKSIZE)
 4635                                 return (SET_ERROR(ERANGE));
 4636 
 4637                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 4638                                 return (err);
 4639 
 4640                         if (!spa_feature_is_enabled(spa,
 4641                             SPA_FEATURE_LARGE_BLOCKS)) {
 4642                                 spa_close(spa, FTAG);
 4643                                 return (SET_ERROR(ENOTSUP));
 4644                         }
 4645                         spa_close(spa, FTAG);
 4646                 }
 4647                 break;
 4648 
 4649         case ZFS_PROP_DNODESIZE:
 4650                 /* Dnode sizes above 512 need the feature to be enabled */
 4651                 if (nvpair_value_uint64(pair, &intval) == 0 &&
 4652                     intval != ZFS_DNSIZE_LEGACY) {
 4653                         spa_t *spa;
 4654 
 4655                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 4656                                 return (err);
 4657 
 4658                         if (!spa_feature_is_enabled(spa,
 4659                             SPA_FEATURE_LARGE_DNODE)) {
 4660                                 spa_close(spa, FTAG);
 4661                                 return (SET_ERROR(ENOTSUP));
 4662                         }
 4663                         spa_close(spa, FTAG);
 4664                 }
 4665                 break;
 4666 
 4667         case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
 4668                 /*
 4669                  * This property could require the allocation classes
 4670                  * feature to be active for setting, however we allow
 4671                  * it so that tests of settable properties succeed.
 4672                  * The CLI will issue a warning in this case.
 4673                  */
 4674                 break;
 4675 
 4676         case ZFS_PROP_SHARESMB:
 4677                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
 4678                         return (SET_ERROR(ENOTSUP));
 4679                 break;
 4680 
 4681         case ZFS_PROP_ACLINHERIT:
 4682                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 4683                     nvpair_value_uint64(pair, &intval) == 0) {
 4684                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
 4685                             zfs_earlier_version(dsname,
 4686                             SPA_VERSION_PASSTHROUGH_X))
 4687                                 return (SET_ERROR(ENOTSUP));
 4688                 }
 4689                 break;
 4690         case ZFS_PROP_CHECKSUM:
 4691         case ZFS_PROP_DEDUP:
 4692         {
 4693                 spa_feature_t feature;
 4694                 spa_t *spa;
 4695                 int err;
 4696 
 4697                 /* dedup feature version checks */
 4698                 if (prop == ZFS_PROP_DEDUP &&
 4699                     zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
 4700                         return (SET_ERROR(ENOTSUP));
 4701 
 4702                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 4703                     nvpair_value_uint64(pair, &intval) == 0) {
 4704                         /* check prop value is enabled in features */
 4705                         feature = zio_checksum_to_feature(
 4706                             intval & ZIO_CHECKSUM_MASK);
 4707                         if (feature == SPA_FEATURE_NONE)
 4708                                 break;
 4709 
 4710                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 4711                                 return (err);
 4712 
 4713                         if (!spa_feature_is_enabled(spa, feature)) {
 4714                                 spa_close(spa, FTAG);
 4715                                 return (SET_ERROR(ENOTSUP));
 4716                         }
 4717                         spa_close(spa, FTAG);
 4718                 }
 4719                 break;
 4720         }
 4721 
 4722         default:
 4723                 break;
 4724         }
 4725 
 4726         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
 4727 }
 4728 
 4729 /*
 4730  * Removes properties from the given props list that fail permission checks
 4731  * needed to clear them and to restore them in case of a receive error. For each
 4732  * property, make sure we have both set and inherit permissions.
 4733  *
 4734  * Returns the first error encountered if any permission checks fail. If the
 4735  * caller provides a non-NULL errlist, it also gives the complete list of names
 4736  * of all the properties that failed a permission check along with the
 4737  * corresponding error numbers. The caller is responsible for freeing the
 4738  * returned errlist.
 4739  *
 4740  * If every property checks out successfully, zero is returned and the list
 4741  * pointed at by errlist is NULL.
 4742  */
 4743 static int
 4744 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
 4745 {
 4746         zfs_cmd_t *zc;
 4747         nvpair_t *pair, *next_pair;
 4748         nvlist_t *errors;
 4749         int err, rv = 0;
 4750 
 4751         if (props == NULL)
 4752                 return (0);
 4753 
 4754         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 4755 
 4756         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
 4757         (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
 4758         pair = nvlist_next_nvpair(props, NULL);
 4759         while (pair != NULL) {
 4760                 next_pair = nvlist_next_nvpair(props, pair);
 4761 
 4762                 (void) strlcpy(zc->zc_value, nvpair_name(pair),
 4763                     sizeof (zc->zc_value));
 4764                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
 4765                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
 4766                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
 4767                         VERIFY(nvlist_add_int32(errors,
 4768                             zc->zc_value, err) == 0);
 4769                 }
 4770                 pair = next_pair;
 4771         }
 4772         kmem_free(zc, sizeof (zfs_cmd_t));
 4773 
 4774         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
 4775                 nvlist_free(errors);
 4776                 errors = NULL;
 4777         } else {
 4778                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
 4779         }
 4780 
 4781         if (errlist == NULL)
 4782                 nvlist_free(errors);
 4783         else
 4784                 *errlist = errors;
 4785 
 4786         return (rv);
 4787 }
 4788 
 4789 static boolean_t
 4790 propval_equals(nvpair_t *p1, nvpair_t *p2)
 4791 {
 4792         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
 4793                 /* dsl_prop_get_all_impl() format */
 4794                 nvlist_t *attrs;
 4795                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
 4796                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 4797                     &p1) == 0);
 4798         }
 4799 
 4800         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
 4801                 nvlist_t *attrs;
 4802                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
 4803                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 4804                     &p2) == 0);
 4805         }
 4806 
 4807         if (nvpair_type(p1) != nvpair_type(p2))
 4808                 return (B_FALSE);
 4809 
 4810         if (nvpair_type(p1) == DATA_TYPE_STRING) {
 4811                 char *valstr1, *valstr2;
 4812 
 4813                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
 4814                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
 4815                 return (strcmp(valstr1, valstr2) == 0);
 4816         } else {
 4817                 uint64_t intval1, intval2;
 4818 
 4819                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
 4820                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
 4821                 return (intval1 == intval2);
 4822         }
 4823 }
 4824 
 4825 /*
 4826  * Remove properties from props if they are not going to change (as determined
 4827  * by comparison with origprops). Remove them from origprops as well, since we
 4828  * do not need to clear or restore properties that won't change.
 4829  */
 4830 static void
 4831 props_reduce(nvlist_t *props, nvlist_t *origprops)
 4832 {
 4833         nvpair_t *pair, *next_pair;
 4834 
 4835         if (origprops == NULL)
 4836                 return; /* all props need to be received */
 4837 
 4838         pair = nvlist_next_nvpair(props, NULL);
 4839         while (pair != NULL) {
 4840                 const char *propname = nvpair_name(pair);
 4841                 nvpair_t *match;
 4842 
 4843                 next_pair = nvlist_next_nvpair(props, pair);
 4844 
 4845                 if ((nvlist_lookup_nvpair(origprops, propname,
 4846                     &match) != 0) || !propval_equals(pair, match))
 4847                         goto next; /* need to set received value */
 4848 
 4849                 /* don't clear the existing received value */
 4850                 (void) nvlist_remove_nvpair(origprops, match);
 4851                 /* don't bother receiving the property */
 4852                 (void) nvlist_remove_nvpair(props, pair);
 4853 next:
 4854                 pair = next_pair;
 4855         }
 4856 }
 4857 
 4858 /*
 4859  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
 4860  * For example, refquota cannot be set until after the receipt of a dataset,
 4861  * because in replication streams, an older/earlier snapshot may exceed the
 4862  * refquota.  We want to receive the older/earlier snapshot, but setting
 4863  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
 4864  * the older/earlier snapshot from being received (with EDQUOT).
 4865  *
 4866  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
 4867  *
 4868  * libzfs will need to be judicious handling errors encountered by props
 4869  * extracted by this function.
 4870  */
 4871 static nvlist_t *
 4872 extract_delay_props(nvlist_t *props)
 4873 {
 4874         nvlist_t *delayprops;
 4875         nvpair_t *nvp, *tmp;
 4876         static const zfs_prop_t delayable[] = {
 4877                 ZFS_PROP_REFQUOTA,
 4878                 ZFS_PROP_KEYLOCATION,
 4879                 /*
 4880                  * Setting ZFS_PROP_SHARESMB requires the objset type to be
 4881                  * known, which is not possible prior to receipt of raw sends.
 4882                  */
 4883                 ZFS_PROP_SHARESMB,
 4884                 0
 4885         };
 4886         int i;
 4887 
 4888         VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 4889 
 4890         for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
 4891             nvp = nvlist_next_nvpair(props, nvp)) {
 4892                 /*
 4893                  * strcmp() is safe because zfs_prop_to_name() always returns
 4894                  * a bounded string.
 4895                  */
 4896                 for (i = 0; delayable[i] != 0; i++) {
 4897                         if (strcmp(zfs_prop_to_name(delayable[i]),
 4898                             nvpair_name(nvp)) == 0) {
 4899                                 break;
 4900                         }
 4901                 }
 4902                 if (delayable[i] != 0) {
 4903                         tmp = nvlist_prev_nvpair(props, nvp);
 4904                         VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
 4905                         VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
 4906                         nvp = tmp;
 4907                 }
 4908         }
 4909 
 4910         if (nvlist_empty(delayprops)) {
 4911                 nvlist_free(delayprops);
 4912                 delayprops = NULL;
 4913         }
 4914         return (delayprops);
 4915 }
 4916 
 4917 static void
 4918 zfs_allow_log_destroy(void *arg)
 4919 {
 4920         char *poolname = arg;
 4921 
 4922         if (poolname != NULL)
 4923                 kmem_strfree(poolname);
 4924 }
 4925 
 4926 #ifdef  ZFS_DEBUG
 4927 static boolean_t zfs_ioc_recv_inject_err; /* exists only in ZFS_DEBUG builds; its users are outside this section */
 4928 #endif
 4929 
 4930 /*
 4931  * nvlist 'errors' is always allocated. It will contain descriptions of
 4932  * encountered errors, if any. It's the caller's responsibility to free.
 4933  */
 4934 static int
 4935 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 4936     nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
 4937     boolean_t heal, boolean_t resumable, int input_fd,
 4938     dmu_replay_record_t *begin_record, uint64_t *read_bytes,
 4939     uint64_t *errflags, nvlist_t **errors)
 4940 {
 4941         dmu_recv_cookie_t drc;
 4942         int error = 0;
 4943         int props_error = 0;
 4944         offset_t off, noff;
 4945         nvlist_t *local_delayprops = NULL;
 4946         nvlist_t *recv_delayprops = NULL;
 4947         nvlist_t *inherited_delayprops = NULL;
 4948         nvlist_t *origprops = NULL; /* existing properties */
 4949         nvlist_t *origrecvd = NULL; /* existing received properties */
 4950         boolean_t first_recvd_props = B_FALSE;
 4951         boolean_t tofs_was_redacted;
 4952         zfs_file_t *input_fp;
 4953 
 4954         *read_bytes = 0;
 4955         *errflags = 0;
 4956         *errors = fnvlist_alloc();
 4957         off = 0;
 4958 
 4959         if ((input_fp = zfs_file_get(input_fd)) == NULL)
 4960                 return (SET_ERROR(EBADF));
 4961 
 4962         noff = off = zfs_file_off(input_fp);
 4963         error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
 4964             resumable, localprops, hidden_args, origin, &drc, input_fp,
 4965             &off);
 4966         if (error != 0)
 4967                 goto out;
 4968         tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
 4969 
 4970         /*
 4971          * Set properties before we receive the stream so that they are applied
 4972          * to the new data. Note that we must call dmu_recv_stream() if
 4973          * dmu_recv_begin() succeeds.
 4974          */
 4975         if (recvprops != NULL && !drc.drc_newfs) {
 4976                 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
 4977                     SPA_VERSION_RECVD_PROPS &&
 4978                     !dsl_prop_get_hasrecvd(tofs))
 4979                         first_recvd_props = B_TRUE;
 4980 
 4981                 /*
 4982                  * If new received properties are supplied, they are to
 4983                  * completely replace the existing received properties,
 4984                  * so stash away the existing ones.
 4985                  */
 4986                 if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
 4987                         nvlist_t *errlist = NULL;
 4988                         /*
 4989                          * Don't bother writing a property if its value won't
 4990                          * change (and avoid the unnecessary security checks).
 4991                          *
 4992                          * The first receive after SPA_VERSION_RECVD_PROPS is a
 4993                          * special case where we blow away all local properties
 4994                          * regardless.
 4995                          */
 4996                         if (!first_recvd_props)
 4997                                 props_reduce(recvprops, origrecvd);
 4998                         if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
 4999                                 (void) nvlist_merge(*errors, errlist, 0);
 5000                         nvlist_free(errlist);
 5001 
 5002                         if (clear_received_props(tofs, origrecvd,
 5003                             first_recvd_props ? NULL : recvprops) != 0)
 5004                                 *errflags |= ZPROP_ERR_NOCLEAR;
 5005                 } else {
 5006                         *errflags |= ZPROP_ERR_NOCLEAR;
 5007                 }
 5008         }
 5009 
 5010         /*
 5011          * Stash away existing properties so we can restore them on error unless
 5012          * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
 5013          * case "origrecvd" will take care of that.
 5014          */
 5015         if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
 5016                 objset_t *os;
 5017                 if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
 5018                         if (dsl_prop_get_all(os, &origprops) != 0) {
 5019                                 *errflags |= ZPROP_ERR_NOCLEAR;
 5020                         }
 5021                         dmu_objset_rele(os, FTAG);
 5022                 } else {
 5023                         *errflags |= ZPROP_ERR_NOCLEAR;
 5024                 }
 5025         }
 5026 
 5027         if (recvprops != NULL) {
 5028                 props_error = dsl_prop_set_hasrecvd(tofs);
 5029 
 5030                 if (props_error == 0) {
 5031                         recv_delayprops = extract_delay_props(recvprops);
 5032                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
 5033                             recvprops, *errors);
 5034                 }
 5035         }
 5036 
 5037         if (localprops != NULL) {
 5038                 nvlist_t *oprops = fnvlist_alloc();
 5039                 nvlist_t *xprops = fnvlist_alloc();
 5040                 nvpair_t *nvp = NULL;
 5041 
 5042                 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
 5043                         if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
 5044                                 /* -x property */
 5045                                 const char *name = nvpair_name(nvp);
 5046                                 zfs_prop_t prop = zfs_name_to_prop(name);
 5047                                 if (prop != ZPROP_USERPROP) {
 5048                                         if (!zfs_prop_inheritable(prop))
 5049                                                 continue;
 5050                                 } else if (!zfs_prop_user(name))
 5051                                         continue;
 5052                                 fnvlist_add_boolean(xprops, name);
 5053                         } else {
 5054                                 /* -o property=value */
 5055                                 fnvlist_add_nvpair(oprops, nvp);
 5056                         }
 5057                 }
 5058 
 5059                 local_delayprops = extract_delay_props(oprops);
 5060                 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 5061                     oprops, *errors);
 5062                 inherited_delayprops = extract_delay_props(xprops);
 5063                 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
 5064                     xprops, *errors);
 5065 
 5066                 nvlist_free(oprops);
 5067                 nvlist_free(xprops);
 5068         }
 5069 
 5070         error = dmu_recv_stream(&drc, &off);
 5071 
 5072         if (error == 0) {
 5073                 zfsvfs_t *zfsvfs = NULL;
 5074                 zvol_state_handle_t *zv = NULL;
 5075 
 5076                 if (getzfsvfs(tofs, &zfsvfs) == 0) {
 5077                         /* online recv */
 5078                         dsl_dataset_t *ds;
 5079                         int end_err;
 5080                         boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
 5081                             begin_record->drr_u.drr_begin.
 5082                             drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
 5083 
 5084                         ds = dmu_objset_ds(zfsvfs->z_os);
 5085                         error = zfs_suspend_fs(zfsvfs);
 5086                         /*
 5087                          * If the suspend fails, then the recv_end will
 5088                          * likely also fail, and clean up after itself.
 5089                          */
 5090                         end_err = dmu_recv_end(&drc, zfsvfs);
 5091                         /*
 5092                          * If the dataset was not redacted, but we received a
 5093                          * redacted stream onto it, we need to unmount the
 5094                          * dataset.  Otherwise, resume the filesystem.
 5095                          */
 5096                         if (error == 0 && !drc.drc_newfs &&
 5097                             stream_is_redacted && !tofs_was_redacted) {
 5098                                 error = zfs_end_fs(zfsvfs, ds);
 5099                         } else if (error == 0) {
 5100                                 error = zfs_resume_fs(zfsvfs, ds);
 5101                         }
 5102                         error = error ? error : end_err;
 5103                         zfs_vfs_rele(zfsvfs);
 5104                 } else if ((zv = zvol_suspend(tofs)) != NULL) {
 5105                         error = dmu_recv_end(&drc, zvol_tag(zv));
 5106                         zvol_resume(zv);
 5107                 } else {
 5108                         error = dmu_recv_end(&drc, NULL);
 5109                 }
 5110 
 5111                 /* Set delayed properties now, after we're done receiving. */
 5112                 if (recv_delayprops != NULL && error == 0) {
 5113                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
 5114                             recv_delayprops, *errors);
 5115                 }
 5116                 if (local_delayprops != NULL && error == 0) {
 5117                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 5118                             local_delayprops, *errors);
 5119                 }
 5120                 if (inherited_delayprops != NULL && error == 0) {
 5121                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
 5122                             inherited_delayprops, *errors);
 5123                 }
 5124         }
 5125 
 5126         /*
 5127          * Merge delayed props back in with initial props, in case
 5128          * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
 5129          * we have to make sure clear_received_props() includes
 5130          * the delayed properties).
 5131          *
 5132          * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
 5133          * using ASSERT() will be just like a VERIFY.
 5134          */
 5135         if (recv_delayprops != NULL) {
 5136                 ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
 5137                 nvlist_free(recv_delayprops);
 5138         }
 5139         if (local_delayprops != NULL) {
 5140                 ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
 5141                 nvlist_free(local_delayprops);
 5142         }
 5143         if (inherited_delayprops != NULL) {
 5144                 ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
 5145                 nvlist_free(inherited_delayprops);
 5146         }
 5147         *read_bytes = off - noff;
 5148 
 5149 #ifdef  ZFS_DEBUG
 5150         if (zfs_ioc_recv_inject_err) {
 5151                 zfs_ioc_recv_inject_err = B_FALSE;
 5152                 error = 1;
 5153         }
 5154 #endif
 5155 
 5156         /*
 5157          * On error, restore the original props.
 5158          */
 5159         if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
 5160                 if (clear_received_props(tofs, recvprops, NULL) != 0) {
 5161                         /*
 5162                          * We failed to clear the received properties.
 5163                          * Since we may have left a $recvd value on the
 5164                          * system, we can't clear the $hasrecvd flag.
 5165                          */
 5166                         *errflags |= ZPROP_ERR_NORESTORE;
 5167                 } else if (first_recvd_props) {
 5168                         dsl_prop_unset_hasrecvd(tofs);
 5169                 }
 5170 
 5171                 if (origrecvd == NULL && !drc.drc_newfs) {
 5172                         /* We failed to stash the original properties. */
 5173                         *errflags |= ZPROP_ERR_NORESTORE;
 5174                 }
 5175 
 5176                 /*
 5177                  * dsl_props_set() will not convert RECEIVED to LOCAL on or
 5178                  * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
 5179                  * explicitly if we're restoring local properties cleared in the
 5180                  * first new-style receive.
 5181                  */
 5182                 if (origrecvd != NULL &&
 5183                     zfs_set_prop_nvlist(tofs, (first_recvd_props ?
 5184                     ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
 5185                     origrecvd, NULL) != 0) {
 5186                         /*
 5187                          * We stashed the original properties but failed to
 5188                          * restore them.
 5189                          */
 5190                         *errflags |= ZPROP_ERR_NORESTORE;
 5191                 }
 5192         }
 5193         if (error != 0 && localprops != NULL && !drc.drc_newfs &&
 5194             !first_recvd_props) {
 5195                 nvlist_t *setprops;
 5196                 nvlist_t *inheritprops;
 5197                 nvpair_t *nvp;
 5198 
 5199                 if (origprops == NULL) {
 5200                         /* We failed to stash the original properties. */
 5201                         *errflags |= ZPROP_ERR_NORESTORE;
 5202                         goto out;
 5203                 }
 5204 
 5205                 /* Restore original props */
 5206                 setprops = fnvlist_alloc();
 5207                 inheritprops = fnvlist_alloc();
 5208                 nvp = NULL;
 5209                 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
 5210                         const char *name = nvpair_name(nvp);
 5211                         const char *source;
 5212                         nvlist_t *attrs;
 5213 
 5214                         if (!nvlist_exists(origprops, name)) {
 5215                                 /*
 5216                                  * Property was not present or was explicitly
 5217                                  * inherited before the receive, restore this.
 5218                                  */
 5219                                 fnvlist_add_boolean(inheritprops, name);
 5220                                 continue;
 5221                         }
 5222                         attrs = fnvlist_lookup_nvlist(origprops, name);
 5223                         source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
 5224 
 5225                         /* Skip received properties */
 5226                         if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
 5227                                 continue;
 5228 
 5229                         if (strcmp(source, tofs) == 0) {
 5230                                 /* Property was locally set */
 5231                                 fnvlist_add_nvlist(setprops, name, attrs);
 5232                         } else {
 5233                                 /* Property was implicitly inherited */
 5234                                 fnvlist_add_boolean(inheritprops, name);
 5235                         }
 5236                 }
 5237 
 5238                 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
 5239                     NULL) != 0)
 5240                         *errflags |= ZPROP_ERR_NORESTORE;
 5241                 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
 5242                     NULL) != 0)
 5243                         *errflags |= ZPROP_ERR_NORESTORE;
 5244 
 5245                 nvlist_free(setprops);
 5246                 nvlist_free(inheritprops);
 5247         }
 5248 out:
 5249         zfs_file_put(input_fp);
 5250         nvlist_free(origrecvd);
 5251         nvlist_free(origprops);
 5252 
 5253         if (error == 0)
 5254                 error = props_error;
 5255 
 5256         return (error);
 5257 }
 5258 
 5259 /*
 5260  * inputs:
 5261  * zc_name              name of containing filesystem (unused)
 5262  * zc_nvlist_src{_size} nvlist of properties to apply
 5263  * zc_nvlist_conf{_size}        nvlist of properties to exclude
 5264  *                      (DATA_TYPE_BOOLEAN) and override (everything else)
 5265  * zc_value             name of snapshot to create
 5266  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
 5267  * zc_cookie            file descriptor to recv from
 5268  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
 5269  * zc_guid              force flag
 5270  *
 5271  * outputs:
 5272  * zc_cookie            number of bytes read
 5273  * zc_obj               zprop_errflags_t
 5274  * zc_nvlist_dst{_size} error for each unapplied received property
 5275  */
 5276 static int
 5277 zfs_ioc_recv(zfs_cmd_t *zc)
 5278 {
 5279         dmu_replay_record_t begin_record;
 5280         nvlist_t *errors = NULL;
 5281         nvlist_t *recvdprops = NULL;
 5282         nvlist_t *localprops = NULL;
 5283         char *origin = NULL;
 5284         char *tosnap;
 5285         char tofs[ZFS_MAX_DATASET_NAME_LEN];
 5286         int error = 0;
 5287 
 5288         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 5289             strchr(zc->zc_value, '@') == NULL ||
 5290             strchr(zc->zc_value, '%'))
 5291                 return (SET_ERROR(EINVAL));
 5292 
 5293         (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
 5294         tosnap = strchr(tofs, '@');
 5295         *tosnap++ = '\0';
 5296 
 5297         if (zc->zc_nvlist_src != 0 &&
 5298             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 5299             zc->zc_iflags, &recvdprops)) != 0)
 5300                 return (error);
 5301 
 5302         if (zc->zc_nvlist_conf != 0 &&
 5303             (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 5304             zc->zc_iflags, &localprops)) != 0)
 5305                 return (error);
 5306 
 5307         if (zc->zc_string[0])
 5308                 origin = zc->zc_string;
 5309 
 5310         begin_record.drr_type = DRR_BEGIN;
 5311         begin_record.drr_payloadlen = 0;
 5312         begin_record.drr_u.drr_begin = zc->zc_begin_record;
 5313 
 5314         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
 5315             NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
 5316             &zc->zc_cookie, &zc->zc_obj, &errors);
 5317         nvlist_free(recvdprops);
 5318         nvlist_free(localprops);
 5319 
 5320         /*
 5321          * Now that all props, initial and delayed, are set, report the prop
 5322          * errors to the caller.
 5323          */
 5324         if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
 5325             (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
 5326             put_nvlist(zc, errors) != 0)) {
 5327                 /*
 5328                  * Caller made zc->zc_nvlist_dst less than the minimum expected
 5329                  * size or supplied an invalid address.
 5330                  */
 5331                 error = SET_ERROR(EINVAL);
 5332         }
 5333 
 5334         nvlist_free(errors);
 5335 
 5336         return (error);
 5337 }
 5338 
/*
 * innvl: {
 *     "snapname" -> full name of the snapshot to create
 *     (optional) "props" -> received properties to set (nvlist)
 *     (optional) "localprops" -> override and exclude properties (nvlist)
 *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
 *     "begin_record" -> non-byteswapped dmu_replay_record_t
 *     "input_fd" -> file descriptor to read stream from (int32)
 *     (optional) "force" -> force flag (value ignored)
 *     (optional) "heal" -> use send stream to heal data corruption
 *     (optional) "resumable" -> resumable flag (value ignored)
 *     (optional) "cleanup_fd" -> unused
 *     (optional) "action_handle" -> unused
 *     (optional) "hidden_args" -> { "wkeydata" -> value }
 * }
 *
 * outnvl: {
 *     "read_bytes" -> number of bytes read
 *     "error_flags" -> zprop_errflags_t
 *     "errors" -> error for each unapplied received property (nvlist)
 * }
 */
/*
 * Key table describing the innvl accepted by zfs_ioc_recv_new(): one row per
 * key giving its name, nvpair data type and flags.  Rows flagged ZK_OPTIONAL
 * correspond to the "(optional)" entries listed above; a flags value of 0
 * marks the remaining keys.
 * NOTE(review): presumably consumed by the ioctl registration/validation
 * code elsewhere in this file -- confirm.
 */
static const zfs_ioc_key_t zfs_keys_recv_new[] = {
	{"snapname",		DATA_TYPE_STRING,	0},
	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
	{"input_fd",		DATA_TYPE_INT32,	0},
	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"heal",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
 5375 
 5376 static int
 5377 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 5378 {
 5379         dmu_replay_record_t *begin_record;
 5380         uint_t begin_record_size;
 5381         nvlist_t *errors = NULL;
 5382         nvlist_t *recvprops = NULL;
 5383         nvlist_t *localprops = NULL;
 5384         nvlist_t *hidden_args = NULL;
 5385         char *snapname;
 5386         char *origin = NULL;
 5387         char *tosnap;
 5388         char tofs[ZFS_MAX_DATASET_NAME_LEN];
 5389         boolean_t force;
 5390         boolean_t heal;
 5391         boolean_t resumable;
 5392         uint64_t read_bytes = 0;
 5393         uint64_t errflags = 0;
 5394         int input_fd = -1;
 5395         int error;
 5396 
 5397         snapname = fnvlist_lookup_string(innvl, "snapname");
 5398 
 5399         if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
 5400             strchr(snapname, '@') == NULL ||
 5401             strchr(snapname, '%'))
 5402                 return (SET_ERROR(EINVAL));
 5403 
 5404         (void) strlcpy(tofs, snapname, sizeof (tofs));
 5405         tosnap = strchr(tofs, '@');
 5406         *tosnap++ = '\0';
 5407 
 5408         error = nvlist_lookup_string(innvl, "origin", &origin);
 5409         if (error && error != ENOENT)
 5410                 return (error);
 5411 
 5412         error = nvlist_lookup_byte_array(innvl, "begin_record",
 5413             (uchar_t **)&begin_record, &begin_record_size);
 5414         if (error != 0 || begin_record_size != sizeof (*begin_record))
 5415                 return (SET_ERROR(EINVAL));
 5416 
 5417         input_fd = fnvlist_lookup_int32(innvl, "input_fd");
 5418 
 5419         force = nvlist_exists(innvl, "force");
 5420         heal = nvlist_exists(innvl, "heal");
 5421         resumable = nvlist_exists(innvl, "resumable");
 5422 
 5423         /* we still use "props" here for backwards compatibility */
 5424         error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
 5425         if (error && error != ENOENT)
 5426                 return (error);
 5427 
 5428         error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
 5429         if (error && error != ENOENT)
 5430                 return (error);
 5431 
 5432         error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 5433         if (error && error != ENOENT)
 5434                 return (error);
 5435 
 5436         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
 5437             hidden_args, force, heal, resumable, input_fd, begin_record,
 5438             &read_bytes, &errflags, &errors);
 5439 
 5440         fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
 5441         fnvlist_add_uint64(outnvl, "error_flags", errflags);
 5442         fnvlist_add_nvlist(outnvl, "errors", errors);
 5443 
 5444         nvlist_free(errors);
 5445         nvlist_free(recvprops);
 5446         nvlist_free(localprops);
 5447 
 5448         return (error);
 5449 }
 5450 
/*
 * Argument bundle handed from dump_bytes() to dump_bytes_cb(), which may run
 * in a taskq thread and therefore takes a single void pointer.
 */
typedef struct dump_bytes_io {
	zfs_file_t	*dbi_fp;	/* file the bytes are written to */
	caddr_t		dbi_buf;	/* start of the data to write */
	int		dbi_len;	/* number of bytes to write */
	int		dbi_err;	/* out: result of zfs_file_write() */
} dump_bytes_io_t;
 5457 
 5458 static void
 5459 dump_bytes_cb(void *arg)
 5460 {
 5461         dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
 5462         zfs_file_t *fp;
 5463         caddr_t buf;
 5464 
 5465         fp = dbi->dbi_fp;
 5466         buf = dbi->dbi_buf;
 5467 
 5468         dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
 5469 }
 5470 
 5471 static int
 5472 dump_bytes(objset_t *os, void *buf, int len, void *arg)
 5473 {
 5474         dump_bytes_io_t dbi;
 5475 
 5476         dbi.dbi_fp = arg;
 5477         dbi.dbi_buf = buf;
 5478         dbi.dbi_len = len;
 5479 
 5480 #if defined(HAVE_LARGE_STACKS)
 5481         dump_bytes_cb(&dbi);
 5482 #else
 5483         /*
 5484          * The vn_rdwr() call is performed in a taskq to ensure that there is
 5485          * always enough stack space to write safely to the target filesystem.
 5486          * The ZIO_TYPE_FREE threads are used because there can be a lot of
 5487          * them and they are used in vdev_file.c for a similar purpose.
 5488          */
 5489         spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
 5490             ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
 5491 #endif /* HAVE_LARGE_STACKS */
 5492 
 5493         return (dbi.dbi_err);
 5494 }
 5495 
 5496 /*
 5497  * inputs:
 5498  * zc_name      name of snapshot to send
 5499  * zc_cookie    file descriptor to send stream to
 5500  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
 5501  * zc_sendobj   objsetid of snapshot to send
 5502  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
 5503  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
 5504  *              output size in zc_objset_type.
 5505  * zc_flags     lzc_send_flags
 5506  *
 5507  * outputs:
 5508  * zc_objset_type       estimated size, if zc_guid is set
 5509  *
 5510  * NOTE: This is no longer the preferred interface, any new functionality
 5511  *        should be added to zfs_ioc_send_new() instead.
 5512  */
 5513 static int
 5514 zfs_ioc_send(zfs_cmd_t *zc)
 5515 {
 5516         int error;
 5517         offset_t off;
 5518         boolean_t estimate = (zc->zc_guid != 0);
 5519         boolean_t embedok = (zc->zc_flags & 0x1);
 5520         boolean_t large_block_ok = (zc->zc_flags & 0x2);
 5521         boolean_t compressok = (zc->zc_flags & 0x4);
 5522         boolean_t rawok = (zc->zc_flags & 0x8);
 5523         boolean_t savedok = (zc->zc_flags & 0x10);
 5524 
 5525         if (zc->zc_obj != 0) {
 5526                 dsl_pool_t *dp;
 5527                 dsl_dataset_t *tosnap;
 5528 
 5529                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 5530                 if (error != 0)
 5531                         return (error);
 5532 
 5533                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
 5534                 if (error != 0) {
 5535                         dsl_pool_rele(dp, FTAG);
 5536                         return (error);
 5537                 }
 5538 
 5539                 if (dsl_dir_is_clone(tosnap->ds_dir))
 5540                         zc->zc_fromobj =
 5541                             dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
 5542                 dsl_dataset_rele(tosnap, FTAG);
 5543                 dsl_pool_rele(dp, FTAG);
 5544         }
 5545 
 5546         if (estimate) {
 5547                 dsl_pool_t *dp;
 5548                 dsl_dataset_t *tosnap;
 5549                 dsl_dataset_t *fromsnap = NULL;
 5550 
 5551                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 5552                 if (error != 0)
 5553                         return (error);
 5554 
 5555                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
 5556                     FTAG, &tosnap);
 5557                 if (error != 0) {
 5558                         dsl_pool_rele(dp, FTAG);
 5559                         return (error);
 5560                 }
 5561 
 5562                 if (zc->zc_fromobj != 0) {
 5563                         error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
 5564                             FTAG, &fromsnap);
 5565                         if (error != 0) {
 5566                                 dsl_dataset_rele(tosnap, FTAG);
 5567                                 dsl_pool_rele(dp, FTAG);
 5568                                 return (error);
 5569                         }
 5570                 }
 5571 
 5572                 error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
 5573                     compressok || rawok, savedok, &zc->zc_objset_type);
 5574 
 5575                 if (fromsnap != NULL)
 5576                         dsl_dataset_rele(fromsnap, FTAG);
 5577                 dsl_dataset_rele(tosnap, FTAG);
 5578                 dsl_pool_rele(dp, FTAG);
 5579         } else {
 5580                 zfs_file_t *fp;
 5581                 dmu_send_outparams_t out = {0};
 5582 
 5583                 if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
 5584                         return (SET_ERROR(EBADF));
 5585 
 5586                 off = zfs_file_off(fp);
 5587                 out.dso_outfunc = dump_bytes;
 5588                 out.dso_arg = fp;
 5589                 out.dso_dryrun = B_FALSE;
 5590                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
 5591                     zc->zc_fromobj, embedok, large_block_ok, compressok,
 5592                     rawok, savedok, zc->zc_cookie, &off, &out);
 5593 
 5594                 zfs_file_put(fp);
 5595         }
 5596         return (error);
 5597 }
 5598 
 5599 /*
 5600  * inputs:
 5601  * zc_name              name of snapshot on which to report progress
 5602  * zc_cookie            file descriptor of send stream
 5603  *
 5604  * outputs:
 5605  * zc_cookie            number of bytes written in send stream thus far
 5606  * zc_objset_type       logical size of data traversed by send thus far
 5607  */
 5608 static int
 5609 zfs_ioc_send_progress(zfs_cmd_t *zc)
 5610 {
 5611         dsl_pool_t *dp;
 5612         dsl_dataset_t *ds;
 5613         dmu_sendstatus_t *dsp = NULL;
 5614         int error;
 5615 
 5616         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 5617         if (error != 0)
 5618                 return (error);
 5619 
 5620         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
 5621         if (error != 0) {
 5622                 dsl_pool_rele(dp, FTAG);
 5623                 return (error);
 5624         }
 5625 
 5626         mutex_enter(&ds->ds_sendstream_lock);
 5627 
 5628         /*
 5629          * Iterate over all the send streams currently active on this dataset.
 5630          * If there's one which matches the specified file descriptor _and_ the
 5631          * stream was started by the current process, return the progress of
 5632          * that stream.
 5633          */
 5634 
 5635         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
 5636             dsp = list_next(&ds->ds_sendstreams, dsp)) {
 5637                 if (dsp->dss_outfd == zc->zc_cookie &&
 5638                     zfs_proc_is_caller(dsp->dss_proc))
 5639                         break;
 5640         }
 5641 
 5642         if (dsp != NULL) {
 5643                 zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
 5644                     0, 0);
 5645                 /* This is the closest thing we have to atomic_read_64. */
 5646                 zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
 5647         } else {
 5648                 error = SET_ERROR(ENOENT);
 5649         }
 5650 
 5651         mutex_exit(&ds->ds_sendstream_lock);
 5652         dsl_dataset_rele(ds, FTAG);
 5653         dsl_pool_rele(dp, FTAG);
 5654         return (error);
 5655 }
 5656 
 5657 static int
 5658 zfs_ioc_inject_fault(zfs_cmd_t *zc)
 5659 {
 5660         int id, error;
 5661 
 5662         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
 5663             &zc->zc_inject_record);
 5664 
 5665         if (error == 0)
 5666                 zc->zc_guid = (uint64_t)id;
 5667 
 5668         return (error);
 5669 }
 5670 
 5671 static int
 5672 zfs_ioc_clear_fault(zfs_cmd_t *zc)
 5673 {
 5674         return (zio_clear_fault((int)zc->zc_guid));
 5675 }
 5676 
 5677 static int
 5678 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
 5679 {
 5680         int id = (int)zc->zc_guid;
 5681         int error;
 5682 
 5683         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
 5684             &zc->zc_inject_record);
 5685 
 5686         zc->zc_guid = id;
 5687 
 5688         return (error);
 5689 }
 5690 
 5691 static int
 5692 zfs_ioc_error_log(zfs_cmd_t *zc)
 5693 {
 5694         spa_t *spa;
 5695         int error;
 5696 
 5697         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 5698                 return (error);
 5699 
 5700         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
 5701             &zc->zc_nvlist_dst_size);
 5702 
 5703         spa_close(spa, FTAG);
 5704 
 5705         return (error);
 5706 }
 5707 
 5708 static int
 5709 zfs_ioc_clear(zfs_cmd_t *zc)
 5710 {
 5711         spa_t *spa;
 5712         vdev_t *vd;
 5713         int error;
 5714 
 5715         /*
 5716          * On zpool clear we also fix up missing slogs
 5717          */
 5718         mutex_enter(&spa_namespace_lock);
 5719         spa = spa_lookup(zc->zc_name);
 5720         if (spa == NULL) {
 5721                 mutex_exit(&spa_namespace_lock);
 5722                 return (SET_ERROR(EIO));
 5723         }
 5724         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
 5725                 /* we need to let spa_open/spa_load clear the chains */
 5726                 spa_set_log_state(spa, SPA_LOG_CLEAR);
 5727         }
 5728         spa->spa_last_open_failed = 0;
 5729         mutex_exit(&spa_namespace_lock);
 5730 
 5731         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
 5732                 error = spa_open(zc->zc_name, &spa, FTAG);
 5733         } else {
 5734                 nvlist_t *policy;
 5735                 nvlist_t *config = NULL;
 5736 
 5737                 if (zc->zc_nvlist_src == 0)
 5738                         return (SET_ERROR(EINVAL));
 5739 
 5740                 if ((error = get_nvlist(zc->zc_nvlist_src,
 5741                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
 5742                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
 5743                             policy, &config);
 5744                         if (config != NULL) {
 5745                                 int err;
 5746 
 5747                                 if ((err = put_nvlist(zc, config)) != 0)
 5748                                         error = err;
 5749                                 nvlist_free(config);
 5750                         }
 5751                         nvlist_free(policy);
 5752                 }
 5753         }
 5754 
 5755         if (error != 0)
 5756                 return (error);
 5757 
 5758         /*
 5759          * If multihost is enabled, resuming I/O is unsafe as another
 5760          * host may have imported the pool.
 5761          */
 5762         if (spa_multihost(spa) && spa_suspended(spa))
 5763                 return (SET_ERROR(EINVAL));
 5764 
 5765         spa_vdev_state_enter(spa, SCL_NONE);
 5766 
 5767         if (zc->zc_guid == 0) {
 5768                 vd = NULL;
 5769         } else {
 5770                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
 5771                 if (vd == NULL) {
 5772                         error = SET_ERROR(ENODEV);
 5773                         (void) spa_vdev_state_exit(spa, NULL, error);
 5774                         spa_close(spa, FTAG);
 5775                         return (error);
 5776                 }
 5777         }
 5778 
 5779         vdev_clear(spa, vd);
 5780 
 5781         (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
 5782             NULL : spa->spa_root_vdev, 0);
 5783 
 5784         /*
 5785          * Resume any suspended I/Os.
 5786          */
 5787         if (zio_resume(spa) != 0)
 5788                 error = SET_ERROR(EIO);
 5789 
 5790         spa_close(spa, FTAG);
 5791 
 5792         return (error);
 5793 }
 5794 
/*
 * Reopen all the vdevs associated with the pool.
 *
 * innvl: {
 *  "scrub_restart" -> when true and a scrub is running, allow the scrub
 *              to be restarted as a side effect of the reopen (boolean).
 *              zfs_ioc_pool_reopen() treats a missing key as true.
 * }
 *
 * outnvl is unused
 */
/* innvl keys for the pool reopen ioctl: { name, nvpair type, ZK_* flags } */
static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
        {"scrub_restart",       DATA_TYPE_BOOLEAN_VALUE,        ZK_OPTIONAL},
};
 5808 
 5809 static int
 5810 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
 5811 {
 5812         (void) outnvl;
 5813         spa_t *spa;
 5814         int error;
 5815         boolean_t rc, scrub_restart = B_TRUE;
 5816 
 5817         if (innvl) {
 5818                 error = nvlist_lookup_boolean_value(innvl,
 5819                     "scrub_restart", &rc);
 5820                 if (error == 0)
 5821                         scrub_restart = rc;
 5822         }
 5823 
 5824         error = spa_open(pool, &spa, FTAG);
 5825         if (error != 0)
 5826                 return (error);
 5827 
 5828         spa_vdev_state_enter(spa, SCL_NONE);
 5829 
 5830         /*
 5831          * If the scrub_restart flag is B_FALSE and a scrub is already
 5832          * in progress then set spa_scrub_reopen flag to B_TRUE so that
 5833          * we don't restart the scrub as a side effect of the reopen.
 5834          * Otherwise, let vdev_open() decided if a resilver is required.
 5835          */
 5836 
 5837         spa->spa_scrub_reopen = (!scrub_restart &&
 5838             dsl_scan_scrubbing(spa->spa_dsl_pool));
 5839         vdev_reopen(spa->spa_root_vdev);
 5840         spa->spa_scrub_reopen = B_FALSE;
 5841 
 5842         (void) spa_vdev_state_exit(spa, NULL, 0);
 5843         spa_close(spa, FTAG);
 5844         return (0);
 5845 }
 5846 
 5847 /*
 5848  * inputs:
 5849  * zc_name      name of filesystem
 5850  *
 5851  * outputs:
 5852  * zc_string    name of conflicting snapshot, if there is one
 5853  */
 5854 static int
 5855 zfs_ioc_promote(zfs_cmd_t *zc)
 5856 {
 5857         dsl_pool_t *dp;
 5858         dsl_dataset_t *ds, *ods;
 5859         char origin[ZFS_MAX_DATASET_NAME_LEN];
 5860         char *cp;
 5861         int error;
 5862 
 5863         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 5864         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
 5865             strchr(zc->zc_name, '%'))
 5866                 return (SET_ERROR(EINVAL));
 5867 
 5868         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 5869         if (error != 0)
 5870                 return (error);
 5871 
 5872         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
 5873         if (error != 0) {
 5874                 dsl_pool_rele(dp, FTAG);
 5875                 return (error);
 5876         }
 5877 
 5878         if (!dsl_dir_is_clone(ds->ds_dir)) {
 5879                 dsl_dataset_rele(ds, FTAG);
 5880                 dsl_pool_rele(dp, FTAG);
 5881                 return (SET_ERROR(EINVAL));
 5882         }
 5883 
 5884         error = dsl_dataset_hold_obj(dp,
 5885             dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
 5886         if (error != 0) {
 5887                 dsl_dataset_rele(ds, FTAG);
 5888                 dsl_pool_rele(dp, FTAG);
 5889                 return (error);
 5890         }
 5891 
 5892         dsl_dataset_name(ods, origin);
 5893         dsl_dataset_rele(ods, FTAG);
 5894         dsl_dataset_rele(ds, FTAG);
 5895         dsl_pool_rele(dp, FTAG);
 5896 
 5897         /*
 5898          * We don't need to unmount *all* the origin fs's snapshots, but
 5899          * it's easier.
 5900          */
 5901         cp = strchr(origin, '@');
 5902         if (cp)
 5903                 *cp = '\0';
 5904         (void) dmu_objset_find(origin,
 5905             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
 5906         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
 5907 }
 5908 
 5909 /*
 5910  * Retrieve a single {user|group|project}{used|quota}@... property.
 5911  *
 5912  * inputs:
 5913  * zc_name      name of filesystem
 5914  * zc_objset_type zfs_userquota_prop_t
 5915  * zc_value     domain name (eg. "S-1-234-567-89")
 5916  * zc_guid      RID/UID/GID
 5917  *
 5918  * outputs:
 5919  * zc_cookie    property value
 5920  */
 5921 static int
 5922 zfs_ioc_userspace_one(zfs_cmd_t *zc)
 5923 {
 5924         zfsvfs_t *zfsvfs;
 5925         int error;
 5926 
 5927         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 5928                 return (SET_ERROR(EINVAL));
 5929 
 5930         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 5931         if (error != 0)
 5932                 return (error);
 5933 
 5934         error = zfs_userspace_one(zfsvfs,
 5935             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
 5936         zfsvfs_rele(zfsvfs, FTAG);
 5937 
 5938         return (error);
 5939 }
 5940 
 5941 /*
 5942  * inputs:
 5943  * zc_name              name of filesystem
 5944  * zc_cookie            zap cursor
 5945  * zc_objset_type       zfs_userquota_prop_t
 5946  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
 5947  *
 5948  * outputs:
 5949  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
 5950  * zc_cookie    zap cursor
 5951  */
 5952 static int
 5953 zfs_ioc_userspace_many(zfs_cmd_t *zc)
 5954 {
 5955         zfsvfs_t *zfsvfs;
 5956         int bufsize = zc->zc_nvlist_dst_size;
 5957 
 5958         if (bufsize <= 0)
 5959                 return (SET_ERROR(ENOMEM));
 5960 
 5961         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 5962         if (error != 0)
 5963                 return (error);
 5964 
 5965         void *buf = vmem_alloc(bufsize, KM_SLEEP);
 5966 
 5967         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
 5968             buf, &zc->zc_nvlist_dst_size);
 5969 
 5970         if (error == 0) {
 5971                 error = xcopyout(buf,
 5972                     (void *)(uintptr_t)zc->zc_nvlist_dst,
 5973                     zc->zc_nvlist_dst_size);
 5974         }
 5975         vmem_free(buf, bufsize);
 5976         zfsvfs_rele(zfsvfs, FTAG);
 5977 
 5978         return (error);
 5979 }
 5980 
/*
 * Run dmu_objset_userspace_upgrade() on a filesystem, unless an upgrade id
 * is already recorded for its objset, then wait on the pool's upgrade
 * taskq and report the objset's upgrade status.
 *
 * inputs:
 * zc_name              name of filesystem
 *
 * outputs:
 * none
 */
static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
        int error = 0;
        zfsvfs_t *zfsvfs;

        /*
         * Two paths: through the zfsvfs when getzfsvfs() finds one for
         * zc_name, otherwise through a hold on the objset by name.
         */
        if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
                if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
                        /*
                         * If userused is not enabled, it may be because the
                         * objset needs to be closed & reopened (to grow the
                         * objset_phys_t).  Suspend/resume the fs will do that.
                         */
                        dsl_dataset_t *ds, *newds;

                        ds = dmu_objset_ds(zfsvfs->z_os);
                        error = zfs_suspend_fs(zfsvfs);
                        if (error == 0) {
                                dmu_objset_refresh_ownership(ds, &newds,
                                    B_TRUE, zfsvfs);
                                error = zfs_resume_fs(zfsvfs, newds);
                        }
                }
                /* Skipped entirely when the suspend/resume above failed. */
                if (error == 0) {
                        mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
                        if (zfsvfs->z_os->os_upgrade_id == 0) {
                                /* clear potential error code and retry */
                                zfsvfs->z_os->os_upgrade_status = 0;
                                mutex_exit(&zfsvfs->z_os->os_upgrade_lock);

                                /*
                                 * The upgrade call is bracketed by the
                                 * pool config enter/exit; the upgrade
                                 * lock has already been dropped here.
                                 */
                                dsl_pool_config_enter(
                                    dmu_objset_pool(zfsvfs->z_os), FTAG);
                                dmu_objset_userspace_upgrade(zfsvfs->z_os);
                                dsl_pool_config_exit(
                                    dmu_objset_pool(zfsvfs->z_os), FTAG);
                        } else {
                                mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
                        }

                        /* Wait on the recorded id, then report its status. */
                        taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
                            zfsvfs->z_os->os_upgrade_id);
                        error = zfsvfs->z_os->os_upgrade_status;
                }
                zfs_vfs_rele(zfsvfs);
        } else {
                objset_t *os;

                /* XXX kind of reading contents without owning */
                error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
                if (error != 0)
                        return (error);

                mutex_enter(&os->os_upgrade_lock);
                if (os->os_upgrade_id == 0) {
                        /* clear potential error code and retry */
                        os->os_upgrade_status = 0;
                        mutex_exit(&os->os_upgrade_lock);

                        dmu_objset_userspace_upgrade(os);
                } else {
                        mutex_exit(&os->os_upgrade_lock);
                }

                /*
                 * The pool is released before blocking on the taskq; the
                 * dataset is released only after the status has been read.
                 */
                dsl_pool_rele(dmu_objset_pool(os), FTAG);

                taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
                error = os->os_upgrade_status;

                dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
                    FTAG);
        }
        return (error);
}
 6061 
 6062 /*
 6063  * inputs:
 6064  * zc_name              name of filesystem
 6065  *
 6066  * outputs:
 6067  * none
 6068  */
 6069 static int
 6070 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
 6071 {
 6072         objset_t *os;
 6073         int error;
 6074 
 6075         error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 6076         if (error != 0)
 6077                 return (error);
 6078 
 6079         if (dmu_objset_userobjspace_upgradable(os) ||
 6080             dmu_objset_projectquota_upgradable(os)) {
 6081                 mutex_enter(&os->os_upgrade_lock);
 6082                 if (os->os_upgrade_id == 0) {
 6083                         /* clear potential error code and retry */
 6084                         os->os_upgrade_status = 0;
 6085                         mutex_exit(&os->os_upgrade_lock);
 6086 
 6087                         dmu_objset_id_quota_upgrade(os);
 6088                 } else {
 6089                         mutex_exit(&os->os_upgrade_lock);
 6090                 }
 6091 
 6092                 dsl_pool_rele(dmu_objset_pool(os), FTAG);
 6093 
 6094                 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
 6095                 error = os->os_upgrade_status;
 6096         } else {
 6097                 dsl_pool_rele(dmu_objset_pool(os), FTAG);
 6098         }
 6099 
 6100         dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
 6101 
 6102         return (error);
 6103 }
 6104 
 6105 static int
 6106 zfs_ioc_share(zfs_cmd_t *zc)
 6107 {
 6108         return (SET_ERROR(ENOSYS));
 6109 }
 6110 
 6111 /*
 6112  * inputs:
 6113  * zc_name              name of containing filesystem
 6114  * zc_obj               object # beyond which we want next in-use object #
 6115  *
 6116  * outputs:
 6117  * zc_obj               next in-use object #
 6118  */
 6119 static int
 6120 zfs_ioc_next_obj(zfs_cmd_t *zc)
 6121 {
 6122         objset_t *os = NULL;
 6123         int error;
 6124 
 6125         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 6126         if (error != 0)
 6127                 return (error);
 6128 
 6129         error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
 6130 
 6131         dmu_objset_rele(os, FTAG);
 6132         return (error);
 6133 }
 6134 
 6135 /*
 6136  * inputs:
 6137  * zc_name              name of filesystem
 6138  * zc_value             prefix name for snapshot
 6139  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
 6140  *
 6141  * outputs:
 6142  * zc_value             short name of new snapshot
 6143  */
 6144 static int
 6145 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
 6146 {
 6147         char *snap_name;
 6148         char *hold_name;
 6149         minor_t minor;
 6150 
 6151         zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
 6152         if (fp == NULL)
 6153                 return (SET_ERROR(EBADF));
 6154 
 6155         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
 6156             (u_longlong_t)ddi_get_lbolt64());
 6157         hold_name = kmem_asprintf("%%%s", zc->zc_value);
 6158 
 6159         int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
 6160             hold_name);
 6161         if (error == 0)
 6162                 (void) strlcpy(zc->zc_value, snap_name,
 6163                     sizeof (zc->zc_value));
 6164         kmem_strfree(snap_name);
 6165         kmem_strfree(hold_name);
 6166         zfs_onexit_fd_rele(fp);
 6167         return (error);
 6168 }
 6169 
 6170 /*
 6171  * inputs:
 6172  * zc_name              name of "to" snapshot
 6173  * zc_value             name of "from" snapshot
 6174  * zc_cookie            file descriptor to write diff data on
 6175  *
 6176  * outputs:
 6177  * dmu_diff_record_t's to the file descriptor
 6178  */
 6179 static int
 6180 zfs_ioc_diff(zfs_cmd_t *zc)
 6181 {
 6182         zfs_file_t *fp;
 6183         offset_t off;
 6184         int error;
 6185 
 6186         if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
 6187                 return (SET_ERROR(EBADF));
 6188 
 6189         off = zfs_file_off(fp);
 6190         error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
 6191 
 6192         zfs_file_put(fp);
 6193 
 6194         return (error);
 6195 }
 6196 
 6197 static int
 6198 zfs_ioc_smb_acl(zfs_cmd_t *zc)
 6199 {
 6200         return (SET_ERROR(ENOTSUP));
 6201 }
 6202 
/*
 * innvl: {
 *     "holds" -> { snapname -> holdname (string), ... }
 *         zfs_ioc_hold() rejects an empty holdname with EINVAL
 *     (optional) "cleanup_fd" -> fd (int32)
 *         zfs_ioc_hold() fails with EBADF if the fd cannot be held
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
/* innvl keys for the hold ioctl: { name, nvpair type, ZK_* flags } */
static const zfs_ioc_key_t zfs_keys_hold[] = {
        {"holds",               DATA_TYPE_NVLIST,       0},
        {"cleanup_fd",          DATA_TYPE_INT32,        ZK_OPTIONAL},
};
 6218 
 6219 static int
 6220 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
 6221 {
 6222         (void) pool;
 6223         nvpair_t *pair;
 6224         nvlist_t *holds;
 6225         int cleanup_fd = -1;
 6226         int error;
 6227         minor_t minor = 0;
 6228         zfs_file_t *fp = NULL;
 6229 
 6230         holds = fnvlist_lookup_nvlist(args, "holds");
 6231 
 6232         /* make sure the user didn't pass us any invalid (empty) tags */
 6233         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 6234             pair = nvlist_next_nvpair(holds, pair)) {
 6235                 char *htag;
 6236 
 6237                 error = nvpair_value_string(pair, &htag);
 6238                 if (error != 0)
 6239                         return (SET_ERROR(error));
 6240 
 6241                 if (strlen(htag) == 0)
 6242                         return (SET_ERROR(EINVAL));
 6243         }
 6244 
 6245         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
 6246                 fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
 6247                 if (fp == NULL)
 6248                         return (SET_ERROR(EBADF));
 6249         }
 6250 
 6251         error = dsl_dataset_user_hold(holds, minor, errlist);
 6252         if (fp != NULL) {
 6253                 ASSERT3U(minor, !=, 0);
 6254                 zfs_onexit_fd_rele(fp);
 6255         }
 6256         return (SET_ERROR(error));
 6257 }
 6258 
/*
 * innvl is not used.
 *
 * outnvl: {
 *    holdname -> time added (uint64 seconds since epoch)
 *    ...
 * }
 */
/* Empty key table: zfs_ioc_get_holds() ignores its input nvlist. */
static const zfs_ioc_key_t zfs_keys_get_holds[] = {
        /* no nvl keys */
};
 6270 
 6271 static int
 6272 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
 6273 {
 6274         (void) args;
 6275         return (dsl_dataset_get_holds(snapname, outnvl));
 6276 }
 6277 
/*
 * innvl: {
 *     snapname -> { holdname, ... }
 *     ...
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
/*
 * innvl keys for the release ioctl.  The single ZK_WILDCARDLIST row stands
 * for the per-snapshot nvlists described above; "<snapname>..." is
 * presumably a placeholder rather than a literal key name -- confirm
 * against the code that consumes ZK_WILDCARDLIST.
 */
static const zfs_ioc_key_t zfs_keys_release[] = {
        {"<snapname>...",       DATA_TYPE_NVLIST,       ZK_WILDCARDLIST},
};
 6292 
 6293 static int
 6294 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 6295 {
 6296         (void) pool;
 6297         return (dsl_dataset_user_release(holds, errlist));
 6298 }
 6299 
 6300 /*
 6301  * inputs:
 6302  * zc_guid              flags (ZEVENT_NONBLOCK)
 6303  * zc_cleanup_fd        zevent file descriptor
 6304  *
 6305  * outputs:
 6306  * zc_nvlist_dst        next nvlist event
 6307  * zc_cookie            dropped events since last get
 6308  */
 6309 static int
 6310 zfs_ioc_events_next(zfs_cmd_t *zc)
 6311 {
 6312         zfs_zevent_t *ze;
 6313         nvlist_t *event = NULL;
 6314         minor_t minor;
 6315         uint64_t dropped = 0;
 6316         int error;
 6317 
 6318         zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
 6319         if (fp == NULL)
 6320                 return (SET_ERROR(EBADF));
 6321 
 6322         do {
 6323                 error = zfs_zevent_next(ze, &event,
 6324                     &zc->zc_nvlist_dst_size, &dropped);
 6325                 if (event != NULL) {
 6326                         zc->zc_cookie = dropped;
 6327                         error = put_nvlist(zc, event);
 6328                         nvlist_free(event);
 6329                 }
 6330 
 6331                 if (zc->zc_guid & ZEVENT_NONBLOCK)
 6332                         break;
 6333 
 6334                 if ((error == 0) || (error != ENOENT))
 6335                         break;
 6336 
 6337                 error = zfs_zevent_wait(ze);
 6338                 if (error != 0)
 6339                         break;
 6340         } while (1);
 6341 
 6342         zfs_zevent_fd_rele(fp);
 6343 
 6344         return (error);
 6345 }
 6346 
 6347 /*
 6348  * outputs:
 6349  * zc_cookie            cleared events count
 6350  */
 6351 static int
 6352 zfs_ioc_events_clear(zfs_cmd_t *zc)
 6353 {
 6354         uint_t count;
 6355 
 6356         zfs_zevent_drain_all(&count);
 6357         zc->zc_cookie = count;
 6358 
 6359         return (0);
 6360 }
 6361 
 6362 /*
 6363  * inputs:
 6364  * zc_guid              eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
 6365  * zc_cleanup           zevent file descriptor
 6366  */
 6367 static int
 6368 zfs_ioc_events_seek(zfs_cmd_t *zc)
 6369 {
 6370         zfs_zevent_t *ze;
 6371         minor_t minor;
 6372         int error;
 6373 
 6374         zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
 6375         if (fp == NULL)
 6376                 return (SET_ERROR(EBADF));
 6377 
 6378         error = zfs_zevent_seek(ze, zc->zc_guid);
 6379         zfs_zevent_fd_rele(fp);
 6380 
 6381         return (error);
 6382 }
 6383 
 6384 /*
 6385  * inputs:
 6386  * zc_name              name of later filesystem or snapshot
 6387  * zc_value             full name of old snapshot or bookmark
 6388  *
 6389  * outputs:
 6390  * zc_cookie            space in bytes
 6391  * zc_objset_type       compressed space in bytes
 6392  * zc_perm_action       uncompressed space in bytes
 6393  */
 6394 static int
 6395 zfs_ioc_space_written(zfs_cmd_t *zc)
 6396 {
 6397         int error;
 6398         dsl_pool_t *dp;
 6399         dsl_dataset_t *new;
 6400 
 6401         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 6402         if (error != 0)
 6403                 return (error);
 6404         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
 6405         if (error != 0) {
 6406                 dsl_pool_rele(dp, FTAG);
 6407                 return (error);
 6408         }
 6409         if (strchr(zc->zc_value, '#') != NULL) {
 6410                 zfs_bookmark_phys_t bmp;
 6411                 error = dsl_bookmark_lookup(dp, zc->zc_value,
 6412                     new, &bmp);
 6413                 if (error == 0) {
 6414                         error = dsl_dataset_space_written_bookmark(&bmp, new,
 6415                             &zc->zc_cookie,
 6416                             &zc->zc_objset_type, &zc->zc_perm_action);
 6417                 }
 6418         } else {
 6419                 dsl_dataset_t *old;
 6420                 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
 6421 
 6422                 if (error == 0) {
 6423                         error = dsl_dataset_space_written(old, new,
 6424                             &zc->zc_cookie,
 6425                             &zc->zc_objset_type, &zc->zc_perm_action);
 6426                         dsl_dataset_rele(old, FTAG);
 6427                 }
 6428         }
 6429         dsl_dataset_rele(new, FTAG);
 6430         dsl_pool_rele(dp, FTAG);
 6431         return (error);
 6432 }
 6433 
/*
 * innvl: {
 *     "firstsnap" -> snapshot name
 * }
 *
 * outnvl: {
 *     "used" -> space in bytes
 *     "compressed" -> compressed space in bytes
 *     "uncompressed" -> uncompressed space in bytes
 * }
 */
/* innvl keys for the space-snaps ioctl: { name, nvpair type, ZK_* flags } */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
        {"firstsnap",   DATA_TYPE_STRING,       0},
};
 6448 
 6449 static int
 6450 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
 6451 {
 6452         int error;
 6453         dsl_pool_t *dp;
 6454         dsl_dataset_t *new, *old;
 6455         char *firstsnap;
 6456         uint64_t used, comp, uncomp;
 6457 
 6458         firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
 6459 
 6460         error = dsl_pool_hold(lastsnap, FTAG, &dp);
 6461         if (error != 0)
 6462                 return (error);
 6463 
 6464         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
 6465         if (error == 0 && !new->ds_is_snapshot) {
 6466                 dsl_dataset_rele(new, FTAG);
 6467                 error = SET_ERROR(EINVAL);
 6468         }
 6469         if (error != 0) {
 6470                 dsl_pool_rele(dp, FTAG);
 6471                 return (error);
 6472         }
 6473         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
 6474         if (error == 0 && !old->ds_is_snapshot) {
 6475                 dsl_dataset_rele(old, FTAG);
 6476                 error = SET_ERROR(EINVAL);
 6477         }
 6478         if (error != 0) {
 6479                 dsl_dataset_rele(new, FTAG);
 6480                 dsl_pool_rele(dp, FTAG);
 6481                 return (error);
 6482         }
 6483 
 6484         error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
 6485         dsl_dataset_rele(old, FTAG);
 6486         dsl_dataset_rele(new, FTAG);
 6487         dsl_pool_rele(dp, FTAG);
 6488         fnvlist_add_uint64(outnvl, "used", used);
 6489         fnvlist_add_uint64(outnvl, "compressed", comp);
 6490         fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
 6491         return (error);
 6492 }
 6493 
/*
 * innvl: {
 *     "fd" -> file descriptor to write stream to (int32)
 *     (optional) "fromsnap" -> full snap name to send an incremental from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "savedok" -> (value ignored)
 *         presence indicates we should send a partially received snapshot
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "redactbook" -> (string)
 *         if present, use this bookmark's redaction list to generate a redacted
 *         send stream
 * }
 *
 * outnvl is unused
 */
/*
 * innvl keys for the send ioctl: { name, nvpair type, ZK_* flags }.  Only
 * "fd" lacks ZK_OPTIONAL; the "...ok" switches are typed as valueless
 * DATA_TYPE_BOOLEAN, matching the "(value ignored)" notes above.
 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
        {"fd",                  DATA_TYPE_INT32,        0},
        {"fromsnap",            DATA_TYPE_STRING,       ZK_OPTIONAL},
        {"largeblockok",        DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"embedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"compressok",          DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"rawok",               DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"savedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"resume_object",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
        {"resume_offset",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
        {"redactbook",          DATA_TYPE_STRING,       ZK_OPTIONAL},
};
 6529 
 6530 static int
 6531 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 6532 {
 6533         (void) outnvl;
 6534         int error;
 6535         offset_t off;
 6536         char *fromname = NULL;
 6537         int fd;
 6538         zfs_file_t *fp;
 6539         boolean_t largeblockok;
 6540         boolean_t embedok;
 6541         boolean_t compressok;
 6542         boolean_t rawok;
 6543         boolean_t savedok;
 6544         uint64_t resumeobj = 0;
 6545         uint64_t resumeoff = 0;
 6546         char *redactbook = NULL;
 6547 
 6548         fd = fnvlist_lookup_int32(innvl, "fd");
 6549 
 6550         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
 6551 
 6552         largeblockok = nvlist_exists(innvl, "largeblockok");
 6553         embedok = nvlist_exists(innvl, "embedok");
 6554         compressok = nvlist_exists(innvl, "compressok");
 6555         rawok = nvlist_exists(innvl, "rawok");
 6556         savedok = nvlist_exists(innvl, "savedok");
 6557 
 6558         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 6559         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
 6560 
 6561         (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
 6562 
 6563         if ((fp = zfs_file_get(fd)) == NULL)
 6564                 return (SET_ERROR(EBADF));
 6565 
 6566         off = zfs_file_off(fp);
 6567 
 6568         dmu_send_outparams_t out = {0};
 6569         out.dso_outfunc = dump_bytes;
 6570         out.dso_arg = fp;
 6571         out.dso_dryrun = B_FALSE;
 6572         error = dmu_send(snapname, fromname, embedok, largeblockok,
 6573             compressok, rawok, savedok, resumeobj, resumeoff,
 6574             redactbook, fd, &off, &out);
 6575 
 6576         zfs_file_put(fp);
 6577         return (error);
 6578 }
 6579 
 6580 static int
 6581 send_space_sum(objset_t *os, void *buf, int len, void *arg)
 6582 {
 6583         (void) os, (void) buf;
 6584         uint64_t *size = arg;
 6585 
 6586         *size += len;
 6587         return (0);
 6588 }
 6589 
/*
 * Determine approximately how large a zfs send stream will be -- the number
 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
 *
 * innvl: {
 *     (optional) "from" -> full snap or bookmark name to send an incremental
 *                          from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "fd" -> file descriptor to use as a cookie for progress
 *         tracking (int32)
 * }
 *
 * The key table below also accepts "fromsnap" (string), "redactbook"
 * (string) and "bytes" (uint64), which the list above does not describe.
 *
 * outnvl: {
 *     "space" -> bytes of space (uint64)
 * }
 */
/*
 * innvl keys for the send-space ioctl: { name, nvpair type, ZK_* flags }.
 * Every key is ZK_OPTIONAL.
 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
        {"from",                DATA_TYPE_STRING,       ZK_OPTIONAL},
        {"fromsnap",            DATA_TYPE_STRING,       ZK_OPTIONAL},
        {"largeblockok",        DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"embedok",             DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"compressok",          DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"rawok",               DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"fd",                  DATA_TYPE_INT32,        ZK_OPTIONAL},
        {"redactbook",          DATA_TYPE_STRING,       ZK_OPTIONAL},
        {"resume_object",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
        {"resume_offset",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
        {"bytes",               DATA_TYPE_UINT64,       ZK_OPTIONAL},
};
 6628 
 6629 static int
 6630 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 6631 {
 6632         dsl_pool_t *dp;
 6633         dsl_dataset_t *tosnap;
 6634         dsl_dataset_t *fromsnap = NULL;
 6635         int error;
 6636         char *fromname = NULL;
 6637         char *redactlist_book = NULL;
 6638         boolean_t largeblockok;
 6639         boolean_t embedok;
 6640         boolean_t compressok;
 6641         boolean_t rawok;
 6642         boolean_t savedok;
 6643         uint64_t space = 0;
 6644         boolean_t full_estimate = B_FALSE;
 6645         uint64_t resumeobj = 0;
 6646         uint64_t resumeoff = 0;
 6647         uint64_t resume_bytes = 0;
 6648         int32_t fd = -1;
 6649         zfs_bookmark_phys_t zbm = {0};
 6650 
 6651         error = dsl_pool_hold(snapname, FTAG, &dp);
 6652         if (error != 0)
 6653                 return (error);
 6654 
 6655         error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
 6656         if (error != 0) {
 6657                 dsl_pool_rele(dp, FTAG);
 6658                 return (error);
 6659         }
 6660         (void) nvlist_lookup_int32(innvl, "fd", &fd);
 6661 
 6662         largeblockok = nvlist_exists(innvl, "largeblockok");
 6663         embedok = nvlist_exists(innvl, "embedok");
 6664         compressok = nvlist_exists(innvl, "compressok");
 6665         rawok = nvlist_exists(innvl, "rawok");
 6666         savedok = nvlist_exists(innvl, "savedok");
 6667         boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
 6668         boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
 6669             &redactlist_book) == 0);
 6670 
 6671         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 6672         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
 6673         (void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
 6674 
 6675         if (altbook) {
 6676                 full_estimate = B_TRUE;
 6677         } else if (from) {
 6678                 if (strchr(fromname, '#')) {
 6679                         error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
 6680 
 6681                         /*
 6682                          * dsl_bookmark_lookup() will fail with EXDEV if
 6683                          * the from-bookmark and tosnap are at the same txg.
 6684                          * However, it's valid to do a send (and therefore,
 6685                          * a send estimate) from and to the same time point,
 6686                          * if the bookmark is redacted (the incremental send
 6687                          * can change what's redacted on the target).  In
 6688                          * this case, dsl_bookmark_lookup() fills in zbm
 6689                          * but returns EXDEV.  Ignore this error.
 6690                          */
 6691                         if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
 6692                             zbm.zbm_guid ==
 6693                             dsl_dataset_phys(tosnap)->ds_guid)
 6694                                 error = 0;
 6695 
 6696                         if (error != 0) {
 6697                                 dsl_dataset_rele(tosnap, FTAG);
 6698                                 dsl_pool_rele(dp, FTAG);
 6699                                 return (error);
 6700                         }
 6701                         if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
 6702                             ZBM_FLAG_HAS_FBN)) {
 6703                                 full_estimate = B_TRUE;
 6704                         }
 6705                 } else if (strchr(fromname, '@')) {
 6706                         error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
 6707                         if (error != 0) {
 6708                                 dsl_dataset_rele(tosnap, FTAG);
 6709                                 dsl_pool_rele(dp, FTAG);
 6710                                 return (error);
 6711                         }
 6712 
 6713                         if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
 6714                                 full_estimate = B_TRUE;
 6715                                 dsl_dataset_rele(fromsnap, FTAG);
 6716                         }
 6717                 } else {
 6718                         /*
 6719                          * from is not properly formatted as a snapshot or
 6720                          * bookmark
 6721                          */
 6722                         dsl_dataset_rele(tosnap, FTAG);
 6723                         dsl_pool_rele(dp, FTAG);
 6724                         return (SET_ERROR(EINVAL));
 6725                 }
 6726         }
 6727 
 6728         if (full_estimate) {
 6729                 dmu_send_outparams_t out = {0};
 6730                 offset_t off = 0;
 6731                 out.dso_outfunc = send_space_sum;
 6732                 out.dso_arg = &space;
 6733                 out.dso_dryrun = B_TRUE;
 6734                 /*
 6735                  * We have to release these holds so dmu_send can take them.  It
 6736                  * will do all the error checking we need.
 6737                  */
 6738                 dsl_dataset_rele(tosnap, FTAG);
 6739                 dsl_pool_rele(dp, FTAG);
 6740                 error = dmu_send(snapname, fromname, embedok, largeblockok,
 6741                     compressok, rawok, savedok, resumeobj, resumeoff,
 6742                     redactlist_book, fd, &off, &out);
 6743         } else {
 6744                 error = dmu_send_estimate_fast(tosnap, fromsnap,
 6745                     (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
 6746                     compressok || rawok, savedok, &space);
 6747                 space -= resume_bytes;
 6748                 if (fromsnap != NULL)
 6749                         dsl_dataset_rele(fromsnap, FTAG);
 6750                 dsl_dataset_rele(tosnap, FTAG);
 6751                 dsl_pool_rele(dp, FTAG);
 6752         }
 6753 
 6754         fnvlist_add_uint64(outnvl, "space", space);
 6755 
 6756         return (error);
 6757 }
 6758 
/*
 * Sync the currently open TXG to disk for the specified pool.
 * This is somewhat similar to 'zfs_sync()'.
 * For cases that do not result in an error, this ioctl waits for
 * the currently open TXG to commit before returning to the caller.
 *
 * innvl: {
 *  "force" -> when true, force uberblock update even if there is no dirty data.
 *             In addition this will cause the vdev configuration to be written
 *             out including updating the zpool cache file. (boolean_t)
 * }
 *
 * onvl is unused
 */
static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
        /* Flags value 0: the only entry here not marked ZK_OPTIONAL. */
        {"force",       DATA_TYPE_BOOLEAN_VALUE,        0},
};
 6776 
 6777 static int
 6778 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
 6779 {
 6780         (void) onvl;
 6781         int err;
 6782         boolean_t rc, force = B_FALSE;
 6783         spa_t *spa;
 6784 
 6785         if ((err = spa_open(pool, &spa, FTAG)) != 0)
 6786                 return (err);
 6787 
 6788         if (innvl) {
 6789                 err = nvlist_lookup_boolean_value(innvl, "force", &rc);
 6790                 if (err == 0)
 6791                         force = rc;
 6792         }
 6793 
 6794         if (force) {
 6795                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
 6796                 vdev_config_dirty(spa->spa_root_vdev);
 6797                 spa_config_exit(spa, SCL_CONFIG, FTAG);
 6798         }
 6799         txg_wait_synced(spa_get_dsl(spa), 0);
 6800 
 6801         spa_close(spa, FTAG);
 6802 
 6803         return (0);
 6804 }
 6805 
/*
 * Load a user's wrapping key into the kernel.
 * innvl: {
 *     "hidden_args" -> { "wkeydata" -> value }
 *         raw uint8_t array of encryption wrapping key data (32 bytes)
 *     (optional) "noop" -> (value ignored)
 *         presence indicates the key should only be verified, not loaded
 * }
 *
 * outnvl is unused
 */
static const zfs_ioc_key_t zfs_keys_load_key[] = {
        {"hidden_args", DATA_TYPE_NVLIST,       0},
        {"noop",        DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
};
 6819 
 6820 static int
 6821 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 6822 {
 6823         (void) outnvl;
 6824         int ret;
 6825         dsl_crypto_params_t *dcp = NULL;
 6826         nvlist_t *hidden_args;
 6827         boolean_t noop = nvlist_exists(innvl, "noop");
 6828 
 6829         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 6830                 ret = SET_ERROR(EINVAL);
 6831                 goto error;
 6832         }
 6833 
 6834         hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
 6835 
 6836         ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
 6837             hidden_args, &dcp);
 6838         if (ret != 0)
 6839                 goto error;
 6840 
 6841         ret = spa_keystore_load_wkey(dsname, dcp, noop);
 6842         if (ret != 0)
 6843                 goto error;
 6844 
 6845         dsl_crypto_params_free(dcp, noop);
 6846 
 6847         return (0);
 6848 
 6849 error:
 6850         dsl_crypto_params_free(dcp, B_TRUE);
 6851         return (ret);
 6852 }
 6853 
/*
 * Unload a user's wrapping key from the kernel.
 * Both innvl and outnvl are unused, so the key table is intentionally empty.
 */
static const zfs_ioc_key_t zfs_keys_unload_key[] = {
        /* no nvl keys */
};
 6861 
 6862 static int
 6863 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 6864 {
 6865         (void) innvl, (void) outnvl;
 6866         int ret = 0;
 6867 
 6868         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 6869                 ret = (SET_ERROR(EINVAL));
 6870                 goto out;
 6871         }
 6872 
 6873         ret = spa_keystore_unload_wkey(dsname);
 6874         if (ret != 0)
 6875                 goto out;
 6876 
 6877 out:
 6878         return (ret);
 6879 }
 6880 
/*
 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
 * here to change how the key is derived in userspace.
 *
 * innvl: {
 *    "crypt_cmd" (optional) -> command passed to
 *         dsl_crypto_params_create_nvlist(); DCP_CMD_NONE if absent (uint64)
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }
 *
 * outnvl is unused
 */
static const zfs_ioc_key_t zfs_keys_change_key[] = {
        {"crypt_cmd",   DATA_TYPE_UINT64,       ZK_OPTIONAL},
        {"hidden_args", DATA_TYPE_NVLIST,       ZK_OPTIONAL},
        {"props",       DATA_TYPE_NVLIST,       ZK_OPTIONAL},
};
 6899 
 6900 static int
 6901 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 6902 {
 6903         (void) outnvl;
 6904         int ret;
 6905         uint64_t cmd = DCP_CMD_NONE;
 6906         dsl_crypto_params_t *dcp = NULL;
 6907         nvlist_t *args = NULL, *hidden_args = NULL;
 6908 
 6909         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 6910                 ret = (SET_ERROR(EINVAL));
 6911                 goto error;
 6912         }
 6913 
 6914         (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
 6915         (void) nvlist_lookup_nvlist(innvl, "props", &args);
 6916         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 6917 
 6918         ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
 6919         if (ret != 0)
 6920                 goto error;
 6921 
 6922         ret = spa_keystore_change_key(dsname, dcp);
 6923         if (ret != 0)
 6924                 goto error;
 6925 
 6926         dsl_crypto_params_free(dcp, B_FALSE);
 6927 
 6928         return (0);
 6929 
 6930 error:
 6931         dsl_crypto_params_free(dcp, B_TRUE);
 6932         return (ret);
 6933 }
 6934 
/*
 * Table of registered ioctl vectors.  The registration functions below store
 * the entry for ioctl number `ioc` at index (ioc - ZFS_IOC_FIRST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 6936 
 6937 static void
 6938 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
 6939     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
 6940     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
 6941 {
 6942         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 6943 
 6944         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 6945         ASSERT3U(ioc, <, ZFS_IOC_LAST);
 6946         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
 6947         ASSERT3P(vec->zvec_func, ==, NULL);
 6948 
 6949         vec->zvec_legacy_func = func;
 6950         vec->zvec_secpolicy = secpolicy;
 6951         vec->zvec_namecheck = namecheck;
 6952         vec->zvec_allow_log = log_history;
 6953         vec->zvec_pool_check = pool_check;
 6954 }
 6955 
 6956 /*
 6957  * See the block comment at the beginning of this file for details on
 6958  * each argument to this function.
 6959  */
 6960 void
 6961 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
 6962     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
 6963     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
 6964     boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
 6965 {
 6966         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 6967 
 6968         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 6969         ASSERT3U(ioc, <, ZFS_IOC_LAST);
 6970         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
 6971         ASSERT3P(vec->zvec_func, ==, NULL);
 6972 
 6973         /* if we are logging, the name must be valid */
 6974         ASSERT(!allow_log || namecheck != NO_NAME);
 6975 
 6976         vec->zvec_name = name;
 6977         vec->zvec_func = func;
 6978         vec->zvec_secpolicy = secpolicy;
 6979         vec->zvec_namecheck = namecheck;
 6980         vec->zvec_pool_check = pool_check;
 6981         vec->zvec_smush_outnvlist = smush_outnvlist;
 6982         vec->zvec_allow_log = allow_log;
 6983         vec->zvec_nvl_keys = nvl_keys;
 6984         vec->zvec_nvl_key_count = num_keys;
 6985 }
 6986 
/*
 * Register a legacy-signature ioctl whose name check is POOL_NAME; the
 * security policy, history-logging flag and pool check come from the caller.
 */
static void
zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
    zfs_ioc_poolcheck_t pool_check)
{
        zfs_ioctl_register_legacy(ioc, func, secpolicy,
            POOL_NAME, log_history, pool_check);
}
 6995 
/*
 * Register a legacy-signature ioctl whose name check is DATASET_NAME and
 * whose history logging is disabled (log_history is B_FALSE); the security
 * policy and pool check come from the caller.
 */
void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
{
        zfs_ioctl_register_legacy(ioc, func, secpolicy,
            DATASET_NAME, B_FALSE, pool_check);
}
 7003 
/*
 * Register a legacy-signature ioctl with the fixed settings used for
 * pool-modifying operations: zfs_secpolicy_config, POOL_NAME name check,
 * history logging enabled, and both the suspended and read-only pool checks.
 */
static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
        zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
            POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
 7010 
/*
 * Register a legacy-signature ioctl with NO_NAME as its name check, history
 * logging disabled and POOL_CHECK_NONE; only the security policy is supplied
 * by the caller.
 */
static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
        zfs_ioctl_register_legacy(ioc, func, secpolicy,
            NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
 7018 
/*
 * Register a legacy-signature ioctl with a DATASET_NAME name check, history
 * logging disabled and only the suspended-pool check; the security policy
 * is supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
{
        zfs_ioctl_register_legacy(ioc, func, secpolicy,
            DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
}
 7026 
/*
 * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
 * zfs_secpolicy_read as the security policy.
 */
static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
        zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
            zfs_secpolicy_read);
}
 7033 
/*
 * Register a legacy-signature ioctl with a DATASET_NAME name check, history
 * logging enabled, and both the suspended and read-only pool checks; the
 * security policy is supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
        zfs_ioctl_register_legacy(ioc, func, secpolicy,
            DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
 7041 
 7042 static void
 7043 zfs_ioctl_init(void)
 7044 {
 7045         zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
 7046             zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
 7047             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7048             zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));
 7049 
 7050         zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
 7051             zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
 7052             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 7053             zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));
 7054 
 7055         zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
 7056             zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
 7057             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 7058             zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));
 7059 
 7060         zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
 7061             zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
 7062             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 7063             zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));
 7064 
 7065         zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
 7066             zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
 7067             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 7068             zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));
 7069 
 7070         zfs_ioctl_register("create", ZFS_IOC_CREATE,
 7071             zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
 7072             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7073             zfs_keys_create, ARRAY_SIZE(zfs_keys_create));
 7074 
 7075         zfs_ioctl_register("clone", ZFS_IOC_CLONE,
 7076             zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
 7077             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7078             zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
 7079 
 7080         zfs_ioctl_register("remap", ZFS_IOC_REMAP,
 7081             zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
 7082             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 7083             zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
 7084 
 7085         zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
 7086             zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
 7087             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7088             zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));
 7089 
 7090         zfs_ioctl_register("hold", ZFS_IOC_HOLD,
 7091             zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
 7092             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7093             zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
 7094         zfs_ioctl_register("release", ZFS_IOC_RELEASE,
 7095             zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
 7096             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7097             zfs_keys_release, ARRAY_SIZE(zfs_keys_release));
 7098 
 7099         zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
 7100             zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
 7101             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 7102             zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));
 7103 
 7104         zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
 7105             zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
 7106             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 7107             zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));
 7108 
 7109         zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
 7110             zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
 7111             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7112             zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));
 7113 
 7114         zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
 7115             zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
 7116             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 7117             zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
 7118 
 7119         zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
 7120             zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
 7121             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
 7122             ARRAY_SIZE(zfs_keys_get_bookmark_props));
 7123 
 7124         zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
 7125             zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
 7126             POOL_NAME,
 7127             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7128             zfs_keys_destroy_bookmarks,
 7129             ARRAY_SIZE(zfs_keys_destroy_bookmarks));
 7130 
 7131         zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
 7132             zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
 7133             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7134             zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
 7135         zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
 7136             zfs_ioc_load_key, zfs_secpolicy_load_key,
 7137             DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
 7138             zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
 7139         zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
 7140             zfs_ioc_unload_key, zfs_secpolicy_load_key,
 7141             DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
 7142             zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
 7143         zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
 7144             zfs_ioc_change_key, zfs_secpolicy_change_key,
 7145             DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
 7146             B_TRUE, B_TRUE, zfs_keys_change_key,
 7147             ARRAY_SIZE(zfs_keys_change_key));
 7148 
 7149         zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
 7150             zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
 7151             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 7152             zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
 7153         zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
 7154             zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
 7155             B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));
 7156 
 7157         zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
 7158             zfs_ioc_channel_program, zfs_secpolicy_config,
 7159             POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
 7160             B_TRUE, zfs_keys_channel_program,
 7161             ARRAY_SIZE(zfs_keys_channel_program));
 7162 
 7163         zfs_ioctl_register("redact", ZFS_IOC_REDACT,
 7164             zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
 7165             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7166             zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
 7167 
 7168         zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
 7169             zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
 7170             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7171             zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));
 7172 
 7173         zfs_ioctl_register("zpool_discard_checkpoint",
 7174             ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
 7175             zfs_secpolicy_config, POOL_NAME,
 7176             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7177             zfs_keys_pool_discard_checkpoint,
 7178             ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
 7179 
 7180         zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
 7181             zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
 7182             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7183             zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
 7184 
 7185         zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
 7186             zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
 7187             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 7188             zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
 7189 
 7190         zfs_ioctl_register("wait", ZFS_IOC_WAIT,
 7191             zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
 7192             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 7193             zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
 7194 
 7195         zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
 7196             zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
 7197             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 7198             zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
 7199 
 7200         zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
 7201             zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
 7202             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 7203             zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
 7204 
 7205         zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
 7206             zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
 7207             POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
 7208             zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
 7209 
 7210         zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
 7211             zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
 7212             POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
 7213             ARRAY_SIZE(zfs_keys_vdev_get_props));
 7214 
 7215         zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
 7216             zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
 7217             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 7218             zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));
 7219 
 7220         /* IOCTLS that use the legacy function signature */
 7221 
 7222         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
 7223             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
 7224 
 7225         zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
 7226             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 7227         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
 7228             zfs_ioc_pool_scan);
 7229         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
 7230             zfs_ioc_pool_upgrade);
 7231         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
 7232             zfs_ioc_vdev_add);
 7233         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
 7234             zfs_ioc_vdev_remove);
 7235         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
 7236             zfs_ioc_vdev_set_state);
 7237         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
 7238             zfs_ioc_vdev_attach);
 7239         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
 7240             zfs_ioc_vdev_detach);
 7241         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
 7242             zfs_ioc_vdev_setpath);
 7243         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
 7244             zfs_ioc_vdev_setfru);
 7245         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
 7246             zfs_ioc_pool_set_props);
 7247         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
 7248             zfs_ioc_vdev_split);
 7249         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
 7250             zfs_ioc_pool_reguid);
 7251 
 7252         zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
 7253             zfs_ioc_pool_configs, zfs_secpolicy_none);
 7254         zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
 7255             zfs_ioc_pool_tryimport, zfs_secpolicy_config);
 7256         zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
 7257             zfs_ioc_inject_fault, zfs_secpolicy_inject);
 7258         zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
 7259             zfs_ioc_clear_fault, zfs_secpolicy_inject);
 7260         zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
 7261             zfs_ioc_inject_list_next, zfs_secpolicy_inject);
 7262 
 7263         /*
 7264          * pool destroy, and export don't log the history as part of
 7265          * zfsdev_ioctl, but rather zfs_ioc_pool_export
 7266          * does the logging of those commands.
 7267          */
 7268         zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
 7269             zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 7270         zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
 7271             zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 7272 
 7273         zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
 7274             zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 7275         zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
 7276             zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 7277 
 7278         zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
 7279             zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
 7280         zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
 7281             zfs_ioc_dsobj_to_dsname,
 7282             zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
 7283         zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
 7284             zfs_ioc_pool_get_history,
 7285             zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 7286 
 7287         zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
 7288             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 7289 
 7290         zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
 7291             zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
 7292 
 7293         zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
 7294             zfs_ioc_space_written);
 7295         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
 7296             zfs_ioc_objset_recvd_props);
 7297         zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
 7298             zfs_ioc_next_obj);
 7299         zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
 7300             zfs_ioc_get_fsacl);
 7301         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
 7302             zfs_ioc_objset_stats);
 7303         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
 7304             zfs_ioc_objset_zplprops);
 7305         zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
 7306             zfs_ioc_dataset_list_next);
 7307         zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
 7308             zfs_ioc_snapshot_list_next);
 7309         zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
 7310             zfs_ioc_send_progress);
 7311 
 7312         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
 7313             zfs_ioc_diff, zfs_secpolicy_diff);
 7314         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
 7315             zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
 7316         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
 7317             zfs_ioc_obj_to_path, zfs_secpolicy_diff);
 7318         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
 7319             zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
 7320         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
 7321             zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
 7322         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
 7323             zfs_ioc_send, zfs_secpolicy_send);
 7324 
 7325         zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
 7326             zfs_secpolicy_none);
 7327         zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
 7328             zfs_secpolicy_destroy);
 7329         zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
 7330             zfs_secpolicy_rename);
 7331         zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
 7332             zfs_secpolicy_recv);
 7333         zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
 7334             zfs_secpolicy_promote);
 7335         zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
 7336             zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
 7337         zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
 7338             zfs_secpolicy_set_fsacl);
 7339 
 7340         zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
 7341             zfs_secpolicy_share, POOL_CHECK_NONE);
 7342         zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
 7343             zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
 7344         zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
 7345             zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
 7346             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 7347         zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
 7348             zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
 7349             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 7350 
 7351         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
 7352             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 7353         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
 7354             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 7355         zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
 7356             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 7357 
 7358         zfs_ioctl_init_os();
 7359 }
 7360 
 7361 /*
 7362  * Verify that for non-legacy ioctls the input nvlist
 7363  * pairs match against the expected input.
 7364  *
 7365  * Possible errors are:
 7366  * ZFS_ERR_IOC_ARG_UNAVAIL      An unrecognized nvpair was encountered
 7367  * ZFS_ERR_IOC_ARG_REQUIRED     A required nvpair is missing
 7368  * ZFS_ERR_IOC_ARG_BADTYPE      Invalid type for nvpair
 7369  */
 7370 static int
 7371 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
 7372 {
 7373         const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
 7374         boolean_t required_keys_found = B_FALSE;
 7375 
 7376         /*
 7377          * examine each input pair
 7378          */
 7379         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
 7380             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
 7381                 char *name = nvpair_name(pair);
 7382                 data_type_t type = nvpair_type(pair);
 7383                 boolean_t identified = B_FALSE;
 7384 
 7385                 /*
 7386                  * check pair against the documented names and type
 7387                  */
 7388                 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
 7389                         /* if not a wild card name, check for an exact match */
 7390                         if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
 7391                             strcmp(nvl_keys[k].zkey_name, name) != 0)
 7392                                 continue;
 7393 
 7394                         identified = B_TRUE;
 7395 
 7396                         if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
 7397                             nvl_keys[k].zkey_type != type) {
 7398                                 return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
 7399                         }
 7400 
 7401                         if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
 7402                                 continue;
 7403 
 7404                         required_keys_found = B_TRUE;
 7405                         break;
 7406                 }
 7407 
 7408                 /* allow an 'optional' key, everything else is invalid */
 7409                 if (!identified &&
 7410                     (strcmp(name, "optional") != 0 ||
 7411                     type != DATA_TYPE_NVLIST)) {
 7412                         return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
 7413                 }
 7414         }
 7415 
 7416         /* verify that all required keys were found */
 7417         for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
 7418                 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
 7419                         continue;
 7420 
 7421                 if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
 7422                         /* at least one non-optional key is expected here */
 7423                         if (!required_keys_found)
 7424                                 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
 7425                         continue;
 7426                 }
 7427 
 7428                 if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
 7429                         return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
 7430         }
 7431 
 7432         return (0);
 7433 }
 7434 
 7435 static int
 7436 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
 7437     zfs_ioc_poolcheck_t check)
 7438 {
 7439         spa_t *spa;
 7440         int error;
 7441 
 7442         ASSERT(type == POOL_NAME || type == DATASET_NAME ||
 7443             type == ENTITY_NAME);
 7444 
 7445         if (check & POOL_CHECK_NONE)
 7446                 return (0);
 7447 
 7448         error = spa_open(name, &spa, FTAG);
 7449         if (error == 0) {
 7450                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
 7451                         error = SET_ERROR(EAGAIN);
 7452                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
 7453                         error = SET_ERROR(EROFS);
 7454                 spa_close(spa, FTAG);
 7455         }
 7456         return (error);
 7457 }
 7458 
 7459 int
 7460 zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
 7461 {
 7462         zfsdev_state_t *zs, *fpd;
 7463 
 7464         ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
 7465 
 7466         fpd = zfs_file_private(fp);
 7467         if (fpd == NULL)
 7468                 return (SET_ERROR(EBADF));
 7469 
 7470         mutex_enter(&zfsdev_state_lock);
 7471 
 7472         for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 7473 
 7474                 if (zs->zs_minor == -1)
 7475                         continue;
 7476 
 7477                 if (fpd == zs) {
 7478                         *minorp = fpd->zs_minor;
 7479                         mutex_exit(&zfsdev_state_lock);
 7480                         return (0);
 7481                 }
 7482         }
 7483 
 7484         mutex_exit(&zfsdev_state_lock);
 7485 
 7486         return (SET_ERROR(EBADF));
 7487 }
 7488 
 7489 void *
 7490 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
 7491 {
 7492         zfsdev_state_t *zs;
 7493 
 7494         for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 7495                 if (zs->zs_minor == minor) {
 7496                         membar_consumer();
 7497                         switch (which) {
 7498                         case ZST_ONEXIT:
 7499                                 return (zs->zs_onexit);
 7500                         case ZST_ZEVENT:
 7501                                 return (zs->zs_zevent);
 7502                         case ZST_ALL:
 7503                                 return (zs);
 7504                         }
 7505                 }
 7506         }
 7507 
 7508         return (NULL);
 7509 }
 7510 
 7511 /*
 7512  * Find a free minor number.  The zfsdev_state_list is expected to
 7513  * be short since it is only a list of currently open file handles.
 7514  */
 7515 static minor_t
 7516 zfsdev_minor_alloc(void)
 7517 {
 7518         static minor_t last_minor = 0;
 7519         minor_t m;
 7520 
 7521         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 7522 
 7523         for (m = last_minor + 1; m != last_minor; m++) {
 7524                 if (m > ZFSDEV_MAX_MINOR)
 7525                         m = 1;
 7526                 if (zfsdev_get_state(m, ZST_ALL) == NULL) {
 7527                         last_minor = m;
 7528                         return (m);
 7529                 }
 7530         }
 7531 
 7532         return (0);
 7533 }
 7534 
 7535 int
 7536 zfsdev_state_init(void *priv)
 7537 {
 7538         zfsdev_state_t *zs, *zsprev = NULL;
 7539         minor_t minor;
 7540         boolean_t newzs = B_FALSE;
 7541 
 7542         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 7543 
 7544         minor = zfsdev_minor_alloc();
 7545         if (minor == 0)
 7546                 return (SET_ERROR(ENXIO));
 7547 
 7548         for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 7549                 if (zs->zs_minor == -1)
 7550                         break;
 7551                 zsprev = zs;
 7552         }
 7553 
 7554         if (!zs) {
 7555                 zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
 7556                 newzs = B_TRUE;
 7557         }
 7558 
 7559         zfsdev_private_set_state(priv, zs);
 7560 
 7561         zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
 7562         zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
 7563 
 7564         /*
 7565          * In order to provide for lock-free concurrent read access
 7566          * to the minor list in zfsdev_get_state(), new entries
 7567          * must be completely written before linking them into the
 7568          * list whereas existing entries are already linked; the last
 7569          * operation must be updating zs_minor (from -1 to the new
 7570          * value).
 7571          */
 7572         if (newzs) {
 7573                 zs->zs_minor = minor;
 7574                 membar_producer();
 7575                 zsprev->zs_next = zs;
 7576         } else {
 7577                 membar_producer();
 7578                 zs->zs_minor = minor;
 7579         }
 7580 
 7581         return (0);
 7582 }
 7583 
 7584 void
 7585 zfsdev_state_destroy(void *priv)
 7586 {
 7587         zfsdev_state_t *zs = zfsdev_private_get_state(priv);
 7588 
 7589         ASSERT(zs != NULL);
 7590         ASSERT3S(zs->zs_minor, >, 0);
 7591 
 7592         /*
 7593          * The last reference to this zfsdev file descriptor is being dropped.
 7594          * We don't have to worry about lookup grabbing this state object, and
 7595          * zfsdev_state_init() will not try to reuse this object until it is
 7596          * invalidated by setting zs_minor to -1.  Invalidation must be done
 7597          * last, with a memory barrier to ensure ordering.  This lets us avoid
 7598          * taking the global zfsdev state lock around destruction.
 7599          */
 7600         zfs_onexit_destroy(zs->zs_onexit);
 7601         zfs_zevent_destroy(zs->zs_zevent);
 7602         zs->zs_onexit = NULL;
 7603         zs->zs_zevent = NULL;
 7604         membar_producer();
 7605         zs->zs_minor = -1;
 7606 }
 7607 
 7608 long
 7609 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
 7610 {
 7611         int error, cmd;
 7612         const zfs_ioc_vec_t *vec;
 7613         char *saved_poolname = NULL;
 7614         uint64_t max_nvlist_src_size;
 7615         size_t saved_poolname_len = 0;
 7616         nvlist_t *innvl = NULL;
 7617         fstrans_cookie_t cookie;
 7618         hrtime_t start_time = gethrtime();
 7619 
 7620         cmd = vecnum;
 7621         error = 0;
 7622         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
 7623                 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
 7624 
 7625         vec = &zfs_ioc_vec[vecnum];
 7626 
 7627         /*
 7628          * The registered ioctl list may be sparse, verify that either
 7629          * a normal or legacy handler are registered.
 7630          */
 7631         if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
 7632                 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
 7633 
 7634         zc->zc_iflags = flag & FKIOCTL;
 7635         max_nvlist_src_size = zfs_max_nvlist_src_size_os();
 7636         if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
 7637                 /*
 7638                  * Make sure the user doesn't pass in an insane value for
 7639                  * zc_nvlist_src_size.  We have to check, since we will end
 7640                  * up allocating that much memory inside of get_nvlist().  This
 7641                  * prevents a nefarious user from allocating tons of kernel
 7642                  * memory.
 7643                  *
 7644                  * Also, we return EINVAL instead of ENOMEM here.  The reason
 7645                  * being that returning ENOMEM from an ioctl() has a special
 7646                  * connotation; that the user's size value is too small and
 7647                  * needs to be expanded to hold the nvlist.  See
 7648                  * zcmd_expand_dst_nvlist() for details.
 7649                  */
 7650                 error = SET_ERROR(EINVAL);      /* User's size too big */
 7651 
 7652         } else if (zc->zc_nvlist_src_size != 0) {
 7653                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 7654                     zc->zc_iflags, &innvl);
 7655                 if (error != 0)
 7656                         goto out;
 7657         }
 7658 
 7659         /*
 7660          * Ensure that all pool/dataset names are valid before we pass down to
 7661          * the lower layers.
 7662          */
 7663         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 7664         switch (vec->zvec_namecheck) {
 7665         case POOL_NAME:
 7666                 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
 7667                         error = SET_ERROR(EINVAL);
 7668                 else
 7669                         error = pool_status_check(zc->zc_name,
 7670                             vec->zvec_namecheck, vec->zvec_pool_check);
 7671                 break;
 7672 
 7673         case DATASET_NAME:
 7674                 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
 7675                         error = SET_ERROR(EINVAL);
 7676                 else
 7677                         error = pool_status_check(zc->zc_name,
 7678                             vec->zvec_namecheck, vec->zvec_pool_check);
 7679                 break;
 7680 
 7681         case ENTITY_NAME:
 7682                 if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
 7683                         error = SET_ERROR(EINVAL);
 7684                 } else {
 7685                         error = pool_status_check(zc->zc_name,
 7686                             vec->zvec_namecheck, vec->zvec_pool_check);
 7687                 }
 7688                 break;
 7689 
 7690         case NO_NAME:
 7691                 break;
 7692         }
 7693         /*
 7694          * Ensure that all input pairs are valid before we pass them down
 7695          * to the lower layers.
 7696          *
 7697          * The vectored functions can use fnvlist_lookup_{type} for any
 7698          * required pairs since zfs_check_input_nvpairs() confirmed that
 7699          * they exist and are of the correct type.
 7700          */
 7701         if (error == 0 && vec->zvec_func != NULL) {
 7702                 error = zfs_check_input_nvpairs(innvl, vec);
 7703                 if (error != 0)
 7704                         goto out;
 7705         }
 7706 
 7707         if (error == 0) {
 7708                 cookie = spl_fstrans_mark();
 7709                 error = vec->zvec_secpolicy(zc, innvl, CRED());
 7710                 spl_fstrans_unmark(cookie);
 7711         }
 7712 
 7713         if (error != 0)
 7714                 goto out;
 7715 
 7716         /* legacy ioctls can modify zc_name */
 7717         /*
 7718          * Can't use kmem_strdup() as we might truncate the string and
 7719          * kmem_strfree() would then free with incorrect size.
 7720          */
 7721         saved_poolname_len = strlen(zc->zc_name) + 1;
 7722         saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
 7723 
 7724         strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
 7725         saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
 7726 
 7727         if (vec->zvec_func != NULL) {
 7728                 nvlist_t *outnvl;
 7729                 int puterror = 0;
 7730                 spa_t *spa;
 7731                 nvlist_t *lognv = NULL;
 7732 
 7733                 ASSERT(vec->zvec_legacy_func == NULL);
 7734 
 7735                 /*
 7736                  * Add the innvl to the lognv before calling the func,
 7737                  * in case the func changes the innvl.
 7738                  */
 7739                 if (vec->zvec_allow_log) {
 7740                         lognv = fnvlist_alloc();
 7741                         fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
 7742                             vec->zvec_name);
 7743                         if (!nvlist_empty(innvl)) {
 7744                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
 7745                                     innvl);
 7746                         }
 7747                 }
 7748 
 7749                 outnvl = fnvlist_alloc();
 7750                 cookie = spl_fstrans_mark();
 7751                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
 7752                 spl_fstrans_unmark(cookie);
 7753 
 7754                 /*
 7755                  * Some commands can partially execute, modify state, and still
 7756                  * return an error.  In these cases, attempt to record what
 7757                  * was modified.
 7758                  */
 7759                 if ((error == 0 ||
 7760                     (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
 7761                     vec->zvec_allow_log &&
 7762                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
 7763                         if (!nvlist_empty(outnvl)) {
 7764                                 size_t out_size = fnvlist_size(outnvl);
 7765                                 if (out_size > zfs_history_output_max) {
 7766                                         fnvlist_add_int64(lognv,
 7767                                             ZPOOL_HIST_OUTPUT_SIZE, out_size);
 7768                                 } else {
 7769                                         fnvlist_add_nvlist(lognv,
 7770                                             ZPOOL_HIST_OUTPUT_NVL, outnvl);
 7771                                 }
 7772                         }
 7773                         if (error != 0) {
 7774                                 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
 7775                                     error);
 7776                         }
 7777                         fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
 7778                             gethrtime() - start_time);
 7779                         (void) spa_history_log_nvl(spa, lognv);
 7780                         spa_close(spa, FTAG);
 7781                 }
 7782                 fnvlist_free(lognv);
 7783 
 7784                 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
 7785                         int smusherror = 0;
 7786                         if (vec->zvec_smush_outnvlist) {
 7787                                 smusherror = nvlist_smush(outnvl,
 7788                                     zc->zc_nvlist_dst_size);
 7789                         }
 7790                         if (smusherror == 0)
 7791                                 puterror = put_nvlist(zc, outnvl);
 7792                 }
 7793 
 7794                 if (puterror != 0)
 7795                         error = puterror;
 7796 
 7797                 nvlist_free(outnvl);
 7798         } else {
 7799                 cookie = spl_fstrans_mark();
 7800                 error = vec->zvec_legacy_func(zc);
 7801                 spl_fstrans_unmark(cookie);
 7802         }
 7803 
 7804 out:
 7805         nvlist_free(innvl);
 7806         if (error == 0 && vec->zvec_allow_log) {
 7807                 char *s = tsd_get(zfs_allow_log_key);
 7808                 if (s != NULL)
 7809                         kmem_strfree(s);
 7810                 (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
 7811         }
 7812         if (saved_poolname != NULL)
 7813                 kmem_free(saved_poolname, saved_poolname_len);
 7814 
 7815         return (error);
 7816 }
 7817 
 7818 int
 7819 zfs_kmod_init(void)
 7820 {
 7821         int error;
 7822 
 7823         if ((error = zvol_init()) != 0)
 7824                 return (error);
 7825 
 7826         spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
 7827         zfs_init();
 7828 
 7829         zfs_ioctl_init();
 7830 
 7831         mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
 7832         zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
 7833         zfsdev_state_list->zs_minor = -1;
 7834 
 7835         if ((error = zfsdev_attach()) != 0)
 7836                 goto out;
 7837 
 7838         tsd_create(&zfs_fsyncer_key, NULL);
 7839         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 7840         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 7841 
 7842         return (0);
 7843 out:
 7844         zfs_fini();
 7845         spa_fini();
 7846         zvol_fini();
 7847 
 7848         return (error);
 7849 }
 7850 
 7851 void
 7852 zfs_kmod_fini(void)
 7853 {
 7854         zfsdev_state_t *zs, *zsnext = NULL;
 7855 
 7856         zfsdev_detach();
 7857 
 7858         mutex_destroy(&zfsdev_state_lock);
 7859 
 7860         for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) {
 7861                 zsnext = zs->zs_next;
 7862                 if (zs->zs_onexit)
 7863                         zfs_onexit_destroy(zs->zs_onexit);
 7864                 if (zs->zs_zevent)
 7865                         zfs_zevent_destroy(zs->zs_zevent);
 7866                 kmem_free(zs, sizeof (zfsdev_state_t));
 7867         }
 7868 
 7869         zfs_ereport_taskq_fini();       /* run before zfs_fini() on Linux */
 7870         zfs_fini();
 7871         spa_fini();
 7872         zvol_fini();
 7873 
 7874         tsd_destroy(&zfs_fsyncer_key);
 7875         tsd_destroy(&rrw_tsd_key);
 7876         tsd_destroy(&zfs_allow_log_key);
 7877 }
 7878 
 7879 ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
 7880         "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
 7881 
 7882 ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
 7883         "Maximum size in bytes of ZFS ioctl output that will be logged");

Cache object: 234cc69453383850c4202d4caa674c5b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.