The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libzfs_core/libzfs_core.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
   24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
   25  * Copyright 2017 RackTop Systems.
   26  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
   27  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
   28  * Copyright (c) 2019 Datto Inc.
   29  */
   30 
   31 /*
   32  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
   33  * It has the following characteristics:
   34  *
   35  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
   36  *  threads.  This is accomplished primarily by avoiding global data
   37  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
   38  *  process to have multiple libzfs "instances".  Therefore, we store
   39  *  our few pieces of data (e.g. the file descriptor) in global
   40  *  variables.  The fd is reference-counted so that the libzfs_core
   41  *  library can be "initialized" multiple times (e.g. by different
   42  *  consumers within the same process).
   43  *
   44  *  - Committed Interface.  The libzfs_core interface will be committed,
   45  *  therefore consumers can compile against it and be confident that
   46  *  their code will continue to work on future releases of this code.
   47  *  Currently, the interface is Evolving (not Committed), but we intend
   48  *  to commit to it once it is more complete and we determine that it
   49  *  meets the needs of all consumers.
   50  *
   51  *  - Programmatic Error Handling.  libzfs_core communicates errors with
   52  *  defined error numbers, and doesn't print anything to stdout/stderr.
   53  *
   54  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
   55  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
   56  *  between libzfs_core functions and ioctls to ZFS_DEV.
   57  *
   58  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
   59  *  with kernel ioctls, and kernel ioctls are generally atomic, each
   60  *  libzfs_core function is atomic.  For example, creating multiple
   61  *  snapshots with a single call to lzc_snapshot() is atomic -- it
   62  *  can't fail with only some of the requested snapshots created, even
   63  *  in the event of power loss or system crash.
   64  *
   65  *  - Continued libzfs Support.  Some higher-level operations (e.g.
   66  *  support for "zfs send -R") are too complicated to fit the scope of
   67  *  libzfs_core.  This functionality will continue to live in libzfs.
   68  *  Where appropriate, libzfs will use the underlying atomic operations
   69  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
   70  *  zfs receive" by using individual "send one snapshot", rename,
   71  *  destroy, and "receive one snapshot" operations in libzfs_core.
   72  *  /sbin/zfs and /sbin/zpool will link with both libzfs and
   73  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
   74  *  since that will be the supported, stable interface going forwards.
   75  */
   76 
   77 #include <libzfs_core.h>
   78 #include <ctype.h>
   79 #include <unistd.h>
   80 #include <stdlib.h>
   81 #include <string.h>
   82 #ifdef ZFS_DEBUG
   83 #include <stdio.h>
   84 #endif
   85 #include <errno.h>
   86 #include <fcntl.h>
   87 #include <pthread.h>
   88 #include <libzutil.h>
   89 #include <sys/nvpair.h>
   90 #include <sys/param.h>
   91 #include <sys/types.h>
   92 #include <sys/stat.h>
   93 #include <sys/zfs_ioctl.h>
   94 #if __FreeBSD__
   95 #define BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */
   96 #endif
   97 
   98 static int g_fd = -1;
   99 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
  100 static int g_refcount;
  101 
  102 #ifdef ZFS_DEBUG
  103 static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
  104 static zfs_errno_t fail_ioc_err;
  105 
  106 static void
  107 libzfs_core_debug_ioc(void)
  108 {
  109         /*
  110          * To test running newer user space binaries with kernel's
  111          * that don't yet support an ioctl or a new ioctl arg we
  112          * provide an override to intentionally fail an ioctl.
  113          *
  114          * USAGE:
  115          * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
  116          *
  117          * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
  118          * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
  119          *
  120          * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
  121          * cannot checkpoint 'tank': the loaded zfs module does not support
  122          * this operation. A reboot may be required to enable this operation.
  123          */
  124         if (fail_ioc_cmd == ZFS_IOC_LAST) {
  125                 char *ioc_test = getenv("ZFS_IOC_TEST");
  126                 unsigned int ioc_num = 0, ioc_err = 0;
  127 
  128                 if (ioc_test != NULL &&
  129                     sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
  130                     ioc_num < ZFS_IOC_LAST)  {
  131                         fail_ioc_cmd = ioc_num;
  132                         fail_ioc_err = ioc_err;
  133                 }
  134         }
  135 }
  136 #endif
  137 
  138 int
  139 libzfs_core_init(void)
  140 {
  141         (void) pthread_mutex_lock(&g_lock);
  142         if (g_refcount == 0) {
  143                 g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
  144                 if (g_fd < 0) {
  145                         (void) pthread_mutex_unlock(&g_lock);
  146                         return (errno);
  147                 }
  148         }
  149         g_refcount++;
  150 
  151 #ifdef ZFS_DEBUG
  152         libzfs_core_debug_ioc();
  153 #endif
  154         (void) pthread_mutex_unlock(&g_lock);
  155         return (0);
  156 }
  157 
  158 void
  159 libzfs_core_fini(void)
  160 {
  161         (void) pthread_mutex_lock(&g_lock);
  162         ASSERT3S(g_refcount, >, 0);
  163 
  164         g_refcount--;
  165 
  166         if (g_refcount == 0 && g_fd != -1) {
  167                 (void) close(g_fd);
  168                 g_fd = -1;
  169         }
  170         (void) pthread_mutex_unlock(&g_lock);
  171 }
  172 
  173 static int
  174 lzc_ioctl(zfs_ioc_t ioc, const char *name,
  175     nvlist_t *source, nvlist_t **resultp)
  176 {
  177         zfs_cmd_t zc = {"\0"};
  178         int error = 0;
  179         char *packed = NULL;
  180         size_t size = 0;
  181 
  182         ASSERT3S(g_refcount, >, 0);
  183         VERIFY3S(g_fd, !=, -1);
  184 
  185 #ifdef ZFS_DEBUG
  186         if (ioc == fail_ioc_cmd)
  187                 return (fail_ioc_err);
  188 #endif
  189 
  190         if (name != NULL)
  191                 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
  192 
  193         if (source != NULL) {
  194                 packed = fnvlist_pack(source, &size);
  195                 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
  196                 zc.zc_nvlist_src_size = size;
  197         }
  198 
  199         if (resultp != NULL) {
  200                 *resultp = NULL;
  201                 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
  202                         zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
  203                             ZCP_ARG_MEMLIMIT);
  204                 } else {
  205                         zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
  206                 }
  207                 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
  208                     malloc(zc.zc_nvlist_dst_size);
  209                 if (zc.zc_nvlist_dst == (uint64_t)0) {
  210                         error = ENOMEM;
  211                         goto out;
  212                 }
  213         }
  214 
  215         while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) {
  216                 /*
  217                  * If ioctl exited with ENOMEM, we retry the ioctl after
  218                  * increasing the size of the destination nvlist.
  219                  *
  220                  * Channel programs that exit with ENOMEM ran over the
  221                  * lua memory sandbox; they should not be retried.
  222                  */
  223                 if (errno == ENOMEM && resultp != NULL &&
  224                     ioc != ZFS_IOC_CHANNEL_PROGRAM) {
  225                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
  226                         zc.zc_nvlist_dst_size *= 2;
  227                         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
  228                             malloc(zc.zc_nvlist_dst_size);
  229                         if (zc.zc_nvlist_dst == (uint64_t)0) {
  230                                 error = ENOMEM;
  231                                 goto out;
  232                         }
  233                 } else {
  234                         error = errno;
  235                         break;
  236                 }
  237         }
  238         if (zc.zc_nvlist_dst_filled && resultp != NULL) {
  239                 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
  240                     zc.zc_nvlist_dst_size);
  241         }
  242 
  243 out:
  244         if (packed != NULL)
  245                 fnvlist_pack_free(packed, size);
  246         free((void *)(uintptr_t)zc.zc_nvlist_dst);
  247         return (error);
  248 }
  249 
  250 int
  251 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
  252     uint8_t *wkeydata, uint_t wkeylen)
  253 {
  254         int error;
  255         nvlist_t *hidden_args = NULL;
  256         nvlist_t *args = fnvlist_alloc();
  257 
  258         fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
  259         if (props != NULL)
  260                 fnvlist_add_nvlist(args, "props", props);
  261 
  262         if (wkeydata != NULL) {
  263                 hidden_args = fnvlist_alloc();
  264                 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
  265                     wkeylen);
  266                 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
  267         }
  268 
  269         error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
  270         nvlist_free(hidden_args);
  271         nvlist_free(args);
  272         return (error);
  273 }
  274 
  275 int
  276 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
  277 {
  278         int error;
  279         nvlist_t *hidden_args = NULL;
  280         nvlist_t *args = fnvlist_alloc();
  281 
  282         fnvlist_add_string(args, "origin", origin);
  283         if (props != NULL)
  284                 fnvlist_add_nvlist(args, "props", props);
  285         error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
  286         nvlist_free(hidden_args);
  287         nvlist_free(args);
  288         return (error);
  289 }
  290 
  291 int
  292 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
  293 {
  294         /*
  295          * The promote ioctl is still legacy, so we need to construct our
  296          * own zfs_cmd_t rather than using lzc_ioctl().
  297          */
  298         zfs_cmd_t zc = {"\0"};
  299 
  300         ASSERT3S(g_refcount, >, 0);
  301         VERIFY3S(g_fd, !=, -1);
  302 
  303         (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
  304         if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
  305                 int error = errno;
  306                 if (error == EEXIST && snapnamebuf != NULL)
  307                         (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
  308                 return (error);
  309         }
  310         return (0);
  311 }
  312 
  313 int
  314 lzc_rename(const char *source, const char *target)
  315 {
  316         zfs_cmd_t zc = {"\0"};
  317         int error;
  318 
  319         ASSERT3S(g_refcount, >, 0);
  320         VERIFY3S(g_fd, !=, -1);
  321         (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
  322         (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
  323         error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
  324         if (error != 0)
  325                 error = errno;
  326         return (error);
  327 }
  328 
  329 int
  330 lzc_destroy(const char *fsname)
  331 {
  332         int error;
  333         nvlist_t *args = fnvlist_alloc();
  334         error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
  335         nvlist_free(args);
  336         return (error);
  337 }
  338 
  339 /*
  340  * Creates snapshots.
  341  *
  342  * The keys in the snaps nvlist are the snapshots to be created.
  343  * They must all be in the same pool.
  344  *
  345  * The props nvlist is properties to set.  Currently only user properties
  346  * are supported.  { user:prop_name -> string value }
  347  *
  348  * The returned results nvlist will have an entry for each snapshot that failed.
  349  * The value will be the (int32) error code.
  350  *
  351  * The return value will be 0 if all snapshots were created, otherwise it will
  352  * be the errno of a (unspecified) snapshot that failed.
  353  */
  354 int
  355 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
  356 {
  357         nvpair_t *elem;
  358         nvlist_t *args;
  359         int error;
  360         char pool[ZFS_MAX_DATASET_NAME_LEN];
  361 
  362         *errlist = NULL;
  363 
  364         /* determine the pool name */
  365         elem = nvlist_next_nvpair(snaps, NULL);
  366         if (elem == NULL)
  367                 return (0);
  368         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
  369         pool[strcspn(pool, "/@")] = '\0';
  370 
  371         args = fnvlist_alloc();
  372         fnvlist_add_nvlist(args, "snaps", snaps);
  373         if (props != NULL)
  374                 fnvlist_add_nvlist(args, "props", props);
  375 
  376         error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
  377         nvlist_free(args);
  378 
  379         return (error);
  380 }
  381 
  382 /*
  383  * Destroys snapshots.
  384  *
  385  * The keys in the snaps nvlist are the snapshots to be destroyed.
  386  * They must all be in the same pool.
  387  *
  388  * Snapshots that do not exist will be silently ignored.
  389  *
  390  * If 'defer' is not set, and a snapshot has user holds or clones, the
  391  * destroy operation will fail and none of the snapshots will be
  392  * destroyed.
  393  *
  394  * If 'defer' is set, and a snapshot has user holds or clones, it will be
  395  * marked for deferred destruction, and will be destroyed when the last hold
  396  * or clone is removed/destroyed.
  397  *
  398  * The return value will be 0 if all snapshots were destroyed (or marked for
  399  * later destruction if 'defer' is set) or didn't exist to begin with.
  400  *
  401  * Otherwise the return value will be the errno of a (unspecified) snapshot
  402  * that failed, no snapshots will be destroyed, and the errlist will have an
  403  * entry for each snapshot that failed.  The value in the errlist will be
  404  * the (int32) error code.
  405  */
  406 int
  407 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
  408 {
  409         nvpair_t *elem;
  410         nvlist_t *args;
  411         int error;
  412         char pool[ZFS_MAX_DATASET_NAME_LEN];
  413 
  414         /* determine the pool name */
  415         elem = nvlist_next_nvpair(snaps, NULL);
  416         if (elem == NULL)
  417                 return (0);
  418         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
  419         pool[strcspn(pool, "/@")] = '\0';
  420 
  421         args = fnvlist_alloc();
  422         fnvlist_add_nvlist(args, "snaps", snaps);
  423         if (defer)
  424                 fnvlist_add_boolean(args, "defer");
  425 
  426         error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
  427         nvlist_free(args);
  428 
  429         return (error);
  430 }
  431 
  432 int
  433 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
  434     uint64_t *usedp)
  435 {
  436         nvlist_t *args;
  437         nvlist_t *result;
  438         int err;
  439         char fs[ZFS_MAX_DATASET_NAME_LEN];
  440         char *atp;
  441 
  442         /* determine the fs name */
  443         (void) strlcpy(fs, firstsnap, sizeof (fs));
  444         atp = strchr(fs, '@');
  445         if (atp == NULL)
  446                 return (EINVAL);
  447         *atp = '\0';
  448 
  449         args = fnvlist_alloc();
  450         fnvlist_add_string(args, "firstsnap", firstsnap);
  451 
  452         err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
  453         nvlist_free(args);
  454         if (err == 0)
  455                 *usedp = fnvlist_lookup_uint64(result, "used");
  456         fnvlist_free(result);
  457 
  458         return (err);
  459 }
  460 
  461 boolean_t
  462 lzc_exists(const char *dataset)
  463 {
  464         /*
  465          * The objset_stats ioctl is still legacy, so we need to construct our
  466          * own zfs_cmd_t rather than using lzc_ioctl().
  467          */
  468         zfs_cmd_t zc = {"\0"};
  469 
  470         ASSERT3S(g_refcount, >, 0);
  471         VERIFY3S(g_fd, !=, -1);
  472 
  473         (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
  474         return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
  475 }
  476 
  477 /*
  478  * outnvl is unused.
  479  * It was added to preserve the function signature in case it is
  480  * needed in the future.
  481  */
  482 int
  483 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
  484 {
  485         (void) outnvl;
  486         return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
  487 }
  488 
  489 /*
  490  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
  491  * the snapshot can not be destroyed.  (However, it can be marked for deletion
  492  * by lzc_destroy_snaps(defer=B_TRUE).)
  493  *
  494  * The keys in the nvlist are snapshot names.
  495  * The snapshots must all be in the same pool.
  496  * The value is the name of the hold (string type).
  497  *
  498  * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
  499  * In this case, when the cleanup_fd is closed (including on process
  500  * termination), the holds will be released.  If the system is shut down
  501  * uncleanly, the holds will be released when the pool is next opened
  502  * or imported.
  503  *
  504  * Holds for snapshots which don't exist will be skipped and have an entry
  505  * added to errlist, but will not cause an overall failure.
  506  *
  507  * The return value will be 0 if all holds, for snapshots that existed,
  508  * were successfully created.
  509  *
  510  * Otherwise the return value will be the errno of a (unspecified) hold that
  511  * failed and no holds will be created.
  512  *
  513  * In all cases the errlist will have an entry for each hold that failed
  514  * (name = snapshot), with its value being the error code (int32).
  515  */
  516 int
  517 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
  518 {
  519         char pool[ZFS_MAX_DATASET_NAME_LEN];
  520         nvlist_t *args;
  521         nvpair_t *elem;
  522         int error;
  523 
  524         /* determine the pool name */
  525         elem = nvlist_next_nvpair(holds, NULL);
  526         if (elem == NULL)
  527                 return (0);
  528         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
  529         pool[strcspn(pool, "/@")] = '\0';
  530 
  531         args = fnvlist_alloc();
  532         fnvlist_add_nvlist(args, "holds", holds);
  533         if (cleanup_fd != -1)
  534                 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
  535 
  536         error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
  537         nvlist_free(args);
  538         return (error);
  539 }
  540 
  541 /*
  542  * Release "user holds" on snapshots.  If the snapshot has been marked for
  543  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
  544  * any clones, and all the user holds are removed, then the snapshot will be
  545  * destroyed.
  546  *
  547  * The keys in the nvlist are snapshot names.
  548  * The snapshots must all be in the same pool.
  549  * The value is an nvlist whose keys are the holds to remove.
  550  *
  551  * Holds which failed to release because they didn't exist will have an entry
  552  * added to errlist, but will not cause an overall failure.
  553  *
  554  * The return value will be 0 if the nvl holds was empty or all holds that
  555  * existed, were successfully removed.
  556  *
  557  * Otherwise the return value will be the errno of a (unspecified) hold that
  558  * failed to release and no holds will be released.
  559  *
  560  * In all cases the errlist will have an entry for each hold that failed to
  561  * to release.
  562  */
  563 int
  564 lzc_release(nvlist_t *holds, nvlist_t **errlist)
  565 {
  566         char pool[ZFS_MAX_DATASET_NAME_LEN];
  567         nvpair_t *elem;
  568 
  569         /* determine the pool name */
  570         elem = nvlist_next_nvpair(holds, NULL);
  571         if (elem == NULL)
  572                 return (0);
  573         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
  574         pool[strcspn(pool, "/@")] = '\0';
  575 
  576         return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
  577 }
  578 
  579 /*
  580  * Retrieve list of user holds on the specified snapshot.
  581  *
  582  * On success, *holdsp will be set to an nvlist which the caller must free.
  583  * The keys are the names of the holds, and the value is the creation time
  584  * of the hold (uint64) in seconds since the epoch.
  585  */
  586 int
  587 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
  588 {
  589         return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
  590 }
  591 
  592 static unsigned int
  593 max_pipe_buffer(int infd)
  594 {
  595 #if __linux__
  596         static unsigned int max;
  597         if (max == 0) {
  598                 max = 1048576; /* fs/pipe.c default */
  599 
  600                 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
  601                 if (procf != NULL) {
  602                         if (fscanf(procf, "%u", &max) <= 0) {
  603                                 /* ignore error: max untouched if parse fails */
  604                         }
  605                         fclose(procf);
  606                 }
  607         }
  608 
  609         unsigned int cur = fcntl(infd, F_GETPIPE_SZ);
  610         /*
  611          * Sadly, Linux has an unfixed deadlock if you do SETPIPE_SZ on a pipe
  612          * with data in it.
  613          * cf. #13232, https://bugzilla.kernel.org/show_bug.cgi?id=212295
  614          *
  615          * And since the problem is in waking up the writer, there's nothing
  616          * we can do about it from here.
  617          *
  618          * So if people want to, they can set this, but they
  619          * may regret it...
  620          */
  621         if (getenv("ZFS_SET_PIPE_MAX") == NULL)
  622                 return (cur);
  623         if (cur < max && fcntl(infd, F_SETPIPE_SZ, max) != -1)
  624                 cur = max;
  625         return (cur);
  626 #else
  627         /* FreeBSD automatically resizes */
  628         (void) infd;
  629         return (BIG_PIPE_SIZE);
  630 #endif
  631 }
  632 
  633 #if __linux__
  634 struct send_worker_ctx {
  635         int from;       /* read end of pipe, with send data; closed on exit */
  636         int to;         /* original arbitrary output fd; mustn't be a pipe */
  637 };
  638 
  639 static void *
  640 send_worker(void *arg)
  641 {
  642         struct send_worker_ctx *ctx = arg;
  643         unsigned int bufsiz = max_pipe_buffer(ctx->from);
  644         ssize_t rd;
  645 
  646         while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
  647             SPLICE_F_MOVE | SPLICE_F_MORE)) > 0)
  648                 ;
  649 
  650         int err = (rd == -1) ? errno : 0;
  651         close(ctx->from);
  652         return ((void *)(uintptr_t)err);
  653 }
  654 #endif
  655 
  656 /*
  657  * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
  658  * ("fs: don't allow kernel reads and writes without iter ops"),
  659  * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c.
  660  *
  661  * This wrapper transparently executes func() with a pipe
  662  * by spawning a thread to copy from that pipe to the original output
  663  * in the background.
  664  *
  665  * Returns the error from func(), if nonzero,
  666  * otherwise the error from the thread.
  667  *
  668  * No-op if orig_fd is -1, already a pipe (but the buffer size is bumped),
  669  * and on not-Linux; as such, it is safe to wrap/call wrapped functions
  670  * in a wrapped context.
  671  */
  672 int
  673 lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data)
  674 {
  675 #if __linux__
  676         struct stat sb;
  677         if (orig_fd != -1 && fstat(orig_fd, &sb) == -1)
  678                 return (errno);
  679         if (orig_fd == -1 || S_ISFIFO(sb.st_mode)) {
  680                 if (orig_fd != -1)
  681                         (void) max_pipe_buffer(orig_fd);
  682                 return (func(orig_fd, data));
  683         }
  684         if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY)
  685                 return (errno = EBADF);
  686 
  687         int rw[2];
  688         if (pipe2(rw, O_CLOEXEC) == -1)
  689                 return (errno);
  690 
  691         int err;
  692         pthread_t send_thread;
  693         struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd};
  694         if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx))
  695             != 0) {
  696                 close(rw[0]);
  697                 close(rw[1]);
  698                 return (errno = err);
  699         }
  700 
  701         err = func(rw[1], data);
  702 
  703         void *send_err;
  704         close(rw[1]);
  705         pthread_join(send_thread, &send_err);
  706         if (err == 0 && send_err != 0)
  707                 errno = err = (uintptr_t)send_err;
  708 
  709         return (err);
  710 #else
  711         return (func(orig_fd, data));
  712 #endif
  713 }
  714 
  715 /*
  716  * Generate a zfs send stream for the specified snapshot and write it to
  717  * the specified file descriptor.
  718  *
  719  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
  720  *
  721  * If "from" is NULL, a full (non-incremental) stream will be sent.
  722  * If "from" is non-NULL, it must be the full name of a snapshot or
  723  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
  724  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
  725  * bookmark must represent an earlier point in the history of "snapname").
  726  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
  727  * or it can be the origin of "snapname"'s filesystem, or an earlier
  728  * snapshot in the origin, etc.
  729  *
  730  * "fd" is the file descriptor to write the send stream to.
  731  *
  732  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
  733  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
  734  * records with drr_blksz > 128K.
  735  *
  736  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
  737  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
  738  * which the receiving system must support (as indicated by support
  739  * for the "embedded_data" feature).
  740  *
  741  * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
  742  * compressed WRITE records for blocks which are compressed on disk and in
  743  * memory.  If the lz4_compress feature is active on the sending system, then
  744  * the receiving system must have that feature enabled as well.
  745  *
  746  * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
  747  * datasets, by sending data exactly as it exists on disk.  This allows backups
  748  * to be taken even if encryption keys are not currently loaded.
  749  */
  750 int
  751 lzc_send(const char *snapname, const char *from, int fd,
  752     enum lzc_send_flags flags)
  753 {
  754         return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
  755             NULL));
  756 }
  757 
  758 int
  759 lzc_send_redacted(const char *snapname, const char *from, int fd,
  760     enum lzc_send_flags flags, const char *redactbook)
  761 {
  762         return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
  763             redactbook));
  764 }
  765 
  766 int
  767 lzc_send_resume(const char *snapname, const char *from, int fd,
  768     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
  769 {
  770         return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
  771             resumeoff, NULL));
  772 }
  773 
  774 /*
  775  * snapname: The name of the "tosnap", or the snapshot whose contents we are
  776  * sending.
  777  * from: The name of the "fromsnap", or the incremental source.
  778  * fd: File descriptor to write the stream to.
  779  * flags: flags that determine features to be used by the stream.
  780  * resumeobj: Object to resume from, for resuming send
  781  * resumeoff: Offset to resume from, for resuming send.
  782  * redactnv: nvlist of string -> boolean(ignored) containing the names of all
  783  * the snapshots that we should redact with respect to.
  784  * redactbook: Name of the redaction bookmark to create.
  785  *
  786  * Pre-wrapped.
  787  */
  788 static int
  789 lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd,
  790     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
  791     const char *redactbook)
  792 {
  793         nvlist_t *args;
  794         int err;
  795 
  796         args = fnvlist_alloc();
  797         fnvlist_add_int32(args, "fd", fd);
  798         if (from != NULL)
  799                 fnvlist_add_string(args, "fromsnap", from);
  800         if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
  801                 fnvlist_add_boolean(args, "largeblockok");
  802         if (flags & LZC_SEND_FLAG_EMBED_DATA)
  803                 fnvlist_add_boolean(args, "embedok");
  804         if (flags & LZC_SEND_FLAG_COMPRESS)
  805                 fnvlist_add_boolean(args, "compressok");
  806         if (flags & LZC_SEND_FLAG_RAW)
  807                 fnvlist_add_boolean(args, "rawok");
  808         if (flags & LZC_SEND_FLAG_SAVED)
  809                 fnvlist_add_boolean(args, "savedok");
  810         if (resumeobj != 0 || resumeoff != 0) {
  811                 fnvlist_add_uint64(args, "resume_object", resumeobj);
  812                 fnvlist_add_uint64(args, "resume_offset", resumeoff);
  813         }
  814         if (redactbook != NULL)
  815                 fnvlist_add_string(args, "redactbook", redactbook);
  816 
  817         err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
  818         nvlist_free(args);
  819         return (err);
  820 }
  821 
  822 struct lzc_send_resume_redacted {
  823         const char *snapname;
  824         const char *from;
  825         enum lzc_send_flags flags;
  826         uint64_t resumeobj;
  827         uint64_t resumeoff;
  828         const char *redactbook;
  829 };
  830 
  831 static int
  832 lzc_send_resume_redacted_cb(int fd, void *arg)
  833 {
  834         struct lzc_send_resume_redacted *zsrr = arg;
  835         return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from,
  836             fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff,
  837             zsrr->redactbook));
  838 }
  839 
  840 int
  841 lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
  842     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
  843     const char *redactbook)
  844 {
  845         struct lzc_send_resume_redacted zsrr = {
  846                 .snapname = snapname,
  847                 .from = from,
  848                 .flags = flags,
  849                 .resumeobj = resumeobj,
  850                 .resumeoff = resumeoff,
  851                 .redactbook = redactbook,
  852         };
  853         return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr));
  854 }
  855 
  856 /*
  857  * "from" can be NULL, a snapshot, or a bookmark.
  858  *
  859  * If from is NULL, a full (non-incremental) stream will be estimated.  This
  860  * is calculated very efficiently.
  861  *
  862  * If from is a snapshot, lzc_send_space uses the deadlists attached to
  863  * each snapshot to efficiently estimate the stream size.
  864  *
  865  * If from is a bookmark, the indirect blocks in the destination snapshot
  866  * are traversed, looking for blocks with a birth time since the creation TXG of
  867  * the snapshot this bookmark was created from.  This will result in
  868  * significantly more I/O and be less efficient than a send space estimation on
  869  * an equivalent snapshot. This process is also used if redact_snaps is
  870  * non-null.
  871  *
  872  * Pre-wrapped.
  873  */
  874 static int
  875 lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from,
  876     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
  877     uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
  878 {
  879         nvlist_t *args;
  880         nvlist_t *result;
  881         int err;
  882 
  883         args = fnvlist_alloc();
  884         if (from != NULL)
  885                 fnvlist_add_string(args, "from", from);
  886         if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
  887                 fnvlist_add_boolean(args, "largeblockok");
  888         if (flags & LZC_SEND_FLAG_EMBED_DATA)
  889                 fnvlist_add_boolean(args, "embedok");
  890         if (flags & LZC_SEND_FLAG_COMPRESS)
  891                 fnvlist_add_boolean(args, "compressok");
  892         if (flags & LZC_SEND_FLAG_RAW)
  893                 fnvlist_add_boolean(args, "rawok");
  894         if (resumeobj != 0 || resumeoff != 0) {
  895                 fnvlist_add_uint64(args, "resume_object", resumeobj);
  896                 fnvlist_add_uint64(args, "resume_offset", resumeoff);
  897                 fnvlist_add_uint64(args, "bytes", resume_bytes);
  898         }
  899         if (redactbook != NULL)
  900                 fnvlist_add_string(args, "redactbook", redactbook);
  901         if (fd != -1)
  902                 fnvlist_add_int32(args, "fd", fd);
  903 
  904         err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
  905         nvlist_free(args);
  906         if (err == 0)
  907                 *spacep = fnvlist_lookup_uint64(result, "space");
  908         nvlist_free(result);
  909         return (err);
  910 }
  911 
  912 struct lzc_send_space_resume_redacted {
  913         const char *snapname;
  914         const char *from;
  915         enum lzc_send_flags flags;
  916         uint64_t resumeobj;
  917         uint64_t resumeoff;
  918         uint64_t resume_bytes;
  919         const char *redactbook;
  920         uint64_t *spacep;
  921 };
  922 
  923 static int
  924 lzc_send_space_resume_redacted_cb(int fd, void *arg)
  925 {
  926         struct lzc_send_space_resume_redacted *zssrr = arg;
  927         return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname,
  928             zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff,
  929             zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep));
  930 }
  931 
  932 int
  933 lzc_send_space_resume_redacted(const char *snapname, const char *from,
  934     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
  935     uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
  936 {
  937         struct lzc_send_space_resume_redacted zssrr = {
  938                 .snapname = snapname,
  939                 .from = from,
  940                 .flags = flags,
  941                 .resumeobj = resumeobj,
  942                 .resumeoff = resumeoff,
  943                 .resume_bytes = resume_bytes,
  944                 .redactbook = redactbook,
  945                 .spacep = spacep,
  946         };
  947         return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb,
  948             fd, &zssrr));
  949 }
  950 
  951 int
  952 lzc_send_space(const char *snapname, const char *from,
  953     enum lzc_send_flags flags, uint64_t *spacep)
  954 {
  955         return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
  956             NULL, -1, spacep));
  957 }
  958 
  959 static int
  960 recv_read(int fd, void *buf, int ilen)
  961 {
  962         char *cp = buf;
  963         int rv;
  964         int len = ilen;
  965 
  966         do {
  967                 rv = read(fd, cp, len);
  968                 cp += rv;
  969                 len -= rv;
  970         } while (rv > 0);
  971 
  972         if (rv < 0 || len != 0)
  973                 return (EIO);
  974 
  975         return (0);
  976 }
  977 
  978 /*
  979  * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
  980  * legacy ZFS_IOC_RECV user/kernel interface.  The new interface supports all
  981  * stream options but is currently only used for resumable streams.  This way
  982  * updated user space utilities will interoperate with older kernel modules.
  983  *
  984  * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
  985  */
  986 static int
  987 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
  988     uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
  989     boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd,
  990     const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
  991     uint64_t *errflags, nvlist_t **errors)
  992 {
  993         dmu_replay_record_t drr;
  994         char fsname[MAXPATHLEN];
  995         char *atp;
  996         int error;
  997         boolean_t payload = B_FALSE;
  998 
  999         ASSERT3S(g_refcount, >, 0);
 1000         VERIFY3S(g_fd, !=, -1);
 1001 
 1002         /* Set 'fsname' to the name of containing filesystem */
 1003         (void) strlcpy(fsname, snapname, sizeof (fsname));
 1004         atp = strchr(fsname, '@');
 1005         if (atp == NULL)
 1006                 return (EINVAL);
 1007         *atp = '\0';
 1008 
 1009         /* If the fs does not exist, try its parent. */
 1010         if (!lzc_exists(fsname)) {
 1011                 char *slashp = strrchr(fsname, '/');
 1012                 if (slashp == NULL)
 1013                         return (ENOENT);
 1014                 *slashp = '\0';
 1015         }
 1016 
 1017         /*
 1018          * It is not uncommon for gigabytes to be processed by zfs receive.
 1019          * Speculatively increase the buffer size if supported by the platform.
 1020          */
 1021         struct stat sb;
 1022         if (fstat(input_fd, &sb) == -1)
 1023                 return (errno);
 1024         if (S_ISFIFO(sb.st_mode))
 1025                 (void) max_pipe_buffer(input_fd);
 1026 
 1027         /*
 1028          * The begin_record is normally a non-byteswapped BEGIN record.
 1029          * For resumable streams it may be set to any non-byteswapped
 1030          * dmu_replay_record_t.
 1031          */
 1032         if (begin_record == NULL) {
 1033                 error = recv_read(input_fd, &drr, sizeof (drr));
 1034                 if (error != 0)
 1035                         return (error);
 1036         } else {
 1037                 drr = *begin_record;
 1038                 payload = (begin_record->drr_payloadlen != 0);
 1039         }
 1040 
 1041         /*
 1042          * All receives with a payload should use the new interface.
 1043          */
 1044         if (resumable || heal || raw || wkeydata != NULL || payload) {
 1045                 nvlist_t *outnvl = NULL;
 1046                 nvlist_t *innvl = fnvlist_alloc();
 1047 
 1048                 fnvlist_add_string(innvl, "snapname", snapname);
 1049 
 1050                 if (recvdprops != NULL)
 1051                         fnvlist_add_nvlist(innvl, "props", recvdprops);
 1052 
 1053                 if (localprops != NULL)
 1054                         fnvlist_add_nvlist(innvl, "localprops", localprops);
 1055 
 1056                 if (wkeydata != NULL) {
 1057                         /*
 1058                          * wkeydata must be placed in the special
 1059                          * ZPOOL_HIDDEN_ARGS nvlist so that it
 1060                          * will not be printed to the zpool history.
 1061                          */
 1062                         nvlist_t *hidden_args = fnvlist_alloc();
 1063                         fnvlist_add_uint8_array(hidden_args, "wkeydata",
 1064                             wkeydata, wkeylen);
 1065                         fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
 1066                             hidden_args);
 1067                         nvlist_free(hidden_args);
 1068                 }
 1069 
 1070                 if (origin != NULL && strlen(origin))
 1071                         fnvlist_add_string(innvl, "origin", origin);
 1072 
 1073                 fnvlist_add_byte_array(innvl, "begin_record",
 1074                     (uchar_t *)&drr, sizeof (drr));
 1075 
 1076                 fnvlist_add_int32(innvl, "input_fd", input_fd);
 1077 
 1078                 if (force)
 1079                         fnvlist_add_boolean(innvl, "force");
 1080 
 1081                 if (resumable)
 1082                         fnvlist_add_boolean(innvl, "resumable");
 1083 
 1084                 if (heal)
 1085                         fnvlist_add_boolean(innvl, "heal");
 1086 
 1087                 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
 1088 
 1089                 if (error == 0 && read_bytes != NULL)
 1090                         error = nvlist_lookup_uint64(outnvl, "read_bytes",
 1091                             read_bytes);
 1092 
 1093                 if (error == 0 && errflags != NULL)
 1094                         error = nvlist_lookup_uint64(outnvl, "error_flags",
 1095                             errflags);
 1096 
 1097                 if (error == 0 && errors != NULL) {
 1098                         nvlist_t *nvl;
 1099                         error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
 1100                         if (error == 0)
 1101                                 *errors = fnvlist_dup(nvl);
 1102                 }
 1103 
 1104                 fnvlist_free(innvl);
 1105                 fnvlist_free(outnvl);
 1106         } else {
 1107                 zfs_cmd_t zc = {"\0"};
 1108                 char *rp_packed = NULL;
 1109                 char *lp_packed = NULL;
 1110                 size_t size;
 1111 
 1112                 ASSERT3S(g_refcount, >, 0);
 1113 
 1114                 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
 1115                 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 1116 
 1117                 if (recvdprops != NULL) {
 1118                         rp_packed = fnvlist_pack(recvdprops, &size);
 1119                         zc.zc_nvlist_src = (uint64_t)(uintptr_t)rp_packed;
 1120                         zc.zc_nvlist_src_size = size;
 1121                 }
 1122 
 1123                 if (localprops != NULL) {
 1124                         lp_packed = fnvlist_pack(localprops, &size);
 1125                         zc.zc_nvlist_conf = (uint64_t)(uintptr_t)lp_packed;
 1126                         zc.zc_nvlist_conf_size = size;
 1127                 }
 1128 
 1129                 if (origin != NULL)
 1130                         (void) strlcpy(zc.zc_string, origin,
 1131                             sizeof (zc.zc_string));
 1132 
 1133                 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
 1134                 zc.zc_begin_record = drr.drr_u.drr_begin;
 1135                 zc.zc_guid = force;
 1136                 zc.zc_cookie = input_fd;
 1137                 zc.zc_cleanup_fd = -1;
 1138                 zc.zc_action_handle = 0;
 1139 
 1140                 zc.zc_nvlist_dst_size = 128 * 1024;
 1141                 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
 1142                     malloc(zc.zc_nvlist_dst_size);
 1143 
 1144                 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
 1145                 if (error != 0) {
 1146                         error = errno;
 1147                 } else {
 1148                         if (read_bytes != NULL)
 1149                                 *read_bytes = zc.zc_cookie;
 1150 
 1151                         if (errflags != NULL)
 1152                                 *errflags = zc.zc_obj;
 1153 
 1154                         if (errors != NULL)
 1155                                 VERIFY0(nvlist_unpack(
 1156                                     (void *)(uintptr_t)zc.zc_nvlist_dst,
 1157                                     zc.zc_nvlist_dst_size, errors, KM_SLEEP));
 1158                 }
 1159 
 1160                 if (rp_packed != NULL)
 1161                         fnvlist_pack_free(rp_packed, size);
 1162                 if (lp_packed != NULL)
 1163                         fnvlist_pack_free(lp_packed, size);
 1164                 free((void *)(uintptr_t)zc.zc_nvlist_dst);
 1165         }
 1166 
 1167         return (error);
 1168 }
 1169 
 1170 /*
 1171  * The simplest receive case: receive from the specified fd, creating the
 1172  * specified snapshot.  Apply the specified properties as "received" properties
 1173  * (which can be overridden by locally-set properties).  If the stream is a
 1174  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
 1175  * flag will cause the target filesystem to be rolled back or destroyed if
 1176  * necessary to receive.
 1177  *
 1178  * Return 0 on success or an errno on failure.
 1179  *
 1180  * Note: this interface does not work on dedup'd streams
 1181  * (those with DMU_BACKUP_FEATURE_DEDUP).
 1182  */
 1183 int
 1184 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
 1185     boolean_t force, boolean_t raw, int fd)
 1186 {
 1187         return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
 1188             B_FALSE, B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
 1189 }
 1190 
 1191 /*
 1192  * Like lzc_receive, but if the receive fails due to premature stream
 1193  * termination, the intermediate state will be preserved on disk.  In this
 1194  * case, ECKSUM will be returned.  The receive may subsequently be resumed
 1195  * with a resuming send stream generated by lzc_send_resume().
 1196  */
 1197 int
 1198 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
 1199     boolean_t force, boolean_t raw, int fd)
 1200 {
 1201         return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
 1202             B_FALSE, B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
 1203 }
 1204 
 1205 /*
 1206  * Like lzc_receive, but allows the caller to read the begin record and then to
 1207  * pass it in.  That could be useful if the caller wants to derive, for example,
 1208  * the snapname or the origin parameters based on the information contained in
 1209  * the begin record.
 1210  * The begin record must be in its original form as read from the stream,
 1211  * in other words, it should not be byteswapped.
 1212  *
 1213  * The 'resumable' parameter allows to obtain the same behavior as with
 1214  * lzc_receive_resumable.
 1215  */
 1216 int
 1217 lzc_receive_with_header(const char *snapname, nvlist_t *props,
 1218     const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
 1219     int fd, const dmu_replay_record_t *begin_record)
 1220 {
 1221         if (begin_record == NULL)
 1222                 return (EINVAL);
 1223 
 1224         return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
 1225             B_FALSE, resumable, raw, fd, begin_record, NULL, NULL, NULL));
 1226 }
 1227 
 1228 /*
 1229  * Like lzc_receive, but allows the caller to pass all supported arguments
 1230  * and retrieve all values returned.  The only additional input parameter
 1231  * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
 1232  *
 1233  * The following parameters all provide return values.  Several may be set
 1234  * in the failure case and will contain additional information.
 1235  *
 1236  * The 'read_bytes' value will be set to the total number of bytes read.
 1237  *
 1238  * The 'errflags' value will contain zprop_errflags_t flags which are
 1239  * used to describe any failures.
 1240  *
 1241  * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
 1242  *
 1243  * The 'errors' nvlist contains an entry for each unapplied received
 1244  * property.  Callers are responsible for freeing this nvlist.
 1245  */
 1246 int
 1247 lzc_receive_one(const char *snapname, nvlist_t *props,
 1248     const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
 1249     int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
 1250     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
 1251     nvlist_t **errors)
 1252 {
 1253         (void) action_handle, (void) cleanup_fd;
 1254         return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
 1255             B_FALSE, resumable, raw, input_fd, begin_record,
 1256             read_bytes, errflags, errors));
 1257 }
 1258 
 1259 /*
 1260  * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
 1261  * argument.
 1262  *
 1263  * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
 1264  * exclude ('zfs receive -x') properties. Callers are responsible for freeing
 1265  * this nvlist
 1266  */
 1267 int
 1268 lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
 1269     nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
 1270     boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
 1271     const dmu_replay_record_t *begin_record, int cleanup_fd,
 1272     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
 1273     nvlist_t **errors)
 1274 {
 1275         (void) action_handle, (void) cleanup_fd;
 1276         return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
 1277             force, B_FALSE, resumable, raw, input_fd, begin_record,
 1278             read_bytes, errflags, errors));
 1279 }
 1280 
 1281 /*
 1282  * Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
 1283  * 'heal' argument.
 1284  *
 1285  * The heal arguments tells us to heal the provided snapshot using the provided
 1286  * send stream
 1287  */
 1288 int lzc_receive_with_heal(const char *snapname, nvlist_t *props,
 1289     nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
 1290     boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw,
 1291     int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
 1292     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
 1293     nvlist_t **errors)
 1294 {
 1295         (void) action_handle, (void) cleanup_fd;
 1296         return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
 1297             force, heal, resumable, raw, input_fd, begin_record,
 1298             read_bytes, errflags, errors));
 1299 }
 1300 
 1301 /*
 1302  * Roll back this filesystem or volume to its most recent snapshot.
 1303  * If snapnamebuf is not NULL, it will be filled in with the name
 1304  * of the most recent snapshot.
 1305  * Note that the latest snapshot may change if a new one is concurrently
 1306  * created or the current one is destroyed.  lzc_rollback_to can be used
 1307  * to roll back to a specific latest snapshot.
 1308  *
 1309  * Return 0 on success or an errno on failure.
 1310  */
 1311 int
 1312 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
 1313 {
 1314         nvlist_t *args;
 1315         nvlist_t *result;
 1316         int err;
 1317 
 1318         args = fnvlist_alloc();
 1319         err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
 1320         nvlist_free(args);
 1321         if (err == 0 && snapnamebuf != NULL) {
 1322                 const char *snapname = fnvlist_lookup_string(result, "target");
 1323                 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
 1324         }
 1325         nvlist_free(result);
 1326 
 1327         return (err);
 1328 }
 1329 
 1330 /*
 1331  * Roll back this filesystem or volume to the specified snapshot,
 1332  * if possible.
 1333  *
 1334  * Return 0 on success or an errno on failure.
 1335  */
 1336 int
 1337 lzc_rollback_to(const char *fsname, const char *snapname)
 1338 {
 1339         nvlist_t *args;
 1340         nvlist_t *result;
 1341         int err;
 1342 
 1343         args = fnvlist_alloc();
 1344         fnvlist_add_string(args, "target", snapname);
 1345         err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
 1346         nvlist_free(args);
 1347         nvlist_free(result);
 1348         return (err);
 1349 }
 1350 
 1351 /*
 1352  * Creates new bookmarks from existing snapshot or bookmark.
 1353  *
 1354  * The bookmarks nvlist maps from the full name of the new bookmark to
 1355  * the full name of the source snapshot or bookmark.
 1356  * All the bookmarks and snapshots must be in the same pool.
 1357  * The new bookmarks names must be unique.
 1358  * => see function dsl_bookmark_create_nvl_validate
 1359  *
 1360  * The returned results nvlist will have an entry for each bookmark that failed.
 1361  * The value will be the (int32) error code.
 1362  *
 * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of one (undetermined) bookmark that failed.
 1365  */
 1366 int
 1367 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
 1368 {
 1369         nvpair_t *elem;
 1370         int error;
 1371         char pool[ZFS_MAX_DATASET_NAME_LEN];
 1372 
 1373         /* determine pool name from first bookmark */
 1374         elem = nvlist_next_nvpair(bookmarks, NULL);
 1375         if (elem == NULL)
 1376                 return (0);
 1377         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 1378         pool[strcspn(pool, "/#")] = '\0';
 1379 
 1380         error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
 1381 
 1382         return (error);
 1383 }
 1384 
 1385 /*
 1386  * Retrieve bookmarks.
 1387  *
 1388  * Retrieve the list of bookmarks for the given file system. The props
 1389  * parameter is an nvlist of property names (with no values) that will be
 1390  * returned for each bookmark.
 1391  *
 1392  * The following are valid properties on bookmarks, most of which are numbers
 1393  * (represented as uint64 in the nvlist), except redact_snaps, which is a
 1394  * uint64 array, and redact_complete, which is a boolean
 1395  *
 1396  * "guid" - globally unique identifier of the snapshot it refers to
 1397  * "createtxg" - txg when the snapshot it refers to was created
 1398  * "creation" - timestamp when the snapshot it refers to was created
 1399  * "ivsetguid" - IVset guid for identifying encrypted snapshots
 1400  * "redact_snaps" - list of guids of the redaction snapshots for the specified
 1401  *     bookmark.  If the bookmark is not a redaction bookmark, the nvlist will
 1402  *     not contain an entry for this value.  If it is redacted with respect to
 1403  *     no snapshots, it will contain value -> NULL uint64 array
 1404  * "redact_complete" - boolean value; true if the redaction bookmark is
 1405  *     complete, false otherwise.
 1406  *
 * The format of the returned nvlist is as follows:
 1408  * <short name of bookmark> -> {
 1409  *     <name of property> -> {
 1410  *         "value" -> uint64
 1411  *     }
 1412  *     ...
 1413  *     "redact_snaps" -> {
 1414  *         "value" -> uint64 array
 1415  *     }
 1416  *     "redact_complete" -> {
 1417  *         "value" -> boolean value
 1418  *     }
 1419  *  }
 1420  */
 1421 int
 1422 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
 1423 {
 1424         return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
 1425 }
 1426 
 1427 /*
 1428  * Get bookmark properties.
 1429  *
 1430  * Given a bookmark's full name, retrieve all properties for the bookmark.
 1431  *
 1432  * The format of the returned property list is as follows:
 1433  * {
 1434  *     <name of property> -> {
 1435  *         "value" -> uint64
 1436  *     }
 1437  *     ...
 1438  *     "redact_snaps" -> {
 1439  *         "value" -> uint64 array
 1440  * }
 1441  */
 1442 int
 1443 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
 1444 {
 1445         int error;
 1446 
 1447         nvlist_t *innvl = fnvlist_alloc();
 1448         error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
 1449         fnvlist_free(innvl);
 1450 
 1451         return (error);
 1452 }
 1453 
 1454 /*
 1455  * Destroys bookmarks.
 1456  *
 1457  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 1458  * They must all be in the same pool.  Bookmarks are specified as
 1459  * <fs>#<bmark>.
 1460  *
 1461  * Bookmarks that do not exist will be silently ignored.
 1462  *
 1463  * The return value will be 0 if all bookmarks that existed were destroyed.
 1464  *
 * Otherwise the return value will be the errno of one (undetermined) bookmark
 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed.  The value in the errlist will be
 * the (int32) error code.
 1469  */
 1470 int
 1471 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
 1472 {
 1473         nvpair_t *elem;
 1474         int error;
 1475         char pool[ZFS_MAX_DATASET_NAME_LEN];
 1476 
 1477         /* determine the pool name */
 1478         elem = nvlist_next_nvpair(bmarks, NULL);
 1479         if (elem == NULL)
 1480                 return (0);
 1481         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 1482         pool[strcspn(pool, "/#")] = '\0';
 1483 
 1484         error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
 1485 
 1486         return (error);
 1487 }
 1488 
 1489 static int
 1490 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
 1491     uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
 1492 {
 1493         int error;
 1494         nvlist_t *args;
 1495 
 1496         args = fnvlist_alloc();
 1497         fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
 1498         fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
 1499         fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
 1500         fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
 1501         fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
 1502         error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
 1503         fnvlist_free(args);
 1504 
 1505         return (error);
 1506 }
 1507 
 1508 /*
 1509  * Executes a channel program.
 1510  *
 1511  * If this function returns 0 the channel program was successfully loaded and
 1512  * ran without failing. Note that individual commands the channel program ran
 1513  * may have failed and the channel program is responsible for reporting such
 1514  * errors through outnvl if they are important.
 1515  *
 1516  * This method may also return:
 1517  *
 1518  * EINVAL   The program contains syntax errors, or an invalid memory or time
 1519  *          limit was given. No part of the channel program was executed.
 1520  *          If caused by syntax errors, 'outnvl' contains information about the
 1521  *          errors.
 1522  *
 1523  * ECHRNG   The program was executed, but encountered a runtime error, such as
 1524  *          calling a function with incorrect arguments, invoking the error()
 1525  *          function directly, failing an assert() command, etc. Some portion
 1526  *          of the channel program may have executed and committed changes.
 1527  *          Information about the failure can be found in 'outnvl'.
 1528  *
 1529  * ENOMEM   The program fully executed, but the output buffer was not large
 1530  *          enough to store the returned value. No output is returned through
 1531  *          'outnvl'.
 1532  *
 1533  * ENOSPC   The program was terminated because it exceeded its memory usage
 1534  *          limit. Some portion of the channel program may have executed and
 1535  *          committed changes to disk. No output is returned through 'outnvl'.
 1536  *
 1537  * ETIME    The program was terminated because it exceeded its Lua instruction
 1538  *          limit. Some portion of the channel program may have executed and
 1539  *          committed changes to disk. No output is returned through 'outnvl'.
 1540  */
 1541 int
 1542 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
 1543     uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
 1544 {
 1545         return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
 1546             memlimit, argnvl, outnvl));
 1547 }
 1548 
 1549 /*
 1550  * Creates a checkpoint for the specified pool.
 1551  *
 1552  * If this function returns 0 the pool was successfully checkpointed.
 1553  *
 1554  * This method may also return:
 1555  *
 1556  * ZFS_ERR_CHECKPOINT_EXISTS
 1557  *      The pool already has a checkpoint. A pools can only have one
 1558  *      checkpoint at most, at any given time.
 1559  *
 1560  * ZFS_ERR_DISCARDING_CHECKPOINT
 1561  *      ZFS is in the middle of discarding a checkpoint for this pool.
 1562  *      The pool can be checkpointed again once the discard is done.
 1563  *
 1564  * ZFS_DEVRM_IN_PROGRESS
 1565  *      A vdev is currently being removed. The pool cannot be
 1566  *      checkpointed until the device removal is done.
 1567  *
 1568  * ZFS_VDEV_TOO_BIG
 1569  *      One or more top-level vdevs exceed the maximum vdev size
 1570  *      supported for this feature.
 1571  */
 1572 int
 1573 lzc_pool_checkpoint(const char *pool)
 1574 {
 1575         int error;
 1576 
 1577         nvlist_t *result = NULL;
 1578         nvlist_t *args = fnvlist_alloc();
 1579 
 1580         error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
 1581 
 1582         fnvlist_free(args);
 1583         fnvlist_free(result);
 1584 
 1585         return (error);
 1586 }
 1587 
 1588 /*
 1589  * Discard the checkpoint from the specified pool.
 1590  *
 1591  * If this function returns 0 the checkpoint was successfully discarded.
 1592  *
 1593  * This method may also return:
 1594  *
 1595  * ZFS_ERR_NO_CHECKPOINT
 1596  *      The pool does not have a checkpoint.
 1597  *
 1598  * ZFS_ERR_DISCARDING_CHECKPOINT
 1599  *      ZFS is already in the middle of discarding the checkpoint.
 1600  */
 1601 int
 1602 lzc_pool_checkpoint_discard(const char *pool)
 1603 {
 1604         int error;
 1605 
 1606         nvlist_t *result = NULL;
 1607         nvlist_t *args = fnvlist_alloc();
 1608 
 1609         error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
 1610 
 1611         fnvlist_free(args);
 1612         fnvlist_free(result);
 1613 
 1614         return (error);
 1615 }
 1616 
 1617 /*
 1618  * Executes a read-only channel program.
 1619  *
 1620  * A read-only channel program works programmatically the same way as a
 1621  * normal channel program executed with lzc_channel_program(). The only
 1622  * difference is it runs exclusively in open-context and therefore can
 1623  * return faster. The downside to that, is that the program cannot change
 1624  * on-disk state by calling functions from the zfs.sync submodule.
 1625  *
 1626  * The return values of this function (and their meaning) are exactly the
 1627  * same as the ones described in lzc_channel_program().
 1628  */
 1629 int
 1630 lzc_channel_program_nosync(const char *pool, const char *program,
 1631     uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
 1632 {
 1633         return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
 1634             memlimit, argnvl, outnvl));
 1635 }
 1636 
 1637 int
 1638 lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
 1639 {
 1640         return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl));
 1641 }
 1642 
 1643 int
 1644 lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
 1645 {
 1646         return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl));
 1647 }
 1648 
 1649 /*
 1650  * Performs key management functions
 1651  *
 1652  * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
 1653  * load or change a wrapping key, the key should be specified in the
 1654  * hidden_args nvlist so that it is not logged.
 1655  */
 1656 int
 1657 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
 1658     uint_t wkeylen)
 1659 {
 1660         int error;
 1661         nvlist_t *ioc_args;
 1662         nvlist_t *hidden_args;
 1663 
 1664         if (wkeydata == NULL)
 1665                 return (EINVAL);
 1666 
 1667         ioc_args = fnvlist_alloc();
 1668         hidden_args = fnvlist_alloc();
 1669         fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
 1670         fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
 1671         if (noop)
 1672                 fnvlist_add_boolean(ioc_args, "noop");
 1673         error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
 1674         nvlist_free(hidden_args);
 1675         nvlist_free(ioc_args);
 1676 
 1677         return (error);
 1678 }
 1679 
 1680 int
 1681 lzc_unload_key(const char *fsname)
 1682 {
 1683         return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
 1684 }
 1685 
 1686 int
 1687 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
 1688     uint8_t *wkeydata, uint_t wkeylen)
 1689 {
 1690         int error;
 1691         nvlist_t *ioc_args = fnvlist_alloc();
 1692         nvlist_t *hidden_args = NULL;
 1693 
 1694         fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
 1695 
 1696         if (wkeydata != NULL) {
 1697                 hidden_args = fnvlist_alloc();
 1698                 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
 1699                     wkeylen);
 1700                 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
 1701         }
 1702 
 1703         if (props != NULL)
 1704                 fnvlist_add_nvlist(ioc_args, "props", props);
 1705 
 1706         error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
 1707         nvlist_free(hidden_args);
 1708         nvlist_free(ioc_args);
 1709 
 1710         return (error);
 1711 }
 1712 
 1713 int
 1714 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
 1715 {
 1716         nvlist_t *args = fnvlist_alloc();
 1717         int error;
 1718 
 1719         fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
 1720 
 1721         error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
 1722         nvlist_free(args);
 1723         return (error);
 1724 }
 1725 
 1726 /*
 1727  * Changes initializing state.
 1728  *
 1729  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
 1730  * The key is ignored.
 1731  *
 1732  * If there are errors related to vdev arguments, per-vdev errors are returned
 1733  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
 1734  * guid is stringified with PRIu64, and errno is one of the following as
 1735  * an int64_t:
 1736  *      - ENODEV if the device was not found
 1737  *      - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
 1738  *      - EROFS if the device is not writeable
 1739  *      - EBUSY start requested but the device is already being either
 1740  *              initialized or trimmed
 1741  *      - ESRCH cancel/suspend requested but device is not being initialized
 1742  *
 1743  * If the errlist is empty, then return value will be:
 1744  *      - EINVAL if one or more arguments was invalid
 1745  *      - Other spa_open failures
 1746  *      - 0 if the operation succeeded
 1747  */
 1748 int
 1749 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
 1750     nvlist_t *vdevs, nvlist_t **errlist)
 1751 {
 1752         int error;
 1753 
 1754         nvlist_t *args = fnvlist_alloc();
 1755         fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
 1756         fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
 1757 
 1758         error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
 1759 
 1760         fnvlist_free(args);
 1761 
 1762         return (error);
 1763 }
 1764 
 1765 /*
 1766  * Changes TRIM state.
 1767  *
 1768  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
 1769  * The key is ignored.
 1770  *
 1771  * If there are errors related to vdev arguments, per-vdev errors are returned
 1772  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
 1773  * guid is stringified with PRIu64, and errno is one of the following as
 1774  * an int64_t:
 1775  *      - ENODEV if the device was not found
 1776  *      - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
 1777  *      - EROFS if the device is not writeable
 1778  *      - EBUSY start requested but the device is already being either trimmed
 1779  *              or initialized
 1780  *      - ESRCH cancel/suspend requested but device is not being initialized
 1781  *      - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
 1782  *
 1783  * If the errlist is empty, then return value will be:
 1784  *      - EINVAL if one or more arguments was invalid
 1785  *      - Other spa_open failures
 1786  *      - 0 if the operation succeeded
 1787  */
 1788 int
 1789 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
 1790     boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
 1791 {
 1792         int error;
 1793 
 1794         nvlist_t *args = fnvlist_alloc();
 1795         fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
 1796         fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
 1797         fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
 1798         fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
 1799 
 1800         error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
 1801 
 1802         fnvlist_free(args);
 1803 
 1804         return (error);
 1805 }
 1806 
 1807 /*
 1808  * Create a redaction bookmark named bookname by redacting snapshot with respect
 1809  * to all the snapshots in snapnv.
 1810  */
 1811 int
 1812 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
 1813 {
 1814         nvlist_t *args = fnvlist_alloc();
 1815         fnvlist_add_string(args, "bookname", bookname);
 1816         fnvlist_add_nvlist(args, "snapnv", snapnv);
 1817         int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
 1818         fnvlist_free(args);
 1819         return (error);
 1820 }
 1821 
 1822 static int
 1823 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
 1824     uint64_t tag, boolean_t *waited)
 1825 {
 1826         nvlist_t *args = fnvlist_alloc();
 1827         nvlist_t *result = NULL;
 1828 
 1829         fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
 1830         if (use_tag)
 1831                 fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
 1832 
 1833         int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
 1834 
 1835         if (error == 0 && waited != NULL)
 1836                 *waited = fnvlist_lookup_boolean_value(result,
 1837                     ZPOOL_WAIT_WAITED);
 1838 
 1839         fnvlist_free(args);
 1840         fnvlist_free(result);
 1841 
 1842         return (error);
 1843 }
 1844 
 1845 int
 1846 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
 1847 {
 1848         return (wait_common(pool, activity, B_FALSE, 0, waited));
 1849 }
 1850 
 1851 int
 1852 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
 1853     boolean_t *waited)
 1854 {
 1855         return (wait_common(pool, activity, B_TRUE, tag, waited));
 1856 }
 1857 
 1858 int
 1859 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
 1860 {
 1861         nvlist_t *args = fnvlist_alloc();
 1862         nvlist_t *result = NULL;
 1863 
 1864         fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
 1865 
 1866         int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
 1867 
 1868         if (error == 0 && waited != NULL)
 1869                 *waited = fnvlist_lookup_boolean_value(result,
 1870                     ZFS_WAIT_WAITED);
 1871 
 1872         fnvlist_free(args);
 1873         fnvlist_free(result);
 1874 
 1875         return (error);
 1876 }
 1877 
 1878 /*
 1879  * Set the bootenv contents for the given pool.
 1880  */
 1881 int
 1882 lzc_set_bootenv(const char *pool, const nvlist_t *env)
 1883 {
 1884         return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
 1885 }
 1886 
 1887 /*
 1888  * Get the contents of the bootenv of the given pool.
 1889  */
 1890 int
 1891 lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
 1892 {
 1893         return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
 1894 }

Cache object: 26d612fe7ce8b8b2284adfca3a8aa7d6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.