The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/include/sys/spa.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
   24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   25  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
   26  * Copyright 2013 Saso Kiselkov. All rights reserved.
   27  * Copyright (c) 2014 Integros [integros.com]
   28  * Copyright 2017 Joyent, Inc.
   29  * Copyright (c) 2017, Intel Corporation.
   30  * Copyright (c) 2019, Allan Jude
   31  * Copyright (c) 2019, Klara Inc.
   32  * Copyright (c) 2019, Datto Inc.
   33  */
   34 
   35 #ifndef _SYS_SPA_H
   36 #define _SYS_SPA_H
   37 
   38 #include <sys/avl.h>
   39 #include <sys/zfs_context.h>
   40 #include <sys/kstat.h>
   41 #include <sys/nvpair.h>
   42 #include <sys/sysmacros.h>
   43 #include <sys/types.h>
   44 #include <sys/fs/zfs.h>
   45 #include <sys/spa_checksum.h>
   46 #include <sys/dmu.h>
   47 #include <sys/space_map.h>
   48 #include <sys/bitops.h>
   49 
   50 #ifdef  __cplusplus
   51 extern "C" {
   52 #endif
   53 
   54 /*
   55  * Forward references that lots of things need.
   56  */
   57 typedef struct spa spa_t;
   58 typedef struct vdev vdev_t;
   59 typedef struct metaslab metaslab_t;
   60 typedef struct metaslab_group metaslab_group_t;
   61 typedef struct metaslab_class metaslab_class_t;
   62 typedef struct zio zio_t;
   63 typedef struct zilog zilog_t;
   64 typedef struct spa_aux_vdev spa_aux_vdev_t;
   65 typedef struct ddt ddt_t;
   66 typedef struct ddt_entry ddt_entry_t;
   67 typedef struct zbookmark_phys zbookmark_phys_t;
   68 
   69 struct bpobj;
   70 struct bplist;
   71 struct dsl_pool;
   72 struct dsl_dataset;
   73 struct dsl_crypto_params;
   74 
   75 /*
   76  * Alignment Shift (ashift) is an immutable, internal top-level vdev property
   77  * which can only be set at vdev creation time. Physical writes are always done
   78  * according to it, which makes 2^ashift the smallest possible IO on a vdev.
   79  *
   80  * We currently allow values ranging from 512 bytes (2^9 = 512) to 64 KiB
   81  * (2^16 = 65,536).
   82  */
   83 #define ASHIFT_MIN              9
   84 #define ASHIFT_MAX              16
   85 
   86 /*
   87  * Size of block to hold the configuration data (a packed nvlist)
   88  */
   89 #define SPA_CONFIG_BLOCKSIZE    (1ULL << 14)
   90 
   91 /*
   92  * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
   93  * The ASIZE encoding should be at least 64 times larger (6 more bits)
   94  * to support up to 4-way RAID-Z mirror mode with worst-case gang block
   95  * overhead, three DVAs per bp, plus one more bit in case we do anything
   96  * else that expands the ASIZE.
   97  */
   98 #define SPA_LSIZEBITS           16      /* LSIZE up to 32M (2^16 * 512) */
   99 #define SPA_PSIZEBITS           16      /* PSIZE up to 32M (2^16 * 512) */
  100 #define SPA_ASIZEBITS           24      /* ASIZE up to 64 times larger  */
  101 
  102 #define SPA_COMPRESSBITS        7
  103 #define SPA_VDEVBITS            24
  104 #define SPA_COMPRESSMASK        ((1U << SPA_COMPRESSBITS) - 1)
  105 
  106 /*
  107  * All SPA data is represented by 128-bit data virtual addresses (DVAs).
  108  * The members of the dva_t should be considered opaque outside the SPA.
  109  */
  110 typedef struct dva {
  111         uint64_t        dva_word[2];
  112 } dva_t;
  113 
  114 
  115 /*
  116  * Some checksums/hashes need a 256-bit initialization salt. This salt is kept
  117  * secret and is suitable for use in MAC algorithms as the key.
  118  */
  119 typedef struct zio_cksum_salt {
  120         uint8_t         zcs_bytes[32];
  121 } zio_cksum_salt_t;
  122 
  123 /*
  124  * Each block is described by its DVAs, time of birth, checksum, etc.
  125  * The word-by-word, bit-by-bit layout of the blkptr is as follows:
  126  *
  127  *      64      56      48      40      32      24      16      8       0
  128  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  129  * 0    |  pad  |         vdev1         | GRID  |         ASIZE         |
  130  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  131  * 1    |G|                      offset1                                |
  132  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  133  * 2    |  pad  |         vdev2         | GRID  |         ASIZE         |
  134  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  135  * 3    |G|                      offset2                                |
  136  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  137  * 4    |  pad  |         vdev3         | GRID  |         ASIZE         |
  138  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  139  * 5    |G|                      offset3                                |
  140  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  141  * 6    |BDX|lvl| type  | cksum |E| comp|    PSIZE      |     LSIZE     |
  142  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  143  * 7    |                       padding                                 |
  144  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  145  * 8    |                       padding                                 |
  146  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  147  * 9    |                       physical birth txg                      |
  148  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  149  * a    |                       logical birth txg                       |
  150  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  151  * b    |                       fill count                              |
  152  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  153  * c    |                       checksum[0]                             |
  154  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  155  * d    |                       checksum[1]                             |
  156  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  157  * e    |                       checksum[2]                             |
  158  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  159  * f    |                       checksum[3]                             |
  160  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  161  *
  162  * Legend:
  163  *
  164  * vdev         virtual device ID
  165  * offset       offset into virtual device
  166  * LSIZE        logical size
  167  * PSIZE        physical size (after compression)
  168  * ASIZE        allocated size (including RAID-Z parity and gang block headers)
  169  * GRID         RAID-Z layout information (reserved for future use)
  170  * cksum        checksum function
  171  * comp         compression function
  172  * G            gang block indicator
  173  * B            byteorder (endianness)
  174  * D            dedup
  175  * X            encryption
  176  * E            blkptr_t contains embedded data (see below)
  177  * lvl          level of indirection
  178  * type         DMU object type
  179  * phys birth   txg when dva[0] was written; zero if same as logical birth txg
  180  *              note that typically all the dva's would be written in this
  181  *              txg, but they could be different if they were moved by
  182  *              device removal.
  183  * log. birth   transaction group in which the block was logically born
  184  * fill count   number of non-zero blocks under this bp
  185  * checksum[4]  256-bit checksum of the data this bp describes
  186  */
  187 
  188 /*
  189  * The blkptr_t's of encrypted blocks also need to store the encryption
  190  * parameters so that the block can be decrypted. This layout is as follows:
  191  *
  192  *      64      56      48      40      32      24      16      8       0
  193  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  194  * 0    |               vdev1           | GRID  |         ASIZE         |
  195  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  196  * 1    |G|                      offset1                                |
  197  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  198  * 2    |               vdev2           | GRID  |         ASIZE         |
  199  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  200  * 3    |G|                      offset2                                |
  201  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  202  * 4    |                       salt                                    |
  203  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  204  * 5    |                       IV1                                     |
  205  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  206  * 6    |BDX|lvl| type  | cksum |E| comp|    PSIZE      |     LSIZE     |
  207  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  208  * 7    |                       padding                                 |
  209  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  210  * 8    |                       padding                                 |
  211  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  212  * 9    |                       physical birth txg                      |
  213  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  214  * a    |                       logical birth txg                       |
  215  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  216  * b    |               IV2             |           fill count          |
  217  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  218  * c    |                       checksum[0]                             |
  219  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  220  * d    |                       checksum[1]                             |
  221  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  222  * e    |                       MAC[0]                                  |
  223  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  224  * f    |                       MAC[1]                                  |
  225  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  226  *
  227  * Legend:
  228  *
  229  * salt         Salt for generating encryption keys
  230  * IV1          First 64 bits of encryption IV
  231  * X            Block requires encryption handling (set to 1)
  232  * E            blkptr_t contains embedded data (set to 0, see below)
  233  * fill count   number of non-zero blocks under this bp (truncated to 32 bits)
  234  * IV2          Last 32 bits of encryption IV
  235  * checksum[2]  128-bit checksum of the data this bp describes
  236  * MAC[2]       128-bit message authentication code for this data
  237  *
  238  * The X bit being set indicates that this block is one of 3 types. If this is
  239  * a level 0 block with an encrypted object type, the block is encrypted
  240  * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
  241  * object type, this block is authenticated with an HMAC (see
  242  * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
  243  * words to store a checksum-of-MACs from the level below (see
  244  * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
  245  * refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
  246  * refers to any of these 3 kinds of blocks.
  247  *
  248  * The additional encryption parameters are the salt, IV, and MAC which are
  249  * explained in greater detail in the block comment at the top of zio_crypt.c.
  250  * The MAC occupies half of the checksum space since it serves a very similar
  251  * purpose: to prevent data corruption on disk. The only functional difference
  252  * is that the checksum is used to detect on-disk corruption whether or not the
  253  * encryption key is loaded and the MAC provides additional protection against
  254  * malicious disk tampering. We use the 3rd DVA to store the salt and first
  255  * 64 bits of the IV. As a result encrypted blocks can only have 2 copies
  256  * maximum instead of the normal 3. The last 32 bits of the IV are stored in
  257  * the upper bits of what is usually the fill count. Note that only blocks at
  258  * level 0 or -2 are ever encrypted, which allows us to guarantee that these
  259  * 32 bits are not trampled over by other code (see zio_crypt.c for details).
  260  * The salt and IV are not used for authenticated bps or bps with an indirect
  261  * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
  262  * for the fill count.
  263  */
  264 
  265 /*
  266  * "Embedded" blkptr_t's don't actually point to a block, instead they
  267  * have a data payload embedded in the blkptr_t itself.  See the comment
  268  * in blkptr.c for more details.
  269  *
  270  * The blkptr_t is laid out as follows:
  271  *
  272  *      64      56      48      40      32      24      16      8       0
  273  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  274  * 0    |      payload                                                  |
  275  * 1    |      payload                                                  |
  276  * 2    |      payload                                                  |
  277  * 3    |      payload                                                  |
  278  * 4    |      payload                                                  |
  279  * 5    |      payload                                                  |
  280  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  281  * 6    |BDX|lvl| type  | etype |E| comp| PSIZE|              LSIZE     |
  282  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  283  * 7    |      payload                                                  |
  284  * 8    |      payload                                                  |
  285  * 9    |      payload                                                  |
  286  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  287  * a    |                       logical birth txg                       |
  288  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  289  * b    |      payload                                                  |
  290  * c    |      payload                                                  |
  291  * d    |      payload                                                  |
  292  * e    |      payload                                                  |
  293  * f    |      payload                                                  |
  294  *      +-------+-------+-------+-------+-------+-------+-------+-------+
  295  *
  296  * Legend:
  297  *
  298  * payload              contains the embedded data
  299  * B (byteorder)        byteorder (endianness)
  300  * D (dedup)            padding (set to zero)
  301  * X                    encryption (set to zero)
  302  * E (embedded)         set to one
  303  * lvl                  indirection level
  304  * type                 DMU object type
  305  * etype                how to interpret embedded data (BP_EMBEDDED_TYPE_*)
  306  * comp                 compression function of payload
  307  * PSIZE                size of payload after compression, in bytes
  308  * LSIZE                logical size of payload, in bytes
  309  *                      note that 25 bits is enough to store the largest
  310  *                      "normal" BP's LSIZE (2^16 * 2^9) in bytes
  311  * log. birth           transaction group in which the block was logically born
  312  *
  313  * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
  314  * bp's they are stored in units of SPA_MINBLOCKSHIFT.
  315  * Generally, the generic BP_GET_*() macros can be used on embedded BP's.
  316  * The B, D, X, lvl, type, and comp fields are stored the same as with normal
  317  * BP's so the BP_SET_* macros can be used with them.  etype, PSIZE, LSIZE must
  318  * be set with the BPE_SET_* macros.  BP_SET_EMBEDDED() should be called before
  319  * other macros, as they assert that they are only used on BP's of the correct
  320  * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
  321  * the payload space for encryption parameters (see the comment above on
  322  * how encryption parameters are stored).
  323  */
  324 
  325 #define BPE_GET_ETYPE(bp)       \
  326         (ASSERT(BP_IS_EMBEDDED(bp)), \
  327         BF64_GET((bp)->blk_prop, 40, 8))
  328 #define BPE_SET_ETYPE(bp, t)    do { \
  329         ASSERT(BP_IS_EMBEDDED(bp)); \
  330         BF64_SET((bp)->blk_prop, 40, 8, t); \
  331 } while (0)
  332 
  333 #define BPE_GET_LSIZE(bp)       \
  334         (ASSERT(BP_IS_EMBEDDED(bp)), \
  335         BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1))
  336 #define BPE_SET_LSIZE(bp, x)    do { \
  337         ASSERT(BP_IS_EMBEDDED(bp)); \
  338         BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \
  339 } while (0)
  340 
  341 #define BPE_GET_PSIZE(bp)       \
  342         (ASSERT(BP_IS_EMBEDDED(bp)), \
  343         BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1))
  344 #define BPE_SET_PSIZE(bp, x)    do { \
  345         ASSERT(BP_IS_EMBEDDED(bp)); \
  346         BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \
  347 } while (0)
  348 
  349 typedef enum bp_embedded_type {
  350         BP_EMBEDDED_TYPE_DATA,
  351         BP_EMBEDDED_TYPE_RESERVED, /* Reserved for Delphix byteswap feature. */
  352         BP_EMBEDDED_TYPE_REDACTED,
  353         NUM_BP_EMBEDDED_TYPES
  354 } bp_embedded_type_t;
  355 
  356 #define BPE_NUM_WORDS 14
  357 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
  358 #define BPE_IS_PAYLOADWORD(bp, wp) \
  359         ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
  360 
  361 #define SPA_BLKPTRSHIFT 7               /* blkptr_t is 128 bytes        */
  362 #define SPA_DVAS_PER_BP 3               /* Number of DVAs in a bp       */
  363 #define SPA_SYNC_MIN_VDEVS 3            /* min vdevs to update during sync */
  364 
/*
 * A block is a hole when it has either 1) never been written to, or
 * 2) is zero-filled. In both cases, ZFS can return all zeroes for all reads
 * without physically allocating disk space. Holes are represented in the
 * blkptr_t structure by zeroed blk_dva. Correct checking for holes is
 * done through the BP_IS_HOLE macro. Holes that were written to at some
 * point (i.e. were punched after having been filled) additionally store
 * the logical size, level, DMU object type, and birth times.
 */
  374 typedef struct blkptr {
  375         dva_t           blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
  376         uint64_t        blk_prop;       /* size, compression, type, etc     */
  377         uint64_t        blk_pad[2];     /* Extra space for the future       */
  378         uint64_t        blk_phys_birth; /* txg when block was allocated     */
  379         uint64_t        blk_birth;      /* transaction group at birth       */
  380         uint64_t        blk_fill;       /* fill count                       */
  381         zio_cksum_t     blk_cksum;      /* 256-bit checksum                 */
  382 } blkptr_t;
  383 
  384 /*
  385  * Macros to get and set fields in a bp or DVA.
  386  */
  387 
  388 /*
  389  * Note, for gang blocks, DVA_GET_ASIZE() is the total space allocated for
  390  * this gang DVA including its children BP's.  The space allocated at this
  391  * DVA's vdev/offset is vdev_gang_header_asize(vdev).
  392  */
  393 #define DVA_GET_ASIZE(dva)      \
  394         BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
  395 #define DVA_SET_ASIZE(dva, x)   \
  396         BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
  397         SPA_MINBLOCKSHIFT, 0, x)
  398 
  399 #define DVA_GET_GRID(dva)       BF64_GET((dva)->dva_word[0], 24, 8)
  400 #define DVA_SET_GRID(dva, x)    BF64_SET((dva)->dva_word[0], 24, 8, x)
  401 
  402 #define DVA_GET_VDEV(dva)       BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS)
  403 #define DVA_SET_VDEV(dva, x)    \
  404         BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x)
  405 
  406 #define DVA_GET_OFFSET(dva)     \
  407         BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
  408 #define DVA_SET_OFFSET(dva, x)  \
  409         BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
  410 
  411 #define DVA_GET_GANG(dva)       BF64_GET((dva)->dva_word[1], 63, 1)
  412 #define DVA_SET_GANG(dva, x)    BF64_SET((dva)->dva_word[1], 63, 1, x)
  413 
  414 #define BP_GET_LSIZE(bp)        \
  415         (BP_IS_EMBEDDED(bp) ?   \
  416         (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \
  417         BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
  418 #define BP_SET_LSIZE(bp, x)     do { \
  419         ASSERT(!BP_IS_EMBEDDED(bp)); \
  420         BF64_SET_SB((bp)->blk_prop, \
  421             0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
  422 } while (0)
  423 
  424 #define BP_GET_PSIZE(bp)        \
  425         (BP_IS_EMBEDDED(bp) ? 0 : \
  426         BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1))
  427 #define BP_SET_PSIZE(bp, x)     do { \
  428         ASSERT(!BP_IS_EMBEDDED(bp)); \
  429         BF64_SET_SB((bp)->blk_prop, \
  430             16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
  431 } while (0)
  432 
  433 #define BP_GET_COMPRESS(bp)             \
  434         BF64_GET((bp)->blk_prop, 32, SPA_COMPRESSBITS)
  435 #define BP_SET_COMPRESS(bp, x)          \
  436         BF64_SET((bp)->blk_prop, 32, SPA_COMPRESSBITS, x)
  437 
  438 #define BP_IS_EMBEDDED(bp)              BF64_GET((bp)->blk_prop, 39, 1)
  439 #define BP_SET_EMBEDDED(bp, x)          BF64_SET((bp)->blk_prop, 39, 1, x)
  440 
  441 #define BP_GET_CHECKSUM(bp)             \
  442         (BP_IS_EMBEDDED(bp) ? ZIO_CHECKSUM_OFF : \
  443         BF64_GET((bp)->blk_prop, 40, 8))
  444 #define BP_SET_CHECKSUM(bp, x)          do { \
  445         ASSERT(!BP_IS_EMBEDDED(bp)); \
  446         BF64_SET((bp)->blk_prop, 40, 8, x); \
  447 } while (0)
  448 
  449 #define BP_GET_TYPE(bp)                 BF64_GET((bp)->blk_prop, 48, 8)
  450 #define BP_SET_TYPE(bp, x)              BF64_SET((bp)->blk_prop, 48, 8, x)
  451 
  452 #define BP_GET_LEVEL(bp)                BF64_GET((bp)->blk_prop, 56, 5)
  453 #define BP_SET_LEVEL(bp, x)             BF64_SET((bp)->blk_prop, 56, 5, x)
  454 
  455 /* encrypted, authenticated, and MAC cksum bps use the same bit */
  456 #define BP_USES_CRYPT(bp)               BF64_GET((bp)->blk_prop, 61, 1)
  457 #define BP_SET_CRYPT(bp, x)             BF64_SET((bp)->blk_prop, 61, 1, x)
  458 
  459 #define BP_IS_ENCRYPTED(bp)                     \
  460         (BP_USES_CRYPT(bp) &&                   \
  461         BP_GET_LEVEL(bp) <= 0 &&                \
  462         DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
  463 
  464 #define BP_IS_AUTHENTICATED(bp)                 \
  465         (BP_USES_CRYPT(bp) &&                   \
  466         BP_GET_LEVEL(bp) <= 0 &&                \
  467         !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
  468 
  469 #define BP_HAS_INDIRECT_MAC_CKSUM(bp)           \
  470         (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
  471 
  472 #define BP_IS_PROTECTED(bp)                     \
  473         (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
  474 
  475 #define BP_GET_DEDUP(bp)                BF64_GET((bp)->blk_prop, 62, 1)
  476 #define BP_SET_DEDUP(bp, x)             BF64_SET((bp)->blk_prop, 62, 1, x)
  477 
  478 #define BP_GET_BYTEORDER(bp)            BF64_GET((bp)->blk_prop, 63, 1)
  479 #define BP_SET_BYTEORDER(bp, x)         BF64_SET((bp)->blk_prop, 63, 1, x)
  480 
  481 #define BP_GET_FREE(bp)                 BF64_GET((bp)->blk_fill, 0, 1)
  482 #define BP_SET_FREE(bp, x)              BF64_SET((bp)->blk_fill, 0, 1, x)
  483 
  484 #define BP_PHYSICAL_BIRTH(bp)           \
  485         (BP_IS_EMBEDDED(bp) ? 0 : \
  486         (bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
  487 
  488 #define BP_SET_BIRTH(bp, logical, physical)     \
  489 {                                               \
  490         ASSERT(!BP_IS_EMBEDDED(bp));            \
  491         (bp)->blk_birth = (logical);            \
  492         (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
  493 }
  494 
  495 #define BP_GET_FILL(bp)                         \
  496         ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
  497         ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
  498 
  499 #define BP_SET_FILL(bp, fill)                   \
  500 {                                               \
  501         if (BP_IS_ENCRYPTED(bp))                        \
  502                 BF64_SET((bp)->blk_fill, 0, 32, fill); \
  503         else                                    \
  504                 (bp)->blk_fill = fill;          \
  505 }
  506 
  507 #define BP_GET_IV2(bp)                          \
  508         (ASSERT(BP_IS_ENCRYPTED(bp)),           \
  509         BF64_GET((bp)->blk_fill, 32, 32))
  510 #define BP_SET_IV2(bp, iv2)                     \
  511 {                                               \
  512         ASSERT(BP_IS_ENCRYPTED(bp));            \
  513         BF64_SET((bp)->blk_fill, 32, 32, iv2);  \
  514 }
  515 
  516 #define BP_IS_METADATA(bp)      \
  517         (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
  518 
  519 #define BP_GET_ASIZE(bp)        \
  520         (BP_IS_EMBEDDED(bp) ? 0 : \
  521         DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
  522         DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
  523         (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
  524 
  525 #define BP_GET_UCSIZE(bp)       \
  526         (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
  527 
  528 #define BP_GET_NDVAS(bp)        \
  529         (BP_IS_EMBEDDED(bp) ? 0 : \
  530         !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
  531         !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
  532         (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
  533 
  534 #define BP_COUNT_GANG(bp)       \
  535         (BP_IS_EMBEDDED(bp) ? 0 : \
  536         (DVA_GET_GANG(&(bp)->blk_dva[0]) + \
  537         DVA_GET_GANG(&(bp)->blk_dva[1]) + \
  538         (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
  539 
  540 #define DVA_EQUAL(dva1, dva2)   \
  541         ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
  542         (dva1)->dva_word[0] == (dva2)->dva_word[0])
  543 
  544 #define BP_EQUAL(bp1, bp2)      \
  545         (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) &&    \
  546         (bp1)->blk_birth == (bp2)->blk_birth &&                 \
  547         DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) &&    \
  548         DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) &&    \
  549         DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
  550 
  551 
  552 #define DVA_IS_VALID(dva)       (DVA_GET_ASIZE(dva) != 0)
  553 
  554 #define BP_IDENTITY(bp)         (ASSERT(!BP_IS_EMBEDDED(bp)), &(bp)->blk_dva[0])
  555 #define BP_IS_GANG(bp)          \
  556         (BP_IS_EMBEDDED(bp) ? B_FALSE : DVA_GET_GANG(BP_IDENTITY(bp)))
  557 #define DVA_IS_EMPTY(dva)       ((dva)->dva_word[0] == 0ULL &&  \
  558                                 (dva)->dva_word[1] == 0ULL)
  559 #define BP_IS_HOLE(bp) \
  560         (!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp)))
  561 
  562 #define BP_SET_REDACTED(bp) \
  563 {                                                       \
  564         BP_SET_EMBEDDED(bp, B_TRUE);                    \
  565         BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_REDACTED);   \
  566 }
  567 #define BP_IS_REDACTED(bp) \
  568         (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_REDACTED)
  569 
  570 /* BP_IS_RAIDZ(bp) assumes no block compression */
  571 #define BP_IS_RAIDZ(bp)         (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
  572                                 BP_GET_PSIZE(bp))
  573 
  574 #define BP_ZERO(bp)                             \
  575 {                                               \
  576         (bp)->blk_dva[0].dva_word[0] = 0;       \
  577         (bp)->blk_dva[0].dva_word[1] = 0;       \
  578         (bp)->blk_dva[1].dva_word[0] = 0;       \
  579         (bp)->blk_dva[1].dva_word[1] = 0;       \
  580         (bp)->blk_dva[2].dva_word[0] = 0;       \
  581         (bp)->blk_dva[2].dva_word[1] = 0;       \
  582         (bp)->blk_prop = 0;                     \
  583         (bp)->blk_pad[0] = 0;                   \
  584         (bp)->blk_pad[1] = 0;                   \
  585         (bp)->blk_phys_birth = 0;               \
  586         (bp)->blk_birth = 0;                    \
  587         (bp)->blk_fill = 0;                     \
  588         ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
  589 }
  590 
  591 #ifdef _ZFS_BIG_ENDIAN
  592 #define ZFS_HOST_BYTEORDER      (0ULL)
  593 #else
  594 #define ZFS_HOST_BYTEORDER      (1ULL)
  595 #endif
  596 
  597 #define BP_SHOULD_BYTESWAP(bp)  (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
  598 
  599 #define BP_SPRINTF_LEN  400
  600 
  601 /*
  602  * This macro allows code sharing between zfs, libzpool, and mdb.
  603  * 'func' is either kmem_scnprintf() or mdb_snprintf().
  604  * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
       *
       * Formats a human-readable description of block pointer 'bp' into 'buf',
       * whose capacity is 'size'.  'type', 'checksum' and 'compress' are strings
       * supplied by the caller and printed with %s.  Exactly one form is emitted:
       *
       *      "<NULL>"        bp is NULL
       *      "HOLE ..."      BP_IS_HOLE(bp)
       *      "REDACTED ..."  BP_IS_REDACTED(bp)
       *      "EMBEDDED ..."  any other embedded block pointer
       *      otherwise one "DVA[n]=<vdev:offset:asize>" per DVA, the salt/iv
       *      words for encrypted blocks, then level, properties, sizes, birth
       *      txgs, fill count and checksum.
       *
       * BP_IS_REDACTED() is only true for embedded block pointers, so it has to
       * be tested before BP_IS_EMBEDDED(); in the opposite order the REDACTED
       * form can never be selected.
       *
       * Every branch appends with "len +=", keeping 'len' the running total that
       * the closing ASSERT(len < size) checks.
       *
       * The expansion is a bare { } block that declares its own locals
       * (copyname, len, copies, crypt_type).  'bp', 'buf' and 'size' are
       * evaluated many times and are not parenthesized, so pass plain
       * identifiers.
  605  */
  606 
  607 #define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
  608 {                                                                       \
  609         static const char *const copyname[] =                           \
  610             { "zero", "single", "double", "triple" };                   \
  611         int len = 0;                                                    \
  612         int copies = 0;                                                 \
  613         const char *crypt_type = NULL;                                  \
  614         if (bp != NULL) {                                               \
  615                 if (BP_IS_ENCRYPTED(bp)) {                              \
  616                         crypt_type = "encrypted";                       \
  617                         /* LINTED E_SUSPICIOUS_COMPARISON */            \
  618                 } else if (BP_IS_AUTHENTICATED(bp)) {                   \
  619                         crypt_type = "authenticated";                   \
  620                 } else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {             \
  621                         crypt_type = "indirect-MAC";                    \
  622                 } else {                                                \
  623                         crypt_type = "unencrypted";                     \
  624                 }                                                       \
  625         }                                                               \
  626         if (bp == NULL) {                                               \
  627                 len += func(buf + len, size - len, "<NULL>");           \
  628         } else if (BP_IS_HOLE(bp)) {                                    \
  629                 len += func(buf + len, size - len,                      \
  630                     "HOLE [L%llu %s] "                                  \
  631                     "size=%llxL birth=%lluL",                           \
  632                     (u_longlong_t)BP_GET_LEVEL(bp),                     \
  633                     type,                                               \
  634                     (u_longlong_t)BP_GET_LSIZE(bp),                     \
  635                     (u_longlong_t)bp->blk_birth);                       \
  636         } else if (BP_IS_REDACTED(bp)) {                                \
  637                 len += func(buf + len, size - len,                      \
  638                     "REDACTED [L%llu %s] size=%llxL birth=%lluL",       \
  639                     (u_longlong_t)BP_GET_LEVEL(bp),                     \
  640                     type,                                               \
  641                     (u_longlong_t)BP_GET_LSIZE(bp),                     \
  642                     (u_longlong_t)bp->blk_birth);                       \
  643         } else if (BP_IS_EMBEDDED(bp)) {                                \
  644                 len += func(buf + len, size - len,                      \
  645                     "EMBEDDED [L%llu %s] et=%u %s "                     \
  646                     "size=%llxL/%llxP birth=%lluL",                     \
  647                     (u_longlong_t)BP_GET_LEVEL(bp),                     \
  648                     type,                                               \
  649                     (int)BPE_GET_ETYPE(bp),                             \
  650                     compress,                                           \
  651                     (u_longlong_t)BPE_GET_LSIZE(bp),                    \
  652                     (u_longlong_t)BPE_GET_PSIZE(bp),                    \
  653                     (u_longlong_t)bp->blk_birth);                       \
  654         } else {                                                        \
  655                 for (int d = 0; d < BP_GET_NDVAS(bp); d++) {            \
  656                         const dva_t *dva = &bp->blk_dva[d];             \
  657                         if (DVA_IS_VALID(dva))                          \
  658                                 copies++;                               \
  659                         len += func(buf + len, size - len,              \
  660                             "DVA[%d]=<%llu:%llx:%llx>%c", d,            \
  661                             (u_longlong_t)DVA_GET_VDEV(dva),            \
  662                             (u_longlong_t)DVA_GET_OFFSET(dva),          \
  663                             (u_longlong_t)DVA_GET_ASIZE(dva),           \
  664                             ws);                                        \
  665                 }                                                       \
  666                 if (BP_IS_ENCRYPTED(bp)) {                              \
  667                         len += func(buf + len, size - len,              \
  668                             "salt=%llx iv=%llx:%llx%c",                 \
  669                             (u_longlong_t)bp->blk_dva[2].dva_word[0],   \
  670                             (u_longlong_t)bp->blk_dva[2].dva_word[1],   \
  671                             (u_longlong_t)BP_GET_IV2(bp),               \
  672                             ws);                                        \
  673                 }                                                       \
  674                 if (BP_IS_GANG(bp) &&                                   \
  675                     DVA_GET_ASIZE(&bp->blk_dva[2]) <=                   \
  676                     DVA_GET_ASIZE(&bp->blk_dva[1]) / 2)                 \
  677                         copies--;                                       \
  678                 len += func(buf + len, size - len,                      \
  679                     "[L%llu %s] %s %s %s %s %s %s %s%c"                 \
  680                     "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c"    \
  681                     "cksum=%llx:%llx:%llx:%llx",                        \
  682                     (u_longlong_t)BP_GET_LEVEL(bp),                     \
  683                     type,                                               \
  684                     checksum,                                           \
  685                     compress,                                           \
  686                     crypt_type,                                         \
  687                     BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",            \
  688                     BP_IS_GANG(bp) ? "gang" : "contiguous",             \
  689                     BP_GET_DEDUP(bp) ? "dedup" : "unique",              \
  690                     copyname[copies],                                   \
  691                     ws,                                                 \
  692                     (u_longlong_t)BP_GET_LSIZE(bp),                     \
  693                     (u_longlong_t)BP_GET_PSIZE(bp),                     \
  694                     (u_longlong_t)bp->blk_birth,                        \
  695                     (u_longlong_t)BP_PHYSICAL_BIRTH(bp),                \
  696                     (u_longlong_t)BP_GET_FILL(bp),                      \
  697                     ws,                                                 \
  698                     (u_longlong_t)bp->blk_cksum.zc_word[0],             \
  699                     (u_longlong_t)bp->blk_cksum.zc_word[1],             \
  700                     (u_longlong_t)bp->blk_cksum.zc_word[2],             \
  701                     (u_longlong_t)bp->blk_cksum.zc_word[3]);            \
  702         }                                                               \
  703         ASSERT(len < size);                                             \
  704 }
  705 
      /* Select the ARC buffer type for 'bp': METADATA if BP_IS_METADATA, else DATA. */
  706 #define BP_GET_BUFC_TYPE(bp) \
  707         (!(BP_IS_METADATA(bp)) ? ARC_BUFC_DATA : ARC_BUFC_METADATA)
  708 
      /*
       * Kind of pool import being performed.
       * NOTE(review): presumably EXISTING imports a pool from its stored
       * configuration and ASSEMBLE builds one from a supplied vdev tree --
       * this header does not say; confirm against the users of this enum.
       */
  709 typedef enum spa_import_type {
  710         SPA_IMPORT_EXISTING,
  711         SPA_IMPORT_ASSEMBLE
  712 } spa_import_type_t;
  713 
      /*
       * Pool access mode.  This is the argument type of spa_init() and the
       * return type of spa_mode(), both declared later in this header; the
       * global spa_mode_global has this type as well.
       */
  714 typedef enum spa_mode {
  715         SPA_MODE_UNINIT = 0,
  716         SPA_MODE_READ = 1,
  717         SPA_MODE_WRITE = 2,
  718 } spa_mode_t;
  719 
  720 /*
  721  * Send TRIM commands in-line during normal pool operation while deleting.
  722  *      OFF: no
  723  *      ON: yes
  724  * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
       * SPA_AUTOTRIM_DEFAULT is an alias: ON when IN_FREEBSD_BASE is defined,
       * OFF otherwise.
  725  */
  726 typedef enum {
  727         SPA_AUTOTRIM_OFF = 0,   /* default unless IN_FREEBSD_BASE is defined */
  728         SPA_AUTOTRIM_ON,
  729 #ifdef IN_FREEBSD_BASE
  730         SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
  731 #else
  732         SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
  733 #endif
  734 } spa_autotrim_t;
  735 
  736 /*
  737  * Reason TRIM command was issued, used internally for accounting purposes.
       * This is the 'type' argument of spa_iostats_trim_add().
       * NOTE(review): the three values appear to correspond to the trim_*,
       * autotrim_* and simple_trim_* counter groups of spa_iostats_t -- the
       * mapping is inferred from the names; confirm in the implementation.
  738  */
  739 typedef enum trim_type {
  740         TRIM_TYPE_MANUAL = 0,
  741         TRIM_TYPE_AUTO = 1,
  742         TRIM_TYPE_SIMPLE = 2
  743 } trim_type_t;
  744 
  745 /* state manipulation functions */
      /*
       * Prototypes only; the definitions live outside this header.
       * Most of these identify the pool by name ('pool'); spa_open() and
       * spa_open_rewind() additionally take a spa_t ** and a caller 'tag'.
       * NOTE(review): the int-returning routines presumably return 0 on success
       * and an errno value otherwise -- not stated here; confirm in spa.c.
       */
  746 extern int spa_open(const char *pool, spa_t **, const void *tag);
  747 extern int spa_open_rewind(const char *pool, spa_t **, const void *tag,
  748     nvlist_t *policy, nvlist_t **config);
  749 extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
  750     size_t buflen);
  751 extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
  752     nvlist_t *zplprops, struct dsl_crypto_params *dcp);
  753 extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
  754     uint64_t flags);
  755 extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
  756 extern int spa_destroy(const char *pool);
  757 extern int spa_checkpoint(const char *pool);
  758 extern int spa_checkpoint_discard(const char *pool);
  759 extern int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force,
  760     boolean_t hardforce);
  761 extern int spa_reset(const char *pool);
      /* The 'flag' arguments below are ints; see the SPA_ASYNC_* definitions. */
  762 extern void spa_async_request(spa_t *spa, int flag);
  763 extern void spa_async_unrequest(spa_t *spa, int flag);
  764 extern void spa_async_suspend(spa_t *spa);
  765 extern void spa_async_resume(spa_t *spa);
  766 extern int spa_async_tasks(spa_t *spa);
  767 extern spa_t *spa_inject_addref(char *pool);
  768 extern void spa_inject_delref(spa_t *spa);
  769 extern void spa_scan_stat_init(spa_t *spa);
  770 extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
      /* Two callbacks sharing one signature: (arg, block pointer, transaction). */
  771 extern int bpobj_enqueue_alloc_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
  772 extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
  773 
      /*
       * Async task bits.  Each value is a distinct power of two (0x01 through
       * 0x2000), so several can be combined in one int.
       * NOTE(review): presumably these are the 'flag' values accepted by
       * spa_async_request() / spa_async_unrequest() -- confirm.
       */
  774 #define SPA_ASYNC_CONFIG_UPDATE                 0x01
  775 #define SPA_ASYNC_REMOVE                        0x02
  776 #define SPA_ASYNC_PROBE                         0x04
  777 #define SPA_ASYNC_RESILVER_DONE                 0x08
  778 #define SPA_ASYNC_RESILVER                      0x10
  779 #define SPA_ASYNC_AUTOEXPAND                    0x20
  780 #define SPA_ASYNC_REMOVE_DONE                   0x40
  781 #define SPA_ASYNC_REMOVE_STOP                   0x80
  782 #define SPA_ASYNC_INITIALIZE_RESTART            0x100
  783 #define SPA_ASYNC_TRIM_RESTART                  0x200
  784 #define SPA_ASYNC_AUTOTRIM_RESTART              0x400
  785 #define SPA_ASYNC_L2CACHE_REBUILD               0x800
  786 #define SPA_ASYNC_L2CACHE_TRIM                  0x1000
  787 #define SPA_ASYNC_REBUILD_DONE                  0x2000
  788 
  789 /* device manipulation */
      /*
       * Prototypes only.  Individual vdevs are addressed by 64-bit guid; vdev
       * descriptions are passed as nvlists.
       * NOTE(review): 'replacing', 'rebuild' and 'replace_done' are plain ints
       * that look like boolean switches -- confirm in the definitions.
       */
  790 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
  791 extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
  792     int replacing, int rebuild);
  793 extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
  794     int replace_done);
  795 extern int spa_vdev_alloc(spa_t *spa, uint64_t guid);
  796 extern int spa_vdev_noalloc(spa_t *spa, uint64_t guid);
  797 extern boolean_t spa_vdev_remove_active(spa_t *spa);
  798 extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
  799     nvlist_t *vdev_errlist);
  800 extern int spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
  801     uint64_t rate, boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist);
  802 extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
  803 extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
  804 extern int spa_vdev_split_mirror(spa_t *spa, const char *newname,
  805     nvlist_t *config, nvlist_t *props, boolean_t exp);
  806 
  807 /* spare state (which is global across all pools) */
      /*
       * spa_spare_exists() looks a spare up by guid and takes two pointer
       * arguments, 'pool' and 'refcnt'.
       * NOTE(review): those look like output parameters -- confirm whether
       * NULL is accepted for either.
       */
  808 extern void spa_spare_add(vdev_t *vd);
  809 extern void spa_spare_remove(vdev_t *vd);
  810 extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
  811 extern void spa_spare_activate(vdev_t *vd);
  812 
  813 /* L2ARC state (which is global across all pools) */
      /*
       * Same shape as the spare routines above, except that
       * spa_l2cache_exists() has no reference-count argument and
       * spa_l2cache_drop() operates on a whole pool rather than one vdev.
       */
  814 extern void spa_l2cache_add(vdev_t *vd);
  815 extern void spa_l2cache_remove(vdev_t *vd);
  816 extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
  817 extern void spa_l2cache_activate(vdev_t *vd);
  818 extern void spa_l2cache_drop(spa_t *spa);
  819 
  820 /* scanning */
      /* 'func' selects the scan function; 'flag' is a pool_scrub_cmd_t command. */
  821 extern int spa_scan(spa_t *spa, pool_scan_func_t func);
  822 extern int spa_scan_stop(spa_t *spa);
  823 extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
  824 
  825 /* spa syncing */
  826 extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
  827 extern void spa_sync_allpools(void);
  828 
      /* NOTE(review): looks like a tunable (zfs_ prefix, uint_t) -- confirm. */
  829 extern uint_t zfs_sync_pass_deferred_free;
  830 
  831 /* spa namespace global mutex */
  832 extern kmutex_t spa_namespace_lock;
  833 
  834 /*
  835  * SPA configuration functions in spa_config.c
  836  */
  837 
      /*
       * NOTE(review): presumably the values for the 'what' argument of
       * spa_config_update() -- confirm.
       */
  838 #define SPA_CONFIG_UPDATE_POOL  0
  839 #define SPA_CONFIG_UPDATE_VDEVS 1
  840 
      /*
       * NOTE(review): spa_write_cachefile() and spa_all_configs() leave their
       * parameters unnamed, so the meaning of the three booleans and of the
       * uint64_t pointer has to be read from spa_config.c.
       */
  841 extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
  842 extern void spa_config_load(void);
  843 extern nvlist_t *spa_all_configs(uint64_t *);
  844 extern void spa_config_set(spa_t *spa, nvlist_t *config);
  845 extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
  846     int getstats);
  847 extern void spa_config_update(spa_t *spa, int what);
  848 extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
  849     vdev_t *parent, uint_t id, int atype);
  850 
  851 
  852 /*
  853  * Miscellaneous SPA routines in spa_misc.c
  854  */
  855 
  856 /* Namespace manipulation */
      /*
       * NOTE(review): these presumably require spa_namespace_lock to be held --
       * this header does not say; confirm in spa_misc.c.
       */
  857 extern spa_t *spa_lookup(const char *name);
  858 extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
  859 extern void spa_remove(spa_t *spa);
  860 extern spa_t *spa_next(spa_t *prev);
  861 
  862 /* Refcount functions */
      /* Each reference is identified by a caller-supplied 'tag' pointer. */
  863 extern void spa_open_ref(spa_t *spa, const void *tag);
  864 extern void spa_close(spa_t *spa, const void *tag);
  865 extern void spa_async_close(spa_t *spa, const void *tag);
  866 extern boolean_t spa_refcount_zero(spa_t *spa);
  867 
      /*
       * Config-lock bits.  The seven named locks occupy bits 0x01 through 0x40;
       * SCL_LOCKS is their count, so SCL_ALL evaluates to 0x7f (all seven).
       * SCL_STATE_ALL is the STATE, L2ARC and ZIO bits together.
       * NOTE(review): presumably the 'locks' argument of spa_config_enter() and
       * friends -- confirm.
       */
  868 #define SCL_NONE        0x00
  869 #define SCL_CONFIG      0x01
  870 #define SCL_STATE       0x02
  871 #define SCL_L2ARC       0x04            /* hack until L2ARC 2.0 */
  872 #define SCL_ALLOC       0x08
  873 #define SCL_ZIO         0x10
  874 #define SCL_FREE        0x20
  875 #define SCL_VDEV        0x40
  876 #define SCL_LOCKS       7
  877 #define SCL_ALL         ((1 << SCL_LOCKS) - 1)
  878 #define SCL_STATE_ALL   (SCL_STATE | SCL_L2ARC | SCL_ZIO)
  879 
  880 /* Historical pool statistics */
      /*
       * kstat-backed statistic: a mutex, two counters, the kstat handle, an
       * opaque private pointer and a list.
       * NOTE(review): 'lock' presumably protects the other fields -- confirm.
       */
  881 typedef struct spa_history_kstat {
  882         kmutex_t                lock;
  883         uint64_t                count;
  884         uint64_t                size;
  885         kstat_t                 *kstat;
  886         void                    *priv;
  887         list_t                  list;
  888 } spa_history_kstat_t;
  889 
      /* procfs_list-backed statistic with a 'size' field. */
  890 typedef struct spa_history_list {
  891         uint64_t                size;
  892         procfs_list_t           procfs_list;
  893 } spa_history_list_t;
  894 
      /*
       * Aggregate of all per-pool statistics: three procfs-list histories (read,
       * txg, mmp) and four kstat-backed entries.
       */
  895 typedef struct spa_stats {
  896         spa_history_list_t      read_history;
  897         spa_history_list_t      txg_history;
  898         spa_history_kstat_t     tx_assign_histogram;
  899         spa_history_list_t      mmp_history;
  900         spa_history_kstat_t     state;          /* pool state */
  901         spa_history_kstat_t     guid;           /* pool guid */
  902         spa_history_kstat_t     iostats;
  903 } spa_stats_t;
  904 
      /*
       * Transaction-group states, numbered 0 through 5 in declaration order.
       * spa_txg_history_set() takes one of these as 'completed_state'.
       */
  905 typedef enum txg_state {
  906         TXG_STATE_BIRTH         = 0,
  907         TXG_STATE_OPEN          = 1,
  908         TXG_STATE_QUIESCED      = 2,
  909         TXG_STATE_WAIT_FOR_SYNC = 3,
  910         TXG_STATE_SYNCED        = 4,
  911         TXG_STATE_COMMITTED     = 5,
  912 } txg_state_t;
  913 
      /*
       * Returned by spa_txg_history_init_io() and consumed by
       * spa_txg_history_fini_io().
       * NOTE(review): vs1/vs2 look like vdev statistics sampled before and after
       * the txg's I/O -- confirm in the implementation.
       */
  914 typedef struct txg_stat {
  915         vdev_stat_t             vs1;
  916         vdev_stat_t             vs2;
  917         uint64_t                txg;
  918         uint64_t                ndirty;
  919 } txg_stat_t;
  920 
  921 /* Assorted pool IO kstats */
      /*
       * Three groups (trim_, autotrim_, simple_trim_) of six named kstats each:
       * extents and bytes, for written / skipped / failed.
       */
  922 typedef struct spa_iostats {
  923         kstat_named_t   trim_extents_written;
  924         kstat_named_t   trim_bytes_written;
  925         kstat_named_t   trim_extents_skipped;
  926         kstat_named_t   trim_bytes_skipped;
  927         kstat_named_t   trim_extents_failed;
  928         kstat_named_t   trim_bytes_failed;
  929         kstat_named_t   autotrim_extents_written;
  930         kstat_named_t   autotrim_bytes_written;
  931         kstat_named_t   autotrim_extents_skipped;
  932         kstat_named_t   autotrim_bytes_skipped;
  933         kstat_named_t   autotrim_extents_failed;
  934         kstat_named_t   autotrim_bytes_failed;
  935         kstat_named_t   simple_trim_extents_written;
  936         kstat_named_t   simple_trim_bytes_written;
  937         kstat_named_t   simple_trim_extents_skipped;
  938         kstat_named_t   simple_trim_bytes_skipped;
  939         kstat_named_t   simple_trim_extents_failed;
  940         kstat_named_t   simple_trim_bytes_failed;
  941 } spa_iostats_t;
  942 
      /*
       * Statistics maintenance routines (prototypes only).  Grouped by the
       * history they feed: read, txg, tx-assign, mmp, TRIM iostats and import
       * progress.
       */
  943 extern void spa_stats_init(spa_t *spa);
  944 extern void spa_stats_destroy(spa_t *spa);
  945 extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb,
  946     uint32_t aflags);
  947 extern void spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time);
  948 extern int spa_txg_history_set(spa_t *spa,  uint64_t txg,
  949     txg_state_t completed_state, hrtime_t completed_time);
  950 extern txg_stat_t *spa_txg_history_init_io(spa_t *, uint64_t,
  951     struct dsl_pool *);
  952 extern void spa_txg_history_fini_io(spa_t *, txg_stat_t *);
  953 extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs);
  954 extern int spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id);
  955 extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
  956     hrtime_t duration);
  957 extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
  958     uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
  959     int error);
      /* Arguments come in extents/bytes pairs for written, skipped and failed. */
  960 extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
  961     uint64_t extents_written, uint64_t bytes_written,
  962     uint64_t extents_skipped, uint64_t bytes_skipped,
  963     uint64_t extents_failed, uint64_t bytes_failed);
      /* Import progress: added per spa_t, then updated and removed by pool guid. */
  964 extern void spa_import_progress_add(spa_t *spa);
  965 extern void spa_import_progress_remove(uint64_t spa_guid);
  966 extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
  967     uint64_t mmp_sec_remaining);
  968 extern int spa_import_progress_set_max_txg(uint64_t pool_guid,
  969     uint64_t max_txg);
  970 extern int spa_import_progress_set_state(uint64_t pool_guid,
  971     spa_load_state_t spa_load_state);
  972 
  973 /* Pool configuration locks */
      /*
       * 'locks' is an int and 'rw' a krw_t; each acquisition carries a caller
       * 'tag' that is passed again on exit.
       * NOTE(review): 'locks' is presumably a mask of SCL_* bits -- confirm.
       */
  974 extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag,
  975     krw_t rw);
  976 extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
  977 extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
  978 extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
  979 
  980 /* Pool vdev add/remove lock */
      /*
       * The enter routines return a uint64_t and the exit routines take a
       * uint64_t 'txg'.
       * NOTE(review): presumably the value returned on enter is the txg to hand
       * back on exit -- confirm.
       */
  981 extern uint64_t spa_vdev_enter(spa_t *spa);
  982 extern uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid);
  983 extern uint64_t spa_vdev_config_enter(spa_t *spa);
  984 extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
  985     int error, const char *tag);
  986 extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
  987 
  988 /* Pool vdev state change lock */
  989 extern void spa_vdev_state_enter(spa_t *spa, int oplock);
  990 extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
  991 
  992 /* Log state */
      /* Values run 0 through 3 in declaration order; UNKNOWN is the zero value. */
  993 typedef enum spa_log_state {
  994         SPA_LOG_UNKNOWN = 0,    /* unknown log state */
  995         SPA_LOG_MISSING,        /* missing log(s) */
  996         SPA_LOG_CLEAR,          /* clear the log(s) */
  997         SPA_LOG_GOOD,           /* log(s) are good */
  998 } spa_log_state_t;
  999 
      /* Getter and setter for a pool's spa_log_state_t, plus a reset routine. */
 1000 extern spa_log_state_t spa_get_log_state(spa_t *spa);
 1001 extern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
 1002 extern int spa_reset_logs(spa_t *spa);
 1003 
 1004 /* Log claim callback */
 1005 extern void spa_claim_notify(zio_t *zio);
      /* NOTE(review): the void * argument is unnamed; presumably the spa_t. */
 1006 extern void spa_deadman(void *);
 1007 
 1008 /* Accessor functions */
      /*
       * Each takes the pool and returns one property of it; the names describe
       * the property.  Only spa_set_rootblkptr() and spa_altroot() write
       * through an argument or into the pool.
       * NOTE(review): spa_altroot() fills a caller buffer of the given size --
       * parameter names are omitted in the prototype; confirm order.
       */
 1009 extern boolean_t spa_shutting_down(spa_t *spa);
 1010 extern struct dsl_pool *spa_get_dsl(spa_t *spa);
 1011 extern boolean_t spa_is_initializing(spa_t *spa);
 1012 extern boolean_t spa_indirect_vdevs_loaded(spa_t *spa);
 1013 extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
 1014 extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
 1015 extern void spa_altroot(spa_t *, char *, size_t);
 1016 extern uint32_t spa_sync_pass(spa_t *spa);
 1017 extern char *spa_name(spa_t *spa);
 1018 extern uint64_t spa_guid(spa_t *spa);
 1019 extern uint64_t spa_load_guid(spa_t *spa);
 1020 extern uint64_t spa_last_synced_txg(spa_t *spa);
 1021 extern uint64_t spa_first_txg(spa_t *spa);
 1022 extern uint64_t spa_syncing_txg(spa_t *spa);
 1023 extern uint64_t spa_final_dirty_txg(spa_t *spa);
 1024 extern uint64_t spa_version(spa_t *spa);
 1025 extern pool_state_t spa_state(spa_t *spa);
 1026 extern spa_load_state_t spa_load_state(spa_t *spa);
 1027 extern uint64_t spa_freeze_txg(spa_t *spa);
 1028 extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
 1029 extern uint64_t spa_get_dspace(spa_t *spa);
 1030 extern uint64_t spa_get_checkpoint_space(spa_t *spa);
 1031 extern uint64_t spa_get_slop_space(spa_t *spa);
 1032 extern void spa_update_dspace(spa_t *spa);
 1034 extern boolean_t spa_deflate(spa_t *spa);
      /*
       * Metaslab class accessors: each returns a metaslab_class_t pointer for
       * the given pool.  spa_preferred_class() chooses one from the block's
       * size, object type, level and the 'special_smallblk' threshold.
       * NOTE(review): the selection policy itself is not visible in this
       * header -- confirm in the definition.
       */
 1035 extern metaslab_class_t *spa_normal_class(spa_t *spa);
 1036 extern metaslab_class_t *spa_log_class(spa_t *spa);
 1037 extern metaslab_class_t *spa_embedded_log_class(spa_t *spa);
 1038 extern metaslab_class_t *spa_special_class(spa_t *spa);
 1039 extern metaslab_class_t *spa_dedup_class(spa_t *spa);
 1040 extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size,
 1041     dmu_object_type_t objtype, uint_t level, uint_t special_smallblk);
 1042 
      /*
       * Register / deregister an objset with the pool, and wait on the pool.
       * NOTE(review): presumably the wait returns once no registered objsets
       * remain -- confirm.
       */
 1043 extern void spa_evicting_os_register(spa_t *, objset_t *os);
 1044 extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
 1045 extern void spa_evicting_os_wait(spa_t *spa);
      /* Further single-value accessors; all take the pool as sole input except */
      /* spa_set_deadman_failmode(), which sets the mode from a string. */
 1046 extern int spa_max_replication(spa_t *spa);
 1047 extern int spa_prev_software_version(spa_t *spa);
 1048 extern uint64_t spa_get_failmode(spa_t *spa);
 1049 extern uint64_t spa_get_deadman_failmode(spa_t *spa);
 1050 extern void spa_set_deadman_failmode(spa_t *spa, const char *failmode);
 1051 extern boolean_t spa_suspended(spa_t *spa);
 1052 extern uint64_t spa_bootfs(spa_t *spa);
 1053 extern uint64_t spa_delegation(spa_t *spa);
 1054 extern objset_t *spa_meta_objset(spa_t *spa);
 1055 extern space_map_t *spa_syncing_log_sm(spa_t *spa);
 1056 extern uint64_t spa_deadman_synctime(spa_t *spa);
 1057 extern uint64_t spa_deadman_ziotime(spa_t *spa);
 1058 extern uint64_t spa_dirty_data(spa_t *spa);
 1059 extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
 1060 
 1061 /* Miscellaneous support routines */
      /*
       * spa_load_failed() and spa_load_note() are printf-style: the format
       * attribute makes the compiler check 'fmt' (argument 2) against the
       * variadic arguments (from argument 3).
       */
 1062 extern void spa_load_failed(spa_t *spa, const char *fmt, ...)
 1063     __attribute__((format(printf, 2, 3)));
 1064 extern void spa_load_note(spa_t *spa, const char *fmt, ...)
 1065     __attribute__((format(printf, 2, 3)));
 1066 extern void spa_activate_mos_feature(spa_t *spa, const char *feature,
 1067     dmu_tx_t *tx);
 1068 extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
 1069 extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
 1070 extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
      /* NOTE(review): presumably strings from spa_strdup() go to spa_strfree(). */
 1071 extern char *spa_strdup(const char *);
 1072 extern void spa_strfree(char *);
 1073 extern uint64_t spa_generate_guid(spa_t *spa);
      /* Function form of block-pointer formatting; dprintf_bp() calls it. */
 1074 extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
 1075 extern void spa_freeze(spa_t *spa);
 1076 extern int spa_change_guid(spa_t *spa);
 1077 extern void spa_upgrade(spa_t *spa, uint64_t version);
 1078 extern void spa_evict_all(void);
 1079 extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
 1080     boolean_t l2cache);
 1081 extern boolean_t spa_has_l2cache(spa_t *, uint64_t guid);
 1082 extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
 1083 extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
 1084 extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
 1085 extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
 1086 extern boolean_t spa_has_slogs(spa_t *spa);
 1087 extern boolean_t spa_is_root(spa_t *spa);
 1088 extern boolean_t spa_writeable(spa_t *spa);
 1089 extern boolean_t spa_has_pending_synctask(spa_t *spa);
 1090 extern int spa_maxblocksize(spa_t *spa);
 1091 extern int spa_maxdnodesize(spa_t *spa);
 1092 extern boolean_t spa_has_checkpoint(spa_t *spa);
 1093 extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
 1094 extern boolean_t spa_suspend_async_destroy(spa_t *spa);
 1095 extern uint64_t spa_min_claim_txg(spa_t *spa);
 1096 extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva,
 1097     const blkptr_t *bp);
      /*
       * Callback type accepted by spa_remap_blkptr(): receives a vdev id, an
       * offset, a size and the caller's opaque 'arg'.
       */
 1098 typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size,
 1099     void *arg);
 1100 extern boolean_t spa_remap_blkptr(spa_t *spa, blkptr_t *bp,
 1101     spa_remap_cb_t callback, void *arg);
 1102 extern uint64_t spa_get_last_removal_txg(spa_t *spa);
 1103 extern boolean_t spa_trust_config(spa_t *spa);
 1104 extern uint64_t spa_missing_tvds_allowed(spa_t *spa);
 1105 extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing);
 1106 extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa);
 1107 extern uint64_t spa_total_metaslabs(spa_t *spa);
 1108 extern boolean_t spa_multihost(spa_t *spa);
 1109 extern uint32_t spa_get_hostid(spa_t *spa);
 1110 extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
 1111 extern boolean_t spa_livelist_delete_check(spa_t *spa);
 1112 
 1113 extern spa_mode_t spa_mode(spa_t *spa);
      /* NOTE(review): 'nptr' looks like a strtol-style end pointer -- confirm. */
 1114 extern uint64_t zfs_strtonum(const char *str, char **nptr);
 1115 
      /* Array of strings, defined elsewhere; NOTE(review): indexing not shown. */
 1116 extern char *spa_his_ievent_table[];
 1117 
      /*
       * Pool history logging.  The three spa_history_log_internal*() variants
       * are printf-style: __printflike(4, 5) has the compiler check 'fmt'
       * (argument 4) against the variadic arguments (from argument 5).  They
       * differ only in what they are keyed on: a pool, a dataset, or a dsl_dir.
       */
 1118 extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
 1119 extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
 1120     char *his_buf);
 1121 extern int spa_history_log(spa_t *spa, const char *his_buf);
 1122 extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl);
 1123 extern void spa_history_log_version(spa_t *spa, const char *operation,
 1124     dmu_tx_t *tx);
 1125 extern void spa_history_log_internal(spa_t *spa, const char *operation,
 1126     dmu_tx_t *tx, const char *fmt, ...) __printflike(4, 5);
 1127 extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
 1128     dmu_tx_t *tx, const char *fmt, ...)  __printflike(4, 5);
 1129 extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
 1130     dmu_tx_t *tx, const char *fmt, ...) __printflike(4, 5);
 1131 
 1132 extern const char *spa_state_to_name(spa_t *spa);
 1133 
 1134 /* error handling */
      /*
       * Error log and ereport/event posting prototypes.
       * NOTE(review): the forward declaration below names the struct tag, while
       * the prototypes use the zbookmark_phys_t typedef, which must therefore
       * already be visible from an earlier include.
       */
 1135 struct zbookmark_phys;
 1136 extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
 1137 extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
 1138 extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
 1139     const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
 1140 extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
 1141     zio_t *zio);
 1142 extern void zfs_ereport_taskq_fini(void);
 1143 extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
 1144 extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
 1145     const char *name, nvlist_t *aux);
 1146 extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
 1147 extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
 1148 extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
 1149 extern uint64_t spa_approx_errlog_size(spa_t *spa);
      /* NOTE(review): 'uaddr' is presumably a user-space address -- confirm. */
 1150 extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
 1151 extern void spa_errlog_rotate(spa_t *spa);
 1152 extern void spa_errlog_drain(spa_t *spa);
 1153 extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
 1154 extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
 1155 extern void spa_delete_dataset_errlog(spa_t *spa, uint64_t ds, dmu_tx_t *tx);
 1156 extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
 1157     uint64_t old_head_ds, dmu_tx_t *tx);
 1158 extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
 1159     dmu_tx_t *tx);
 1160 extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);
 1161 
 1162 /* vdev cache */
      /* Paired init/fini routines taking no arguments. */
 1163 extern void vdev_cache_stat_init(void);
 1164 extern void vdev_cache_stat_fini(void);
 1165 
 1166 /* vdev mirror */
 1167 extern void vdev_mirror_stat_init(void);
 1168 extern void vdev_mirror_stat_fini(void);
 1169 
 1170 /* Initialization and termination */
      /* spa_init() takes the spa_mode_t to start in; spa_fini() takes nothing. */
 1171 extern void spa_init(spa_mode_t mode);
 1172 extern void spa_fini(void);
 1173 extern void spa_boot_init(void);
 1174 
 1175 /* properties */
      /*
       * Properties travel as nvlists: spa_prop_set() takes one, spa_prop_get()
       * takes an nvlist_t ** to receive one.
       * NOTE(review): ownership of the list from spa_prop_get() is not stated
       * here -- confirm who frees it.
       */
 1176 extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
 1177 extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
 1178 extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
 1179 extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
 1180 
 1181 /* asynchronous event notification */
 1182 extern void spa_event_notify(spa_t *spa, vdev_t *vdev, nvlist_t *hist_nvl,
 1183     const char *name);
 1184 extern void zfs_ereport_zvol_post(const char *subclass, const char *name,
 1185     const char *device_name, const char *raw_name);
 1186 
 1187 /* waiting for pool activities to complete */
      /*
       * Both wait routines identify the pool by name and take a boolean_t
       * pointer, 'waited'; spa_wait_tag() additionally takes a 64-bit 'tag'.
       * NOTE(review): presumably '*waited' reports whether the call blocked --
       * confirm.
       */
 1188 extern int spa_wait(const char *pool, zpool_wait_activity_t activity,
 1189     boolean_t *waited);
 1190 extern int spa_wait_tag(const char *name, zpool_wait_activity_t activity,
 1191     uint64_t tag, boolean_t *waited);
 1192 extern void spa_notify_waiters(spa_t *spa);
 1193 extern void spa_wake_waiters(spa_t *spa);
 1194 
      /* NOTE(review): the _os suffix suggests per-platform hooks -- confirm. */
 1195 extern void spa_import_os(spa_t *spa);
 1196 extern void spa_export_os(spa_t *spa);
 1197 extern void spa_activate_os(spa_t *spa);
 1198 extern void spa_deactivate_os(spa_t *spa);
 1199 
 1200 /* module param call functions */
      /*
       * The parameter list is supplied by the ZFS_MODULE_PARAM_ARGS macro, which
       * is defined outside this header.
       */
 1201 int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS);
 1202 int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);
 1203 int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS);
 1204 int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
 1205 
      /*
       * Debug print of a block pointer.  With ZFS_DEBUG defined and the
       * ZFS_DEBUG_DPRINTF bit set in zfs_flags, formats 'bp' into a temporary
       * BP_SPRINTF_LEN-byte buffer and passes it to dprintf() after the caller's
       * own format and arguments.  Without ZFS_DEBUG the macro expands to
       * nothing, so its arguments are not evaluated.
       */
 1206 #ifdef ZFS_DEBUG
 1207 #define dprintf_bp(bp, fmt, ...) do {                                   \
 1208         if ((zfs_flags & ZFS_DEBUG_DPRINTF) != 0) {                     \
 1209                 char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);  \
 1210                 snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp));        \
 1211                 dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf);            \
 1212                 kmem_free(__blkbuf, BP_SPRINTF_LEN);                    \
 1213         }                                                               \
 1214 } while (0)
 1215 #else
 1216 #define dprintf_bp(bp, fmt, ...)
 1217 #endif
 1218 
      /*
       * Globals defined elsewhere.
       * NOTE(review): the _ms suffix on the deadman variables indicates
       * milliseconds -- confirm against the tunable documentation.
       */
 1219 extern spa_mode_t spa_mode_global;
 1220 extern int zfs_deadman_enabled;
 1221 extern uint64_t zfs_deadman_synctime_ms;
 1222 extern uint64_t zfs_deadman_ziotime_ms;
 1223 extern uint64_t zfs_deadman_checktime_ms;
 1224 
      /* Arrays of kmem cache pointers; their sizes are set at the definitions. */
 1225 extern kmem_cache_t *zio_buf_cache[];
 1226 extern kmem_cache_t *zio_data_buf_cache[];
 1227 
 1228 #ifdef  __cplusplus
 1229 }
 1230 #endif
 1231 
 1232 #endif  /* _SYS_SPA_H */

Cache object: b3ef20b0b7e85f5479d37301ca793949


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.