| 
     1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   24  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
   25  * Copyright 2017 Nexenta Systems, Inc.
   26  */
   27 
   28 /*
   29  * The 512-byte leaf is broken into 32 16-byte chunks.
   30  * chunk number n means l_chunk[n], even though the header precedes it.
   31  * the names are stored null-terminated.
   32  */
   33 
   34 #include <sys/zio.h>
   35 #include <sys/spa.h>
   36 #include <sys/dmu.h>
   37 #include <sys/zfs_context.h>
   38 #include <sys/fs/zfs.h>
   39 #include <sys/zap.h>
   40 #include <sys/zap_impl.h>
   41 #include <sys/zap_leaf.h>
   42 #include <sys/arc.h>
   43 
   44 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
   45 
   46 #define CHAIN_END 0xffff /* end of the chunk chain */
   47 
   48 #define LEAF_HASH(l, h) \
   49         ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
   50         ((h) >> \
   51         (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
   52 
   53 #define LEAF_HASH_ENTPTR(l, h)  (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
   54 
   55 static void
   56 zap_memset(void *a, int c, size_t n)
   57 {
   58         char *cp = a;
   59         char *cpend = cp + n;
   60 
   61         while (cp < cpend)
   62                 *cp++ = c;
   63 }
   64 
   65 static void
   66 stv(int len, void *addr, uint64_t value)
   67 {
   68         switch (len) {
   69         case 1:
   70                 *(uint8_t *)addr = value;
   71                 return;
   72         case 2:
   73                 *(uint16_t *)addr = value;
   74                 return;
   75         case 4:
   76                 *(uint32_t *)addr = value;
   77                 return;
   78         case 8:
   79                 *(uint64_t *)addr = value;
   80                 return;
   81         default:
   82                 cmn_err(CE_PANIC, "bad int len %d", len);
   83         }
   84 }
   85 
   86 static uint64_t
   87 ldv(int len, const void *addr)
   88 {
   89         switch (len) {
   90         case 1:
   91                 return (*(uint8_t *)addr);
   92         case 2:
   93                 return (*(uint16_t *)addr);
   94         case 4:
   95                 return (*(uint32_t *)addr);
   96         case 8:
   97                 return (*(uint64_t *)addr);
   98         default:
   99                 cmn_err(CE_PANIC, "bad int len %d", len);
  100         }
  101         return (0xFEEDFACEDEADBEEFULL);
  102 }
  103 
  104 void
  105 zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
  106 {
  107         zap_leaf_t l;
  108         dmu_buf_t l_dbuf;
  109 
  110         l_dbuf.db_data = buf;
  111         l.l_bs = highbit64(size) - 1;
  112         l.l_dbuf = &l_dbuf;
  113 
  114         buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
  115         buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
  116         buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
  117         buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
  118         buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
  119         buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
  120         buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);
  121 
  122         for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
  123                 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
  124 
  125         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
  126                 zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
  127                 struct zap_leaf_entry *le;
  128 
  129                 switch (lc->l_free.lf_type) {
  130                 case ZAP_CHUNK_ENTRY:
  131                         le = &lc->l_entry;
  132 
  133                         le->le_type =           BSWAP_8(le->le_type);
  134                         le->le_value_intlen =   BSWAP_8(le->le_value_intlen);
  135                         le->le_next =           BSWAP_16(le->le_next);
  136                         le->le_name_chunk =     BSWAP_16(le->le_name_chunk);
  137                         le->le_name_numints =   BSWAP_16(le->le_name_numints);
  138                         le->le_value_chunk =    BSWAP_16(le->le_value_chunk);
  139                         le->le_value_numints =  BSWAP_16(le->le_value_numints);
  140                         le->le_cd =             BSWAP_32(le->le_cd);
  141                         le->le_hash =           BSWAP_64(le->le_hash);
  142                         break;
  143                 case ZAP_CHUNK_FREE:
  144                         lc->l_free.lf_type =    BSWAP_8(lc->l_free.lf_type);
  145                         lc->l_free.lf_next =    BSWAP_16(lc->l_free.lf_next);
  146                         break;
  147                 case ZAP_CHUNK_ARRAY:
  148                         lc->l_array.la_type =   BSWAP_8(lc->l_array.la_type);
  149                         lc->l_array.la_next =   BSWAP_16(lc->l_array.la_next);
  150                         /* la_array doesn't need swapping */
  151                         break;
  152                 default:
  153                         cmn_err(CE_PANIC, "bad leaf type %d",
  154                             lc->l_free.lf_type);
  155                 }
  156         }
  157 }
  158 
  159 void
  160 zap_leaf_init(zap_leaf_t *l, boolean_t sort)
  161 {
  162         l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
  163         zap_memset(&zap_leaf_phys(l)->l_hdr, 0,
  164             sizeof (struct zap_leaf_header));
  165         zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
  166             2*ZAP_LEAF_HASH_NUMENTRIES(l));
  167         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
  168                 ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
  169                 ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
  170         }
  171         ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
  172         zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF;
  173         zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
  174         zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
  175         if (sort)
  176                 zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
  177 }
  178 
  179 /*
  180  * Routines which manipulate leaf chunks (l_chunk[]).
  181  */
  182 
  183 static uint16_t
  184 zap_leaf_chunk_alloc(zap_leaf_t *l)
  185 {
  186         ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);
  187 
  188         int chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
  189         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  190         ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
  191 
  192         zap_leaf_phys(l)->l_hdr.lh_freelist =
  193             ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
  194 
  195         zap_leaf_phys(l)->l_hdr.lh_nfree--;
  196 
  197         return (chunk);
  198 }
  199 
  200 static void
  201 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
  202 {
  203         struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
  204         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
  205         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  206         ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
  207 
  208         zlf->lf_type = ZAP_CHUNK_FREE;
  209         zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist;
  210         memset(zlf->lf_pad, 0, sizeof (zlf->lf_pad)); /* help it to compress */
  211         zap_leaf_phys(l)->l_hdr.lh_freelist = chunk;
  212 
  213         zap_leaf_phys(l)->l_hdr.lh_nfree++;
  214 }
  215 
  216 /*
  217  * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
  218  */
  219 
  220 static uint16_t
  221 zap_leaf_array_create(zap_leaf_t *l, const char *buf,
  222     int integer_size, int num_integers)
  223 {
  224         uint16_t chunk_head;
  225         uint16_t *chunkp = &chunk_head;
  226         int byten = 0;
  227         uint64_t value = 0;
  228         int shift = (integer_size - 1) * 8;
  229         int len = num_integers;
  230 
  231         ASSERT3U(num_integers * integer_size, <=, ZAP_MAXVALUELEN);
  232 
  233         while (len > 0) {
  234                 uint16_t chunk = zap_leaf_chunk_alloc(l);
  235                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
  236 
  237                 la->la_type = ZAP_CHUNK_ARRAY;
  238                 for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
  239                         if (byten == 0)
  240                                 value = ldv(integer_size, buf);
  241                         la->la_array[i] = value >> shift;
  242                         value <<= 8;
  243                         if (++byten == integer_size) {
  244                                 byten = 0;
  245                                 buf += integer_size;
  246                                 if (--len == 0)
  247                                         break;
  248                         }
  249                 }
  250 
  251                 *chunkp = chunk;
  252                 chunkp = &la->la_next;
  253         }
  254         *chunkp = CHAIN_END;
  255 
  256         return (chunk_head);
  257 }
  258 
  259 static void
  260 zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
  261 {
  262         uint16_t chunk = *chunkp;
  263 
  264         *chunkp = CHAIN_END;
  265 
  266         while (chunk != CHAIN_END) {
  267                 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
  268                 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
  269                     ZAP_CHUNK_ARRAY);
  270                 zap_leaf_chunk_free(l, chunk);
  271                 chunk = nextchunk;
  272         }
  273 }
  274 
  275 /* array_len and buf_len are in integers, not bytes */
  276 static void
  277 zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
  278     int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
  279     void *buf)
  280 {
  281         int len = MIN(array_len, buf_len);
  282         int byten = 0;
  283         uint64_t value = 0;
  284         char *p = buf;
  285 
  286         ASSERT3U(array_int_len, <=, buf_int_len);
  287 
  288         /* Fast path for one 8-byte integer */
  289         if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
  290                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
  291                 uint8_t *ip = la->la_array;
  292                 uint64_t *buf64 = buf;
  293 
  294                 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
  295                     (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
  296                     (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
  297                     (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
  298                 return;
  299         }
  300 
  301         /* Fast path for an array of 1-byte integers (eg. the entry name) */
  302         if (array_int_len == 1 && buf_int_len == 1 &&
  303             buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
  304                 while (chunk != CHAIN_END) {
  305                         struct zap_leaf_array *la =
  306                             &ZAP_LEAF_CHUNK(l, chunk).l_array;
  307                         memcpy(p, la->la_array, ZAP_LEAF_ARRAY_BYTES);
  308                         p += ZAP_LEAF_ARRAY_BYTES;
  309                         chunk = la->la_next;
  310                 }
  311                 return;
  312         }
  313 
  314         while (len > 0) {
  315                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
  316 
  317                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  318                 for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
  319                         value = (value << 8) | la->la_array[i];
  320                         byten++;
  321                         if (byten == array_int_len) {
  322                                 stv(buf_int_len, p, value);
  323                                 byten = 0;
  324                                 len--;
  325                                 if (len == 0)
  326                                         return;
  327                                 p += buf_int_len;
  328                         }
  329                 }
  330                 chunk = la->la_next;
  331         }
  332 }
  333 
  334 static boolean_t
  335 zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
  336     int chunk, int array_numints)
  337 {
  338         int bseen = 0;
  339 
  340         if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
  341                 uint64_t *thiskey =
  342                     kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP);
  343                 ASSERT(zn->zn_key_intlen == sizeof (*thiskey));
  344 
  345                 zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
  346                     sizeof (*thiskey), array_numints, thiskey);
  347                 boolean_t match = memcmp(thiskey, zn->zn_key_orig,
  348                     array_numints * sizeof (*thiskey)) == 0;
  349                 kmem_free(thiskey, array_numints * sizeof (*thiskey));
  350                 return (match);
  351         }
  352 
  353         ASSERT(zn->zn_key_intlen == 1);
  354         if (zn->zn_matchtype & MT_NORMALIZE) {
  355                 char *thisname = kmem_alloc(array_numints, KM_SLEEP);
  356 
  357                 zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
  358                     sizeof (char), array_numints, thisname);
  359                 boolean_t match = zap_match(zn, thisname);
  360                 kmem_free(thisname, array_numints);
  361                 return (match);
  362         }
  363 
  364         /*
  365          * Fast path for exact matching.
  366          * First check that the lengths match, so that we don't read
  367          * past the end of the zn_key_orig array.
  368          */
  369         if (array_numints != zn->zn_key_orig_numints)
  370                 return (B_FALSE);
  371         while (bseen < array_numints) {
  372                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
  373                 int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
  374                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  375                 if (memcmp(la->la_array, (char *)zn->zn_key_orig + bseen,
  376                     toread))
  377                         break;
  378                 chunk = la->la_next;
  379                 bseen += toread;
  380         }
  381         return (bseen == array_numints);
  382 }
  383 
  384 /*
  385  * Routines which manipulate leaf entries.
  386  */
  387 
  388 int
  389 zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
  390 {
  391         struct zap_leaf_entry *le;
  392 
  393         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
  394 
  395         for (uint16_t *chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
  396             *chunkp != CHAIN_END; chunkp = &le->le_next) {
  397                 uint16_t chunk = *chunkp;
  398                 le = ZAP_LEAF_ENTRY(l, chunk);
  399 
  400                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  401                 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  402 
  403                 if (le->le_hash != zn->zn_hash)
  404                         continue;
  405 
  406                 /*
  407                  * NB: the entry chain is always sorted by cd on
  408                  * normalized zap objects, so this will find the
  409                  * lowest-cd match for MT_NORMALIZE.
  410                  */
  411                 ASSERT((zn->zn_matchtype == 0) ||
  412                     (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
  413                 if (zap_leaf_array_match(l, zn, le->le_name_chunk,
  414                     le->le_name_numints)) {
  415                         zeh->zeh_num_integers = le->le_value_numints;
  416                         zeh->zeh_integer_size = le->le_value_intlen;
  417                         zeh->zeh_cd = le->le_cd;
  418                         zeh->zeh_hash = le->le_hash;
  419                         zeh->zeh_chunkp = chunkp;
  420                         zeh->zeh_leaf = l;
  421                         return (0);
  422                 }
  423         }
  424 
  425         return (SET_ERROR(ENOENT));
  426 }
  427 
  428 /* Return (h1,cd1 >= h2,cd2) */
  429 #define HCD_GTEQ(h1, cd1, h2, cd2) \
  430         ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
  431 
  432 int
  433 zap_leaf_lookup_closest(zap_leaf_t *l,
  434     uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
  435 {
  436         uint64_t besth = -1ULL;
  437         uint32_t bestcd = -1U;
  438         uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
  439         struct zap_leaf_entry *le;
  440 
  441         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
  442 
  443         for (uint16_t lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
  444                 for (uint16_t chunk = zap_leaf_phys(l)->l_hash[lh];
  445                     chunk != CHAIN_END; chunk = le->le_next) {
  446                         le = ZAP_LEAF_ENTRY(l, chunk);
  447 
  448                         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  449                         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  450 
  451                         if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
  452                             HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
  453                                 ASSERT3U(bestlh, >=, lh);
  454                                 bestlh = lh;
  455                                 besth = le->le_hash;
  456                                 bestcd = le->le_cd;
  457 
  458                                 zeh->zeh_num_integers = le->le_value_numints;
  459                                 zeh->zeh_integer_size = le->le_value_intlen;
  460                                 zeh->zeh_cd = le->le_cd;
  461                                 zeh->zeh_hash = le->le_hash;
  462                                 zeh->zeh_fakechunk = chunk;
  463                                 zeh->zeh_chunkp = &zeh->zeh_fakechunk;
  464                                 zeh->zeh_leaf = l;
  465                         }
  466                 }
  467         }
  468 
  469         return (bestcd == -1U ? SET_ERROR(ENOENT) : 0);
  470 }
  471 
  472 int
  473 zap_entry_read(const zap_entry_handle_t *zeh,
  474     uint8_t integer_size, uint64_t num_integers, void *buf)
  475 {
  476         struct zap_leaf_entry *le =
  477             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
  478         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  479 
  480         if (le->le_value_intlen > integer_size)
  481                 return (SET_ERROR(EINVAL));
  482 
  483         zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
  484             le->le_value_intlen, le->le_value_numints,
  485             integer_size, num_integers, buf);
  486 
  487         if (zeh->zeh_num_integers > num_integers)
  488                 return (SET_ERROR(EOVERFLOW));
  489         return (0);
  490 
  491 }
  492 
  493 int
  494 zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
  495     char *buf)
  496 {
  497         struct zap_leaf_entry *le =
  498             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
  499         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  500 
  501         if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
  502                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
  503                     le->le_name_numints, 8, buflen / 8, buf);
  504         } else {
  505                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
  506                     le->le_name_numints, 1, buflen, buf);
  507         }
  508         if (le->le_name_numints > buflen)
  509                 return (SET_ERROR(EOVERFLOW));
  510         return (0);
  511 }
  512 
  513 int
  514 zap_entry_update(zap_entry_handle_t *zeh,
  515     uint8_t integer_size, uint64_t num_integers, const void *buf)
  516 {
  517         zap_leaf_t *l = zeh->zeh_leaf;
  518         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
  519 
  520         int delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
  521             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);
  522 
  523         if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
  524                 return (SET_ERROR(EAGAIN));
  525 
  526         zap_leaf_array_free(l, &le->le_value_chunk);
  527         le->le_value_chunk =
  528             zap_leaf_array_create(l, buf, integer_size, num_integers);
  529         le->le_value_numints = num_integers;
  530         le->le_value_intlen = integer_size;
  531         return (0);
  532 }
  533 
  534 void
  535 zap_entry_remove(zap_entry_handle_t *zeh)
  536 {
  537         zap_leaf_t *l = zeh->zeh_leaf;
  538 
  539         ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
  540 
  541         uint16_t entry_chunk = *zeh->zeh_chunkp;
  542         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
  543         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  544 
  545         zap_leaf_array_free(l, &le->le_name_chunk);
  546         zap_leaf_array_free(l, &le->le_value_chunk);
  547 
  548         *zeh->zeh_chunkp = le->le_next;
  549         zap_leaf_chunk_free(l, entry_chunk);
  550 
  551         zap_leaf_phys(l)->l_hdr.lh_nentries--;
  552 }
  553 
  554 int
  555 zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
  556     uint8_t integer_size, uint64_t num_integers, const void *buf,
  557     zap_entry_handle_t *zeh)
  558 {
  559         uint16_t chunk;
  560         struct zap_leaf_entry *le;
  561         uint64_t h = zn->zn_hash;
  562 
  563         uint64_t valuelen = integer_size * num_integers;
  564 
  565         int numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
  566             zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
  567         if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
  568                 return (SET_ERROR(E2BIG));
  569 
  570         if (cd == ZAP_NEED_CD) {
  571                 /* find the lowest unused cd */
  572                 if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
  573                         cd = 0;
  574 
  575                         for (chunk = *LEAF_HASH_ENTPTR(l, h);
  576                             chunk != CHAIN_END; chunk = le->le_next) {
  577                                 le = ZAP_LEAF_ENTRY(l, chunk);
  578                                 if (le->le_cd > cd)
  579                                         break;
  580                                 if (le->le_hash == h) {
  581                                         ASSERT3U(cd, ==, le->le_cd);
  582                                         cd++;
  583                                 }
  584                         }
  585                 } else {
  586                         /* old unsorted format; do it the O(n^2) way */
  587                         for (cd = 0; ; cd++) {
  588                                 for (chunk = *LEAF_HASH_ENTPTR(l, h);
  589                                     chunk != CHAIN_END; chunk = le->le_next) {
  590                                         le = ZAP_LEAF_ENTRY(l, chunk);
  591                                         if (le->le_hash == h &&
  592                                             le->le_cd == cd) {
  593                                                 break;
  594                                         }
  595                                 }
  596                                 /* If this cd is not in use, we are good. */
  597                                 if (chunk == CHAIN_END)
  598                                         break;
  599                         }
  600                 }
  601                 /*
  602                  * We would run out of space in a block before we could
  603                  * store enough entries to run out of CD values.
  604                  */
  605                 ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
  606         }
  607 
  608         if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks)
  609                 return (SET_ERROR(EAGAIN));
  610 
  611         /* make the entry */
  612         chunk = zap_leaf_chunk_alloc(l);
  613         le = ZAP_LEAF_ENTRY(l, chunk);
  614         le->le_type = ZAP_CHUNK_ENTRY;
  615         le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
  616             zn->zn_key_intlen, zn->zn_key_orig_numints);
  617         le->le_name_numints = zn->zn_key_orig_numints;
  618         le->le_value_chunk =
  619             zap_leaf_array_create(l, buf, integer_size, num_integers);
  620         le->le_value_numints = num_integers;
  621         le->le_value_intlen = integer_size;
  622         le->le_hash = h;
  623         le->le_cd = cd;
  624 
  625         /* link it into the hash chain */
  626         /* XXX if we did the search above, we could just use that */
  627         uint16_t *chunkp = zap_leaf_rehash_entry(l, chunk);
  628 
  629         zap_leaf_phys(l)->l_hdr.lh_nentries++;
  630 
  631         zeh->zeh_leaf = l;
  632         zeh->zeh_num_integers = num_integers;
  633         zeh->zeh_integer_size = le->le_value_intlen;
  634         zeh->zeh_cd = le->le_cd;
  635         zeh->zeh_hash = le->le_hash;
  636         zeh->zeh_chunkp = chunkp;
  637 
  638         return (0);
  639 }
  640 
  641 /*
  642  * Determine if there is another entry with the same normalized form.
  643  * For performance purposes, either zn or name must be provided (the
  644  * other can be NULL).  Note, there usually won't be any hash
  645  * conflicts, in which case we don't need the concatenated/normalized
  646  * form of the name.  But all callers have one of these on hand anyway,
  647  * so might as well take advantage.  A cleaner but slower interface
  648  * would accept neither argument, and compute the normalized name as
  649  * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
  650  */
  651 boolean_t
  652 zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
  653     const char *name, zap_t *zap)
  654 {
  655         struct zap_leaf_entry *le;
  656         boolean_t allocdzn = B_FALSE;
  657 
  658         if (zap->zap_normflags == 0)
  659                 return (B_FALSE);
  660 
  661         for (uint16_t chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
  662             chunk != CHAIN_END; chunk = le->le_next) {
  663                 le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
  664                 if (le->le_hash != zeh->zeh_hash)
  665                         continue;
  666                 if (le->le_cd == zeh->zeh_cd)
  667                         continue;
  668 
  669                 if (zn == NULL) {
  670                         zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
  671                         allocdzn = B_TRUE;
  672                 }
  673                 if (zap_leaf_array_match(zeh->zeh_leaf, zn,
  674                     le->le_name_chunk, le->le_name_numints)) {
  675                         if (allocdzn)
  676                                 zap_name_free(zn);
  677                         return (B_TRUE);
  678                 }
  679         }
  680         if (allocdzn)
  681                 zap_name_free(zn);
  682         return (B_FALSE);
  683 }
  684 
  685 /*
  686  * Routines for transferring entries between leafs.
  687  */
  688 
  689 static uint16_t *
  690 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
  691 {
  692         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
  693         struct zap_leaf_entry *le2;
  694         uint16_t *chunkp;
  695 
  696         /*
  697          * keep the entry chain sorted by cd
  698          * NB: this will not cause problems for unsorted leafs, though
  699          * it is unnecessary there.
  700          */
  701         for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
  702             *chunkp != CHAIN_END; chunkp = &le2->le_next) {
  703                 le2 = ZAP_LEAF_ENTRY(l, *chunkp);
  704                 if (le2->le_cd > le->le_cd)
  705                         break;
  706         }
  707 
  708         le->le_next = *chunkp;
  709         *chunkp = entry;
  710         return (chunkp);
  711 }
  712 
  713 static uint16_t
  714 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
  715 {
  716         uint16_t new_chunk;
  717         uint16_t *nchunkp = &new_chunk;
  718 
  719         while (chunk != CHAIN_END) {
  720                 uint16_t nchunk = zap_leaf_chunk_alloc(nl);
  721                 struct zap_leaf_array *nla =
  722                     &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
  723                 struct zap_leaf_array *la =
  724                     &ZAP_LEAF_CHUNK(l, chunk).l_array;
  725                 int nextchunk = la->la_next;
  726 
  727                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
  728                 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
  729 
  730                 *nla = *la; /* structure assignment */
  731 
  732                 zap_leaf_chunk_free(l, chunk);
  733                 chunk = nextchunk;
  734                 *nchunkp = nchunk;
  735                 nchunkp = &nla->la_next;
  736         }
  737         *nchunkp = CHAIN_END;
  738         return (new_chunk);
  739 }
  740 
  741 static void
  742 zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
  743 {
  744         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
  745         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
  746 
  747         uint16_t chunk = zap_leaf_chunk_alloc(nl);
  748         struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
  749         *nle = *le; /* structure assignment */
  750 
  751         (void) zap_leaf_rehash_entry(nl, chunk);
  752 
  753         nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
  754         nle->le_value_chunk =
  755             zap_leaf_transfer_array(l, le->le_value_chunk, nl);
  756 
  757         zap_leaf_chunk_free(l, entry);
  758 
  759         zap_leaf_phys(l)->l_hdr.lh_nentries--;
  760         zap_leaf_phys(nl)->l_hdr.lh_nentries++;
  761 }
  762 
  763 /*
  764  * Transfer the entries whose hash prefix ends in 1 to the new leaf.
  765  */
  766 void
  767 zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
  768 {
  769         int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;
  770 
  771         /* set new prefix and prefix_len */
  772         zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
  773         zap_leaf_phys(l)->l_hdr.lh_prefix_len++;
  774         zap_leaf_phys(nl)->l_hdr.lh_prefix =
  775             zap_leaf_phys(l)->l_hdr.lh_prefix | 1;
  776         zap_leaf_phys(nl)->l_hdr.lh_prefix_len =
  777             zap_leaf_phys(l)->l_hdr.lh_prefix_len;
  778 
  779         /* break existing hash chains */
  780         zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
  781             2*ZAP_LEAF_HASH_NUMENTRIES(l));
  782 
  783         if (sort)
  784                 zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
  785 
  786         /*
  787          * Transfer entries whose hash bit 'bit' is set to nl; rehash
  788          * the remaining entries
  789          *
  790          * NB: We could find entries via the hashtable instead. That
  791          * would be O(hashents+numents) rather than O(numblks+numents),
  792          * but this accesses memory more sequentially, and when we're
  793          * called, the block is usually pretty full.
  794          */
  795         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
  796                 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
  797                 if (le->le_type != ZAP_CHUNK_ENTRY)
  798                         continue;
  799 
  800                 if (le->le_hash & (1ULL << bit))
  801                         zap_leaf_transfer_entry(l, i, nl);
  802                 else
  803                         (void) zap_leaf_rehash_entry(l, i);
  804         }
  805 }
  806 
  807 void
  808 zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
  809 {
  810         int n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
  811             zap_leaf_phys(l)->l_hdr.lh_prefix_len;
  812         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
  813         zs->zs_leafs_with_2n_pointers[n]++;
  814 
  815 
  816         n = zap_leaf_phys(l)->l_hdr.lh_nentries/5;
  817         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
  818         zs->zs_blocks_with_n5_entries[n]++;
  819 
  820         n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
  821             zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
  822             (1<<FZAP_BLOCK_SHIFT(zap));
  823         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
  824         zs->zs_blocks_n_tenths_full[n]++;
  825 
  826         for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
  827                 int nentries = 0;
  828                 int chunk = zap_leaf_phys(l)->l_hash[i];
  829 
  830                 while (chunk != CHAIN_END) {
  831                         struct zap_leaf_entry *le =
  832                             ZAP_LEAF_ENTRY(l, chunk);
  833 
  834                         n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
  835                             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
  836                             le->le_value_intlen);
  837                         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
  838                         zs->zs_entries_using_n_chunks[n]++;
  839 
  840                         chunk = le->le_next;
  841                         nentries++;
  842                 }
  843 
  844                 n = nentries;
  845                 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
  846                 zs->zs_buckets_with_n_entries[n]++;
  847         }
  848 }
Cache object: a67d4fbb8422136199e6d5b0948d0d68 
 
 |