The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/boot/zfs/zfsimpl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2007 Doug Rabson
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/8.0/sys/boot/zfs/zfsimpl.c 192194 2009-05-16 10:48:20Z dfr $");
   29 
   30 /*
   31  *      Stand-alone ZFS file reader.
   32  */
   33 
   34 #include "zfsimpl.h"
   35 #include "zfssubr.c"
   36 
   37 /*
   38  * List of all vdevs, chained through v_alllink.
   39  */
   40 static vdev_list_t zfs_vdevs;
   41 
   42 /*
   43  * List of all pools, chained through spa_link.
   44  */
   45 static spa_list_t zfs_pools;
   46 
   /*
    * Remaining file-scope state.  The three buffers are allocated once by
    * zfs_init().
    *
    *   zfs_crc64_table  - 256-entry CRC-64 table; presumably filled in by
    *                      zfs_init_crc(), which is not shown here (confirm).
    *   dnode_cache_obj,
    *   dnode_cache_bn,
    *   dnode_cache_buf  - dnode block cache state; dnode_cache_buf is a
    *                      SPA_MAXBLOCKSIZE buffer.  NOTE(review): the code
    *                      that consults this cache is outside this excerpt.
    *   zap_scratch      - SPA_MAXBLOCKSIZE scratch buffer; vdev_probe() also
    *                      reads the vdev label into it.
    *   zfs_temp_buf,
    *   zfs_temp_end,
    *   zfs_temp_ptr     - start, end and next-free position of the temporary
    *                      arena handed out by zfs_alloc_temp() and rewound
    *                      by zfs_reset_temp().
    */
   47 static uint64_t zfs_crc64_table[256];
   48 static const dnode_phys_t *dnode_cache_obj = 0;
   49 static uint64_t dnode_cache_bn;
   50 static char *dnode_cache_buf;
   51 static char *zap_scratch;
   52 static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr;
   53 
      /* Size in bytes of the temporary arena allocated by zfs_init(). */
   54 #define TEMP_SIZE       (1*SPA_MAXBLOCKSIZE)
   55 
   56 static void
   57 zfs_init(void)
   58 {
   59         STAILQ_INIT(&zfs_vdevs);
   60         STAILQ_INIT(&zfs_pools);
   61 
   62         zfs_temp_buf = malloc(TEMP_SIZE);
   63         zfs_temp_end = zfs_temp_buf + TEMP_SIZE;
   64         zfs_temp_ptr = zfs_temp_buf;
   65         dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
   66         zap_scratch = malloc(SPA_MAXBLOCKSIZE);
   67 
   68         zfs_init_crc();
   69 }
   70 
   71 static char *
   72 zfs_alloc_temp(size_t sz)
   73 {
   74         char *p;
   75 
   76         if (zfs_temp_ptr + sz > zfs_temp_end) {
   77                 printf("ZFS: out of temporary buffer space\n");
   78                 for (;;) ;
   79         }
   80         p = zfs_temp_ptr;
   81         zfs_temp_ptr += sz;
   82 
   83         return (p);
   84 }
   85 
   86 static void
   87 zfs_reset_temp(void)
   88 {
   89 
   90         zfs_temp_ptr = zfs_temp_buf;
   91 }
   92 
   /*
    * Decode one big-endian 32-bit integer from the XDR stream.
    *
    * xdr - in/out cursor into the encoded data; advanced by four bytes.
    * ip  - receives the decoded value.
    *
    * Always returns 0.
    */
   static int
   xdr_int(const unsigned char **xdr, int *ip)
   {
           const unsigned char *src = *xdr;
           unsigned int word;

           /*
            * Assemble the word in unsigned arithmetic: shifting a promoted
            * (signed) int left by 24 is undefined when the top byte is
            * 0x80 or above.
            */
           word = ((unsigned int)src[0] << 24)
                   | ((unsigned int)src[1] << 16)
                   | ((unsigned int)src[2] << 8)
                   | (unsigned int)src[3];
           *ip = (int)word;
           *xdr = src + 4;
           return (0);
   }
  103 
  104 static int
  105 xdr_u_int(const unsigned char **xdr, u_int *ip)
  106 {
  107         *ip = ((*xdr)[0] << 24)
  108                 | ((*xdr)[1] << 16)
  109                 | ((*xdr)[2] << 8)
  110                 | ((*xdr)[3] << 0);
  111         (*xdr) += 4;
  112         return (0);
  113 }
  114 
  115 static int
  116 xdr_uint64_t(const unsigned char **xdr, uint64_t *lp)
  117 {
  118         u_int hi, lo;
  119 
  120         xdr_u_int(xdr, &hi);
  121         xdr_u_int(xdr, &lo);
  122         *lp = (((uint64_t) hi) << 32) | lo;
  123         return (0);
  124 }
  125 
  126 static int
  127 nvlist_find(const unsigned char *nvlist, const char *name, int type,
  128             int* elementsp, void *valuep)
  129 {
  130         const unsigned char *p, *pair;
  131         int junk;
  132         int encoded_size, decoded_size;
  133 
  134         p = nvlist;
  135         xdr_int(&p, &junk);
  136         xdr_int(&p, &junk);
  137 
  138         pair = p;
  139         xdr_int(&p, &encoded_size);
  140         xdr_int(&p, &decoded_size);
  141         while (encoded_size && decoded_size) {
  142                 int namelen, pairtype, elements;
  143                 const char *pairname;
  144 
  145                 xdr_int(&p, &namelen);
  146                 pairname = (const char*) p;
  147                 p += roundup(namelen, 4);
  148                 xdr_int(&p, &pairtype);
  149 
  150                 if (!memcmp(name, pairname, namelen) && type == pairtype) {
  151                         xdr_int(&p, &elements);
  152                         if (elementsp)
  153                                 *elementsp = elements;
  154                         if (type == DATA_TYPE_UINT64) {
  155                                 xdr_uint64_t(&p, (uint64_t *) valuep);
  156                                 return (0);
  157                         } else if (type == DATA_TYPE_STRING) {
  158                                 int len;
  159                                 xdr_int(&p, &len);
  160                                 (*(const char**) valuep) = (const char*) p;
  161                                 return (0);
  162                         } else if (type == DATA_TYPE_NVLIST
  163                                    || type == DATA_TYPE_NVLIST_ARRAY) {
  164                                 (*(const unsigned char**) valuep) =
  165                                          (const unsigned char*) p;
  166                                 return (0);
  167                         } else {
  168                                 return (EIO);
  169                         }
  170                 } else {
  171                         /*
  172                          * Not the pair we are looking for, skip to the next one.
  173                          */
  174                         p = pair + encoded_size;
  175                 }
  176 
  177                 pair = p;
  178                 xdr_int(&p, &encoded_size);
  179                 xdr_int(&p, &decoded_size);
  180         }
  181 
  182         return (EIO);
  183 }
  184 
  /*
   * Step over one encoded nvlist and return the address that follows it,
   * which for an nvlist array is where the next element begins.
   *
   * Two leading words are read and ignored, then each pair is skipped
   * using its encoded size until a pair header with a zero size is read.
   * The returned pointer is just past that terminating header.
   */
  static const unsigned char *
  nvlist_next(const unsigned char *nvlist)
  {
          const unsigned char *cursor, *pair_start;
          int ignored;
          int enc, dec;

          cursor = nvlist;
          xdr_int(&cursor, &ignored);
          xdr_int(&cursor, &ignored);

          for (;;) {
                  pair_start = cursor;
                  xdr_int(&cursor, &enc);
                  xdr_int(&cursor, &dec);
                  if (enc == 0 || dec == 0)
                          break;
                  cursor = pair_start + enc;
          }

          return (cursor);
  }
  212 
  213 #ifdef TEST
  214 
  215 static const unsigned char *
  216 nvlist_print(const unsigned char *nvlist, unsigned int indent)
  217 {
  218         static const char* typenames[] = {
  219                 "DATA_TYPE_UNKNOWN",
  220                 "DATA_TYPE_BOOLEAN",
  221                 "DATA_TYPE_BYTE",
  222                 "DATA_TYPE_INT16",
  223                 "DATA_TYPE_UINT16",
  224                 "DATA_TYPE_INT32",
  225                 "DATA_TYPE_UINT32",
  226                 "DATA_TYPE_INT64",
  227                 "DATA_TYPE_UINT64",
  228                 "DATA_TYPE_STRING",
  229                 "DATA_TYPE_BYTE_ARRAY",
  230                 "DATA_TYPE_INT16_ARRAY",
  231                 "DATA_TYPE_UINT16_ARRAY",
  232                 "DATA_TYPE_INT32_ARRAY",
  233                 "DATA_TYPE_UINT32_ARRAY",
  234                 "DATA_TYPE_INT64_ARRAY",
  235                 "DATA_TYPE_UINT64_ARRAY",
  236                 "DATA_TYPE_STRING_ARRAY",
  237                 "DATA_TYPE_HRTIME",
  238                 "DATA_TYPE_NVLIST",
  239                 "DATA_TYPE_NVLIST_ARRAY",
  240                 "DATA_TYPE_BOOLEAN_VALUE",
  241                 "DATA_TYPE_INT8",
  242                 "DATA_TYPE_UINT8",
  243                 "DATA_TYPE_BOOLEAN_ARRAY",
  244                 "DATA_TYPE_INT8_ARRAY",
  245                 "DATA_TYPE_UINT8_ARRAY"
  246         };
  247 
  248         unsigned int i, j;
  249         const unsigned char *p, *pair;
  250         int junk;
  251         int encoded_size, decoded_size;
  252 
  253         p = nvlist;
  254         xdr_int(&p, &junk);
  255         xdr_int(&p, &junk);
  256 
  257         pair = p;
  258         xdr_int(&p, &encoded_size);
  259         xdr_int(&p, &decoded_size);
  260         while (encoded_size && decoded_size) {
  261                 int namelen, pairtype, elements;
  262                 const char *pairname;
  263 
  264                 xdr_int(&p, &namelen);
  265                 pairname = (const char*) p;
  266                 p += roundup(namelen, 4);
  267                 xdr_int(&p, &pairtype);
  268 
  269                 for (i = 0; i < indent; i++)
  270                         printf(" ");
  271                 printf("%s %s", typenames[pairtype], pairname);
  272 
  273                 xdr_int(&p, &elements);
  274                 switch (pairtype) {
  275                 case DATA_TYPE_UINT64: {
  276                         uint64_t val;
  277                         xdr_uint64_t(&p, &val);
  278                         printf(" = 0x%llx\n", val);
  279                         break;
  280                 }
  281 
  282                 case DATA_TYPE_STRING: {
  283                         int len;
  284                         xdr_int(&p, &len);
  285                         printf(" = \"%s\"\n", p);
  286                         break;
  287                 }
  288 
  289                 case DATA_TYPE_NVLIST:
  290                         printf("\n");
  291                         nvlist_print(p, indent + 1);
  292                         break;
  293 
  294                 case DATA_TYPE_NVLIST_ARRAY:
  295                         for (j = 0; j < elements; j++) {
  296                                 printf("[%d]\n", j);
  297                                 p = nvlist_print(p, indent + 1);
  298                                 if (j != elements - 1) {
  299                                         for (i = 0; i < indent; i++)
  300                                                 printf(" ");
  301                                         printf("%s %s", typenames[pairtype], pairname);
  302                                 }
  303                         }
  304                         break;
  305 
  306                 default:
  307                         printf("\n");
  308                 }
  309 
  310                 p = pair + encoded_size;
  311 
  312                 pair = p;
  313                 xdr_int(&p, &encoded_size);
  314                 xdr_int(&p, &decoded_size);
  315         }
  316 
  317         return p;
  318 }
  319 
  320 #endif
  321 
  322 static int
  323 vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
  324     off_t offset, size_t size)
  325 {
  326         size_t psize;
  327         int rc;
  328 
  329         if (bp) {
  330                 psize = BP_GET_PSIZE(bp);
  331         } else {
  332                 psize = size;
  333         }
  334 
  335         /*printf("ZFS: reading %d bytes at 0x%llx to %p\n", psize, offset, buf);*/
  336         rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize);
  337         if (rc)
  338                 return (rc);
  339         if (bp && zio_checksum_error(bp, buf))
  340                 return (EIO);
  341 
  342         return (0);
  343 }
  344 
  345 static int
  346 vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
  347     off_t offset, size_t bytes)
  348 {
  349 
  350         return (vdev_read_phys(vdev, bp, buf,
  351                 offset + VDEV_LABEL_START_SIZE, bytes));
  352 }
  353 
  354 
  355 static int
  356 vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
  357     off_t offset, size_t bytes)
  358 {
  359         vdev_t *kid;
  360         int rc;
  361 
  362         rc = EIO;
  363         STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
  364                 if (kid->v_state != VDEV_STATE_HEALTHY)
  365                         continue;
  366                 rc = kid->v_read(kid, bp, buf, offset, bytes);
  367                 if (!rc)
  368                         return (0);
  369         }
  370 
  371         return (rc);
  372 }
  373 
  374 static vdev_t *
  375 vdev_find(uint64_t guid)
  376 {
  377         vdev_t *vdev;
  378 
  379         STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink)
  380                 if (vdev->v_guid == guid)
  381                         return (vdev);
  382 
  383         return (0);
  384 }
  385 
  386 static vdev_t *
  387 vdev_create(uint64_t guid, vdev_read_t *read)
  388 {
  389         vdev_t *vdev;
  390 
  391         vdev = malloc(sizeof(vdev_t));
  392         memset(vdev, 0, sizeof(vdev_t));
  393         STAILQ_INIT(&vdev->v_children);
  394         vdev->v_guid = guid;
  395         vdev->v_state = VDEV_STATE_OFFLINE;
  396         vdev->v_read = read;
  397         vdev->v_phys_read = 0;
  398         vdev->v_read_priv = 0;
  399         STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);
  400 
  401         return (vdev);
  402 }
  403 
  404 static int
  405 vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp)
  406 {
  407         int rc;
  408         uint64_t guid, id, ashift, nparity;
  409         const char *type;
  410         const char *path;
  411         vdev_t *vdev, *kid;
  412         const unsigned char *kids;
  413         int nkids, i;
  414 
  415         if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID,
  416                         DATA_TYPE_UINT64, 0, &guid)
  417             || nvlist_find(nvlist, ZPOOL_CONFIG_ID,
  418                            DATA_TYPE_UINT64, 0, &id)
  419             || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE,
  420                            DATA_TYPE_STRING, 0, &type)) {
  421                 printf("ZFS: can't find vdev details\n");
  422                 return (ENOENT);
  423         }
  424 
  425         /*
  426          * Assume that if we've seen this vdev tree before, this one
  427          * will be identical.
  428          */
  429         vdev = vdev_find(guid);
  430         if (vdev) {
  431                 if (vdevp)
  432                         *vdevp = vdev;
  433                 return (0);
  434         }
  435 
  436         if (strcmp(type, VDEV_TYPE_MIRROR)
  437             && strcmp(type, VDEV_TYPE_DISK)
  438             && strcmp(type, VDEV_TYPE_RAIDZ)) {
  439                 printf("ZFS: can only boot from disk, mirror or raidz vdevs\n");
  440                 return (EIO);
  441         }
  442 
  443         if (!strcmp(type, VDEV_TYPE_MIRROR))
  444                 vdev = vdev_create(guid, vdev_mirror_read);
  445         else if (!strcmp(type, VDEV_TYPE_RAIDZ))
  446                 vdev = vdev_create(guid, vdev_raidz_read);
  447         else
  448                 vdev = vdev_create(guid, vdev_disk_read);
  449 
  450         vdev->v_id = id;
  451         if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT,
  452                 DATA_TYPE_UINT64, 0, &ashift) == 0)
  453                 vdev->v_ashift = ashift;
  454         else
  455                 vdev->v_ashift = 0;
  456         if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY,
  457                 DATA_TYPE_UINT64, 0, &nparity) == 0)
  458                 vdev->v_nparity = nparity;
  459         else
  460                 vdev->v_nparity = 0;
  461         if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH,
  462                         DATA_TYPE_STRING, 0, &path) == 0) {
  463                 if (strlen(path) > 5
  464                     && path[0] == '/'
  465                     && path[1] == 'd'
  466                     && path[2] == 'e'
  467                     && path[3] == 'v'
  468                     && path[4] == '/')
  469                         path += 5;
  470                 vdev->v_name = strdup(path);
  471         } else {
  472                 if (!strcmp(type, "raidz")) {
  473                         if (vdev->v_nparity == 1)
  474                                 vdev->v_name = "raidz1";
  475                         else
  476                                 vdev->v_name = "raidz2";
  477                 } else {
  478                         vdev->v_name = strdup(type);
  479                 }
  480         }
  481         rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN,
  482                          DATA_TYPE_NVLIST_ARRAY, &nkids, &kids);
  483         /*
  484          * Its ok if we don't have any kids.
  485          */
  486         if (rc == 0) {
  487                 vdev->v_nchildren = nkids;
  488                 for (i = 0; i < nkids; i++) {
  489                         rc = vdev_init_from_nvlist(kids, &kid);
  490                         if (rc)
  491                                 return (rc);
  492                         STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink);
  493                         kids = nvlist_next(kids);
  494                 }
  495         } else {
  496                 vdev->v_nchildren = 0;
  497         }
  498 
  499         if (vdevp)
  500                 *vdevp = vdev;
  501         return (0);
  502 }
  503 
  504 static void
  505 vdev_set_state(vdev_t *vdev)
  506 {
  507         vdev_t *kid;
  508         int good_kids;
  509         int bad_kids;
  510 
  511         /*
  512          * A mirror or raidz is healthy if all its kids are healthy. A
  513          * mirror is degraded if any of its kids is healthy; a raidz
  514          * is degraded if at most nparity kids are offline.
  515          */
  516         if (STAILQ_FIRST(&vdev->v_children)) {
  517                 good_kids = 0;
  518                 bad_kids = 0;
  519                 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
  520                         if (kid->v_state == VDEV_STATE_HEALTHY)
  521                                 good_kids++;
  522                         else
  523                                 bad_kids++;
  524                 }
  525                 if (bad_kids == 0) {
  526                         vdev->v_state = VDEV_STATE_HEALTHY;
  527                 } else {
  528                         if (vdev->v_read == vdev_mirror_read) {
  529                                 if (good_kids) {
  530                                         vdev->v_state = VDEV_STATE_DEGRADED;
  531                                 } else {
  532                                         vdev->v_state = VDEV_STATE_OFFLINE;
  533                                 }
  534                         } else if (vdev->v_read == vdev_raidz_read) {
  535                                 if (bad_kids > vdev->v_nparity) {
  536                                         vdev->v_state = VDEV_STATE_OFFLINE;
  537                                 } else {
  538                                         vdev->v_state = VDEV_STATE_DEGRADED;
  539                                 }
  540                         }
  541                 }
  542         }
  543 }
  544 
  545 static spa_t *
  546 spa_find_by_guid(uint64_t guid)
  547 {
  548         spa_t *spa;
  549 
  550         STAILQ_FOREACH(spa, &zfs_pools, spa_link)
  551                 if (spa->spa_guid == guid)
  552                         return (spa);
  553 
  554         return (0);
  555 }
  556 
  557 #ifdef BOOT2
  558 
  559 static spa_t *
  560 spa_find_by_name(const char *name)
  561 {
  562         spa_t *spa;
  563 
  564         STAILQ_FOREACH(spa, &zfs_pools, spa_link)
  565                 if (!strcmp(spa->spa_name, name))
  566                         return (spa);
  567 
  568         return (0);
  569 }
  570 
  571 #endif
  572 
  573 static spa_t *
  574 spa_create(uint64_t guid)
  575 {
  576         spa_t *spa;
  577 
  578         spa = malloc(sizeof(spa_t));
  579         memset(spa, 0, sizeof(spa_t));
  580         STAILQ_INIT(&spa->spa_vdevs);
  581         spa->spa_guid = guid;
  582         STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link);
  583 
  584         return (spa);
  585 }
  586 
  587 static const char *
  588 state_name(vdev_state_t state)
  589 {
  590         static const char* names[] = {
  591                 "UNKNOWN",
  592                 "CLOSED",
  593                 "OFFLINE",
  594                 "CANT_OPEN",
  595                 "DEGRADED",
  596                 "ONLINE"
  597         };
  598         return names[state];
  599 }
  600 
  601 #ifdef BOOT2
  602 
  603 #define pager_printf printf
  604 
  605 #else
  606 
  /*
   * printf-style front end to pager_output(): format one line into a
   * local buffer and pass it on.
   *
   * Output longer than the buffer is truncated.  Formatting is bounded
   * because callers pass strings of arbitrary length (pool and vdev
   * names) which would otherwise overrun the 80-byte buffer.
   */
  static void
  pager_printf(const char *fmt, ...)
  {
          char line[80];
          va_list args;

          va_start(args, fmt);
          vsnprintf(line, sizeof(line), fmt, args);
          va_end(args);
          pager_output(line);
  }
  618 
  619 #endif
  620 
  621 #define STATUS_FORMAT   "        %-16s %-10s\n"
  622 
  623 static void
  624 print_state(int indent, const char *name, vdev_state_t state)
  625 {
  626         int i;
  627         char buf[512];
  628 
  629         buf[0] = 0;
  630         for (i = 0; i < indent; i++)
  631                 strcat(buf, "  ");
  632         strcat(buf, name);
  633         pager_printf(STATUS_FORMAT, buf, state_name(state));
  634         
  635 }
  636 
  637 static void
  638 vdev_status(vdev_t *vdev, int indent)
  639 {
  640         vdev_t *kid;
  641         print_state(indent, vdev->v_name, vdev->v_state);
  642 
  643         STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
  644                 vdev_status(kid, indent + 1);
  645         }
  646 }
  647 
  648 static void
  649 spa_status(spa_t *spa)
  650 {
  651         vdev_t *vdev;
  652         int good_kids, bad_kids, degraded_kids;
  653         vdev_state_t state;
  654 
  655         pager_printf("  pool: %s\n", spa->spa_name);
  656         pager_printf("config:\n\n");
  657         pager_printf(STATUS_FORMAT, "NAME", "STATE");
  658 
  659         good_kids = 0;
  660         degraded_kids = 0;
  661         bad_kids = 0;
  662         STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) {
  663                 if (vdev->v_state == VDEV_STATE_HEALTHY)
  664                         good_kids++;
  665                 else if (vdev->v_state == VDEV_STATE_DEGRADED)
  666                         degraded_kids++;
  667                 else
  668                         bad_kids++;
  669         }
  670 
  671         state = VDEV_STATE_CLOSED;
  672         if (good_kids > 0 && (degraded_kids + bad_kids) == 0)
  673                 state = VDEV_STATE_HEALTHY;
  674         else if ((good_kids + degraded_kids) > 0)
  675                 state = VDEV_STATE_DEGRADED;
  676 
  677         print_state(0, spa->spa_name, state);
  678         STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) {
  679                 vdev_status(vdev, 1);
  680         }
  681 }
  682 
  683 static void
  684 spa_all_status(void)
  685 {
  686         spa_t *spa;
  687         int first = 1;
  688 
  689         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
  690                 if (!first)
  691                         pager_printf("\n");
  692                 first = 0;
  693                 spa_status(spa);
  694         }
  695 }
  696 
  697 static int
  698 vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
  699 {
  700         vdev_t vtmp;
  701         vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch;
  702         spa_t *spa;
  703         vdev_t *vdev, *top_vdev, *pool_vdev;
  704         off_t off;
  705         blkptr_t bp;
  706         const unsigned char *nvlist;
  707         uint64_t val;
  708         uint64_t guid;
  709         uint64_t pool_txg, pool_guid;
  710         const char *pool_name;
  711         const unsigned char *vdevs;
  712         int i, rc;
  713         char upbuf[1024];
  714         const struct uberblock *up;
  715 
  716         /*
  717          * Load the vdev label and figure out which
  718          * uberblock is most current.
  719          */
  720         memset(&vtmp, 0, sizeof(vtmp));
  721         vtmp.v_phys_read = read;
  722         vtmp.v_read_priv = read_priv;
  723         off = offsetof(vdev_label_t, vl_vdev_phys);
  724         BP_ZERO(&bp);
  725         BP_SET_LSIZE(&bp, sizeof(vdev_phys_t));
  726         BP_SET_PSIZE(&bp, sizeof(vdev_phys_t));
  727         BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
  728         BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
  729         ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
  730         if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0))
  731                 return (EIO);
  732 
  733         if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) {
  734                 return (EIO);
  735         }
  736 
  737         nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4;
  738 
  739         if (nvlist_find(nvlist,
  740                         ZPOOL_CONFIG_VERSION,
  741                         DATA_TYPE_UINT64, 0, &val)) {
  742                 return (EIO);
  743         }
  744 
  745         if (val > SPA_VERSION) {
  746                 printf("ZFS: unsupported ZFS version %u (should be %u)\n",
  747                     (unsigned) val, (unsigned) SPA_VERSION);
  748                 return (EIO);
  749         }
  750 
  751         if (nvlist_find(nvlist,
  752                         ZPOOL_CONFIG_POOL_STATE,
  753                         DATA_TYPE_UINT64, 0, &val)) {
  754                 return (EIO);
  755         }
  756 
  757 #ifndef TEST
  758         if (val != POOL_STATE_ACTIVE) {
  759                 /*
  760                  * Don't print a message here. If we happen to reboot
  761                  * while where is an exported pool around, we don't
  762                  * need a cascade of confusing messages during boot.
  763                  */
  764                 /*printf("ZFS: pool is not active\n");*/
  765                 return (EIO);
  766         }
  767 #endif
  768 
  769         if (nvlist_find(nvlist,
  770                         ZPOOL_CONFIG_POOL_TXG,
  771                         DATA_TYPE_UINT64, 0, &pool_txg)
  772             || nvlist_find(nvlist,
  773                            ZPOOL_CONFIG_POOL_GUID,
  774                            DATA_TYPE_UINT64, 0, &pool_guid)
  775             || nvlist_find(nvlist,
  776                            ZPOOL_CONFIG_POOL_NAME,
  777                            DATA_TYPE_STRING, 0, &pool_name)) {
  778                 /*
  779                  * Cache and spare devices end up here - just ignore
  780                  * them.
  781                  */
  782                 /*printf("ZFS: can't find pool details\n");*/
  783                 return (EIO);
  784         }
  785 
  786         /*
  787          * Create the pool if this is the first time we've seen it.
  788          */
  789         spa = spa_find_by_guid(pool_guid);
  790         if (!spa) {
  791                 spa = spa_create(pool_guid);
  792                 spa->spa_name = strdup(pool_name);
  793         }
  794         if (pool_txg > spa->spa_txg)
  795                 spa->spa_txg = pool_txg;
  796 
  797         /*
  798          * Get the vdev tree and create our in-core copy of it.
  799          * If we already have a healthy vdev with this guid, this must
  800          * be some kind of alias (overlapping slices, dangerously dedicated
  801          * disks etc).
  802          */
  803         if (nvlist_find(nvlist,
  804                         ZPOOL_CONFIG_GUID,
  805                         DATA_TYPE_UINT64, 0, &guid)) {
  806                 return (EIO);
  807         }
  808         vdev = vdev_find(guid);
  809         if (vdev && vdev->v_state == VDEV_STATE_HEALTHY) {
  810                 return (EIO);
  811         }
  812 
  813         if (nvlist_find(nvlist,
  814                         ZPOOL_CONFIG_VDEV_TREE,
  815                         DATA_TYPE_NVLIST, 0, &vdevs)) {
  816                 return (EIO);
  817         }
  818         rc = vdev_init_from_nvlist(vdevs, &top_vdev);
  819         if (rc)
  820                 return (rc);
  821 
  822         /*
  823          * Add the toplevel vdev to the pool if its not already there.
  824          */
  825         STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink)
  826                 if (top_vdev == pool_vdev)
  827                         break;
  828         if (!pool_vdev && top_vdev)
  829                 STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink);
  830 
  831         /*
  832          * We should already have created an incomplete vdev for this
  833          * vdev. Find it and initialise it with our read proc.
  834          */
  835         vdev = vdev_find(guid);
  836         if (vdev) {
  837                 vdev->v_phys_read = read;
  838                 vdev->v_read_priv = read_priv;
  839                 vdev->v_state = VDEV_STATE_HEALTHY;
  840         } else {
  841                 printf("ZFS: inconsistent nvlist contents\n");
  842                 return (EIO);
  843         }
  844 
  845         /*
  846          * Re-evaluate top-level vdev state.
  847          */
  848         vdev_set_state(top_vdev);
  849 
  850         /*
  851          * Ok, we are happy with the pool so far. Lets find
  852          * the best uberblock and then we can actually access
  853          * the contents of the pool.
  854          */
  855         for (i = 0;
  856              i < VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT;
  857              i++) {
  858                 off = offsetof(vdev_label_t, vl_uberblock);
  859                 off += i << UBERBLOCK_SHIFT;
  860                 BP_ZERO(&bp);
  861                 DVA_SET_OFFSET(&bp.blk_dva[0], off);
  862                 BP_SET_LSIZE(&bp, 1 << UBERBLOCK_SHIFT);
  863                 BP_SET_PSIZE(&bp, 1 << UBERBLOCK_SHIFT);
  864                 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
  865                 BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
  866                 ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
  867                 if (vdev_read_phys(vdev, &bp, upbuf, off, 0))
  868                         continue;
  869 
  870                 up = (const struct uberblock *) upbuf;
  871                 if (up->ub_magic != UBERBLOCK_MAGIC)
  872                         continue;
  873                 if (up->ub_txg < spa->spa_txg)
  874                         continue;
  875                 if (up->ub_txg > spa->spa_uberblock.ub_txg) {
  876                         spa->spa_uberblock = *up;
  877                 } else if (up->ub_txg == spa->spa_uberblock.ub_txg) {
  878                         if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp)
  879                                 spa->spa_uberblock = *up;
  880                 }
  881         }
  882 
  883         if (spap)
  884                 *spap = spa;
  885         return (0);
  886 }
  887 
  888 static int
  889 ilog2(int n)
  890 {
  891         int v;
  892 
  893         for (v = 0; v < 32; v++)
  894                 if (n == (1 << v))
  895                         return v;
  896         return -1;
  897 }
  898 
  899 static int
  900 zio_read(spa_t *spa, const blkptr_t *bp, void *buf)
  901 {
  902         int cpfunc = BP_GET_COMPRESS(bp);
  903         size_t lsize = BP_GET_LSIZE(bp);
  904         size_t psize = BP_GET_PSIZE(bp);
  905         void *pbuf;
  906         int i;
  907 
  908         zfs_reset_temp();
  909         if (cpfunc != ZIO_COMPRESS_OFF)
  910                 pbuf = zfs_alloc_temp(psize);
  911         else
  912                 pbuf = buf;
  913 
  914         for (i = 0; i < SPA_DVAS_PER_BP; i++) {
  915                 const dva_t *dva = &bp->blk_dva[i];
  916                 vdev_t *vdev;
  917                 int vdevid;
  918                 off_t offset;
  919 
  920                 if (!dva->dva_word[0] && !dva->dva_word[1])
  921                         continue;
  922 
  923                 vdevid = DVA_GET_VDEV(dva);
  924                 offset = DVA_GET_OFFSET(dva);
  925                 STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink)
  926                         if (vdev->v_id == vdevid)
  927                                 break;
  928                 if (!vdev || !vdev->v_read)
  929                         continue;
  930                 if (vdev->v_read(vdev, bp, pbuf, offset, psize))
  931                         continue;
  932 
  933                 if (cpfunc != ZIO_COMPRESS_OFF) {
  934                         if (zio_decompress_data(cpfunc, pbuf, psize,
  935                                 buf, lsize))
  936                                 return (EIO);
  937                 }
  938 
  939                 return (0);
  940         }
  941         printf("ZFS: i/o error - all block copies unavailable\n");
  942 
  943         return (EIO);
  944 }
  945 
  946 static int
  947 dnode_read(spa_t *spa, const dnode_phys_t *dnode, off_t offset, void *buf, size_t buflen)
  948 {
  949         int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
  950         int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
  951         int nlevels = dnode->dn_nlevels;
  952         int i, rc;
  953 
  954         /*
  955          * Note: bsize may not be a power of two here so we need to do an
  956          * actual divide rather than a bitshift.
  957          */
  958         while (buflen > 0) {
  959                 uint64_t bn = offset / bsize;
  960                 int boff = offset % bsize;
  961                 int ibn;
  962                 const blkptr_t *indbp;
  963                 blkptr_t bp;
  964 
  965                 if (bn > dnode->dn_maxblkid)
  966                         return (EIO);
  967 
  968                 if (dnode == dnode_cache_obj && bn == dnode_cache_bn)
  969                         goto cached;
  970 
  971                 indbp = dnode->dn_blkptr;
  972                 for (i = 0; i < nlevels; i++) {
  973                         /*
  974                          * Copy the bp from the indirect array so that
  975                          * we can re-use the scratch buffer for multi-level
  976                          * objects.
  977                          */
  978                         ibn = bn >> ((nlevels - i - 1) * ibshift);
  979                         ibn &= ((1 << ibshift) - 1);
  980                         bp = indbp[ibn];
  981                         rc = zio_read(spa, &bp, dnode_cache_buf);
  982                         if (rc)
  983                                 return (rc);
  984                         indbp = (const blkptr_t *) dnode_cache_buf;
  985                 }
  986                 dnode_cache_obj = dnode;
  987                 dnode_cache_bn = bn;
  988         cached:
  989 
  990                 /*
  991                  * The buffer contains our data block. Copy what we
  992                  * need from it and loop.
  993                  */ 
  994                 i = bsize - boff;
  995                 if (i > buflen) i = buflen;
  996                 memcpy(buf, &dnode_cache_buf[boff], i);
  997                 buf = ((char*) buf) + i;
  998                 offset += i;
  999                 buflen -= i;
 1000         }
 1001 
 1002         return (0);
 1003 }
 1004 
 1005 /*
 1006  * Lookup a value in a microzap directory. Assumes that the zap
 1007  * scratch buffer contains the directory contents.
 1008  */
 1009 static int
 1010 mzap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
 1011 {
 1012         const mzap_phys_t *mz;
 1013         const mzap_ent_phys_t *mze;
 1014         size_t size;
 1015         int chunks, i;
 1016 
 1017         /*
 1018          * Microzap objects use exactly one block. Read the whole
 1019          * thing.
 1020          */
 1021         size = dnode->dn_datablkszsec * 512;
 1022 
 1023         mz = (const mzap_phys_t *) zap_scratch;
 1024         chunks = size / MZAP_ENT_LEN - 1;
 1025 
 1026         for (i = 0; i < chunks; i++) {
 1027                 mze = &mz->mz_chunk[i];
 1028                 if (!strcmp(mze->mze_name, name)) {
 1029                         *value = mze->mze_value;
 1030                         return (0);
 1031                 }
 1032         }
 1033 
 1034         return (ENOENT);
 1035 }
 1036 
 1037 /*
 1038  * Compare a name with a zap leaf entry. Return non-zero if the name
 1039  * matches.
 1040  */
 1041 static int
 1042 fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, const char *name)
 1043 {
 1044         size_t namelen;
 1045         const zap_leaf_chunk_t *nc;
 1046         const char *p;
 1047 
 1048         namelen = zc->l_entry.le_name_length;
 1049                         
 1050         nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk);
 1051         p = name;
 1052         while (namelen > 0) {
 1053                 size_t len;
 1054                 len = namelen;
 1055                 if (len > ZAP_LEAF_ARRAY_BYTES)
 1056                         len = ZAP_LEAF_ARRAY_BYTES;
 1057                 if (memcmp(p, nc->l_array.la_array, len))
 1058                         return (0);
 1059                 p += len;
 1060                 namelen -= len;
 1061                 nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next);
 1062         }
 1063 
 1064         return 1;
 1065 }
 1066 
 1067 /*
 1068  * Extract a uint64_t value from a zap leaf entry.
 1069  */
 1070 static uint64_t
 1071 fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc)
 1072 {
 1073         const zap_leaf_chunk_t *vc;
 1074         int i;
 1075         uint64_t value;
 1076         const uint8_t *p;
 1077 
 1078         vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk);
 1079         for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) {
 1080                 value = (value << 8) | p[i];
 1081         }
 1082 
 1083         return value;
 1084 }
 1085 
 1086 /*
 1087  * Lookup a value in a fatzap directory. Assumes that the zap scratch
 1088  * buffer contains the directory header.
 1089  */
 1090 static int
 1091 fzap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
 1092 {
 1093         int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
 1094         zap_phys_t zh = *(zap_phys_t *) zap_scratch;
 1095         fat_zap_t z;
 1096         uint64_t *ptrtbl;
 1097         uint64_t hash;
 1098         int rc;
 1099 
 1100         if (zh.zap_magic != ZAP_MAGIC)
 1101                 return (EIO);
 1102 
 1103         z.zap_block_shift = ilog2(bsize);
 1104         z.zap_phys = (zap_phys_t *) zap_scratch;
 1105 
 1106         /*
 1107          * Figure out where the pointer table is and read it in if necessary.
 1108          */
 1109         if (zh.zap_ptrtbl.zt_blk) {
 1110                 rc = dnode_read(spa, dnode, zh.zap_ptrtbl.zt_blk * bsize,
 1111                                zap_scratch, bsize);
 1112                 if (rc)
 1113                         return (rc);
 1114                 ptrtbl = (uint64_t *) zap_scratch;
 1115         } else {
 1116                 ptrtbl = &ZAP_EMBEDDED_PTRTBL_ENT(&z, 0);
 1117         }
 1118 
 1119         hash = zap_hash(zh.zap_salt, name);
 1120 
 1121         zap_leaf_t zl;
 1122         zl.l_bs = z.zap_block_shift;
 1123 
 1124         off_t off = ptrtbl[hash >> (64 - zh.zap_ptrtbl.zt_shift)] << zl.l_bs;
 1125         zap_leaf_chunk_t *zc;
 1126 
 1127         rc = dnode_read(spa, dnode, off, zap_scratch, bsize);
 1128         if (rc)
 1129                 return (rc);
 1130 
 1131         zl.l_phys = (zap_leaf_phys_t *) zap_scratch;
 1132 
 1133         /*
 1134          * Make sure this chunk matches our hash.
 1135          */
 1136         if (zl.l_phys->l_hdr.lh_prefix_len > 0
 1137             && zl.l_phys->l_hdr.lh_prefix
 1138             != hash >> (64 - zl.l_phys->l_hdr.lh_prefix_len))
 1139                 return (ENOENT);
 1140 
 1141         /*
 1142          * Hash within the chunk to find our entry.
 1143          */
 1144         int shift = (64 - ZAP_LEAF_HASH_SHIFT(&zl) - zl.l_phys->l_hdr.lh_prefix_len);
 1145         int h = (hash >> shift) & ((1 << ZAP_LEAF_HASH_SHIFT(&zl)) - 1);
 1146         h = zl.l_phys->l_hash[h];
 1147         if (h == 0xffff)
 1148                 return (ENOENT);
 1149         zc = &ZAP_LEAF_CHUNK(&zl, h);
 1150         while (zc->l_entry.le_hash != hash) {
 1151                 if (zc->l_entry.le_next == 0xffff) {
 1152                         zc = 0;
 1153                         break;
 1154                 }
 1155                 zc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_next);
 1156         }
 1157         if (fzap_name_equal(&zl, zc, name)) {
 1158                 *value = fzap_leaf_value(&zl, zc);
 1159                 return (0);
 1160         }
 1161 
 1162         return (ENOENT);
 1163 }
 1164 
 1165 /*
 1166  * Lookup a name in a zap object and return its value as a uint64_t.
 1167  */
 1168 static int
 1169 zap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
 1170 {
 1171         int rc;
 1172         uint64_t zap_type;
 1173         size_t size = dnode->dn_datablkszsec * 512;
 1174 
 1175         rc = dnode_read(spa, dnode, 0, zap_scratch, size);
 1176         if (rc)
 1177                 return (rc);
 1178 
 1179         zap_type = *(uint64_t *) zap_scratch;
 1180         if (zap_type == ZBT_MICRO)
 1181                 return mzap_lookup(spa, dnode, name, value);
 1182         else
 1183                 return fzap_lookup(spa, dnode, name, value);
 1184 }
 1185 
 1186 #ifdef BOOT2
 1187 
 1188 /*
 1189  * List a microzap directory. Assumes that the zap scratch buffer contains
 1190  * the directory contents.
 1191  */
 1192 static int
 1193 mzap_list(spa_t *spa, const dnode_phys_t *dnode)
 1194 {
 1195         const mzap_phys_t *mz;
 1196         const mzap_ent_phys_t *mze;
 1197         size_t size;
 1198         int chunks, i;
 1199 
 1200         /*
 1201          * Microzap objects use exactly one block. Read the whole
 1202          * thing.
 1203          */
 1204         size = dnode->dn_datablkszsec * 512;
 1205         mz = (const mzap_phys_t *) zap_scratch;
 1206         chunks = size / MZAP_ENT_LEN - 1;
 1207 
 1208         for (i = 0; i < chunks; i++) {
 1209                 mze = &mz->mz_chunk[i];
 1210                 if (mze->mze_name[0])
 1211                         //printf("%-32s 0x%llx\n", mze->mze_name, mze->mze_value);
 1212                         printf("%s\n", mze->mze_name);
 1213         }
 1214 
 1215         return (0);
 1216 }
 1217 
 1218 /*
 1219  * List a fatzap directory. Assumes that the zap scratch buffer contains
 1220  * the directory header.
 1221  */
 1222 static int
 1223 fzap_list(spa_t *spa, const dnode_phys_t *dnode)
 1224 {
 1225         int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
 1226         zap_phys_t zh = *(zap_phys_t *) zap_scratch;
 1227         fat_zap_t z;
 1228         int i, j;
 1229 
 1230         if (zh.zap_magic != ZAP_MAGIC)
 1231                 return (EIO);
 1232 
 1233         z.zap_block_shift = ilog2(bsize);
 1234         z.zap_phys = (zap_phys_t *) zap_scratch;
 1235 
 1236         /*
 1237          * This assumes that the leaf blocks start at block 1. The
 1238          * documentation isn't exactly clear on this.
 1239          */
 1240         zap_leaf_t zl;
 1241         zl.l_bs = z.zap_block_shift;
 1242         for (i = 0; i < zh.zap_num_leafs; i++) {
 1243                 off_t off = (i + 1) << zl.l_bs;
 1244                 char name[256], *p;
 1245                 uint64_t value;
 1246 
 1247                 if (dnode_read(spa, dnode, off, zap_scratch, bsize))
 1248                         return (EIO);
 1249 
 1250                 zl.l_phys = (zap_leaf_phys_t *) zap_scratch;
 1251 
 1252                 for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
 1253                         zap_leaf_chunk_t *zc, *nc;
 1254                         int namelen;
 1255 
 1256                         zc = &ZAP_LEAF_CHUNK(&zl, j);
 1257                         if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 1258                                 continue;
 1259                         namelen = zc->l_entry.le_name_length;
 1260                         if (namelen > sizeof(name))
 1261                                 namelen = sizeof(name);
 1262                         
 1263                         /*
 1264                          * Paste the name back together.
 1265                          */
 1266                         nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 1267                         p = name;
 1268                         while (namelen > 0) {
 1269                                 int len;
 1270                                 len = namelen;
 1271                                 if (len > ZAP_LEAF_ARRAY_BYTES)
 1272                                         len = ZAP_LEAF_ARRAY_BYTES;
 1273                                 memcpy(p, nc->l_array.la_array, len);
 1274                                 p += len;
 1275                                 namelen -= len;
 1276                                 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 1277                         }
 1278 
 1279                         /*
 1280                          * Assume the first eight bytes of the value are
 1281                          * a uint64_t.
 1282                          */
 1283                         value = fzap_leaf_value(&zl, zc);
 1284 
 1285                         printf("%-32s 0x%llx\n", name, value);
 1286                 }
 1287         }
 1288 
 1289         return (0);
 1290 }
 1291 
 1292 /*
 1293  * List a zap directory.
 1294  */
 1295 static int
 1296 zap_list(spa_t *spa, const dnode_phys_t *dnode)
 1297 {
 1298         uint64_t zap_type;
 1299         size_t size = dnode->dn_datablkszsec * 512;
 1300 
 1301         if (dnode_read(spa, dnode, 0, zap_scratch, size))
 1302                 return (EIO);
 1303 
 1304         zap_type = *(uint64_t *) zap_scratch;
 1305         if (zap_type == ZBT_MICRO)
 1306                 return mzap_list(spa, dnode);
 1307         else
 1308                 return fzap_list(spa, dnode);
 1309 }
 1310 
 1311 #endif
 1312 
 1313 static int
 1314 objset_get_dnode(spa_t *spa, const objset_phys_t *os, uint64_t objnum, dnode_phys_t *dnode)
 1315 {
 1316         off_t offset;
 1317 
 1318         offset = objnum * sizeof(dnode_phys_t);
 1319         return dnode_read(spa, &os->os_meta_dnode, offset,
 1320                 dnode, sizeof(dnode_phys_t));
 1321 }
 1322 
 1323 /*
 1324  * Find the object set given the object number of its dataset object
 1325  * and return its details in *objset
 1326  */
 1327 static int
 1328 zfs_mount_dataset(spa_t *spa, uint64_t objnum, objset_phys_t *objset)
 1329 {
 1330         dnode_phys_t dataset;
 1331         dsl_dataset_phys_t *ds;
 1332 
 1333         if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) {
 1334                 printf("ZFS: can't find dataset %lld\n", objnum);
 1335                 return (EIO);
 1336         }
 1337 
 1338         ds = (dsl_dataset_phys_t *) &dataset.dn_bonus;
 1339         if (zio_read(spa, &ds->ds_bp, objset)) {
 1340                 printf("ZFS: can't read object set for dataset %lld\n", objnum);
 1341                 return (EIO);
 1342         }
 1343 
 1344         return (0);
 1345 }
 1346 
 1347 /*
 1348  * Find the object set pointed to by the BOOTFS property or the root
 1349  * dataset if there is none and return its details in *objset
 1350  */
 1351 static int
 1352 zfs_mount_root(spa_t *spa, objset_phys_t *objset)
 1353 {
 1354         dnode_phys_t dir, propdir;
 1355         uint64_t props, bootfs, root;
 1356 
 1357         /*
 1358          * Start with the MOS directory object.
 1359          */
 1360         if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) {
 1361                 printf("ZFS: can't read MOS object directory\n");
 1362                 return (EIO);
 1363         }
 1364 
 1365         /*
 1366          * Lookup the pool_props and see if we can find a bootfs.
 1367          */
 1368         if (zap_lookup(spa, &dir, DMU_POOL_PROPS, &props) == 0
 1369              && objset_get_dnode(spa, &spa->spa_mos, props, &propdir) == 0
 1370              && zap_lookup(spa, &propdir, "bootfs", &bootfs) == 0)
 1371                 return zfs_mount_dataset(spa, bootfs, objset);
 1372 
 1373         /*
 1374          * Lookup the root dataset directory
 1375          */
 1376         if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &root)
 1377             || objset_get_dnode(spa, &spa->spa_mos, root, &dir)) {
 1378                 printf("ZFS: can't find root dsl_dir\n");
 1379                 return (EIO);
 1380         }
 1381 
 1382         /*
 1383          * Use the information from the dataset directory's bonus buffer
 1384          * to find the dataset object and from that the object set itself.
 1385          */
 1386         dsl_dir_phys_t *dd = (dsl_dir_phys_t *) &dir.dn_bonus;
 1387         return zfs_mount_dataset(spa, dd->dd_head_dataset_obj, objset);
 1388 }
 1389 
 1390 static int
 1391 zfs_mount_pool(spa_t *spa)
 1392 {
 1393         /*
 1394          * Find the MOS and work our way in from there.
 1395          */
 1396         if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) {
 1397                 printf("ZFS: can't read MOS\n");
 1398                 return (EIO);
 1399         }
 1400 
 1401         /*
 1402          * Find the root object set
 1403          */
 1404         if (zfs_mount_root(spa, &spa->spa_root_objset)) {
 1405                 printf("Can't find root filesystem - giving up\n");
 1406                 return (EIO);
 1407         }
 1408 
 1409         return (0);
 1410 }
 1411 
 1412 /*
 1413  * Lookup a file and return its dnode.
 1414  */
 1415 static int
 1416 zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
 1417 {
 1418         int rc;
 1419         uint64_t objnum, rootnum, parentnum;
 1420         dnode_phys_t dn;
 1421         const znode_phys_t *zp = (const znode_phys_t *) dn.dn_bonus;
 1422         const char *p, *q;
 1423         char element[256];
 1424         char path[1024];
 1425         int symlinks_followed = 0;
 1426 
 1427         if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
 1428                 printf("ZFS: unexpected object set type %lld\n",
 1429                        spa->spa_root_objset.os_type);
 1430                 return (EIO);
 1431         }
 1432 
 1433         /*
 1434          * Get the root directory dnode.
 1435          */
 1436         rc = objset_get_dnode(spa, &spa->spa_root_objset, MASTER_NODE_OBJ, &dn);
 1437         if (rc)
 1438                 return (rc);
 1439 
 1440         rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, &rootnum);
 1441         if (rc)
 1442                 return (rc);
 1443 
 1444         rc = objset_get_dnode(spa, &spa->spa_root_objset, rootnum, &dn);
 1445         if (rc)
 1446                 return (rc);
 1447 
 1448         objnum = rootnum;
 1449         p = upath;
 1450         while (p && *p) {
 1451                 while (*p == '/')
 1452                         p++;
 1453                 if (!*p)
 1454                         break;
 1455                 q = strchr(p, '/');
 1456                 if (q) {
 1457                         memcpy(element, p, q - p);
 1458                         element[q - p] = 0;
 1459                         p = q;
 1460                 } else {
 1461                         strcpy(element, p);
 1462                         p = 0;
 1463                 }
 1464 
 1465                 if ((zp->zp_mode >> 12) != 0x4) {
 1466                         return (ENOTDIR);
 1467                 }
 1468 
 1469                 parentnum = objnum;
 1470                 rc = zap_lookup(spa, &dn, element, &objnum);
 1471                 if (rc)
 1472                         return (rc);
 1473                 objnum = ZFS_DIRENT_OBJ(objnum);
 1474 
 1475                 rc = objset_get_dnode(spa, &spa->spa_root_objset, objnum, &dn);
 1476                 if (rc)
 1477                         return (rc);
 1478 
 1479                 /*
 1480                  * Check for symlink.
 1481                  */
 1482                 if ((zp->zp_mode >> 12) == 0xa) {
 1483                         if (symlinks_followed > 10)
 1484                                 return (EMLINK);
 1485                         symlinks_followed++;
 1486 
 1487                         /*
 1488                          * Read the link value and copy the tail of our
 1489                          * current path onto the end.
 1490                          */
 1491                         if (p)
 1492                                 strcpy(&path[zp->zp_size], p);
 1493                         else
 1494                                 path[zp->zp_size] = 0;
 1495                         if (zp->zp_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) {
 1496                                 memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)],
 1497                                         zp->zp_size);
 1498                         } else {
 1499                                 rc = dnode_read(spa, &dn, 0, path, zp->zp_size);
 1500                                 if (rc)
 1501                                         return (rc);
 1502                         }
 1503 
 1504                         /*
 1505                          * Restart with the new path, starting either at
 1506                          * the root or at the parent depending whether or
 1507                          * not the link is relative.
 1508                          */
 1509                         p = path;
 1510                         if (*p == '/')
 1511                                 objnum = rootnum;
 1512                         else
 1513                                 objnum = parentnum;
 1514                         objset_get_dnode(spa, &spa->spa_root_objset, objnum, &dn);
 1515                 }
 1516         }
 1517 
 1518         *dnode = dn;
 1519         return (0);
 1520 }

Cache object: 0f5f56e381b90e5cabed087c058d2e45


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.