FreeBSD/Linux Kernel Cross Reference
sys/boot/zfs/zfsimpl.c
1 /*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: releng/8.0/sys/boot/zfs/zfsimpl.c 192194 2009-05-16 10:48:20Z dfr $");
29
30 /*
31 * Stand-alone ZFS file reader.
32 */
33
34 #include "zfsimpl.h"
35 #include "zfssubr.c"
36
37 /*
38 * List of all vdevs, chained through v_alllink.
39 */
40 static vdev_list_t zfs_vdevs;
41
42 /*
43 * List of all pools, chained through spa_link.
44 */
45 static spa_list_t zfs_pools;
46
47 static uint64_t zfs_crc64_table[256];
48 static const dnode_phys_t *dnode_cache_obj = 0;
49 static uint64_t dnode_cache_bn;
50 static char *dnode_cache_buf;
51 static char *zap_scratch;
52 static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr;
53
54 #define TEMP_SIZE (1*SPA_MAXBLOCKSIZE)
55
56 static void
57 zfs_init(void)
58 {
59 STAILQ_INIT(&zfs_vdevs);
60 STAILQ_INIT(&zfs_pools);
61
62 zfs_temp_buf = malloc(TEMP_SIZE);
63 zfs_temp_end = zfs_temp_buf + TEMP_SIZE;
64 zfs_temp_ptr = zfs_temp_buf;
65 dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
66 zap_scratch = malloc(SPA_MAXBLOCKSIZE);
67
68 zfs_init_crc();
69 }
70
71 static char *
72 zfs_alloc_temp(size_t sz)
73 {
74 char *p;
75
76 if (zfs_temp_ptr + sz > zfs_temp_end) {
77 printf("ZFS: out of temporary buffer space\n");
78 for (;;) ;
79 }
80 p = zfs_temp_ptr;
81 zfs_temp_ptr += sz;
82
83 return (p);
84 }
85
86 static void
87 zfs_reset_temp(void)
88 {
89
90 zfs_temp_ptr = zfs_temp_buf;
91 }
92
/*
 * Decode one big-endian 32-bit integer from the XDR stream at *xdr into
 * *ip and advance *xdr by four bytes.  Always returns 0.
 */
static int
xdr_int(const unsigned char **xdr, int *ip)
{
	const unsigned char *p = *xdr;
	unsigned int v;

	/*
	 * Assemble the value in unsigned arithmetic: shifting a promoted
	 * (signed) byte >= 0x80 left by 24 would overflow int.
	 */
	v = ((unsigned int)p[0] << 24)
	    | ((unsigned int)p[1] << 16)
	    | ((unsigned int)p[2] << 8)
	    | ((unsigned int)p[3] << 0);
	*ip = (int)v;
	*xdr = p + 4;
	return (0);
}
103
104 static int
105 xdr_u_int(const unsigned char **xdr, u_int *ip)
106 {
107 *ip = ((*xdr)[0] << 24)
108 | ((*xdr)[1] << 16)
109 | ((*xdr)[2] << 8)
110 | ((*xdr)[3] << 0);
111 (*xdr) += 4;
112 return (0);
113 }
114
115 static int
116 xdr_uint64_t(const unsigned char **xdr, uint64_t *lp)
117 {
118 u_int hi, lo;
119
120 xdr_u_int(xdr, &hi);
121 xdr_u_int(xdr, &lo);
122 *lp = (((uint64_t) hi) << 32) | lo;
123 return (0);
124 }
125
126 static int
127 nvlist_find(const unsigned char *nvlist, const char *name, int type,
128 int* elementsp, void *valuep)
129 {
130 const unsigned char *p, *pair;
131 int junk;
132 int encoded_size, decoded_size;
133
134 p = nvlist;
135 xdr_int(&p, &junk);
136 xdr_int(&p, &junk);
137
138 pair = p;
139 xdr_int(&p, &encoded_size);
140 xdr_int(&p, &decoded_size);
141 while (encoded_size && decoded_size) {
142 int namelen, pairtype, elements;
143 const char *pairname;
144
145 xdr_int(&p, &namelen);
146 pairname = (const char*) p;
147 p += roundup(namelen, 4);
148 xdr_int(&p, &pairtype);
149
150 if (!memcmp(name, pairname, namelen) && type == pairtype) {
151 xdr_int(&p, &elements);
152 if (elementsp)
153 *elementsp = elements;
154 if (type == DATA_TYPE_UINT64) {
155 xdr_uint64_t(&p, (uint64_t *) valuep);
156 return (0);
157 } else if (type == DATA_TYPE_STRING) {
158 int len;
159 xdr_int(&p, &len);
160 (*(const char**) valuep) = (const char*) p;
161 return (0);
162 } else if (type == DATA_TYPE_NVLIST
163 || type == DATA_TYPE_NVLIST_ARRAY) {
164 (*(const unsigned char**) valuep) =
165 (const unsigned char*) p;
166 return (0);
167 } else {
168 return (EIO);
169 }
170 } else {
171 /*
172 * Not the pair we are looking for, skip to the next one.
173 */
174 p = pair + encoded_size;
175 }
176
177 pair = p;
178 xdr_int(&p, &encoded_size);
179 xdr_int(&p, &decoded_size);
180 }
181
182 return (EIO);
183 }
184
/*
 * Step over one complete XDR-encoded nvlist and return the address just
 * past its terminating (zero-sized) pair header, which is where the
 * next nvlist of an nvlist array begins.
 */
static const unsigned char *
nvlist_next(const unsigned char *nvlist)
{
	const unsigned char *cursor, *pair_start;
	int ignored;
	int enc_size, dec_size;

	/* Skip the two header words. */
	cursor = nvlist;
	xdr_int(&cursor, &ignored);
	xdr_int(&cursor, &ignored);

	for (;;) {
		pair_start = cursor;
		xdr_int(&cursor, &enc_size);
		xdr_int(&cursor, &dec_size);
		if (enc_size == 0 || dec_size == 0)
			break;
		/* Jump to the next pair using the encoded size. */
		cursor = pair_start + enc_size;
	}

	return (cursor);
}
212
213 #ifdef TEST
214
215 static const unsigned char *
216 nvlist_print(const unsigned char *nvlist, unsigned int indent)
217 {
218 static const char* typenames[] = {
219 "DATA_TYPE_UNKNOWN",
220 "DATA_TYPE_BOOLEAN",
221 "DATA_TYPE_BYTE",
222 "DATA_TYPE_INT16",
223 "DATA_TYPE_UINT16",
224 "DATA_TYPE_INT32",
225 "DATA_TYPE_UINT32",
226 "DATA_TYPE_INT64",
227 "DATA_TYPE_UINT64",
228 "DATA_TYPE_STRING",
229 "DATA_TYPE_BYTE_ARRAY",
230 "DATA_TYPE_INT16_ARRAY",
231 "DATA_TYPE_UINT16_ARRAY",
232 "DATA_TYPE_INT32_ARRAY",
233 "DATA_TYPE_UINT32_ARRAY",
234 "DATA_TYPE_INT64_ARRAY",
235 "DATA_TYPE_UINT64_ARRAY",
236 "DATA_TYPE_STRING_ARRAY",
237 "DATA_TYPE_HRTIME",
238 "DATA_TYPE_NVLIST",
239 "DATA_TYPE_NVLIST_ARRAY",
240 "DATA_TYPE_BOOLEAN_VALUE",
241 "DATA_TYPE_INT8",
242 "DATA_TYPE_UINT8",
243 "DATA_TYPE_BOOLEAN_ARRAY",
244 "DATA_TYPE_INT8_ARRAY",
245 "DATA_TYPE_UINT8_ARRAY"
246 };
247
248 unsigned int i, j;
249 const unsigned char *p, *pair;
250 int junk;
251 int encoded_size, decoded_size;
252
253 p = nvlist;
254 xdr_int(&p, &junk);
255 xdr_int(&p, &junk);
256
257 pair = p;
258 xdr_int(&p, &encoded_size);
259 xdr_int(&p, &decoded_size);
260 while (encoded_size && decoded_size) {
261 int namelen, pairtype, elements;
262 const char *pairname;
263
264 xdr_int(&p, &namelen);
265 pairname = (const char*) p;
266 p += roundup(namelen, 4);
267 xdr_int(&p, &pairtype);
268
269 for (i = 0; i < indent; i++)
270 printf(" ");
271 printf("%s %s", typenames[pairtype], pairname);
272
273 xdr_int(&p, &elements);
274 switch (pairtype) {
275 case DATA_TYPE_UINT64: {
276 uint64_t val;
277 xdr_uint64_t(&p, &val);
278 printf(" = 0x%llx\n", val);
279 break;
280 }
281
282 case DATA_TYPE_STRING: {
283 int len;
284 xdr_int(&p, &len);
285 printf(" = \"%s\"\n", p);
286 break;
287 }
288
289 case DATA_TYPE_NVLIST:
290 printf("\n");
291 nvlist_print(p, indent + 1);
292 break;
293
294 case DATA_TYPE_NVLIST_ARRAY:
295 for (j = 0; j < elements; j++) {
296 printf("[%d]\n", j);
297 p = nvlist_print(p, indent + 1);
298 if (j != elements - 1) {
299 for (i = 0; i < indent; i++)
300 printf(" ");
301 printf("%s %s", typenames[pairtype], pairname);
302 }
303 }
304 break;
305
306 default:
307 printf("\n");
308 }
309
310 p = pair + encoded_size;
311
312 pair = p;
313 xdr_int(&p, &encoded_size);
314 xdr_int(&p, &decoded_size);
315 }
316
317 return p;
318 }
319
320 #endif
321
322 static int
323 vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
324 off_t offset, size_t size)
325 {
326 size_t psize;
327 int rc;
328
329 if (bp) {
330 psize = BP_GET_PSIZE(bp);
331 } else {
332 psize = size;
333 }
334
335 /*printf("ZFS: reading %d bytes at 0x%llx to %p\n", psize, offset, buf);*/
336 rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize);
337 if (rc)
338 return (rc);
339 if (bp && zio_checksum_error(bp, buf))
340 return (EIO);
341
342 return (0);
343 }
344
345 static int
346 vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
347 off_t offset, size_t bytes)
348 {
349
350 return (vdev_read_phys(vdev, bp, buf,
351 offset + VDEV_LABEL_START_SIZE, bytes));
352 }
353
354
355 static int
356 vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
357 off_t offset, size_t bytes)
358 {
359 vdev_t *kid;
360 int rc;
361
362 rc = EIO;
363 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
364 if (kid->v_state != VDEV_STATE_HEALTHY)
365 continue;
366 rc = kid->v_read(kid, bp, buf, offset, bytes);
367 if (!rc)
368 return (0);
369 }
370
371 return (rc);
372 }
373
374 static vdev_t *
375 vdev_find(uint64_t guid)
376 {
377 vdev_t *vdev;
378
379 STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink)
380 if (vdev->v_guid == guid)
381 return (vdev);
382
383 return (0);
384 }
385
386 static vdev_t *
387 vdev_create(uint64_t guid, vdev_read_t *read)
388 {
389 vdev_t *vdev;
390
391 vdev = malloc(sizeof(vdev_t));
392 memset(vdev, 0, sizeof(vdev_t));
393 STAILQ_INIT(&vdev->v_children);
394 vdev->v_guid = guid;
395 vdev->v_state = VDEV_STATE_OFFLINE;
396 vdev->v_read = read;
397 vdev->v_phys_read = 0;
398 vdev->v_read_priv = 0;
399 STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);
400
401 return (vdev);
402 }
403
404 static int
405 vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp)
406 {
407 int rc;
408 uint64_t guid, id, ashift, nparity;
409 const char *type;
410 const char *path;
411 vdev_t *vdev, *kid;
412 const unsigned char *kids;
413 int nkids, i;
414
415 if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID,
416 DATA_TYPE_UINT64, 0, &guid)
417 || nvlist_find(nvlist, ZPOOL_CONFIG_ID,
418 DATA_TYPE_UINT64, 0, &id)
419 || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE,
420 DATA_TYPE_STRING, 0, &type)) {
421 printf("ZFS: can't find vdev details\n");
422 return (ENOENT);
423 }
424
425 /*
426 * Assume that if we've seen this vdev tree before, this one
427 * will be identical.
428 */
429 vdev = vdev_find(guid);
430 if (vdev) {
431 if (vdevp)
432 *vdevp = vdev;
433 return (0);
434 }
435
436 if (strcmp(type, VDEV_TYPE_MIRROR)
437 && strcmp(type, VDEV_TYPE_DISK)
438 && strcmp(type, VDEV_TYPE_RAIDZ)) {
439 printf("ZFS: can only boot from disk, mirror or raidz vdevs\n");
440 return (EIO);
441 }
442
443 if (!strcmp(type, VDEV_TYPE_MIRROR))
444 vdev = vdev_create(guid, vdev_mirror_read);
445 else if (!strcmp(type, VDEV_TYPE_RAIDZ))
446 vdev = vdev_create(guid, vdev_raidz_read);
447 else
448 vdev = vdev_create(guid, vdev_disk_read);
449
450 vdev->v_id = id;
451 if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT,
452 DATA_TYPE_UINT64, 0, &ashift) == 0)
453 vdev->v_ashift = ashift;
454 else
455 vdev->v_ashift = 0;
456 if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY,
457 DATA_TYPE_UINT64, 0, &nparity) == 0)
458 vdev->v_nparity = nparity;
459 else
460 vdev->v_nparity = 0;
461 if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH,
462 DATA_TYPE_STRING, 0, &path) == 0) {
463 if (strlen(path) > 5
464 && path[0] == '/'
465 && path[1] == 'd'
466 && path[2] == 'e'
467 && path[3] == 'v'
468 && path[4] == '/')
469 path += 5;
470 vdev->v_name = strdup(path);
471 } else {
472 if (!strcmp(type, "raidz")) {
473 if (vdev->v_nparity == 1)
474 vdev->v_name = "raidz1";
475 else
476 vdev->v_name = "raidz2";
477 } else {
478 vdev->v_name = strdup(type);
479 }
480 }
481 rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN,
482 DATA_TYPE_NVLIST_ARRAY, &nkids, &kids);
483 /*
484 * Its ok if we don't have any kids.
485 */
486 if (rc == 0) {
487 vdev->v_nchildren = nkids;
488 for (i = 0; i < nkids; i++) {
489 rc = vdev_init_from_nvlist(kids, &kid);
490 if (rc)
491 return (rc);
492 STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink);
493 kids = nvlist_next(kids);
494 }
495 } else {
496 vdev->v_nchildren = 0;
497 }
498
499 if (vdevp)
500 *vdevp = vdev;
501 return (0);
502 }
503
504 static void
505 vdev_set_state(vdev_t *vdev)
506 {
507 vdev_t *kid;
508 int good_kids;
509 int bad_kids;
510
511 /*
512 * A mirror or raidz is healthy if all its kids are healthy. A
513 * mirror is degraded if any of its kids is healthy; a raidz
514 * is degraded if at most nparity kids are offline.
515 */
516 if (STAILQ_FIRST(&vdev->v_children)) {
517 good_kids = 0;
518 bad_kids = 0;
519 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
520 if (kid->v_state == VDEV_STATE_HEALTHY)
521 good_kids++;
522 else
523 bad_kids++;
524 }
525 if (bad_kids == 0) {
526 vdev->v_state = VDEV_STATE_HEALTHY;
527 } else {
528 if (vdev->v_read == vdev_mirror_read) {
529 if (good_kids) {
530 vdev->v_state = VDEV_STATE_DEGRADED;
531 } else {
532 vdev->v_state = VDEV_STATE_OFFLINE;
533 }
534 } else if (vdev->v_read == vdev_raidz_read) {
535 if (bad_kids > vdev->v_nparity) {
536 vdev->v_state = VDEV_STATE_OFFLINE;
537 } else {
538 vdev->v_state = VDEV_STATE_DEGRADED;
539 }
540 }
541 }
542 }
543 }
544
545 static spa_t *
546 spa_find_by_guid(uint64_t guid)
547 {
548 spa_t *spa;
549
550 STAILQ_FOREACH(spa, &zfs_pools, spa_link)
551 if (spa->spa_guid == guid)
552 return (spa);
553
554 return (0);
555 }
556
557 #ifdef BOOT2
558
559 static spa_t *
560 spa_find_by_name(const char *name)
561 {
562 spa_t *spa;
563
564 STAILQ_FOREACH(spa, &zfs_pools, spa_link)
565 if (!strcmp(spa->spa_name, name))
566 return (spa);
567
568 return (0);
569 }
570
571 #endif
572
573 static spa_t *
574 spa_create(uint64_t guid)
575 {
576 spa_t *spa;
577
578 spa = malloc(sizeof(spa_t));
579 memset(spa, 0, sizeof(spa_t));
580 STAILQ_INIT(&spa->spa_vdevs);
581 spa->spa_guid = guid;
582 STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link);
583
584 return (spa);
585 }
586
587 static const char *
588 state_name(vdev_state_t state)
589 {
590 static const char* names[] = {
591 "UNKNOWN",
592 "CLOSED",
593 "OFFLINE",
594 "CANT_OPEN",
595 "DEGRADED",
596 "ONLINE"
597 };
598 return names[state];
599 }
600
#ifdef BOOT2

/* BOOT2 builds have no pager; print directly. */
#define pager_printf printf

#else

/*
 * printf-style front end to pager_output(): format one message into a
 * local buffer and hand it to the pager.
 *
 * NOTE(review): the buffer is 80 bytes and vsprintf() is unbounded, so
 * callers must keep each formatted message shorter than that.
 */
static void
pager_printf(const char *fmt, ...)
{
	va_list ap;
	char text[80];

	va_start(ap, fmt);
	vsprintf(text, fmt, ap);
	va_end(ap);

	pager_output(text);
}

#endif
620
621 #define STATUS_FORMAT " %-16s %-10s\n"
622
623 static void
624 print_state(int indent, const char *name, vdev_state_t state)
625 {
626 int i;
627 char buf[512];
628
629 buf[0] = 0;
630 for (i = 0; i < indent; i++)
631 strcat(buf, " ");
632 strcat(buf, name);
633 pager_printf(STATUS_FORMAT, buf, state_name(state));
634
635 }
636
637 static void
638 vdev_status(vdev_t *vdev, int indent)
639 {
640 vdev_t *kid;
641 print_state(indent, vdev->v_name, vdev->v_state);
642
643 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
644 vdev_status(kid, indent + 1);
645 }
646 }
647
648 static void
649 spa_status(spa_t *spa)
650 {
651 vdev_t *vdev;
652 int good_kids, bad_kids, degraded_kids;
653 vdev_state_t state;
654
655 pager_printf(" pool: %s\n", spa->spa_name);
656 pager_printf("config:\n\n");
657 pager_printf(STATUS_FORMAT, "NAME", "STATE");
658
659 good_kids = 0;
660 degraded_kids = 0;
661 bad_kids = 0;
662 STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) {
663 if (vdev->v_state == VDEV_STATE_HEALTHY)
664 good_kids++;
665 else if (vdev->v_state == VDEV_STATE_DEGRADED)
666 degraded_kids++;
667 else
668 bad_kids++;
669 }
670
671 state = VDEV_STATE_CLOSED;
672 if (good_kids > 0 && (degraded_kids + bad_kids) == 0)
673 state = VDEV_STATE_HEALTHY;
674 else if ((good_kids + degraded_kids) > 0)
675 state = VDEV_STATE_DEGRADED;
676
677 print_state(0, spa->spa_name, state);
678 STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) {
679 vdev_status(vdev, 1);
680 }
681 }
682
683 static void
684 spa_all_status(void)
685 {
686 spa_t *spa;
687 int first = 1;
688
689 STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
690 if (!first)
691 pager_printf("\n");
692 first = 0;
693 spa_status(spa);
694 }
695 }
696
697 static int
698 vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
699 {
700 vdev_t vtmp;
701 vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch;
702 spa_t *spa;
703 vdev_t *vdev, *top_vdev, *pool_vdev;
704 off_t off;
705 blkptr_t bp;
706 const unsigned char *nvlist;
707 uint64_t val;
708 uint64_t guid;
709 uint64_t pool_txg, pool_guid;
710 const char *pool_name;
711 const unsigned char *vdevs;
712 int i, rc;
713 char upbuf[1024];
714 const struct uberblock *up;
715
716 /*
717 * Load the vdev label and figure out which
718 * uberblock is most current.
719 */
720 memset(&vtmp, 0, sizeof(vtmp));
721 vtmp.v_phys_read = read;
722 vtmp.v_read_priv = read_priv;
723 off = offsetof(vdev_label_t, vl_vdev_phys);
724 BP_ZERO(&bp);
725 BP_SET_LSIZE(&bp, sizeof(vdev_phys_t));
726 BP_SET_PSIZE(&bp, sizeof(vdev_phys_t));
727 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
728 BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
729 ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
730 if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0))
731 return (EIO);
732
733 if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) {
734 return (EIO);
735 }
736
737 nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4;
738
739 if (nvlist_find(nvlist,
740 ZPOOL_CONFIG_VERSION,
741 DATA_TYPE_UINT64, 0, &val)) {
742 return (EIO);
743 }
744
745 if (val > SPA_VERSION) {
746 printf("ZFS: unsupported ZFS version %u (should be %u)\n",
747 (unsigned) val, (unsigned) SPA_VERSION);
748 return (EIO);
749 }
750
751 if (nvlist_find(nvlist,
752 ZPOOL_CONFIG_POOL_STATE,
753 DATA_TYPE_UINT64, 0, &val)) {
754 return (EIO);
755 }
756
757 #ifndef TEST
758 if (val != POOL_STATE_ACTIVE) {
759 /*
760 * Don't print a message here. If we happen to reboot
761 * while where is an exported pool around, we don't
762 * need a cascade of confusing messages during boot.
763 */
764 /*printf("ZFS: pool is not active\n");*/
765 return (EIO);
766 }
767 #endif
768
769 if (nvlist_find(nvlist,
770 ZPOOL_CONFIG_POOL_TXG,
771 DATA_TYPE_UINT64, 0, &pool_txg)
772 || nvlist_find(nvlist,
773 ZPOOL_CONFIG_POOL_GUID,
774 DATA_TYPE_UINT64, 0, &pool_guid)
775 || nvlist_find(nvlist,
776 ZPOOL_CONFIG_POOL_NAME,
777 DATA_TYPE_STRING, 0, &pool_name)) {
778 /*
779 * Cache and spare devices end up here - just ignore
780 * them.
781 */
782 /*printf("ZFS: can't find pool details\n");*/
783 return (EIO);
784 }
785
786 /*
787 * Create the pool if this is the first time we've seen it.
788 */
789 spa = spa_find_by_guid(pool_guid);
790 if (!spa) {
791 spa = spa_create(pool_guid);
792 spa->spa_name = strdup(pool_name);
793 }
794 if (pool_txg > spa->spa_txg)
795 spa->spa_txg = pool_txg;
796
797 /*
798 * Get the vdev tree and create our in-core copy of it.
799 * If we already have a healthy vdev with this guid, this must
800 * be some kind of alias (overlapping slices, dangerously dedicated
801 * disks etc).
802 */
803 if (nvlist_find(nvlist,
804 ZPOOL_CONFIG_GUID,
805 DATA_TYPE_UINT64, 0, &guid)) {
806 return (EIO);
807 }
808 vdev = vdev_find(guid);
809 if (vdev && vdev->v_state == VDEV_STATE_HEALTHY) {
810 return (EIO);
811 }
812
813 if (nvlist_find(nvlist,
814 ZPOOL_CONFIG_VDEV_TREE,
815 DATA_TYPE_NVLIST, 0, &vdevs)) {
816 return (EIO);
817 }
818 rc = vdev_init_from_nvlist(vdevs, &top_vdev);
819 if (rc)
820 return (rc);
821
822 /*
823 * Add the toplevel vdev to the pool if its not already there.
824 */
825 STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink)
826 if (top_vdev == pool_vdev)
827 break;
828 if (!pool_vdev && top_vdev)
829 STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink);
830
831 /*
832 * We should already have created an incomplete vdev for this
833 * vdev. Find it and initialise it with our read proc.
834 */
835 vdev = vdev_find(guid);
836 if (vdev) {
837 vdev->v_phys_read = read;
838 vdev->v_read_priv = read_priv;
839 vdev->v_state = VDEV_STATE_HEALTHY;
840 } else {
841 printf("ZFS: inconsistent nvlist contents\n");
842 return (EIO);
843 }
844
845 /*
846 * Re-evaluate top-level vdev state.
847 */
848 vdev_set_state(top_vdev);
849
850 /*
851 * Ok, we are happy with the pool so far. Lets find
852 * the best uberblock and then we can actually access
853 * the contents of the pool.
854 */
855 for (i = 0;
856 i < VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT;
857 i++) {
858 off = offsetof(vdev_label_t, vl_uberblock);
859 off += i << UBERBLOCK_SHIFT;
860 BP_ZERO(&bp);
861 DVA_SET_OFFSET(&bp.blk_dva[0], off);
862 BP_SET_LSIZE(&bp, 1 << UBERBLOCK_SHIFT);
863 BP_SET_PSIZE(&bp, 1 << UBERBLOCK_SHIFT);
864 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
865 BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
866 ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
867 if (vdev_read_phys(vdev, &bp, upbuf, off, 0))
868 continue;
869
870 up = (const struct uberblock *) upbuf;
871 if (up->ub_magic != UBERBLOCK_MAGIC)
872 continue;
873 if (up->ub_txg < spa->spa_txg)
874 continue;
875 if (up->ub_txg > spa->spa_uberblock.ub_txg) {
876 spa->spa_uberblock = *up;
877 } else if (up->ub_txg == spa->spa_uberblock.ub_txg) {
878 if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp)
879 spa->spa_uberblock = *up;
880 }
881 }
882
883 if (spap)
884 *spap = spa;
885 return (0);
886 }
887
/*
 * Return the base-2 logarithm of n if the bit pattern of n is an exact
 * power of two (1 << v for some v in 0..31), otherwise -1.
 */
static int
ilog2(int n)
{
	unsigned int bits = (unsigned int)n;
	int v;

	/*
	 * Compare in unsigned arithmetic: the signed expression 1 << 31
	 * overflows int.
	 */
	for (v = 0; v < 32; v++)
		if (bits == (1u << v))
			return (v);
	return (-1);
}
898
899 static int
900 zio_read(spa_t *spa, const blkptr_t *bp, void *buf)
901 {
902 int cpfunc = BP_GET_COMPRESS(bp);
903 size_t lsize = BP_GET_LSIZE(bp);
904 size_t psize = BP_GET_PSIZE(bp);
905 void *pbuf;
906 int i;
907
908 zfs_reset_temp();
909 if (cpfunc != ZIO_COMPRESS_OFF)
910 pbuf = zfs_alloc_temp(psize);
911 else
912 pbuf = buf;
913
914 for (i = 0; i < SPA_DVAS_PER_BP; i++) {
915 const dva_t *dva = &bp->blk_dva[i];
916 vdev_t *vdev;
917 int vdevid;
918 off_t offset;
919
920 if (!dva->dva_word[0] && !dva->dva_word[1])
921 continue;
922
923 vdevid = DVA_GET_VDEV(dva);
924 offset = DVA_GET_OFFSET(dva);
925 STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink)
926 if (vdev->v_id == vdevid)
927 break;
928 if (!vdev || !vdev->v_read)
929 continue;
930 if (vdev->v_read(vdev, bp, pbuf, offset, psize))
931 continue;
932
933 if (cpfunc != ZIO_COMPRESS_OFF) {
934 if (zio_decompress_data(cpfunc, pbuf, psize,
935 buf, lsize))
936 return (EIO);
937 }
938
939 return (0);
940 }
941 printf("ZFS: i/o error - all block copies unavailable\n");
942
943 return (EIO);
944 }
945
946 static int
947 dnode_read(spa_t *spa, const dnode_phys_t *dnode, off_t offset, void *buf, size_t buflen)
948 {
949 int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
950 int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
951 int nlevels = dnode->dn_nlevels;
952 int i, rc;
953
954 /*
955 * Note: bsize may not be a power of two here so we need to do an
956 * actual divide rather than a bitshift.
957 */
958 while (buflen > 0) {
959 uint64_t bn = offset / bsize;
960 int boff = offset % bsize;
961 int ibn;
962 const blkptr_t *indbp;
963 blkptr_t bp;
964
965 if (bn > dnode->dn_maxblkid)
966 return (EIO);
967
968 if (dnode == dnode_cache_obj && bn == dnode_cache_bn)
969 goto cached;
970
971 indbp = dnode->dn_blkptr;
972 for (i = 0; i < nlevels; i++) {
973 /*
974 * Copy the bp from the indirect array so that
975 * we can re-use the scratch buffer for multi-level
976 * objects.
977 */
978 ibn = bn >> ((nlevels - i - 1) * ibshift);
979 ibn &= ((1 << ibshift) - 1);
980 bp = indbp[ibn];
981 rc = zio_read(spa, &bp, dnode_cache_buf);
982 if (rc)
983 return (rc);
984 indbp = (const blkptr_t *) dnode_cache_buf;
985 }
986 dnode_cache_obj = dnode;
987 dnode_cache_bn = bn;
988 cached:
989
990 /*
991 * The buffer contains our data block. Copy what we
992 * need from it and loop.
993 */
994 i = bsize - boff;
995 if (i > buflen) i = buflen;
996 memcpy(buf, &dnode_cache_buf[boff], i);
997 buf = ((char*) buf) + i;
998 offset += i;
999 buflen -= i;
1000 }
1001
1002 return (0);
1003 }
1004
1005 /*
1006 * Lookup a value in a microzap directory. Assumes that the zap
1007 * scratch buffer contains the directory contents.
1008 */
1009 static int
1010 mzap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
1011 {
1012 const mzap_phys_t *mz;
1013 const mzap_ent_phys_t *mze;
1014 size_t size;
1015 int chunks, i;
1016
1017 /*
1018 * Microzap objects use exactly one block. Read the whole
1019 * thing.
1020 */
1021 size = dnode->dn_datablkszsec * 512;
1022
1023 mz = (const mzap_phys_t *) zap_scratch;
1024 chunks = size / MZAP_ENT_LEN - 1;
1025
1026 for (i = 0; i < chunks; i++) {
1027 mze = &mz->mz_chunk[i];
1028 if (!strcmp(mze->mze_name, name)) {
1029 *value = mze->mze_value;
1030 return (0);
1031 }
1032 }
1033
1034 return (ENOENT);
1035 }
1036
1037 /*
1038 * Compare a name with a zap leaf entry. Return non-zero if the name
1039 * matches.
1040 */
1041 static int
1042 fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, const char *name)
1043 {
1044 size_t namelen;
1045 const zap_leaf_chunk_t *nc;
1046 const char *p;
1047
1048 namelen = zc->l_entry.le_name_length;
1049
1050 nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk);
1051 p = name;
1052 while (namelen > 0) {
1053 size_t len;
1054 len = namelen;
1055 if (len > ZAP_LEAF_ARRAY_BYTES)
1056 len = ZAP_LEAF_ARRAY_BYTES;
1057 if (memcmp(p, nc->l_array.la_array, len))
1058 return (0);
1059 p += len;
1060 namelen -= len;
1061 nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next);
1062 }
1063
1064 return 1;
1065 }
1066
1067 /*
1068 * Extract a uint64_t value from a zap leaf entry.
1069 */
1070 static uint64_t
1071 fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc)
1072 {
1073 const zap_leaf_chunk_t *vc;
1074 int i;
1075 uint64_t value;
1076 const uint8_t *p;
1077
1078 vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk);
1079 for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) {
1080 value = (value << 8) | p[i];
1081 }
1082
1083 return value;
1084 }
1085
1086 /*
1087 * Lookup a value in a fatzap directory. Assumes that the zap scratch
1088 * buffer contains the directory header.
1089 */
1090 static int
1091 fzap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
1092 {
1093 int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1094 zap_phys_t zh = *(zap_phys_t *) zap_scratch;
1095 fat_zap_t z;
1096 uint64_t *ptrtbl;
1097 uint64_t hash;
1098 int rc;
1099
1100 if (zh.zap_magic != ZAP_MAGIC)
1101 return (EIO);
1102
1103 z.zap_block_shift = ilog2(bsize);
1104 z.zap_phys = (zap_phys_t *) zap_scratch;
1105
1106 /*
1107 * Figure out where the pointer table is and read it in if necessary.
1108 */
1109 if (zh.zap_ptrtbl.zt_blk) {
1110 rc = dnode_read(spa, dnode, zh.zap_ptrtbl.zt_blk * bsize,
1111 zap_scratch, bsize);
1112 if (rc)
1113 return (rc);
1114 ptrtbl = (uint64_t *) zap_scratch;
1115 } else {
1116 ptrtbl = &ZAP_EMBEDDED_PTRTBL_ENT(&z, 0);
1117 }
1118
1119 hash = zap_hash(zh.zap_salt, name);
1120
1121 zap_leaf_t zl;
1122 zl.l_bs = z.zap_block_shift;
1123
1124 off_t off = ptrtbl[hash >> (64 - zh.zap_ptrtbl.zt_shift)] << zl.l_bs;
1125 zap_leaf_chunk_t *zc;
1126
1127 rc = dnode_read(spa, dnode, off, zap_scratch, bsize);
1128 if (rc)
1129 return (rc);
1130
1131 zl.l_phys = (zap_leaf_phys_t *) zap_scratch;
1132
1133 /*
1134 * Make sure this chunk matches our hash.
1135 */
1136 if (zl.l_phys->l_hdr.lh_prefix_len > 0
1137 && zl.l_phys->l_hdr.lh_prefix
1138 != hash >> (64 - zl.l_phys->l_hdr.lh_prefix_len))
1139 return (ENOENT);
1140
1141 /*
1142 * Hash within the chunk to find our entry.
1143 */
1144 int shift = (64 - ZAP_LEAF_HASH_SHIFT(&zl) - zl.l_phys->l_hdr.lh_prefix_len);
1145 int h = (hash >> shift) & ((1 << ZAP_LEAF_HASH_SHIFT(&zl)) - 1);
1146 h = zl.l_phys->l_hash[h];
1147 if (h == 0xffff)
1148 return (ENOENT);
1149 zc = &ZAP_LEAF_CHUNK(&zl, h);
1150 while (zc->l_entry.le_hash != hash) {
1151 if (zc->l_entry.le_next == 0xffff) {
1152 zc = 0;
1153 break;
1154 }
1155 zc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_next);
1156 }
1157 if (fzap_name_equal(&zl, zc, name)) {
1158 *value = fzap_leaf_value(&zl, zc);
1159 return (0);
1160 }
1161
1162 return (ENOENT);
1163 }
1164
1165 /*
1166 * Lookup a name in a zap object and return its value as a uint64_t.
1167 */
1168 static int
1169 zap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value)
1170 {
1171 int rc;
1172 uint64_t zap_type;
1173 size_t size = dnode->dn_datablkszsec * 512;
1174
1175 rc = dnode_read(spa, dnode, 0, zap_scratch, size);
1176 if (rc)
1177 return (rc);
1178
1179 zap_type = *(uint64_t *) zap_scratch;
1180 if (zap_type == ZBT_MICRO)
1181 return mzap_lookup(spa, dnode, name, value);
1182 else
1183 return fzap_lookup(spa, dnode, name, value);
1184 }
1185
1186 #ifdef BOOT2
1187
1188 /*
1189 * List a microzap directory. Assumes that the zap scratch buffer contains
1190 * the directory contents.
1191 */
1192 static int
1193 mzap_list(spa_t *spa, const dnode_phys_t *dnode)
1194 {
1195 const mzap_phys_t *mz;
1196 const mzap_ent_phys_t *mze;
1197 size_t size;
1198 int chunks, i;
1199
1200 /*
1201 * Microzap objects use exactly one block. Read the whole
1202 * thing.
1203 */
1204 size = dnode->dn_datablkszsec * 512;
1205 mz = (const mzap_phys_t *) zap_scratch;
1206 chunks = size / MZAP_ENT_LEN - 1;
1207
1208 for (i = 0; i < chunks; i++) {
1209 mze = &mz->mz_chunk[i];
1210 if (mze->mze_name[0])
1211 //printf("%-32s 0x%llx\n", mze->mze_name, mze->mze_value);
1212 printf("%s\n", mze->mze_name);
1213 }
1214
1215 return (0);
1216 }
1217
1218 /*
1219 * List a fatzap directory. Assumes that the zap scratch buffer contains
1220 * the directory header.
1221 */
1222 static int
1223 fzap_list(spa_t *spa, const dnode_phys_t *dnode)
1224 {
1225 int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1226 zap_phys_t zh = *(zap_phys_t *) zap_scratch;
1227 fat_zap_t z;
1228 int i, j;
1229
1230 if (zh.zap_magic != ZAP_MAGIC)
1231 return (EIO);
1232
1233 z.zap_block_shift = ilog2(bsize);
1234 z.zap_phys = (zap_phys_t *) zap_scratch;
1235
1236 /*
1237 * This assumes that the leaf blocks start at block 1. The
1238 * documentation isn't exactly clear on this.
1239 */
1240 zap_leaf_t zl;
1241 zl.l_bs = z.zap_block_shift;
1242 for (i = 0; i < zh.zap_num_leafs; i++) {
1243 off_t off = (i + 1) << zl.l_bs;
1244 char name[256], *p;
1245 uint64_t value;
1246
1247 if (dnode_read(spa, dnode, off, zap_scratch, bsize))
1248 return (EIO);
1249
1250 zl.l_phys = (zap_leaf_phys_t *) zap_scratch;
1251
1252 for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
1253 zap_leaf_chunk_t *zc, *nc;
1254 int namelen;
1255
1256 zc = &ZAP_LEAF_CHUNK(&zl, j);
1257 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
1258 continue;
1259 namelen = zc->l_entry.le_name_length;
1260 if (namelen > sizeof(name))
1261 namelen = sizeof(name);
1262
1263 /*
1264 * Paste the name back together.
1265 */
1266 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
1267 p = name;
1268 while (namelen > 0) {
1269 int len;
1270 len = namelen;
1271 if (len > ZAP_LEAF_ARRAY_BYTES)
1272 len = ZAP_LEAF_ARRAY_BYTES;
1273 memcpy(p, nc->l_array.la_array, len);
1274 p += len;
1275 namelen -= len;
1276 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
1277 }
1278
1279 /*
1280 * Assume the first eight bytes of the value are
1281 * a uint64_t.
1282 */
1283 value = fzap_leaf_value(&zl, zc);
1284
1285 printf("%-32s 0x%llx\n", name, value);
1286 }
1287 }
1288
1289 return (0);
1290 }
1291
1292 /*
1293 * List a zap directory.
1294 */
1295 static int
1296 zap_list(spa_t *spa, const dnode_phys_t *dnode)
1297 {
1298 uint64_t zap_type;
1299 size_t size = dnode->dn_datablkszsec * 512;
1300
1301 if (dnode_read(spa, dnode, 0, zap_scratch, size))
1302 return (EIO);
1303
1304 zap_type = *(uint64_t *) zap_scratch;
1305 if (zap_type == ZBT_MICRO)
1306 return mzap_list(spa, dnode);
1307 else
1308 return fzap_list(spa, dnode);
1309 }
1310
1311 #endif
1312
1313 static int
1314 objset_get_dnode(spa_t *spa, const objset_phys_t *os, uint64_t objnum, dnode_phys_t *dnode)
1315 {
1316 off_t offset;
1317
1318 offset = objnum * sizeof(dnode_phys_t);
1319 return dnode_read(spa, &os->os_meta_dnode, offset,
1320 dnode, sizeof(dnode_phys_t));
1321 }
1322
1323 /*
1324 * Find the object set given the object number of its dataset object
1325 * and return its details in *objset
1326 */
1327 static int
1328 zfs_mount_dataset(spa_t *spa, uint64_t objnum, objset_phys_t *objset)
1329 {
1330 dnode_phys_t dataset;
1331 dsl_dataset_phys_t *ds;
1332
1333 if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) {
1334 printf("ZFS: can't find dataset %lld\n", objnum);
1335 return (EIO);
1336 }
1337
1338 ds = (dsl_dataset_phys_t *) &dataset.dn_bonus;
1339 if (zio_read(spa, &ds->ds_bp, objset)) {
1340 printf("ZFS: can't read object set for dataset %lld\n", objnum);
1341 return (EIO);
1342 }
1343
1344 return (0);
1345 }
1346
1347 /*
1348 * Find the object set pointed to by the BOOTFS property or the root
1349 * dataset if there is none and return its details in *objset
1350 */
1351 static int
1352 zfs_mount_root(spa_t *spa, objset_phys_t *objset)
1353 {
1354 dnode_phys_t dir, propdir;
1355 uint64_t props, bootfs, root;
1356
1357 /*
1358 * Start with the MOS directory object.
1359 */
1360 if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) {
1361 printf("ZFS: can't read MOS object directory\n");
1362 return (EIO);
1363 }
1364
1365 /*
1366 * Lookup the pool_props and see if we can find a bootfs.
1367 */
1368 if (zap_lookup(spa, &dir, DMU_POOL_PROPS, &props) == 0
1369 && objset_get_dnode(spa, &spa->spa_mos, props, &propdir) == 0
1370 && zap_lookup(spa, &propdir, "bootfs", &bootfs) == 0)
1371 return zfs_mount_dataset(spa, bootfs, objset);
1372
1373 /*
1374 * Lookup the root dataset directory
1375 */
1376 if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &root)
1377 || objset_get_dnode(spa, &spa->spa_mos, root, &dir)) {
1378 printf("ZFS: can't find root dsl_dir\n");
1379 return (EIO);
1380 }
1381
1382 /*
1383 * Use the information from the dataset directory's bonus buffer
1384 * to find the dataset object and from that the object set itself.
1385 */
1386 dsl_dir_phys_t *dd = (dsl_dir_phys_t *) &dir.dn_bonus;
1387 return zfs_mount_dataset(spa, dd->dd_head_dataset_obj, objset);
1388 }
1389
1390 static int
1391 zfs_mount_pool(spa_t *spa)
1392 {
1393 /*
1394 * Find the MOS and work our way in from there.
1395 */
1396 if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) {
1397 printf("ZFS: can't read MOS\n");
1398 return (EIO);
1399 }
1400
1401 /*
1402 * Find the root object set
1403 */
1404 if (zfs_mount_root(spa, &spa->spa_root_objset)) {
1405 printf("Can't find root filesystem - giving up\n");
1406 return (EIO);
1407 }
1408
1409 return (0);
1410 }
1411
1412 /*
1413 * Lookup a file and return its dnode.
1414 */
1415 static int
1416 zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
1417 {
1418 int rc;
1419 uint64_t objnum, rootnum, parentnum;
1420 dnode_phys_t dn;
1421 const znode_phys_t *zp = (const znode_phys_t *) dn.dn_bonus;
1422 const char *p, *q;
1423 char element[256];
1424 char path[1024];
1425 int symlinks_followed = 0;
1426
1427 if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
1428 printf("ZFS: unexpected object set type %lld\n",
1429 spa->spa_root_objset.os_type);
1430 return (EIO);
1431 }
1432
1433 /*
1434 * Get the root directory dnode.
1435 */
1436 rc = objset_get_dnode(spa, &spa->spa_root_objset, MASTER_NODE_OBJ, &dn);
1437 if (rc)
1438 return (rc);
1439
1440 rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, &rootnum);
1441 if (rc)
1442 return (rc);
1443
1444 rc = objset_get_dnode(spa, &spa->spa_root_objset, rootnum, &dn);
1445 if (rc)
1446 return (rc);
1447
1448 objnum = rootnum;
1449 p = upath;
1450 while (p && *p) {
1451 while (*p == '/')
1452 p++;
1453 if (!*p)
1454 break;
1455 q = strchr(p, '/');
1456 if (q) {
1457 memcpy(element, p, q - p);
1458 element[q - p] = 0;
1459 p = q;
1460 } else {
1461 strcpy(element, p);
1462 p = 0;
1463 }
1464
1465 if ((zp->zp_mode >> 12) != 0x4) {
1466 return (ENOTDIR);
1467 }
1468
1469 parentnum = objnum;
1470 rc = zap_lookup(spa, &dn, element, &objnum);
1471 if (rc)
1472 return (rc);
1473 objnum = ZFS_DIRENT_OBJ(objnum);
1474
1475 rc = objset_get_dnode(spa, &spa->spa_root_objset, objnum, &dn);
1476 if (rc)
1477 return (rc);
1478
1479 /*
1480 * Check for symlink.
1481 */
1482 if ((zp->zp_mode >> 12) == 0xa) {
1483 if (symlinks_followed > 10)
1484 return (EMLINK);
1485 symlinks_followed++;
1486
1487 /*
1488 * Read the link value and copy the tail of our
1489 * current path onto the end.
1490 */
1491 if (p)
1492 strcpy(&path[zp->zp_size], p);
1493 else
1494 path[zp->zp_size] = 0;
1495 if (zp->zp_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) {
1496 memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)],
1497 zp->zp_size);
1498 } else {
1499 rc = dnode_read(spa, &dn, 0, path, zp->zp_size);
1500 if (rc)
1501 return (rc);
1502 }
1503
1504 /*
1505 * Restart with the new path, starting either at
1506 * the root or at the parent depending whether or
1507 * not the link is relative.
1508 */
1509 p = path;
1510 if (*p == '/')
1511 objnum = rootnum;
1512 else
1513 objnum = parentnum;
1514 objset_get_dnode(spa, &spa->spa_root_objset, objnum, &dn);
1515 }
1516 }
1517
1518 *dnode = dn;
1519 return (0);
1520 }
Cache object: 0f5f56e381b90e5cabed087c058d2e45
|