/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

/*
 * Pool import support functions.
 *
 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
 * these commands are expected to run in the global zone, we can assume
 * that the devices are all readable when called.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device. If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded. Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed. Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/dktp/fdisk.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include <libzfs.h>

#include "zutil_import.h"

#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * Skip devices with well known prefixes:
 * there can be side effects when opening devices which need to be avoided.
 *
 * hpet        - High Precision Event Timer
 * watchdog[N] - Watchdog must be closed in a special way.
 */
static boolean_t
should_skip_dev(const char *dev)
{
	return ((strcmp(dev, "watchdog") == 0) ||
	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
	    (strcmp(dev, "hpet") == 0));
}

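/*
 * Flush the buffer cache for the given open block device.
 */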
int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}

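/*
 * Open a single candidate device, read its ZFS label, and record the label
 * config in the rdsk_node_t when the label's vdev guid matches the expected
 * guid (if one was set). When rn_labelpaths is set, additional entries are
 * queued for the path and devid stored in the label and processed
 * recursively.
 */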
void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	if (should_skip_dev(zfs_basename(rn->rn_name)))
		return;

	/*
	 * Ignore failed stats. We only want regular files and block devices.
	 * Ignore files that are too small to hold a zpool.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache which may be stale for multipath devices. An EINVAL errno
	 * indicates O_DIRECT is unsupported, so fall back to just O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

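	/* Skip devices for which no valid labels were found. */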
	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid. Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		char *path = NULL;
		char *devid = NULL;
		char *env = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int timeout;
		int error;

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

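		/*
		 * The udev settle timeout (in milliseconds) may be overridden
		 * with the ZPOOL_IMPORT_UDEV_TIMEOUT_MS environment variable.
		 */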
		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
		    timeout < 0) {
			timeout = DISK_LABEL_WAIT;
		}

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_label_disk_wait(rn->rn_name, timeout);

		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}
	}
}

static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};

const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}

/*
 * Given a full path to a device, determine if that device appears in the
 * import search path. If it does, return the first match and store the
 * index in the passed 'order' variable, otherwise return an error.
 */
static int
zfs_path_order(const char *name, int *order)
{
	const char *env = getenv("ZPOOL_IMPORT_PATH");

	if (env) {
		for (int i = 0; ; ++i) {
			env += strspn(env, ":");
			size_t dirlen = strcspn(env, ":");
			if (dirlen) {
				if (strncmp(name, env, dirlen) == 0) {
					*order = i;
					return (0);
				}

				env += dirlen;
			} else
				break;
		}
	} else {
		for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
		    ++i) {
			if (strncmp(name, zpool_default_import_path[i],
			    strlen(zpool_default_import_path[i])) == 0) {
				*order = i;
				return (0);
			}
		}
	}

	return (ENOENT);
}

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	rdsk_node_t *slice;
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	avl_index_t where;
	int error;

	*slice_cache = NULL;

	error = blkid_get_cache(&cache, NULL);
	if (error != 0)
		return (error);

	error = blkid_probe_all_new(cache);
	if (error != 0) {
		blkid_put_cache(cache);
		return (error);
	}

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (EINVAL);
	}

	/* Only const char *s since 2.32 */
	error = blkid_dev_set_search(iter,
	    (char *)"TYPE", (char *)"zfs_member");
	if (error != 0) {
		blkid_dev_iterate_end(iter);
		blkid_put_cache(cache);
		return (error);
	}

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

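	/* Add a cache entry for each device blkid reports as a zfs_member. */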
	while (blkid_dev_next(iter, &dev) == 0) {
		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		slice->rn_labelpaths = B_TRUE;

		error = zfs_path_order(slice->rn_name, &slice->rn_order);
		if (error == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where)) {
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);

	return (0);
}

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */

typedef struct vdev_dev_strs {
	char	vds_devid[128];
	char	vds_devphys[128];
} vdev_dev_strs_t;

#ifdef HAVE_LIBUDEV

/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	char devbyid[MAXPATHLEN];

	/* The bus based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes use the persistent uuid based identifier
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes use the persistent /dev/zvol/dataset identifier
		 */
		entry = udev_device_get_devlinks_list_entry(dev);
		while (entry != NULL) {
			const char *name;

			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
			entry = udev_list_entry_get_next(entry);
		}

		/*
		 * NVME 'by-id' symlinks are similar to bus case
		 */
		struct udev_device *parent;

		parent = udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL);
		if (parent != NULL)
			bus = "nvme";	/* continue with bus symlink search */
		else
			return (ENODATA);
	}

	/*
	 * locate the bus specific by-id link
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		const char *name;

		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
			name += strlen(DEV_BYID_PATH);
			(void) strlcpy(bufptr, name, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	const char *physpath = NULL;
	struct udev_list_entry *entry;

	/*
	 * Normal disks use ID_PATH for their physical path.
	 */
	physpath = udev_device_get_property_value(dev, "ID_PATH");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * Device mapper devices are virtual and don't have a physical
	 * path. For them we use ID_VDEV instead, which is set up via the
	 * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
	 * to a virtual device. If you don't have vdev_id.conf set up,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	physpath = udev_device_get_property_value(dev, "ID_VDEV");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	/*
	 * For all other devices fall back to using the by-uuid name.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * A disk is considered a multipath whole disk when:
 *	DEVNAME key value has "dm-"
 *	DM_NAME key value has "mpath" prefix
 *	DM_UUID key exists
 *	ID_PART_TABLE_TYPE key does not exist or is not gpt
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
	    (uuid != NULL)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

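/*
 * Determine if udev has finished processing the device, i.e. all udev
 * rules have run and the device links have been created.
 */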
static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}

#else

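/* Stub implementations used when libudev support is not compiled in. */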
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

#endif /* HAVE_LIBUDEV */

/*
 * Wait up to timeout_ms for udev to set up the device node. The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle. At this point the device can be accessed reliably. Depending on
 * the complexity of the udev rules this process could take several seconds.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

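	/*
	 * Poll until the path resolves to a device node, udev reports the
	 * device initialized, and every device link stats successfully and
	 * has remained stable for settle_ms.
	 */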
	do {
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				settle = 0;
				ret = ENODEV;
				break;
			}

			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}

/*
 * Encode the persistent device strings
 * used for the vdev disk label
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break; /* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	(void) path;
	(void) ds;
	(void) wholedisk;
	return (ENOENT);
#endif
}

/*
 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
 * in the nvlist (if applicable). Like:
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
static void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path)
{
	char *upath, *spath;

	/* Add enclosure sysfs path (if disk is in an enclosure). */
	upath = zfs_get_underlying_path(path);
	spath = zfs_get_enclosure_sysfs_path(upath);

	if (spath) {
		nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath);
	} else {
		nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}

	free(upath);
	free(spath);
}

/*
 * This will get called for each leaf vdev.
 */
static int
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data, (void) data;

	char *path = NULL;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return (1);

	/* Rescan our enclosure sysfs path for this vdev */
	update_vdev_config_dev_sysfs_path(nv, path);
	return (0);
}

/*
 * Given an nvlist for our pool (with vdev tree), iterate over all the
 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
 */
void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
	nvlist_t *nvroot = NULL;
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
}

/*
 * Update a leaf vdev's persistent device strings
 *
 * - only applies for a dedicated leaf vdev (aka whole disk)
 * - updated during pool create|add|attach|import
 * - used for device matching during auto-{online,expand,replace}
 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
 *
 * single device node example:
 *	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
 *	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 *	devid:		'dm-uuid-mpath-35000c5006304de3f'
 *
 * We also store the enclosure sysfs path for turning on enclosure LEDs
 * (if applicable):
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	char *env, *type, *path;
	uint64_t wholedisk = 0;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 *	Older OpenZFS implementations had issues when attempting to
	 *	display pool config VDEV names if a "devid" NVP value is
	 *	present in the pool's config.
	 *
	 *	For example, a pool that originated on illumos platform would
	 *	have a devid value in the config and "zpool status" would fail
	 *	when listing the config.
	 *
	 *	A pool can be stripped of any "devid" values on import or
	 *	prevented from adding them on zpool create|add by setting
	 *	ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * Update device string values in the config nvlist.
	 */
	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
		if (vds.vds_devphys[0] != '\0') {
			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
			    vds.vds_devphys);
		}
		update_vdev_config_dev_sysfs_path(nv, path);
	} else {
		/* Clear out any stale entries. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}
}