The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
   23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   24  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
   25  * Copyright 2015 RackTop Systems.
   26  * Copyright (c) 2016, Intel Corporation.
   27  */
   28 
   29 /*
   30  * Pool import support functions.
   31  *
   32  * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
   33  * these commands are expected to run in the global zone, we can assume
   34  * that the devices are all readable when called.
   35  *
   36  * To import a pool, we rely on reading the configuration information from the
   37  * ZFS label of each device.  If we successfully read the label, then we
   38  * organize the configuration information in the following hierarchy:
   39  *
   40  *      pool guid -> toplevel vdev guid -> label txg
   41  *
   42  * Duplicate entries matching this same tuple will be discarded.  Once we have
   43  * examined every device, we pick the best label txg config for each toplevel
   44  * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
   45  * update any paths that have changed.  Finally, we attempt to import the pool
   46  * using our derived config, and record the results.
   47  */
   48 
   49 #include <ctype.h>
   50 #include <dirent.h>
   51 #include <errno.h>
   52 #include <libintl.h>
   53 #include <libgen.h>
   54 #include <stddef.h>
   55 #include <stdlib.h>
   56 #include <stdio.h>
   57 #include <string.h>
   58 #include <sys/stat.h>
   59 #include <unistd.h>
   60 #include <fcntl.h>
   61 #include <sys/dktp/fdisk.h>
   62 #include <sys/vdev_impl.h>
   63 #include <sys/fs/zfs.h>
   64 
   65 #include <thread_pool.h>
   66 #include <libzutil.h>
   67 #include <libnvpair.h>
   68 #include <libzfs.h>
   69 
   70 #include "zutil_import.h"
   71 
   72 #ifdef HAVE_LIBUDEV
   73 #include <libudev.h>
   74 #include <sched.h>
   75 #endif
   76 #include <blkid/blkid.h>
   77 
   78 #define DEV_BYID_PATH   "/dev/disk/by-id/"
   79 
   80 /*
   81  * Skip devices with well known prefixes:
   82  * there can be side effects when opening devices which need to be avoided.
   83  *
   84  * hpet        - High Precision Event Timer
   85  * watchdog[N] - Watchdog must be closed in a special way.
   86  */
   87 static boolean_t
   88 should_skip_dev(const char *dev)
   89 {
   90         return ((strcmp(dev, "watchdog") == 0) ||
   91             (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
   92             (strcmp(dev, "hpet") == 0));
   93 }
   94 
   95 int
   96 zfs_dev_flush(int fd)
   97 {
   98         return (ioctl(fd, BLKFLSBUF));
   99 }
  100 
  101 void
  102 zpool_open_func(void *arg)
  103 {
  104         rdsk_node_t *rn = arg;
  105         libpc_handle_t *hdl = rn->rn_hdl;
  106         struct stat64 statbuf;
  107         nvlist_t *config;
  108         uint64_t vdev_guid = 0;
  109         int error;
  110         int num_labels = 0;
  111         int fd;
  112 
  113         if (should_skip_dev(zfs_basename(rn->rn_name)))
  114                 return;
  115 
  116         /*
  117          * Ignore failed stats.  We only want regular files and block devices.
  118          * Ignore files that are too small to hold a zpool.
  119          */
  120         if (stat64(rn->rn_name, &statbuf) != 0 ||
  121             (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
  122             (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
  123                 return;
  124 
  125         /*
  126          * Preferentially open using O_DIRECT to bypass the block device
  127          * cache which may be stale for multipath devices.  An EINVAL errno
  128          * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
  129          */
  130         fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
  131         if ((fd < 0) && (errno == EINVAL))
  132                 fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
  133         if ((fd < 0) && (errno == EACCES))
  134                 hdl->lpc_open_access_error = B_TRUE;
  135         if (fd < 0)
  136                 return;
  137 
  138         error = zpool_read_label(fd, &config, &num_labels);
  139         if (error != 0) {
  140                 (void) close(fd);
  141                 return;
  142         }
  143 
  144         if (num_labels == 0) {
  145                 (void) close(fd);
  146                 nvlist_free(config);
  147                 return;
  148         }
  149 
  150         /*
  151          * Check that the vdev is for the expected guid.  Additional entries
  152          * are speculatively added based on the paths stored in the labels.
  153          * Entries with valid paths but incorrect guids must be removed.
  154          */
  155         error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
  156         if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
  157                 (void) close(fd);
  158                 nvlist_free(config);
  159                 return;
  160         }
  161 
  162         (void) close(fd);
  163 
  164         rn->rn_config = config;
  165         rn->rn_num_labels = num_labels;
  166 
  167         /*
  168          * Add additional entries for paths described by this label.
  169          */
  170         if (rn->rn_labelpaths) {
  171                 char *path = NULL;
  172                 char *devid = NULL;
  173                 char *env = NULL;
  174                 rdsk_node_t *slice;
  175                 avl_index_t where;
  176                 int timeout;
  177                 int error;
  178 
  179                 if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
  180                         return;
  181 
  182                 env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
  183                 if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
  184                     timeout < 0) {
  185                         timeout = DISK_LABEL_WAIT;
  186                 }
  187 
  188                 /*
  189                  * Allow devlinks to stabilize so all paths are available.
  190                  */
  191                 zpool_label_disk_wait(rn->rn_name, timeout);
  192 
  193                 if (path != NULL) {
  194                         slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
  195                         slice->rn_name = zutil_strdup(hdl, path);
  196                         slice->rn_vdev_guid = vdev_guid;
  197                         slice->rn_avl = rn->rn_avl;
  198                         slice->rn_hdl = hdl;
  199                         slice->rn_order = IMPORT_ORDER_PREFERRED_1;
  200                         slice->rn_labelpaths = B_FALSE;
  201                         pthread_mutex_lock(rn->rn_lock);
  202                         if (avl_find(rn->rn_avl, slice, &where)) {
  203                         pthread_mutex_unlock(rn->rn_lock);
  204                                 free(slice->rn_name);
  205                                 free(slice);
  206                         } else {
  207                                 avl_insert(rn->rn_avl, slice, where);
  208                                 pthread_mutex_unlock(rn->rn_lock);
  209                                 zpool_open_func(slice);
  210                         }
  211                 }
  212 
  213                 if (devid != NULL) {
  214                         slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
  215                         error = asprintf(&slice->rn_name, "%s%s",
  216                             DEV_BYID_PATH, devid);
  217                         if (error == -1) {
  218                                 free(slice);
  219                                 return;
  220                         }
  221 
  222                         slice->rn_vdev_guid = vdev_guid;
  223                         slice->rn_avl = rn->rn_avl;
  224                         slice->rn_hdl = hdl;
  225                         slice->rn_order = IMPORT_ORDER_PREFERRED_2;
  226                         slice->rn_labelpaths = B_FALSE;
  227                         pthread_mutex_lock(rn->rn_lock);
  228                         if (avl_find(rn->rn_avl, slice, &where)) {
  229                                 pthread_mutex_unlock(rn->rn_lock);
  230                                 free(slice->rn_name);
  231                                 free(slice);
  232                         } else {
  233                                 avl_insert(rn->rn_avl, slice, where);
  234                                 pthread_mutex_unlock(rn->rn_lock);
  235                                 zpool_open_func(slice);
  236                         }
  237                 }
  238         }
  239 }
  240 
/*
 * Built-in device search directories, listed from most to least preferred.
 * When ZPOOL_IMPORT_PATH is not set, zfs_path_order() reports the index of
 * the first entry that is a prefix of a device name.
 */
static const char * const
zpool_default_import_path[] = {
        "/dev/disk/by-vdev",    /* Custom rules, use first if they exist */
        "/dev/mapper",          /* Use multipath devices before components */
        "/dev/disk/by-partlabel", /* Single unique entry set by user */
        "/dev/disk/by-partuuid", /* Generated partition uuid */
        "/dev/disk/by-label",   /* Custom persistent labels */
        "/dev/disk/by-uuid",    /* Single unique entry and persistent */
        "/dev/disk/by-id",      /* May be multiple entries and persistent */
        "/dev/disk/by-path",    /* Encodes physical location and persistent */
        "/dev"                  /* UNSAFE device names will change */
};
  253 
  254 const char * const *
  255 zpool_default_search_paths(size_t *count)
  256 {
  257         *count = ARRAY_SIZE(zpool_default_import_path);
  258         return (zpool_default_import_path);
  259 }
  260 
  261 /*
  262  * Given a full path to a device determine if that device appears in the
  263  * import search path.  If it does return the first match and store the
  264  * index in the passed 'order' variable, otherwise return an error.
  265  */
  266 static int
  267 zfs_path_order(const char *name, int *order)
  268 {
  269         const char *env = getenv("ZPOOL_IMPORT_PATH");
  270 
  271         if (env) {
  272                 for (int i = 0; ; ++i) {
  273                         env += strspn(env, ":");
  274                         size_t dirlen = strcspn(env, ":");
  275                         if (dirlen) {
  276                                 if (strncmp(name, env, dirlen) == 0) {
  277                                         *order = i;
  278                                         return (0);
  279                                 }
  280 
  281                                 env += dirlen;
  282                         } else
  283                                 break;
  284                 }
  285         } else {
  286                 for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
  287                     ++i) {
  288                         if (strncmp(name, zpool_default_import_path[i],
  289                             strlen(zpool_default_import_path[i])) == 0) {
  290                                 *order = i;
  291                                 return (0);
  292                         }
  293                 }
  294         }
  295 
  296         return (ENOENT);
  297 }
  298 
  299 /*
  300  * Use libblkid to quickly enumerate all known zfs devices.
  301  */
  302 int
  303 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
  304     avl_tree_t **slice_cache)
  305 {
  306         rdsk_node_t *slice;
  307         blkid_cache cache;
  308         blkid_dev_iterate iter;
  309         blkid_dev dev;
  310         avl_index_t where;
  311         int error;
  312 
  313         *slice_cache = NULL;
  314 
  315         error = blkid_get_cache(&cache, NULL);
  316         if (error != 0)
  317                 return (error);
  318 
  319         error = blkid_probe_all_new(cache);
  320         if (error != 0) {
  321                 blkid_put_cache(cache);
  322                 return (error);
  323         }
  324 
  325         iter = blkid_dev_iterate_begin(cache);
  326         if (iter == NULL) {
  327                 blkid_put_cache(cache);
  328                 return (EINVAL);
  329         }
  330 
  331         /* Only const char *s since 2.32 */
  332         error = blkid_dev_set_search(iter,
  333             (char *)"TYPE", (char *)"zfs_member");
  334         if (error != 0) {
  335                 blkid_dev_iterate_end(iter);
  336                 blkid_put_cache(cache);
  337                 return (error);
  338         }
  339 
  340         *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
  341         avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
  342             offsetof(rdsk_node_t, rn_node));
  343 
  344         while (blkid_dev_next(iter, &dev) == 0) {
  345                 slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
  346                 slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
  347                 slice->rn_vdev_guid = 0;
  348                 slice->rn_lock = lock;
  349                 slice->rn_avl = *slice_cache;
  350                 slice->rn_hdl = hdl;
  351                 slice->rn_labelpaths = B_TRUE;
  352 
  353                 error = zfs_path_order(slice->rn_name, &slice->rn_order);
  354                 if (error == 0)
  355                         slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
  356                 else
  357                         slice->rn_order = IMPORT_ORDER_DEFAULT;
  358 
  359                 pthread_mutex_lock(lock);
  360                 if (avl_find(*slice_cache, slice, &where)) {
  361                         free(slice->rn_name);
  362                         free(slice);
  363                 } else {
  364                         avl_insert(*slice_cache, slice, where);
  365                 }
  366                 pthread_mutex_unlock(lock);
  367         }
  368 
  369         blkid_dev_iterate_end(iter);
  370         blkid_put_cache(cache);
  371 
  372         return (0);
  373 }
  374 
  375 /*
  376  * Linux persistent device strings for vdev labels
  377  *
  378  * based on libudev for consistency with libudev disk add/remove events
  379  */
  380 
/*
 * Output buffers for the persistent device strings of one device, filled in
 * by encode_device_strings().
 */
typedef struct vdev_dev_strs {
        char    vds_devid[128];         /* set by zfs_device_get_devid() */
        char    vds_devphys[128];       /* set by zfs_device_get_physical(); */
                                        /* "" when not available */
} vdev_dev_strs_t;
  385 
  386 #ifdef HAVE_LIBUDEV
  387 
  388 /*
  389  * Obtain the persistent device id string (describes what)
  390  *
  391  * used by ZED vdev matching for auto-{online,expand,replace}
  392  */
  393 int
  394 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
  395 {
  396         struct udev_list_entry *entry;
  397         const char *bus;
  398         char devbyid[MAXPATHLEN];
  399 
  400         /* The bus based by-id path is preferred */
  401         bus = udev_device_get_property_value(dev, "ID_BUS");
  402 
  403         if (bus == NULL) {
  404                 const char *dm_uuid;
  405 
  406                 /*
  407                  * For multipath nodes use the persistent uuid based identifier
  408                  *
  409                  * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
  410                  */
  411                 dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
  412                 if (dm_uuid != NULL) {
  413                         (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
  414                         return (0);
  415                 }
  416 
  417                 /*
  418                  * For volumes use the persistent /dev/zvol/dataset identifier
  419                  */
  420                 entry = udev_device_get_devlinks_list_entry(dev);
  421                 while (entry != NULL) {
  422                         const char *name;
  423 
  424                         name = udev_list_entry_get_name(entry);
  425                         if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
  426                                 (void) strlcpy(bufptr, name, buflen);
  427                                 return (0);
  428                         }
  429                         entry = udev_list_entry_get_next(entry);
  430                 }
  431 
  432                 /*
  433                  * NVME 'by-id' symlinks are similar to bus case
  434                  */
  435                 struct udev_device *parent;
  436 
  437                 parent = udev_device_get_parent_with_subsystem_devtype(dev,
  438                     "nvme", NULL);
  439                 if (parent != NULL)
  440                         bus = "nvme";   /* continue with bus symlink search */
  441                 else
  442                         return (ENODATA);
  443         }
  444 
  445         /*
  446          * locate the bus specific by-id link
  447          */
  448         (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
  449         entry = udev_device_get_devlinks_list_entry(dev);
  450         while (entry != NULL) {
  451                 const char *name;
  452 
  453                 name = udev_list_entry_get_name(entry);
  454                 if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
  455                         name += strlen(DEV_BYID_PATH);
  456                         (void) strlcpy(bufptr, name, buflen);
  457                         return (0);
  458                 }
  459                 entry = udev_list_entry_get_next(entry);
  460         }
  461 
  462         return (ENODATA);
  463 }
  464 
  465 /*
  466  * Obtain the persistent physical location string (describes where)
  467  *
  468  * used by ZED vdev matching for auto-{online,expand,replace}
  469  */
  470 int
  471 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
  472 {
  473         const char *physpath = NULL;
  474         struct udev_list_entry *entry;
  475 
  476         /*
  477          * Normal disks use ID_PATH for their physical path.
  478          */
  479         physpath = udev_device_get_property_value(dev, "ID_PATH");
  480         if (physpath != NULL && strlen(physpath) > 0) {
  481                 (void) strlcpy(bufptr, physpath, buflen);
  482                 return (0);
  483         }
  484 
  485         /*
  486          * Device mapper devices are virtual and don't have a physical
  487          * path. For them we use ID_VDEV instead, which is setup via the
  488          * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
  489          * to a virtual device.  If you don't have vdev_id.conf setup,
  490          * you cannot use multipath autoreplace with device mapper.
  491          */
  492         physpath = udev_device_get_property_value(dev, "ID_VDEV");
  493         if (physpath != NULL && strlen(physpath) > 0) {
  494                 (void) strlcpy(bufptr, physpath, buflen);
  495                 return (0);
  496         }
  497 
  498         /*
  499          * For ZFS volumes use the persistent /dev/zvol/dataset identifier
  500          */
  501         entry = udev_device_get_devlinks_list_entry(dev);
  502         while (entry != NULL) {
  503                 physpath = udev_list_entry_get_name(entry);
  504                 if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
  505                         (void) strlcpy(bufptr, physpath, buflen);
  506                         return (0);
  507                 }
  508                 entry = udev_list_entry_get_next(entry);
  509         }
  510 
  511         /*
  512          * For all other devices fallback to using the by-uuid name.
  513          */
  514         entry = udev_device_get_devlinks_list_entry(dev);
  515         while (entry != NULL) {
  516                 physpath = udev_list_entry_get_name(entry);
  517                 if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
  518                         (void) strlcpy(bufptr, physpath, buflen);
  519                         return (0);
  520                 }
  521                 entry = udev_list_entry_get_next(entry);
  522         }
  523 
  524         return (ENODATA);
  525 }
  526 
  527 /*
  528  * A disk is considered a multipath whole disk when:
  529  *      DEVNAME key value has "dm-"
  530  *      DM_NAME key value has "mpath" prefix
  531  *      DM_UUID key exists
  532  *      ID_PART_TABLE_TYPE key does not exist or is not gpt
  533  */
  534 static boolean_t
  535 udev_mpath_whole_disk(struct udev_device *dev)
  536 {
  537         const char *devname, *type, *uuid;
  538 
  539         devname = udev_device_get_property_value(dev, "DEVNAME");
  540         type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
  541         uuid = udev_device_get_property_value(dev, "DM_UUID");
  542 
  543         if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
  544             ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
  545             (uuid != NULL)) {
  546                 return (B_TRUE);
  547         }
  548 
  549         return (B_FALSE);
  550 }
  551 
  552 static int
  553 udev_device_is_ready(struct udev_device *dev)
  554 {
  555 #ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
  556         return (udev_device_get_is_initialized(dev));
  557 #else
  558         /* wait for DEVLINKS property to be initialized */
  559         return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
  560 #endif
  561 }
  562 
  563 #else
  564 
  565 int
  566 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
  567 {
  568         (void) dev, (void) bufptr, (void) buflen;
  569         return (ENODATA);
  570 }
  571 
  572 int
  573 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
  574 {
  575         (void) dev, (void) bufptr, (void) buflen;
  576         return (ENODATA);
  577 }
  578 
  579 #endif /* HAVE_LIBUDEV */
  580 
/*
 * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.
 * Depending on the complexity of the udev rules this process could take
 * several seconds.
 *
 * The loop body always runs at least once, polling every sleep_ms
 * milliseconds.
 *
 * With libudev: returns ENXIO if a udev context cannot be created;
 * otherwise returns the last recorded state, which is 0 once the device
 * was found ready with all of its links present and ENODEV if that was
 * never observed or a link was missing on the most recent check.
 *
 * Without libudev: returns 0 once 'path' has been stat-able for settle_ms,
 * the stat errno for any failure other than ENOENT, and ENODEV on timeout.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
        struct udev *udev;
        struct udev_device *dev = NULL;
        char nodepath[MAXPATHLEN];
        char *sysname = NULL;
        int ret = ENODEV;
        int settle_ms = 50;
        long sleep_ms = 10;
        hrtime_t start, settle;

        if ((udev = udev_new()) == NULL)
                return (ENXIO);

        start = gethrtime();
        settle = 0;

        do {
                /*
                 * Resolve the path once; until that succeeds just sleep and
                 * retry (the 'continue' still evaluates the timeout check).
                 */
                if (sysname == NULL) {
                        if (realpath(path, nodepath) != NULL) {
                                sysname = strrchr(nodepath, '/') + 1;
                        } else {
                                (void) usleep(sleep_ms * MILLISEC);
                                continue;
                        }
                }

                dev = udev_device_new_from_subsystem_sysname(udev,
                    "block", sysname);
                if ((dev != NULL) && udev_device_is_ready(dev)) {
                        struct udev_list_entry *links, *link = NULL;

                        ret = 0;
                        links = udev_device_get_devlinks_list_entry(dev);

                        /* Every devlink must exist; else restart settling. */
                        udev_list_entry_foreach(link, links) {
                                struct stat64 statbuf;
                                const char *name;

                                name = udev_list_entry_get_name(link);
                                errno = 0;
                                if (stat64(name, &statbuf) == 0 && errno == 0)
                                        continue;

                                settle = 0;
                                ret = ENODEV;
                                break;
                        }

                        /*
                         * The first fully verified pass starts the settle
                         * timer; a later one finishes once settle_ms has
                         * elapsed since then.
                         */
                        if (ret == 0) {
                                if (settle == 0) {
                                        settle = gethrtime();
                                } else if (NSEC2MSEC(gethrtime() - settle) >=
                                    settle_ms) {
                                        udev_device_unref(dev);
                                        break;
                                }
                        }
                }

                /*
                 * NOTE(review): 'ret' is not reset when a later pass finds
                 * the device missing or not ready, so a 0 from an earlier
                 * pass can be returned at timeout - confirm this is intended.
                 */
                udev_device_unref(dev);
                (void) usleep(sleep_ms * MILLISEC);

        } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

        udev_unref(udev);

        return (ret);
#else
        int settle_ms = 50;
        long sleep_ms = 10;
        hrtime_t start, settle;
        struct stat64 statbuf;

        start = gethrtime();
        settle = 0;

        do {
                errno = 0;
                if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
                        /* First success starts the settle timer. */
                        if (settle == 0)
                                settle = gethrtime();
                        else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
                                return (0);
                } else if (errno != ENOENT) {
                        /* Only "does not exist yet" is worth waiting out. */
                        return (errno);
                }

                usleep(sleep_ms * MILLISEC);
        } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

        return (ENODEV);
#endif /* HAVE_LIBUDEV */
}
  685 
  686 /*
  687  * Encode the persistent devices strings
  688  * used for the vdev disk label
  689  */
  690 static int
  691 encode_device_strings(const char *path, vdev_dev_strs_t *ds,
  692     boolean_t wholedisk)
  693 {
  694 #ifdef HAVE_LIBUDEV
  695         struct udev *udev;
  696         struct udev_device *dev = NULL;
  697         char nodepath[MAXPATHLEN];
  698         char *sysname;
  699         int ret = ENODEV;
  700         hrtime_t start;
  701 
  702         if ((udev = udev_new()) == NULL)
  703                 return (ENXIO);
  704 
  705         /* resolve path to a runtime device node instance */
  706         if (realpath(path, nodepath) == NULL)
  707                 goto no_dev;
  708 
  709         sysname = strrchr(nodepath, '/') + 1;
  710 
  711         /*
  712          * Wait up to 3 seconds for udev to set up the device node context
  713          */
  714         start = gethrtime();
  715         do {
  716                 dev = udev_device_new_from_subsystem_sysname(udev, "block",
  717                     sysname);
  718                 if (dev == NULL)
  719                         goto no_dev;
  720                 if (udev_device_is_ready(dev))
  721                         break;  /* udev ready */
  722 
  723                 udev_device_unref(dev);
  724                 dev = NULL;
  725 
  726                 if (NSEC2MSEC(gethrtime() - start) < 10)
  727                         (void) sched_yield();   /* yield/busy wait up to 10ms */
  728                 else
  729                         (void) usleep(10 * MILLISEC);
  730 
  731         } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
  732 
  733         if (dev == NULL)
  734                 goto no_dev;
  735 
  736         /*
  737          * Only whole disks require extra device strings
  738          */
  739         if (!wholedisk && !udev_mpath_whole_disk(dev))
  740                 goto no_dev;
  741 
  742         ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
  743         if (ret != 0)
  744                 goto no_dev_ref;
  745 
  746         /* physical location string (optional) */
  747         if (zfs_device_get_physical(dev, ds->vds_devphys,
  748             sizeof (ds->vds_devphys)) != 0) {
  749                 ds->vds_devphys[0] = '\0'; /* empty string --> not available */
  750         }
  751 
  752 no_dev_ref:
  753         udev_device_unref(dev);
  754 no_dev:
  755         udev_unref(udev);
  756 
  757         return (ret);
  758 #else
  759         (void) path;
  760         (void) ds;
  761         (void) wholedisk;
  762         return (ENOENT);
  763 #endif
  764 }
  765 
  766 /*
  767  * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
  768  * in the nvlist * (if applicable).  Like:
  769  *    vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
  770  */
  771 static void
  772 update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path)
  773 {
  774         char *upath, *spath;
  775 
  776         /* Add enclosure sysfs path (if disk is in an enclosure). */
  777         upath = zfs_get_underlying_path(path);
  778         spath = zfs_get_enclosure_sysfs_path(upath);
  779 
  780         if (spath) {
  781                 nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath);
  782         } else {
  783                 nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
  784         }
  785 
  786         free(upath);
  787         free(spath);
  788 }
  789 
  790 /*
  791  * This will get called for each leaf vdev.
  792  */
  793 static int
  794 sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
  795 {
  796         (void) hdl_data, (void) data;
  797 
  798         char *path = NULL;
  799         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
  800                 return (1);
  801 
  802         /* Rescan our enclosure sysfs path for this vdev */
  803         update_vdev_config_dev_sysfs_path(nv, path);
  804         return (0);
  805 }
  806 
  807 /*
  808  * Given an nvlist for our pool (with vdev tree), iterate over all the
  809  * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
  810  */
  811 void
  812 update_vdevs_config_dev_sysfs_path(nvlist_t *config)
  813 {
  814         nvlist_t *nvroot = NULL;
  815         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
  816             &nvroot) == 0);
  817         for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
  818 }
  819 
  820 /*
  821  * Update a leaf vdev's persistent device strings
  822  *
  823  * - only applies for a dedicated leaf vdev (aka whole disk)
  824  * - updated during pool create|add|attach|import
  825  * - used for matching device matching during auto-{online,expand,replace}
  826  * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
  827  * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
  828  *
  829  * single device node example:
  830  *      devid:          'scsi-MG03SCA300_350000494a8cb3d67-part1'
  831  *      phys_path:      'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
  832  *
  833  * multipath device node example:
  834  *      devid:          'dm-uuid-mpath-35000c5006304de3f'
  835  *
  836  * We also store the enclosure sysfs path for turning on enclosure LEDs
  837  * (if applicable):
  838  *      vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
  839  */
  840 void
  841 update_vdev_config_dev_strs(nvlist_t *nv)
  842 {
  843         vdev_dev_strs_t vds;
  844         char *env, *type, *path;
  845         uint64_t wholedisk = 0;
  846 
  847         /*
  848          * For the benefit of legacy ZFS implementations, allow
  849          * for opting out of devid strings in the vdev label.
  850          *
  851          * example use:
  852          *      env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
  853          *
  854          * explanation:
  855          * Older OpenZFS implementations had issues when attempting to
  856          * display pool config VDEV names if a "devid" NVP value is
  857          * present in the pool's config.
  858          *
  859          * For example, a pool that originated on illumos platform would
  860          * have a devid value in the config and "zpool status" would fail
  861          * when listing the config.
  862          *
  863          * A pool can be stripped of any "devid" values on import or
  864          * prevented from adding them on zpool create|add by setting
  865          * ZFS_VDEV_DEVID_OPT_OUT.
  866          */
  867         env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
  868         if (env && (strtoul(env, NULL, 0) > 0 ||
  869             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
  870                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
  871                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
  872                 return;
  873         }
  874 
  875         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
  876             strcmp(type, VDEV_TYPE_DISK) != 0) {
  877                 return;
  878         }
  879         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
  880                 return;
  881         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
  882 
  883         /*
  884          * Update device string values in the config nvlist.
  885          */
  886         if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
  887                 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
  888                 if (vds.vds_devphys[0] != '\0') {
  889                         (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
  890                             vds.vds_devphys);
  891                 }
  892                 update_vdev_config_dev_sysfs_path(nv, path);
  893         } else {
  894                 /* Clear out any stale entries. */
  895                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
  896                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
  897                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
  898         }
  899 }

Cache object: 16e0cb08605fb39ec708fc5e4af04117


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.