The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/cmd/zpool/os/linux/zpool_vdev_os.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   24  * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
   25  * Copyright (c) 2016, 2017 Intel Corporation.
   26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
   27  */
   28 
   29 /*
   30  * Functions to convert between a list of vdevs and an nvlist representing the
   31  * configuration.  Each entry in the list can be one of:
   32  *
   33  *      Device vdevs
   34  *              disk=(path=..., devid=...)
   35  *              file=(path=...)
   36  *
   37  *      Group vdevs
   38  *              raidz[1|2]=(...)
   39  *              mirror=(...)
   40  *
   41  *      Hot spares
   42  *
   43  * While the underlying implementation supports it, group vdevs cannot contain
   44  * other group vdevs.  All userland verification of devices is contained within
   45  * this file.  If successful, the nvlist returned can be passed directly to the
   46  * kernel; we've done as much verification as possible in userland.
   47  *
   48  * Hot spares are a special case, and passed down as an array of disk vdevs, at
   49  * the same level as the root of the vdev tree.
   50  *
   51  * The only function exported by this file is 'make_root_vdev'.  The
   52  * function performs several passes:
   53  *
   54  *      1. Construct the vdev specification.  Performs syntax validation and
   55  *         makes sure each device is valid.
   56  *      2. Check for devices in use.  Using libblkid to make sure that no
   57  *         devices are also in use.  Some can be overridden using the 'force'
   58  *         flag, others cannot.
   59  *      3. Check for replication errors if the 'force' flag is not specified.
   60  *         validates that the replication level is consistent across the
   61  *         entire pool.
   62  *      4. Call libzfs to label any whole disks with an EFI label.
   63  */
   64 
   65 #include <assert.h>
   66 #include <ctype.h>
   67 #include <errno.h>
   68 #include <fcntl.h>
   69 #include <libintl.h>
   70 #include <libnvpair.h>
   71 #include <libzutil.h>
   72 #include <limits.h>
   73 #include <sys/spa.h>
   74 #include <stdio.h>
   75 #include <string.h>
   76 #include <unistd.h>
   77 #include "zpool_util.h"
   78 #include <sys/zfs_context.h>
   79 
   80 #include <scsi/scsi.h>
   81 #include <scsi/sg.h>
   82 #include <sys/efi_partition.h>
   83 #include <sys/stat.h>
   84 #include <sys/mntent.h>
   85 #include <uuid/uuid.h>
   86 #include <blkid/blkid.h>
   87 
   88 typedef struct vdev_disk_db_entry
   89 {
   90         char id[24];
   91         int sector_size;
   92 } vdev_disk_db_entry_t;
   93 
   94 /*
   95  * Database of block devices that lie about physical sector sizes.  The
   96  * identification string must be precisely 24 characters to avoid false
   97  * negatives
   98  */
   99 static vdev_disk_db_entry_t vdev_disk_database[] = {
  100         {"ATA     ADATA SSD S396 3", 8192},
  101         {"ATA     APPLE SSD SM128E", 8192},
  102         {"ATA     APPLE SSD SM256E", 8192},
  103         {"ATA     APPLE SSD SM512E", 8192},
  104         {"ATA     APPLE SSD SM768E", 8192},
  105         {"ATA     C400-MTFDDAC064M", 8192},
  106         {"ATA     C400-MTFDDAC128M", 8192},
  107         {"ATA     C400-MTFDDAC256M", 8192},
  108         {"ATA     C400-MTFDDAC512M", 8192},
  109         {"ATA     Corsair Force 3 ", 8192},
  110         {"ATA     Corsair Force GS", 8192},
  111         {"ATA     INTEL SSDSA2CT04", 8192},
  112         {"ATA     INTEL SSDSA2BZ10", 8192},
  113         {"ATA     INTEL SSDSA2BZ20", 8192},
  114         {"ATA     INTEL SSDSA2BZ30", 8192},
  115         {"ATA     INTEL SSDSA2CW04", 8192},
  116         {"ATA     INTEL SSDSA2CW08", 8192},
  117         {"ATA     INTEL SSDSA2CW12", 8192},
  118         {"ATA     INTEL SSDSA2CW16", 8192},
  119         {"ATA     INTEL SSDSA2CW30", 8192},
  120         {"ATA     INTEL SSDSA2CW60", 8192},
  121         {"ATA     INTEL SSDSC2CT06", 8192},
  122         {"ATA     INTEL SSDSC2CT12", 8192},
  123         {"ATA     INTEL SSDSC2CT18", 8192},
  124         {"ATA     INTEL SSDSC2CT24", 8192},
  125         {"ATA     INTEL SSDSC2CW06", 8192},
  126         {"ATA     INTEL SSDSC2CW12", 8192},
  127         {"ATA     INTEL SSDSC2CW18", 8192},
  128         {"ATA     INTEL SSDSC2CW24", 8192},
  129         {"ATA     INTEL SSDSC2CW48", 8192},
  130         {"ATA     KINGSTON SH100S3", 8192},
  131         {"ATA     KINGSTON SH103S3", 8192},
  132         {"ATA     M4-CT064M4SSD2  ", 8192},
  133         {"ATA     M4-CT128M4SSD2  ", 8192},
  134         {"ATA     M4-CT256M4SSD2  ", 8192},
  135         {"ATA     M4-CT512M4SSD2  ", 8192},
  136         {"ATA     OCZ-AGILITY2    ", 8192},
  137         {"ATA     OCZ-AGILITY3    ", 8192},
  138         {"ATA     OCZ-VERTEX2 3.5 ", 8192},
  139         {"ATA     OCZ-VERTEX3     ", 8192},
  140         {"ATA     OCZ-VERTEX3 LT  ", 8192},
  141         {"ATA     OCZ-VERTEX3 MI  ", 8192},
  142         {"ATA     OCZ-VERTEX4     ", 8192},
  143         {"ATA     SAMSUNG MZ7WD120", 8192},
  144         {"ATA     SAMSUNG MZ7WD240", 8192},
  145         {"ATA     SAMSUNG MZ7WD480", 8192},
  146         {"ATA     SAMSUNG MZ7WD960", 8192},
  147         {"ATA     SAMSUNG SSD 830 ", 8192},
  148         {"ATA     Samsung SSD 840 ", 8192},
  149         {"ATA     SanDisk SSD U100", 8192},
  150         {"ATA     TOSHIBA THNSNH06", 8192},
  151         {"ATA     TOSHIBA THNSNH12", 8192},
  152         {"ATA     TOSHIBA THNSNH25", 8192},
  153         {"ATA     TOSHIBA THNSNH51", 8192},
  154         {"ATA     APPLE SSD TS064C", 4096},
  155         {"ATA     APPLE SSD TS128C", 4096},
  156         {"ATA     APPLE SSD TS256C", 4096},
  157         {"ATA     APPLE SSD TS512C", 4096},
  158         {"ATA     INTEL SSDSA2M040", 4096},
  159         {"ATA     INTEL SSDSA2M080", 4096},
  160         {"ATA     INTEL SSDSA2M160", 4096},
  161         {"ATA     INTEL SSDSC2MH12", 4096},
  162         {"ATA     INTEL SSDSC2MH25", 4096},
  163         {"ATA     OCZ CORE_SSD    ", 4096},
  164         {"ATA     OCZ-VERTEX      ", 4096},
  165         {"ATA     SAMSUNG MCCOE32G", 4096},
  166         {"ATA     SAMSUNG MCCOE64G", 4096},
  167         {"ATA     SAMSUNG SSD PM80", 4096},
  168         /* Flash drives optimized for 4KB IOs on larger pages */
  169         {"ATA     INTEL SSDSC2BA10", 4096},
  170         {"ATA     INTEL SSDSC2BA20", 4096},
  171         {"ATA     INTEL SSDSC2BA40", 4096},
  172         {"ATA     INTEL SSDSC2BA80", 4096},
  173         {"ATA     INTEL SSDSC2BB08", 4096},
  174         {"ATA     INTEL SSDSC2BB12", 4096},
  175         {"ATA     INTEL SSDSC2BB16", 4096},
  176         {"ATA     INTEL SSDSC2BB24", 4096},
  177         {"ATA     INTEL SSDSC2BB30", 4096},
  178         {"ATA     INTEL SSDSC2BB40", 4096},
  179         {"ATA     INTEL SSDSC2BB48", 4096},
  180         {"ATA     INTEL SSDSC2BB60", 4096},
  181         {"ATA     INTEL SSDSC2BB80", 4096},
  182         {"ATA     INTEL SSDSC2BW24", 4096},
  183         {"ATA     INTEL SSDSC2BW48", 4096},
  184         {"ATA     INTEL SSDSC2BP24", 4096},
  185         {"ATA     INTEL SSDSC2BP48", 4096},
  186         {"NA      SmrtStorSDLKAE9W", 4096},
  187         {"NVMe    Amazon EC2 NVMe ", 4096},
  188         /* Imported from Open Solaris */
  189         {"ATA     MARVELL SD88SA02", 4096},
  190         /* Advanced format Hard drives */
  191         {"ATA     Hitachi HDS5C303", 4096},
  192         {"ATA     SAMSUNG HD204UI ", 4096},
  193         {"ATA     ST2000DL004 HD20", 4096},
  194         {"ATA     WDC WD10EARS-00M", 4096},
  195         {"ATA     WDC WD10EARS-00S", 4096},
  196         {"ATA     WDC WD10EARS-00Z", 4096},
  197         {"ATA     WDC WD15EARS-00M", 4096},
  198         {"ATA     WDC WD15EARS-00S", 4096},
  199         {"ATA     WDC WD15EARS-00Z", 4096},
  200         {"ATA     WDC WD20EARS-00M", 4096},
  201         {"ATA     WDC WD20EARS-00S", 4096},
  202         {"ATA     WDC WD20EARS-00Z", 4096},
  203         {"ATA     WDC WD1600BEVT-0", 4096},
  204         {"ATA     WDC WD2500BEVT-0", 4096},
  205         {"ATA     WDC WD3200BEVT-0", 4096},
  206         {"ATA     WDC WD5000BEVT-0", 4096},
  207 };
  208 
  209 
  210 #define INQ_REPLY_LEN   96
  211 #define INQ_CMD_LEN     6
  212 
  213 static const int vdev_disk_database_size =
  214         sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
  215 
  216 boolean_t
  217 check_sector_size_database(char *path, int *sector_size)
  218 {
  219         unsigned char inq_buff[INQ_REPLY_LEN];
  220         unsigned char sense_buffer[32];
  221         unsigned char inq_cmd_blk[INQ_CMD_LEN] =
  222             {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
  223         sg_io_hdr_t io_hdr;
  224         int error;
  225         int fd;
  226         int i;
  227 
  228         /* Prepare INQUIRY command */
  229         memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
  230         io_hdr.interface_id = 'S';
  231         io_hdr.cmd_len = sizeof (inq_cmd_blk);
  232         io_hdr.mx_sb_len = sizeof (sense_buffer);
  233         io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
  234         io_hdr.dxfer_len = INQ_REPLY_LEN;
  235         io_hdr.dxferp = inq_buff;
  236         io_hdr.cmdp = inq_cmd_blk;
  237         io_hdr.sbp = sense_buffer;
  238         io_hdr.timeout = 10;            /* 10 milliseconds is ample time */
  239 
  240         if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
  241                 return (B_FALSE);
  242 
  243         error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
  244 
  245         (void) close(fd);
  246 
  247         if (error < 0)
  248                 return (B_FALSE);
  249 
  250         if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
  251                 return (B_FALSE);
  252 
  253         for (i = 0; i < vdev_disk_database_size; i++) {
  254                 if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
  255                         continue;
  256 
  257                 *sector_size = vdev_disk_database[i].sector_size;
  258                 return (B_TRUE);
  259         }
  260 
  261         return (B_FALSE);
  262 }
  263 
  264 static int
  265 check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
  266 {
  267         int err;
  268         char *value;
  269 
  270         /* No valid type detected device is safe to use */
  271         value = blkid_get_tag_value(cache, "TYPE", path);
  272         if (value == NULL)
  273                 return (0);
  274 
  275         /*
  276          * If libblkid detects a ZFS device, we check the device
  277          * using check_file() to see if it's safe.  The one safe
  278          * case is a spare device shared between multiple pools.
  279          */
  280         if (strcmp(value, "zfs_member") == 0) {
  281                 err = check_file(path, force, isspare);
  282         } else {
  283                 if (force) {
  284                         err = 0;
  285                 } else {
  286                         err = -1;
  287                         vdev_error(gettext("%s contains a filesystem of "
  288                             "type '%s'\n"), path, value);
  289                 }
  290         }
  291 
  292         free(value);
  293 
  294         return (err);
  295 }
  296 
  297 /*
  298  * Validate that a disk including all partitions are safe to use.
  299  *
  300  * For EFI labeled disks this can done relatively easily with the libefi
  301  * library.  The partition numbers are extracted from the label and used
  302  * to generate the expected /dev/ paths.  Each partition can then be
  303  * checked for conflicts.
  304  *
  305  * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
  306  * but due to the lack of a readily available libraries this scanning is
  307  * not implemented.  Instead only the device path as given is checked.
  308  */
  309 static int
  310 check_disk(const char *path, blkid_cache cache, int force,
  311     boolean_t isspare, boolean_t iswholedisk)
  312 {
  313         struct dk_gpt *vtoc;
  314         char slice_path[MAXPATHLEN];
  315         int err = 0;
  316         int fd, i;
  317         int flags = O_RDONLY|O_DIRECT;
  318 
  319         if (!iswholedisk)
  320                 return (check_slice(path, cache, force, isspare));
  321 
  322         /* only spares can be shared, other devices require exclusive access */
  323         if (!isspare)
  324                 flags |= O_EXCL;
  325 
  326         if ((fd = open(path, flags)) < 0) {
  327                 char *value = blkid_get_tag_value(cache, "TYPE", path);
  328                 (void) fprintf(stderr, gettext("%s is in use and contains "
  329                     "a %s filesystem.\n"), path, value ? value : "unknown");
  330                 free(value);
  331                 return (-1);
  332         }
  333 
  334         /*
  335          * Expected to fail for non-EFI labeled disks.  Just check the device
  336          * as given and do not attempt to detect and scan partitions.
  337          */
  338         err = efi_alloc_and_read(fd, &vtoc);
  339         if (err) {
  340                 (void) close(fd);
  341                 return (check_slice(path, cache, force, isspare));
  342         }
  343 
  344         /*
  345          * The primary efi partition label is damaged however the secondary
  346          * label at the end of the device is intact.  Rather than use this
  347          * label we should play it safe and treat this as a non efi device.
  348          */
  349         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
  350                 efi_free(vtoc);
  351                 (void) close(fd);
  352 
  353                 if (force) {
  354                         /* Partitions will now be created using the backup */
  355                         return (0);
  356                 } else {
  357                         vdev_error(gettext("%s contains a corrupt primary "
  358                             "EFI label.\n"), path);
  359                         return (-1);
  360                 }
  361         }
  362 
  363         for (i = 0; i < vtoc->efi_nparts; i++) {
  364 
  365                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
  366                     uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
  367                         continue;
  368 
  369                 if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
  370                         (void) snprintf(slice_path, sizeof (slice_path),
  371                             "%s%s%d", path, "-part", i+1);
  372                 else
  373                         (void) snprintf(slice_path, sizeof (slice_path),
  374                             "%s%s%d", path, isdigit(path[strlen(path)-1]) ?
  375                             "p" : "", i+1);
  376 
  377                 err = check_slice(slice_path, cache, force, isspare);
  378                 if (err)
  379                         break;
  380         }
  381 
  382         efi_free(vtoc);
  383         (void) close(fd);
  384 
  385         return (err);
  386 }
  387 
  388 int
  389 check_device(const char *path, boolean_t force,
  390     boolean_t isspare, boolean_t iswholedisk)
  391 {
  392         blkid_cache cache;
  393         int error;
  394 
  395         error = blkid_get_cache(&cache, NULL);
  396         if (error != 0) {
  397                 (void) fprintf(stderr, gettext("unable to access the blkid "
  398                     "cache.\n"));
  399                 return (-1);
  400         }
  401 
  402         error = check_disk(path, cache, force, isspare, iswholedisk);
  403         blkid_put_cache(cache);
  404 
  405         return (error);
  406 }
  407 
  408 void
  409 after_zpool_upgrade(zpool_handle_t *zhp)
  410 {
  411         (void) zhp;
  412 }
  413 
  414 int
  415 check_file(const char *file, boolean_t force, boolean_t isspare)
  416 {
  417         return (check_file_generic(file, force, isspare));
  418 }

Cache object: e006027207a57613ac761cb9692d0403


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.