The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
   24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   25  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
   26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
   27  * Copyright (c) 2018 Datto Inc.
   28  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
   29  * Copyright (c) 2017, Intel Corporation.
   30  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
   31  */
   32 
   33 #include <errno.h>
   34 #include <libintl.h>
   35 #include <stdio.h>
   36 #include <stdlib.h>
   37 #include <string.h>
   38 #include <unistd.h>
   39 #include <libgen.h>
   40 #include <zone.h>
   41 #include <sys/stat.h>
   42 #include <sys/efi_partition.h>
   43 #include <sys/systeminfo.h>
   44 #include <sys/zfs_ioctl.h>
   45 #include <sys/vdev_disk.h>
   46 #include <dlfcn.h>
   47 #include <libzutil.h>
   48 
   49 #include "zfs_namecheck.h"
   50 #include "zfs_prop.h"
   51 #include "../../libzfs_impl.h"
   52 #include "zfs_comutil.h"
   53 #include "zfeature_common.h"
   54 
   55 /*
   56  * If the device has being dynamically expanded then we need to relabel
   57  * the disk to use the new unallocated space.
   58  */
   59 int
   60 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
   61 {
   62         int fd, error;
   63 
   64         if ((fd = open(path, O_RDWR|O_DIRECT|O_CLOEXEC)) < 0) {
   65                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
   66                     "relabel '%s': unable to open device: %d"), path, errno);
   67                 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
   68         }
   69 
   70         /*
   71          * It's possible that we might encounter an error if the device
   72          * does not have any unallocated space left. If so, we simply
   73          * ignore that error and continue on.
   74          */
   75         error = efi_use_whole_disk(fd);
   76 
   77         /* Flush the buffers to disk and invalidate the page cache. */
   78         (void) fsync(fd);
   79         (void) ioctl(fd, BLKFLSBUF);
   80 
   81         (void) close(fd);
   82         if (error && error != VT_ENOSPC) {
   83                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
   84                     "relabel '%s': unable to read disk capacity"), path);
   85                 return (zfs_error(hdl, EZFS_NOCAP, msg));
   86         }
   87         return (0);
   88 }
   89 
   90 /*
   91  * Read the EFI label from the config, if a label does not exist then
   92  * pass back the error to the caller. If the caller has passed a non-NULL
   93  * diskaddr argument then we set it to the starting address of the EFI
   94  * partition.
   95  */
   96 static int
   97 read_efi_label(nvlist_t *config, diskaddr_t *sb)
   98 {
   99         char *path;
  100         int fd;
  101         char diskname[MAXPATHLEN];
  102         int err = -1;
  103 
  104         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
  105                 return (err);
  106 
  107         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
  108             strrchr(path, '/'));
  109         if ((fd = open(diskname, O_RDONLY|O_DIRECT|O_CLOEXEC)) >= 0) {
  110                 struct dk_gpt *vtoc;
  111 
  112                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
  113                         if (sb != NULL)
  114                                 *sb = vtoc->efi_parts[0].p_start;
  115                         efi_free(vtoc);
  116                 }
  117                 (void) close(fd);
  118         }
  119         return (err);
  120 }
  121 
  122 /*
  123  * determine where a partition starts on a disk in the current
  124  * configuration
  125  */
  126 static diskaddr_t
  127 find_start_block(nvlist_t *config)
  128 {
  129         nvlist_t **child;
  130         uint_t c, children;
  131         diskaddr_t sb = MAXOFFSET_T;
  132         uint64_t wholedisk;
  133 
  134         if (nvlist_lookup_nvlist_array(config,
  135             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
  136                 if (nvlist_lookup_uint64(config,
  137                     ZPOOL_CONFIG_WHOLE_DISK,
  138                     &wholedisk) != 0 || !wholedisk) {
  139                         return (MAXOFFSET_T);
  140                 }
  141                 if (read_efi_label(config, &sb) < 0)
  142                         sb = MAXOFFSET_T;
  143                 return (sb);
  144         }
  145 
  146         for (c = 0; c < children; c++) {
  147                 sb = find_start_block(child[c]);
  148                 if (sb != MAXOFFSET_T) {
  149                         return (sb);
  150                 }
  151         }
  152         return (MAXOFFSET_T);
  153 }
  154 
  155 static int
  156 zpool_label_disk_check(char *path)
  157 {
  158         struct dk_gpt *vtoc;
  159         int fd, err;
  160 
  161         if ((fd = open(path, O_RDONLY|O_DIRECT|O_CLOEXEC)) < 0)
  162                 return (errno);
  163 
  164         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
  165                 (void) close(fd);
  166                 return (err);
  167         }
  168 
  169         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
  170                 efi_free(vtoc);
  171                 (void) close(fd);
  172                 return (EIDRM);
  173         }
  174 
  175         efi_free(vtoc);
  176         (void) close(fd);
  177         return (0);
  178 }
  179 
  180 /*
  181  * Generate a unique partition name for the ZFS member.  Partitions must
  182  * have unique names to ensure udev will be able to create symlinks under
  183  * /dev/disk/by-partlabel/ for all pool members.  The partition names are
  184  * of the form <pool>-<unique-id>.
  185  */
  186 static void
  187 zpool_label_name(char *label_name, int label_size)
  188 {
  189         uint64_t id = 0;
  190         int fd;
  191 
  192         fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
  193         if (fd >= 0) {
  194                 if (read(fd, &id, sizeof (id)) != sizeof (id))
  195                         id = 0;
  196 
  197                 close(fd);
  198         }
  199 
  200         if (id == 0)
  201                 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
  202 
  203         snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
  204 }
  205 
/*
 * Label an individual disk.  The name provided is the short name,
 * stripped of any leading /dev path.
 *
 *	hdl	libzfs handle that receives the error details
 *	zhp	pool the disk is being labeled for, or NULL for a new pool
 *	name	short device name; the device opened is DISK_ROOT/name
 *
 * The label written has two entries: partition 0 (tag V_USR), which
 * starts at the chosen start block, and partition 8 (tag V_RESERVED) of
 * EFI_MIN_RESV_SIZE blocks placed directly behind it.
 *
 * For an existing pool the start block comes from zhp->zpool_start_block
 * when that is non-zero; otherwise it is looked up with find_start_block()
 * and the result is stored back into zhp->zpool_start_block.  For a new
 * pool, or when no start block could be determined (MAXOFFSET_T),
 * NEW_START_BLOCK is used.
 *
 * Returns 0 on success, otherwise the return value of zfs_error() for
 * EZFS_OPENFAILED, EZFS_NOCAP or EZFS_LABELFAILED.
 */
int
zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
{
        char path[MAXPATHLEN];
        struct dk_gpt *vtoc;
        int rval, fd;
        size_t resv = EFI_MIN_RESV_SIZE;
        uint64_t slice_size;
        diskaddr_t start_block;
        char errbuf[ERRBUFLEN];

        /* prepare an error message just in case */
        (void) snprintf(errbuf, sizeof (errbuf),
            dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);

        if (zhp) {
                nvlist_t *nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
                    ZPOOL_CONFIG_VDEV_TREE);

                /* A zero zpool_start_block means "not looked up yet". */
                if (zhp->zpool_start_block == 0)
                        start_block = find_start_block(nvroot);
                else
                        start_block = zhp->zpool_start_block;
                /* Remember the result so later calls skip the lookup. */
                zhp->zpool_start_block = start_block;
        } else {
                /* new pool */
                start_block = NEW_START_BLOCK;
        }

        (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);

        if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL|O_CLOEXEC)) < 0) {
                /*
                 * This shouldn't happen.  We've long since verified that this
                 * is a valid device.
                 */
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
                    "label '%s': unable to open device: %d"), path, errno);
                return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
        }

        if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
                /*
                 * The only way this can fail is if we run out of memory, or we
                 * were unable to read the disk's capacity
                 */
                if (errno == ENOMEM)
                        (void) no_memory(hdl);

                (void) close(fd);
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
                    "label '%s': unable to read disk capacity"), path);

                return (zfs_error(hdl, EZFS_NOCAP, errbuf));
        }

        /*
         * Size of partition 0: everything up to and including the last
         * usable LBA, minus the reserved area and minus the blocks in
         * front of the start block.
         * NOTE(review): P2ALIGN presumably rounds the size down to a
         * multiple of PARTITION_END_ALIGNMENT -- confirm against the macro.
         */
        slice_size = vtoc->efi_last_u_lba + 1;
        slice_size -= EFI_MIN_RESV_SIZE;
        /* No start block could be determined; fall back to the default. */
        if (start_block == MAXOFFSET_T)
                start_block = NEW_START_BLOCK;
        slice_size -= start_block;
        slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);

        vtoc->efi_parts[0].p_start = start_block;
        vtoc->efi_parts[0].p_size = slice_size;

        /*
         * Why we use V_USR: V_BACKUP confuses users, and is considered
         * disposable by some EFI utilities (since EFI doesn't have a backup
         * slice).  V_UNASSIGNED is supposed to be used only for zero size
         * partitions, and efi_write() will fail if we use it.
         * Other available types were all pretty specific.
         * V_USR is as close to reality as we
         * can get, in the absence of V_OTHER.
         */
        vtoc->efi_parts[0].p_tag = V_USR;
        zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);

        /* The reserved partition sits directly behind partition 0. */
        vtoc->efi_parts[8].p_start = slice_size + start_block;
        vtoc->efi_parts[8].p_size = resv;
        vtoc->efi_parts[8].p_tag = V_RESERVED;

        rval = efi_write(fd, vtoc);

        /* Flush the buffers to disk and invalidate the page cache. */
        (void) fsync(fd);
        (void) ioctl(fd, BLKFLSBUF);

        /* Only rescan the device if the label was actually written. */
        if (rval == 0)
                rval = efi_rescan(fd);

        /*
         * Some block drivers (like pcata) may not support EFI GPT labels.
         * Print out a helpful error message directing the user to manually
         * label the disk and give a specific slice.
         */
        if (rval != 0) {
                (void) close(fd);
                efi_free(vtoc);

                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
                    "parted(8) and then provide a specific slice: %d"), rval);
                return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
        }

        (void) close(fd);
        efi_free(vtoc);

        /*
         * Build the path that is waited on below.
         * NOTE(review): zfs_append_partition() presumably turns the
         * whole-disk path into the path of its first partition -- confirm.
         */
        (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
        (void) zfs_append_partition(path, MAXPATHLEN);

        /* Wait for udev to signal us that the device has settled. */
        rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
        if (rval) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
                    "detect device partitions on '%s': %d"), path, rval);
                return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
        }

        /*
         * We can't be too paranoid.  Read the label back and verify it.
         * The check is done on the whole-disk path, so rebuild it without
         * the partition suffix appended above.
         */
        (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
        rval = zpool_label_disk_check(path);
        if (rval) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
                    "EFI label on '%s' is damaged.  Ensure\nthis device "
                    "is not in use, and is functioning properly: %d"),
                    path, rval);
                return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
        }
        return (0);
}

Cache object: 9a945338cb4782d37cb773bc2b0d0c41


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.