The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libefi/rdwr_efi.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   24  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
   25  * Copyright (c) 2018 by Delphix. All rights reserved.
   26  */
   27 
   28 #include <stdio.h>
   29 #include <stdlib.h>
   30 #include <errno.h>
   31 #include <string.h>
   32 #include <unistd.h>
   33 #include <uuid/uuid.h>
   34 #include <zlib.h>
   35 #include <libintl.h>
   36 #include <sys/types.h>
   37 #include <sys/dkio.h>
   38 #include <sys/mhd.h>
   39 #include <sys/param.h>
   40 #include <sys/dktp/fdisk.h>
   41 #include <sys/efi_partition.h>
   42 #include <sys/byteorder.h>
   43 #include <sys/vdev_disk.h>
   44 #include <linux/fs.h>
   45 #include <linux/blkpg.h>
   46 
   47 static struct uuid_to_ptag {
   48         struct uuid     uuid;
   49 } conversion_array[] = {
   50         { EFI_UNUSED },
   51         { EFI_BOOT },
   52         { EFI_ROOT },
   53         { EFI_SWAP },
   54         { EFI_USR },
   55         { EFI_BACKUP },
   56         { EFI_UNUSED },         /* STAND is never used */
   57         { EFI_VAR },
   58         { EFI_HOME },
   59         { EFI_ALTSCTR },
   60         { EFI_UNUSED },         /* CACHE (cachefs) is never used */
   61         { EFI_RESERVED },
   62         { EFI_SYSTEM },
   63         { EFI_LEGACY_MBR },
   64         { EFI_SYMC_PUB },
   65         { EFI_SYMC_CDS },
   66         { EFI_MSFT_RESV },
   67         { EFI_DELL_BASIC },
   68         { EFI_DELL_RAID },
   69         { EFI_DELL_SWAP },
   70         { EFI_DELL_LVM },
   71         { EFI_DELL_RESV },
   72         { EFI_AAPL_HFS },
   73         { EFI_AAPL_UFS },
   74         { EFI_FREEBSD_BOOT },
   75         { EFI_FREEBSD_SWAP },
   76         { EFI_FREEBSD_UFS },
   77         { EFI_FREEBSD_VINUM },
   78         { EFI_FREEBSD_ZFS },
   79         { EFI_BIOS_BOOT },
   80         { EFI_INTC_RS },
   81         { EFI_SNE_BOOT },
   82         { EFI_LENOVO_BOOT },
   83         { EFI_MSFT_LDMM },
   84         { EFI_MSFT_LDMD },
   85         { EFI_MSFT_RE },
   86         { EFI_IBM_GPFS },
   87         { EFI_MSFT_STORAGESPACES },
   88         { EFI_HPQ_DATA },
   89         { EFI_HPQ_SVC },
   90         { EFI_RHT_DATA },
   91         { EFI_RHT_HOME },
   92         { EFI_RHT_SRV },
   93         { EFI_RHT_DMCRYPT },
   94         { EFI_RHT_LUKS },
   95         { EFI_FREEBSD_DISKLABEL },
   96         { EFI_AAPL_RAID },
   97         { EFI_AAPL_RAIDOFFLINE },
   98         { EFI_AAPL_BOOT },
   99         { EFI_AAPL_LABEL },
  100         { EFI_AAPL_TVRECOVERY },
  101         { EFI_AAPL_CORESTORAGE },
  102         { EFI_NETBSD_SWAP },
  103         { EFI_NETBSD_FFS },
  104         { EFI_NETBSD_LFS },
  105         { EFI_NETBSD_RAID },
  106         { EFI_NETBSD_CAT },
  107         { EFI_NETBSD_CRYPT },
  108         { EFI_GOOG_KERN },
  109         { EFI_GOOG_ROOT },
  110         { EFI_GOOG_RESV },
  111         { EFI_HAIKU_BFS },
  112         { EFI_MIDNIGHTBSD_BOOT },
  113         { EFI_MIDNIGHTBSD_DATA },
  114         { EFI_MIDNIGHTBSD_SWAP },
  115         { EFI_MIDNIGHTBSD_UFS },
  116         { EFI_MIDNIGHTBSD_VINUM },
  117         { EFI_MIDNIGHTBSD_ZFS },
  118         { EFI_CEPH_JOURNAL },
  119         { EFI_CEPH_DMCRYPTJOURNAL },
  120         { EFI_CEPH_OSD },
  121         { EFI_CEPH_DMCRYPTOSD },
  122         { EFI_CEPH_CREATE },
  123         { EFI_CEPH_DMCRYPTCREATE },
  124         { EFI_OPENBSD_DISKLABEL },
  125         { EFI_BBRY_QNX },
  126         { EFI_BELL_PLAN9 },
  127         { EFI_VMW_KCORE },
  128         { EFI_VMW_VMFS },
  129         { EFI_VMW_RESV },
  130         { EFI_RHT_ROOTX86 },
  131         { EFI_RHT_ROOTAMD64 },
  132         { EFI_RHT_ROOTARM },
  133         { EFI_RHT_ROOTARM64 },
  134         { EFI_ACRONIS_SECUREZONE },
  135         { EFI_ONIE_BOOT },
  136         { EFI_ONIE_CONFIG },
  137         { EFI_IBM_PPRPBOOT },
  138         { EFI_FREEDESKTOP_BOOT }
  139 };
  140 
  141 int efi_debug = 0;
  142 
  143 static int efi_read(int, struct dk_gpt *);
  144 
  145 /*
  146  * Return a 32-bit CRC of the contents of the buffer.  Pre-and-post
  147  * one's conditioning will be handled by crc32() internally.
  148  */
  149 static uint32_t
  150 efi_crc32(const unsigned char *buf, unsigned int size)
  151 {
  152         uint32_t crc = crc32(0, Z_NULL, 0);
  153 
  154         crc = crc32(crc, buf, size);
  155 
  156         return (crc);
  157 }
  158 
  159 static int
  160 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
  161 {
  162         int sector_size;
  163         unsigned long long capacity_size;
  164 
  165         if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
  166                 return (-1);
  167 
  168         if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
  169                 return (-1);
  170 
  171         *lbsize = (uint_t)sector_size;
  172         *capacity = (diskaddr_t)(capacity_size / sector_size);
  173 
  174         return (0);
  175 }
  176 
  177 /*
  178  * Return back the device name associated with the file descriptor. The
  179  * caller is responsible for freeing the memory associated with the
  180  * returned string.
  181  */
  182 static char *
  183 efi_get_devname(int fd)
  184 {
  185         char path[32];
  186 
  187         /*
  188          * The libefi API only provides the open fd and not the file path.
  189          * To handle this realpath(3) is used to resolve the block device
  190          * name from /proc/self/fd/<fd>.
  191          */
  192         (void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd);
  193         return (realpath(path, NULL));
  194 }
  195 
  196 static int
  197 efi_get_info(int fd, struct dk_cinfo *dki_info)
  198 {
  199         char *dev_path;
  200         int rval = 0;
  201 
  202         memset(dki_info, 0, sizeof (*dki_info));
  203 
  204         /*
  205          * The simplest way to get the partition number under linux is
  206          * to parse it out of the /dev/<disk><partition> block device name.
  207          * The kernel creates this using the partition number when it
  208          * populates /dev/ so it may be trusted.  The tricky bit here is
  209          * that the naming convention is based on the block device type.
  210          * So we need to take this in to account when parsing out the
  211          * partition information.  Aside from the partition number we collect
  212          * some additional device info.
  213          */
  214         dev_path = efi_get_devname(fd);
  215         if (dev_path == NULL)
  216                 goto error;
  217 
  218         if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
  219                 strcpy(dki_info->dki_cname, "sd");
  220                 dki_info->dki_ctype = DKC_SCSI_CCS;
  221                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
  222                     dki_info->dki_dname,
  223                     &dki_info->dki_partition);
  224         } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
  225                 strcpy(dki_info->dki_cname, "hd");
  226                 dki_info->dki_ctype = DKC_DIRECT;
  227                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
  228                     dki_info->dki_dname,
  229                     &dki_info->dki_partition);
  230         } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
  231                 strcpy(dki_info->dki_cname, "pseudo");
  232                 dki_info->dki_ctype = DKC_MD;
  233                 strcpy(dki_info->dki_dname, "md");
  234                 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu",
  235                     dki_info->dki_dname + 2,
  236                     &dki_info->dki_partition);
  237         } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) {
  238                 strcpy(dki_info->dki_cname, "vd");
  239                 dki_info->dki_ctype = DKC_MD;
  240                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
  241                     dki_info->dki_dname,
  242                     &dki_info->dki_partition);
  243         } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) {
  244                 strcpy(dki_info->dki_cname, "xvd");
  245                 dki_info->dki_ctype = DKC_MD;
  246                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
  247                     dki_info->dki_dname,
  248                     &dki_info->dki_partition);
  249         } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) {
  250                 strcpy(dki_info->dki_cname, "zd");
  251                 dki_info->dki_ctype = DKC_MD;
  252                 strcpy(dki_info->dki_dname, "zd");
  253                 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu",
  254                     dki_info->dki_dname + 2,
  255                     &dki_info->dki_partition);
  256         } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
  257                 strcpy(dki_info->dki_cname, "pseudo");
  258                 dki_info->dki_ctype = DKC_VBD;
  259                 strcpy(dki_info->dki_dname, "dm-");
  260                 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu",
  261                     dki_info->dki_dname + 3,
  262                     &dki_info->dki_partition);
  263         } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
  264                 strcpy(dki_info->dki_cname, "pseudo");
  265                 dki_info->dki_ctype = DKC_PCMCIA_MEM;
  266                 strcpy(dki_info->dki_dname, "ram");
  267                 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu",
  268                     dki_info->dki_dname + 3,
  269                     &dki_info->dki_partition);
  270         } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
  271                 strcpy(dki_info->dki_cname, "pseudo");
  272                 dki_info->dki_ctype = DKC_VBD;
  273                 strcpy(dki_info->dki_dname, "loop");
  274                 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu",
  275                     dki_info->dki_dname + 4,
  276                     &dki_info->dki_partition);
  277         } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) {
  278                 strcpy(dki_info->dki_cname, "nvme");
  279                 dki_info->dki_ctype = DKC_SCSI_CCS;
  280                 strcpy(dki_info->dki_dname, "nvme");
  281                 (void) sscanf(dev_path, "/dev/nvme%[0-9]",
  282                     dki_info->dki_dname + 4);
  283                 size_t controller_length = strlen(
  284                     dki_info->dki_dname);
  285                 strcpy(dki_info->dki_dname + controller_length,
  286                     "n");
  287                 rval = sscanf(dev_path,
  288                     "/dev/nvme%*[0-9]n%[0-9]p%hu",
  289                     dki_info->dki_dname + controller_length + 1,
  290                     &dki_info->dki_partition);
  291         } else {
  292                 strcpy(dki_info->dki_dname, "unknown");
  293                 strcpy(dki_info->dki_cname, "unknown");
  294                 dki_info->dki_ctype = DKC_UNKNOWN;
  295         }
  296 
  297         switch (rval) {
  298         case 0:
  299                 errno = EINVAL;
  300                 goto error;
  301         case 1:
  302                 dki_info->dki_partition = 0;
  303         }
  304 
  305         free(dev_path);
  306 
  307         return (0);
  308 error:
  309         if (efi_debug)
  310                 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
  311 
  312         switch (errno) {
  313         case EIO:
  314                 return (VT_EIO);
  315         case EINVAL:
  316                 return (VT_EINVAL);
  317         default:
  318                 return (VT_ERROR);
  319         }
  320 }
  321 
  322 /*
  323  * the number of blocks the EFI label takes up (round up to nearest
  324  * block)
  325  */
  326 #define NBLOCKS(p, l)   (1 + ((((p) * (int)sizeof (efi_gpe_t))  + \
  327                                 ((l) - 1)) / (l)))
  328 /* number of partitions -- limited by what we can malloc */
  329 #define MAX_PARTS       ((4294967295UL - sizeof (struct dk_gpt)) / \
  330                             sizeof (struct dk_part))
  331 
  332 int
  333 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
  334 {
  335         diskaddr_t      capacity = 0;
  336         uint_t          lbsize = 0;
  337         uint_t          nblocks;
  338         size_t          length;
  339         struct dk_gpt   *vptr;
  340         struct uuid     uuid;
  341         struct dk_cinfo dki_info;
  342 
  343         if (read_disk_info(fd, &capacity, &lbsize) != 0)
  344                 return (-1);
  345 
  346         if (efi_get_info(fd, &dki_info) != 0)
  347                 return (-1);
  348 
  349         if (dki_info.dki_partition != 0)
  350                 return (-1);
  351 
  352         if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
  353             (dki_info.dki_ctype == DKC_VBD) ||
  354             (dki_info.dki_ctype == DKC_UNKNOWN))
  355                 return (-1);
  356 
  357         nblocks = NBLOCKS(nparts, lbsize);
  358         if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
  359                 /* 16K plus one block for the GPT */
  360                 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1;
  361         }
  362 
  363         if (nparts > MAX_PARTS) {
  364                 if (efi_debug) {
  365                         (void) fprintf(stderr,
  366                         "the maximum number of partitions supported is %lu\n",
  367                             MAX_PARTS);
  368                 }
  369                 return (-1);
  370         }
  371 
  372         length = sizeof (struct dk_gpt) +
  373             sizeof (struct dk_part) * (nparts - 1);
  374 
  375         vptr = calloc(1, length);
  376         if (vptr == NULL)
  377                 return (-1);
  378 
  379         *vtoc = vptr;
  380 
  381         vptr->efi_version = EFI_VERSION_CURRENT;
  382         vptr->efi_lbasize = lbsize;
  383         vptr->efi_nparts = nparts;
  384         /*
  385          * add one block here for the PMBR; on disks with a 512 byte
  386          * block size and 128 or fewer partitions, efi_first_u_lba
  387          * should work out to "34"
  388          */
  389         vptr->efi_first_u_lba = nblocks + 1;
  390         vptr->efi_last_lba = capacity - 1;
  391         vptr->efi_altern_lba = capacity -1;
  392         vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks;
  393 
  394         (void) uuid_generate((uchar_t *)&uuid);
  395         UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid);
  396         return (0);
  397 }
  398 
  399 /*
  400  * Read EFI - return partition number upon success.
  401  */
  402 int
  403 efi_alloc_and_read(int fd, struct dk_gpt **vtoc)
  404 {
  405         int                     rval;
  406         uint32_t                nparts;
  407         int                     length;
  408         struct dk_gpt           *vptr;
  409 
  410         /* figure out the number of entries that would fit into 16K */
  411         nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
  412         length = (int) sizeof (struct dk_gpt) +
  413             (int) sizeof (struct dk_part) * (nparts - 1);
  414         vptr = calloc(1, length);
  415 
  416         if (vptr == NULL)
  417                 return (VT_ERROR);
  418 
  419         vptr->efi_nparts = nparts;
  420         rval = efi_read(fd, vptr);
  421 
  422         if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) {
  423                 void *tmp;
  424                 length = (int) sizeof (struct dk_gpt) +
  425                     (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1);
  426                 if ((tmp = realloc(vptr, length)) == NULL) {
  427                         /* cppcheck-suppress doubleFree */
  428                         free(vptr);
  429                         *vtoc = NULL;
  430                         return (VT_ERROR);
  431                 } else {
  432                         vptr = tmp;
  433                         rval = efi_read(fd, vptr);
  434                 }
  435         }
  436 
  437         if (rval < 0) {
  438                 if (efi_debug) {
  439                         (void) fprintf(stderr,
  440                             "read of EFI table failed, rval=%d\n", rval);
  441                 }
  442                 free(vptr);
  443                 *vtoc = NULL;
  444         } else {
  445                 *vtoc = vptr;
  446         }
  447 
  448         return (rval);
  449 }
  450 
  451 static int
  452 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
  453 {
  454         void *data = dk_ioc->dki_data;
  455         int error;
  456         diskaddr_t capacity;
  457         uint_t lbsize;
  458 
  459         /*
  460          * When the IO is not being performed in kernel as an ioctl we need
  461          * to know the sector size so we can seek to the proper byte offset.
  462          */
  463         if (read_disk_info(fd, &capacity, &lbsize) == -1) {
  464                 if (efi_debug)
  465                         fprintf(stderr, "unable to read disk info: %d", errno);
  466 
  467                 errno = EIO;
  468                 return (-1);
  469         }
  470 
  471         switch (cmd) {
  472         case DKIOCGETEFI:
  473                 if (lbsize == 0) {
  474                         if (efi_debug)
  475                                 (void) fprintf(stderr, "DKIOCGETEFI assuming "
  476                                     "LBA %d bytes\n", DEV_BSIZE);
  477 
  478                         lbsize = DEV_BSIZE;
  479                 }
  480 
  481                 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
  482                 if (error == -1) {
  483                         if (efi_debug)
  484                                 (void) fprintf(stderr, "DKIOCGETEFI lseek "
  485                                     "error: %d\n", errno);
  486                         return (error);
  487                 }
  488 
  489                 error = read(fd, data, dk_ioc->dki_length);
  490                 if (error == -1) {
  491                         if (efi_debug)
  492                                 (void) fprintf(stderr, "DKIOCGETEFI read "
  493                                     "error: %d\n", errno);
  494                         return (error);
  495                 }
  496 
  497                 if (error != dk_ioc->dki_length) {
  498                         if (efi_debug)
  499                                 (void) fprintf(stderr, "DKIOCGETEFI short "
  500                                     "read of %d bytes\n", error);
  501                         errno = EIO;
  502                         return (-1);
  503                 }
  504                 error = 0;
  505                 break;
  506 
  507         case DKIOCSETEFI:
  508                 if (lbsize == 0) {
  509                         if (efi_debug)
  510                                 (void) fprintf(stderr, "DKIOCSETEFI unknown "
  511                                     "LBA size\n");
  512                         errno = EIO;
  513                         return (-1);
  514                 }
  515 
  516                 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
  517                 if (error == -1) {
  518                         if (efi_debug)
  519                                 (void) fprintf(stderr, "DKIOCSETEFI lseek "
  520                                     "error: %d\n", errno);
  521                         return (error);
  522                 }
  523 
  524                 error = write(fd, data, dk_ioc->dki_length);
  525                 if (error == -1) {
  526                         if (efi_debug)
  527                                 (void) fprintf(stderr, "DKIOCSETEFI write "
  528                                     "error: %d\n", errno);
  529                         return (error);
  530                 }
  531 
  532                 if (error != dk_ioc->dki_length) {
  533                         if (efi_debug)
  534                                 (void) fprintf(stderr, "DKIOCSETEFI short "
  535                                     "write of %d bytes\n", error);
  536                         errno = EIO;
  537                         return (-1);
  538                 }
  539 
  540                 /* Sync the new EFI table to disk */
  541                 error = fsync(fd);
  542                 if (error == -1)
  543                         return (error);
  544 
  545                 /* Ensure any local disk cache is also flushed */
  546                 if (ioctl(fd, BLKFLSBUF, 0) == -1)
  547                         return (error);
  548 
  549                 error = 0;
  550                 break;
  551 
  552         default:
  553                 if (efi_debug)
  554                         (void) fprintf(stderr, "unsupported ioctl()\n");
  555 
  556                 errno = EIO;
  557                 return (-1);
  558         }
  559 
  560         return (error);
  561 }
  562 
  563 int
  564 efi_rescan(int fd)
  565 {
  566         int retry = 10;
  567 
  568         /* Notify the kernel a devices partition table has been updated */
  569         while (ioctl(fd, BLKRRPART) != 0) {
  570                 if ((--retry == 0) || (errno != EBUSY)) {
  571                         (void) fprintf(stderr, "the kernel failed to rescan "
  572                             "the partition table: %d\n", errno);
  573                         return (-1);
  574                 }
  575                 usleep(50000);
  576         }
  577 
  578         return (0);
  579 }
  580 
  581 static int
  582 check_label(int fd, dk_efi_t *dk_ioc)
  583 {
  584         efi_gpt_t               *efi;
  585         uint_t                  crc;
  586 
  587         if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) {
  588                 switch (errno) {
  589                 case EIO:
  590                         return (VT_EIO);
  591                 default:
  592                         return (VT_ERROR);
  593                 }
  594         }
  595         efi = dk_ioc->dki_data;
  596         if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
  597                 if (efi_debug)
  598                         (void) fprintf(stderr,
  599                             "Bad EFI signature: 0x%llx != 0x%llx\n",
  600                             (long long)efi->efi_gpt_Signature,
  601                             (long long)LE_64(EFI_SIGNATURE));
  602                 return (VT_EINVAL);
  603         }
  604 
  605         /*
  606          * check CRC of the header; the size of the header should
  607          * never be larger than one block
  608          */
  609         crc = efi->efi_gpt_HeaderCRC32;
  610         efi->efi_gpt_HeaderCRC32 = 0;
  611         len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize);
  612 
  613         if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) {
  614                 if (efi_debug)
  615                         (void) fprintf(stderr,
  616                             "Invalid EFI HeaderSize %llu.  Assuming %d.\n",
  617                             headerSize, EFI_MIN_LABEL_SIZE);
  618         }
  619 
  620         if ((headerSize > dk_ioc->dki_length) ||
  621             crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) {
  622                 if (efi_debug)
  623                         (void) fprintf(stderr,
  624                             "Bad EFI CRC: 0x%x != 0x%x\n",
  625                             crc, LE_32(efi_crc32((unsigned char *)efi,
  626                             headerSize)));
  627                 return (VT_EINVAL);
  628         }
  629 
  630         return (0);
  631 }
  632 
  633 static int
  634 efi_read(int fd, struct dk_gpt *vtoc)
  635 {
  636         int                     i, j;
  637         int                     label_len;
  638         int                     rval = 0;
  639         int                     md_flag = 0;
  640         int                     vdc_flag = 0;
  641         diskaddr_t              capacity = 0;
  642         uint_t                  lbsize = 0;
  643         struct dk_minfo         disk_info;
  644         dk_efi_t                dk_ioc;
  645         efi_gpt_t               *efi;
  646         efi_gpe_t               *efi_parts;
  647         struct dk_cinfo         dki_info;
  648         uint32_t                user_length;
  649         boolean_t               legacy_label = B_FALSE;
  650 
  651         /*
  652          * get the partition number for this file descriptor.
  653          */
  654         if ((rval = efi_get_info(fd, &dki_info)) != 0)
  655                 return (rval);
  656 
  657         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
  658             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
  659                 md_flag++;
  660         } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) &&
  661             (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) {
  662                 /*
  663                  * The controller and drive name "vdc" (virtual disk client)
  664                  * indicates a LDoms virtual disk.
  665                  */
  666                 vdc_flag++;
  667         }
  668 
  669         /* get the LBA size */
  670         if (read_disk_info(fd, &capacity, &lbsize) == -1) {
  671                 if (efi_debug) {
  672                         (void) fprintf(stderr,
  673                             "unable to read disk info: %d",
  674                             errno);
  675                 }
  676                 return (VT_EINVAL);
  677         }
  678 
  679         disk_info.dki_lbsize = lbsize;
  680         disk_info.dki_capacity = capacity;
  681 
  682         if (disk_info.dki_lbsize == 0) {
  683                 if (efi_debug) {
  684                         (void) fprintf(stderr,
  685                             "efi_read: assuming LBA 512 bytes\n");
  686                 }
  687                 disk_info.dki_lbsize = DEV_BSIZE;
  688         }
  689         /*
  690          * Read the EFI GPT to figure out how many partitions we need
  691          * to deal with.
  692          */
  693         dk_ioc.dki_lba = 1;
  694         if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
  695                 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
  696         } else {
  697                 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
  698                     disk_info.dki_lbsize;
  699                 if (label_len % disk_info.dki_lbsize) {
  700                         /* pad to physical sector size */
  701                         label_len += disk_info.dki_lbsize;
  702                         label_len &= ~(disk_info.dki_lbsize - 1);
  703                 }
  704         }
  705 
  706         if (posix_memalign((void **)&dk_ioc.dki_data,
  707             disk_info.dki_lbsize, label_len))
  708                 return (VT_ERROR);
  709 
  710         memset(dk_ioc.dki_data, 0, label_len);
  711         dk_ioc.dki_length = disk_info.dki_lbsize;
  712         user_length = vtoc->efi_nparts;
  713         efi = dk_ioc.dki_data;
  714         if (md_flag) {
  715                 dk_ioc.dki_length = label_len;
  716                 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
  717                         switch (errno) {
  718                         case EIO:
  719                                 return (VT_EIO);
  720                         default:
  721                                 return (VT_ERROR);
  722                         }
  723                 }
  724         } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) {
  725                 /*
  726                  * No valid label here; try the alternate. Note that here
  727                  * we just read GPT header and save it into dk_ioc.data,
  728                  * Later, we will read GUID partition entry array if we
  729                  * can get valid GPT header.
  730                  */
  731 
  732                 /*
  733                  * This is a workaround for legacy systems. In the past, the
  734                  * last sector of SCSI disk was invisible on x86 platform. At
  735                  * that time, backup label was saved on the next to the last
  736                  * sector. It is possible for users to move a disk from previous
  737                  * solaris system to present system. Here, we attempt to search
  738                  * legacy backup EFI label first.
  739                  */
  740                 dk_ioc.dki_lba = disk_info.dki_capacity - 2;
  741                 dk_ioc.dki_length = disk_info.dki_lbsize;
  742                 rval = check_label(fd, &dk_ioc);
  743                 if (rval == VT_EINVAL) {
  744                         /*
  745                          * we didn't find legacy backup EFI label, try to
  746                          * search backup EFI label in the last block.
  747                          */
  748                         dk_ioc.dki_lba = disk_info.dki_capacity - 1;
  749                         dk_ioc.dki_length = disk_info.dki_lbsize;
  750                         rval = check_label(fd, &dk_ioc);
  751                         if (rval == 0) {
  752                                 legacy_label = B_TRUE;
  753                                 if (efi_debug)
  754                                         (void) fprintf(stderr,
  755                                             "efi_read: primary label corrupt; "
  756                                             "using EFI backup label located on"
  757                                             " the last block\n");
  758                         }
  759                 } else {
  760                         if ((efi_debug) && (rval == 0))
  761                                 (void) fprintf(stderr, "efi_read: primary label"
  762                                     " corrupt; using legacy EFI backup label "
  763                                     " located on the next to last block\n");
  764                 }
  765 
  766                 if (rval == 0) {
  767                         dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
  768                         vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
  769                         vtoc->efi_nparts =
  770                             LE_32(efi->efi_gpt_NumberOfPartitionEntries);
  771                         /*
  772                          * Partition tables are between backup GPT header
  773                          * table and PartitionEntryLBA (the starting LBA of
  774                          * the GUID partition entries array). Now that we
  775                          * already got valid GPT header and saved it in
  776                          * dk_ioc.dki_data, we try to get GUID partition
  777                          * entry array here.
  778                          */
  779                         /* LINTED */
  780                         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
  781                             + disk_info.dki_lbsize);
  782                         if (legacy_label)
  783                                 dk_ioc.dki_length = disk_info.dki_capacity - 1 -
  784                                     dk_ioc.dki_lba;
  785                         else
  786                                 dk_ioc.dki_length = disk_info.dki_capacity - 2 -
  787                                     dk_ioc.dki_lba;
  788                         dk_ioc.dki_length *= disk_info.dki_lbsize;
  789                         if (dk_ioc.dki_length >
  790                             ((len_t)label_len - sizeof (*dk_ioc.dki_data))) {
  791                                 rval = VT_EINVAL;
  792                         } else {
  793                                 /*
  794                                  * read GUID partition entry array
  795                                  */
  796                                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
  797                         }
  798                 }
  799 
  800         } else if (rval == 0) {
  801 
  802                 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
  803                 /* LINTED */
  804                 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
  805                     + disk_info.dki_lbsize);
  806                 dk_ioc.dki_length = label_len - disk_info.dki_lbsize;
  807                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
  808 
  809         } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) {
  810                 /*
  811                  * When the device is a LDoms virtual disk, the DKIOCGETEFI
  812                  * ioctl can fail with EINVAL if the virtual disk backend
  813                  * is a ZFS volume serviced by a domain running an old version
  814                  * of Solaris. This is because the DKIOCGETEFI ioctl was
  815                  * initially incorrectly implemented for a ZFS volume and it
  816                  * expected the GPT and GPE to be retrieved with a single ioctl.
  817                  * So we try to read the GPT and the GPE using that old style
  818                  * ioctl.
  819                  */
  820                 dk_ioc.dki_lba = 1;
  821                 dk_ioc.dki_length = label_len;
  822                 rval = check_label(fd, &dk_ioc);
  823         }
  824 
  825         if (rval < 0) {
  826                 free(efi);
  827                 return (rval);
  828         }
  829 
  830         /* LINTED -- always longlong aligned */
  831         efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
  832 
  833         /*
  834          * Assemble this into a "dk_gpt" struct for easier
  835          * digestibility by applications.
  836          */
  837         vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
  838         vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
  839         vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
  840         vtoc->efi_lbasize = disk_info.dki_lbsize;
  841         vtoc->efi_last_lba = disk_info.dki_capacity - 1;
  842         vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
  843         vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
  844         vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
  845         UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
  846 
  847         /*
  848          * If the array the user passed in is too small, set the length
  849          * to what it needs to be and return
  850          */
  851         if (user_length < vtoc->efi_nparts) {
  852                 return (VT_EINVAL);
  853         }
  854 
  855         for (i = 0; i < vtoc->efi_nparts; i++) {
  856                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
  857                     efi_parts[i].efi_gpe_PartitionTypeGUID);
  858 
  859                 for (j = 0;
  860                     j < sizeof (conversion_array)
  861                     / sizeof (struct uuid_to_ptag); j++) {
  862 
  863                         if (memcmp(&vtoc->efi_parts[i].p_guid,
  864                             &conversion_array[j].uuid,
  865                             sizeof (struct uuid)) == 0) {
  866                                 vtoc->efi_parts[i].p_tag = j;
  867                                 break;
  868                         }
  869                 }
  870                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
  871                         continue;
  872                 vtoc->efi_parts[i].p_flag =
  873                     LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
  874                 vtoc->efi_parts[i].p_start =
  875                     LE_64(efi_parts[i].efi_gpe_StartingLBA);
  876                 vtoc->efi_parts[i].p_size =
  877                     LE_64(efi_parts[i].efi_gpe_EndingLBA) -
  878                     vtoc->efi_parts[i].p_start + 1;
  879                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
  880                         vtoc->efi_parts[i].p_name[j] =
  881                             (uchar_t)LE_16(
  882                             efi_parts[i].efi_gpe_PartitionName[j]);
  883                 }
  884 
  885                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
  886                     efi_parts[i].efi_gpe_UniquePartitionGUID);
  887         }
  888         free(efi);
  889 
  890         return (dki_info.dki_partition);
  891 }
  892 
  893 /* writes a "protective" MBR */
  894 static int
  895 write_pmbr(int fd, struct dk_gpt *vtoc)
  896 {
  897         dk_efi_t        dk_ioc;
  898         struct mboot    mb;
  899         uchar_t         *cp;
  900         diskaddr_t      size_in_lba;
  901         uchar_t         *buf;
  902         int             len;
  903 
  904         len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
  905         if (posix_memalign((void **)&buf, len, len))
  906                 return (VT_ERROR);
  907 
  908         /*
  909          * Preserve any boot code and disk signature if the first block is
  910          * already an MBR.
  911          */
  912         memset(buf, 0, len);
  913         dk_ioc.dki_lba = 0;
  914         dk_ioc.dki_length = len;
  915         /* LINTED -- always longlong aligned */
  916         dk_ioc.dki_data = (efi_gpt_t *)buf;
  917         if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
  918                 memset(&mb, 0, sizeof (mb));
  919                 mb.signature = LE_16(MBB_MAGIC);
  920         } else {
  921                 (void) memcpy(&mb, buf, sizeof (mb));
  922                 if (mb.signature != LE_16(MBB_MAGIC)) {
  923                         memset(&mb, 0, sizeof (mb));
  924                         mb.signature = LE_16(MBB_MAGIC);
  925                 }
  926         }
  927 
  928         memset(&mb.parts, 0, sizeof (mb.parts));
  929         cp = (uchar_t *)&mb.parts[0];
  930         /* bootable or not */
  931         *cp++ = 0;
  932         /* beginning CHS; 0xffffff if not representable */
  933         *cp++ = 0xff;
  934         *cp++ = 0xff;
  935         *cp++ = 0xff;
  936         /* OS type */
  937         *cp++ = EFI_PMBR;
  938         /* ending CHS; 0xffffff if not representable */
  939         *cp++ = 0xff;
  940         *cp++ = 0xff;
  941         *cp++ = 0xff;
  942         /* starting LBA: 1 (little endian format) by EFI definition */
  943         *cp++ = 0x01;
  944         *cp++ = 0x00;
  945         *cp++ = 0x00;
  946         *cp++ = 0x00;
  947         /* ending LBA: last block on the disk (little endian format) */
  948         size_in_lba = vtoc->efi_last_lba;
  949         if (size_in_lba < 0xffffffff) {
  950                 *cp++ = (size_in_lba & 0x000000ff);
  951                 *cp++ = (size_in_lba & 0x0000ff00) >> 8;
  952                 *cp++ = (size_in_lba & 0x00ff0000) >> 16;
  953                 *cp++ = (size_in_lba & 0xff000000) >> 24;
  954         } else {
  955                 *cp++ = 0xff;
  956                 *cp++ = 0xff;
  957                 *cp++ = 0xff;
  958                 *cp++ = 0xff;
  959         }
  960 
  961         (void) memcpy(buf, &mb, sizeof (mb));
  962         /* LINTED -- always longlong aligned */
  963         dk_ioc.dki_data = (efi_gpt_t *)buf;
  964         dk_ioc.dki_lba = 0;
  965         dk_ioc.dki_length = len;
  966         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
  967                 free(buf);
  968                 switch (errno) {
  969                 case EIO:
  970                         return (VT_EIO);
  971                 case EINVAL:
  972                         return (VT_EINVAL);
  973                 default:
  974                         return (VT_ERROR);
  975                 }
  976         }
  977         free(buf);
  978         return (0);
  979 }
  980 
  981 /* make sure the user specified something reasonable */
  982 static int
  983 check_input(struct dk_gpt *vtoc)
  984 {
  985         int                     resv_part = -1;
  986         int                     i, j;
  987         diskaddr_t              istart, jstart, isize, jsize, endsect;
  988 
  989         /*
  990          * Sanity-check the input (make sure no partitions overlap)
  991          */
  992         for (i = 0; i < vtoc->efi_nparts; i++) {
  993                 /* It can't be unassigned and have an actual size */
  994                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
  995                     (vtoc->efi_parts[i].p_size != 0)) {
  996                         if (efi_debug) {
  997                                 (void) fprintf(stderr, "partition %d is "
  998                                     "\"unassigned\" but has a size of %llu",
  999                                     i, vtoc->efi_parts[i].p_size);
 1000                         }
 1001                         return (VT_EINVAL);
 1002                 }
 1003                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
 1004                         if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
 1005                                 continue;
 1006                         /* we have encountered an unknown uuid */
 1007                         vtoc->efi_parts[i].p_tag = 0xff;
 1008                 }
 1009                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
 1010                         if (resv_part != -1) {
 1011                                 if (efi_debug) {
 1012                                         (void) fprintf(stderr, "found "
 1013                                             "duplicate reserved partition "
 1014                                             "at %d\n", i);
 1015                                 }
 1016                                 return (VT_EINVAL);
 1017                         }
 1018                         resv_part = i;
 1019                 }
 1020                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
 1021                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
 1022                         if (efi_debug) {
 1023                                 (void) fprintf(stderr,
 1024                                     "Partition %d starts at %llu.  ",
 1025                                     i,
 1026                                     vtoc->efi_parts[i].p_start);
 1027                                 (void) fprintf(stderr,
 1028                                     "It must be between %llu and %llu.\n",
 1029                                     vtoc->efi_first_u_lba,
 1030                                     vtoc->efi_last_u_lba);
 1031                         }
 1032                         return (VT_EINVAL);
 1033                 }
 1034                 if ((vtoc->efi_parts[i].p_start +
 1035                     vtoc->efi_parts[i].p_size <
 1036                     vtoc->efi_first_u_lba) ||
 1037                     (vtoc->efi_parts[i].p_start +
 1038                     vtoc->efi_parts[i].p_size >
 1039                     vtoc->efi_last_u_lba + 1)) {
 1040                         if (efi_debug) {
 1041                                 (void) fprintf(stderr,
 1042                                     "Partition %d ends at %llu.  ",
 1043                                     i,
 1044                                     vtoc->efi_parts[i].p_start +
 1045                                     vtoc->efi_parts[i].p_size);
 1046                                 (void) fprintf(stderr,
 1047                                     "It must be between %llu and %llu.\n",
 1048                                     vtoc->efi_first_u_lba,
 1049                                     vtoc->efi_last_u_lba);
 1050                         }
 1051                         return (VT_EINVAL);
 1052                 }
 1053 
 1054                 for (j = 0; j < vtoc->efi_nparts; j++) {
 1055                         isize = vtoc->efi_parts[i].p_size;
 1056                         jsize = vtoc->efi_parts[j].p_size;
 1057                         istart = vtoc->efi_parts[i].p_start;
 1058                         jstart = vtoc->efi_parts[j].p_start;
 1059                         if ((i != j) && (isize != 0) && (jsize != 0)) {
 1060                                 endsect = jstart + jsize -1;
 1061                                 if ((jstart <= istart) &&
 1062                                     (istart <= endsect)) {
 1063                                         if (efi_debug) {
 1064                                                 (void) fprintf(stderr,
 1065                                                     "Partition %d overlaps "
 1066                                                     "partition %d.", i, j);
 1067                                         }
 1068                                         return (VT_EINVAL);
 1069                                 }
 1070                         }
 1071                 }
 1072         }
 1073         /* just a warning for now */
 1074         if ((resv_part == -1) && efi_debug) {
 1075                 (void) fprintf(stderr,
 1076                     "no reserved partition found\n");
 1077         }
 1078         return (0);
 1079 }
 1080 
 1081 static int
 1082 call_blkpg_ioctl(int fd, int command, diskaddr_t start,
 1083     diskaddr_t size, uint_t pno)
 1084 {
 1085         struct blkpg_ioctl_arg ioctl_arg;
 1086         struct blkpg_partition  linux_part;
 1087         memset(&linux_part, 0, sizeof (linux_part));
 1088 
 1089         char *path = efi_get_devname(fd);
 1090         if (path == NULL) {
 1091                 (void) fprintf(stderr, "failed to retrieve device name\n");
 1092                 return (VT_EINVAL);
 1093         }
 1094 
 1095         linux_part.start = start;
 1096         linux_part.length = size;
 1097         linux_part.pno = pno;
 1098         snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno);
 1099         linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
 1100         free(path);
 1101 
 1102         ioctl_arg.op = command;
 1103         ioctl_arg.flags = 0;
 1104         ioctl_arg.datalen = sizeof (struct blkpg_partition);
 1105         ioctl_arg.data = &linux_part;
 1106 
 1107         return (ioctl(fd, BLKPG, &ioctl_arg));
 1108 }
 1109 
 1110 /*
 1111  * add all the unallocated space to the current label
 1112  */
 1113 int
 1114 efi_use_whole_disk(int fd)
 1115 {
 1116         struct dk_gpt *efi_label = NULL;
 1117         int rval;
 1118         int i;
 1119         uint_t resv_index = 0, data_index = 0;
 1120         diskaddr_t resv_start = 0, data_start = 0;
 1121         diskaddr_t data_size, limit, difference;
 1122         boolean_t sync_needed = B_FALSE;
 1123         uint_t nblocks;
 1124 
 1125         rval = efi_alloc_and_read(fd, &efi_label);
 1126         if (rval < 0) {
 1127                 if (efi_label != NULL)
 1128                         efi_free(efi_label);
 1129                 return (rval);
 1130         }
 1131 
 1132         /*
 1133          * Find the last physically non-zero partition.
 1134          * This should be the reserved partition.
 1135          */
 1136         for (i = 0; i < efi_label->efi_nparts; i ++) {
 1137                 if (resv_start < efi_label->efi_parts[i].p_start) {
 1138                         resv_start = efi_label->efi_parts[i].p_start;
 1139                         resv_index = i;
 1140                 }
 1141         }
 1142 
 1143         /*
 1144          * Find the last physically non-zero partition before that.
 1145          * This is the data partition.
 1146          */
 1147         for (i = 0; i < resv_index; i ++) {
 1148                 if (data_start < efi_label->efi_parts[i].p_start) {
 1149                         data_start = efi_label->efi_parts[i].p_start;
 1150                         data_index = i;
 1151                 }
 1152         }
 1153         data_size = efi_label->efi_parts[data_index].p_size;
 1154 
 1155         /*
 1156          * See the "efi_alloc_and_init" function for more information
 1157          * about where this "nblocks" value comes from.
 1158          */
 1159         nblocks = efi_label->efi_first_u_lba - 1;
 1160 
 1161         /*
 1162          * Determine if the EFI label is out of sync. We check that:
 1163          *
 1164          * 1. the data partition ends at the limit we set, and
 1165          * 2. the reserved partition starts at the limit we set.
 1166          *
 1167          * If either of these conditions is not met, then we need to
 1168          * resync the EFI label.
 1169          *
 1170          * The limit is the last usable LBA, determined by the last LBA
 1171          * and the first usable LBA fields on the EFI label of the disk
 1172          * (see the lines directly above). Additionally, we factor in
 1173          * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and
 1174          * P2ALIGN it to ensure the partition boundaries are aligned
 1175          * (for performance reasons). The alignment should match the
 1176          * alignment used by the "zpool_label_disk" function.
 1177          */
 1178         limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE,
 1179             PARTITION_END_ALIGNMENT);
 1180         if (data_start + data_size != limit || resv_start != limit)
 1181                 sync_needed = B_TRUE;
 1182 
 1183         if (efi_debug && sync_needed)
 1184                 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n");
 1185 
 1186         /*
 1187          * If alter_lba is 1, we are using the backup label.
 1188          * Since we can locate the backup label by disk capacity,
 1189          * there must be no unallocated space.
 1190          */
 1191         if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba
 1192             >= efi_label->efi_last_lba && !sync_needed)) {
 1193                 if (efi_debug) {
 1194                         (void) fprintf(stderr,
 1195                             "efi_use_whole_disk: requested space not found\n");
 1196                 }
 1197                 efi_free(efi_label);
 1198                 return (VT_ENOSPC);
 1199         }
 1200 
 1201         /*
 1202          * Verify that we've found the reserved partition by checking
 1203          * that it looks the way it did when we created it in zpool_label_disk.
 1204          * If we've found the incorrect partition, then we know that this
 1205          * device was reformatted and no longer is solely used by ZFS.
 1206          */
 1207         if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
 1208             (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
 1209             (resv_index != 8)) {
 1210                 if (efi_debug) {
 1211                         (void) fprintf(stderr,
 1212                             "efi_use_whole_disk: wholedisk not available\n");
 1213                 }
 1214                 efi_free(efi_label);
 1215                 return (VT_ENOSPC);
 1216         }
 1217 
 1218         if (data_start + data_size != resv_start) {
 1219                 if (efi_debug) {
 1220                         (void) fprintf(stderr,
 1221                             "efi_use_whole_disk: "
 1222                             "data_start (%lli) + "
 1223                             "data_size (%lli) != "
 1224                             "resv_start (%lli)\n",
 1225                             data_start, data_size, resv_start);
 1226                 }
 1227 
 1228                 return (VT_EINVAL);
 1229         }
 1230 
 1231         if (limit < resv_start) {
 1232                 if (efi_debug) {
 1233                         (void) fprintf(stderr,
 1234                             "efi_use_whole_disk: "
 1235                             "limit (%lli) < resv_start (%lli)\n",
 1236                             limit, resv_start);
 1237                 }
 1238 
 1239                 return (VT_EINVAL);
 1240         }
 1241 
 1242         difference = limit - resv_start;
 1243 
 1244         if (efi_debug)
 1245                 (void) fprintf(stderr,
 1246                     "efi_use_whole_disk: difference is %lli\n", difference);
 1247 
 1248         /*
 1249          * Move the reserved partition. There is currently no data in
 1250          * here except fabricated devids (which get generated via
 1251          * efi_write()). So there is no need to copy data.
 1252          */
 1253         efi_label->efi_parts[data_index].p_size += difference;
 1254         efi_label->efi_parts[resv_index].p_start += difference;
 1255         efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
 1256 
 1257         /*
 1258          * Rescanning the partition table in the kernel can result
 1259          * in the device links to be removed (see comment in vdev_disk_open).
 1260          * If BLKPG_RESIZE_PARTITION is available, then we can resize
 1261          * the partition table online and avoid having to remove the device
 1262          * links used by the pool. This provides a very deterministic
 1263          * approach to resizing devices and does not require any
 1264          * loops waiting for devices to reappear.
 1265          */
 1266 #ifdef BLKPG_RESIZE_PARTITION
 1267         /*
 1268          * Delete the reserved partition since we're about to expand
 1269          * the data partition and it would overlap with the reserved
 1270          * partition.
 1271          * NOTE: The starting index for the ioctl is 1 while for the
 1272          * EFI partitions it's 0. For that reason we have to add one
 1273          * whenever we make an ioctl call.
 1274          */
 1275         rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
 1276         if (rval != 0)
 1277                 goto out;
 1278 
 1279         /*
 1280          * Expand the data partition
 1281          */
 1282         rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
 1283             efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
 1284             efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
 1285             data_index + 1);
 1286         if (rval != 0) {
 1287                 (void) fprintf(stderr, "Unable to resize data "
 1288                     "partition:  %d\n", rval);
 1289                 /*
 1290                  * Since we failed to resize, we need to reset the start
 1291                  * of the reserve partition and re-create it.
 1292                  */
 1293                 efi_label->efi_parts[resv_index].p_start -= difference;
 1294         }
 1295 
 1296         /*
 1297          * Re-add the reserved partition. If we've expanded the data partition
 1298          * then we'll move the reserve partition to the end of the data
 1299          * partition. Otherwise, we'll recreate the partition in its original
 1300          * location. Note that we do this as best-effort and ignore any
 1301          * errors that may arise here. This will ensure that we finish writing
 1302          * the EFI label.
 1303          */
 1304         (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
 1305             efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
 1306             efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
 1307             resv_index + 1);
 1308 #endif
 1309 
 1310         /*
 1311          * We're now ready to write the EFI label.
 1312          */
 1313         if (rval == 0) {
 1314                 rval = efi_write(fd, efi_label);
 1315                 if (rval < 0 && efi_debug) {
 1316                         (void) fprintf(stderr, "efi_use_whole_disk:fail "
 1317                             "to write label, rval=%d\n", rval);
 1318                 }
 1319         }
 1320 
 1321 out:
 1322         efi_free(efi_label);
 1323         return (rval);
 1324 }
 1325 
 1326 /*
 1327  * write EFI label and backup label
 1328  */
 1329 int
 1330 efi_write(int fd, struct dk_gpt *vtoc)
 1331 {
 1332         dk_efi_t                dk_ioc;
 1333         efi_gpt_t               *efi;
 1334         efi_gpe_t               *efi_parts;
 1335         int                     i, j;
 1336         struct dk_cinfo         dki_info;
 1337         int                     rval;
 1338         int                     md_flag = 0;
 1339         int                     nblocks;
 1340         diskaddr_t              lba_backup_gpt_hdr;
 1341 
 1342         if ((rval = efi_get_info(fd, &dki_info)) != 0)
 1343                 return (rval);
 1344 
 1345         /* check if we are dealing with a metadevice */
 1346         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
 1347             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
 1348                 md_flag = 1;
 1349         }
 1350 
 1351         if (check_input(vtoc)) {
 1352                 /*
 1353                  * not valid; if it's a metadevice just pass it down
 1354                  * because SVM will do its own checking
 1355                  */
 1356                 if (md_flag == 0) {
 1357                         return (VT_EINVAL);
 1358                 }
 1359         }
 1360 
 1361         dk_ioc.dki_lba = 1;
 1362         if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) {
 1363                 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize;
 1364         } else {
 1365                 dk_ioc.dki_length = (len_t)NBLOCKS(vtoc->efi_nparts,
 1366                     vtoc->efi_lbasize) *
 1367                     vtoc->efi_lbasize;
 1368         }
 1369 
 1370         /*
 1371          * the number of blocks occupied by GUID partition entry array
 1372          */
 1373         nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1;
 1374 
 1375         /*
 1376          * Backup GPT header is located on the block after GUID
 1377          * partition entry array. Here, we calculate the address
 1378          * for backup GPT header.
 1379          */
 1380         lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
 1381         if (posix_memalign((void **)&dk_ioc.dki_data,
 1382             vtoc->efi_lbasize, dk_ioc.dki_length))
 1383                 return (VT_ERROR);
 1384 
 1385         memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
 1386         efi = dk_ioc.dki_data;
 1387 
 1388         /* stuff user's input into EFI struct */
 1389         efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
 1390         efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */
 1391         efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD);
 1392         efi->efi_gpt_Reserved1 = 0;
 1393         efi->efi_gpt_MyLBA = LE_64(1ULL);
 1394         efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr);
 1395         efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba);
 1396         efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba);
 1397         efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
 1398         efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts);
 1399         efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe));
 1400         UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid);
 1401 
 1402         /* LINTED -- always longlong aligned */
 1403         efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize);
 1404 
 1405         for (i = 0; i < vtoc->efi_nparts; i++) {
 1406                 for (j = 0;
 1407                     j < sizeof (conversion_array) /
 1408                     sizeof (struct uuid_to_ptag); j++) {
 1409 
 1410                         if (vtoc->efi_parts[i].p_tag == j) {
 1411                                 UUID_LE_CONVERT(
 1412                                     efi_parts[i].efi_gpe_PartitionTypeGUID,
 1413                                     conversion_array[j].uuid);
 1414                                 break;
 1415                         }
 1416                 }
 1417 
 1418                 if (j == sizeof (conversion_array) /
 1419                     sizeof (struct uuid_to_ptag)) {
 1420                         /*
 1421                          * If we didn't have a matching uuid match, bail here.
 1422                          * Don't write a label with unknown uuid.
 1423                          */
 1424                         if (efi_debug) {
 1425                                 (void) fprintf(stderr,
 1426                                     "Unknown uuid for p_tag %d\n",
 1427                                     vtoc->efi_parts[i].p_tag);
 1428                         }
 1429                         return (VT_EINVAL);
 1430                 }
 1431 
 1432                 /* Zero's should be written for empty partitions */
 1433                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
 1434                         continue;
 1435 
 1436                 efi_parts[i].efi_gpe_StartingLBA =
 1437                     LE_64(vtoc->efi_parts[i].p_start);
 1438                 efi_parts[i].efi_gpe_EndingLBA =
 1439                     LE_64(vtoc->efi_parts[i].p_start +
 1440                     vtoc->efi_parts[i].p_size - 1);
 1441                 efi_parts[i].efi_gpe_Attributes.PartitionAttrs =
 1442                     LE_16(vtoc->efi_parts[i].p_flag);
 1443                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
 1444                         efi_parts[i].efi_gpe_PartitionName[j] =
 1445                             LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]);
 1446                 }
 1447                 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) &&
 1448                     uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) {
 1449                         (void) uuid_generate((uchar_t *)
 1450                             &vtoc->efi_parts[i].p_uguid);
 1451                 }
 1452                 memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID,
 1453                     &vtoc->efi_parts[i].p_uguid,
 1454                     sizeof (uuid_t));
 1455         }
 1456         efi->efi_gpt_PartitionEntryArrayCRC32 =
 1457             LE_32(efi_crc32((unsigned char *)efi_parts,
 1458             vtoc->efi_nparts * (int)sizeof (struct efi_gpe)));
 1459         efi->efi_gpt_HeaderCRC32 =
 1460             LE_32(efi_crc32((unsigned char *)efi,
 1461             LE_32(efi->efi_gpt_HeaderSize)));
 1462 
 1463         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
 1464                 free(dk_ioc.dki_data);
 1465                 switch (errno) {
 1466                 case EIO:
 1467                         return (VT_EIO);
 1468                 case EINVAL:
 1469                         return (VT_EINVAL);
 1470                 default:
 1471                         return (VT_ERROR);
 1472                 }
 1473         }
 1474         /* if it's a metadevice we're done */
 1475         if (md_flag) {
 1476                 free(dk_ioc.dki_data);
 1477                 return (0);
 1478         }
 1479 
 1480         /* write backup partition array */
 1481         dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1;
 1482         dk_ioc.dki_length -= vtoc->efi_lbasize;
 1483         /* LINTED */
 1484         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data +
 1485             vtoc->efi_lbasize);
 1486 
 1487         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
 1488                 /*
 1489                  * we wrote the primary label okay, so don't fail
 1490                  */
 1491                 if (efi_debug) {
 1492                         (void) fprintf(stderr,
 1493                             "write of backup partitions to block %llu "
 1494                             "failed, errno %d\n",
 1495                             vtoc->efi_last_u_lba + 1,
 1496                             errno);
 1497                 }
 1498         }
 1499         /*
 1500          * now swap MyLBA and AlternateLBA fields and write backup
 1501          * partition table header
 1502          */
 1503         dk_ioc.dki_lba = lba_backup_gpt_hdr;
 1504         dk_ioc.dki_length = vtoc->efi_lbasize;
 1505         /* LINTED */
 1506         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data -
 1507             vtoc->efi_lbasize);
 1508         efi->efi_gpt_AlternateLBA = LE_64(1ULL);
 1509         efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr);
 1510         efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1);
 1511         efi->efi_gpt_HeaderCRC32 = 0;
 1512         efi->efi_gpt_HeaderCRC32 =
 1513             LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data,
 1514             LE_32(efi->efi_gpt_HeaderSize)));
 1515 
 1516         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
 1517                 if (efi_debug) {
 1518                         (void) fprintf(stderr,
 1519                             "write of backup header to block %llu failed, "
 1520                             "errno %d\n",
 1521                             lba_backup_gpt_hdr,
 1522                             errno);
 1523                 }
 1524         }
 1525         /* write the PMBR */
 1526         (void) write_pmbr(fd, vtoc);
 1527         free(dk_ioc.dki_data);
 1528 
 1529         return (0);
 1530 }
 1531 
 /*
  * Release a label structure.
  *
  * ptr is handed straight to free(); nothing else is done with it, so
  * the pointer must not be used again once this returns.
  */
 void
 efi_free(struct dk_gpt *ptr)
 {
         free(ptr);
 }
 1537 
 1538 void
 1539 efi_err_check(struct dk_gpt *vtoc)
 1540 {
 1541         int                     resv_part = -1;
 1542         int                     i, j;
 1543         diskaddr_t              istart, jstart, isize, jsize, endsect;
 1544         int                     overlap = 0;
 1545 
 1546         /*
 1547          * make sure no partitions overlap
 1548          */
 1549         for (i = 0; i < vtoc->efi_nparts; i++) {
 1550                 /* It can't be unassigned and have an actual size */
 1551                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
 1552                     (vtoc->efi_parts[i].p_size != 0)) {
 1553                         (void) fprintf(stderr,
 1554                             "partition %d is \"unassigned\" but has a size "
 1555                             "of %llu\n", i, vtoc->efi_parts[i].p_size);
 1556                 }
 1557                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
 1558                         continue;
 1559                 }
 1560                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
 1561                         if (resv_part != -1) {
 1562                                 (void) fprintf(stderr,
 1563                                     "found duplicate reserved partition at "
 1564                                     "%d\n", i);
 1565                         }
 1566                         resv_part = i;
 1567                         if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE)
 1568                                 (void) fprintf(stderr,
 1569                                     "Warning: reserved partition size must "
 1570                                     "be %d sectors\n", EFI_MIN_RESV_SIZE);
 1571                 }
 1572                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
 1573                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
 1574                         (void) fprintf(stderr,
 1575                             "Partition %d starts at %llu\n",
 1576                             i,
 1577                             vtoc->efi_parts[i].p_start);
 1578                         (void) fprintf(stderr,
 1579                             "It must be between %llu and %llu.\n",
 1580                             vtoc->efi_first_u_lba,
 1581                             vtoc->efi_last_u_lba);
 1582                 }
 1583                 if ((vtoc->efi_parts[i].p_start +
 1584                     vtoc->efi_parts[i].p_size <
 1585                     vtoc->efi_first_u_lba) ||
 1586                     (vtoc->efi_parts[i].p_start +
 1587                     vtoc->efi_parts[i].p_size >
 1588                     vtoc->efi_last_u_lba + 1)) {
 1589                         (void) fprintf(stderr,
 1590                             "Partition %d ends at %llu\n",
 1591                             i,
 1592                             vtoc->efi_parts[i].p_start +
 1593                             vtoc->efi_parts[i].p_size);
 1594                         (void) fprintf(stderr,
 1595                             "It must be between %llu and %llu.\n",
 1596                             vtoc->efi_first_u_lba,
 1597                             vtoc->efi_last_u_lba);
 1598                 }
 1599 
 1600                 for (j = 0; j < vtoc->efi_nparts; j++) {
 1601                         isize = vtoc->efi_parts[i].p_size;
 1602                         jsize = vtoc->efi_parts[j].p_size;
 1603                         istart = vtoc->efi_parts[i].p_start;
 1604                         jstart = vtoc->efi_parts[j].p_start;
 1605                         if ((i != j) && (isize != 0) && (jsize != 0)) {
 1606                                 endsect = jstart + jsize -1;
 1607                                 if ((jstart <= istart) &&
 1608                                     (istart <= endsect)) {
 1609                                         if (!overlap) {
 1610                                         (void) fprintf(stderr,
 1611                                             "label error: EFI Labels do not "
 1612                                             "support overlapping partitions\n");
 1613                                         }
 1614                                         (void) fprintf(stderr,
 1615                                             "Partition %d overlaps partition "
 1616                                             "%d.\n", i, j);
 1617                                         overlap = 1;
 1618                                 }
 1619                         }
 1620                 }
 1621         }
 1622         /* make sure there is a reserved partition */
 1623         if (resv_part == -1) {
 1624                 (void) fprintf(stderr,
 1625                     "no reserved partition found\n");
 1626         }
 1627 }

Cache object: 8b07e1bb4ce8c6508fe0ee6c4f0e5dfa


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.