FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/md_intel.c


    1 /*-
    2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/8.3/sys/geom/raid/md_intel.c 230245 2012-01-17 00:02:45Z jimharris $");
   29 
   30 #include <sys/param.h>
   31 #include <sys/bio.h>
   32 #include <sys/endian.h>
   33 #include <sys/kernel.h>
   34 #include <sys/kobj.h>
   35 #include <sys/limits.h>
   36 #include <sys/lock.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mutex.h>
   39 #include <sys/systm.h>
   40 #include <sys/taskqueue.h>
   41 #include <geom/geom.h>
   42 #include "geom/raid/g_raid.h"
   43 #include "g_raid_md_if.h"
   44 
   45 static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
   46 
   47 struct intel_raid_map {
   48         uint32_t        offset;
   49         uint32_t        disk_sectors;
   50         uint32_t        stripe_count;
   51         uint16_t        strip_sectors;
   52         uint8_t         status;
   53 #define INTEL_S_READY           0x00
   54 #define INTEL_S_UNINITIALIZED   0x01
   55 #define INTEL_S_DEGRADED        0x02
   56 #define INTEL_S_FAILURE         0x03
   57 
   58         uint8_t         type;
   59 #define INTEL_T_RAID0           0x00
   60 #define INTEL_T_RAID1           0x01
   61 #define INTEL_T_RAID5           0x05
   62 
   63         uint8_t         total_disks;
   64         uint8_t         total_domains;
   65         uint8_t         failed_disk_num;
   66         uint8_t         ddf;
   67         uint32_t        offset_hi;
   68         uint32_t        disk_sectors_hi;
   69         uint32_t        stripe_count_hi;
   70         uint32_t        filler_2[4];
   71         uint32_t        disk_idx[1];    /* total_disks entries. */
   72 #define INTEL_DI_IDX    0x00ffffff
   73 #define INTEL_DI_RBLD   0x01000000
   74 } __packed;
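/*
 * Illustrative note on disk_idx[] values (derived from the masks above):
 * the low 24 bits (INTEL_DI_IDX) hold the disk number and bit 24
 * (INTEL_DI_RBLD) marks that slot as rebuilding.  A value of 0x01000002,
 * for instance, names disk 2 while it is being rebuilt, while 0x00000002
 * names the same disk fully in sync.
 */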
   75 
   76 struct intel_raid_vol {
   77         uint8_t         name[16];
   78         u_int64_t       total_sectors __packed;
   79         uint32_t        state;
   80 #define INTEL_ST_BOOTABLE               0x00000001
   81 #define INTEL_ST_BOOT_DEVICE            0x00000002
   82 #define INTEL_ST_READ_COALESCING        0x00000004
   83 #define INTEL_ST_WRITE_COALESCING       0x00000008
   84 #define INTEL_ST_LAST_SHUTDOWN_DIRTY    0x00000010
   85 #define INTEL_ST_HIDDEN_AT_BOOT         0x00000020
   86 #define INTEL_ST_CURRENTLY_HIDDEN       0x00000040
   87 #define INTEL_ST_VERIFY_AND_FIX         0x00000080
   88 #define INTEL_ST_MAP_STATE_UNINIT       0x00000100
   89 #define INTEL_ST_NO_AUTO_RECOVERY       0x00000200
   90 #define INTEL_ST_CLONE_N_GO             0x00000400
   91 #define INTEL_ST_CLONE_MAN_SYNC         0x00000800
   92 #define INTEL_ST_CNG_MASTER_DISK_NUM    0x00001000
   93         uint32_t        reserved;
   94         uint8_t         migr_priority;
   95         uint8_t         num_sub_vols;
   96         uint8_t         tid;
   97         uint8_t         cng_master_disk;
   98         uint16_t        cache_policy;
   99         uint8_t         cng_state;
  100         uint8_t         cng_sub_state;
  101         uint32_t        filler_0[10];
  102 
  103         uint32_t        curr_migr_unit;
  104         uint32_t        checkpoint_id;
  105         uint8_t         migr_state;
  106         uint8_t         migr_type;
  107 #define INTEL_MT_INIT           0
  108 #define INTEL_MT_REBUILD        1
  109 #define INTEL_MT_VERIFY         2
  110 #define INTEL_MT_GEN_MIGR       3
  111 #define INTEL_MT_STATE_CHANGE   4
  112 #define INTEL_MT_REPAIR         5
  113         uint8_t         dirty;
  114         uint8_t         fs_state;
  115         uint16_t        verify_errors;
  116         uint16_t        bad_blocks;
  117         uint32_t        curr_migr_unit_hi;
  118         uint32_t        filler_1[3];
  119         struct intel_raid_map map[1];   /* 2 entries if migr_state != 0. */
  120 } __packed;
  121 
  122 struct intel_raid_disk {
  123 #define INTEL_SERIAL_LEN        16
  124         uint8_t         serial[INTEL_SERIAL_LEN];
  125         uint32_t        sectors;
  126         uint32_t        id;
  127         uint32_t        flags;
  128 #define INTEL_F_SPARE           0x01
  129 #define INTEL_F_ASSIGNED        0x02
  130 #define INTEL_F_FAILED          0x04
  131 #define INTEL_F_ONLINE          0x08
  132         uint32_t        owner_cfg_num;
  133         uint32_t        sectors_hi;
  134         uint32_t        filler[3];
  135 } __packed;
  136 
  137 struct intel_raid_conf {
  138         uint8_t         intel_id[24];
  139 #define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
  140 
  141         uint8_t         version[6];
  142 #define INTEL_VERSION_1000      "1.0.00"        /* RAID0 */
  143 #define INTEL_VERSION_1100      "1.1.00"        /* RAID1 */
  144 #define INTEL_VERSION_1200      "1.2.00"        /* Many volumes */
  145 #define INTEL_VERSION_1201      "1.2.01"        /* 3 or 4 disks */
  146 #define INTEL_VERSION_1202      "1.2.02"        /* RAID5 */
  147 #define INTEL_VERSION_1204      "1.2.04"        /* 5 or 6 disks */
  148 #define INTEL_VERSION_1206      "1.2.06"        /* CNG */
  149 #define INTEL_VERSION_1300      "1.3.00"        /* Attributes */
  150 
  151         uint8_t         dummy_0[2];
  152         uint32_t        checksum;
  153         uint32_t        config_size;
  154         uint32_t        config_id;
  155         uint32_t        generation;
  156         uint32_t        error_log_size;
  157         uint32_t        attributes;
  158 #define INTEL_ATTR_RAID0        0x00000001
  159 #define INTEL_ATTR_RAID1        0x00000002
  160 #define INTEL_ATTR_RAID10       0x00000004
  161 #define INTEL_ATTR_RAID1E       0x00000008
  162 #define INTEL_ATTR_RAID5        0x00000010
  163 #define INTEL_ATTR_RAIDCNG      0x00000020
  164 #define INTEL_ATTR_2TB          0x20000000
  165 #define INTEL_ATTR_PM           0x40000000
  166 #define INTEL_ATTR_CHECKSUM     0x80000000
  167 
  168         uint8_t         total_disks;
  169         uint8_t         total_volumes;
  170         uint8_t         dummy_2[2];
  171         uint32_t        filler_0[39];
  172         struct intel_raid_disk  disk[1];        /* total_disks entries. */
  173         /* Here goes total_volumes of struct intel_raid_vol. */
  174 } __packed;
  175 
  176 #define INTEL_MAX_MD_SIZE(ndisks)                               \
  177     (sizeof(struct intel_raid_conf) +                           \
  178      sizeof(struct intel_raid_disk) * (ndisks - 1) +            \
  179      sizeof(struct intel_raid_vol) * 2 +                        \
  180      sizeof(struct intel_raid_map) * 2 +                        \
  181      sizeof(uint32_t) * (ndisks - 1) * 4)
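/*
 * Worked reading of the macro above (illustrative): struct intel_raid_conf
 * already embeds one disk entry, each volume embeds one map and each map
 * embeds one disk index, so the worst case adds (ndisks - 1) extra disk
 * entries, two volumes (the most this driver handles, cf.
 * intel_get_volume()), two extra maps for volumes under migration, and
 * (ndisks - 1) extra disk indexes for each of those up to four maps.
 */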
  182 
  183 struct g_raid_md_intel_perdisk {
  184         struct intel_raid_conf  *pd_meta;
  185         int                      pd_disk_pos;
  186         struct intel_raid_disk   pd_disk_meta;
  187 };
  188 
  189 struct g_raid_md_intel_object {
  190         struct g_raid_md_object  mdio_base;
  191         uint32_t                 mdio_config_id;
  192         uint32_t                 mdio_generation;
  193         struct intel_raid_conf  *mdio_meta;
  194         struct callout           mdio_start_co; /* STARTING state timer. */
  195         int                      mdio_disks_present;
  196         int                      mdio_started;
  197         int                      mdio_incomplete;
  198         struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
  199 };
  200 
  201 static g_raid_md_create_t g_raid_md_create_intel;
  202 static g_raid_md_taste_t g_raid_md_taste_intel;
  203 static g_raid_md_event_t g_raid_md_event_intel;
  204 static g_raid_md_ctl_t g_raid_md_ctl_intel;
  205 static g_raid_md_write_t g_raid_md_write_intel;
  206 static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
  207 static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
  208 static g_raid_md_free_t g_raid_md_free_intel;
  209 
  210 static kobj_method_t g_raid_md_intel_methods[] = {
  211         KOBJMETHOD(g_raid_md_create,    g_raid_md_create_intel),
  212         KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_intel),
  213         KOBJMETHOD(g_raid_md_event,     g_raid_md_event_intel),
  214         KOBJMETHOD(g_raid_md_ctl,       g_raid_md_ctl_intel),
  215         KOBJMETHOD(g_raid_md_write,     g_raid_md_write_intel),
  216         KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel),
  217         KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel),
  218         KOBJMETHOD(g_raid_md_free,      g_raid_md_free_intel),
  219         { 0, 0 }
  220 };
  221 
  222 static struct g_raid_md_class g_raid_md_intel_class = {
  223         "Intel",
  224         g_raid_md_intel_methods,
  225         sizeof(struct g_raid_md_intel_object),
  226         .mdc_priority = 100
  227 };
  228 
  229 
  230 static struct intel_raid_map *
  231 intel_get_map(struct intel_raid_vol *mvol, int i)
  232 {
  233         struct intel_raid_map *mmap;
  234 
  235         if (i > (mvol->migr_state ? 1 : 0))
  236                 return (NULL);
  237         mmap = &mvol->map[0];
  238         for (; i > 0; i--) {
  239                 mmap = (struct intel_raid_map *)
  240                     &mmap->disk_idx[mmap->total_disks];
  241         }
  242         return ((struct intel_raid_map *)mmap);
  243 }
  244 
  245 static struct intel_raid_vol *
  246 intel_get_volume(struct intel_raid_conf *meta, int i)
  247 {
  248         struct intel_raid_vol *mvol;
  249         struct intel_raid_map *mmap;
  250 
  251         if (i > 1)
  252                 return (NULL);
  253         mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
  254         for (; i > 0; i--) {
  255                 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
  256                 mvol = (struct intel_raid_vol *)
  257                     &mmap->disk_idx[mmap->total_disks];
  258         }
  259         return (mvol);
  260 }
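/*
 * Illustrative sketch of the serialized config that the two walkers above
 * step through (derived from the code, not from a separate spec):
 *
 *	struct intel_raid_conf                  (embeds disk[0])
 *	(total_disks - 1) more struct intel_raid_disk entries
 *	volume 0: struct intel_raid_vol         (embeds map 0)
 *	          map 0's remaining disk_idx[] entries
 *	          map 1 plus its disk_idx[]     (only when migr_state != 0)
 *	volume 1: same shape, immediately after volume 0
 *
 * Every record ends in a variable-length array, which is why both helpers
 * advance past the trailing disk_idx[] of the previous record instead of
 * using fixed offsets.
 */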
  261 
  262 static off_t
  263 intel_get_map_offset(struct intel_raid_map *mmap)
  264 {
  265         off_t offset = (off_t)mmap->offset_hi << 32;
  266 
  267         offset += mmap->offset;
  268         return (offset);
  269 }
  270 
  271 static void
  272 intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
  273 {
  274 
  275         mmap->offset = offset & 0xffffffff;
  276         mmap->offset_hi = offset >> 32;
  277 }
  278 
  279 static off_t
  280 intel_get_map_disk_sectors(struct intel_raid_map *mmap)
  281 {
  282         off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
  283 
  284         disk_sectors += mmap->disk_sectors;
  285         return (disk_sectors);
  286 }
  287 
  288 static void
  289 intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
  290 {
  291 
  292         mmap->disk_sectors = disk_sectors & 0xffffffff;
  293         mmap->disk_sectors_hi = disk_sectors >> 32;
  294 }
  295 
  296 static void
  297 intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
  298 {
  299 
  300         mmap->stripe_count = stripe_count & 0xffffffff;
  301         mmap->stripe_count_hi = stripe_count >> 32;
  302 }
  303 
  304 static off_t
  305 intel_get_disk_sectors(struct intel_raid_disk *disk)
  306 {
  307         off_t sectors = (off_t)disk->sectors_hi << 32;
  308 
  309         sectors += disk->sectors;
  310         return (sectors);
  311 }
  312 
  313 static void
  314 intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
  315 {
  316 
  317         disk->sectors = sectors & 0xffffffff;
  318         disk->sectors_hi = sectors >> 32;
  319 }
  320 
  321 static off_t
  322 intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
  323 {
  324         off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
  325 
  326         curr_migr_unit += vol->curr_migr_unit;
  327         return (curr_migr_unit);
  328 }
  329 
  330 static void
  331 intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
  332 {
  333 
  334         vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
  335         vol->curr_migr_unit_hi = curr_migr_unit >> 32;
  336 }
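/*
 * Example of the lo/hi split used by the accessors above (illustrative
 * values): a disk of 5860533168 sectors (0x15D50A3B0, roughly 3 TB) is
 * stored as sectors = 0x5D50A3B0 and sectors_hi = 0x1, and reassembled
 * as ((off_t)sectors_hi << 32) + sectors.
 */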
  337 
  338 static void
  339 g_raid_md_intel_print(struct intel_raid_conf *meta)
  340 {
  341         struct intel_raid_vol *mvol;
  342         struct intel_raid_map *mmap;
  343         int i, j, k;
  344 
  345         if (g_raid_debug < 1)
  346                 return;
  347 
  348         printf("********* ATA Intel MatrixRAID Metadata *********\n");
  349         printf("intel_id            <%.24s>\n", meta->intel_id);
  350         printf("version             <%.6s>\n", meta->version);
  351         printf("checksum            0x%08x\n", meta->checksum);
  352         printf("config_size         0x%08x\n", meta->config_size);
  353         printf("config_id           0x%08x\n", meta->config_id);
  354         printf("generation          0x%08x\n", meta->generation);
  355         printf("attributes          0x%08x\n", meta->attributes);
  356         printf("total_disks         %u\n", meta->total_disks);
  357         printf("total_volumes       %u\n", meta->total_volumes);
  358         printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags\n");
  359         for (i = 0; i < meta->total_disks; i++ ) {
  360                 printf("    %d   <%.16s> %u %u 0x%08x 0x%08x\n", i,
  361                     meta->disk[i].serial, meta->disk[i].sectors,
  362                     meta->disk[i].sectors_hi,
  363                     meta->disk[i].id, meta->disk[i].flags);
  364         }
  365         for (i = 0; i < meta->total_volumes; i++) {
  366                 mvol = intel_get_volume(meta, i);
  367                 printf(" ****** Volume %d ******\n", i);
  368                 printf(" name               %.16s\n", mvol->name);
  369                 printf(" total_sectors      %ju\n", mvol->total_sectors);
  370                 printf(" state              %u\n", mvol->state);
  371                 printf(" reserved           %u\n", mvol->reserved);
  372                 printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
  373                 printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
  374                 printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
  375                 printf(" migr_state         %u\n", mvol->migr_state);
  376                 printf(" migr_type          %u\n", mvol->migr_type);
  377                 printf(" dirty              %u\n", mvol->dirty);
  378 
  379                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  380                         printf("  *** Map %d ***\n", j);
  381                         mmap = intel_get_map(mvol, j);
  382                         printf("  offset            %u\n", mmap->offset);
  383                         printf("  offset_hi         %u\n", mmap->offset_hi);
  384                         printf("  disk_sectors      %u\n", mmap->disk_sectors);
  385                         printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
  386                         printf("  stripe_count      %u\n", mmap->stripe_count);
  387                         printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
  388                         printf("  strip_sectors     %u\n", mmap->strip_sectors);
  389                         printf("  status            %u\n", mmap->status);
  390                         printf("  type              %u\n", mmap->type);
  391                         printf("  total_disks       %u\n", mmap->total_disks);
  392                         printf("  total_domains     %u\n", mmap->total_domains);
  393                         printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
  394                         printf("  ddf               %u\n", mmap->ddf);
  395                         printf("  disk_idx         ");
  396                         for (k = 0; k < mmap->total_disks; k++)
  397                                 printf(" 0x%08x", mmap->disk_idx[k]);
  398                         printf("\n");
  399                 }
  400         }
  401         printf("=================================================\n");
  402 }
  403 
  404 static struct intel_raid_conf *
  405 intel_meta_copy(struct intel_raid_conf *meta)
  406 {
  407         struct intel_raid_conf *nmeta;
  408 
  409         nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
  410         memcpy(nmeta, meta, meta->config_size);
  411         return (nmeta);
  412 }
  413 
  414 static int
  415 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
  416 {
  417         int pos;
  418 
  419         for (pos = 0; pos < meta->total_disks; pos++) {
  420                 if (strncmp(meta->disk[pos].serial,
  421                     serial, INTEL_SERIAL_LEN) == 0)
  422                         return (pos);
  423         }
  424         return (-1);
  425 }
  426 
  427 static struct intel_raid_conf *
  428 intel_meta_read(struct g_consumer *cp)
  429 {
  430         struct g_provider *pp;
  431         struct intel_raid_conf *meta;
  432         struct intel_raid_vol *mvol;
  433         struct intel_raid_map *mmap;
  434         char *buf;
  435         int error, i, j, k, left, size;
  436         uint32_t checksum, *ptr;
  437 
  438         pp = cp->provider;
  439 
  440         /* Read the anchor sector. */
  441         buf = g_read_data(cp,
  442             pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
  443         if (buf == NULL) {
  444                 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
  445                     pp->name, error);
  446                 return (NULL);
  447         }
  448         meta = (struct intel_raid_conf *)buf;
  449 
  450         /* Check if this is an Intel RAID struct */
  451         if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
  452                 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
  453                 g_free(buf);
  454                 return (NULL);
  455         }
  456         if (meta->config_size > 65536 ||
  457             meta->config_size < sizeof(struct intel_raid_conf)) {
  458                 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
  459                     meta->config_size);
  460                 g_free(buf);
  461                 return (NULL);
  462         }
  463         size = meta->config_size;
  464         meta = malloc(size, M_MD_INTEL, M_WAITOK);
  465         memcpy(meta, buf, min(size, pp->sectorsize));
  466         g_free(buf);
  467 
  468         /* Read all the rest, if needed. */
  469         if (meta->config_size > pp->sectorsize) {
  470                 left = (meta->config_size - 1) / pp->sectorsize;
  471                 buf = g_read_data(cp,
  472                     pp->mediasize - pp->sectorsize * (2 + left),
  473                     pp->sectorsize * left, &error);
  474                 if (buf == NULL) {
  475                         G_RAID_DEBUG(1, "Cannot read remaining metadata"
  476                             " part from %s (error=%d).",
  477                             pp->name, error);
  478                         free(meta, M_MD_INTEL);
  479                         return (NULL);
  480                 }
  481                 memcpy(((char *)meta) + pp->sectorsize, buf,
  482                     pp->sectorsize * left);
  483                 g_free(buf);
  484         }
  485 
  486         /* Check metadata checksum. */
  487         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  488             i < (meta->config_size / sizeof(uint32_t)); i++) {
  489                 checksum += *ptr++;
  490         }
  491         checksum -= meta->checksum;
  492         if (checksum != meta->checksum) {
  493                 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
  494                 free(meta, M_MD_INTEL);
  495                 return (NULL);
  496         }
  497 
  498         /* Validate metadata size. */
  499         size = sizeof(struct intel_raid_conf) +
  500             sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
  501             sizeof(struct intel_raid_vol) * meta->total_volumes;
  502         if (size > meta->config_size) {
  503 badsize:
  504                 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
  505                     meta->config_size, size);
  506                 free(meta, M_MD_INTEL);
  507                 return (NULL);
  508         }
  509         for (i = 0; i < meta->total_volumes; i++) {
  510                 mvol = intel_get_volume(meta, i);
  511                 mmap = intel_get_map(mvol, 0);
  512                 size += 4 * (mmap->total_disks - 1);
  513                 if (size > meta->config_size)
  514                         goto badsize;
  515                 if (mvol->migr_state) {
  516                         size += sizeof(struct intel_raid_map);
  517                         if (size > meta->config_size)
  518                                 goto badsize;
  519                         mmap = intel_get_map(mvol, 1);
  520                         size += 4 * (mmap->total_disks - 1);
  521                         if (size > meta->config_size)
  522                                 goto badsize;
  523                 }
  524         }
  525 
  526         /* Validate disk indexes. */
  527         for (i = 0; i < meta->total_volumes; i++) {
  528                 mvol = intel_get_volume(meta, i);
  529                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  530                         mmap = intel_get_map(mvol, j);
  531                         for (k = 0; k < mmap->total_disks; k++) {
  532                                 if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
  533                                     meta->total_disks) {
  534                                         G_RAID_DEBUG(1, "Intel metadata disk"
  535                                             " index %d too big (>%d)",
  536                                             mmap->disk_idx[k] & INTEL_DI_IDX,
  537                                             meta->total_disks);
  538                                         free(meta, M_MD_INTEL);
  539                                         return (NULL);
  540                                 }
  541                         }
  542                 }
  543         }
  544 
  545         /* Validate migration types. */
  546         for (i = 0; i < meta->total_volumes; i++) {
  547                 mvol = intel_get_volume(meta, i);
  548                 if (mvol->migr_state &&
  549                     mvol->migr_type != INTEL_MT_INIT &&
  550                     mvol->migr_type != INTEL_MT_REBUILD &&
  551                     mvol->migr_type != INTEL_MT_VERIFY &&
  552                     mvol->migr_type != INTEL_MT_REPAIR) {
  553                         G_RAID_DEBUG(1, "Intel metadata has unsupported"
  554                             " migration type %d", mvol->migr_type);
  555                         free(meta, M_MD_INTEL);
  556                         return (NULL);
  557                 }
  558         }
  559 
  560         return (meta);
  561 }
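/*
 * Checksum recap (illustrative sketch, not part of the driver): the
 * metadata checksum is the 32-bit sum of every uint32_t word of the
 * config, with the checksum field itself counted as zero.  The read path
 * above gets the same result by summing everything and then subtracting
 * the stored value once.  A hypothetical standalone helper could look
 * like this:
 */
#if 0
static uint32_t
intel_meta_checksum(const struct intel_raid_conf *meta)
{
	const uint32_t *ptr = (const uint32_t *)meta;
	uint32_t sum = 0;
	u_int i;

	for (i = 0; i < meta->config_size / sizeof(uint32_t); i++)
		sum += ptr[i];
	/* Cancel the contribution of the stored checksum field. */
	return (sum - meta->checksum);
}
#endif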
  562 
  563 static int
  564 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
  565 {
  566         struct g_provider *pp;
  567         char *buf;
  568         int error, i, sectors;
  569         uint32_t checksum, *ptr;
  570 
  571         pp = cp->provider;
  572 
   573         /* Recalculate the checksum in case the metadata has changed. */
  574         meta->checksum = 0;
  575         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  576             i < (meta->config_size / sizeof(uint32_t)); i++) {
  577                 checksum += *ptr++;
  578         }
  579         meta->checksum = checksum;
  580 
  581         /* Create and fill buffer. */
  582         sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
  583         buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  584         if (sectors > 1) {
  585                 memcpy(buf, ((char *)meta) + pp->sectorsize,
  586                     (sectors - 1) * pp->sectorsize);
  587         }
  588         memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
  589 
  590         error = g_write_data(cp,
  591             pp->mediasize - pp->sectorsize * (1 + sectors),
  592             buf, pp->sectorsize * sectors);
  593         if (error != 0) {
  594                 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
  595                     pp->name, error);
  596         }
  597 
  598         free(buf, M_MD_INTEL);
  599         return (error);
  600 }
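/*
 * Resulting on-disk placement (illustrative, matching the read path in
 * intel_meta_read() above): the buffer is assembled so that the anchor
 * (the first sector of the config) is written last and lands in the
 * second-to-last sector of the provider, with any remaining config
 * sectors immediately before it and the last sector left untouched:
 *
 *	... | config sectors 2..N | anchor | (unused) |  <- end of disk
 *	    ^ mediasize-(N+1)*ss  ^ mediasize-2*ss    ^ mediasize-ss
 *
 * where ss is the provider's sector size and N the total config sectors.
 */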
  601 
  602 static int
  603 intel_meta_erase(struct g_consumer *cp)
  604 {
  605         struct g_provider *pp;
  606         char *buf;
  607         int error;
  608 
  609         pp = cp->provider;
  610         buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  611         error = g_write_data(cp,
  612             pp->mediasize - 2 * pp->sectorsize,
  613             buf, pp->sectorsize);
  614         if (error != 0) {
  615                 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
  616                     pp->name, error);
  617         }
  618         free(buf, M_MD_INTEL);
  619         return (error);
  620 }
  621 
  622 static int
  623 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
  624 {
  625         struct intel_raid_conf *meta;
  626         int error;
  627 
  628         /* Fill anchor and single disk. */
  629         meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
  630         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
  631         memcpy(&meta->version[0], INTEL_VERSION_1000,
  632             sizeof(INTEL_VERSION_1000) - 1);
  633         meta->config_size = INTEL_MAX_MD_SIZE(1);
  634         meta->config_id = arc4random();
  635         meta->generation = 1;
  636         meta->total_disks = 1;
  637         meta->disk[0] = *d;
  638         error = intel_meta_write(cp, meta);
  639         free(meta, M_MD_INTEL);
  640         return (error);
  641 }
  642 
  643 static struct g_raid_disk *
  644 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
  645 {
  646         struct g_raid_disk      *disk;
  647         struct g_raid_md_intel_perdisk *pd;
  648 
  649         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  650                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  651                 if (pd->pd_disk_pos == id)
  652                         break;
  653         }
  654         return (disk);
  655 }
  656 
  657 static int
  658 g_raid_md_intel_supported(int level, int qual, int disks, int force)
  659 {
  660 
  661         switch (level) {
  662         case G_RAID_VOLUME_RL_RAID0:
  663                 if (disks < 1)
  664                         return (0);
  665                 if (!force && (disks < 2 || disks > 6))
  666                         return (0);
  667                 break;
  668         case G_RAID_VOLUME_RL_RAID1:
  669                 if (disks < 1)
  670                         return (0);
  671                 if (!force && (disks != 2))
  672                         return (0);
  673                 break;
  674         case G_RAID_VOLUME_RL_RAID1E:
  675                 if (disks < 2)
  676                         return (0);
  677                 if (!force && (disks != 4))
  678                         return (0);
  679                 break;
  680         case G_RAID_VOLUME_RL_RAID5:
  681                 if (disks < 3)
  682                         return (0);
  683                 if (!force && disks > 6)
  684                         return (0);
  685                 break;
  686         default:
  687                 return (0);
  688         }
  689         if (qual != G_RAID_VOLUME_RLQ_NONE)
  690                 return (0);
  691         return (1);
  692 }
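/*
 * Summary of the checks above (illustrative, not an authoritative list of
 * Intel firmware limits): without "force", RAID0 accepts 2-6 disks, RAID1
 * exactly 2, RAID1E exactly 4 and RAID5 3-6 disks, and no level qualifier
 * is allowed.  With "force" only the bare minimums remain (1, 1, 2 and 3
 * disks respectively).
 */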
  693 
  694 static struct g_raid_volume *
  695 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
  696 {
  697         struct g_raid_volume    *mvol;
  698 
  699         TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
  700                 if ((intptr_t)(mvol->v_md_data) == id)
  701                         break;
  702         }
  703         return (mvol);
  704 }
  705 
  706 static int
  707 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
  708 {
  709         struct g_raid_softc *sc;
  710         struct g_raid_subdisk *sd, *tmpsd;
  711         struct g_raid_disk *olddisk, *tmpdisk;
  712         struct g_raid_md_object *md;
  713         struct g_raid_md_intel_object *mdi;
  714         struct g_raid_md_intel_perdisk *pd, *oldpd;
  715         struct intel_raid_conf *meta;
  716         struct intel_raid_vol *mvol;
  717         struct intel_raid_map *mmap0, *mmap1;
  718         int disk_pos, resurrection = 0;
  719 
  720         sc = disk->d_softc;
  721         md = sc->sc_md;
  722         mdi = (struct g_raid_md_intel_object *)md;
  723         meta = mdi->mdio_meta;
  724         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  725         olddisk = NULL;
  726 
   727         /* Find disk position in metadata by its serial. */
  728         disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
  729         if (disk_pos < 0) {
  730                 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
  731                 /* Failed stale disk is useless for us. */
  732                 if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
  733                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
  734                         return (0);
  735                 }
  736                 /* If we are in the start process, that's all for now. */
  737                 if (!mdi->mdio_started)
  738                         goto nofit;
  739                 /*
   740                  * If we have already started, try to make use of the disk.
  741                  * Try to replace OFFLINE disks first, then FAILED.
  742                  */
  743                 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
  744                         if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
  745                             tmpdisk->d_state != G_RAID_DISK_S_FAILED)
  746                                 continue;
  747                         /* Make sure this disk is big enough. */
  748                         TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
  749                                 off_t disk_sectors = 
  750                                     intel_get_disk_sectors(&pd->pd_disk_meta);
  751 
  752                                 if (sd->sd_offset + sd->sd_size + 4096 >
  753                                     disk_sectors * 512) {
  754                                         G_RAID_DEBUG1(1, sc,
  755                                             "Disk too small (%llu < %llu)",
  756                                             (unsigned long long)
  757                                             disk_sectors * 512,
  758                                             (unsigned long long)
  759                                             sd->sd_offset + sd->sd_size + 4096);
  760                                         break;
  761                                 }
  762                         }
  763                         if (sd != NULL)
  764                                 continue;
  765                         if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
  766                                 olddisk = tmpdisk;
  767                                 break;
  768                         } else if (olddisk == NULL)
  769                                 olddisk = tmpdisk;
  770                 }
  771                 if (olddisk == NULL) {
  772 nofit:
  773                         if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
  774                                 g_raid_change_disk_state(disk,
  775                                     G_RAID_DISK_S_SPARE);
  776                                 return (1);
  777                         } else {
  778                                 g_raid_change_disk_state(disk,
  779                                     G_RAID_DISK_S_STALE);
  780                                 return (0);
  781                         }
  782                 }
  783                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  784                 disk_pos = oldpd->pd_disk_pos;
  785                 resurrection = 1;
  786         }
  787 
  788         if (olddisk == NULL) {
  789                 /* Find placeholder by position. */
  790                 olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
  791                 if (olddisk == NULL)
  792                         panic("No disk at position %d!", disk_pos);
  793                 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
   794                         G_RAID_DEBUG1(1, sc, "More than one disk for pos %d",
  795                             disk_pos);
  796                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
  797                         return (0);
  798                 }
  799                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  800         }
  801 
  802         /* Replace failed disk or placeholder with new disk. */
  803         TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
  804                 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
  805                 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
  806                 sd->sd_disk = disk;
  807         }
  808         oldpd->pd_disk_pos = -2;
  809         pd->pd_disk_pos = disk_pos;
  810 
   811         /* If it was a placeholder, destroy it. */
  812         if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
  813                 g_raid_destroy_disk(olddisk);
  814         } else {
  815                 /* Otherwise, make it STALE_FAILED. */
  816                 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
  817                 /* Update global metadata just in case. */
  818                 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
  819                     sizeof(struct intel_raid_disk));
  820         }
  821 
  822         /* Welcome the new disk. */
  823         if (resurrection)
  824                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  825         else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
  826                 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
  827         else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
  828                 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
  829         else
  830                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  831         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
  832                 mvol = intel_get_volume(meta,
  833                     (uintptr_t)(sd->sd_volume->v_md_data));
  834                 mmap0 = intel_get_map(mvol, 0);
  835                 if (mvol->migr_state)
  836                         mmap1 = intel_get_map(mvol, 1);
  837                 else
  838                         mmap1 = mmap0;
  839 
  840                 if (resurrection) {
  841                         /* Stale disk, almost same as new. */
  842                         g_raid_change_subdisk_state(sd,
  843                             G_RAID_SUBDISK_S_NEW);
  844                 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
  845                         /* Failed disk, almost useless. */
  846                         g_raid_change_subdisk_state(sd,
  847                             G_RAID_SUBDISK_S_FAILED);
  848                 } else if (mvol->migr_state == 0) {
  849                         if (mmap0->status == INTEL_S_UNINITIALIZED) {
  850                                 /* Freshly created uninitialized volume. */
  851                                 g_raid_change_subdisk_state(sd,
  852                                     G_RAID_SUBDISK_S_UNINITIALIZED);
  853                         } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  854                                 /* Freshly inserted disk. */
  855                                 g_raid_change_subdisk_state(sd,
  856                                     G_RAID_SUBDISK_S_NEW);
  857                         } else if (mvol->dirty) {
  858                                 /* Dirty volume (unclean shutdown). */
  859                                 g_raid_change_subdisk_state(sd,
  860                                     G_RAID_SUBDISK_S_STALE);
  861                         } else {
  862                                 /* Up to date disk. */
  863                                 g_raid_change_subdisk_state(sd,
  864                                     G_RAID_SUBDISK_S_ACTIVE);
  865                         }
  866                 } else if (mvol->migr_type == INTEL_MT_INIT ||
  867                            mvol->migr_type == INTEL_MT_REBUILD) {
  868                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  869                                 /* Freshly inserted disk. */
  870                                 g_raid_change_subdisk_state(sd,
  871                                     G_RAID_SUBDISK_S_NEW);
  872                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  873                                 /* Rebuilding disk. */
  874                                 g_raid_change_subdisk_state(sd,
  875                                     G_RAID_SUBDISK_S_REBUILD);
  876                                 if (mvol->dirty) {
  877                                         sd->sd_rebuild_pos = 0;
  878                                 } else {
  879                                         sd->sd_rebuild_pos =
  880                                             intel_get_vol_curr_migr_unit(mvol) *
  881                                             sd->sd_volume->v_strip_size *
  882                                             mmap0->total_domains;
  883                                 }
  884                         } else if (mvol->dirty) {
  885                                 /* Dirty volume (unclean shutdown). */
  886                                 g_raid_change_subdisk_state(sd,
  887                                     G_RAID_SUBDISK_S_STALE);
  888                         } else {
  889                                 /* Up to date disk. */
  890                                 g_raid_change_subdisk_state(sd,
  891                                     G_RAID_SUBDISK_S_ACTIVE);
  892                         }
  893                 } else if (mvol->migr_type == INTEL_MT_VERIFY ||
  894                            mvol->migr_type == INTEL_MT_REPAIR) {
  895                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  896                                 /* Freshly inserted disk. */
  897                                 g_raid_change_subdisk_state(sd,
  898                                     G_RAID_SUBDISK_S_NEW);
  899                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  900                                 /* Resyncing disk. */
  901                                 g_raid_change_subdisk_state(sd,
  902                                     G_RAID_SUBDISK_S_RESYNC);
  903                                 if (mvol->dirty) {
  904                                         sd->sd_rebuild_pos = 0;
  905                                 } else {
  906                                         sd->sd_rebuild_pos =
  907                                             intel_get_vol_curr_migr_unit(mvol) *
  908                                             sd->sd_volume->v_strip_size *
  909                                             mmap0->total_domains;
  910                                 }
  911                         } else if (mvol->dirty) {
  912                                 /* Dirty volume (unclean shutdown). */
  913                                 g_raid_change_subdisk_state(sd,
  914                                     G_RAID_SUBDISK_S_STALE);
  915                         } else {
  916                                 /* Up to date disk. */
  917                                 g_raid_change_subdisk_state(sd,
  918                                     G_RAID_SUBDISK_S_ACTIVE);
  919                         }
  920                 }
  921                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
  922                     G_RAID_EVENT_SUBDISK);
  923         }
  924 
  925         /* Update status of our need for spare. */
  926         if (mdi->mdio_started) {
  927                 mdi->mdio_incomplete =
  928                     (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
  929                      meta->total_disks);
  930         }
  931 
  932         return (resurrection);
  933 }
  934 
  935 static void
  936 g_disk_md_intel_retaste(void *arg, int pending)
  937 {
  938 
  939         G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
  940         g_retaste(&g_raid_class);
  941         free(arg, M_MD_INTEL);
  942 }
  943 
  944 static void
  945 g_raid_md_intel_refill(struct g_raid_softc *sc)
  946 {
  947         struct g_raid_md_object *md;
  948         struct g_raid_md_intel_object *mdi;
  949         struct intel_raid_conf *meta;
  950         struct g_raid_disk *disk;
  951         struct task *task;
  952         int update, na;
  953 
  954         md = sc->sc_md;
  955         mdi = (struct g_raid_md_intel_object *)md;
  956         meta = mdi->mdio_meta;
  957         update = 0;
  958         do {
   959                 /* Make sure we don't miss anything. */
  960                 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
  961                 if (na == meta->total_disks)
  962                         break;
  963 
  964                 G_RAID_DEBUG1(1, md->mdo_softc,
  965                     "Array is not complete (%d of %d), "
  966                     "trying to refill.", na, meta->total_disks);
  967 
   968                 /* Try to make use of some STALE disks. */
  969                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  970                         if (disk->d_state == G_RAID_DISK_S_STALE) {
  971                                 update += g_raid_md_intel_start_disk(disk);
  972                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  973                                         break;
  974                         }
  975                 }
  976                 if (disk != NULL)
  977                         continue;
  978 
   979                 /* Try to make use of some SPARE disks. */
  980                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  981                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
  982                                 update += g_raid_md_intel_start_disk(disk);
  983                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  984                                         break;
  985                         }
  986                 }
  987         } while (disk != NULL);
  988 
  989         /* Write new metadata if we changed something. */
  990         if (update) {
  991                 g_raid_md_write_intel(md, NULL, NULL, NULL);
  992                 meta = mdi->mdio_meta;
  993         }
  994 
  995         /* Update status of our need for spare. */
  996         mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
  997             meta->total_disks);
  998 
   999         /* Request a retaste, hoping to find a spare. */
 1000         if (mdi->mdio_incomplete) {
 1001                 task = malloc(sizeof(struct task),
 1002                     M_MD_INTEL, M_WAITOK | M_ZERO);
 1003                 TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
 1004                 taskqueue_enqueue(taskqueue_swi, task);
 1005         }
 1006 }
 1007 
 1008 static void
 1009 g_raid_md_intel_start(struct g_raid_softc *sc)
 1010 {
 1011         struct g_raid_md_object *md;
 1012         struct g_raid_md_intel_object *mdi;
 1013         struct g_raid_md_intel_perdisk *pd;
 1014         struct intel_raid_conf *meta;
 1015         struct intel_raid_vol *mvol;
 1016         struct intel_raid_map *mmap;
 1017         struct g_raid_volume *vol;
 1018         struct g_raid_subdisk *sd;
 1019         struct g_raid_disk *disk;
 1020         int i, j, disk_pos;
 1021 
 1022         md = sc->sc_md;
 1023         mdi = (struct g_raid_md_intel_object *)md;
 1024         meta = mdi->mdio_meta;
 1025 
 1026         /* Create volumes and subdisks. */
 1027         for (i = 0; i < meta->total_volumes; i++) {
 1028                 mvol = intel_get_volume(meta, i);
 1029                 mmap = intel_get_map(mvol, 0);
 1030                 vol = g_raid_create_volume(sc, mvol->name, -1);
 1031                 vol->v_md_data = (void *)(intptr_t)i;
 1032                 if (mmap->type == INTEL_T_RAID0)
 1033                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
 1034                 else if (mmap->type == INTEL_T_RAID1 &&
 1035                     mmap->total_domains >= 2 &&
 1036                     mmap->total_domains <= mmap->total_disks) {
 1037                         /* Assume total_domains is correct. */
 1038                         if (mmap->total_domains == mmap->total_disks)
 1039                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 1040                         else
 1041                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 1042                 } else if (mmap->type == INTEL_T_RAID1) {
 1043                         /* total_domains looks wrong. */
 1044                         if (mmap->total_disks <= 2)
 1045                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 1046                         else
 1047                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 1048                 } else if (mmap->type == INTEL_T_RAID5)
 1049                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
 1050                 else
 1051                         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 1052                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1053                 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
 1054                 vol->v_disks_count = mmap->total_disks;
 1055                 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
 1056                 vol->v_sectorsize = 512; //ZZZ
 1057                 for (j = 0; j < vol->v_disks_count; j++) {
 1058                         sd = &vol->v_subdisks[j];
 1059                         sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
 1060                         sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
 1061                 }
 1062                 g_raid_start_volume(vol);
 1063         }
 1064 
 1065         /* Create disk placeholders to store data for later writing. */
 1066         for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
 1067                 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1068                 pd->pd_disk_pos = disk_pos;
 1069                 pd->pd_disk_meta = meta->disk[disk_pos];
 1070                 disk = g_raid_create_disk(sc);
 1071                 disk->d_md_data = (void *)pd;
 1072                 disk->d_state = G_RAID_DISK_S_OFFLINE;
 1073                 for (i = 0; i < meta->total_volumes; i++) {
 1074                         mvol = intel_get_volume(meta, i);
 1075                         mmap = intel_get_map(mvol, 0);
 1076                         for (j = 0; j < mmap->total_disks; j++) {
 1077                                 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
 1078                                         break;
 1079                         }
 1080                         if (j == mmap->total_disks)
 1081                                 continue;
 1082                         vol = g_raid_md_intel_get_volume(sc, i);
 1083                         sd = &vol->v_subdisks[j];
 1084                         sd->sd_disk = disk;
 1085                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1086                 }
 1087         }
 1088 
  1089         /* Make all disks found so far take their places. */
 1090         do {
 1091                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1092                         if (disk->d_state == G_RAID_DISK_S_NONE) {
 1093                                 g_raid_md_intel_start_disk(disk);
 1094                                 break;
 1095                         }
 1096                 }
 1097         } while (disk != NULL);
 1098 
 1099         mdi->mdio_started = 1;
 1100         G_RAID_DEBUG1(0, sc, "Array started.");
 1101         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1102 
  1103         /* Pick up any STALE/SPARE disks to refill the array if needed. */
 1104         g_raid_md_intel_refill(sc);
 1105 
 1106         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1107                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1108                     G_RAID_EVENT_VOLUME);
 1109         }
 1110 
 1111         callout_stop(&mdi->mdio_start_co);
 1112         G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
 1113         root_mount_rel(mdi->mdio_rootmount);
 1114         mdi->mdio_rootmount = NULL;
 1115 }
 1116 
 1117 static void
 1118 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
 1119 {
 1120         struct g_raid_softc *sc;
 1121         struct g_raid_md_object *md;
 1122         struct g_raid_md_intel_object *mdi;
 1123         struct intel_raid_conf *pdmeta;
 1124         struct g_raid_md_intel_perdisk *pd;
 1125 
 1126         sc = disk->d_softc;
 1127         md = sc->sc_md;
 1128         mdi = (struct g_raid_md_intel_object *)md;
 1129         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1130         pdmeta = pd->pd_meta;
 1131 
 1132         if (mdi->mdio_started) {
 1133                 if (g_raid_md_intel_start_disk(disk))
 1134                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1135         } else {
  1136                 /* If we haven't started yet, check metadata freshness. */
 1137                 if (mdi->mdio_meta == NULL ||
 1138                     ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
 1139                         G_RAID_DEBUG1(1, sc, "Newer disk");
 1140                         if (mdi->mdio_meta != NULL)
 1141                                 free(mdi->mdio_meta, M_MD_INTEL);
 1142                         mdi->mdio_meta = intel_meta_copy(pdmeta);
 1143                         mdi->mdio_generation = mdi->mdio_meta->generation;
 1144                         mdi->mdio_disks_present = 1;
 1145                 } else if (pdmeta->generation == mdi->mdio_generation) {
 1146                         mdi->mdio_disks_present++;
 1147                         G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
 1148                             mdi->mdio_disks_present,
 1149                             mdi->mdio_meta->total_disks);
 1150                 } else {
 1151                         G_RAID_DEBUG1(1, sc, "Older disk");
 1152                 }
 1153                 /* If we collected all needed disks - start array. */
 1154                 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
 1155                         g_raid_md_intel_start(sc);
 1156         }
 1157 }
 1158 
 1159 static void
 1160 g_raid_intel_go(void *arg)
 1161 {
 1162         struct g_raid_softc *sc;
 1163         struct g_raid_md_object *md;
 1164         struct g_raid_md_intel_object *mdi;
 1165 
 1166         sc = arg;
 1167         md = sc->sc_md;
 1168         mdi = (struct g_raid_md_intel_object *)md;
 1169         if (!mdi->mdio_started) {
 1170                 G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
 1171                 g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
 1172         }
 1173 }
 1174 
 1175 static int
 1176 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
 1177     struct g_geom **gp)
 1178 {
 1179         struct g_raid_softc *sc;
 1180         struct g_raid_md_intel_object *mdi;
 1181         char name[16];
 1182 
 1183         mdi = (struct g_raid_md_intel_object *)md;
 1184         mdi->mdio_config_id = arc4random();
 1185         mdi->mdio_generation = 0;
 1186         snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
 1187         sc = g_raid_create_node(mp, name, md);
 1188         if (sc == NULL)
 1189                 return (G_RAID_MD_TASTE_FAIL);
 1190         md->mdo_softc = sc;
 1191         *gp = sc->sc_geom;
 1192         return (G_RAID_MD_TASTE_NEW);
 1193 }
 1194 
 1195 /*
 1196  * Return the last N characters of the serial label.  The Linux and
 1197  * ataraid(7) code always uses the last 16 characters of the label to
 1198  * store into the Intel meta format.  Generalize this to N characters
 1199  * since that's easy.  Labels can be up to 20 characters for SATA drives
  1200  * and up to 251 characters for SAS drives.  Since Intel controllers don't
 1201  * support SAS drives, just stick with the SATA limits for stack friendliness.
 1202  */
 1203 static int
 1204 g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
 1205 {
 1206         char serial_buffer[24];
 1207         int len, error;
 1208         
 1209         len = sizeof(serial_buffer);
 1210         error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
 1211         if (error != 0)
 1212                 return (error);
 1213         len = strlen(serial_buffer);
 1214         if (len > serlen)
 1215                 len -= serlen;
 1216         else
 1217                 len = 0;
 1218         strncpy(serial, serial_buffer + len, serlen);
 1219         return (0);
 1220 }
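/*
 * Worked example with illustrative values: for serlen = INTEL_SERIAL_LEN
 * (16) and a 20-character SATA ident such as "S1D9NEAD123456789012",
 * strlen() returns 20, len becomes 20 - 16 = 4, and the stored serial is
 * the last 16 characters, "NEAD123456789012".  Idents of 16 characters or
 * fewer are copied unchanged (len stays 0).
 */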
 1221 
 1222 static int
 1223 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
 1224                               struct g_consumer *cp, struct g_geom **gp)
 1225 {
 1226         struct g_consumer *rcp;
 1227         struct g_provider *pp;
 1228         struct g_raid_md_intel_object *mdi, *mdi1;
 1229         struct g_raid_softc *sc;
 1230         struct g_raid_disk *disk;
 1231         struct intel_raid_conf *meta;
 1232         struct g_raid_md_intel_perdisk *pd;
 1233         struct g_geom *geom;
 1234         int error, disk_pos, result, spare, len;
 1235         char serial[INTEL_SERIAL_LEN];
 1236         char name[16];
 1237         uint16_t vendor;
 1238 
 1239         G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
 1240         mdi = (struct g_raid_md_intel_object *)md;
 1241         pp = cp->provider;
 1242 
 1243         /* Read metadata from device. */
 1244         meta = NULL;
 1245         vendor = 0xffff;
 1246         disk_pos = 0;
 1247         if (g_access(cp, 1, 0, 0) != 0)
 1248                 return (G_RAID_MD_TASTE_FAIL);
 1249         g_topology_unlock();
 1250         error = g_raid_md_get_label(cp, serial, sizeof(serial));
 1251         if (error != 0) {
 1252                 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
 1253                     pp->name, error);
 1254                 goto fail2;
 1255         }
 1256         len = 2;
 1257         if (pp->geom->rank == 1)
 1258                 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
 1259         meta = intel_meta_read(cp);
 1260         g_topology_lock();
 1261         g_access(cp, -1, 0, 0);
 1262         if (meta == NULL) {
 1263                 if (g_raid_aggressive_spare) {
 1264                         if (vendor != 0x8086) {
 1265                                 G_RAID_DEBUG(1,
 1266                                     "Intel vendor mismatch 0x%04x != 0x8086",
 1267                                     vendor);
 1268                         } else {
 1269                                 G_RAID_DEBUG(1,
 1270                                     "No Intel metadata, forcing spare.");
 1271                                 spare = 2;
 1272                                 goto search;
 1273                         }
 1274                 }
 1275                 return (G_RAID_MD_TASTE_FAIL);
 1276         }
 1277 
 1278         /* Check this disk position in obtained metadata. */
 1279         disk_pos = intel_meta_find_disk(meta, serial);
 1280         if (disk_pos < 0) {
 1281                 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
 1282                 goto fail1;
 1283         }
 1284         if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
 1285             (pp->mediasize / pp->sectorsize)) {
 1286                 G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
 1287                     intel_get_disk_sectors(&meta->disk[disk_pos]),
 1288                     (off_t)(pp->mediasize / pp->sectorsize));
 1289                 goto fail1;
 1290         }
 1291 
 1292         /* Metadata valid. Print it. */
 1293         g_raid_md_intel_print(meta);
 1294         G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
 1295         spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
 1296 
 1297 search:
 1298         /* Search for matching node. */
 1299         sc = NULL;
 1300         mdi1 = NULL;
 1301         LIST_FOREACH(geom, &mp->geom, geom) {
 1302                 sc = geom->softc;
 1303                 if (sc == NULL)
 1304                         continue;
 1305                 if (sc->sc_stopping != 0)
 1306                         continue;
 1307                 if (sc->sc_md->mdo_class != md->mdo_class)
 1308                         continue;
 1309                 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
 1310                 if (spare) {
 1311                         if (mdi1->mdio_incomplete)
 1312                                 break;
 1313                 } else {
 1314                         if (mdi1->mdio_config_id == meta->config_id)
 1315                                 break;
 1316                 }
 1317         }
 1318 
 1319         /* Found matching node. */
 1320         if (geom != NULL) {
 1321                 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
 1322                 result = G_RAID_MD_TASTE_EXISTING;
 1323 
 1324         } else if (spare) { /* No needy node found -- leave the spare for later. */
 1325                 G_RAID_DEBUG(1, "Spare is not needed at this time");
 1326                 goto fail1;
 1327 
 1328         } else { /* No matching node found -- create one. */
 1329                 result = G_RAID_MD_TASTE_NEW;
 1330                 mdi->mdio_config_id = meta->config_id;
 1331                 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
 1332                 sc = g_raid_create_node(mp, name, md);
 1333                 md->mdo_softc = sc;
 1334                 geom = sc->sc_geom;
 1335                 callout_init(&mdi->mdio_start_co, 1);
 1336                 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
 1337                     g_raid_intel_go, sc);
 1338                 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
 1339                 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
 1340         }
 1341 
 1342         rcp = g_new_consumer(geom);
 1343         g_attach(rcp, pp);
 1344         if (g_access(rcp, 1, 1, 1) != 0)
 1345                 ; //goto fail1;
 1346 
 1347         g_topology_unlock();
 1348         sx_xlock(&sc->sc_lock);
 1349 
 1350         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1351         pd->pd_meta = meta;
 1352         pd->pd_disk_pos = -1;
 1353         if (spare == 2) {
 1354                 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
 1355                 intel_set_disk_sectors(&pd->pd_disk_meta, 
 1356                     pp->mediasize / pp->sectorsize);
 1357                 pd->pd_disk_meta.id = 0;
 1358                 pd->pd_disk_meta.flags = INTEL_F_SPARE;
 1359         } else {
 1360                 pd->pd_disk_meta = meta->disk[disk_pos];
 1361         }
 1362         disk = g_raid_create_disk(sc);
 1363         disk->d_md_data = (void *)pd;
 1364         disk->d_consumer = rcp;
 1365         rcp->private = disk;
 1366 
 1367         /* Read kernel dumping information. */
 1368         disk->d_kd.offset = 0;
 1369         disk->d_kd.length = OFF_MAX;
 1370         len = sizeof(disk->d_kd);
 1371         error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
 1372         if (disk->d_kd.di.dumper == NULL)
 1373                 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 
 1374                     rcp->provider->name, error);
 1375 
 1376         g_raid_md_intel_new_disk(disk);
 1377 
 1378         sx_xunlock(&sc->sc_lock);
 1379         g_topology_lock();
 1380         *gp = geom;
 1381         return (result);
 1382 fail2:
 1383         g_topology_lock();
 1384         g_access(cp, -1, 0, 0);
 1385 fail1:
 1386         free(meta, M_MD_INTEL);
 1387         return (G_RAID_MD_TASTE_FAIL);
 1388 }
 1389 
 1390 static int
 1391 g_raid_md_event_intel(struct g_raid_md_object *md,
 1392     struct g_raid_disk *disk, u_int event)
 1393 {
 1394         struct g_raid_softc *sc;
 1395         struct g_raid_subdisk *sd;
 1396         struct g_raid_md_intel_object *mdi;
 1397         struct g_raid_md_intel_perdisk *pd;
 1398 
 1399         sc = md->mdo_softc;
 1400         mdi = (struct g_raid_md_intel_object *)md;
 1401         if (disk == NULL) {
 1402                 switch (event) {
 1403                 case G_RAID_NODE_E_START:
 1404                         if (!mdi->mdio_started)
 1405                                 g_raid_md_intel_start(sc);
 1406                         return (0);
 1407                 }
 1408                 return (-1);
 1409         }
 1410         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1411         switch (event) {
 1412         case G_RAID_DISK_E_DISCONNECTED:
 1413                 /* If disk was assigned, just update statuses. */
 1414                 if (pd->pd_disk_pos >= 0) {
 1415                         g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1416                         if (disk->d_consumer) {
 1417                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1418                                 disk->d_consumer = NULL;
 1419                         }
 1420                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1421                                 g_raid_change_subdisk_state(sd,
 1422                                     G_RAID_SUBDISK_S_NONE);
 1423                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1424                                     G_RAID_EVENT_SUBDISK);
 1425                         }
 1426                 } else {
 1427                         /* Otherwise -- delete. */
 1428                         g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 1429                         g_raid_destroy_disk(disk);
 1430                 }
 1431 
 1432                 /* Write updated metadata to all disks. */
 1433                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1434 
 1435                 /* Check if anything left except placeholders. */
 1436                 if (g_raid_ndisks(sc, -1) ==
 1437                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 1438                         g_raid_destroy_node(sc, 0);
 1439                 else
 1440                         g_raid_md_intel_refill(sc);
 1441                 return (0);
 1442         }
 1443         return (-2);
 1444 }
 1445 
 1446 static int
 1447 g_raid_md_ctl_intel(struct g_raid_md_object *md,
 1448     struct gctl_req *req)
 1449 {
 1450         struct g_raid_softc *sc;
 1451         struct g_raid_volume *vol, *vol1;
 1452         struct g_raid_subdisk *sd;
 1453         struct g_raid_disk *disk;
 1454         struct g_raid_md_intel_object *mdi;
 1455         struct g_raid_md_intel_perdisk *pd;
 1456         struct g_consumer *cp;
 1457         struct g_provider *pp;
 1458         char arg[16], serial[INTEL_SERIAL_LEN];
 1459         const char *verb, *volname, *levelname, *diskname;
 1460         char *tmp;
 1461         int *nargs, *force;
 1462         off_t off, size, sectorsize, strip, disk_sectors;
 1463         intmax_t *sizearg, *striparg;
 1464         int numdisks, i, len, level, qual, update;
 1465         int error;
 1466 
 1467         sc = md->mdo_softc;
 1468         mdi = (struct g_raid_md_intel_object *)md;
 1469         verb = gctl_get_param(req, "verb", NULL);
 1470         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 1471         error = 0;
 1472         if (strcmp(verb, "label") == 0) {
 1473 
 1474                 if (*nargs < 4) {
 1475                         gctl_error(req, "Invalid number of arguments.");
 1476                         return (-1);
 1477                 }
 1478                 volname = gctl_get_asciiparam(req, "arg1");
 1479                 if (volname == NULL) {
 1480                         gctl_error(req, "No volume name.");
 1481                         return (-2);
 1482                 }
 1483                 levelname = gctl_get_asciiparam(req, "arg2");
 1484                 if (levelname == NULL) {
 1485                         gctl_error(req, "No RAID level.");
 1486                         return (-3);
 1487                 }
 1488                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1489                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1490                         return (-4);
 1491                 }
 1492                 numdisks = *nargs - 3;
 1493                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1494                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1495                     force ? *force : 0)) {
 1496                         gctl_error(req, "Unsupported RAID level "
 1497                             "(0x%02x/0x%02x), or number of disks (%d).",
 1498                             level, qual, numdisks);
 1499                         return (-5);
 1500                 }
 1501 
 1502                 /* Search for disks, connect them and probe. */
 1503                 size = 0x7fffffffffffffffllu;
 1504                 sectorsize = 0;
 1505                 for (i = 0; i < numdisks; i++) {
 1506                         snprintf(arg, sizeof(arg), "arg%d", i + 3);
 1507                         diskname = gctl_get_asciiparam(req, arg);
 1508                         if (diskname == NULL) {
 1509                                 gctl_error(req, "No disk name (%s).", arg);
 1510                                 error = -6;
 1511                                 break;
 1512                         }
 1513                         if (strcmp(diskname, "NONE") == 0) {
 1514                                 cp = NULL;
 1515                                 pp = NULL;
 1516                         } else {
 1517                                 g_topology_lock();
 1518                                 cp = g_raid_open_consumer(sc, diskname);
 1519                                 if (cp == NULL) {
 1520                                         gctl_error(req, "Can't open disk '%s'.",
 1521                                             diskname);
 1522                                         g_topology_unlock();
 1523                                         error = -7;
 1524                                         break;
 1525                                 }
 1526                                 pp = cp->provider;
 1527                         }
 1528                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1529                         pd->pd_disk_pos = i;
 1530                         disk = g_raid_create_disk(sc);
 1531                         disk->d_md_data = (void *)pd;
 1532                         disk->d_consumer = cp;
 1533                         if (cp == NULL) {
 1534                                 strcpy(&pd->pd_disk_meta.serial[0], "NONE");
 1535                                 pd->pd_disk_meta.id = 0xffffffff;
 1536                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 1537                                 continue;
 1538                         }
 1539                         cp->private = disk;
 1540                         g_topology_unlock();
 1541 
 1542                         error = g_raid_md_get_label(cp,
 1543                             &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
 1544                         if (error != 0) {
 1545                                 gctl_error(req,
 1546                                     "Can't get serial for provider '%s'.",
 1547                                     diskname);
 1548                                 error = -8;
 1549                                 break;
 1550                         }
 1551 
 1552                         /* Read kernel dumping information. */
 1553                         disk->d_kd.offset = 0;
 1554                         disk->d_kd.length = OFF_MAX;
 1555                         len = sizeof(disk->d_kd);
 1556                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 1557                         if (disk->d_kd.di.dumper == NULL)
 1558                                 G_RAID_DEBUG1(2, sc,
 1559                                     "Dumping not supported by %s.",
 1560                                     cp->provider->name);
 1561 
 1562                         intel_set_disk_sectors(&pd->pd_disk_meta,
 1563                             pp->mediasize / pp->sectorsize);
 1564                         if (size > pp->mediasize)
 1565                                 size = pp->mediasize;
 1566                         if (sectorsize < pp->sectorsize)
 1567                                 sectorsize = pp->sectorsize;
 1568                         pd->pd_disk_meta.id = 0;
 1569                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
 1570                 }
 1571                 if (error != 0)
 1572                         return (error);
 1573 
 1574                 if (sectorsize <= 0) {
 1575                         gctl_error(req, "Can't get sector size.");
 1576                         return (-8);
 1577                 }
 1578 
 1579                 /* Reserve some space for metadata. */
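                      /*
                       * For example, with 512-byte sectors this subtracts
                       * exactly 4096 bytes (8 sectors); with 4096-byte
                       * sectors, a single sector.
                       */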
 1580                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1581 
 1582                 /* Handle size argument. */
 1583                 len = sizeof(*sizearg);
 1584                 sizearg = gctl_get_param(req, "size", &len);
 1585                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1586                     *sizearg > 0) {
 1587                         if (*sizearg > size) {
 1588                                 gctl_error(req, "Size too big %lld > %lld.",
 1589                                     (long long)*sizearg, (long long)size);
 1590                                 return (-9);
 1591                         }
 1592                         size = *sizearg;
 1593                 }
 1594 
 1595                 /* Handle strip argument. */
 1596                 strip = 131072;
 1597                 len = sizeof(*striparg);
 1598                 striparg = gctl_get_param(req, "strip", &len);
 1599                 if (striparg != NULL && len == sizeof(*striparg) &&
 1600                     *striparg > 0) {
 1601                         if (*striparg < sectorsize) {
 1602                                 gctl_error(req, "Strip size too small.");
 1603                                 return (-10);
 1604                         }
 1605                         if (*striparg % sectorsize != 0) {
 1606                                 gctl_error(req, "Incorrect strip size.");
 1607                                 return (-11);
 1608                         }
 1609                         if (*striparg > 65535 * sectorsize) {
 1610                                 gctl_error(req, "Strip size too big.");
 1611                                 return (-12);
 1612                         }
 1613                         strip = *striparg;
 1614                 }
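                      /*
                       * The accepted strip must be a multiple of the sector
                       * size and at most 65535 sectors: the Intel map stores
                       * it in the 16-bit strip_sectors field, so larger
                       * strips cannot be represented in the on-disk metadata.
                       */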
 1615 
 1616                 /* Round size down to strip or sector. */
 1617                 if (level == G_RAID_VOLUME_RL_RAID1)
 1618                         size -= (size % sectorsize);
 1619                 else if (level == G_RAID_VOLUME_RL_RAID1E &&
 1620                     (numdisks & 1) != 0)
 1621                         size -= (size % (2 * strip));
 1622                 else
 1623                         size -= (size % strip);
 1624                 if (size <= 0) {
 1625                         gctl_error(req, "Size too small.");
 1626                         return (-13);
 1627                 }
 1628 
 1629                 /* We have all we need, create things: volume, ... */
 1630                 mdi->mdio_started = 1;
 1631                 vol = g_raid_create_volume(sc, volname, -1);
 1632                 vol->v_md_data = (void *)(intptr_t)0;
 1633                 vol->v_raid_level = level;
 1634                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1635                 vol->v_strip_size = strip;
 1636                 vol->v_disks_count = numdisks;
 1637                 if (level == G_RAID_VOLUME_RL_RAID0)
 1638                         vol->v_mediasize = size * numdisks;
 1639                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1640                         vol->v_mediasize = size;
 1641                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1642                         vol->v_mediasize = size * (numdisks - 1);
 1643                 else { /* RAID1E */
 1644                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1645                             strip;
 1646                 }
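                      /*
                       * Sizing sketch (illustrative): with numdisks = 4 and
                       * 100 GiB usable per disk, RAID0 exposes about 400 GiB,
                       * RAID1 100 GiB, RAID5 300 GiB, and RAID1E roughly half
                       * of the total, rounded down to a strip multiple.
                       */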
 1647                 vol->v_sectorsize = sectorsize;
 1648                 g_raid_start_volume(vol);
 1649 
 1650                 /* , and subdisks. */
 1651                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1652                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1653                         sd = &vol->v_subdisks[pd->pd_disk_pos];
 1654                         sd->sd_disk = disk;
 1655                         sd->sd_offset = 0;
 1656                         sd->sd_size = size;
 1657                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1658                         if (sd->sd_disk->d_consumer != NULL) {
 1659                                 g_raid_change_disk_state(disk,
 1660                                     G_RAID_DISK_S_ACTIVE);
 1661                                 g_raid_change_subdisk_state(sd,
 1662                                     G_RAID_SUBDISK_S_ACTIVE);
 1663                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1664                                     G_RAID_EVENT_SUBDISK);
 1665                         } else {
 1666                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1667                         }
 1668                 }
 1669 
 1670                 /* Write metadata based on created entities. */
 1671                 G_RAID_DEBUG1(0, sc, "Array started.");
 1672                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1673 
 1674                 /* Pick up any STALE/SPARE disks to refill the array if needed. */
 1675                 g_raid_md_intel_refill(sc);
 1676 
 1677                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1678                     G_RAID_EVENT_VOLUME);
 1679                 return (0);
 1680         }
 1681         if (strcmp(verb, "add") == 0) {
 1682 
 1683                 if (*nargs != 3) {
 1684                         gctl_error(req, "Invalid number of arguments.");
 1685                         return (-1);
 1686                 }
 1687                 volname = gctl_get_asciiparam(req, "arg1");
 1688                 if (volname == NULL) {
 1689                         gctl_error(req, "No volume name.");
 1690                         return (-2);
 1691                 }
 1692                 levelname = gctl_get_asciiparam(req, "arg2");
 1693                 if (levelname == NULL) {
 1694                         gctl_error(req, "No RAID level.");
 1695                         return (-3);
 1696                 }
 1697                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1698                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1699                         return (-4);
 1700                 }
 1701 
 1702                 /* Look for existing volumes. */
 1703                 i = 0;
 1704                 vol1 = NULL;
 1705                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1706                         vol1 = vol;
 1707                         i++;
 1708                 }
 1709                 if (i > 1) {
 1710                         gctl_error(req, "Maximum two volumes supported.");
 1711                         return (-6);
 1712                 }
 1713                 if (vol1 == NULL) {
 1714                         gctl_error(req, "At least one volume must exist.");
 1715                         return (-7);
 1716                 }
 1717 
 1718                 numdisks = vol1->v_disks_count;
 1719                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1720                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1721                     force ? *force : 0)) {
 1722                         gctl_error(req, "Unsupported RAID level "
 1723                             "(0x%02x/0x%02x), or number of disks (%d).",
 1724                             level, qual, numdisks);
 1725                         return (-5);
 1726                 }
 1727 
 1728                 /* Collect info about present disks. */
 1729                 size = 0x7fffffffffffffffllu;
 1730                 sectorsize = 512;
 1731                 for (i = 0; i < numdisks; i++) {
 1732                         disk = vol1->v_subdisks[i].sd_disk;
 1733                         pd = (struct g_raid_md_intel_perdisk *)
 1734                             disk->d_md_data;
 1735                         disk_sectors = 
 1736                             intel_get_disk_sectors(&pd->pd_disk_meta);
 1737 
 1738                         if (disk_sectors * 512 < size)
 1739                                 size = disk_sectors * 512;
 1740                         if (disk->d_consumer != NULL &&
 1741                             disk->d_consumer->provider != NULL &&
 1742                             disk->d_consumer->provider->sectorsize >
 1743                              sectorsize) {
 1744                                 sectorsize =
 1745                                     disk->d_consumer->provider->sectorsize;
 1746                         }
 1747                 }
 1748 
 1749                 /* Reserve some space for metadata. */
 1750                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1751 
 1752                 /* Decide whether to place the new volume before or after the existing one. */
 1753                 sd = &vol1->v_subdisks[0];
 1754                 if (sd->sd_offset >
 1755                     size - (sd->sd_offset + sd->sd_size)) {
 1756                         off = 0;
 1757                         size = sd->sd_offset;
 1758                 } else {
 1759                         off = sd->sd_offset + sd->sd_size;
 1760                         size = size - (sd->sd_offset + sd->sd_size);
 1761                 }
 1762 
 1763                 /* Handle strip argument. */
 1764                 strip = 131072;
 1765                 len = sizeof(*striparg);
 1766                 striparg = gctl_get_param(req, "strip", &len);
 1767                 if (striparg != NULL && len == sizeof(*striparg) &&
 1768                     *striparg > 0) {
 1769                         if (*striparg < sectorsize) {
 1770                                 gctl_error(req, "Strip size too small.");
 1771                                 return (-10);
 1772                         }
 1773                         if (*striparg % sectorsize != 0) {
 1774                                 gctl_error(req, "Incorrect strip size.");
 1775                                 return (-11);
 1776                         }
 1777                         if (*striparg > 65535 * sectorsize) {
 1778                                 gctl_error(req, "Strip size too big.");
 1779                                 return (-12);
 1780                         }
 1781                         strip = *striparg;
 1782                 }
 1783 
 1784                 /* Round offset up to strip. */
 1785                 if (off % strip != 0) {
 1786                         size -= strip - off % strip;
 1787                         off += strip - off % strip;
 1788                 }
 1789 
 1790                 /* Handle size argument. */
 1791                 len = sizeof(*sizearg);
 1792                 sizearg = gctl_get_param(req, "size", &len);
 1793                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1794                     *sizearg > 0) {
 1795                         if (*sizearg > size) {
 1796                                 gctl_error(req, "Size too big %lld > %lld.",
 1797                                     (long long)*sizearg, (long long)size);
 1798                                 return (-9);
 1799                         }
 1800                         size = *sizearg;
 1801                 }
 1802 
 1803                 /* Round size down to strip or sector. */
 1804                 if (level == G_RAID_VOLUME_RL_RAID1)
 1805                         size -= (size % sectorsize);
 1806                 else
 1807                         size -= (size % strip);
 1808                 if (size <= 0) {
 1809                         gctl_error(req, "Size too small.");
 1810                         return (-13);
 1811                 }
 1812                 if (size > 0xffffffffllu * sectorsize) {
 1813                         gctl_error(req, "Size too big.");
 1814                         return (-14);
 1815                 }
 1816 
 1817                 /* We have all we need, create things: volume, ... */
 1818                 vol = g_raid_create_volume(sc, volname, -1);
 1819                 vol->v_md_data = (void *)(intptr_t)i;
 1820                 vol->v_raid_level = level;
 1821                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1822                 vol->v_strip_size = strip;
 1823                 vol->v_disks_count = numdisks;
 1824                 if (level == G_RAID_VOLUME_RL_RAID0)
 1825                         vol->v_mediasize = size * numdisks;
 1826                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1827                         vol->v_mediasize = size;
 1828                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1829                         vol->v_mediasize = size * (numdisks - 1);
 1830                 else { /* RAID1E */
 1831                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1832                             strip;
 1833                 }
 1834                 vol->v_sectorsize = sectorsize;
 1835                 g_raid_start_volume(vol);
 1836 
 1837                 /* , and subdisks. */
 1838                 for (i = 0; i < numdisks; i++) {
 1839                         disk = vol1->v_subdisks[i].sd_disk;
 1840                         sd = &vol->v_subdisks[i];
 1841                         sd->sd_disk = disk;
 1842                         sd->sd_offset = off;
 1843                         sd->sd_size = size;
 1844                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1845                         if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 1846                                 g_raid_change_subdisk_state(sd,
 1847                                     G_RAID_SUBDISK_S_ACTIVE);
 1848                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1849                                     G_RAID_EVENT_SUBDISK);
 1850                         }
 1851                 }
 1852 
 1853                 /* Write metadata based on created entities. */
 1854                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1855 
 1856                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1857                     G_RAID_EVENT_VOLUME);
 1858                 return (0);
 1859         }
 1860         if (strcmp(verb, "delete") == 0) {
 1861 
 1862                 /* Full node destruction. */
 1863                 if (*nargs == 1) {
 1864                         /* Check if some volume is still open. */
 1865                         force = gctl_get_paraml(req, "force", sizeof(*force));
 1866                         if (force != NULL && *force == 0 &&
 1867                             g_raid_nopens(sc) != 0) {
 1868                                 gctl_error(req, "Some volume is still open.");
 1869                                 return (-4);
 1870                         }
 1871 
 1872                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1873                                 if (disk->d_consumer)
 1874                                         intel_meta_erase(disk->d_consumer);
 1875                         }
 1876                         g_raid_destroy_node(sc, 0);
 1877                         return (0);
 1878                 }
 1879 
 1880                 /* Destroy the specified volume.  If it was the last one -- the whole node. */
 1881                 if (*nargs != 2) {
 1882                         gctl_error(req, "Invalid number of arguments.");
 1883                         return (-1);
 1884                 }
 1885                 volname = gctl_get_asciiparam(req, "arg1");
 1886                 if (volname == NULL) {
 1887                         gctl_error(req, "No volume name.");
 1888                         return (-2);
 1889                 }
 1890 
 1891                 /* Search for volume. */
 1892                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1893                         if (strcmp(vol->v_name, volname) == 0)
 1894                                 break;
 1895                 }
 1896                 if (vol == NULL) {
 1897                         i = strtol(volname, &tmp, 10);
 1898                         if (verb != volname && tmp[0] == 0) {
 1899                                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1900                                         if (vol->v_global_id == i)
 1901                                                 break;
 1902                                 }
 1903                         }
 1904                 }
 1905                 if (vol == NULL) {
 1906                         gctl_error(req, "Volume '%s' not found.", volname);
 1907                         return (-3);
 1908                 }
 1909 
 1910                 /* Check if volume is still open. */
 1911                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1912                 if (force != NULL && *force == 0 &&
 1913                     vol->v_provider_open != 0) {
 1914                         gctl_error(req, "Volume is still open.");
 1915                         return (-4);
 1916                 }
 1917 
 1918                 /* Destroy volume and potentially node. */
 1919                 i = 0;
 1920                 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
 1921                         i++;
 1922                 if (i >= 2) {
 1923                         g_raid_destroy_volume(vol);
 1924                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1925                 } else {
 1926                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1927                                 if (disk->d_consumer)
 1928                                         intel_meta_erase(disk->d_consumer);
 1929                         }
 1930                         g_raid_destroy_node(sc, 0);
 1931                 }
 1932                 return (0);
 1933         }
 1934         if (strcmp(verb, "remove") == 0 ||
 1935             strcmp(verb, "fail") == 0) {
 1936                 if (*nargs < 2) {
 1937                         gctl_error(req, "Invalid number of arguments.");
 1938                         return (-1);
 1939                 }
 1940                 for (i = 1; i < *nargs; i++) {
 1941                         snprintf(arg, sizeof(arg), "arg%d", i);
 1942                         diskname = gctl_get_asciiparam(req, arg);
 1943                         if (diskname == NULL) {
 1944                                 gctl_error(req, "No disk name (%s).", arg);
 1945                                 error = -2;
 1946                                 break;
 1947                         }
 1948                         if (strncmp(diskname, "/dev/", 5) == 0)
 1949                                 diskname += 5;
 1950 
 1951                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1952                                 if (disk->d_consumer != NULL && 
 1953                                     disk->d_consumer->provider != NULL &&
 1954                                     strcmp(disk->d_consumer->provider->name,
 1955                                      diskname) == 0)
 1956                                         break;
 1957                         }
 1958                         if (disk == NULL) {
 1959                                 gctl_error(req, "Disk '%s' not found.",
 1960                                     diskname);
 1961                                 error = -3;
 1962                                 break;
 1963                         }
 1964 
 1965                         if (strcmp(verb, "fail") == 0) {
 1966                                 g_raid_md_fail_disk_intel(md, NULL, disk);
 1967                                 continue;
 1968                         }
 1969 
 1970                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1971 
 1972                         /* Erase metadata on deleting disk. */
 1973                         intel_meta_erase(disk->d_consumer);
 1974 
 1975                         /* If disk was assigned, just update statuses. */
 1976                         if (pd->pd_disk_pos >= 0) {
 1977                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1978                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1979                                 disk->d_consumer = NULL;
 1980                                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1981                                         g_raid_change_subdisk_state(sd,
 1982                                             G_RAID_SUBDISK_S_NONE);
 1983                                         g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1984                                             G_RAID_EVENT_SUBDISK);
 1985                                 }
 1986                         } else {
 1987                                 /* Otherwise -- delete. */
 1988                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 1989                                 g_raid_destroy_disk(disk);
 1990                         }
 1991                 }
 1992 
 1993                 /* Write updated metadata to remaining disks. */
 1994                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1995 
 1996                 /* Check if anything left except placeholders. */
 1997                 if (g_raid_ndisks(sc, -1) ==
 1998                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 1999                         g_raid_destroy_node(sc, 0);
 2000                 else
 2001                         g_raid_md_intel_refill(sc);
 2002                 return (error);
 2003         }
 2004         if (strcmp(verb, "insert") == 0) {
 2005                 if (*nargs < 2) {
 2006                         gctl_error(req, "Invalid number of arguments.");
 2007                         return (-1);
 2008                 }
 2009                 update = 0;
 2010                 for (i = 1; i < *nargs; i++) {
 2011                         /* Get disk name. */
 2012                         snprintf(arg, sizeof(arg), "arg%d", i);
 2013                         diskname = gctl_get_asciiparam(req, arg);
 2014                         if (diskname == NULL) {
 2015                                 gctl_error(req, "No disk name (%s).", arg);
 2016                                 error = -3;
 2017                                 break;
 2018                         }
 2019 
 2020                         /* Try to find provider with specified name. */
 2021                         g_topology_lock();
 2022                         cp = g_raid_open_consumer(sc, diskname);
 2023                         if (cp == NULL) {
 2024                                 gctl_error(req, "Can't open disk '%s'.",
 2025                                     diskname);
 2026                                 g_topology_unlock();
 2027                                 error = -4;
 2028                                 break;
 2029                         }
 2030                         pp = cp->provider;
 2031                         g_topology_unlock();
 2032 
 2033                         /* Read disk serial. */
 2034                         error = g_raid_md_get_label(cp,
 2035                             &serial[0], INTEL_SERIAL_LEN);
 2036                         if (error != 0) {
 2037                                 gctl_error(req,
 2038                                     "Can't get serial for provider '%s'.",
 2039                                     diskname);
 2040                                 g_raid_kill_consumer(sc, cp);
 2041                                 error = -7;
 2042                                 break;
 2043                         }
 2044 
 2045                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 2046                         pd->pd_disk_pos = -1;
 2047 
 2048                         disk = g_raid_create_disk(sc);
 2049                         disk->d_consumer = cp;
 2050                         disk->d_md_data = (void *)pd;
 2051                         cp->private = disk;
 2052 
 2053                         /* Read kernel dumping information. */
 2054                         disk->d_kd.offset = 0;
 2055                         disk->d_kd.length = OFF_MAX;
 2056                         len = sizeof(disk->d_kd);
 2057                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 2058                         if (disk->d_kd.di.dumper == NULL)
 2059                                 G_RAID_DEBUG1(2, sc,
 2060                                     "Dumping not supported by %s.",
 2061                                     cp->provider->name);
 2062 
 2063                         memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
 2064                             INTEL_SERIAL_LEN);
 2065                         intel_set_disk_sectors(&pd->pd_disk_meta,
 2066                             pp->mediasize / pp->sectorsize);
 2067                         pd->pd_disk_meta.id = 0;
 2068                         pd->pd_disk_meta.flags = INTEL_F_SPARE;
 2069 
 2070                         /* Welcome the "new" disk. */
 2071                         update += g_raid_md_intel_start_disk(disk);
 2072                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
 2073                                 intel_meta_write_spare(cp, &pd->pd_disk_meta);
 2074                                 g_raid_destroy_disk(disk);
 2075                         } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 2076                                 gctl_error(req, "Disk '%s' doesn't fit.",
 2077                                     diskname);
 2078                                 g_raid_destroy_disk(disk);
 2079                                 error = -8;
 2080                                 break;
 2081                         }
 2082                 }
 2083 
 2084                 /* Write new metadata if we changed something. */
 2085                 if (update)
 2086                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 2087                 return (error);
 2088         }
 2089         return (-100);
 2090 }
 2091 
 2092 static int
 2093 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
 2094     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2095 {
 2096         struct g_raid_softc *sc;
 2097         struct g_raid_volume *vol;
 2098         struct g_raid_subdisk *sd;
 2099         struct g_raid_disk *disk;
 2100         struct g_raid_md_intel_object *mdi;
 2101         struct g_raid_md_intel_perdisk *pd;
 2102         struct intel_raid_conf *meta;
 2103         struct intel_raid_vol *mvol;
 2104         struct intel_raid_map *mmap0, *mmap1;
 2105         off_t sectorsize = 512, pos;
 2106         const char *version, *cv;
 2107         int vi, sdi, numdisks, len, state, stale;
 2108 
 2109         sc = md->mdo_softc;
 2110         mdi = (struct g_raid_md_intel_object *)md;
 2111 
 2112         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 2113                 return (0);
 2114 
 2115         /* Bump generation. Newly written metadata may differ from previous. */
 2116         mdi->mdio_generation++;
 2117 
 2118         /* Count number of disks. */
 2119         numdisks = 0;
 2120         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2121                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2122                 if (pd->pd_disk_pos < 0)
 2123                         continue;
 2124                 numdisks++;
 2125                 if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 2126                         pd->pd_disk_meta.flags =
 2127                             INTEL_F_ONLINE | INTEL_F_ASSIGNED;
 2128                 } else if (disk->d_state == G_RAID_DISK_S_FAILED) {
 2129                         pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
 2130                 } else {
 2131                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 2132                         if (pd->pd_disk_meta.id != 0xffffffff) {
 2133                                 pd->pd_disk_meta.id = 0xffffffff;
 2134                                 len = strlen(pd->pd_disk_meta.serial);
 2135                                 len = min(len, INTEL_SERIAL_LEN - 3);
 2136                                 strcpy(pd->pd_disk_meta.serial + len, ":0");
 2137                         }
 2138                 }
 2139         }
 2140 
 2141         /* Fill anchor and disks. */
 2142         meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
 2143             M_MD_INTEL, M_WAITOK | M_ZERO);
 2144         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
 2145         meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
 2146         meta->config_id = mdi->mdio_config_id;
 2147         meta->generation = mdi->mdio_generation;
 2148         meta->attributes = INTEL_ATTR_CHECKSUM;
 2149         meta->total_disks = numdisks;
 2150         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2151                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2152                 if (pd->pd_disk_pos < 0)
 2153                         continue;
 2154                 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
 2155         }
 2156 
 2157         /* Fill volumes and maps. */
 2158         vi = 0;
 2159         version = INTEL_VERSION_1000;
 2160         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 2161                 if (vol->v_stopping)
 2162                         continue;
 2163                 mvol = intel_get_volume(meta, vi);
 2164 
 2165                 /* New metadata may have a different volume order. */
 2166                 vol->v_md_data = (void *)(intptr_t)vi;
 2167 
 2168                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2169                         sd = &vol->v_subdisks[sdi];
 2170                         if (sd->sd_disk != NULL)
 2171                                 break;
 2172                 }
 2173                 if (sdi >= vol->v_disks_count)
 2174                         panic("No any filled subdisk in volume");
 2175                 if (vol->v_mediasize >= 0x20000000000llu)
 2176                         meta->attributes |= INTEL_ATTR_2TB;
 2177                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2178                         meta->attributes |= INTEL_ATTR_RAID0;
 2179                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2180                         meta->attributes |= INTEL_ATTR_RAID1;
 2181                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2182                         meta->attributes |= INTEL_ATTR_RAID5;
 2183                 else
 2184                         meta->attributes |= INTEL_ATTR_RAID10;
 2185 
 2186                 if (meta->attributes & INTEL_ATTR_2TB)
 2187                         cv = INTEL_VERSION_1300;
 2188 //              else if (dev->status == DEV_CLONE_N_GO)
 2189 //                      cv = INTEL_VERSION_1206;
 2190                 else if (vol->v_disks_count > 4)
 2191                         cv = INTEL_VERSION_1204;
 2192                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2193                         cv = INTEL_VERSION_1202;
 2194                 else if (vol->v_disks_count > 2)
 2195                         cv = INTEL_VERSION_1201;
 2196                 else if (vi > 0)
 2197                         cv = INTEL_VERSION_1200;
 2198                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2199                         cv = INTEL_VERSION_1100;
 2200                 else
 2201                         cv = INTEL_VERSION_1000;
 2202                 if (strcmp(cv, version) > 0)
 2203                         version = cv;
 2204 
 2205                 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
 2206                 mvol->total_sectors = vol->v_mediasize / sectorsize;
 2207 
 2208                 /* Check for any recovery in progress. */
 2209                 state = G_RAID_SUBDISK_S_ACTIVE;
 2210                 pos = 0x7fffffffffffffffllu;
 2211                 stale = 0;
 2212                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2213                         sd = &vol->v_subdisks[sdi];
 2214                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
 2215                                 state = G_RAID_SUBDISK_S_REBUILD;
 2216                         else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
 2217                             state != G_RAID_SUBDISK_S_REBUILD)
 2218                                 state = G_RAID_SUBDISK_S_RESYNC;
 2219                         else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
 2220                                 stale = 1;
 2221                         if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2222                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
 2223                              sd->sd_rebuild_pos < pos)
 2224                                 pos = sd->sd_rebuild_pos;
 2225                 }
 2226                 if (state == G_RAID_SUBDISK_S_REBUILD) {
 2227                         mvol->migr_state = 1;
 2228                         mvol->migr_type = INTEL_MT_REBUILD;
 2229                 } else if (state == G_RAID_SUBDISK_S_RESYNC) {
 2230                         mvol->migr_state = 1;
 2231                         /* mvol->migr_type = INTEL_MT_REPAIR; */
 2232                         mvol->migr_type = INTEL_MT_VERIFY;
 2233                         mvol->state |= INTEL_ST_VERIFY_AND_FIX;
 2234                 } else
 2235                         mvol->migr_state = 0;
 2236                 mvol->dirty = (vol->v_dirty || stale);
 2237 
 2238                 mmap0 = intel_get_map(mvol, 0);
 2239 
 2240                 /* Write map / common part of two maps. */
 2241                 intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
 2242                 intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
 2243                 mmap0->strip_sectors = vol->v_strip_size / sectorsize;
 2244                 if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
 2245                         mmap0->status = INTEL_S_FAILURE;
 2246                 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
 2247                         mmap0->status = INTEL_S_DEGRADED;
 2248                 else
 2249                         mmap0->status = INTEL_S_READY;
 2250                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2251                         mmap0->type = INTEL_T_RAID0;
 2252                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 2253                     vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2254                         mmap0->type = INTEL_T_RAID1;
 2255                 else
 2256                         mmap0->type = INTEL_T_RAID5;
 2257                 mmap0->total_disks = vol->v_disks_count;
 2258                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2259                         mmap0->total_domains = vol->v_disks_count;
 2260                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2261                         mmap0->total_domains = 2;
 2262                 else
 2263                         mmap0->total_domains = 1;
 2264                 intel_set_map_stripe_count(mmap0,
 2265                     sd->sd_size / vol->v_strip_size / mmap0->total_domains);
 2266                 mmap0->failed_disk_num = 0xff;
 2267                 mmap0->ddf = 1;
 2268 
 2269                 /* If there are two maps -- copy the common part and update the first. */
 2270                 if (mvol->migr_state) {
 2271                         intel_set_vol_curr_migr_unit(mvol,
 2272                             pos / vol->v_strip_size / mmap0->total_domains);
 2273                         mmap1 = intel_get_map(mvol, 1);
 2274                         memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
 2275                         mmap0->status = INTEL_S_READY;
 2276                 } else
 2277                         mmap1 = NULL;
 2278 
 2279                 /* Write disk indexes and set rebuild flags. */
 2280                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2281                         sd = &vol->v_subdisks[sdi];
 2282                         pd = (struct g_raid_md_intel_perdisk *)
 2283                             sd->sd_disk->d_md_data;
 2284                         mmap0->disk_idx[sdi] = pd->pd_disk_pos;
 2285                         if (mvol->migr_state)
 2286                                 mmap1->disk_idx[sdi] = pd->pd_disk_pos;
 2287                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2288                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 2289                                 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2290                         } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
 2291                             sd->sd_state != G_RAID_SUBDISK_S_STALE) {
 2292                                 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
 2293                                 if (mvol->migr_state)
 2294                                         mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2295                         }
 2296                         if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
 2297                              sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
 2298                             mmap0->failed_disk_num == 0xff) {
 2299                                 mmap0->failed_disk_num = sdi;
 2300                                 if (mvol->migr_state)
 2301                                         mmap1->failed_disk_num = sdi;
 2302                         }
 2303                 }
 2304                 vi++;
 2305         }
 2306         meta->total_volumes = vi;
 2307         if (strcmp(version, INTEL_VERSION_1300) != 0)
 2308                 meta->attributes &= INTEL_ATTR_CHECKSUM;
 2309         memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
 2310 
 2311         /* We are done.  Print the metadata and store it to the disks. */
 2312         g_raid_md_intel_print(meta);
 2313         if (mdi->mdio_meta != NULL)
 2314                 free(mdi->mdio_meta, M_MD_INTEL);
 2315         mdi->mdio_meta = meta;
 2316         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2317                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2318                 if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 2319                         continue;
 2320                 if (pd->pd_meta != NULL) {
 2321                         free(pd->pd_meta, M_MD_INTEL);
 2322                         pd->pd_meta = NULL;
 2323                 }
 2324                 pd->pd_meta = intel_meta_copy(meta);
 2325                 intel_meta_write(disk->d_consumer, meta);
 2326         }
 2327         return (0);
 2328 }
 2329 
 2330 static int
 2331 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
 2332     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2333 {
 2334         struct g_raid_softc *sc;
 2335         struct g_raid_md_intel_object *mdi;
 2336         struct g_raid_md_intel_perdisk *pd;
 2337         struct g_raid_subdisk *sd;
 2338 
 2339         sc = md->mdo_softc;
 2340         mdi = (struct g_raid_md_intel_object *)md;
 2341         pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
 2342 
 2343         /* We can't fail a disk that is not currently part of the array. */
 2344         if (pd->pd_disk_pos < 0)
 2345                 return (-1);
 2346 
 2347         /*
 2348          * Mark disk as failed in metadata and try to write that metadata
 2349          * to the disk itself to prevent its later resurrection as STALE.
 2350          */
 2351         mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
 2352         pd->pd_disk_meta.flags = INTEL_F_FAILED;
 2353         g_raid_md_intel_print(mdi->mdio_meta);
 2354         if (tdisk->d_consumer)
 2355                 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
 2356 
 2357         /* Change states. */
 2358         g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
 2359         TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
 2360                 g_raid_change_subdisk_state(sd,
 2361                     G_RAID_SUBDISK_S_FAILED);
 2362                 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
 2363                     G_RAID_EVENT_SUBDISK);
 2364         }
 2365 
 2366         /* Write updated metadata to remaining disks. */
 2367         g_raid_md_write_intel(md, NULL, NULL, tdisk);
 2368 
 2369         /* Check if anything left except placeholders. */
 2370         if (g_raid_ndisks(sc, -1) ==
 2371             g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 2372                 g_raid_destroy_node(sc, 0);
 2373         else
 2374                 g_raid_md_intel_refill(sc);
 2375         return (0);
 2376 }
 2377 
 2378 static int
 2379 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
 2380     struct g_raid_disk *disk)
 2381 {
 2382         struct g_raid_md_intel_perdisk *pd;
 2383 
 2384         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2385         if (pd->pd_meta != NULL) {
 2386                 free(pd->pd_meta, M_MD_INTEL);
 2387                 pd->pd_meta = NULL;
 2388         }
 2389         free(pd, M_MD_INTEL);
 2390         disk->d_md_data = NULL;
 2391         return (0);
 2392 }
 2393 
 2394 static int
 2395 g_raid_md_free_intel(struct g_raid_md_object *md)
 2396 {
 2397         struct g_raid_md_intel_object *mdi;
 2398 
 2399         mdi = (struct g_raid_md_intel_object *)md;
 2400         if (!mdi->mdio_started) {
 2401                 mdi->mdio_started = 0;
 2402                 callout_stop(&mdi->mdio_start_co);
 2403                 G_RAID_DEBUG1(1, md->mdo_softc,
 2404                     "root_mount_rel %p", mdi->mdio_rootmount);
 2405                 root_mount_rel(mdi->mdio_rootmount);
 2406                 mdi->mdio_rootmount = NULL;
 2407         }
 2408         if (mdi->mdio_meta != NULL) {
 2409                 free(mdi->mdio_meta, M_MD_INTEL);
 2410                 mdi->mdio_meta = NULL;
 2411         }
 2412         return (0);
 2413 }
 2414 
 2415 G_RAID_MD_DECLARE(g_raid_md_intel);
