The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/md_intel.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
    3  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/9.1/sys/geom/raid/md_intel.c 240558 2012-09-16 11:02:22Z mav $");
   30 
   31 #include <sys/param.h>
   32 #include <sys/bio.h>
   33 #include <sys/endian.h>
   34 #include <sys/kernel.h>
   35 #include <sys/kobj.h>
   36 #include <sys/limits.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mutex.h>
   40 #include <sys/systm.h>
   41 #include <sys/taskqueue.h>
   42 #include <geom/geom.h>
   43 #include "geom/raid/g_raid.h"
   44 #include "g_raid_md_if.h"
   45 
/*
 * Malloc type used for every allocation made with M_MD_INTEL in this file
 * (metadata copies and temporary I/O buffers).
 */
static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
   47 
   48 struct intel_raid_map {
   49         uint32_t        offset;
   50         uint32_t        disk_sectors;
   51         uint32_t        stripe_count;
   52         uint16_t        strip_sectors;
   53         uint8_t         status;
   54 #define INTEL_S_READY           0x00
   55 #define INTEL_S_UNINITIALIZED   0x01
   56 #define INTEL_S_DEGRADED        0x02
   57 #define INTEL_S_FAILURE         0x03
   58 
   59         uint8_t         type;
   60 #define INTEL_T_RAID0           0x00
   61 #define INTEL_T_RAID1           0x01
   62 #define INTEL_T_RAID5           0x05
   63 
   64         uint8_t         total_disks;
   65         uint8_t         total_domains;
   66         uint8_t         failed_disk_num;
   67         uint8_t         ddf;
   68         uint32_t        offset_hi;
   69         uint32_t        disk_sectors_hi;
   70         uint32_t        stripe_count_hi;
   71         uint32_t        filler_2[4];
   72         uint32_t        disk_idx[1];    /* total_disks entries. */
   73 #define INTEL_DI_IDX    0x00ffffff
   74 #define INTEL_DI_RBLD   0x01000000
   75 } __packed;
   76 
   77 struct intel_raid_vol {
   78         uint8_t         name[16];
   79         u_int64_t       total_sectors __packed;
   80         uint32_t        state;
   81 #define INTEL_ST_BOOTABLE               0x00000001
   82 #define INTEL_ST_BOOT_DEVICE            0x00000002
   83 #define INTEL_ST_READ_COALESCING        0x00000004
   84 #define INTEL_ST_WRITE_COALESCING       0x00000008
   85 #define INTEL_ST_LAST_SHUTDOWN_DIRTY    0x00000010
   86 #define INTEL_ST_HIDDEN_AT_BOOT         0x00000020
   87 #define INTEL_ST_CURRENTLY_HIDDEN       0x00000040
   88 #define INTEL_ST_VERIFY_AND_FIX         0x00000080
   89 #define INTEL_ST_MAP_STATE_UNINIT       0x00000100
   90 #define INTEL_ST_NO_AUTO_RECOVERY       0x00000200
   91 #define INTEL_ST_CLONE_N_GO             0x00000400
   92 #define INTEL_ST_CLONE_MAN_SYNC         0x00000800
   93 #define INTEL_ST_CNG_MASTER_DISK_NUM    0x00001000
   94         uint32_t        reserved;
   95         uint8_t         migr_priority;
   96         uint8_t         num_sub_vols;
   97         uint8_t         tid;
   98         uint8_t         cng_master_disk;
   99         uint16_t        cache_policy;
  100         uint8_t         cng_state;
  101         uint8_t         cng_sub_state;
  102         uint32_t        filler_0[10];
  103 
  104         uint32_t        curr_migr_unit;
  105         uint32_t        checkpoint_id;
  106         uint8_t         migr_state;
  107         uint8_t         migr_type;
  108 #define INTEL_MT_INIT           0
  109 #define INTEL_MT_REBUILD        1
  110 #define INTEL_MT_VERIFY         2
  111 #define INTEL_MT_GEN_MIGR       3
  112 #define INTEL_MT_STATE_CHANGE   4
  113 #define INTEL_MT_REPAIR         5
  114         uint8_t         dirty;
  115         uint8_t         fs_state;
  116         uint16_t        verify_errors;
  117         uint16_t        bad_blocks;
  118         uint32_t        curr_migr_unit_hi;
  119         uint32_t        filler_1[3];
  120         struct intel_raid_map map[1];   /* 2 entries if migr_state != 0. */
  121 } __packed;
  122 
  123 struct intel_raid_disk {
  124 #define INTEL_SERIAL_LEN        16
  125         uint8_t         serial[INTEL_SERIAL_LEN];
  126         uint32_t        sectors;
  127         uint32_t        id;
  128         uint32_t        flags;
  129 #define INTEL_F_SPARE           0x01
  130 #define INTEL_F_ASSIGNED        0x02
  131 #define INTEL_F_FAILED          0x04
  132 #define INTEL_F_ONLINE          0x08
  133         uint32_t        owner_cfg_num;
  134         uint32_t        sectors_hi;
  135         uint32_t        filler[3];
  136 } __packed;
  137 
  138 struct intel_raid_conf {
  139         uint8_t         intel_id[24];
  140 #define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
  141 
  142         uint8_t         version[6];
  143 #define INTEL_VERSION_1000      "1.0.00"        /* RAID0 */
  144 #define INTEL_VERSION_1100      "1.1.00"        /* RAID1 */
  145 #define INTEL_VERSION_1200      "1.2.00"        /* Many volumes */
  146 #define INTEL_VERSION_1201      "1.2.01"        /* 3 or 4 disks */
  147 #define INTEL_VERSION_1202      "1.2.02"        /* RAID5 */
  148 #define INTEL_VERSION_1204      "1.2.04"        /* 5 or 6 disks */
  149 #define INTEL_VERSION_1206      "1.2.06"        /* CNG */
  150 #define INTEL_VERSION_1300      "1.3.00"        /* Attributes */
  151 
  152         uint8_t         dummy_0[2];
  153         uint32_t        checksum;
  154         uint32_t        config_size;
  155         uint32_t        config_id;
  156         uint32_t        generation;
  157         uint32_t        error_log_size;
  158         uint32_t        attributes;
  159 #define INTEL_ATTR_RAID0        0x00000001
  160 #define INTEL_ATTR_RAID1        0x00000002
  161 #define INTEL_ATTR_RAID10       0x00000004
  162 #define INTEL_ATTR_RAID1E       0x00000008
  163 #define INTEL_ATTR_RAID5        0x00000010
  164 #define INTEL_ATTR_RAIDCNG      0x00000020
  165 #define INTEL_ATTR_2TB          0x20000000
  166 #define INTEL_ATTR_PM           0x40000000
  167 #define INTEL_ATTR_CHECKSUM     0x80000000
  168 
  169         uint8_t         total_disks;
  170         uint8_t         total_volumes;
  171         uint8_t         dummy_2[2];
  172         uint32_t        filler_0[39];
  173         struct intel_raid_disk  disk[1];        /* total_disks entries. */
  174         /* Here goes total_volumes of struct intel_raid_vol. */
  175 } __packed;
  176 
  177 #define INTEL_MAX_MD_SIZE(ndisks)                               \
  178     (sizeof(struct intel_raid_conf) +                           \
  179      sizeof(struct intel_raid_disk) * (ndisks - 1) +            \
  180      sizeof(struct intel_raid_vol) * 2 +                        \
  181      sizeof(struct intel_raid_map) * 2 +                        \
  182      sizeof(uint32_t) * (ndisks - 1) * 4)
  183 
  184 struct g_raid_md_intel_perdisk {
  185         struct intel_raid_conf  *pd_meta;
  186         int                      pd_disk_pos;
  187         struct intel_raid_disk   pd_disk_meta;
  188 };
  189 
  190 struct g_raid_md_intel_object {
  191         struct g_raid_md_object  mdio_base;
  192         uint32_t                 mdio_config_id;
  193         uint32_t                 mdio_generation;
  194         struct intel_raid_conf  *mdio_meta;
  195         struct callout           mdio_start_co; /* STARTING state timer. */
  196         int                      mdio_disks_present;
  197         int                      mdio_started;
  198         int                      mdio_incomplete;
  199         struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
  200 };
  201 
/*
 * Prototypes of the method implementations that are registered in
 * g_raid_md_intel_methods[].
 */
static g_raid_md_create_t g_raid_md_create_intel;
static g_raid_md_taste_t g_raid_md_taste_intel;
static g_raid_md_event_t g_raid_md_event_intel;
static g_raid_md_ctl_t g_raid_md_ctl_intel;
static g_raid_md_write_t g_raid_md_write_intel;
static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
static g_raid_md_free_t g_raid_md_free_intel;
  210 
/*
 * kobj method table binding each g_raid_md interface method to its Intel
 * implementation.  The all-zero entry terminates the table.
 */
static kobj_method_t g_raid_md_intel_methods[] = {
        KOBJMETHOD(g_raid_md_create,    g_raid_md_create_intel),
        KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_intel),
        KOBJMETHOD(g_raid_md_event,     g_raid_md_event_intel),
        KOBJMETHOD(g_raid_md_ctl,       g_raid_md_ctl_intel),
        KOBJMETHOD(g_raid_md_write,     g_raid_md_write_intel),
        KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel),
        KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel),
        KOBJMETHOD(g_raid_md_free,      g_raid_md_free_intel),
        { 0, 0 }
};
  222 
/*
 * Class descriptor for the "Intel" metadata format: class name, method
 * table and size of the per-array object, followed by the enable flag and
 * the class priority.
 * NOTE(review): the meaning of the three positional members is assumed from
 * the usual kobj class layout -- confirm against struct g_raid_md_class.
 */
static struct g_raid_md_class g_raid_md_intel_class = {
        "Intel",
        g_raid_md_intel_methods,
        sizeof(struct g_raid_md_intel_object),
        .mdc_enable = 1,
        .mdc_priority = 100
};
  230 
  231 
  232 static struct intel_raid_map *
  233 intel_get_map(struct intel_raid_vol *mvol, int i)
  234 {
  235         struct intel_raid_map *mmap;
  236 
  237         if (i > (mvol->migr_state ? 1 : 0))
  238                 return (NULL);
  239         mmap = &mvol->map[0];
  240         for (; i > 0; i--) {
  241                 mmap = (struct intel_raid_map *)
  242                     &mmap->disk_idx[mmap->total_disks];
  243         }
  244         return ((struct intel_raid_map *)mmap);
  245 }
  246 
  247 static struct intel_raid_vol *
  248 intel_get_volume(struct intel_raid_conf *meta, int i)
  249 {
  250         struct intel_raid_vol *mvol;
  251         struct intel_raid_map *mmap;
  252 
  253         if (i > 1)
  254                 return (NULL);
  255         mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
  256         for (; i > 0; i--) {
  257                 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
  258                 mvol = (struct intel_raid_vol *)
  259                     &mmap->disk_idx[mmap->total_disks];
  260         }
  261         return (mvol);
  262 }
  263 
  264 static off_t
  265 intel_get_map_offset(struct intel_raid_map *mmap)
  266 {
  267         off_t offset = (off_t)mmap->offset_hi << 32;
  268 
  269         offset += mmap->offset;
  270         return (offset);
  271 }
  272 
  273 static void
  274 intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
  275 {
  276 
  277         mmap->offset = offset & 0xffffffff;
  278         mmap->offset_hi = offset >> 32;
  279 }
  280 
  281 static off_t
  282 intel_get_map_disk_sectors(struct intel_raid_map *mmap)
  283 {
  284         off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
  285 
  286         disk_sectors += mmap->disk_sectors;
  287         return (disk_sectors);
  288 }
  289 
  290 static void
  291 intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
  292 {
  293 
  294         mmap->disk_sectors = disk_sectors & 0xffffffff;
  295         mmap->disk_sectors_hi = disk_sectors >> 32;
  296 }
  297 
  298 static void
  299 intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
  300 {
  301 
  302         mmap->stripe_count = stripe_count & 0xffffffff;
  303         mmap->stripe_count_hi = stripe_count >> 32;
  304 }
  305 
  306 static off_t
  307 intel_get_disk_sectors(struct intel_raid_disk *disk)
  308 {
  309         off_t sectors = (off_t)disk->sectors_hi << 32;
  310 
  311         sectors += disk->sectors;
  312         return (sectors);
  313 }
  314 
  315 static void
  316 intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
  317 {
  318 
  319         disk->sectors = sectors & 0xffffffff;
  320         disk->sectors_hi = sectors >> 32;
  321 }
  322 
  323 static off_t
  324 intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
  325 {
  326         off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
  327 
  328         curr_migr_unit += vol->curr_migr_unit;
  329         return (curr_migr_unit);
  330 }
  331 
  332 static void
  333 intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
  334 {
  335 
  336         vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
  337         vol->curr_migr_unit_hi = curr_migr_unit >> 32;
  338 }
  339 
  340 static void
  341 g_raid_md_intel_print(struct intel_raid_conf *meta)
  342 {
  343         struct intel_raid_vol *mvol;
  344         struct intel_raid_map *mmap;
  345         int i, j, k;
  346 
  347         if (g_raid_debug < 1)
  348                 return;
  349 
  350         printf("********* ATA Intel MatrixRAID Metadata *********\n");
  351         printf("intel_id            <%.24s>\n", meta->intel_id);
  352         printf("version             <%.6s>\n", meta->version);
  353         printf("checksum            0x%08x\n", meta->checksum);
  354         printf("config_size         0x%08x\n", meta->config_size);
  355         printf("config_id           0x%08x\n", meta->config_id);
  356         printf("generation          0x%08x\n", meta->generation);
  357         printf("attributes          0x%08x\n", meta->attributes);
  358         printf("total_disks         %u\n", meta->total_disks);
  359         printf("total_volumes       %u\n", meta->total_volumes);
  360         printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags\n");
  361         for (i = 0; i < meta->total_disks; i++ ) {
  362                 printf("    %d   <%.16s> %u %u 0x%08x 0x%08x\n", i,
  363                     meta->disk[i].serial, meta->disk[i].sectors,
  364                     meta->disk[i].sectors_hi,
  365                     meta->disk[i].id, meta->disk[i].flags);
  366         }
  367         for (i = 0; i < meta->total_volumes; i++) {
  368                 mvol = intel_get_volume(meta, i);
  369                 printf(" ****** Volume %d ******\n", i);
  370                 printf(" name               %.16s\n", mvol->name);
  371                 printf(" total_sectors      %ju\n", mvol->total_sectors);
  372                 printf(" state              %u\n", mvol->state);
  373                 printf(" reserved           %u\n", mvol->reserved);
  374                 printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
  375                 printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
  376                 printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
  377                 printf(" migr_state         %u\n", mvol->migr_state);
  378                 printf(" migr_type          %u\n", mvol->migr_type);
  379                 printf(" dirty              %u\n", mvol->dirty);
  380 
  381                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  382                         printf("  *** Map %d ***\n", j);
  383                         mmap = intel_get_map(mvol, j);
  384                         printf("  offset            %u\n", mmap->offset);
  385                         printf("  offset_hi         %u\n", mmap->offset_hi);
  386                         printf("  disk_sectors      %u\n", mmap->disk_sectors);
  387                         printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
  388                         printf("  stripe_count      %u\n", mmap->stripe_count);
  389                         printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
  390                         printf("  strip_sectors     %u\n", mmap->strip_sectors);
  391                         printf("  status            %u\n", mmap->status);
  392                         printf("  type              %u\n", mmap->type);
  393                         printf("  total_disks       %u\n", mmap->total_disks);
  394                         printf("  total_domains     %u\n", mmap->total_domains);
  395                         printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
  396                         printf("  ddf               %u\n", mmap->ddf);
  397                         printf("  disk_idx         ");
  398                         for (k = 0; k < mmap->total_disks; k++)
  399                                 printf(" 0x%08x", mmap->disk_idx[k]);
  400                         printf("\n");
  401                 }
  402         }
  403         printf("=================================================\n");
  404 }
  405 
  406 static struct intel_raid_conf *
  407 intel_meta_copy(struct intel_raid_conf *meta)
  408 {
  409         struct intel_raid_conf *nmeta;
  410 
  411         nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
  412         memcpy(nmeta, meta, meta->config_size);
  413         return (nmeta);
  414 }
  415 
  416 static int
  417 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
  418 {
  419         int pos;
  420 
  421         for (pos = 0; pos < meta->total_disks; pos++) {
  422                 if (strncmp(meta->disk[pos].serial,
  423                     serial, INTEL_SERIAL_LEN) == 0)
  424                         return (pos);
  425         }
  426         return (-1);
  427 }
  428 
  429 static struct intel_raid_conf *
  430 intel_meta_read(struct g_consumer *cp)
  431 {
  432         struct g_provider *pp;
  433         struct intel_raid_conf *meta;
  434         struct intel_raid_vol *mvol;
  435         struct intel_raid_map *mmap;
  436         char *buf;
  437         int error, i, j, k, left, size;
  438         uint32_t checksum, *ptr;
  439 
  440         pp = cp->provider;
  441 
  442         /* Read the anchor sector. */
  443         buf = g_read_data(cp,
  444             pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
  445         if (buf == NULL) {
  446                 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
  447                     pp->name, error);
  448                 return (NULL);
  449         }
  450         meta = (struct intel_raid_conf *)buf;
  451 
  452         /* Check if this is an Intel RAID struct */
  453         if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
  454                 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
  455                 g_free(buf);
  456                 return (NULL);
  457         }
  458         if (meta->config_size > 65536 ||
  459             meta->config_size < sizeof(struct intel_raid_conf)) {
  460                 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
  461                     meta->config_size);
  462                 g_free(buf);
  463                 return (NULL);
  464         }
  465         size = meta->config_size;
  466         meta = malloc(size, M_MD_INTEL, M_WAITOK);
  467         memcpy(meta, buf, min(size, pp->sectorsize));
  468         g_free(buf);
  469 
  470         /* Read all the rest, if needed. */
  471         if (meta->config_size > pp->sectorsize) {
  472                 left = (meta->config_size - 1) / pp->sectorsize;
  473                 buf = g_read_data(cp,
  474                     pp->mediasize - pp->sectorsize * (2 + left),
  475                     pp->sectorsize * left, &error);
  476                 if (buf == NULL) {
  477                         G_RAID_DEBUG(1, "Cannot read remaining metadata"
  478                             " part from %s (error=%d).",
  479                             pp->name, error);
  480                         free(meta, M_MD_INTEL);
  481                         return (NULL);
  482                 }
  483                 memcpy(((char *)meta) + pp->sectorsize, buf,
  484                     pp->sectorsize * left);
  485                 g_free(buf);
  486         }
  487 
  488         /* Check metadata checksum. */
  489         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  490             i < (meta->config_size / sizeof(uint32_t)); i++) {
  491                 checksum += *ptr++;
  492         }
  493         checksum -= meta->checksum;
  494         if (checksum != meta->checksum) {
  495                 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
  496                 free(meta, M_MD_INTEL);
  497                 return (NULL);
  498         }
  499 
  500         /* Validate metadata size. */
  501         size = sizeof(struct intel_raid_conf) +
  502             sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
  503             sizeof(struct intel_raid_vol) * meta->total_volumes;
  504         if (size > meta->config_size) {
  505 badsize:
  506                 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
  507                     meta->config_size, size);
  508                 free(meta, M_MD_INTEL);
  509                 return (NULL);
  510         }
  511         for (i = 0; i < meta->total_volumes; i++) {
  512                 mvol = intel_get_volume(meta, i);
  513                 mmap = intel_get_map(mvol, 0);
  514                 size += 4 * (mmap->total_disks - 1);
  515                 if (size > meta->config_size)
  516                         goto badsize;
  517                 if (mvol->migr_state) {
  518                         size += sizeof(struct intel_raid_map);
  519                         if (size > meta->config_size)
  520                                 goto badsize;
  521                         mmap = intel_get_map(mvol, 1);
  522                         size += 4 * (mmap->total_disks - 1);
  523                         if (size > meta->config_size)
  524                                 goto badsize;
  525                 }
  526         }
  527 
  528         /* Validate disk indexes. */
  529         for (i = 0; i < meta->total_volumes; i++) {
  530                 mvol = intel_get_volume(meta, i);
  531                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  532                         mmap = intel_get_map(mvol, j);
  533                         for (k = 0; k < mmap->total_disks; k++) {
  534                                 if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
  535                                     meta->total_disks) {
  536                                         G_RAID_DEBUG(1, "Intel metadata disk"
  537                                             " index %d too big (>%d)",
  538                                             mmap->disk_idx[k] & INTEL_DI_IDX,
  539                                             meta->total_disks);
  540                                         free(meta, M_MD_INTEL);
  541                                         return (NULL);
  542                                 }
  543                         }
  544                 }
  545         }
  546 
  547         /* Validate migration types. */
  548         for (i = 0; i < meta->total_volumes; i++) {
  549                 mvol = intel_get_volume(meta, i);
  550                 if (mvol->migr_state &&
  551                     mvol->migr_type != INTEL_MT_INIT &&
  552                     mvol->migr_type != INTEL_MT_REBUILD &&
  553                     mvol->migr_type != INTEL_MT_VERIFY &&
  554                     mvol->migr_type != INTEL_MT_REPAIR) {
  555                         G_RAID_DEBUG(1, "Intel metadata has unsupported"
  556                             " migration type %d", mvol->migr_type);
  557                         free(meta, M_MD_INTEL);
  558                         return (NULL);
  559                 }
  560         }
  561 
  562         return (meta);
  563 }
  564 
  565 static int
  566 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
  567 {
  568         struct g_provider *pp;
  569         char *buf;
  570         int error, i, sectors;
  571         uint32_t checksum, *ptr;
  572 
  573         pp = cp->provider;
  574 
  575         /* Recalculate checksum for case if metadata were changed. */
  576         meta->checksum = 0;
  577         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  578             i < (meta->config_size / sizeof(uint32_t)); i++) {
  579                 checksum += *ptr++;
  580         }
  581         meta->checksum = checksum;
  582 
  583         /* Create and fill buffer. */
  584         sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
  585         buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  586         if (sectors > 1) {
  587                 memcpy(buf, ((char *)meta) + pp->sectorsize,
  588                     (sectors - 1) * pp->sectorsize);
  589         }
  590         memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
  591 
  592         error = g_write_data(cp,
  593             pp->mediasize - pp->sectorsize * (1 + sectors),
  594             buf, pp->sectorsize * sectors);
  595         if (error != 0) {
  596                 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
  597                     pp->name, error);
  598         }
  599 
  600         free(buf, M_MD_INTEL);
  601         return (error);
  602 }
  603 
  604 static int
  605 intel_meta_erase(struct g_consumer *cp)
  606 {
  607         struct g_provider *pp;
  608         char *buf;
  609         int error;
  610 
  611         pp = cp->provider;
  612         buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  613         error = g_write_data(cp,
  614             pp->mediasize - 2 * pp->sectorsize,
  615             buf, pp->sectorsize);
  616         if (error != 0) {
  617                 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
  618                     pp->name, error);
  619         }
  620         free(buf, M_MD_INTEL);
  621         return (error);
  622 }
  623 
  624 static int
  625 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
  626 {
  627         struct intel_raid_conf *meta;
  628         int error;
  629 
  630         /* Fill anchor and single disk. */
  631         meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
  632         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
  633         memcpy(&meta->version[0], INTEL_VERSION_1000,
  634             sizeof(INTEL_VERSION_1000) - 1);
  635         meta->config_size = INTEL_MAX_MD_SIZE(1);
  636         meta->config_id = arc4random();
  637         meta->generation = 1;
  638         meta->total_disks = 1;
  639         meta->disk[0] = *d;
  640         error = intel_meta_write(cp, meta);
  641         free(meta, M_MD_INTEL);
  642         return (error);
  643 }
  644 
  645 static struct g_raid_disk *
  646 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
  647 {
  648         struct g_raid_disk      *disk;
  649         struct g_raid_md_intel_perdisk *pd;
  650 
  651         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  652                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  653                 if (pd->pd_disk_pos == id)
  654                         break;
  655         }
  656         return (disk);
  657 }
  658 
  659 static int
  660 g_raid_md_intel_supported(int level, int qual, int disks, int force)
  661 {
  662 
  663         switch (level) {
  664         case G_RAID_VOLUME_RL_RAID0:
  665                 if (disks < 1)
  666                         return (0);
  667                 if (!force && (disks < 2 || disks > 6))
  668                         return (0);
  669                 break;
  670         case G_RAID_VOLUME_RL_RAID1:
  671                 if (disks < 1)
  672                         return (0);
  673                 if (!force && (disks != 2))
  674                         return (0);
  675                 break;
  676         case G_RAID_VOLUME_RL_RAID1E:
  677                 if (disks < 2)
  678                         return (0);
  679                 if (!force && (disks != 4))
  680                         return (0);
  681                 break;
  682         case G_RAID_VOLUME_RL_RAID5:
  683                 if (disks < 3)
  684                         return (0);
  685                 if (!force && disks > 6)
  686                         return (0);
  687                 if (qual != G_RAID_VOLUME_RLQ_R5LA)
  688                         return (0);
  689                 break;
  690         default:
  691                 return (0);
  692         }
  693         if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
  694                 return (0);
  695         return (1);
  696 }
  697 
  698 static struct g_raid_volume *
  699 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
  700 {
  701         struct g_raid_volume    *mvol;
  702 
  703         TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
  704                 if ((intptr_t)(mvol->v_md_data) == id)
  705                         break;
  706         }
  707         return (mvol);
  708 }
  709 
  710 static int
  711 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
  712 {
  713         struct g_raid_softc *sc;
  714         struct g_raid_subdisk *sd, *tmpsd;
  715         struct g_raid_disk *olddisk, *tmpdisk;
  716         struct g_raid_md_object *md;
  717         struct g_raid_md_intel_object *mdi;
  718         struct g_raid_md_intel_perdisk *pd, *oldpd;
  719         struct intel_raid_conf *meta;
  720         struct intel_raid_vol *mvol;
  721         struct intel_raid_map *mmap0, *mmap1;
  722         int disk_pos, resurrection = 0;
  723 
  724         sc = disk->d_softc;
  725         md = sc->sc_md;
  726         mdi = (struct g_raid_md_intel_object *)md;
  727         meta = mdi->mdio_meta;
  728         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  729         olddisk = NULL;
  730 
  731         /* Find disk position in metadata by it's serial. */
  732         disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
  733         if (disk_pos < 0) {
  734                 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
  735                 /* Failed stale disk is useless for us. */
  736                 if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
  737                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
  738                         return (0);
  739                 }
  740                 /* If we are in the start process, that's all for now. */
  741                 if (!mdi->mdio_started)
  742                         goto nofit;
  743                 /*
  744                  * If we have already started - try to get use of the disk.
  745                  * Try to replace OFFLINE disks first, then FAILED.
  746                  */
  747                 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
  748                         if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
  749                             tmpdisk->d_state != G_RAID_DISK_S_FAILED)
  750                                 continue;
  751                         /* Make sure this disk is big enough. */
  752                         TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
  753                                 off_t disk_sectors = 
  754                                     intel_get_disk_sectors(&pd->pd_disk_meta);
  755 
  756                                 if (sd->sd_offset + sd->sd_size + 4096 >
  757                                     disk_sectors * 512) {
  758                                         G_RAID_DEBUG1(1, sc,
  759                                             "Disk too small (%llu < %llu)",
  760                                             (unsigned long long)
  761                                             disk_sectors * 512,
  762                                             (unsigned long long)
  763                                             sd->sd_offset + sd->sd_size + 4096);
  764                                         break;
  765                                 }
  766                         }
  767                         if (sd != NULL)
  768                                 continue;
  769                         if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
  770                                 olddisk = tmpdisk;
  771                                 break;
  772                         } else if (olddisk == NULL)
  773                                 olddisk = tmpdisk;
  774                 }
  775                 if (olddisk == NULL) {
  776 nofit:
  777                         if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
  778                                 g_raid_change_disk_state(disk,
  779                                     G_RAID_DISK_S_SPARE);
  780                                 return (1);
  781                         } else {
  782                                 g_raid_change_disk_state(disk,
  783                                     G_RAID_DISK_S_STALE);
  784                                 return (0);
  785                         }
  786                 }
  787                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  788                 disk_pos = oldpd->pd_disk_pos;
  789                 resurrection = 1;
  790         }
  791 
  792         if (olddisk == NULL) {
  793                 /* Find placeholder by position. */
  794                 olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
  795                 if (olddisk == NULL)
  796                         panic("No disk at position %d!", disk_pos);
  797                 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
  798                         G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
  799                             disk_pos);
  800                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
  801                         return (0);
  802                 }
  803                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  804         }
  805 
  806         /* Replace failed disk or placeholder with new disk. */
  807         TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
  808                 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
  809                 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
  810                 sd->sd_disk = disk;
  811         }
  812         oldpd->pd_disk_pos = -2;
  813         pd->pd_disk_pos = disk_pos;
  814 
  815         /* If it was placeholder -- destroy it. */
  816         if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
  817                 g_raid_destroy_disk(olddisk);
  818         } else {
  819                 /* Otherwise, make it STALE_FAILED. */
  820                 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
  821                 /* Update global metadata just in case. */
  822                 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
  823                     sizeof(struct intel_raid_disk));
  824         }
  825 
  826         /* Welcome the new disk. */
  827         if (resurrection)
  828                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  829         else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
  830                 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
  831         else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
  832                 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
  833         else
  834                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  835         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
  836                 mvol = intel_get_volume(meta,
  837                     (uintptr_t)(sd->sd_volume->v_md_data));
  838                 mmap0 = intel_get_map(mvol, 0);
  839                 if (mvol->migr_state)
  840                         mmap1 = intel_get_map(mvol, 1);
  841                 else
  842                         mmap1 = mmap0;
  843 
  844                 if (resurrection) {
  845                         /* Stale disk, almost same as new. */
  846                         g_raid_change_subdisk_state(sd,
  847                             G_RAID_SUBDISK_S_NEW);
  848                 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
  849                         /* Failed disk, almost useless. */
  850                         g_raid_change_subdisk_state(sd,
  851                             G_RAID_SUBDISK_S_FAILED);
  852                 } else if (mvol->migr_state == 0) {
  853                         if (mmap0->status == INTEL_S_UNINITIALIZED) {
  854                                 /* Freshly created uninitialized volume. */
  855                                 g_raid_change_subdisk_state(sd,
  856                                     G_RAID_SUBDISK_S_UNINITIALIZED);
  857                         } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  858                                 /* Freshly inserted disk. */
  859                                 g_raid_change_subdisk_state(sd,
  860                                     G_RAID_SUBDISK_S_NEW);
  861                         } else if (mvol->dirty) {
  862                                 /* Dirty volume (unclean shutdown). */
  863                                 g_raid_change_subdisk_state(sd,
  864                                     G_RAID_SUBDISK_S_STALE);
  865                         } else {
  866                                 /* Up to date disk. */
  867                                 g_raid_change_subdisk_state(sd,
  868                                     G_RAID_SUBDISK_S_ACTIVE);
  869                         }
  870                 } else if (mvol->migr_type == INTEL_MT_INIT ||
  871                            mvol->migr_type == INTEL_MT_REBUILD) {
  872                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  873                                 /* Freshly inserted disk. */
  874                                 g_raid_change_subdisk_state(sd,
  875                                     G_RAID_SUBDISK_S_NEW);
  876                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  877                                 /* Rebuilding disk. */
  878                                 g_raid_change_subdisk_state(sd,
  879                                     G_RAID_SUBDISK_S_REBUILD);
  880                                 if (mvol->dirty) {
  881                                         sd->sd_rebuild_pos = 0;
  882                                 } else {
  883                                         sd->sd_rebuild_pos =
  884                                             intel_get_vol_curr_migr_unit(mvol) *
  885                                             sd->sd_volume->v_strip_size *
  886                                             mmap0->total_domains;
  887                                 }
  888                         } else if (mvol->dirty) {
  889                                 /* Dirty volume (unclean shutdown). */
  890                                 g_raid_change_subdisk_state(sd,
  891                                     G_RAID_SUBDISK_S_STALE);
  892                         } else {
  893                                 /* Up to date disk. */
  894                                 g_raid_change_subdisk_state(sd,
  895                                     G_RAID_SUBDISK_S_ACTIVE);
  896                         }
  897                 } else if (mvol->migr_type == INTEL_MT_VERIFY ||
  898                            mvol->migr_type == INTEL_MT_REPAIR) {
  899                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  900                                 /* Freshly inserted disk. */
  901                                 g_raid_change_subdisk_state(sd,
  902                                     G_RAID_SUBDISK_S_NEW);
  903                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  904                                 /* Resyncing disk. */
  905                                 g_raid_change_subdisk_state(sd,
  906                                     G_RAID_SUBDISK_S_RESYNC);
  907                                 if (mvol->dirty) {
  908                                         sd->sd_rebuild_pos = 0;
  909                                 } else {
  910                                         sd->sd_rebuild_pos =
  911                                             intel_get_vol_curr_migr_unit(mvol) *
  912                                             sd->sd_volume->v_strip_size *
  913                                             mmap0->total_domains;
  914                                 }
  915                         } else if (mvol->dirty) {
  916                                 /* Dirty volume (unclean shutdown). */
  917                                 g_raid_change_subdisk_state(sd,
  918                                     G_RAID_SUBDISK_S_STALE);
  919                         } else {
  920                                 /* Up to date disk. */
  921                                 g_raid_change_subdisk_state(sd,
  922                                     G_RAID_SUBDISK_S_ACTIVE);
  923                         }
  924                 }
  925                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
  926                     G_RAID_EVENT_SUBDISK);
  927         }
  928 
  929         /* Update status of our need for spare. */
  930         if (mdi->mdio_started) {
  931                 mdi->mdio_incomplete =
  932                     (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
  933                      meta->total_disks);
  934         }
  935 
  936         return (resurrection);
  937 }
  938 
  939 static void
  940 g_disk_md_intel_retaste(void *arg, int pending)
  941 {
  942 
  943         G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
  944         g_retaste(&g_raid_class);
  945         free(arg, M_MD_INTEL);
  946 }
  947 
  948 static void
  949 g_raid_md_intel_refill(struct g_raid_softc *sc)
  950 {
  951         struct g_raid_md_object *md;
  952         struct g_raid_md_intel_object *mdi;
  953         struct intel_raid_conf *meta;
  954         struct g_raid_disk *disk;
  955         struct task *task;
  956         int update, na;
  957 
  958         md = sc->sc_md;
  959         mdi = (struct g_raid_md_intel_object *)md;
  960         meta = mdi->mdio_meta;
  961         update = 0;
  962         do {
  963                 /* Make sure we miss anything. */
  964                 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
  965                 if (na == meta->total_disks)
  966                         break;
  967 
  968                 G_RAID_DEBUG1(1, md->mdo_softc,
  969                     "Array is not complete (%d of %d), "
  970                     "trying to refill.", na, meta->total_disks);
  971 
  972                 /* Try to get use some of STALE disks. */
  973                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  974                         if (disk->d_state == G_RAID_DISK_S_STALE) {
  975                                 update += g_raid_md_intel_start_disk(disk);
  976                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  977                                         break;
  978                         }
  979                 }
  980                 if (disk != NULL)
  981                         continue;
  982 
  983                 /* Try to get use some of SPARE disks. */
  984                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  985                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
  986                                 update += g_raid_md_intel_start_disk(disk);
  987                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  988                                         break;
  989                         }
  990                 }
  991         } while (disk != NULL);
  992 
  993         /* Write new metadata if we changed something. */
  994         if (update) {
  995                 g_raid_md_write_intel(md, NULL, NULL, NULL);
  996                 meta = mdi->mdio_meta;
  997         }
  998 
  999         /* Update status of our need for spare. */
 1000         mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
 1001             meta->total_disks);
 1002 
 1003         /* Request retaste hoping to find spare. */
 1004         if (mdi->mdio_incomplete) {
 1005                 task = malloc(sizeof(struct task),
 1006                     M_MD_INTEL, M_WAITOK | M_ZERO);
 1007                 TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
 1008                 taskqueue_enqueue(taskqueue_swi, task);
 1009         }
 1010 }
 1011 
 1012 static void
 1013 g_raid_md_intel_start(struct g_raid_softc *sc)
 1014 {
 1015         struct g_raid_md_object *md;
 1016         struct g_raid_md_intel_object *mdi;
 1017         struct g_raid_md_intel_perdisk *pd;
 1018         struct intel_raid_conf *meta;
 1019         struct intel_raid_vol *mvol;
 1020         struct intel_raid_map *mmap;
 1021         struct g_raid_volume *vol;
 1022         struct g_raid_subdisk *sd;
 1023         struct g_raid_disk *disk;
 1024         int i, j, disk_pos;
 1025 
 1026         md = sc->sc_md;
 1027         mdi = (struct g_raid_md_intel_object *)md;
 1028         meta = mdi->mdio_meta;
 1029 
 1030         /* Create volumes and subdisks. */
 1031         for (i = 0; i < meta->total_volumes; i++) {
 1032                 mvol = intel_get_volume(meta, i);
 1033                 mmap = intel_get_map(mvol, 0);
 1034                 vol = g_raid_create_volume(sc, mvol->name, -1);
 1035                 vol->v_md_data = (void *)(intptr_t)i;
 1036                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1037                 if (mmap->type == INTEL_T_RAID0)
 1038                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
 1039                 else if (mmap->type == INTEL_T_RAID1 &&
 1040                     mmap->total_domains >= 2 &&
 1041                     mmap->total_domains <= mmap->total_disks) {
 1042                         /* Assume total_domains is correct. */
 1043                         if (mmap->total_domains == mmap->total_disks)
 1044                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 1045                         else
 1046                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 1047                 } else if (mmap->type == INTEL_T_RAID1) {
 1048                         /* total_domains looks wrong. */
 1049                         if (mmap->total_disks <= 2)
 1050                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 1051                         else
 1052                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 1053                 } else if (mmap->type == INTEL_T_RAID5) {
 1054                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
 1055                         vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
 1056                 } else
 1057                         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 1058                 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
 1059                 vol->v_disks_count = mmap->total_disks;
 1060                 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
 1061                 vol->v_sectorsize = 512; //ZZZ
 1062                 for (j = 0; j < vol->v_disks_count; j++) {
 1063                         sd = &vol->v_subdisks[j];
 1064                         sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
 1065                         sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
 1066                 }
 1067                 g_raid_start_volume(vol);
 1068         }
 1069 
 1070         /* Create disk placeholders to store data for later writing. */
 1071         for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
 1072                 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1073                 pd->pd_disk_pos = disk_pos;
 1074                 pd->pd_disk_meta = meta->disk[disk_pos];
 1075                 disk = g_raid_create_disk(sc);
 1076                 disk->d_md_data = (void *)pd;
 1077                 disk->d_state = G_RAID_DISK_S_OFFLINE;
 1078                 for (i = 0; i < meta->total_volumes; i++) {
 1079                         mvol = intel_get_volume(meta, i);
 1080                         mmap = intel_get_map(mvol, 0);
 1081                         for (j = 0; j < mmap->total_disks; j++) {
 1082                                 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
 1083                                         break;
 1084                         }
 1085                         if (j == mmap->total_disks)
 1086                                 continue;
 1087                         vol = g_raid_md_intel_get_volume(sc, i);
 1088                         sd = &vol->v_subdisks[j];
 1089                         sd->sd_disk = disk;
 1090                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1091                 }
 1092         }
 1093 
 1094         /* Make all disks found till the moment take their places. */
 1095         do {
 1096                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1097                         if (disk->d_state == G_RAID_DISK_S_NONE) {
 1098                                 g_raid_md_intel_start_disk(disk);
 1099                                 break;
 1100                         }
 1101                 }
 1102         } while (disk != NULL);
 1103 
 1104         mdi->mdio_started = 1;
 1105         G_RAID_DEBUG1(0, sc, "Array started.");
 1106         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1107 
 1108         /* Pickup any STALE/SPARE disks to refill array if needed. */
 1109         g_raid_md_intel_refill(sc);
 1110 
 1111         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1112                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1113                     G_RAID_EVENT_VOLUME);
 1114         }
 1115 
 1116         callout_stop(&mdi->mdio_start_co);
 1117         G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
 1118         root_mount_rel(mdi->mdio_rootmount);
 1119         mdi->mdio_rootmount = NULL;
 1120 }
 1121 
 1122 static void
 1123 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
 1124 {
 1125         struct g_raid_softc *sc;
 1126         struct g_raid_md_object *md;
 1127         struct g_raid_md_intel_object *mdi;
 1128         struct intel_raid_conf *pdmeta;
 1129         struct g_raid_md_intel_perdisk *pd;
 1130 
 1131         sc = disk->d_softc;
 1132         md = sc->sc_md;
 1133         mdi = (struct g_raid_md_intel_object *)md;
 1134         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1135         pdmeta = pd->pd_meta;
 1136 
 1137         if (mdi->mdio_started) {
 1138                 if (g_raid_md_intel_start_disk(disk))
 1139                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1140         } else {
 1141                 /* If we haven't started yet - check metadata freshness. */
 1142                 if (mdi->mdio_meta == NULL ||
 1143                     ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
 1144                         G_RAID_DEBUG1(1, sc, "Newer disk");
 1145                         if (mdi->mdio_meta != NULL)
 1146                                 free(mdi->mdio_meta, M_MD_INTEL);
 1147                         mdi->mdio_meta = intel_meta_copy(pdmeta);
 1148                         mdi->mdio_generation = mdi->mdio_meta->generation;
 1149                         mdi->mdio_disks_present = 1;
 1150                 } else if (pdmeta->generation == mdi->mdio_generation) {
 1151                         mdi->mdio_disks_present++;
 1152                         G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
 1153                             mdi->mdio_disks_present,
 1154                             mdi->mdio_meta->total_disks);
 1155                 } else {
 1156                         G_RAID_DEBUG1(1, sc, "Older disk");
 1157                 }
 1158                 /* If we collected all needed disks - start array. */
 1159                 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
 1160                         g_raid_md_intel_start(sc);
 1161         }
 1162 }
 1163 
 1164 static void
 1165 g_raid_intel_go(void *arg)
 1166 {
 1167         struct g_raid_softc *sc;
 1168         struct g_raid_md_object *md;
 1169         struct g_raid_md_intel_object *mdi;
 1170 
 1171         sc = arg;
 1172         md = sc->sc_md;
 1173         mdi = (struct g_raid_md_intel_object *)md;
 1174         if (!mdi->mdio_started) {
 1175                 G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
 1176                 g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
 1177         }
 1178 }
 1179 
 1180 static int
 1181 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
 1182     struct g_geom **gp)
 1183 {
 1184         struct g_raid_softc *sc;
 1185         struct g_raid_md_intel_object *mdi;
 1186         char name[16];
 1187 
 1188         mdi = (struct g_raid_md_intel_object *)md;
 1189         mdi->mdio_config_id = arc4random();
 1190         mdi->mdio_generation = 0;
 1191         snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
 1192         sc = g_raid_create_node(mp, name, md);
 1193         if (sc == NULL)
 1194                 return (G_RAID_MD_TASTE_FAIL);
 1195         md->mdo_softc = sc;
 1196         *gp = sc->sc_geom;
 1197         return (G_RAID_MD_TASTE_NEW);
 1198 }
 1199 
 1200 /*
 1201  * Return the last N characters of the serial label.  The Linux and
 1202  * ataraid(7) code always uses the last 16 characters of the label to
 1203  * store into the Intel meta format.  Generalize this to N characters
 1204  * since that's easy.  Labels can be up to 20 characters for SATA drives
 1205  * and up 251 characters for SAS drives.  Since intel controllers don't
 1206  * support SAS drives, just stick with the SATA limits for stack friendliness.
 1207  */
 1208 static int
 1209 g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
 1210 {
 1211         char serial_buffer[24];
 1212         int len, error;
 1213         
 1214         len = sizeof(serial_buffer);
 1215         error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
 1216         if (error != 0)
 1217                 return (error);
 1218         len = strlen(serial_buffer);
 1219         if (len > serlen)
 1220                 len -= serlen;
 1221         else
 1222                 len = 0;
 1223         strncpy(serial, serial_buffer + len, serlen);
 1224         return (0);
 1225 }
 1226 
 1227 static int
 1228 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
 1229                               struct g_consumer *cp, struct g_geom **gp)
 1230 {
 1231         struct g_consumer *rcp;
 1232         struct g_provider *pp;
 1233         struct g_raid_md_intel_object *mdi, *mdi1;
 1234         struct g_raid_softc *sc;
 1235         struct g_raid_disk *disk;
 1236         struct intel_raid_conf *meta;
 1237         struct g_raid_md_intel_perdisk *pd;
 1238         struct g_geom *geom;
 1239         int error, disk_pos, result, spare, len;
 1240         char serial[INTEL_SERIAL_LEN];
 1241         char name[16];
 1242         uint16_t vendor;
 1243 
 1244         G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
 1245         mdi = (struct g_raid_md_intel_object *)md;
 1246         pp = cp->provider;
 1247 
 1248         /* Read metadata from device. */
 1249         meta = NULL;
 1250         vendor = 0xffff;
 1251         disk_pos = 0;
 1252         if (g_access(cp, 1, 0, 0) != 0)
 1253                 return (G_RAID_MD_TASTE_FAIL);
 1254         g_topology_unlock();
 1255         error = g_raid_md_get_label(cp, serial, sizeof(serial));
 1256         if (error != 0) {
 1257                 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
 1258                     pp->name, error);
 1259                 goto fail2;
 1260         }
 1261         len = 2;
 1262         if (pp->geom->rank == 1)
 1263                 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
 1264         meta = intel_meta_read(cp);
 1265         g_topology_lock();
 1266         g_access(cp, -1, 0, 0);
 1267         if (meta == NULL) {
 1268                 if (g_raid_aggressive_spare) {
 1269                         if (vendor != 0x8086) {
 1270                                 G_RAID_DEBUG(1,
 1271                                     "Intel vendor mismatch 0x%04x != 0x8086",
 1272                                     vendor);
 1273                         } else {
 1274                                 G_RAID_DEBUG(1,
 1275                                     "No Intel metadata, forcing spare.");
 1276                                 spare = 2;
 1277                                 goto search;
 1278                         }
 1279                 }
 1280                 return (G_RAID_MD_TASTE_FAIL);
 1281         }
 1282 
 1283         /* Check this disk position in obtained metadata. */
 1284         disk_pos = intel_meta_find_disk(meta, serial);
 1285         if (disk_pos < 0) {
 1286                 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
 1287                 goto fail1;
 1288         }
 1289         if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
 1290             (pp->mediasize / pp->sectorsize)) {
 1291                 G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
 1292                     intel_get_disk_sectors(&meta->disk[disk_pos]),
 1293                     (off_t)(pp->mediasize / pp->sectorsize));
 1294                 goto fail1;
 1295         }
 1296 
 1297         /* Metadata valid. Print it. */
 1298         g_raid_md_intel_print(meta);
 1299         G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
 1300         spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
 1301 
 1302 search:
 1303         /* Search for matching node. */
 1304         sc = NULL;
 1305         mdi1 = NULL;
 1306         LIST_FOREACH(geom, &mp->geom, geom) {
 1307                 sc = geom->softc;
 1308                 if (sc == NULL)
 1309                         continue;
 1310                 if (sc->sc_stopping != 0)
 1311                         continue;
 1312                 if (sc->sc_md->mdo_class != md->mdo_class)
 1313                         continue;
 1314                 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
 1315                 if (spare) {
 1316                         if (mdi1->mdio_incomplete)
 1317                                 break;
 1318                 } else {
 1319                         if (mdi1->mdio_config_id == meta->config_id)
 1320                                 break;
 1321                 }
 1322         }
 1323 
 1324         /* Found matching node. */
 1325         if (geom != NULL) {
 1326                 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
 1327                 result = G_RAID_MD_TASTE_EXISTING;
 1328 
 1329         } else if (spare) { /* Not found needy node -- left for later. */
 1330                 G_RAID_DEBUG(1, "Spare is not needed at this time");
 1331                 goto fail1;
 1332 
 1333         } else { /* Not found matching node -- create one. */
 1334                 result = G_RAID_MD_TASTE_NEW;
 1335                 mdi->mdio_config_id = meta->config_id;
 1336                 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
 1337                 sc = g_raid_create_node(mp, name, md);
 1338                 md->mdo_softc = sc;
 1339                 geom = sc->sc_geom;
 1340                 callout_init(&mdi->mdio_start_co, 1);
 1341                 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
 1342                     g_raid_intel_go, sc);
 1343                 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
 1344                 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
 1345         }
 1346 
 1347         rcp = g_new_consumer(geom);
 1348         g_attach(rcp, pp);
 1349         if (g_access(rcp, 1, 1, 1) != 0)
 1350                 ; //goto fail1;
 1351 
 1352         g_topology_unlock();
 1353         sx_xlock(&sc->sc_lock);
 1354 
 1355         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1356         pd->pd_meta = meta;
 1357         pd->pd_disk_pos = -1;
 1358         if (spare == 2) {
 1359                 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
 1360                 intel_set_disk_sectors(&pd->pd_disk_meta, 
 1361                     pp->mediasize / pp->sectorsize);
 1362                 pd->pd_disk_meta.id = 0;
 1363                 pd->pd_disk_meta.flags = INTEL_F_SPARE;
 1364         } else {
 1365                 pd->pd_disk_meta = meta->disk[disk_pos];
 1366         }
 1367         disk = g_raid_create_disk(sc);
 1368         disk->d_md_data = (void *)pd;
 1369         disk->d_consumer = rcp;
 1370         rcp->private = disk;
 1371 
 1372         /* Read kernel dumping information. */
 1373         disk->d_kd.offset = 0;
 1374         disk->d_kd.length = OFF_MAX;
 1375         len = sizeof(disk->d_kd);
 1376         error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
 1377         if (disk->d_kd.di.dumper == NULL)
 1378                 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 
 1379                     rcp->provider->name, error);
 1380 
 1381         g_raid_md_intel_new_disk(disk);
 1382 
 1383         sx_xunlock(&sc->sc_lock);
 1384         g_topology_lock();
 1385         *gp = geom;
 1386         return (result);
 1387 fail2:
 1388         g_topology_lock();
 1389         g_access(cp, -1, 0, 0);
 1390 fail1:
 1391         free(meta, M_MD_INTEL);
 1392         return (G_RAID_MD_TASTE_FAIL);
 1393 }
 1394 
 1395 static int
 1396 g_raid_md_event_intel(struct g_raid_md_object *md,
 1397     struct g_raid_disk *disk, u_int event)
 1398 {
 1399         struct g_raid_softc *sc;
 1400         struct g_raid_subdisk *sd;
 1401         struct g_raid_md_intel_object *mdi;
 1402         struct g_raid_md_intel_perdisk *pd;
 1403 
 1404         sc = md->mdo_softc;
 1405         mdi = (struct g_raid_md_intel_object *)md;
 1406         if (disk == NULL) {
 1407                 switch (event) {
 1408                 case G_RAID_NODE_E_START:
 1409                         if (!mdi->mdio_started)
 1410                                 g_raid_md_intel_start(sc);
 1411                         return (0);
 1412                 }
 1413                 return (-1);
 1414         }
 1415         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1416         switch (event) {
 1417         case G_RAID_DISK_E_DISCONNECTED:
 1418                 /* If disk was assigned, just update statuses. */
 1419                 if (pd->pd_disk_pos >= 0) {
 1420                         g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1421                         if (disk->d_consumer) {
 1422                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1423                                 disk->d_consumer = NULL;
 1424                         }
 1425                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1426                                 g_raid_change_subdisk_state(sd,
 1427                                     G_RAID_SUBDISK_S_NONE);
 1428                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1429                                     G_RAID_EVENT_SUBDISK);
 1430                         }
 1431                 } else {
 1432                         /* Otherwise -- delete. */
 1433                         g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 1434                         g_raid_destroy_disk(disk);
 1435                 }
 1436 
 1437                 /* Write updated metadata to all disks. */
 1438                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1439 
 1440                 /* Check if anything left except placeholders. */
 1441                 if (g_raid_ndisks(sc, -1) ==
 1442                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 1443                         g_raid_destroy_node(sc, 0);
 1444                 else
 1445                         g_raid_md_intel_refill(sc);
 1446                 return (0);
 1447         }
 1448         return (-2);
 1449 }
 1450 
 1451 static int
 1452 g_raid_md_ctl_intel(struct g_raid_md_object *md,
 1453     struct gctl_req *req)
 1454 {
 1455         struct g_raid_softc *sc;
 1456         struct g_raid_volume *vol, *vol1;
 1457         struct g_raid_subdisk *sd;
 1458         struct g_raid_disk *disk;
 1459         struct g_raid_md_intel_object *mdi;
 1460         struct g_raid_md_intel_perdisk *pd;
 1461         struct g_consumer *cp;
 1462         struct g_provider *pp;
 1463         char arg[16], serial[INTEL_SERIAL_LEN];
 1464         const char *verb, *volname, *levelname, *diskname;
 1465         char *tmp;
 1466         int *nargs, *force;
 1467         off_t off, size, sectorsize, strip, disk_sectors;
 1468         intmax_t *sizearg, *striparg;
 1469         int numdisks, i, len, level, qual, update;
 1470         int error;
 1471 
 1472         sc = md->mdo_softc;
 1473         mdi = (struct g_raid_md_intel_object *)md;
 1474         verb = gctl_get_param(req, "verb", NULL);
 1475         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 1476         error = 0;
 1477         if (strcmp(verb, "label") == 0) {
 1478 
 1479                 if (*nargs < 4) {
 1480                         gctl_error(req, "Invalid number of arguments.");
 1481                         return (-1);
 1482                 }
 1483                 volname = gctl_get_asciiparam(req, "arg1");
 1484                 if (volname == NULL) {
 1485                         gctl_error(req, "No volume name.");
 1486                         return (-2);
 1487                 }
 1488                 levelname = gctl_get_asciiparam(req, "arg2");
 1489                 if (levelname == NULL) {
 1490                         gctl_error(req, "No RAID level.");
 1491                         return (-3);
 1492                 }
 1493                 if (strcasecmp(levelname, "RAID5") == 0)
 1494                         levelname = "RAID5-LA";
 1495                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1496                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1497                         return (-4);
 1498                 }
 1499                 numdisks = *nargs - 3;
 1500                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1501                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1502                     force ? *force : 0)) {
 1503                         gctl_error(req, "Unsupported RAID level "
 1504                             "(0x%02x/0x%02x), or number of disks (%d).",
 1505                             level, qual, numdisks);
 1506                         return (-5);
 1507                 }
 1508 
 1509                 /* Search for disks, connect them and probe. */
 1510                 size = 0x7fffffffffffffffllu;
 1511                 sectorsize = 0;
 1512                 for (i = 0; i < numdisks; i++) {
 1513                         snprintf(arg, sizeof(arg), "arg%d", i + 3);
 1514                         diskname = gctl_get_asciiparam(req, arg);
 1515                         if (diskname == NULL) {
 1516                                 gctl_error(req, "No disk name (%s).", arg);
 1517                                 error = -6;
 1518                                 break;
 1519                         }
 1520                         if (strcmp(diskname, "NONE") == 0) {
 1521                                 cp = NULL;
 1522                                 pp = NULL;
 1523                         } else {
 1524                                 g_topology_lock();
 1525                                 cp = g_raid_open_consumer(sc, diskname);
 1526                                 if (cp == NULL) {
 1527                                         gctl_error(req, "Can't open disk '%s'.",
 1528                                             diskname);
 1529                                         g_topology_unlock();
 1530                                         error = -7;
 1531                                         break;
 1532                                 }
 1533                                 pp = cp->provider;
 1534                         }
 1535                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1536                         pd->pd_disk_pos = i;
 1537                         disk = g_raid_create_disk(sc);
 1538                         disk->d_md_data = (void *)pd;
 1539                         disk->d_consumer = cp;
 1540                         if (cp == NULL) {
 1541                                 strcpy(&pd->pd_disk_meta.serial[0], "NONE");
 1542                                 pd->pd_disk_meta.id = 0xffffffff;
 1543                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 1544                                 continue;
 1545                         }
 1546                         cp->private = disk;
 1547                         g_topology_unlock();
 1548 
 1549                         error = g_raid_md_get_label(cp,
 1550                             &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
 1551                         if (error != 0) {
 1552                                 gctl_error(req,
 1553                                     "Can't get serial for provider '%s'.",
 1554                                     diskname);
 1555                                 error = -8;
 1556                                 break;
 1557                         }
 1558 
 1559                         /* Read kernel dumping information. */
 1560                         disk->d_kd.offset = 0;
 1561                         disk->d_kd.length = OFF_MAX;
 1562                         len = sizeof(disk->d_kd);
 1563                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 1564                         if (disk->d_kd.di.dumper == NULL)
 1565                                 G_RAID_DEBUG1(2, sc,
 1566                                     "Dumping not supported by %s.",
 1567                                     cp->provider->name);
 1568 
 1569                         intel_set_disk_sectors(&pd->pd_disk_meta,
 1570                             pp->mediasize / pp->sectorsize);
 1571                         if (size > pp->mediasize)
 1572                                 size = pp->mediasize;
 1573                         if (sectorsize < pp->sectorsize)
 1574                                 sectorsize = pp->sectorsize;
 1575                         pd->pd_disk_meta.id = 0;
 1576                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
 1577                 }
 1578                 if (error != 0)
 1579                         return (error);
 1580 
 1581                 if (sectorsize <= 0) {
 1582                         gctl_error(req, "Can't get sector size.");
 1583                         return (-8);
 1584                 }
 1585 
 1586                 /* Reserve some space for metadata. */
 1587                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1588 
 1589                 /* Handle size argument. */
 1590                 len = sizeof(*sizearg);
 1591                 sizearg = gctl_get_param(req, "size", &len);
 1592                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1593                     *sizearg > 0) {
 1594                         if (*sizearg > size) {
 1595                                 gctl_error(req, "Size too big %lld > %lld.",
 1596                                     (long long)*sizearg, (long long)size);
 1597                                 return (-9);
 1598                         }
 1599                         size = *sizearg;
 1600                 }
 1601 
 1602                 /* Handle strip argument. */
 1603                 strip = 131072;
 1604                 len = sizeof(*striparg);
 1605                 striparg = gctl_get_param(req, "strip", &len);
 1606                 if (striparg != NULL && len == sizeof(*striparg) &&
 1607                     *striparg > 0) {
 1608                         if (*striparg < sectorsize) {
 1609                                 gctl_error(req, "Strip size too small.");
 1610                                 return (-10);
 1611                         }
 1612                         if (*striparg % sectorsize != 0) {
 1613                                 gctl_error(req, "Incorrect strip size.");
 1614                                 return (-11);
 1615                         }
 1616                         if (strip > 65535 * sectorsize) {
 1617                                 gctl_error(req, "Strip size too big.");
 1618                                 return (-12);
 1619                         }
 1620                         strip = *striparg;
 1621                 }
 1622 
 1623                 /* Round size down to strip or sector. */
 1624                 if (level == G_RAID_VOLUME_RL_RAID1)
 1625                         size -= (size % sectorsize);
 1626                 else if (level == G_RAID_VOLUME_RL_RAID1E &&
 1627                     (numdisks & 1) != 0)
 1628                         size -= (size % (2 * strip));
 1629                 else
 1630                         size -= (size % strip);
 1631                 if (size <= 0) {
 1632                         gctl_error(req, "Size too small.");
 1633                         return (-13);
 1634                 }
 1635 
 1636                 /* We have all we need, create things: volume, ... */
 1637                 mdi->mdio_started = 1;
 1638                 vol = g_raid_create_volume(sc, volname, -1);
 1639                 vol->v_md_data = (void *)(intptr_t)0;
 1640                 vol->v_raid_level = level;
 1641                 vol->v_raid_level_qualifier = qual;
 1642                 vol->v_strip_size = strip;
 1643                 vol->v_disks_count = numdisks;
 1644                 if (level == G_RAID_VOLUME_RL_RAID0)
 1645                         vol->v_mediasize = size * numdisks;
 1646                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1647                         vol->v_mediasize = size;
 1648                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1649                         vol->v_mediasize = size * (numdisks - 1);
 1650                 else { /* RAID1E */
 1651                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1652                             strip;
 1653                 }
 1654                 vol->v_sectorsize = sectorsize;
 1655                 g_raid_start_volume(vol);
 1656 
 1657                 /* , and subdisks. */
 1658                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1659                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1660                         sd = &vol->v_subdisks[pd->pd_disk_pos];
 1661                         sd->sd_disk = disk;
 1662                         sd->sd_offset = 0;
 1663                         sd->sd_size = size;
 1664                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1665                         if (sd->sd_disk->d_consumer != NULL) {
 1666                                 g_raid_change_disk_state(disk,
 1667                                     G_RAID_DISK_S_ACTIVE);
 1668                                 if (level == G_RAID_VOLUME_RL_RAID5)
 1669                                         g_raid_change_subdisk_state(sd,
 1670                                             G_RAID_SUBDISK_S_UNINITIALIZED);
 1671                                 else
 1672                                         g_raid_change_subdisk_state(sd,
 1673                                             G_RAID_SUBDISK_S_ACTIVE);
 1674                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1675                                     G_RAID_EVENT_SUBDISK);
 1676                         } else {
 1677                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1678                         }
 1679                 }
 1680 
 1681                 /* Write metadata based on created entities. */
 1682                 G_RAID_DEBUG1(0, sc, "Array started.");
 1683                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1684 
 1685                 /* Pickup any STALE/SPARE disks to refill array if needed. */
 1686                 g_raid_md_intel_refill(sc);
 1687 
 1688                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1689                     G_RAID_EVENT_VOLUME);
 1690                 return (0);
 1691         }
 1692         if (strcmp(verb, "add") == 0) {
 1693 
 1694                 if (*nargs != 3) {
 1695                         gctl_error(req, "Invalid number of arguments.");
 1696                         return (-1);
 1697                 }
 1698                 volname = gctl_get_asciiparam(req, "arg1");
 1699                 if (volname == NULL) {
 1700                         gctl_error(req, "No volume name.");
 1701                         return (-2);
 1702                 }
 1703                 levelname = gctl_get_asciiparam(req, "arg2");
 1704                 if (levelname == NULL) {
 1705                         gctl_error(req, "No RAID level.");
 1706                         return (-3);
 1707                 }
 1708                 if (strcasecmp(levelname, "RAID5") == 0)
 1709                         levelname = "RAID5-LA";
 1710                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1711                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1712                         return (-4);
 1713                 }
 1714 
 1715                 /* Look for existing volumes. */
 1716                 i = 0;
 1717                 vol1 = NULL;
 1718                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1719                         vol1 = vol;
 1720                         i++;
 1721                 }
 1722                 if (i > 1) {
 1723                         gctl_error(req, "Maximum two volumes supported.");
 1724                         return (-6);
 1725                 }
 1726                 if (vol1 == NULL) {
 1727                         gctl_error(req, "At least one volume must exist.");
 1728                         return (-7);
 1729                 }
 1730 
 1731                 numdisks = vol1->v_disks_count;
 1732                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1733                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1734                     force ? *force : 0)) {
 1735                         gctl_error(req, "Unsupported RAID level "
 1736                             "(0x%02x/0x%02x), or number of disks (%d).",
 1737                             level, qual, numdisks);
 1738                         return (-5);
 1739                 }
 1740 
 1741                 /* Collect info about present disks. */
 1742                 size = 0x7fffffffffffffffllu;
 1743                 sectorsize = 512;
 1744                 for (i = 0; i < numdisks; i++) {
 1745                         disk = vol1->v_subdisks[i].sd_disk;
 1746                         pd = (struct g_raid_md_intel_perdisk *)
 1747                             disk->d_md_data;
 1748                         disk_sectors = 
 1749                             intel_get_disk_sectors(&pd->pd_disk_meta);
 1750 
 1751                         if (disk_sectors * 512 < size)
 1752                                 size = disk_sectors * 512;
 1753                         if (disk->d_consumer != NULL &&
 1754                             disk->d_consumer->provider != NULL &&
 1755                             disk->d_consumer->provider->sectorsize >
 1756                              sectorsize) {
 1757                                 sectorsize =
 1758                                     disk->d_consumer->provider->sectorsize;
 1759                         }
 1760                 }
 1761 
 1762                 /* Reserve some space for metadata. */
 1763                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1764 
 1765                 /* Decide insert before or after. */
 1766                 sd = &vol1->v_subdisks[0];
 1767                 if (sd->sd_offset >
 1768                     size - (sd->sd_offset + sd->sd_size)) {
 1769                         off = 0;
 1770                         size = sd->sd_offset;
 1771                 } else {
 1772                         off = sd->sd_offset + sd->sd_size;
 1773                         size = size - (sd->sd_offset + sd->sd_size);
 1774                 }
 1775 
 1776                 /* Handle strip argument. */
 1777                 strip = 131072;
 1778                 len = sizeof(*striparg);
 1779                 striparg = gctl_get_param(req, "strip", &len);
 1780                 if (striparg != NULL && len == sizeof(*striparg) &&
 1781                     *striparg > 0) {
 1782                         if (*striparg < sectorsize) {
 1783                                 gctl_error(req, "Strip size too small.");
 1784                                 return (-10);
 1785                         }
 1786                         if (*striparg % sectorsize != 0) {
 1787                                 gctl_error(req, "Incorrect strip size.");
 1788                                 return (-11);
 1789                         }
 1790                         if (strip > 65535 * sectorsize) {
 1791                                 gctl_error(req, "Strip size too big.");
 1792                                 return (-12);
 1793                         }
 1794                         strip = *striparg;
 1795                 }
 1796 
 1797                 /* Round offset up to strip. */
 1798                 if (off % strip != 0) {
 1799                         size -= strip - off % strip;
 1800                         off += strip - off % strip;
 1801                 }
 1802 
 1803                 /* Handle size argument. */
 1804                 len = sizeof(*sizearg);
 1805                 sizearg = gctl_get_param(req, "size", &len);
 1806                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1807                     *sizearg > 0) {
 1808                         if (*sizearg > size) {
 1809                                 gctl_error(req, "Size too big %lld > %lld.",
 1810                                     (long long)*sizearg, (long long)size);
 1811                                 return (-9);
 1812                         }
 1813                         size = *sizearg;
 1814                 }
 1815 
 1816                 /* Round size down to strip or sector. */
 1817                 if (level == G_RAID_VOLUME_RL_RAID1)
 1818                         size -= (size % sectorsize);
 1819                 else
 1820                         size -= (size % strip);
 1821                 if (size <= 0) {
 1822                         gctl_error(req, "Size too small.");
 1823                         return (-13);
 1824                 }
 1825                 if (size > 0xffffffffllu * sectorsize) {
 1826                         gctl_error(req, "Size too big.");
 1827                         return (-14);
 1828                 }
 1829 
 1830                 /* We have all we need, create things: volume, ... */
 1831                 vol = g_raid_create_volume(sc, volname, -1);
 1832                 vol->v_md_data = (void *)(intptr_t)i;
 1833                 vol->v_raid_level = level;
 1834                 vol->v_raid_level_qualifier = qual;
 1835                 vol->v_strip_size = strip;
 1836                 vol->v_disks_count = numdisks;
 1837                 if (level == G_RAID_VOLUME_RL_RAID0)
 1838                         vol->v_mediasize = size * numdisks;
 1839                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1840                         vol->v_mediasize = size;
 1841                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1842                         vol->v_mediasize = size * (numdisks - 1);
 1843                 else { /* RAID1E */
 1844                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1845                             strip;
 1846                 }
 1847                 vol->v_sectorsize = sectorsize;
 1848                 g_raid_start_volume(vol);
 1849 
 1850                 /* , and subdisks. */
 1851                 for (i = 0; i < numdisks; i++) {
 1852                         disk = vol1->v_subdisks[i].sd_disk;
 1853                         sd = &vol->v_subdisks[i];
 1854                         sd->sd_disk = disk;
 1855                         sd->sd_offset = off;
 1856                         sd->sd_size = size;
 1857                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1858                         if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 1859                                 if (level == G_RAID_VOLUME_RL_RAID5)
 1860                                         g_raid_change_subdisk_state(sd,
 1861                                             G_RAID_SUBDISK_S_UNINITIALIZED);
 1862                                 else
 1863                                         g_raid_change_subdisk_state(sd,
 1864                                             G_RAID_SUBDISK_S_ACTIVE);
 1865                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1866                                     G_RAID_EVENT_SUBDISK);
 1867                         }
 1868                 }
 1869 
 1870                 /* Write metadata based on created entities. */
 1871                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1872 
 1873                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1874                     G_RAID_EVENT_VOLUME);
 1875                 return (0);
 1876         }
 1877         if (strcmp(verb, "delete") == 0) {
 1878 
 1879                 /* Full node destruction. */
 1880                 if (*nargs == 1) {
 1881                         /* Check if some volume is still open. */
 1882                         force = gctl_get_paraml(req, "force", sizeof(*force));
 1883                         if (force != NULL && *force == 0 &&
 1884                             g_raid_nopens(sc) != 0) {
 1885                                 gctl_error(req, "Some volume is still open.");
 1886                                 return (-4);
 1887                         }
 1888 
 1889                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1890                                 if (disk->d_consumer)
 1891                                         intel_meta_erase(disk->d_consumer);
 1892                         }
 1893                         g_raid_destroy_node(sc, 0);
 1894                         return (0);
 1895                 }
 1896 
 1897                 /* Destroy specified volume. If it was last - all node. */
 1898                 if (*nargs != 2) {
 1899                         gctl_error(req, "Invalid number of arguments.");
 1900                         return (-1);
 1901                 }
 1902                 volname = gctl_get_asciiparam(req, "arg1");
 1903                 if (volname == NULL) {
 1904                         gctl_error(req, "No volume name.");
 1905                         return (-2);
 1906                 }
 1907 
 1908                 /* Search for volume. */
 1909                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1910                         if (strcmp(vol->v_name, volname) == 0)
 1911                                 break;
 1912                 }
 1913                 if (vol == NULL) {
 1914                         i = strtol(volname, &tmp, 10);
 1915                         if (verb != volname && tmp[0] == 0) {
 1916                                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1917                                         if (vol->v_global_id == i)
 1918                                                 break;
 1919                                 }
 1920                         }
 1921                 }
 1922                 if (vol == NULL) {
 1923                         gctl_error(req, "Volume '%s' not found.", volname);
 1924                         return (-3);
 1925                 }
 1926 
 1927                 /* Check if volume is still open. */
 1928                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1929                 if (force != NULL && *force == 0 &&
 1930                     vol->v_provider_open != 0) {
 1931                         gctl_error(req, "Volume is still open.");
 1932                         return (-4);
 1933                 }
 1934 
 1935                 /* Destroy volume and potentially node. */
 1936                 i = 0;
 1937                 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
 1938                         i++;
 1939                 if (i >= 2) {
 1940                         g_raid_destroy_volume(vol);
 1941                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1942                 } else {
 1943                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1944                                 if (disk->d_consumer)
 1945                                         intel_meta_erase(disk->d_consumer);
 1946                         }
 1947                         g_raid_destroy_node(sc, 0);
 1948                 }
 1949                 return (0);
 1950         }
 1951         if (strcmp(verb, "remove") == 0 ||
 1952             strcmp(verb, "fail") == 0) {
 1953                 if (*nargs < 2) {
 1954                         gctl_error(req, "Invalid number of arguments.");
 1955                         return (-1);
 1956                 }
 1957                 for (i = 1; i < *nargs; i++) {
 1958                         snprintf(arg, sizeof(arg), "arg%d", i);
 1959                         diskname = gctl_get_asciiparam(req, arg);
 1960                         if (diskname == NULL) {
 1961                                 gctl_error(req, "No disk name (%s).", arg);
 1962                                 error = -2;
 1963                                 break;
 1964                         }
 1965                         if (strncmp(diskname, "/dev/", 5) == 0)
 1966                                 diskname += 5;
 1967 
 1968                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1969                                 if (disk->d_consumer != NULL && 
 1970                                     disk->d_consumer->provider != NULL &&
 1971                                     strcmp(disk->d_consumer->provider->name,
 1972                                      diskname) == 0)
 1973                                         break;
 1974                         }
 1975                         if (disk == NULL) {
 1976                                 gctl_error(req, "Disk '%s' not found.",
 1977                                     diskname);
 1978                                 error = -3;
 1979                                 break;
 1980                         }
 1981 
 1982                         if (strcmp(verb, "fail") == 0) {
 1983                                 g_raid_md_fail_disk_intel(md, NULL, disk);
 1984                                 continue;
 1985                         }
 1986 
 1987                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1988 
 1989                         /* Erase metadata on deleting disk. */
 1990                         intel_meta_erase(disk->d_consumer);
 1991 
 1992                         /* If disk was assigned, just update statuses. */
 1993                         if (pd->pd_disk_pos >= 0) {
 1994                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1995                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1996                                 disk->d_consumer = NULL;
 1997                                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1998                                         g_raid_change_subdisk_state(sd,
 1999                                             G_RAID_SUBDISK_S_NONE);
 2000                                         g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 2001                                             G_RAID_EVENT_SUBDISK);
 2002                                 }
 2003                         } else {
 2004                                 /* Otherwise -- delete. */
 2005                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 2006                                 g_raid_destroy_disk(disk);
 2007                         }
 2008                 }
 2009 
 2010                 /* Write updated metadata to remaining disks. */
 2011                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 2012 
 2013                 /* Check if anything left except placeholders. */
 2014                 if (g_raid_ndisks(sc, -1) ==
 2015                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 2016                         g_raid_destroy_node(sc, 0);
 2017                 else
 2018                         g_raid_md_intel_refill(sc);
 2019                 return (error);
 2020         }
 2021         if (strcmp(verb, "insert") == 0) {
 2022                 if (*nargs < 2) {
 2023                         gctl_error(req, "Invalid number of arguments.");
 2024                         return (-1);
 2025                 }
 2026                 update = 0;
 2027                 for (i = 1; i < *nargs; i++) {
 2028                         /* Get disk name. */
 2029                         snprintf(arg, sizeof(arg), "arg%d", i);
 2030                         diskname = gctl_get_asciiparam(req, arg);
 2031                         if (diskname == NULL) {
 2032                                 gctl_error(req, "No disk name (%s).", arg);
 2033                                 error = -3;
 2034                                 break;
 2035                         }
 2036 
 2037                         /* Try to find provider with specified name. */
 2038                         g_topology_lock();
 2039                         cp = g_raid_open_consumer(sc, diskname);
 2040                         if (cp == NULL) {
 2041                                 gctl_error(req, "Can't open disk '%s'.",
 2042                                     diskname);
 2043                                 g_topology_unlock();
 2044                                 error = -4;
 2045                                 break;
 2046                         }
 2047                         pp = cp->provider;
 2048                         g_topology_unlock();
 2049 
 2050                         /* Read disk serial. */
 2051                         error = g_raid_md_get_label(cp,
 2052                             &serial[0], INTEL_SERIAL_LEN);
 2053                         if (error != 0) {
 2054                                 gctl_error(req,
 2055                                     "Can't get serial for provider '%s'.",
 2056                                     diskname);
 2057                                 g_raid_kill_consumer(sc, cp);
 2058                                 error = -7;
 2059                                 break;
 2060                         }
 2061 
 2062                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 2063                         pd->pd_disk_pos = -1;
 2064 
 2065                         disk = g_raid_create_disk(sc);
 2066                         disk->d_consumer = cp;
 2067                         disk->d_md_data = (void *)pd;
 2068                         cp->private = disk;
 2069 
 2070                         /* Read kernel dumping information. */
 2071                         disk->d_kd.offset = 0;
 2072                         disk->d_kd.length = OFF_MAX;
 2073                         len = sizeof(disk->d_kd);
 2074                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 2075                         if (disk->d_kd.di.dumper == NULL)
 2076                                 G_RAID_DEBUG1(2, sc,
 2077                                     "Dumping not supported by %s.",
 2078                                     cp->provider->name);
 2079 
 2080                         memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
 2081                             INTEL_SERIAL_LEN);
 2082                         intel_set_disk_sectors(&pd->pd_disk_meta,
 2083                             pp->mediasize / pp->sectorsize);
 2084                         pd->pd_disk_meta.id = 0;
 2085                         pd->pd_disk_meta.flags = INTEL_F_SPARE;
 2086 
 2087                         /* Welcome the "new" disk. */
 2088                         update += g_raid_md_intel_start_disk(disk);
 2089                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
 2090                                 intel_meta_write_spare(cp, &pd->pd_disk_meta);
 2091                                 g_raid_destroy_disk(disk);
 2092                         } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 2093                                 gctl_error(req, "Disk '%s' doesn't fit.",
 2094                                     diskname);
 2095                                 g_raid_destroy_disk(disk);
 2096                                 error = -8;
 2097                                 break;
 2098                         }
 2099                 }
 2100 
 2101                 /* Write new metadata if we changed something. */
 2102                 if (update)
 2103                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 2104                 return (error);
 2105         }
 2106         return (-100);
 2107 }
 2108 
 2109 static int
 2110 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
 2111     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2112 {
 2113         struct g_raid_softc *sc;
 2114         struct g_raid_volume *vol;
 2115         struct g_raid_subdisk *sd;
 2116         struct g_raid_disk *disk;
 2117         struct g_raid_md_intel_object *mdi;
 2118         struct g_raid_md_intel_perdisk *pd;
 2119         struct intel_raid_conf *meta;
 2120         struct intel_raid_vol *mvol;
 2121         struct intel_raid_map *mmap0, *mmap1;
 2122         off_t sectorsize = 512, pos;
 2123         const char *version, *cv;
 2124         int vi, sdi, numdisks, len, state, stale;
 2125 
 2126         sc = md->mdo_softc;
 2127         mdi = (struct g_raid_md_intel_object *)md;
 2128 
 2129         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 2130                 return (0);
 2131 
 2132         /* Bump generation. Newly written metadata may differ from previous. */
 2133         mdi->mdio_generation++;
 2134 
 2135         /* Count number of disks. */
 2136         numdisks = 0;
 2137         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2138                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2139                 if (pd->pd_disk_pos < 0)
 2140                         continue;
 2141                 numdisks++;
 2142                 if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 2143                         pd->pd_disk_meta.flags =
 2144                             INTEL_F_ONLINE | INTEL_F_ASSIGNED;
 2145                 } else if (disk->d_state == G_RAID_DISK_S_FAILED) {
 2146                         pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
 2147                 } else {
 2148                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 2149                         if (pd->pd_disk_meta.id != 0xffffffff) {
 2150                                 pd->pd_disk_meta.id = 0xffffffff;
 2151                                 len = strlen(pd->pd_disk_meta.serial);
 2152                                 len = min(len, INTEL_SERIAL_LEN - 3);
 2153                                 strcpy(pd->pd_disk_meta.serial + len, ":0");
 2154                         }
 2155                 }
 2156         }
 2157 
 2158         /* Fill anchor and disks. */
 2159         meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
 2160             M_MD_INTEL, M_WAITOK | M_ZERO);
 2161         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
 2162         meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
 2163         meta->config_id = mdi->mdio_config_id;
 2164         meta->generation = mdi->mdio_generation;
 2165         meta->attributes = INTEL_ATTR_CHECKSUM;
 2166         meta->total_disks = numdisks;
 2167         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2168                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2169                 if (pd->pd_disk_pos < 0)
 2170                         continue;
 2171                 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
 2172         }
 2173 
 2174         /* Fill volumes and maps. */
 2175         vi = 0;
 2176         version = INTEL_VERSION_1000;
 2177         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 2178                 if (vol->v_stopping)
 2179                         continue;
 2180                 mvol = intel_get_volume(meta, vi);
 2181 
 2182                 /* New metadata may have different volumes order. */
 2183                 vol->v_md_data = (void *)(intptr_t)vi;
 2184 
 2185                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2186                         sd = &vol->v_subdisks[sdi];
 2187                         if (sd->sd_disk != NULL)
 2188                                 break;
 2189                 }
 2190                 if (sdi >= vol->v_disks_count)
 2191                         panic("No any filled subdisk in volume");
 2192                 if (vol->v_mediasize >= 0x20000000000llu)
 2193                         meta->attributes |= INTEL_ATTR_2TB;
 2194                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2195                         meta->attributes |= INTEL_ATTR_RAID0;
 2196                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2197                         meta->attributes |= INTEL_ATTR_RAID1;
 2198                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2199                         meta->attributes |= INTEL_ATTR_RAID5;
 2200                 else
 2201                         meta->attributes |= INTEL_ATTR_RAID10;
 2202 
 2203                 if (meta->attributes & INTEL_ATTR_2TB)
 2204                         cv = INTEL_VERSION_1300;
 2205 //              else if (dev->status == DEV_CLONE_N_GO)
 2206 //                      cv = INTEL_VERSION_1206;
 2207                 else if (vol->v_disks_count > 4)
 2208                         cv = INTEL_VERSION_1204;
 2209                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2210                         cv = INTEL_VERSION_1202;
 2211                 else if (vol->v_disks_count > 2)
 2212                         cv = INTEL_VERSION_1201;
 2213                 else if (vi > 0)
 2214                         cv = INTEL_VERSION_1200;
 2215                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2216                         cv = INTEL_VERSION_1100;
 2217                 else
 2218                         cv = INTEL_VERSION_1000;
 2219                 if (strcmp(cv, version) > 0)
 2220                         version = cv;
 2221 
 2222                 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
 2223                 mvol->total_sectors = vol->v_mediasize / sectorsize;
 2224 
 2225                 /* Check for any recovery in progress. */
 2226                 state = G_RAID_SUBDISK_S_ACTIVE;
 2227                 pos = 0x7fffffffffffffffllu;
 2228                 stale = 0;
 2229                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2230                         sd = &vol->v_subdisks[sdi];
 2231                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
 2232                                 state = G_RAID_SUBDISK_S_REBUILD;
 2233                         else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
 2234                             state != G_RAID_SUBDISK_S_REBUILD)
 2235                                 state = G_RAID_SUBDISK_S_RESYNC;
 2236                         else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
 2237                                 stale = 1;
 2238                         if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2239                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
 2240                              sd->sd_rebuild_pos < pos)
 2241                                 pos = sd->sd_rebuild_pos;
 2242                 }
 2243                 if (state == G_RAID_SUBDISK_S_REBUILD) {
 2244                         mvol->migr_state = 1;
 2245                         mvol->migr_type = INTEL_MT_REBUILD;
 2246                 } else if (state == G_RAID_SUBDISK_S_RESYNC) {
 2247                         mvol->migr_state = 1;
 2248                         /* mvol->migr_type = INTEL_MT_REPAIR; */
 2249                         mvol->migr_type = INTEL_MT_VERIFY;
 2250                         mvol->state |= INTEL_ST_VERIFY_AND_FIX;
 2251                 } else
 2252                         mvol->migr_state = 0;
 2253                 mvol->dirty = (vol->v_dirty || stale);
 2254 
 2255                 mmap0 = intel_get_map(mvol, 0);
 2256 
 2257                 /* Write map / common part of two maps. */
 2258                 intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
 2259                 intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
 2260                 mmap0->strip_sectors = vol->v_strip_size / sectorsize;
 2261                 if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
 2262                         mmap0->status = INTEL_S_FAILURE;
 2263                 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
 2264                         mmap0->status = INTEL_S_DEGRADED;
 2265                 else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
 2266                     == g_raid_nsubdisks(vol, -1))
 2267                         mmap0->status = INTEL_S_UNINITIALIZED;
 2268                 else
 2269                         mmap0->status = INTEL_S_READY;
 2270                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2271                         mmap0->type = INTEL_T_RAID0;
 2272                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 2273                     vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2274                         mmap0->type = INTEL_T_RAID1;
 2275                 else
 2276                         mmap0->type = INTEL_T_RAID5;
 2277                 mmap0->total_disks = vol->v_disks_count;
 2278                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2279                         mmap0->total_domains = vol->v_disks_count;
 2280                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2281                         mmap0->total_domains = 2;
 2282                 else
 2283                         mmap0->total_domains = 1;
 2284                 intel_set_map_stripe_count(mmap0,
 2285                     sd->sd_size / vol->v_strip_size / mmap0->total_domains);
 2286                 mmap0->failed_disk_num = 0xff;
 2287                 mmap0->ddf = 1;
 2288 
 2289                 /* If there are two maps - copy common and update. */
 2290                 if (mvol->migr_state) {
 2291                         intel_set_vol_curr_migr_unit(mvol,
 2292                             pos / vol->v_strip_size / mmap0->total_domains);
 2293                         mmap1 = intel_get_map(mvol, 1);
 2294                         memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
 2295                         mmap0->status = INTEL_S_READY;
 2296                 } else
 2297                         mmap1 = NULL;
 2298 
 2299                 /* Write disk indexes and put rebuild flags. */
 2300                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2301                         sd = &vol->v_subdisks[sdi];
 2302                         pd = (struct g_raid_md_intel_perdisk *)
 2303                             sd->sd_disk->d_md_data;
 2304                         mmap0->disk_idx[sdi] = pd->pd_disk_pos;
 2305                         if (mvol->migr_state)
 2306                                 mmap1->disk_idx[sdi] = pd->pd_disk_pos;
 2307                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2308                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 2309                                 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2310                         } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
 2311                             sd->sd_state != G_RAID_SUBDISK_S_STALE &&
 2312                             sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
 2313                                 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
 2314                                 if (mvol->migr_state)
 2315                                         mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2316                         }
 2317                         if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
 2318                              sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
 2319                             mmap0->failed_disk_num == 0xff) {
 2320                                 mmap0->failed_disk_num = sdi;
 2321                                 if (mvol->migr_state)
 2322                                         mmap1->failed_disk_num = sdi;
 2323                         }
 2324                 }
 2325                 vi++;
 2326         }
 2327         meta->total_volumes = vi;
 2328         if (strcmp(version, INTEL_VERSION_1300) != 0)
 2329                 meta->attributes &= INTEL_ATTR_CHECKSUM;
 2330         memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
 2331 
 2332         /* We are done. Print meta data and store them to disks. */
 2333         g_raid_md_intel_print(meta);
 2334         if (mdi->mdio_meta != NULL)
 2335                 free(mdi->mdio_meta, M_MD_INTEL);
 2336         mdi->mdio_meta = meta;
 2337         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2338                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2339                 if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 2340                         continue;
 2341                 if (pd->pd_meta != NULL) {
 2342                         free(pd->pd_meta, M_MD_INTEL);
 2343                         pd->pd_meta = NULL;
 2344                 }
 2345                 pd->pd_meta = intel_meta_copy(meta);
 2346                 intel_meta_write(disk->d_consumer, meta);
 2347         }
 2348         return (0);
 2349 }
 2350 
 2351 static int
 2352 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
 2353     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2354 {
 2355         struct g_raid_softc *sc;
 2356         struct g_raid_md_intel_object *mdi;
 2357         struct g_raid_md_intel_perdisk *pd;
 2358         struct g_raid_subdisk *sd;
 2359 
 2360         sc = md->mdo_softc;
 2361         mdi = (struct g_raid_md_intel_object *)md;
 2362         pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
 2363 
 2364         /* We can't fail disk that is not a part of array now. */
 2365         if (pd->pd_disk_pos < 0)
 2366                 return (-1);
 2367 
 2368         /*
 2369          * Mark disk as failed in metadata and try to write that metadata
 2370          * to the disk itself to prevent it's later resurrection as STALE.
 2371          */
 2372         mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
 2373         pd->pd_disk_meta.flags = INTEL_F_FAILED;
 2374         g_raid_md_intel_print(mdi->mdio_meta);
 2375         if (tdisk->d_consumer)
 2376                 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
 2377 
 2378         /* Change states. */
 2379         g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
 2380         TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
 2381                 g_raid_change_subdisk_state(sd,
 2382                     G_RAID_SUBDISK_S_FAILED);
 2383                 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
 2384                     G_RAID_EVENT_SUBDISK);
 2385         }
 2386 
 2387         /* Write updated metadata to remaining disks. */
 2388         g_raid_md_write_intel(md, NULL, NULL, tdisk);
 2389 
 2390         /* Check if anything left except placeholders. */
 2391         if (g_raid_ndisks(sc, -1) ==
 2392             g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 2393                 g_raid_destroy_node(sc, 0);
 2394         else
 2395                 g_raid_md_intel_refill(sc);
 2396         return (0);
 2397 }
 2398 
 2399 static int
 2400 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
 2401     struct g_raid_disk *disk)
 2402 {
 2403         struct g_raid_md_intel_perdisk *pd;
 2404 
 2405         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2406         if (pd->pd_meta != NULL) {
 2407                 free(pd->pd_meta, M_MD_INTEL);
 2408                 pd->pd_meta = NULL;
 2409         }
 2410         free(pd, M_MD_INTEL);
 2411         disk->d_md_data = NULL;
 2412         return (0);
 2413 }
 2414 
 2415 static int
 2416 g_raid_md_free_intel(struct g_raid_md_object *md)
 2417 {
 2418         struct g_raid_md_intel_object *mdi;
 2419 
 2420         mdi = (struct g_raid_md_intel_object *)md;
 2421         if (!mdi->mdio_started) {
 2422                 mdi->mdio_started = 0;
 2423                 callout_stop(&mdi->mdio_start_co);
 2424                 G_RAID_DEBUG1(1, md->mdo_softc,
 2425                     "root_mount_rel %p", mdi->mdio_rootmount);
 2426                 root_mount_rel(mdi->mdio_rootmount);
 2427                 mdi->mdio_rootmount = NULL;
 2428         }
 2429         if (mdi->mdio_meta != NULL) {
 2430                 free(mdi->mdio_meta, M_MD_INTEL);
 2431                 mdi->mdio_meta = NULL;
 2432         }
 2433         return (0);
 2434 }
 2435 
 /*
  * Declare the "Intel" metadata class through G_RAID_MD_DECLARE.
  * NOTE(review): the macro is defined outside this chunk; presumably it
  * registers this module with the GEOM RAID framework -- confirm.
  */
 2436 G_RAID_MD_DECLARE(intel, "Intel");

Cache object: 28cbd7d9f9ea9e058db4dd75cc7cb8af


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.