The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/md_intel.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/9.0/sys/geom/raid/md_intel.c 220210 2011-03-31 16:19:53Z mav $");
   29 
   30 #include <sys/param.h>
   31 #include <sys/bio.h>
   32 #include <sys/endian.h>
   33 #include <sys/kernel.h>
   34 #include <sys/kobj.h>
   35 #include <sys/limits.h>
   36 #include <sys/lock.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mutex.h>
   39 #include <sys/systm.h>
   40 #include <sys/taskqueue.h>
   41 #include <geom/geom.h>
   42 #include "geom/raid/g_raid.h"
   43 #include "g_raid_md_if.h"
   44 
   45 static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
   46 
   47 struct intel_raid_map {
   48         uint32_t        offset;
   49         uint32_t        disk_sectors;
   50         uint32_t        stripe_count;
   51         uint16_t        strip_sectors;
   52         uint8_t         status;
   53 #define INTEL_S_READY           0x00
   54 #define INTEL_S_UNINITIALIZED   0x01
   55 #define INTEL_S_DEGRADED        0x02
   56 #define INTEL_S_FAILURE         0x03
   57 
   58         uint8_t         type;
   59 #define INTEL_T_RAID0           0x00
   60 #define INTEL_T_RAID1           0x01
   61 #define INTEL_T_RAID5           0x05
   62 
   63         uint8_t         total_disks;
   64         uint8_t         total_domains;
   65         uint8_t         failed_disk_num;
   66         uint8_t         ddf;
   67         uint32_t        filler_2[7];
   68         uint32_t        disk_idx[1];    /* total_disks entries. */
   69 #define INTEL_DI_IDX    0x00ffffff
   70 #define INTEL_DI_RBLD   0x01000000
   71 } __packed;
   72 
   73 struct intel_raid_vol {
   74         uint8_t         name[16];
   75         u_int64_t       total_sectors __packed;
   76         uint32_t        state;
   77 #define INTEL_ST_BOOTABLE               0x00000001
   78 #define INTEL_ST_BOOT_DEVICE            0x00000002
   79 #define INTEL_ST_READ_COALESCING        0x00000004
   80 #define INTEL_ST_WRITE_COALESCING       0x00000008
   81 #define INTEL_ST_LAST_SHUTDOWN_DIRTY    0x00000010
   82 #define INTEL_ST_HIDDEN_AT_BOOT         0x00000020
   83 #define INTEL_ST_CURRENTLY_HIDDEN       0x00000040
   84 #define INTEL_ST_VERIFY_AND_FIX         0x00000080
   85 #define INTEL_ST_MAP_STATE_UNINIT       0x00000100
   86 #define INTEL_ST_NO_AUTO_RECOVERY       0x00000200
   87 #define INTEL_ST_CLONE_N_GO             0x00000400
   88 #define INTEL_ST_CLONE_MAN_SYNC         0x00000800
   89 #define INTEL_ST_CNG_MASTER_DISK_NUM    0x00001000
   90         uint32_t        reserved;
   91         uint8_t         migr_priority;
   92         uint8_t         num_sub_vols;
   93         uint8_t         tid;
   94         uint8_t         cng_master_disk;
   95         uint16_t        cache_policy;
   96         uint8_t         cng_state;
   97         uint8_t         cng_sub_state;
   98         uint32_t        filler_0[10];
   99 
  100         uint32_t        curr_migr_unit;
  101         uint32_t        checkpoint_id;
  102         uint8_t         migr_state;
  103         uint8_t         migr_type;
  104 #define INTEL_MT_INIT           0
  105 #define INTEL_MT_REBUILD        1
  106 #define INTEL_MT_VERIFY         2
  107 #define INTEL_MT_GEN_MIGR       3
  108 #define INTEL_MT_STATE_CHANGE   4
  109 #define INTEL_MT_REPAIR         5
  110         uint8_t         dirty;
  111         uint8_t         fs_state;
  112         uint16_t        verify_errors;
  113         uint16_t        bad_blocks;
  114         uint32_t        filler_1[4];
  115         struct intel_raid_map map[1];   /* 2 entries if migr_state != 0. */
  116 } __packed;
  117 
  118 struct intel_raid_disk {
  119 #define INTEL_SERIAL_LEN        16
  120         uint8_t         serial[INTEL_SERIAL_LEN];
  121         uint32_t        sectors;
  122         uint32_t        id;
  123         uint32_t        flags;
  124 #define INTEL_F_SPARE           0x01
  125 #define INTEL_F_ASSIGNED        0x02
  126 #define INTEL_F_FAILED          0x04
  127 #define INTEL_F_ONLINE          0x08
  128 
  129         uint32_t        filler[5];
  130 } __packed;
  131 
  132 struct intel_raid_conf {
  133         uint8_t         intel_id[24];
  134 #define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
  135 
  136         uint8_t         version[6];
  137 #define INTEL_VERSION_1000      "1.0.00"        /* RAID0 */
  138 #define INTEL_VERSION_1100      "1.1.00"        /* RAID1 */
  139 #define INTEL_VERSION_1200      "1.2.00"        /* Many volumes */
  140 #define INTEL_VERSION_1201      "1.2.01"        /* 3 or 4 disks */
  141 #define INTEL_VERSION_1202      "1.2.02"        /* RAID5 */
  142 #define INTEL_VERSION_1204      "1.2.04"        /* 5 or 6 disks */
  143 #define INTEL_VERSION_1206      "1.2.06"        /* CNG */
  144 #define INTEL_VERSION_1300      "1.3.00"        /* Attributes */
  145 
  146         uint8_t         dummy_0[2];
  147         uint32_t        checksum;
  148         uint32_t        config_size;
  149         uint32_t        config_id;
  150         uint32_t        generation;
  151         uint32_t        error_log_size;
  152         uint32_t        attributes;
  153 #define INTEL_ATTR_RAID0        0x00000001
  154 #define INTEL_ATTR_RAID1        0x00000002
  155 #define INTEL_ATTR_RAID10       0x00000004
  156 #define INTEL_ATTR_RAID1E       0x00000008
  157 #define INTEL_ATTR_RAID5        0x00000010
  158 #define INTEL_ATTR_RAIDCNG      0x00000020
  159 #define INTEL_ATTR_2TB          0x20000000
  160 #define INTEL_ATTR_PM           0x40000000
  161 #define INTEL_ATTR_CHECKSUM     0x80000000
  162 
  163         uint8_t         total_disks;
  164         uint8_t         total_volumes;
  165         uint8_t         dummy_2[2];
  166         uint32_t        filler_0[39];
  167         struct intel_raid_disk  disk[1];        /* total_disks entries. */
  168         /* Here goes total_volumes of struct intel_raid_vol. */
  169 } __packed;
  170 
  171 #define INTEL_MAX_MD_SIZE(ndisks)                               \
  172     (sizeof(struct intel_raid_conf) +                           \
  173      sizeof(struct intel_raid_disk) * (ndisks - 1) +            \
  174      sizeof(struct intel_raid_vol) * 2 +                        \
  175      sizeof(struct intel_raid_map) * 2 +                        \
  176      sizeof(uint32_t) * (ndisks - 1) * 4)
  177 
  178 struct g_raid_md_intel_perdisk {
  179         struct intel_raid_conf  *pd_meta;
  180         int                      pd_disk_pos;
  181         struct intel_raid_disk   pd_disk_meta;
  182 };
  183 
  184 struct g_raid_md_intel_object {
  185         struct g_raid_md_object  mdio_base;
  186         uint32_t                 mdio_config_id;
  187         uint32_t                 mdio_generation;
  188         struct intel_raid_conf  *mdio_meta;
  189         struct callout           mdio_start_co; /* STARTING state timer. */
  190         int                      mdio_disks_present;
  191         int                      mdio_started;
  192         int                      mdio_incomplete;
  193         struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
  194 };
  195 
  196 static g_raid_md_create_t g_raid_md_create_intel;
  197 static g_raid_md_taste_t g_raid_md_taste_intel;
  198 static g_raid_md_event_t g_raid_md_event_intel;
  199 static g_raid_md_ctl_t g_raid_md_ctl_intel;
  200 static g_raid_md_write_t g_raid_md_write_intel;
  201 static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
  202 static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
  203 static g_raid_md_free_t g_raid_md_free_intel;
  204 
  205 static kobj_method_t g_raid_md_intel_methods[] = {
  206         KOBJMETHOD(g_raid_md_create,    g_raid_md_create_intel),
  207         KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_intel),
  208         KOBJMETHOD(g_raid_md_event,     g_raid_md_event_intel),
  209         KOBJMETHOD(g_raid_md_ctl,       g_raid_md_ctl_intel),
  210         KOBJMETHOD(g_raid_md_write,     g_raid_md_write_intel),
  211         KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel),
  212         KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel),
  213         KOBJMETHOD(g_raid_md_free,      g_raid_md_free_intel),
  214         { 0, 0 }
  215 };
  216 
  217 static struct g_raid_md_class g_raid_md_intel_class = {
  218         "Intel",
  219         g_raid_md_intel_methods,
  220         sizeof(struct g_raid_md_intel_object),
  221         .mdc_priority = 100
  222 };
  223 
  224 
  225 static struct intel_raid_map *
  226 intel_get_map(struct intel_raid_vol *mvol, int i)
  227 {
  228         struct intel_raid_map *mmap;
  229 
  230         if (i > (mvol->migr_state ? 1 : 0))
  231                 return (NULL);
  232         mmap = &mvol->map[0];
  233         for (; i > 0; i--) {
  234                 mmap = (struct intel_raid_map *)
  235                     &mmap->disk_idx[mmap->total_disks];
  236         }
  237         return ((struct intel_raid_map *)mmap);
  238 }
  239 
  240 static struct intel_raid_vol *
  241 intel_get_volume(struct intel_raid_conf *meta, int i)
  242 {
  243         struct intel_raid_vol *mvol;
  244         struct intel_raid_map *mmap;
  245 
  246         if (i > 1)
  247                 return (NULL);
  248         mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
  249         for (; i > 0; i--) {
  250                 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
  251                 mvol = (struct intel_raid_vol *)
  252                     &mmap->disk_idx[mmap->total_disks];
  253         }
  254         return (mvol);
  255 }
  256 
  257 static void
  258 g_raid_md_intel_print(struct intel_raid_conf *meta)
  259 {
  260         struct intel_raid_vol *mvol;
  261         struct intel_raid_map *mmap;
  262         int i, j, k;
  263 
  264         if (g_raid_debug < 1)
  265                 return;
  266 
  267         printf("********* ATA Intel MatrixRAID Metadata *********\n");
  268         printf("intel_id            <%.24s>\n", meta->intel_id);
  269         printf("version             <%.6s>\n", meta->version);
  270         printf("checksum            0x%08x\n", meta->checksum);
  271         printf("config_size         0x%08x\n", meta->config_size);
  272         printf("config_id           0x%08x\n", meta->config_id);
  273         printf("generation          0x%08x\n", meta->generation);
  274         printf("attributes          0x%08x\n", meta->attributes);
  275         printf("total_disks         %u\n", meta->total_disks);
  276         printf("total_volumes       %u\n", meta->total_volumes);
  277         printf("DISK#   serial disk_sectors disk_id flags\n");
  278         for (i = 0; i < meta->total_disks; i++ ) {
  279                 printf("    %d   <%.16s> %u 0x%08x 0x%08x\n", i,
  280                     meta->disk[i].serial, meta->disk[i].sectors,
  281                     meta->disk[i].id, meta->disk[i].flags);
  282         }
  283         for (i = 0; i < meta->total_volumes; i++) {
  284                 mvol = intel_get_volume(meta, i);
  285                 printf(" ****** Volume %d ******\n", i);
  286                 printf(" name               %.16s\n", mvol->name);
  287                 printf(" total_sectors      %ju\n", mvol->total_sectors);
  288                 printf(" state              %u\n", mvol->state);
  289                 printf(" reserved           %u\n", mvol->reserved);
  290                 printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
  291                 printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
  292                 printf(" migr_state         %u\n", mvol->migr_state);
  293                 printf(" migr_type          %u\n", mvol->migr_type);
  294                 printf(" dirty              %u\n", mvol->dirty);
  295 
  296                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  297                         printf("  *** Map %d ***\n", j);
  298                         mmap = intel_get_map(mvol, j);
  299                         printf("  offset            %u\n", mmap->offset);
  300                         printf("  disk_sectors      %u\n", mmap->disk_sectors);
  301                         printf("  stripe_count      %u\n", mmap->stripe_count);
  302                         printf("  strip_sectors     %u\n", mmap->strip_sectors);
  303                         printf("  status            %u\n", mmap->status);
  304                         printf("  type              %u\n", mmap->type);
  305                         printf("  total_disks       %u\n", mmap->total_disks);
  306                         printf("  total_domains     %u\n", mmap->total_domains);
  307                         printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
  308                         printf("  ddf               %u\n", mmap->ddf);
  309                         printf("  disk_idx         ");
  310                         for (k = 0; k < mmap->total_disks; k++)
  311                                 printf(" 0x%08x", mmap->disk_idx[k]);
  312                         printf("\n");
  313                 }
  314         }
  315         printf("=================================================\n");
  316 }
  317 
  318 static struct intel_raid_conf *
  319 intel_meta_copy(struct intel_raid_conf *meta)
  320 {
  321         struct intel_raid_conf *nmeta;
  322 
  323         nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
  324         memcpy(nmeta, meta, meta->config_size);
  325         return (nmeta);
  326 }
  327 
  328 static int
  329 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
  330 {
  331         int pos;
  332 
  333         for (pos = 0; pos < meta->total_disks; pos++) {
  334                 if (strncmp(meta->disk[pos].serial,
  335                     serial, INTEL_SERIAL_LEN) == 0)
  336                         return (pos);
  337         }
  338         return (-1);
  339 }
  340 
  341 static struct intel_raid_conf *
  342 intel_meta_read(struct g_consumer *cp)
  343 {
  344         struct g_provider *pp;
  345         struct intel_raid_conf *meta;
  346         struct intel_raid_vol *mvol;
  347         struct intel_raid_map *mmap;
  348         char *buf;
  349         int error, i, j, k, left, size;
  350         uint32_t checksum, *ptr;
  351 
  352         pp = cp->provider;
  353 
  354         /* Read the anchor sector. */
  355         buf = g_read_data(cp,
  356             pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
  357         if (buf == NULL) {
  358                 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
  359                     pp->name, error);
  360                 return (NULL);
  361         }
  362         meta = (struct intel_raid_conf *)buf;
  363 
  364         /* Check if this is an Intel RAID struct */
  365         if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
  366                 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
  367                 g_free(buf);
  368                 return (NULL);
  369         }
  370         if (meta->config_size > 65536 ||
  371             meta->config_size < sizeof(struct intel_raid_conf)) {
  372                 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
  373                     meta->config_size);
  374                 g_free(buf);
  375                 return (NULL);
  376         }
  377         size = meta->config_size;
  378         meta = malloc(size, M_MD_INTEL, M_WAITOK);
  379         memcpy(meta, buf, min(size, pp->sectorsize));
  380         g_free(buf);
  381 
  382         /* Read all the rest, if needed. */
  383         if (meta->config_size > pp->sectorsize) {
  384                 left = (meta->config_size - 1) / pp->sectorsize;
  385                 buf = g_read_data(cp,
  386                     pp->mediasize - pp->sectorsize * (2 + left),
  387                     pp->sectorsize * left, &error);
  388                 if (buf == NULL) {
  389                         G_RAID_DEBUG(1, "Cannot read remaining metadata"
  390                             " part from %s (error=%d).",
  391                             pp->name, error);
  392                         free(meta, M_MD_INTEL);
  393                         return (NULL);
  394                 }
  395                 memcpy(((char *)meta) + pp->sectorsize, buf,
  396                     pp->sectorsize * left);
  397                 g_free(buf);
  398         }
  399 
  400         /* Check metadata checksum. */
  401         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  402             i < (meta->config_size / sizeof(uint32_t)); i++) {
  403                 checksum += *ptr++;
  404         }
  405         checksum -= meta->checksum;
  406         if (checksum != meta->checksum) {
  407                 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
  408                 free(meta, M_MD_INTEL);
  409                 return (NULL);
  410         }
  411 
  412         /* Validate metadata size. */
  413         size = sizeof(struct intel_raid_conf) +
  414             sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
  415             sizeof(struct intel_raid_vol) * meta->total_volumes;
  416         if (size > meta->config_size) {
  417 badsize:
  418                 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
  419                     meta->config_size, size);
  420                 free(meta, M_MD_INTEL);
  421                 return (NULL);
  422         }
  423         for (i = 0; i < meta->total_volumes; i++) {
  424                 mvol = intel_get_volume(meta, i);
  425                 mmap = intel_get_map(mvol, 0);
  426                 size += 4 * (mmap->total_disks - 1);
  427                 if (size > meta->config_size)
  428                         goto badsize;
  429                 if (mvol->migr_state) {
  430                         size += sizeof(struct intel_raid_map);
  431                         if (size > meta->config_size)
  432                                 goto badsize;
  433                         mmap = intel_get_map(mvol, 1);
  434                         size += 4 * (mmap->total_disks - 1);
  435                         if (size > meta->config_size)
  436                                 goto badsize;
  437                 }
  438         }
  439 
  440         /* Validate disk indexes. */
  441         for (i = 0; i < meta->total_volumes; i++) {
  442                 mvol = intel_get_volume(meta, i);
  443                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
  444                         mmap = intel_get_map(mvol, j);
  445                         for (k = 0; k < mmap->total_disks; k++) {
  446                                 if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
  447                                     meta->total_disks) {
  448                                         G_RAID_DEBUG(1, "Intel metadata disk"
  449                                             " index %d too big (>%d)",
  450                                             mmap->disk_idx[k] & INTEL_DI_IDX,
  451                                             meta->total_disks);
  452                                         free(meta, M_MD_INTEL);
  453                                         return (NULL);
  454                                 }
  455                         }
  456                 }
  457         }
  458 
  459         /* Validate migration types. */
  460         for (i = 0; i < meta->total_volumes; i++) {
  461                 mvol = intel_get_volume(meta, i);
  462                 if (mvol->migr_state &&
  463                     mvol->migr_type != INTEL_MT_INIT &&
  464                     mvol->migr_type != INTEL_MT_REBUILD &&
  465                     mvol->migr_type != INTEL_MT_VERIFY &&
  466                     mvol->migr_type != INTEL_MT_REPAIR) {
  467                         G_RAID_DEBUG(1, "Intel metadata has unsupported"
  468                             " migration type %d", mvol->migr_type);
  469                         free(meta, M_MD_INTEL);
  470                         return (NULL);
  471                 }
  472         }
  473 
  474         return (meta);
  475 }
  476 
  477 static int
  478 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
  479 {
  480         struct g_provider *pp;
  481         char *buf;
  482         int error, i, sectors;
  483         uint32_t checksum, *ptr;
  484 
  485         pp = cp->provider;
  486 
  487         /* Recalculate checksum for case if metadata were changed. */
  488         meta->checksum = 0;
  489         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
  490             i < (meta->config_size / sizeof(uint32_t)); i++) {
  491                 checksum += *ptr++;
  492         }
  493         meta->checksum = checksum;
  494 
  495         /* Create and fill buffer. */
  496         sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
  497         buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  498         if (sectors > 1) {
  499                 memcpy(buf, ((char *)meta) + pp->sectorsize,
  500                     (sectors - 1) * pp->sectorsize);
  501         }
  502         memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
  503 
  504         error = g_write_data(cp,
  505             pp->mediasize - pp->sectorsize * (1 + sectors),
  506             buf, pp->sectorsize * sectors);
  507         if (error != 0) {
  508                 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
  509                     pp->name, error);
  510         }
  511 
  512         free(buf, M_MD_INTEL);
  513         return (error);
  514 }
  515 
  516 static int
  517 intel_meta_erase(struct g_consumer *cp)
  518 {
  519         struct g_provider *pp;
  520         char *buf;
  521         int error;
  522 
  523         pp = cp->provider;
  524         buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
  525         error = g_write_data(cp,
  526             pp->mediasize - 2 * pp->sectorsize,
  527             buf, pp->sectorsize);
  528         if (error != 0) {
  529                 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
  530                     pp->name, error);
  531         }
  532         free(buf, M_MD_INTEL);
  533         return (error);
  534 }
  535 
  536 static int
  537 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
  538 {
  539         struct intel_raid_conf *meta;
  540         int error;
  541 
  542         /* Fill anchor and single disk. */
  543         meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
  544         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
  545         memcpy(&meta->version[0], INTEL_VERSION_1000,
  546             sizeof(INTEL_VERSION_1000) - 1);
  547         meta->config_size = INTEL_MAX_MD_SIZE(1);
  548         meta->config_id = arc4random();
  549         meta->generation = 1;
  550         meta->total_disks = 1;
  551         meta->disk[0] = *d;
  552         error = intel_meta_write(cp, meta);
  553         free(meta, M_MD_INTEL);
  554         return (error);
  555 }
  556 
  557 static struct g_raid_disk *
  558 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
  559 {
  560         struct g_raid_disk      *disk;
  561         struct g_raid_md_intel_perdisk *pd;
  562 
  563         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  564                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  565                 if (pd->pd_disk_pos == id)
  566                         break;
  567         }
  568         return (disk);
  569 }
  570 
  571 static int
  572 g_raid_md_intel_supported(int level, int qual, int disks, int force)
  573 {
  574 
  575         switch (level) {
  576         case G_RAID_VOLUME_RL_RAID0:
  577                 if (disks < 1)
  578                         return (0);
  579                 if (!force && (disks < 2 || disks > 6))
  580                         return (0);
  581                 break;
  582         case G_RAID_VOLUME_RL_RAID1:
  583                 if (disks < 1)
  584                         return (0);
  585                 if (!force && (disks != 2))
  586                         return (0);
  587                 break;
  588         case G_RAID_VOLUME_RL_RAID1E:
  589                 if (disks < 2)
  590                         return (0);
  591                 if (!force && (disks != 4))
  592                         return (0);
  593                 break;
  594         case G_RAID_VOLUME_RL_RAID5:
  595                 if (disks < 3)
  596                         return (0);
  597                 if (!force && disks > 6)
  598                         return (0);
  599                 break;
  600         default:
  601                 return (0);
  602         }
  603         if (qual != G_RAID_VOLUME_RLQ_NONE)
  604                 return (0);
  605         return (1);
  606 }
  607 
  608 static struct g_raid_volume *
  609 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
  610 {
  611         struct g_raid_volume    *mvol;
  612 
  613         TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
  614                 if ((intptr_t)(mvol->v_md_data) == id)
  615                         break;
  616         }
  617         return (mvol);
  618 }
  619 
  620 static int
  621 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
  622 {
  623         struct g_raid_softc *sc;
  624         struct g_raid_subdisk *sd, *tmpsd;
  625         struct g_raid_disk *olddisk, *tmpdisk;
  626         struct g_raid_md_object *md;
  627         struct g_raid_md_intel_object *mdi;
  628         struct g_raid_md_intel_perdisk *pd, *oldpd;
  629         struct intel_raid_conf *meta;
  630         struct intel_raid_vol *mvol;
  631         struct intel_raid_map *mmap0, *mmap1;
  632         int disk_pos, resurrection = 0;
  633 
  634         sc = disk->d_softc;
  635         md = sc->sc_md;
  636         mdi = (struct g_raid_md_intel_object *)md;
  637         meta = mdi->mdio_meta;
  638         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
  639         olddisk = NULL;
  640 
  641         /* Find disk position in metadata by it's serial. */
  642         disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
  643         if (disk_pos < 0) {
  644                 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
  645                 /* Failed stale disk is useless for us. */
  646                 if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
  647                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
  648                         return (0);
  649                 }
  650                 /* If we are in the start process, that's all for now. */
  651                 if (!mdi->mdio_started)
  652                         goto nofit;
  653                 /*
  654                  * If we have already started - try to get use of the disk.
  655                  * Try to replace OFFLINE disks first, then FAILED.
  656                  */
  657                 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
  658                         if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
  659                             tmpdisk->d_state != G_RAID_DISK_S_FAILED)
  660                                 continue;
  661                         /* Make sure this disk is big enough. */
  662                         TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
  663                                 if (sd->sd_offset + sd->sd_size + 4096 >
  664                                     (off_t)pd->pd_disk_meta.sectors * 512) {
  665                                         G_RAID_DEBUG1(1, sc,
  666                                             "Disk too small (%llu < %llu)",
  667                                             ((unsigned long long)
  668                                             pd->pd_disk_meta.sectors) * 512,
  669                                             (unsigned long long)
  670                                             sd->sd_offset + sd->sd_size + 4096);
  671                                         break;
  672                                 }
  673                         }
  674                         if (sd != NULL)
  675                                 continue;
  676                         if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
  677                                 olddisk = tmpdisk;
  678                                 break;
  679                         } else if (olddisk == NULL)
  680                                 olddisk = tmpdisk;
  681                 }
  682                 if (olddisk == NULL) {
  683 nofit:
  684                         if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
  685                                 g_raid_change_disk_state(disk,
  686                                     G_RAID_DISK_S_SPARE);
  687                                 return (1);
  688                         } else {
  689                                 g_raid_change_disk_state(disk,
  690                                     G_RAID_DISK_S_STALE);
  691                                 return (0);
  692                         }
  693                 }
  694                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  695                 disk_pos = oldpd->pd_disk_pos;
  696                 resurrection = 1;
  697         }
  698 
  699         if (olddisk == NULL) {
  700                 /* Find placeholder by position. */
  701                 olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
  702                 if (olddisk == NULL)
  703                         panic("No disk at position %d!", disk_pos);
  704                 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
  705                         G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
  706                             disk_pos);
  707                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
  708                         return (0);
  709                 }
  710                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
  711         }
  712 
  713         /* Replace failed disk or placeholder with new disk. */
  714         TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
  715                 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
  716                 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
  717                 sd->sd_disk = disk;
  718         }
  719         oldpd->pd_disk_pos = -2;
  720         pd->pd_disk_pos = disk_pos;
  721 
  722         /* If it was placeholder -- destroy it. */
  723         if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
  724                 g_raid_destroy_disk(olddisk);
  725         } else {
  726                 /* Otherwise, make it STALE_FAILED. */
  727                 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
  728                 /* Update global metadata just in case. */
  729                 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
  730                     sizeof(struct intel_raid_disk));
  731         }
  732 
  733         /* Welcome the new disk. */
  734         if (resurrection)
  735                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  736         else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
  737                 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
  738         else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
  739                 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
  740         else
  741                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
  742         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
  743                 mvol = intel_get_volume(meta,
  744                     (uintptr_t)(sd->sd_volume->v_md_data));
  745                 mmap0 = intel_get_map(mvol, 0);
  746                 if (mvol->migr_state)
  747                         mmap1 = intel_get_map(mvol, 1);
  748                 else
  749                         mmap1 = mmap0;
  750 
  751                 if (resurrection) {
  752                         /* Stale disk, almost same as new. */
  753                         g_raid_change_subdisk_state(sd,
  754                             G_RAID_SUBDISK_S_NEW);
  755                 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
  756                         /* Failed disk, almost useless. */
  757                         g_raid_change_subdisk_state(sd,
  758                             G_RAID_SUBDISK_S_FAILED);
  759                 } else if (mvol->migr_state == 0) {
  760                         if (mmap0->status == INTEL_S_UNINITIALIZED) {
  761                                 /* Freshly created uninitialized volume. */
  762                                 g_raid_change_subdisk_state(sd,
  763                                     G_RAID_SUBDISK_S_UNINITIALIZED);
  764                         } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  765                                 /* Freshly inserted disk. */
  766                                 g_raid_change_subdisk_state(sd,
  767                                     G_RAID_SUBDISK_S_NEW);
  768                         } else if (mvol->dirty) {
  769                                 /* Dirty volume (unclean shutdown). */
  770                                 g_raid_change_subdisk_state(sd,
  771                                     G_RAID_SUBDISK_S_STALE);
  772                         } else {
  773                                 /* Up to date disk. */
  774                                 g_raid_change_subdisk_state(sd,
  775                                     G_RAID_SUBDISK_S_ACTIVE);
  776                         }
  777                 } else if (mvol->migr_type == INTEL_MT_INIT ||
  778                            mvol->migr_type == INTEL_MT_REBUILD) {
  779                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  780                                 /* Freshly inserted disk. */
  781                                 g_raid_change_subdisk_state(sd,
  782                                     G_RAID_SUBDISK_S_NEW);
  783                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  784                                 /* Rebuilding disk. */
  785                                 g_raid_change_subdisk_state(sd,
  786                                     G_RAID_SUBDISK_S_REBUILD);
  787                                 if (mvol->dirty) {
  788                                         sd->sd_rebuild_pos = 0;
  789                                 } else {
  790                                         sd->sd_rebuild_pos =
  791                                             (off_t)mvol->curr_migr_unit *
  792                                             sd->sd_volume->v_strip_size *
  793                                             mmap0->total_domains;
  794                                 }
  795                         } else if (mvol->dirty) {
  796                                 /* Dirty volume (unclean shutdown). */
  797                                 g_raid_change_subdisk_state(sd,
  798                                     G_RAID_SUBDISK_S_STALE);
  799                         } else {
  800                                 /* Up to date disk. */
  801                                 g_raid_change_subdisk_state(sd,
  802                                     G_RAID_SUBDISK_S_ACTIVE);
  803                         }
  804                 } else if (mvol->migr_type == INTEL_MT_VERIFY ||
  805                            mvol->migr_type == INTEL_MT_REPAIR) {
  806                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  807                                 /* Freshly inserted disk. */
  808                                 g_raid_change_subdisk_state(sd,
  809                                     G_RAID_SUBDISK_S_NEW);
  810                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
  811                                 /* Resyncing disk. */
  812                                 g_raid_change_subdisk_state(sd,
  813                                     G_RAID_SUBDISK_S_RESYNC);
  814                                 if (mvol->dirty) {
  815                                         sd->sd_rebuild_pos = 0;
  816                                 } else {
  817                                         sd->sd_rebuild_pos =
  818                                             (off_t)mvol->curr_migr_unit *
  819                                             sd->sd_volume->v_strip_size *
  820                                             mmap0->total_domains;
  821                                 }
  822                         } else if (mvol->dirty) {
  823                                 /* Dirty volume (unclean shutdown). */
  824                                 g_raid_change_subdisk_state(sd,
  825                                     G_RAID_SUBDISK_S_STALE);
  826                         } else {
  827                                 /* Up to date disk. */
  828                                 g_raid_change_subdisk_state(sd,
  829                                     G_RAID_SUBDISK_S_ACTIVE);
  830                         }
  831                 }
  832                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
  833                     G_RAID_EVENT_SUBDISK);
  834         }
  835 
  836         /* Update status of our need for spare. */
  837         if (mdi->mdio_started) {
  838                 mdi->mdio_incomplete =
  839                     (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
  840                      meta->total_disks);
  841         }
  842 
  843         return (resurrection);
  844 }
  845 
  846 static void
  847 g_disk_md_intel_retaste(void *arg, int pending)
  848 {
  849 
  850         G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
  851         g_retaste(&g_raid_class);
  852         free(arg, M_MD_INTEL);
  853 }
  854 
  855 static void
  856 g_raid_md_intel_refill(struct g_raid_softc *sc)
  857 {
  858         struct g_raid_md_object *md;
  859         struct g_raid_md_intel_object *mdi;
  860         struct intel_raid_conf *meta;
  861         struct g_raid_disk *disk;
  862         struct task *task;
  863         int update, na;
  864 
  865         md = sc->sc_md;
  866         mdi = (struct g_raid_md_intel_object *)md;
  867         meta = mdi->mdio_meta;
  868         update = 0;
  869         do {
  870                 /* Make sure we miss anything. */
  871                 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
  872                 if (na == meta->total_disks)
  873                         break;
  874 
  875                 G_RAID_DEBUG1(1, md->mdo_softc,
  876                     "Array is not complete (%d of %d), "
  877                     "trying to refill.", na, meta->total_disks);
  878 
  879                 /* Try to get use some of STALE disks. */
  880                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  881                         if (disk->d_state == G_RAID_DISK_S_STALE) {
  882                                 update += g_raid_md_intel_start_disk(disk);
  883                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  884                                         break;
  885                         }
  886                 }
  887                 if (disk != NULL)
  888                         continue;
  889 
  890                 /* Try to get use some of SPARE disks. */
  891                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  892                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
  893                                 update += g_raid_md_intel_start_disk(disk);
  894                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
  895                                         break;
  896                         }
  897                 }
  898         } while (disk != NULL);
  899 
  900         /* Write new metadata if we changed something. */
  901         if (update) {
  902                 g_raid_md_write_intel(md, NULL, NULL, NULL);
  903                 meta = mdi->mdio_meta;
  904         }
  905 
  906         /* Update status of our need for spare. */
  907         mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
  908             meta->total_disks);
  909 
  910         /* Request retaste hoping to find spare. */
  911         if (mdi->mdio_incomplete) {
  912                 task = malloc(sizeof(struct task),
  913                     M_MD_INTEL, M_WAITOK | M_ZERO);
  914                 TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
  915                 taskqueue_enqueue(taskqueue_swi, task);
  916         }
  917 }
  918 
  919 static void
  920 g_raid_md_intel_start(struct g_raid_softc *sc)
  921 {
  922         struct g_raid_md_object *md;
  923         struct g_raid_md_intel_object *mdi;
  924         struct g_raid_md_intel_perdisk *pd;
  925         struct intel_raid_conf *meta;
  926         struct intel_raid_vol *mvol;
  927         struct intel_raid_map *mmap;
  928         struct g_raid_volume *vol;
  929         struct g_raid_subdisk *sd;
  930         struct g_raid_disk *disk;
  931         int i, j, disk_pos;
  932 
  933         md = sc->sc_md;
  934         mdi = (struct g_raid_md_intel_object *)md;
  935         meta = mdi->mdio_meta;
  936 
  937         /* Create volumes and subdisks. */
  938         for (i = 0; i < meta->total_volumes; i++) {
  939                 mvol = intel_get_volume(meta, i);
  940                 mmap = intel_get_map(mvol, 0);
  941                 vol = g_raid_create_volume(sc, mvol->name, -1);
  942                 vol->v_md_data = (void *)(intptr_t)i;
  943                 if (mmap->type == INTEL_T_RAID0)
  944                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
  945                 else if (mmap->type == INTEL_T_RAID1 &&
  946                     mmap->total_domains >= 2 &&
  947                     mmap->total_domains <= mmap->total_disks) {
  948                         /* Assume total_domains is correct. */
  949                         if (mmap->total_domains == mmap->total_disks)
  950                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
  951                         else
  952                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
  953                 } else if (mmap->type == INTEL_T_RAID1) {
  954                         /* total_domains looks wrong. */
  955                         if (mmap->total_disks <= 2)
  956                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
  957                         else
  958                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
  959                 } else if (mmap->type == INTEL_T_RAID5)
  960                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
  961                 else
  962                         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
  963                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
  964                 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
  965                 vol->v_disks_count = mmap->total_disks;
  966                 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
  967                 vol->v_sectorsize = 512; //ZZZ
  968                 for (j = 0; j < vol->v_disks_count; j++) {
  969                         sd = &vol->v_subdisks[j];
  970                         sd->sd_offset = (off_t)mmap->offset * 512; //ZZZ
  971                         sd->sd_size = (off_t)mmap->disk_sectors * 512; //ZZZ
  972                 }
  973                 g_raid_start_volume(vol);
  974         }
  975 
  976         /* Create disk placeholders to store data for later writing. */
  977         for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
  978                 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
  979                 pd->pd_disk_pos = disk_pos;
  980                 pd->pd_disk_meta = meta->disk[disk_pos];
  981                 disk = g_raid_create_disk(sc);
  982                 disk->d_md_data = (void *)pd;
  983                 disk->d_state = G_RAID_DISK_S_OFFLINE;
  984                 for (i = 0; i < meta->total_volumes; i++) {
  985                         mvol = intel_get_volume(meta, i);
  986                         mmap = intel_get_map(mvol, 0);
  987                         for (j = 0; j < mmap->total_disks; j++) {
  988                                 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
  989                                         break;
  990                         }
  991                         if (j == mmap->total_disks)
  992                                 continue;
  993                         vol = g_raid_md_intel_get_volume(sc, i);
  994                         sd = &vol->v_subdisks[j];
  995                         sd->sd_disk = disk;
  996                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
  997                 }
  998         }
  999 
 1000         /* Make all disks found till the moment take their places. */
 1001         do {
 1002                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1003                         if (disk->d_state == G_RAID_DISK_S_NONE) {
 1004                                 g_raid_md_intel_start_disk(disk);
 1005                                 break;
 1006                         }
 1007                 }
 1008         } while (disk != NULL);
 1009 
 1010         mdi->mdio_started = 1;
 1011         G_RAID_DEBUG1(0, sc, "Array started.");
 1012         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1013 
 1014         /* Pickup any STALE/SPARE disks to refill array if needed. */
 1015         g_raid_md_intel_refill(sc);
 1016 
 1017         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1018                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1019                     G_RAID_EVENT_VOLUME);
 1020         }
 1021 
 1022         callout_stop(&mdi->mdio_start_co);
 1023         G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
 1024         root_mount_rel(mdi->mdio_rootmount);
 1025         mdi->mdio_rootmount = NULL;
 1026 }
 1027 
 1028 static void
 1029 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
 1030 {
 1031         struct g_raid_softc *sc;
 1032         struct g_raid_md_object *md;
 1033         struct g_raid_md_intel_object *mdi;
 1034         struct intel_raid_conf *pdmeta;
 1035         struct g_raid_md_intel_perdisk *pd;
 1036 
 1037         sc = disk->d_softc;
 1038         md = sc->sc_md;
 1039         mdi = (struct g_raid_md_intel_object *)md;
 1040         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1041         pdmeta = pd->pd_meta;
 1042 
 1043         if (mdi->mdio_started) {
 1044                 if (g_raid_md_intel_start_disk(disk))
 1045                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1046         } else {
 1047                 /* If we haven't started yet - check metadata freshness. */
 1048                 if (mdi->mdio_meta == NULL ||
 1049                     ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
 1050                         G_RAID_DEBUG1(1, sc, "Newer disk");
 1051                         if (mdi->mdio_meta != NULL)
 1052                                 free(mdi->mdio_meta, M_MD_INTEL);
 1053                         mdi->mdio_meta = intel_meta_copy(pdmeta);
 1054                         mdi->mdio_generation = mdi->mdio_meta->generation;
 1055                         mdi->mdio_disks_present = 1;
 1056                 } else if (pdmeta->generation == mdi->mdio_generation) {
 1057                         mdi->mdio_disks_present++;
 1058                         G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
 1059                             mdi->mdio_disks_present,
 1060                             mdi->mdio_meta->total_disks);
 1061                 } else {
 1062                         G_RAID_DEBUG1(1, sc, "Older disk");
 1063                 }
 1064                 /* If we collected all needed disks - start array. */
 1065                 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
 1066                         g_raid_md_intel_start(sc);
 1067         }
 1068 }
 1069 
 1070 static void
 1071 g_raid_intel_go(void *arg)
 1072 {
 1073         struct g_raid_softc *sc;
 1074         struct g_raid_md_object *md;
 1075         struct g_raid_md_intel_object *mdi;
 1076 
 1077         sc = arg;
 1078         md = sc->sc_md;
 1079         mdi = (struct g_raid_md_intel_object *)md;
 1080         if (!mdi->mdio_started) {
 1081                 G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
 1082                 g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
 1083         }
 1084 }
 1085 
 1086 static int
 1087 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
 1088     struct g_geom **gp)
 1089 {
 1090         struct g_raid_softc *sc;
 1091         struct g_raid_md_intel_object *mdi;
 1092         char name[16];
 1093 
 1094         mdi = (struct g_raid_md_intel_object *)md;
 1095         mdi->mdio_config_id = arc4random();
 1096         mdi->mdio_generation = 0;
 1097         snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
 1098         sc = g_raid_create_node(mp, name, md);
 1099         if (sc == NULL)
 1100                 return (G_RAID_MD_TASTE_FAIL);
 1101         md->mdo_softc = sc;
 1102         *gp = sc->sc_geom;
 1103         return (G_RAID_MD_TASTE_NEW);
 1104 }
 1105 
 1106 /*
 1107  * Return the last N characters of the serial label.  The Linux and
 1108  * ataraid(7) code always uses the last 16 characters of the label to
 1109  * store into the Intel meta format.  Generalize this to N characters
 1110  * since that's easy.  Labels can be up to 20 characters for SATA drives
 1111  * and up 251 characters for SAS drives.  Since intel controllers don't
 1112  * support SAS drives, just stick with the SATA limits for stack friendliness.
 1113  */
 1114 static int
 1115 g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
 1116 {
 1117         char serial_buffer[24];
 1118         int len, error;
 1119         
 1120         len = sizeof(serial_buffer);
 1121         error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
 1122         if (error != 0)
 1123                 return (error);
 1124         len = strlen(serial_buffer);
 1125         if (len > serlen)
 1126                 len -= serlen;
 1127         else
 1128                 len = 0;
 1129         strncpy(serial, serial_buffer + len, serlen);
 1130         return (0);
 1131 }
 1132 
 1133 static int
 1134 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
 1135                               struct g_consumer *cp, struct g_geom **gp)
 1136 {
 1137         struct g_consumer *rcp;
 1138         struct g_provider *pp;
 1139         struct g_raid_md_intel_object *mdi, *mdi1;
 1140         struct g_raid_softc *sc;
 1141         struct g_raid_disk *disk;
 1142         struct intel_raid_conf *meta;
 1143         struct g_raid_md_intel_perdisk *pd;
 1144         struct g_geom *geom;
 1145         int error, disk_pos, result, spare, len;
 1146         char serial[INTEL_SERIAL_LEN];
 1147         char name[16];
 1148         uint16_t vendor;
 1149 
 1150         G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
 1151         mdi = (struct g_raid_md_intel_object *)md;
 1152         pp = cp->provider;
 1153 
 1154         /* Read metadata from device. */
 1155         meta = NULL;
 1156         vendor = 0xffff;
 1157         disk_pos = 0;
 1158         if (g_access(cp, 1, 0, 0) != 0)
 1159                 return (G_RAID_MD_TASTE_FAIL);
 1160         g_topology_unlock();
 1161         error = g_raid_md_get_label(cp, serial, sizeof(serial));
 1162         if (error != 0) {
 1163                 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
 1164                     pp->name, error);
 1165                 goto fail2;
 1166         }
 1167         len = 2;
 1168         if (pp->geom->rank == 1)
 1169                 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
 1170         meta = intel_meta_read(cp);
 1171         g_topology_lock();
 1172         g_access(cp, -1, 0, 0);
 1173         if (meta == NULL) {
 1174                 if (g_raid_aggressive_spare) {
 1175                         if (vendor == 0x8086) {
 1176                                 G_RAID_DEBUG(1,
 1177                                     "No Intel metadata, forcing spare.");
 1178                                 spare = 2;
 1179                                 goto search;
 1180                         } else {
 1181                                 G_RAID_DEBUG(1,
 1182                                     "Intel vendor mismatch 0x%04x != 0x8086",
 1183                                     vendor);
 1184                         }
 1185                 }
 1186                 return (G_RAID_MD_TASTE_FAIL);
 1187         }
 1188 
 1189         /* Check this disk position in obtained metadata. */
 1190         disk_pos = intel_meta_find_disk(meta, serial);
 1191         if (disk_pos < 0) {
 1192                 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
 1193                 goto fail1;
 1194         }
 1195         if (meta->disk[disk_pos].sectors !=
 1196             (pp->mediasize / pp->sectorsize)) {
 1197                 G_RAID_DEBUG(1, "Intel size mismatch %u != %u",
 1198                     meta->disk[disk_pos].sectors,
 1199                     (u_int)(pp->mediasize / pp->sectorsize));
 1200                 goto fail1;
 1201         }
 1202 
 1203         /* Metadata valid. Print it. */
 1204         g_raid_md_intel_print(meta);
 1205         G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
 1206         spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
 1207 
 1208 search:
 1209         /* Search for matching node. */
 1210         sc = NULL;
 1211         mdi1 = NULL;
 1212         LIST_FOREACH(geom, &mp->geom, geom) {
 1213                 sc = geom->softc;
 1214                 if (sc == NULL)
 1215                         continue;
 1216                 if (sc->sc_stopping != 0)
 1217                         continue;
 1218                 if (sc->sc_md->mdo_class != md->mdo_class)
 1219                         continue;
 1220                 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
 1221                 if (spare) {
 1222                         if (mdi1->mdio_incomplete)
 1223                                 break;
 1224                 } else {
 1225                         if (mdi1->mdio_config_id == meta->config_id)
 1226                                 break;
 1227                 }
 1228         }
 1229 
 1230         /* Found matching node. */
 1231         if (geom != NULL) {
 1232                 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
 1233                 result = G_RAID_MD_TASTE_EXISTING;
 1234 
 1235         } else if (spare) { /* Not found needy node -- left for later. */
 1236                 G_RAID_DEBUG(1, "Spare is not needed at this time");
 1237                 goto fail1;
 1238 
 1239         } else { /* Not found matching node -- create one. */
 1240                 result = G_RAID_MD_TASTE_NEW;
 1241                 mdi->mdio_config_id = meta->config_id;
 1242                 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
 1243                 sc = g_raid_create_node(mp, name, md);
 1244                 md->mdo_softc = sc;
 1245                 geom = sc->sc_geom;
 1246                 callout_init(&mdi->mdio_start_co, 1);
 1247                 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
 1248                     g_raid_intel_go, sc);
 1249                 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
 1250                 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
 1251         }
 1252 
 1253         rcp = g_new_consumer(geom);
 1254         g_attach(rcp, pp);
 1255         if (g_access(rcp, 1, 1, 1) != 0)
 1256                 ; //goto fail1;
 1257 
 1258         g_topology_unlock();
 1259         sx_xlock(&sc->sc_lock);
 1260 
 1261         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1262         pd->pd_meta = meta;
 1263         pd->pd_disk_pos = -1;
 1264         if (spare == 2) {
 1265                 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
 1266                 pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
 1267                 pd->pd_disk_meta.id = 0;
 1268                 pd->pd_disk_meta.flags = INTEL_F_SPARE;
 1269         } else {
 1270                 pd->pd_disk_meta = meta->disk[disk_pos];
 1271         }
 1272         disk = g_raid_create_disk(sc);
 1273         disk->d_md_data = (void *)pd;
 1274         disk->d_consumer = rcp;
 1275         rcp->private = disk;
 1276 
 1277         /* Read kernel dumping information. */
 1278         disk->d_kd.offset = 0;
 1279         disk->d_kd.length = OFF_MAX;
 1280         len = sizeof(disk->d_kd);
 1281         error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
 1282         if (disk->d_kd.di.dumper == NULL)
 1283                 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 
 1284                     rcp->provider->name, error);
 1285 
 1286         g_raid_md_intel_new_disk(disk);
 1287 
 1288         sx_xunlock(&sc->sc_lock);
 1289         g_topology_lock();
 1290         *gp = geom;
 1291         return (result);
 1292 fail2:
 1293         g_topology_lock();
 1294         g_access(cp, -1, 0, 0);
 1295 fail1:
 1296         free(meta, M_MD_INTEL);
 1297         return (G_RAID_MD_TASTE_FAIL);
 1298 }
 1299 
 1300 static int
 1301 g_raid_md_event_intel(struct g_raid_md_object *md,
 1302     struct g_raid_disk *disk, u_int event)
 1303 {
 1304         struct g_raid_softc *sc;
 1305         struct g_raid_subdisk *sd;
 1306         struct g_raid_md_intel_object *mdi;
 1307         struct g_raid_md_intel_perdisk *pd;
 1308 
 1309         sc = md->mdo_softc;
 1310         mdi = (struct g_raid_md_intel_object *)md;
 1311         if (disk == NULL) {
 1312                 switch (event) {
 1313                 case G_RAID_NODE_E_START:
 1314                         if (!mdi->mdio_started)
 1315                                 g_raid_md_intel_start(sc);
 1316                         return (0);
 1317                 }
 1318                 return (-1);
 1319         }
 1320         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1321         switch (event) {
 1322         case G_RAID_DISK_E_DISCONNECTED:
 1323                 /* If disk was assigned, just update statuses. */
 1324                 if (pd->pd_disk_pos >= 0) {
 1325                         g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1326                         if (disk->d_consumer) {
 1327                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1328                                 disk->d_consumer = NULL;
 1329                         }
 1330                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1331                                 g_raid_change_subdisk_state(sd,
 1332                                     G_RAID_SUBDISK_S_NONE);
 1333                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1334                                     G_RAID_EVENT_SUBDISK);
 1335                         }
 1336                 } else {
 1337                         /* Otherwise -- delete. */
 1338                         g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 1339                         g_raid_destroy_disk(disk);
 1340                 }
 1341 
 1342                 /* Write updated metadata to all disks. */
 1343                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1344 
 1345                 /* Check if anything left except placeholders. */
 1346                 if (g_raid_ndisks(sc, -1) ==
 1347                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 1348                         g_raid_destroy_node(sc, 0);
 1349                 else
 1350                         g_raid_md_intel_refill(sc);
 1351                 return (0);
 1352         }
 1353         return (-2);
 1354 }
 1355 
 1356 static int
 1357 g_raid_md_ctl_intel(struct g_raid_md_object *md,
 1358     struct gctl_req *req)
 1359 {
 1360         struct g_raid_softc *sc;
 1361         struct g_raid_volume *vol, *vol1;
 1362         struct g_raid_subdisk *sd;
 1363         struct g_raid_disk *disk;
 1364         struct g_raid_md_intel_object *mdi;
 1365         struct g_raid_md_intel_perdisk *pd;
 1366         struct g_consumer *cp;
 1367         struct g_provider *pp;
 1368         char arg[16], serial[INTEL_SERIAL_LEN];
 1369         const char *verb, *volname, *levelname, *diskname;
 1370         char *tmp;
 1371         int *nargs, *force;
 1372         off_t off, size, sectorsize, strip;
 1373         intmax_t *sizearg, *striparg;
 1374         int numdisks, i, len, level, qual, update;
 1375         int error;
 1376 
 1377         sc = md->mdo_softc;
 1378         mdi = (struct g_raid_md_intel_object *)md;
 1379         verb = gctl_get_param(req, "verb", NULL);
 1380         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 1381         error = 0;
 1382         if (strcmp(verb, "label") == 0) {
 1383 
 1384                 if (*nargs < 4) {
 1385                         gctl_error(req, "Invalid number of arguments.");
 1386                         return (-1);
 1387                 }
 1388                 volname = gctl_get_asciiparam(req, "arg1");
 1389                 if (volname == NULL) {
 1390                         gctl_error(req, "No volume name.");
 1391                         return (-2);
 1392                 }
 1393                 levelname = gctl_get_asciiparam(req, "arg2");
 1394                 if (levelname == NULL) {
 1395                         gctl_error(req, "No RAID level.");
 1396                         return (-3);
 1397                 }
 1398                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1399                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1400                         return (-4);
 1401                 }
 1402                 numdisks = *nargs - 3;
 1403                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1404                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1405                     force ? *force : 0)) {
 1406                         gctl_error(req, "Unsupported RAID level "
 1407                             "(0x%02x/0x%02x), or number of disks (%d).",
 1408                             level, qual, numdisks);
 1409                         return (-5);
 1410                 }
 1411 
 1412                 /* Search for disks, connect them and probe. */
 1413                 size = 0x7fffffffffffffffllu;
 1414                 sectorsize = 0;
 1415                 for (i = 0; i < numdisks; i++) {
 1416                         snprintf(arg, sizeof(arg), "arg%d", i + 3);
 1417                         diskname = gctl_get_asciiparam(req, arg);
 1418                         if (diskname == NULL) {
 1419                                 gctl_error(req, "No disk name (%s).", arg);
 1420                                 error = -6;
 1421                                 break;
 1422                         }
 1423                         if (strcmp(diskname, "NONE") == 0) {
 1424                                 cp = NULL;
 1425                                 pp = NULL;
 1426                         } else {
 1427                                 g_topology_lock();
 1428                                 cp = g_raid_open_consumer(sc, diskname);
 1429                                 if (cp == NULL) {
 1430                                         gctl_error(req, "Can't open disk '%s'.",
 1431                                             diskname);
 1432                                         g_topology_unlock();
 1433                                         error = -7;
 1434                                         break;
 1435                                 }
 1436                                 pp = cp->provider;
 1437                         }
 1438                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1439                         pd->pd_disk_pos = i;
 1440                         disk = g_raid_create_disk(sc);
 1441                         disk->d_md_data = (void *)pd;
 1442                         disk->d_consumer = cp;
 1443                         if (cp == NULL) {
 1444                                 strcpy(&pd->pd_disk_meta.serial[0], "NONE");
 1445                                 pd->pd_disk_meta.id = 0xffffffff;
 1446                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 1447                                 continue;
 1448                         }
 1449                         cp->private = disk;
 1450                         g_topology_unlock();
 1451 
 1452                         error = g_raid_md_get_label(cp,
 1453                             &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
 1454                         if (error != 0) {
 1455                                 gctl_error(req,
 1456                                     "Can't get serial for provider '%s'.",
 1457                                     diskname);
 1458                                 error = -8;
 1459                                 break;
 1460                         }
 1461 
 1462                         /* Read kernel dumping information. */
 1463                         disk->d_kd.offset = 0;
 1464                         disk->d_kd.length = OFF_MAX;
 1465                         len = sizeof(disk->d_kd);
 1466                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 1467                         if (disk->d_kd.di.dumper == NULL)
 1468                                 G_RAID_DEBUG1(2, sc,
 1469                                     "Dumping not supported by %s.",
 1470                                     cp->provider->name);
 1471 
 1472                         pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
 1473                         if (size > pp->mediasize)
 1474                                 size = pp->mediasize;
 1475                         if (sectorsize < pp->sectorsize)
 1476                                 sectorsize = pp->sectorsize;
 1477                         pd->pd_disk_meta.id = 0;
 1478                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
 1479                 }
 1480                 if (error != 0)
 1481                         return (error);
 1482 
 1483                 if (sectorsize <= 0) {
 1484                         gctl_error(req, "Can't get sector size.");
 1485                         return (-8);
 1486                 }
 1487 
 1488                 /* Reserve some space for metadata. */
 1489                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1490 
 1491                 /* Handle size argument. */
 1492                 len = sizeof(*sizearg);
 1493                 sizearg = gctl_get_param(req, "size", &len);
 1494                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1495                     *sizearg > 0) {
 1496                         if (*sizearg > size) {
 1497                                 gctl_error(req, "Size too big %lld > %lld.",
 1498                                     (long long)*sizearg, (long long)size);
 1499                                 return (-9);
 1500                         }
 1501                         size = *sizearg;
 1502                 }
 1503 
 1504                 /* Handle strip argument. */
 1505                 strip = 131072;
 1506                 len = sizeof(*striparg);
 1507                 striparg = gctl_get_param(req, "strip", &len);
 1508                 if (striparg != NULL && len == sizeof(*striparg) &&
 1509                     *striparg > 0) {
 1510                         if (*striparg < sectorsize) {
 1511                                 gctl_error(req, "Strip size too small.");
 1512                                 return (-10);
 1513                         }
 1514                         if (*striparg % sectorsize != 0) {
 1515                                 gctl_error(req, "Incorrect strip size.");
 1516                                 return (-11);
 1517                         }
 1518                         if (strip > 65535 * sectorsize) {
 1519                                 gctl_error(req, "Strip size too big.");
 1520                                 return (-12);
 1521                         }
 1522                         strip = *striparg;
 1523                 }
 1524 
 1525                 /* Round size down to strip or sector. */
 1526                 if (level == G_RAID_VOLUME_RL_RAID1)
 1527                         size -= (size % sectorsize);
 1528                 else if (level == G_RAID_VOLUME_RL_RAID1E &&
 1529                     (numdisks & 1) != 0)
 1530                         size -= (size % (2 * strip));
 1531                 else
 1532                         size -= (size % strip);
 1533                 if (size <= 0) {
 1534                         gctl_error(req, "Size too small.");
 1535                         return (-13);
 1536                 }
 1537                 if (size > 0xffffffffllu * sectorsize) {
 1538                         gctl_error(req, "Size too big.");
 1539                         return (-14);
 1540                 }
 1541 
 1542                 /* We have all we need, create things: volume, ... */
 1543                 mdi->mdio_started = 1;
 1544                 vol = g_raid_create_volume(sc, volname, -1);
 1545                 vol->v_md_data = (void *)(intptr_t)0;
 1546                 vol->v_raid_level = level;
 1547                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1548                 vol->v_strip_size = strip;
 1549                 vol->v_disks_count = numdisks;
 1550                 if (level == G_RAID_VOLUME_RL_RAID0)
 1551                         vol->v_mediasize = size * numdisks;
 1552                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1553                         vol->v_mediasize = size;
 1554                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1555                         vol->v_mediasize = size * (numdisks - 1);
 1556                 else { /* RAID1E */
 1557                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1558                             strip;
 1559                 }
 1560                 vol->v_sectorsize = sectorsize;
 1561                 g_raid_start_volume(vol);
 1562 
 1563                 /* , and subdisks. */
 1564                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1565                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1566                         sd = &vol->v_subdisks[pd->pd_disk_pos];
 1567                         sd->sd_disk = disk;
 1568                         sd->sd_offset = 0;
 1569                         sd->sd_size = size;
 1570                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1571                         if (sd->sd_disk->d_consumer != NULL) {
 1572                                 g_raid_change_disk_state(disk,
 1573                                     G_RAID_DISK_S_ACTIVE);
 1574                                 g_raid_change_subdisk_state(sd,
 1575                                     G_RAID_SUBDISK_S_ACTIVE);
 1576                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1577                                     G_RAID_EVENT_SUBDISK);
 1578                         } else {
 1579                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1580                         }
 1581                 }
 1582 
 1583                 /* Write metadata based on created entities. */
 1584                 G_RAID_DEBUG1(0, sc, "Array started.");
 1585                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1586 
 1587                 /* Pickup any STALE/SPARE disks to refill array if needed. */
 1588                 g_raid_md_intel_refill(sc);
 1589 
 1590                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1591                     G_RAID_EVENT_VOLUME);
 1592                 return (0);
 1593         }
 1594         if (strcmp(verb, "add") == 0) {
 1595 
 1596                 if (*nargs != 3) {
 1597                         gctl_error(req, "Invalid number of arguments.");
 1598                         return (-1);
 1599                 }
 1600                 volname = gctl_get_asciiparam(req, "arg1");
 1601                 if (volname == NULL) {
 1602                         gctl_error(req, "No volume name.");
 1603                         return (-2);
 1604                 }
 1605                 levelname = gctl_get_asciiparam(req, "arg2");
 1606                 if (levelname == NULL) {
 1607                         gctl_error(req, "No RAID level.");
 1608                         return (-3);
 1609                 }
 1610                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
 1611                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
 1612                         return (-4);
 1613                 }
 1614 
 1615                 /* Look for existing volumes. */
 1616                 i = 0;
 1617                 vol1 = NULL;
 1618                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1619                         vol1 = vol;
 1620                         i++;
 1621                 }
 1622                 if (i > 1) {
 1623                         gctl_error(req, "Maximum two volumes supported.");
 1624                         return (-6);
 1625                 }
 1626                 if (vol1 == NULL) {
 1627                         gctl_error(req, "At least one volume must exist.");
 1628                         return (-7);
 1629                 }
 1630 
 1631                 numdisks = vol1->v_disks_count;
 1632                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1633                 if (!g_raid_md_intel_supported(level, qual, numdisks,
 1634                     force ? *force : 0)) {
 1635                         gctl_error(req, "Unsupported RAID level "
 1636                             "(0x%02x/0x%02x), or number of disks (%d).",
 1637                             level, qual, numdisks);
 1638                         return (-5);
 1639                 }
 1640 
 1641                 /* Collect info about present disks. */
 1642                 size = 0x7fffffffffffffffllu;
 1643                 sectorsize = 512;
 1644                 for (i = 0; i < numdisks; i++) {
 1645                         disk = vol1->v_subdisks[i].sd_disk;
 1646                         pd = (struct g_raid_md_intel_perdisk *)
 1647                             disk->d_md_data;
 1648                         if ((off_t)pd->pd_disk_meta.sectors * 512 < size)
 1649                                 size = (off_t)pd->pd_disk_meta.sectors * 512;
 1650                         if (disk->d_consumer != NULL &&
 1651                             disk->d_consumer->provider != NULL &&
 1652                             disk->d_consumer->provider->sectorsize >
 1653                              sectorsize) {
 1654                                 sectorsize =
 1655                                     disk->d_consumer->provider->sectorsize;
 1656                         }
 1657                 }
 1658 
 1659                 /* Reserve some space for metadata. */
 1660                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
 1661 
 1662                 /* Decide insert before or after. */
 1663                 sd = &vol1->v_subdisks[0];
 1664                 if (sd->sd_offset >
 1665                     size - (sd->sd_offset + sd->sd_size)) {
 1666                         off = 0;
 1667                         size = sd->sd_offset;
 1668                 } else {
 1669                         off = sd->sd_offset + sd->sd_size;
 1670                         size = size - (sd->sd_offset + sd->sd_size);
 1671                 }
 1672 
 1673                 /* Handle strip argument. */
 1674                 strip = 131072;
 1675                 len = sizeof(*striparg);
 1676                 striparg = gctl_get_param(req, "strip", &len);
 1677                 if (striparg != NULL && len == sizeof(*striparg) &&
 1678                     *striparg > 0) {
 1679                         if (*striparg < sectorsize) {
 1680                                 gctl_error(req, "Strip size too small.");
 1681                                 return (-10);
 1682                         }
 1683                         if (*striparg % sectorsize != 0) {
 1684                                 gctl_error(req, "Incorrect strip size.");
 1685                                 return (-11);
 1686                         }
 1687                         if (strip > 65535 * sectorsize) {
 1688                                 gctl_error(req, "Strip size too big.");
 1689                                 return (-12);
 1690                         }
 1691                         strip = *striparg;
 1692                 }
 1693 
 1694                 /* Round offset up to strip. */
 1695                 if (off % strip != 0) {
 1696                         size -= strip - off % strip;
 1697                         off += strip - off % strip;
 1698                 }
 1699 
 1700                 /* Handle size argument. */
 1701                 len = sizeof(*sizearg);
 1702                 sizearg = gctl_get_param(req, "size", &len);
 1703                 if (sizearg != NULL && len == sizeof(*sizearg) &&
 1704                     *sizearg > 0) {
 1705                         if (*sizearg > size) {
 1706                                 gctl_error(req, "Size too big %lld > %lld.",
 1707                                     (long long)*sizearg, (long long)size);
 1708                                 return (-9);
 1709                         }
 1710                         size = *sizearg;
 1711                 }
 1712 
 1713                 /* Round size down to strip or sector. */
 1714                 if (level == G_RAID_VOLUME_RL_RAID1)
 1715                         size -= (size % sectorsize);
 1716                 else
 1717                         size -= (size % strip);
 1718                 if (size <= 0) {
 1719                         gctl_error(req, "Size too small.");
 1720                         return (-13);
 1721                 }
 1722                 if (size > 0xffffffffllu * sectorsize) {
 1723                         gctl_error(req, "Size too big.");
 1724                         return (-14);
 1725                 }
 1726 
 1727                 /* We have all we need, create things: volume, ... */
 1728                 vol = g_raid_create_volume(sc, volname, -1);
 1729                 vol->v_md_data = (void *)(intptr_t)i;
 1730                 vol->v_raid_level = level;
 1731                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 1732                 vol->v_strip_size = strip;
 1733                 vol->v_disks_count = numdisks;
 1734                 if (level == G_RAID_VOLUME_RL_RAID0)
 1735                         vol->v_mediasize = size * numdisks;
 1736                 else if (level == G_RAID_VOLUME_RL_RAID1)
 1737                         vol->v_mediasize = size;
 1738                 else if (level == G_RAID_VOLUME_RL_RAID5)
 1739                         vol->v_mediasize = size * (numdisks - 1);
 1740                 else { /* RAID1E */
 1741                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
 1742                             strip;
 1743                 }
 1744                 vol->v_sectorsize = sectorsize;
 1745                 g_raid_start_volume(vol);
 1746 
 1747                 /* , and subdisks. */
 1748                 for (i = 0; i < numdisks; i++) {
 1749                         disk = vol1->v_subdisks[i].sd_disk;
 1750                         sd = &vol->v_subdisks[i];
 1751                         sd->sd_disk = disk;
 1752                         sd->sd_offset = off;
 1753                         sd->sd_size = size;
 1754                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 1755                         if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 1756                                 g_raid_change_subdisk_state(sd,
 1757                                     G_RAID_SUBDISK_S_ACTIVE);
 1758                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 1759                                     G_RAID_EVENT_SUBDISK);
 1760                         }
 1761                 }
 1762 
 1763                 /* Write metadata based on created entities. */
 1764                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1765 
 1766                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 1767                     G_RAID_EVENT_VOLUME);
 1768                 return (0);
 1769         }
 1770         if (strcmp(verb, "delete") == 0) {
 1771 
 1772                 /* Full node destruction. */
 1773                 if (*nargs == 1) {
 1774                         /* Check if some volume is still open. */
 1775                         force = gctl_get_paraml(req, "force", sizeof(*force));
 1776                         if (force != NULL && *force == 0 &&
 1777                             g_raid_nopens(sc) != 0) {
 1778                                 gctl_error(req, "Some volume is still open.");
 1779                                 return (-4);
 1780                         }
 1781 
 1782                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1783                                 if (disk->d_consumer)
 1784                                         intel_meta_erase(disk->d_consumer);
 1785                         }
 1786                         g_raid_destroy_node(sc, 0);
 1787                         return (0);
 1788                 }
 1789 
 1790                 /* Destroy specified volume. If it was last - all node. */
 1791                 if (*nargs != 2) {
 1792                         gctl_error(req, "Invalid number of arguments.");
 1793                         return (-1);
 1794                 }
 1795                 volname = gctl_get_asciiparam(req, "arg1");
 1796                 if (volname == NULL) {
 1797                         gctl_error(req, "No volume name.");
 1798                         return (-2);
 1799                 }
 1800 
 1801                 /* Search for volume. */
 1802                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1803                         if (strcmp(vol->v_name, volname) == 0)
 1804                                 break;
 1805                 }
 1806                 if (vol == NULL) {
 1807                         i = strtol(volname, &tmp, 10);
 1808                         if (verb != volname && tmp[0] == 0) {
 1809                                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 1810                                         if (vol->v_global_id == i)
 1811                                                 break;
 1812                                 }
 1813                         }
 1814                 }
 1815                 if (vol == NULL) {
 1816                         gctl_error(req, "Volume '%s' not found.", volname);
 1817                         return (-3);
 1818                 }
 1819 
 1820                 /* Check if volume is still open. */
 1821                 force = gctl_get_paraml(req, "force", sizeof(*force));
 1822                 if (force != NULL && *force == 0 &&
 1823                     vol->v_provider_open != 0) {
 1824                         gctl_error(req, "Volume is still open.");
 1825                         return (-4);
 1826                 }
 1827 
 1828                 /* Destroy volume and potentially node. */
 1829                 i = 0;
 1830                 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
 1831                         i++;
 1832                 if (i >= 2) {
 1833                         g_raid_destroy_volume(vol);
 1834                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1835                 } else {
 1836                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1837                                 if (disk->d_consumer)
 1838                                         intel_meta_erase(disk->d_consumer);
 1839                         }
 1840                         g_raid_destroy_node(sc, 0);
 1841                 }
 1842                 return (0);
 1843         }
 1844         if (strcmp(verb, "remove") == 0 ||
 1845             strcmp(verb, "fail") == 0) {
 1846                 if (*nargs < 2) {
 1847                         gctl_error(req, "Invalid number of arguments.");
 1848                         return (-1);
 1849                 }
 1850                 for (i = 1; i < *nargs; i++) {
 1851                         snprintf(arg, sizeof(arg), "arg%d", i);
 1852                         diskname = gctl_get_asciiparam(req, arg);
 1853                         if (diskname == NULL) {
 1854                                 gctl_error(req, "No disk name (%s).", arg);
 1855                                 error = -2;
 1856                                 break;
 1857                         }
 1858                         if (strncmp(diskname, "/dev/", 5) == 0)
 1859                                 diskname += 5;
 1860 
 1861                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 1862                                 if (disk->d_consumer != NULL && 
 1863                                     disk->d_consumer->provider != NULL &&
 1864                                     strcmp(disk->d_consumer->provider->name,
 1865                                      diskname) == 0)
 1866                                         break;
 1867                         }
 1868                         if (disk == NULL) {
 1869                                 gctl_error(req, "Disk '%s' not found.",
 1870                                     diskname);
 1871                                 error = -3;
 1872                                 break;
 1873                         }
 1874 
 1875                         if (strcmp(verb, "fail") == 0) {
 1876                                 g_raid_md_fail_disk_intel(md, NULL, disk);
 1877                                 continue;
 1878                         }
 1879 
 1880                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 1881 
 1882                         /* Erase metadata on deleting disk. */
 1883                         intel_meta_erase(disk->d_consumer);
 1884 
 1885                         /* If disk was assigned, just update statuses. */
 1886                         if (pd->pd_disk_pos >= 0) {
 1887                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 1888                                 g_raid_kill_consumer(sc, disk->d_consumer);
 1889                                 disk->d_consumer = NULL;
 1890                                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 1891                                         g_raid_change_subdisk_state(sd,
 1892                                             G_RAID_SUBDISK_S_NONE);
 1893                                         g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1894                                             G_RAID_EVENT_SUBDISK);
 1895                                 }
 1896                         } else {
 1897                                 /* Otherwise -- delete. */
 1898                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 1899                                 g_raid_destroy_disk(disk);
 1900                         }
 1901                 }
 1902 
 1903                 /* Write updated metadata to remaining disks. */
 1904                 g_raid_md_write_intel(md, NULL, NULL, NULL);
 1905 
 1906                 /* Check if anything left except placeholders. */
 1907                 if (g_raid_ndisks(sc, -1) ==
 1908                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 1909                         g_raid_destroy_node(sc, 0);
 1910                 else
 1911                         g_raid_md_intel_refill(sc);
 1912                 return (error);
 1913         }
 1914         if (strcmp(verb, "insert") == 0) {
 1915                 if (*nargs < 2) {
 1916                         gctl_error(req, "Invalid number of arguments.");
 1917                         return (-1);
 1918                 }
 1919                 update = 0;
 1920                 for (i = 1; i < *nargs; i++) {
 1921                         /* Get disk name. */
 1922                         snprintf(arg, sizeof(arg), "arg%d", i);
 1923                         diskname = gctl_get_asciiparam(req, arg);
 1924                         if (diskname == NULL) {
 1925                                 gctl_error(req, "No disk name (%s).", arg);
 1926                                 error = -3;
 1927                                 break;
 1928                         }
 1929 
 1930                         /* Try to find provider with specified name. */
 1931                         g_topology_lock();
 1932                         cp = g_raid_open_consumer(sc, diskname);
 1933                         if (cp == NULL) {
 1934                                 gctl_error(req, "Can't open disk '%s'.",
 1935                                     diskname);
 1936                                 g_topology_unlock();
 1937                                 error = -4;
 1938                                 break;
 1939                         }
 1940                         pp = cp->provider;
 1941                         g_topology_unlock();
 1942 
 1943                         /* Read disk serial. */
 1944                         error = g_raid_md_get_label(cp,
 1945                             &serial[0], INTEL_SERIAL_LEN);
 1946                         if (error != 0) {
 1947                                 gctl_error(req,
 1948                                     "Can't get serial for provider '%s'.",
 1949                                     diskname);
 1950                                 g_raid_kill_consumer(sc, cp);
 1951                                 error = -7;
 1952                                 break;
 1953                         }
 1954 
 1955                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
 1956                         pd->pd_disk_pos = -1;
 1957 
 1958                         disk = g_raid_create_disk(sc);
 1959                         disk->d_consumer = cp;
 1960                         disk->d_md_data = (void *)pd;
 1961                         cp->private = disk;
 1962 
 1963                         /* Read kernel dumping information. */
 1964                         disk->d_kd.offset = 0;
 1965                         disk->d_kd.length = OFF_MAX;
 1966                         len = sizeof(disk->d_kd);
 1967                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
 1968                         if (disk->d_kd.di.dumper == NULL)
 1969                                 G_RAID_DEBUG1(2, sc,
 1970                                     "Dumping not supported by %s.",
 1971                                     cp->provider->name);
 1972 
 1973                         memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
 1974                             INTEL_SERIAL_LEN);
 1975                         pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
 1976                         pd->pd_disk_meta.id = 0;
 1977                         pd->pd_disk_meta.flags = INTEL_F_SPARE;
 1978 
 1979                         /* Welcome the "new" disk. */
 1980                         update += g_raid_md_intel_start_disk(disk);
 1981                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
 1982                                 intel_meta_write_spare(cp, &pd->pd_disk_meta);
 1983                                 g_raid_destroy_disk(disk);
 1984                         } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 1985                                 gctl_error(req, "Disk '%s' doesn't fit.",
 1986                                     diskname);
 1987                                 g_raid_destroy_disk(disk);
 1988                                 error = -8;
 1989                                 break;
 1990                         }
 1991                 }
 1992 
 1993                 /* Write new metadata if we changed something. */
 1994                 if (update)
 1995                         g_raid_md_write_intel(md, NULL, NULL, NULL);
 1996                 return (error);
 1997         }
 1998         return (-100);
 1999 }
 2000 
 2001 static int
 2002 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
 2003     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2004 {
 2005         struct g_raid_softc *sc;
 2006         struct g_raid_volume *vol;
 2007         struct g_raid_subdisk *sd;
 2008         struct g_raid_disk *disk;
 2009         struct g_raid_md_intel_object *mdi;
 2010         struct g_raid_md_intel_perdisk *pd;
 2011         struct intel_raid_conf *meta;
 2012         struct intel_raid_vol *mvol;
 2013         struct intel_raid_map *mmap0, *mmap1;
 2014         off_t sectorsize = 512, pos;
 2015         const char *version, *cv;
 2016         int vi, sdi, numdisks, len, state, stale;
 2017 
 2018         sc = md->mdo_softc;
 2019         mdi = (struct g_raid_md_intel_object *)md;
 2020 
 2021         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 2022                 return (0);
 2023 
 2024         /* Bump generation. Newly written metadata may differ from previous. */
 2025         mdi->mdio_generation++;
 2026 
 2027         /* Count number of disks. */
 2028         numdisks = 0;
 2029         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2030                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2031                 if (pd->pd_disk_pos < 0)
 2032                         continue;
 2033                 numdisks++;
 2034                 if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
 2035                         pd->pd_disk_meta.flags =
 2036                             INTEL_F_ONLINE | INTEL_F_ASSIGNED;
 2037                 } else if (disk->d_state == G_RAID_DISK_S_FAILED) {
 2038                         pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
 2039                 } else {
 2040                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
 2041                         if (pd->pd_disk_meta.id != 0xffffffff) {
 2042                                 pd->pd_disk_meta.id = 0xffffffff;
 2043                                 len = strlen(pd->pd_disk_meta.serial);
 2044                                 len = min(len, INTEL_SERIAL_LEN - 3);
 2045                                 strcpy(pd->pd_disk_meta.serial + len, ":0");
 2046                         }
 2047                 }
 2048         }
 2049 
 2050         /* Fill anchor and disks. */
 2051         meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
 2052             M_MD_INTEL, M_WAITOK | M_ZERO);
 2053         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
 2054         meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
 2055         meta->config_id = mdi->mdio_config_id;
 2056         meta->generation = mdi->mdio_generation;
 2057         meta->attributes = INTEL_ATTR_CHECKSUM;
 2058         meta->total_disks = numdisks;
 2059         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2060                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2061                 if (pd->pd_disk_pos < 0)
 2062                         continue;
 2063                 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
 2064         }
 2065 
 2066         /* Fill volumes and maps. */
 2067         vi = 0;
 2068         version = INTEL_VERSION_1000;
 2069         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 2070                 if (vol->v_stopping)
 2071                         continue;
 2072                 mvol = intel_get_volume(meta, vi);
 2073 
 2074                 /* New metadata may have different volumes order. */
 2075                 vol->v_md_data = (void *)(intptr_t)vi;
 2076 
 2077                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2078                         sd = &vol->v_subdisks[sdi];
 2079                         if (sd->sd_disk != NULL)
 2080                                 break;
 2081                 }
 2082                 if (sdi >= vol->v_disks_count)
 2083                         panic("No any filled subdisk in volume");
 2084                 if (vol->v_mediasize >= 0x20000000000llu)
 2085                         meta->attributes |= INTEL_ATTR_2TB;
 2086                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2087                         meta->attributes |= INTEL_ATTR_RAID0;
 2088                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2089                         meta->attributes |= INTEL_ATTR_RAID1;
 2090                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2091                         meta->attributes |= INTEL_ATTR_RAID5;
 2092                 else
 2093                         meta->attributes |= INTEL_ATTR_RAID10;
 2094 
 2095                 if (meta->attributes & INTEL_ATTR_2TB)
 2096                         cv = INTEL_VERSION_1300;
 2097 //              else if (dev->status == DEV_CLONE_N_GO)
 2098 //                      cv = INTEL_VERSION_1206;
 2099                 else if (vol->v_disks_count > 4)
 2100                         cv = INTEL_VERSION_1204;
 2101                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 2102                         cv = INTEL_VERSION_1202;
 2103                 else if (vol->v_disks_count > 2)
 2104                         cv = INTEL_VERSION_1201;
 2105                 else if (vi > 0)
 2106                         cv = INTEL_VERSION_1200;
 2107                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2108                         cv = INTEL_VERSION_1100;
 2109                 else
 2110                         cv = INTEL_VERSION_1000;
 2111                 if (strcmp(cv, version) > 0)
 2112                         version = cv;
 2113 
 2114                 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
 2115                 mvol->total_sectors = vol->v_mediasize / sectorsize;
 2116 
 2117                 /* Check for any recovery in progress. */
 2118                 state = G_RAID_SUBDISK_S_ACTIVE;
 2119                 pos = 0x7fffffffffffffffllu;
 2120                 stale = 0;
 2121                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2122                         sd = &vol->v_subdisks[sdi];
 2123                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
 2124                                 state = G_RAID_SUBDISK_S_REBUILD;
 2125                         else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
 2126                             state != G_RAID_SUBDISK_S_REBUILD)
 2127                                 state = G_RAID_SUBDISK_S_RESYNC;
 2128                         else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
 2129                                 stale = 1;
 2130                         if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2131                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
 2132                              sd->sd_rebuild_pos < pos)
 2133                                 pos = sd->sd_rebuild_pos;
 2134                 }
 2135                 if (state == G_RAID_SUBDISK_S_REBUILD) {
 2136                         mvol->migr_state = 1;
 2137                         mvol->migr_type = INTEL_MT_REBUILD;
 2138                 } else if (state == G_RAID_SUBDISK_S_RESYNC) {
 2139                         mvol->migr_state = 1;
 2140                         /* mvol->migr_type = INTEL_MT_REPAIR; */
 2141                         mvol->migr_type = INTEL_MT_VERIFY;
 2142                         mvol->state |= INTEL_ST_VERIFY_AND_FIX;
 2143                 } else
 2144                         mvol->migr_state = 0;
 2145                 mvol->dirty = (vol->v_dirty || stale);
 2146 
 2147                 mmap0 = intel_get_map(mvol, 0);
 2148 
 2149                 /* Write map / common part of two maps. */
 2150                 mmap0->offset = sd->sd_offset / sectorsize;
 2151                 mmap0->disk_sectors = sd->sd_size / sectorsize;
 2152                 mmap0->strip_sectors = vol->v_strip_size / sectorsize;
 2153                 if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
 2154                         mmap0->status = INTEL_S_FAILURE;
 2155                 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
 2156                         mmap0->status = INTEL_S_DEGRADED;
 2157                 else
 2158                         mmap0->status = INTEL_S_READY;
 2159                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
 2160                         mmap0->type = INTEL_T_RAID0;
 2161                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 2162                     vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2163                         mmap0->type = INTEL_T_RAID1;
 2164                 else
 2165                         mmap0->type = INTEL_T_RAID5;
 2166                 mmap0->total_disks = vol->v_disks_count;
 2167                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
 2168                         mmap0->total_domains = vol->v_disks_count;
 2169                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 2170                         mmap0->total_domains = 2;
 2171                 else
 2172                         mmap0->total_domains = 1;
 2173                 mmap0->stripe_count = sd->sd_size / vol->v_strip_size /
 2174                     mmap0->total_domains;
 2175                 mmap0->failed_disk_num = 0xff;
 2176                 mmap0->ddf = 1;
 2177 
 2178                 /* If there are two maps - copy common and update. */
 2179                 if (mvol->migr_state) {
 2180                         mvol->curr_migr_unit = pos /
 2181                             vol->v_strip_size / mmap0->total_domains;
 2182                         mmap1 = intel_get_map(mvol, 1);
 2183                         memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
 2184                         mmap0->status = INTEL_S_READY;
 2185                 } else
 2186                         mmap1 = NULL;
 2187 
 2188                 /* Write disk indexes and put rebuild flags. */
 2189                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
 2190                         sd = &vol->v_subdisks[sdi];
 2191                         pd = (struct g_raid_md_intel_perdisk *)
 2192                             sd->sd_disk->d_md_data;
 2193                         mmap0->disk_idx[sdi] = pd->pd_disk_pos;
 2194                         if (mvol->migr_state)
 2195                                 mmap1->disk_idx[sdi] = pd->pd_disk_pos;
 2196                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2197                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 2198                                 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2199                         } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
 2200                             sd->sd_state != G_RAID_SUBDISK_S_STALE) {
 2201                                 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
 2202                                 if (mvol->migr_state)
 2203                                         mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
 2204                         }
 2205                         if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
 2206                              sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
 2207                             mmap0->failed_disk_num == 0xff) {
 2208                                 mmap0->failed_disk_num = sdi;
 2209                                 if (mvol->migr_state)
 2210                                         mmap1->failed_disk_num = sdi;
 2211                         }
 2212                 }
 2213                 vi++;
 2214         }
 2215         meta->total_volumes = vi;
 2216         if (strcmp(version, INTEL_VERSION_1300) != 0)
 2217                 meta->attributes &= INTEL_ATTR_CHECKSUM;
 2218         memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
 2219 
 2220         /* We are done. Print meta data and store them to disks. */
 2221         g_raid_md_intel_print(meta);
 2222         if (mdi->mdio_meta != NULL)
 2223                 free(mdi->mdio_meta, M_MD_INTEL);
 2224         mdi->mdio_meta = meta;
 2225         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 2226                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2227                 if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 2228                         continue;
 2229                 if (pd->pd_meta != NULL) {
 2230                         free(pd->pd_meta, M_MD_INTEL);
 2231                         pd->pd_meta = NULL;
 2232                 }
 2233                 pd->pd_meta = intel_meta_copy(meta);
 2234                 intel_meta_write(disk->d_consumer, meta);
 2235         }
 2236         return (0);
 2237 }
 2238 
 2239 static int
 2240 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
 2241     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 2242 {
 2243         struct g_raid_softc *sc;
 2244         struct g_raid_md_intel_object *mdi;
 2245         struct g_raid_md_intel_perdisk *pd;
 2246         struct g_raid_subdisk *sd;
 2247 
 2248         sc = md->mdo_softc;
 2249         mdi = (struct g_raid_md_intel_object *)md;
 2250         pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
 2251 
 2252         /* We can't fail disk that is not a part of array now. */
 2253         if (pd->pd_disk_pos < 0)
 2254                 return (-1);
 2255 
 2256         /*
 2257          * Mark disk as failed in metadata and try to write that metadata
 2258          * to the disk itself to prevent it's later resurrection as STALE.
 2259          */
 2260         mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
 2261         pd->pd_disk_meta.flags = INTEL_F_FAILED;
 2262         g_raid_md_intel_print(mdi->mdio_meta);
 2263         if (tdisk->d_consumer)
 2264                 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
 2265 
 2266         /* Change states. */
 2267         g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
 2268         TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
 2269                 g_raid_change_subdisk_state(sd,
 2270                     G_RAID_SUBDISK_S_FAILED);
 2271                 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
 2272                     G_RAID_EVENT_SUBDISK);
 2273         }
 2274 
 2275         /* Write updated metadata to remaining disks. */
 2276         g_raid_md_write_intel(md, NULL, NULL, tdisk);
 2277 
 2278         /* Check if anything left except placeholders. */
 2279         if (g_raid_ndisks(sc, -1) ==
 2280             g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 2281                 g_raid_destroy_node(sc, 0);
 2282         else
 2283                 g_raid_md_intel_refill(sc);
 2284         return (0);
 2285 }
 2286 
 2287 static int
 2288 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
 2289     struct g_raid_disk *disk)
 2290 {
 2291         struct g_raid_md_intel_perdisk *pd;
 2292 
 2293         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
 2294         if (pd->pd_meta != NULL) {
 2295                 free(pd->pd_meta, M_MD_INTEL);
 2296                 pd->pd_meta = NULL;
 2297         }
 2298         free(pd, M_MD_INTEL);
 2299         disk->d_md_data = NULL;
 2300         return (0);
 2301 }
 2302 
 2303 static int
 2304 g_raid_md_free_intel(struct g_raid_md_object *md)
 2305 {
 2306         struct g_raid_md_intel_object *mdi;
 2307 
 2308         mdi = (struct g_raid_md_intel_object *)md;
 2309         if (!mdi->mdio_started) {
 2310                 mdi->mdio_started = 0;
 2311                 callout_stop(&mdi->mdio_start_co);
 2312                 G_RAID_DEBUG1(1, md->mdo_softc,
 2313                     "root_mount_rel %p", mdi->mdio_rootmount);
 2314                 root_mount_rel(mdi->mdio_rootmount);
 2315                 mdi->mdio_rootmount = NULL;
 2316         }
 2317         if (mdi->mdio_meta != NULL) {
 2318                 free(mdi->mdio_meta, M_MD_INTEL);
 2319                 mdi->mdio_meta = NULL;
 2320         }
 2321         return (0);
 2322 }
 2323 
 2324 G_RAID_MD_DECLARE(g_raid_md_intel);      /* NOTE(review): presumably registers this metadata class with GEOM RAID -- confirm against the macro definition */

Cache object: 2d85d10d1a45e612853ffdd15356e875


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.