The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/softraid.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $OpenBSD: softraid.c,v 1.429 2022/12/21 09:54:23 kn Exp $ */
    2 /*
    3  * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
    4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
    5  * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
    6  *
    7  * Permission to use, copy, modify, and distribute this software for any
    8  * purpose with or without fee is hereby granted, provided that the above
    9  * copyright notice and this permission notice appear in all copies.
   10  *
   11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   18  */
   19 
   20 #include "bio.h"
   21 
   22 #include <sys/param.h>
   23 #include <sys/systm.h>
   24 #include <sys/buf.h>
   25 #include <sys/device.h>
   26 #include <sys/ioctl.h>
   27 #include <sys/malloc.h>
   28 #include <sys/pool.h>
   29 #include <sys/kernel.h>
   30 #include <sys/disk.h>
   31 #include <sys/rwlock.h>
   32 #include <sys/queue.h>
   33 #include <sys/fcntl.h>
   34 #include <sys/disklabel.h>
   35 #include <sys/vnode.h>
   36 #include <sys/lock.h>
   37 #include <sys/mount.h>
   38 #include <sys/sensors.h>
   39 #include <sys/stat.h>
   40 #include <sys/conf.h>
   41 #include <sys/uio.h>
   42 #include <sys/task.h>
   43 #include <sys/kthread.h>
   44 #include <sys/dkio.h>
   45 #include <sys/stdint.h>
   46 
   47 #include <scsi/scsi_all.h>
   48 #include <scsi/scsiconf.h>
   49 #include <scsi/scsi_disk.h>
   50 
   51 #include <dev/softraidvar.h>
   52 
   53 #ifdef HIBERNATE
   54 #include <lib/libsa/aes_xts.h>
   55 #include <sys/hibernate.h>
   56 #include <scsi/sdvar.h>
   57 #endif /* HIBERNATE */
   58 
   59 /* #define SR_FANCY_STATS */
   60 
   61 #ifdef SR_DEBUG
   62 #define SR_FANCY_STATS
   63 uint32_t        sr_debug = 0
   64                     /* | SR_D_CMD */
   65                     /* | SR_D_MISC */
   66                     /* | SR_D_INTR */
   67                     /* | SR_D_IOCTL */
   68                     /* | SR_D_CCB */
   69                     /* | SR_D_WU */
   70                     /* | SR_D_META */
   71                     /* | SR_D_DIS */
   72                     /* | SR_D_STATE */
   73                     /* | SR_D_REBUILD */
   74                 ;
   75 #endif
   76 
   77 struct sr_softc *softraid0;
   78 struct sr_uuid  sr_bootuuid;
   79 u_int8_t        sr_bootkey[SR_CRYPTO_MAXKEYBYTES];
   80 
   81 int             sr_match(struct device *, void *, void *);
   82 void            sr_attach(struct device *, struct device *, void *);
   83 int             sr_detach(struct device *, int);
   84 void            sr_map_root(void);
   85 
   86 const struct cfattach softraid_ca = {
   87         sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
   88 };
   89 
   90 struct cfdriver softraid_cd = {
   91         NULL, "softraid", DV_DULL
   92 };
   93 
   94 /* scsi & discipline */
   95 void                    sr_scsi_cmd(struct scsi_xfer *);
   96 int                     sr_scsi_probe(struct scsi_link *);
   97 int                     sr_scsi_ioctl(struct scsi_link *, u_long,
   98                             caddr_t, int);
   99 int                     sr_bio_ioctl(struct device *, u_long, caddr_t);
  100 int                     sr_bio_handler(struct sr_softc *,
  101                             struct sr_discipline *, u_long, struct bio *);
  102 int                     sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
  103 int                     sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
  104 int                     sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
  105 int                     sr_ioctl_setstate(struct sr_softc *,
  106                             struct bioc_setstate *);
  107 int                     sr_ioctl_createraid(struct sr_softc *,
  108                             struct bioc_createraid *, int, void *);
  109 int                     sr_ioctl_deleteraid(struct sr_softc *,
  110                             struct sr_discipline *, struct bioc_deleteraid *);
  111 int                     sr_ioctl_discipline(struct sr_softc *,
  112                             struct sr_discipline *, struct bioc_discipline *);
  113 int                     sr_ioctl_installboot(struct sr_softc *,
  114                             struct sr_discipline *, struct bioc_installboot *);
  115 void                    sr_chunks_unwind(struct sr_softc *,
  116                             struct sr_chunk_head *);
  117 void                    sr_discipline_free(struct sr_discipline *);
  118 void                    sr_discipline_shutdown(struct sr_discipline *, int, int);
  119 int                     sr_discipline_init(struct sr_discipline *, int);
  120 int                     sr_alloc_resources(struct sr_discipline *);
  121 void                    sr_free_resources(struct sr_discipline *);
  122 void                    sr_set_chunk_state(struct sr_discipline *, int, int);
  123 void                    sr_set_vol_state(struct sr_discipline *);
  124 
  125 /* utility functions */
  126 void                    sr_shutdown(int);
  127 void                    sr_uuid_generate(struct sr_uuid *);
  128 char                    *sr_uuid_format(struct sr_uuid *);
  129 void                    sr_uuid_print(struct sr_uuid *, int);
  130 void                    sr_checksum_print(u_int8_t *);
  131 int                     sr_boot_assembly(struct sr_softc *);
  132 int                     sr_already_assembled(struct sr_discipline *);
  133 int                     sr_hotspare(struct sr_softc *, dev_t);
  134 void                    sr_hotspare_rebuild(struct sr_discipline *);
  135 int                     sr_rebuild_init(struct sr_discipline *, dev_t, int);
  136 void                    sr_rebuild_start(void *);
  137 void                    sr_rebuild_thread(void *);
  138 void                    sr_rebuild(struct sr_discipline *);
  139 void                    sr_roam_chunks(struct sr_discipline *);
  140 int                     sr_chunk_in_use(struct sr_softc *, dev_t);
  141 int                     sr_rw(struct sr_softc *, dev_t, char *, size_t,
  142                             daddr_t, long);
  143 void                    sr_wu_done_callback(void *);
  144 struct sr_discipline    *sr_find_discipline(struct sr_softc *sc, const char *);
  145 
  146 /* don't include these on RAMDISK */
  147 #ifndef SMALL_KERNEL
  148 void                    sr_sensors_refresh(void *);
  149 int                     sr_sensors_create(struct sr_discipline *);
  150 void                    sr_sensors_delete(struct sr_discipline *);
  151 #endif
  152 
  153 /* metadata */
  154 int                     sr_meta_probe(struct sr_discipline *, dev_t *, int);
  155 int                     sr_meta_attach(struct sr_discipline *, int, int);
  156 int                     sr_meta_rw(struct sr_discipline *, dev_t, void *, long);
  157 int                     sr_meta_clear(struct sr_discipline *);
  158 void                    sr_meta_init(struct sr_discipline *, int, int);
  159 void                    sr_meta_init_complete(struct sr_discipline *);
  160 void                    sr_meta_opt_handler(struct sr_discipline *,
  161                             struct sr_meta_opt_hdr *);
  162 
  163 /* hotplug magic */
  164 void                    sr_disk_attach(struct disk *, int);
  165 
  166 struct sr_hotplug_list {
  167         void                    (*sh_hotplug)(struct sr_discipline *,
  168                                     struct disk *, int);
  169         struct sr_discipline    *sh_sd;
  170 
  171         SLIST_ENTRY(sr_hotplug_list) shl_link;
  172 };
  173 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);
  174 
  175 struct                  sr_hotplug_list_head    sr_hotplug_callbacks;
  176 extern void             (*softraid_disk_attach)(struct disk *, int);
  177 
  178 /* scsi glue */
  179 const struct scsi_adapter sr_switch = {
  180         sr_scsi_cmd, NULL, sr_scsi_probe, NULL, sr_scsi_ioctl
  181 };
  182 
  183 /* native metadata format */
  184 int                     sr_meta_native_bootprobe(struct sr_softc *, dev_t,
  185                             struct sr_boot_chunk_head *);
  186 #define SR_META_NOTCLAIMED      (0)
  187 #define SR_META_CLAIMED         (1)
  188 int                     sr_meta_native_probe(struct sr_softc *,
  189                            struct sr_chunk *);
  190 int                     sr_meta_native_attach(struct sr_discipline *, int);
  191 int                     sr_meta_native_write(struct sr_discipline *, dev_t,
  192                             struct sr_metadata *,void *);
  193 
  194 #ifdef SR_DEBUG
  195 void                    sr_meta_print(struct sr_metadata *);
  196 #else
  197 #define                 sr_meta_print(m)
  198 #endif
  199 
  200 /* the metadata driver should remain stateless */
  201 struct sr_meta_driver {
  202         daddr_t                 smd_offset;     /* metadata location */
  203         u_int32_t               smd_size;       /* size of metadata */
  204 
  205         int                     (*smd_probe)(struct sr_softc *,
  206                                    struct sr_chunk *);
  207         int                     (*smd_attach)(struct sr_discipline *, int);
  208         int                     (*smd_detach)(struct sr_discipline *);
  209         int                     (*smd_read)(struct sr_discipline *, dev_t,
  210                                     struct sr_metadata *, void *);
  211         int                     (*smd_write)(struct sr_discipline *, dev_t,
  212                                     struct sr_metadata *, void *);
  213         int                     (*smd_validate)(struct sr_discipline *,
  214                                     struct sr_metadata *, void *);
  215 } smd[] = {
  216         { SR_META_OFFSET, SR_META_SIZE * DEV_BSIZE,
  217           sr_meta_native_probe, sr_meta_native_attach, NULL,
  218           sr_meta_native_read, sr_meta_native_write, NULL },
  219         { 0, 0, NULL, NULL, NULL, NULL }
  220 };
  221 
  222 int
  223 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
  224 {
  225         struct sr_softc         *sc = sd->sd_sc;
  226         struct sr_chunk_head    *cl;
  227         struct sr_chunk         *ch_entry, *chunk1, *chunk2;
  228         int                     rv = 1, i = 0;
  229 
  230         DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), chunk_no);
  231 
  232         /* in memory copy of metadata */
  233         sd->sd_meta = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF,
  234             M_ZERO | M_NOWAIT);
  235         if (!sd->sd_meta) {
  236                 sr_error(sc, "could not allocate memory for metadata");
  237                 goto bad;
  238         }
  239 
  240         if (sd->sd_meta_type != SR_META_F_NATIVE) {
  241                 /* in memory copy of foreign metadata */
  242                 sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
  243                     M_DEVBUF, M_ZERO | M_NOWAIT);
  244                 if (!sd->sd_meta_foreign) {
  245                         /* unwind frees sd_meta */
  246                         sr_error(sc, "could not allocate memory for foreign "
  247                             "metadata");
  248                         goto bad;
  249                 }
  250         }
  251 
  252         /* we have a valid list now create an array index */
  253         cl = &sd->sd_vol.sv_chunk_list;
  254         sd->sd_vol.sv_chunks = mallocarray(chunk_no, sizeof(struct sr_chunk *),
  255             M_DEVBUF, M_WAITOK | M_ZERO);
  256 
  257         /* fill out chunk array */
  258         i = 0;
  259         SLIST_FOREACH(ch_entry, cl, src_link)
  260                 sd->sd_vol.sv_chunks[i++] = ch_entry;
  261 
  262         /* attach metadata */
  263         if (smd[sd->sd_meta_type].smd_attach(sd, force))
  264                 goto bad;
  265 
  266         /* Force chunks into correct order now that metadata is attached. */
  267         SLIST_INIT(cl);
  268         for (i = 0; i < chunk_no; i++) {
  269                 ch_entry = sd->sd_vol.sv_chunks[i];
  270                 chunk2 = NULL;
  271                 SLIST_FOREACH(chunk1, cl, src_link) {
  272                         if (chunk1->src_meta.scmi.scm_chunk_id >
  273                             ch_entry->src_meta.scmi.scm_chunk_id)
  274                                 break;
  275                         chunk2 = chunk1;
  276                 }
  277                 if (chunk2 == NULL)
  278                         SLIST_INSERT_HEAD(cl, ch_entry, src_link);
  279                 else
  280                         SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
  281         }
  282         i = 0;
  283         SLIST_FOREACH(ch_entry, cl, src_link)
  284                 sd->sd_vol.sv_chunks[i++] = ch_entry;
  285 
  286         rv = 0;
  287 bad:
  288         return (rv);
  289 }
  290 
  291 int
  292 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
  293 {
  294         struct sr_softc         *sc = sd->sd_sc;
  295         struct vnode            *vn;
  296         struct sr_chunk         *ch_entry, *ch_prev = NULL;
  297         struct sr_chunk_head    *cl;
  298         char                    devname[32];
  299         int                     i, d, type, found, prevf, error;
  300         dev_t                   dev;
  301 
  302         DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
  303 
  304         if (no_chunk == 0)
  305                 goto unwind;
  306 
  307         cl = &sd->sd_vol.sv_chunk_list;
  308 
  309         for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
  310                 ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
  311                     M_WAITOK | M_ZERO);
  312                 /* keep disks in user supplied order */
  313                 if (ch_prev)
  314                         SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
  315                 else
  316                         SLIST_INSERT_HEAD(cl, ch_entry, src_link);
  317                 ch_prev = ch_entry;
  318                 dev = dt[d];
  319                 ch_entry->src_dev_mm = dev;
  320 
  321                 if (dev == NODEV) {
  322                         ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
  323                         continue;
  324                 } else {
  325                         sr_meta_getdevname(sc, dev, devname, sizeof(devname));
  326                         if (bdevvp(dev, &vn)) {
  327                                 sr_error(sc, "sr_meta_probe: cannot allocate "
  328                                     "vnode");
  329                                 goto unwind;
  330                         }
  331 
  332                         /*
  333                          * XXX leaving dev open for now; move this to attach
  334                          * and figure out the open/close dance for unwind.
  335                          */
  336                         error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
  337                         if (error) {
  338                                 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
  339                                     "open %s\n", DEVNAME(sc), devname);
  340                                 vput(vn);
  341                                 goto unwind;
  342                         }
  343 
  344                         strlcpy(ch_entry->src_devname, devname,
  345                             sizeof(ch_entry->src_devname));
  346                         ch_entry->src_vn = vn;
  347                 }
  348 
  349                 /* determine if this is a device we understand */
  350                 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
  351                         type = smd[i].smd_probe(sc, ch_entry);
  352                         if (type == SR_META_F_INVALID)
  353                                 continue;
  354                         else {
  355                                 found = type;
  356                                 break;
  357                         }
  358                 }
  359 
  360                 if (found == SR_META_F_INVALID)
  361                         goto unwind;
  362                 if (prevf == SR_META_F_INVALID)
  363                         prevf = found;
  364                 if (prevf != found) {
  365                         DNPRINTF(SR_D_META, "%s: prevf != found\n",
  366                             DEVNAME(sc));
  367                         goto unwind;
  368                 }
  369         }
  370 
  371         return (prevf);
  372 unwind:
  373         return (SR_META_F_INVALID);
  374 }
  375 
  376 void
  377 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
  378 {
  379         int                     maj, unit, part;
  380         char                    *name;
  381 
  382         DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
  383             DEVNAME(sc), buf, size);
  384 
  385         if (!buf)
  386                 return;
  387 
  388         maj = major(dev);
  389         part = DISKPART(dev);
  390         unit = DISKUNIT(dev);
  391 
  392         name = findblkname(maj);
  393         if (name == NULL)
  394                 return;
  395 
  396         snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
  397 }
  398 
  399 int
  400 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr_t blkno,
  401     long flags)
  402 {
  403         struct vnode            *vp;
  404         struct buf              b;
  405         size_t                  bufsize, dma_bufsize;
  406         int                     rv = 1;
  407         char                    *dma_buf;
  408         int                     s;
  409 
  410         DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %zu, %lld 0x%lx)\n",
  411             DEVNAME(sc), dev, buf, size, (long long)blkno, flags);
  412 
  413         dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size;
  414         dma_buf = dma_alloc(dma_bufsize, PR_WAITOK);
  415 
  416         if (bdevvp(dev, &vp)) {
  417                 printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc));
  418                 goto done;
  419         }
  420 
  421         while (size > 0) {
  422                 DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %zu, blkno %lld)\n",
  423                     DEVNAME(sc), dma_buf, size, (long long)blkno);
  424 
  425                 bufsize = (size > MAXPHYS) ? MAXPHYS : size;
  426                 if (flags == B_WRITE)
  427                         memcpy(dma_buf, buf, bufsize);
  428 
  429                 bzero(&b, sizeof(b));
  430                 b.b_flags = flags | B_PHYS;
  431                 b.b_proc = curproc;
  432                 b.b_dev = dev;
  433                 b.b_iodone = NULL;
  434                 b.b_error = 0;
  435                 b.b_blkno = blkno;
  436                 b.b_data = dma_buf;
  437                 b.b_bcount = bufsize;
  438                 b.b_bufsize = bufsize;
  439                 b.b_resid = bufsize;
  440                 b.b_vp = vp;
  441 
  442                 if ((b.b_flags & B_READ) == 0) {
  443                         s = splbio();
  444                         vp->v_numoutput++;
  445                         splx(s);
  446                 }
  447 
  448                 LIST_INIT(&b.b_dep);
  449                 VOP_STRATEGY(vp, &b);
  450                 biowait(&b);
  451 
  452                 if (b.b_flags & B_ERROR) {
  453                         printf("%s: I/O error %d on dev 0x%x at block %llu\n",
  454                             DEVNAME(sc), b.b_error, dev, b.b_blkno);
  455                         goto done;
  456                 }
  457 
  458                 if (flags == B_READ)
  459                         memcpy(buf, dma_buf, bufsize);
  460 
  461                 size -= bufsize;
  462                 buf += bufsize;
  463                 blkno += howmany(bufsize, DEV_BSIZE);
  464         }
  465 
  466         rv = 0;
  467 
  468 done:
  469         if (vp)
  470                 vput(vp);
  471 
  472         dma_free(dma_buf, dma_bufsize);
  473 
  474         return (rv);
  475 }
  476 
  477 int
  478 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, long flags)
  479 {
  480         int                     rv = 1;
  481 
  482         DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, 0x%lx)\n",
  483             DEVNAME(sd->sd_sc), dev, md, flags);
  484 
  485         if (md == NULL) {
  486                 printf("%s: sr_meta_rw: invalid metadata pointer\n",
  487                     DEVNAME(sd->sd_sc));
  488                 goto done;
  489         }
  490 
  491         rv = sr_rw(sd->sd_sc, dev, md, SR_META_SIZE * DEV_BSIZE,
  492             SR_META_OFFSET, flags);
  493 
  494 done:
  495         return (rv);
  496 }
  497 
  498 int
  499 sr_meta_clear(struct sr_discipline *sd)
  500 {
  501         struct sr_softc         *sc = sd->sd_sc;
  502         struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
  503         struct sr_chunk         *ch_entry;
  504         void                    *m;
  505         int                     rv = 1;
  506 
  507         DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
  508 
  509         if (sd->sd_meta_type != SR_META_F_NATIVE) {
  510                 sr_error(sc, "cannot clear foreign metadata");
  511                 goto done;
  512         }
  513 
  514         m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO);
  515         SLIST_FOREACH(ch_entry, cl, src_link) {
  516                 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
  517                         /* XXX mark disk offline */
  518                         DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
  519                             "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
  520                         rv++;
  521                         continue;
  522                 }
  523                 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
  524         }
  525 
  526         bzero(sd->sd_meta, SR_META_SIZE * DEV_BSIZE);
  527 
  528         free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
  529         rv = 0;
  530 done:
  531         return (rv);
  532 }
  533 
  534 void
  535 sr_meta_init(struct sr_discipline *sd, int level, int no_chunk)
  536 {
  537         struct sr_softc         *sc = sd->sd_sc;
  538         struct sr_metadata      *sm = sd->sd_meta;
  539         struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
  540         struct sr_meta_chunk    *scm;
  541         struct sr_chunk         *chunk;
  542         int                     cid = 0;
  543         u_int64_t               max_chunk_sz = 0, min_chunk_sz = 0;
  544         u_int32_t               secsize = DEV_BSIZE;
  545 
  546         DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
  547 
  548         if (!sm)
  549                 return;
  550 
  551         /* Initialise volume metadata. */
  552         sm->ssdi.ssd_magic = SR_MAGIC;
  553         sm->ssdi.ssd_version = SR_META_VERSION;
  554         sm->ssdi.ssd_vol_flags = sd->sd_meta_flags;
  555         sm->ssdi.ssd_volid = 0;
  556         sm->ssdi.ssd_chunk_no = no_chunk;
  557         sm->ssdi.ssd_level = level;
  558 
  559         sm->ssd_data_blkno = SR_DATA_OFFSET;
  560         sm->ssd_ondisk = 0;
  561 
  562         sr_uuid_generate(&sm->ssdi.ssd_uuid);
  563 
  564         /* Initialise chunk metadata and get min/max chunk sizes & secsize. */
  565         SLIST_FOREACH(chunk, cl, src_link) {
  566                 scm = &chunk->src_meta;
  567                 scm->scmi.scm_size = chunk->src_size;
  568                 scm->scmi.scm_chunk_id = cid++;
  569                 scm->scm_status = BIOC_SDONLINE;
  570                 scm->scmi.scm_volid = 0;
  571                 strlcpy(scm->scmi.scm_devname, chunk->src_devname,
  572                     sizeof(scm->scmi.scm_devname));
  573                 memcpy(&scm->scmi.scm_uuid, &sm->ssdi.ssd_uuid,
  574                     sizeof(scm->scmi.scm_uuid));
  575                 sr_checksum(sc, scm, &scm->scm_checksum,
  576                     sizeof(scm->scm_checksum));
  577 
  578                 if (min_chunk_sz == 0)
  579                         min_chunk_sz = scm->scmi.scm_size;
  580                 if (chunk->src_secsize > secsize)
  581                         secsize = chunk->src_secsize;
  582                 min_chunk_sz = MIN(min_chunk_sz, scm->scmi.scm_size);
  583                 max_chunk_sz = MAX(max_chunk_sz, scm->scmi.scm_size);
  584         }
  585 
  586         sm->ssdi.ssd_secsize = secsize;
  587 
  588         /* Equalize chunk sizes. */
  589         SLIST_FOREACH(chunk, cl, src_link)
  590                 chunk->src_meta.scmi.scm_coerced_size = min_chunk_sz;
  591 
  592         sd->sd_vol.sv_chunk_minsz = min_chunk_sz;
  593         sd->sd_vol.sv_chunk_maxsz = max_chunk_sz;
  594 }
  595 
  596 void
  597 sr_meta_init_complete(struct sr_discipline *sd)
  598 {
  599 #ifdef SR_DEBUG
  600         struct sr_softc         *sc = sd->sd_sc;
  601 #endif
  602         struct sr_metadata      *sm = sd->sd_meta;
  603 
  604         DNPRINTF(SR_D_META, "%s: sr_meta_complete\n", DEVNAME(sc));
  605 
  606         /* Complete initialisation of volume metadata. */
  607         strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
  608         snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
  609             "SR %s", sd->sd_name);
  610         snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
  611             "%03d", sm->ssdi.ssd_version);
  612 }
  613 
  614 void
  615 sr_meta_opt_handler(struct sr_discipline *sd, struct sr_meta_opt_hdr *om)
  616 {
  617         if (om->som_type != SR_OPT_BOOT)
  618                 panic("unknown optional metadata type");
  619 }
  620 
  621 void
  622 sr_meta_save_callback(void *xsd)
  623 {
  624         struct sr_discipline    *sd = xsd;
  625         int                     s;
  626 
  627         s = splbio();
  628 
  629         if (sr_meta_save(sd, SR_META_DIRTY))
  630                 printf("%s: save metadata failed\n", DEVNAME(sd->sd_sc));
  631 
  632         sd->sd_must_flush = 0;
  633         splx(s);
  634 }
  635 
  636 int
  637 sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
  638 {
  639         struct sr_softc         *sc = sd->sd_sc;
  640         struct sr_metadata      *sm = sd->sd_meta, *m;
  641         struct sr_meta_driver   *s;
  642         struct sr_chunk         *src;
  643         struct sr_meta_chunk    *cm;
  644         struct sr_workunit      wu;
  645         struct sr_meta_opt_hdr  *omh;
  646         struct sr_meta_opt_item *omi;
  647         int                     i;
  648 
  649         DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
  650             DEVNAME(sc), sd->sd_meta->ssd_devname);
  651 
  652         if (!sm) {
  653                 printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
  654                 goto bad;
  655         }
  656 
  657         /* meta scratchpad */
  658         s = &smd[sd->sd_meta_type];
  659         m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT);
  660         if (!m) {
  661                 printf("%s: could not allocate metadata scratch area\n",
  662                     DEVNAME(sc));
  663                 goto bad;
  664         }
  665 
  666         /* from here on out metadata is updated */
  667 restart:
  668         sm->ssd_ondisk++;
  669         sm->ssd_meta_flags = flags;
  670         memcpy(m, sm, sizeof(*m));
  671 
  672         /* Chunk metadata. */
  673         cm = (struct sr_meta_chunk *)(m + 1);
  674         for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
  675                 src = sd->sd_vol.sv_chunks[i];
  676                 memcpy(cm, &src->src_meta, sizeof(*cm));
  677                 cm++;
  678         }
  679 
  680         /* Optional metadata. */
  681         omh = (struct sr_meta_opt_hdr *)(cm);
  682         SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
  683                 DNPRINTF(SR_D_META, "%s: saving optional metadata type %u with "
  684                     "length %u\n", DEVNAME(sc), omi->omi_som->som_type,
  685                     omi->omi_som->som_length);
  686                 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
  687                 sr_checksum(sc, omi->omi_som, &omi->omi_som->som_checksum,
  688                     omi->omi_som->som_length);
  689                 memcpy(omh, omi->omi_som, omi->omi_som->som_length);
  690                 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)omh +
  691                     omi->omi_som->som_length);
  692         }
  693 
  694         for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
  695                 src = sd->sd_vol.sv_chunks[i];
  696 
  697                 /* skip disks that are offline */
  698                 if (src->src_meta.scm_status == BIOC_SDOFFLINE)
  699                         continue;
  700 
  701                 /* calculate metadata checksum for correct chunk */
  702                 m->ssdi.ssd_chunk_id = i;
  703                 sr_checksum(sc, m, &m->ssd_checksum,
  704                     sizeof(struct sr_meta_invariant));
  705 
  706 #ifdef SR_DEBUG
  707                 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
  708                     "chunkid: %d checksum: ",
  709                     DEVNAME(sc), src->src_meta.scmi.scm_devname,
  710                     m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
  711 
  712                 if (sr_debug & SR_D_META)
  713                         sr_checksum_print((u_int8_t *)&m->ssd_checksum);
  714                 DNPRINTF(SR_D_META, "\n");
  715                 sr_meta_print(m);
  716 #endif
  717 
  718                 /* translate and write to disk */
  719                 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
  720                         printf("%s: could not write metadata to %s\n",
  721                             DEVNAME(sc), src->src_devname);
  722                         /* restart the meta write */
  723                         src->src_meta.scm_status = BIOC_SDOFFLINE;
  724                         /* XXX recalculate volume status */
  725                         goto restart;
  726                 }
  727         }
  728 
  729         /* not all disciplines have sync */
  730         if (sd->sd_scsi_sync) {
  731                 bzero(&wu, sizeof(wu));
  732                 wu.swu_flags |= SR_WUF_FAKE;
  733                 wu.swu_dis = sd;
  734                 sd->sd_scsi_sync(&wu);
  735         }
  736         free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
  737         return (0);
  738 bad:
  739         return (1);
  740 }
  741 
  742 int
  743 sr_meta_read(struct sr_discipline *sd)
  744 {
  745         struct sr_softc         *sc = sd->sd_sc;
  746         struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
  747         struct sr_metadata      *sm;
  748         struct sr_chunk         *ch_entry;
  749         struct sr_meta_chunk    *cp;
  750         struct sr_meta_driver   *s;
  751         void                    *fm = NULL;
  752         int                     no_disk = 0, got_meta = 0;
  753 
  754         DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
  755 
  756         sm = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO);
  757         s = &smd[sd->sd_meta_type];
  758         if (sd->sd_meta_type != SR_META_F_NATIVE)
  759                 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);
  760 
  761         cp = (struct sr_meta_chunk *)(sm + 1);
  762         SLIST_FOREACH(ch_entry, cl, src_link) {
  763                 /* skip disks that are offline */
  764                 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
  765                         DNPRINTF(SR_D_META,
  766                             "%s: %s chunk marked offline, spoofing status\n",
  767                             DEVNAME(sc), ch_entry->src_devname);
  768                         cp++; /* adjust chunk pointer to match failure */
  769                         continue;
  770                 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
  771                         /* read and translate */
  772                         /* XXX mark chunk offline, elsewhere!! */
  773                         ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
  774                         cp++; /* adjust chunk pointer to match failure */
  775                         DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
  776                             DEVNAME(sc));
  777                         continue;
  778                 }
  779 
  780                 if (sm->ssdi.ssd_magic != SR_MAGIC) {
  781                         DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
  782                             DEVNAME(sc));
  783                         continue;
  784                 }
  785 
  786                 /* validate metadata */
  787                 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
  788                         DNPRINTF(SR_D_META, "%s: invalid metadata\n",
  789                             DEVNAME(sc));
  790                         no_disk = -1;
  791                         goto done;
  792                 }
  793 
  794                 /* assume first chunk contains metadata */
  795                 if (got_meta == 0) {
  796                         sr_meta_opt_load(sc, sm, &sd->sd_meta_opt);
  797                         memcpy(sd->sd_meta, sm, sizeof(*sd->sd_meta));
  798                         got_meta = 1;
  799                 }
  800 
  801                 memcpy(&ch_entry->src_meta, cp, sizeof(ch_entry->src_meta));
  802 
  803                 no_disk++;
  804                 cp++;
  805         }
  806 
  807         free(sm, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
  808         free(fm, M_DEVBUF, s->smd_size);
  809 
  810 done:
  811         DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
  812             no_disk);
  813         return (no_disk);
  814 }
  815 
  816 void
  817 sr_meta_opt_load(struct sr_softc *sc, struct sr_metadata *sm,
  818     struct sr_meta_opt_head *som)
  819 {
  820         struct sr_meta_opt_hdr  *omh;
  821         struct sr_meta_opt_item *omi;
  822         u_int8_t                checksum[MD5_DIGEST_LENGTH];
  823         int                     i;
  824 
  825         /* Process optional metadata. */
  826         omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
  827             sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
  828         for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
  829 
  830                 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
  831                     M_WAITOK | M_ZERO);
  832                 SLIST_INSERT_HEAD(som, omi, omi_link);
  833 
  834                 if (omh->som_length == 0) {
  835 
  836                         /* Load old fixed length optional metadata. */
  837                         DNPRINTF(SR_D_META, "%s: old optional metadata of type "
  838                             "%u\n", DEVNAME(sc), omh->som_type);
  839 
  840                         /* Validate checksum. */
  841                         sr_checksum(sc, (void *)omh, &checksum,
  842                             SR_OLD_META_OPT_SIZE - MD5_DIGEST_LENGTH);
  843                         if (bcmp(&checksum, (void *)omh + SR_OLD_META_OPT_MD5,
  844                             sizeof(checksum)))
  845                                 panic("%s: invalid optional metadata checksum",
  846                                     DEVNAME(sc));
  847 
  848                         /* Determine correct length. */
  849                         switch (omh->som_type) {
  850                         case SR_OPT_CRYPTO:
  851                                 omh->som_length = sizeof(struct sr_meta_crypto);
  852                                 break;
  853                         case SR_OPT_BOOT:
  854                                 omh->som_length = sizeof(struct sr_meta_boot);
  855                                 break;
  856                         case SR_OPT_KEYDISK:
  857                                 omh->som_length =
  858                                     sizeof(struct sr_meta_keydisk);
  859                                 break;
  860                         default:
  861                                 panic("unknown old optional metadata type %u",
  862                                     omh->som_type);
  863                         }
  864 
  865                         omi->omi_som = malloc(omh->som_length, M_DEVBUF,
  866                             M_WAITOK | M_ZERO);
  867                         memcpy((u_int8_t *)omi->omi_som + sizeof(*omi->omi_som),
  868                             (u_int8_t *)omh + SR_OLD_META_OPT_OFFSET,
  869                             omh->som_length - sizeof(*omi->omi_som));
  870                         omi->omi_som->som_type = omh->som_type;
  871                         omi->omi_som->som_length = omh->som_length;
  872 
  873                         omh = (struct sr_meta_opt_hdr *)((void *)omh +
  874                             SR_OLD_META_OPT_SIZE);
  875                 } else {
  876 
  877                         /* Load variable length optional metadata. */
  878                         DNPRINTF(SR_D_META, "%s: optional metadata of type %u, "
  879                             "length %u\n", DEVNAME(sc), omh->som_type,
  880                             omh->som_length);
  881                         omi->omi_som = malloc(omh->som_length, M_DEVBUF,
  882                             M_WAITOK | M_ZERO);
  883                         memcpy(omi->omi_som, omh, omh->som_length);
  884 
  885                         /* Validate checksum. */
  886                         memcpy(&checksum, &omi->omi_som->som_checksum,
  887                             MD5_DIGEST_LENGTH);
  888                         bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
  889                         sr_checksum(sc, omi->omi_som,
  890                             &omi->omi_som->som_checksum, omh->som_length);
  891                         if (bcmp(&checksum, &omi->omi_som->som_checksum,
  892                             sizeof(checksum)))
  893                                 panic("%s: invalid optional metadata checksum",
  894                                     DEVNAME(sc));
  895 
  896                         omh = (struct sr_meta_opt_hdr *)((void *)omh +
  897                             omh->som_length);
  898                 }
  899         }
  900 }
  901 
  902 int
  903 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
  904     void *fm)
  905 {
  906         struct sr_softc         *sc = sd->sd_sc;
  907         struct sr_meta_driver   *s;
  908 #ifdef SR_DEBUG
  909         struct sr_meta_chunk    *mc;
  910 #endif
  911         u_int8_t                checksum[MD5_DIGEST_LENGTH];
  912         char                    devname[32];
  913         int                     rv = 1;
  914 
  915         DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
  916 
  917         sr_meta_getdevname(sc, dev, devname, sizeof(devname));
  918 
  919         s = &smd[sd->sd_meta_type];
  920         if (sd->sd_meta_type != SR_META_F_NATIVE)
  921                 if (s->smd_validate(sd, sm, fm)) {
  922                         sr_error(sc, "invalid foreign metadata");
  923                         goto done;
  924                 }
  925 
  926         /*
  927          * at this point all foreign metadata has been translated to the native
  928          * format and will be treated just like the native format
  929          */
  930 
  931         if (sm->ssdi.ssd_magic != SR_MAGIC) {
  932                 sr_error(sc, "not valid softraid metadata");
  933                 goto done;
  934         }
  935 
  936         /* Verify metadata checksum. */
  937         sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
  938         if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
  939                 sr_error(sc, "invalid metadata checksum");
  940                 goto done;
  941         }
  942 
  943         /* Handle changes between versions. */
  944         if (sm->ssdi.ssd_version == 3) {
  945 
  946                 /*
  947                  * Version 3 - update metadata version and fix up data blkno
  948                  * value since this did not exist in version 3.
  949                  */
  950                 if (sm->ssd_data_blkno == 0)
  951                         sm->ssd_data_blkno = SR_META_V3_DATA_OFFSET;
  952                 sm->ssdi.ssd_secsize = DEV_BSIZE;
  953 
  954         } else if (sm->ssdi.ssd_version == 4) {
  955 
  956                 /*
  957                  * Version 4 - original metadata format did not store
  958                  * data blkno so fix this up if necessary.
  959                  */
  960                 if (sm->ssd_data_blkno == 0)
  961                         sm->ssd_data_blkno = SR_DATA_OFFSET;
  962                 sm->ssdi.ssd_secsize = DEV_BSIZE;
  963 
  964         } else if (sm->ssdi.ssd_version == 5) {
  965 
  966                 /*
  967                  * Version 5 - variable length optional metadata. Migration
  968                  * from earlier fixed length optional metadata is handled
  969                  * in sr_meta_read().
  970                  */
  971                 sm->ssdi.ssd_secsize = DEV_BSIZE;
  972 
  973         } else if (sm->ssdi.ssd_version == SR_META_VERSION) {
  974 
  975                 /*
  976                  * Version 6 - store & report a sector size.
  977                  */
  978 
  979         } else {
  980 
  981                 sr_error(sc, "cannot read metadata version %u on %s, "
  982                     "expected version %u or earlier",
  983                     sm->ssdi.ssd_version, devname, SR_META_VERSION);
  984                 goto done;
  985 
  986         }
  987 
  988         /* Update version number and revision string. */
  989         sm->ssdi.ssd_version = SR_META_VERSION;
  990         snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
  991             "%03d", SR_META_VERSION);
  992 
  993 #ifdef SR_DEBUG
  994         /* warn if disk changed order */
  995         mc = (struct sr_meta_chunk *)(sm + 1);
  996         if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
  997             sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
  998                 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
  999                     DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
 1000                     devname);
 1001 #endif
 1002 
 1003         /* we have meta data on disk */
 1004         DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
 1005             DEVNAME(sc), devname);
 1006 
 1007         rv = 0;
 1008 done:
 1009         return (rv);
 1010 }
 1011 
 1012 int
 1013 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
 1014     struct sr_boot_chunk_head *bch)
 1015 {
 1016         struct vnode            *vn;
 1017         struct disklabel        label;
 1018         struct sr_metadata      *md = NULL;
 1019         struct sr_discipline    *fake_sd = NULL;
 1020         struct sr_boot_chunk    *bc;
 1021         char                    devname[32];
 1022         dev_t                   chrdev, rawdev;
 1023         int                     error, i;
 1024         int                     rv = SR_META_NOTCLAIMED;
 1025 
 1026         DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
 1027 
 1028         /*
 1029          * Use character raw device to avoid SCSI complaints about missing
 1030          * media on removable media devices.
 1031          */
 1032         chrdev = blktochr(devno);
 1033         rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART);
 1034         if (cdevvp(rawdev, &vn)) {
 1035                 sr_error(sc, "sr_meta_native_bootprobe: cannot allocate vnode");
 1036                 goto done;
 1037         }
 1038 
 1039         /* open device */
 1040         error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
 1041         if (error) {
 1042                 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
 1043                     "failed\n", DEVNAME(sc));
 1044                 vput(vn);
 1045                 goto done;
 1046         }
 1047 
 1048         /* get disklabel */
 1049         error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED,
 1050             curproc);
 1051         if (error) {
 1052                 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
 1053                     "failed\n", DEVNAME(sc));
 1054                 VOP_CLOSE(vn, FREAD, NOCRED, curproc);
 1055                 vput(vn);
 1056                 goto done;
 1057         }
 1058 
 1059         /* we are done, close device */
 1060         error = VOP_CLOSE(vn, FREAD, NOCRED, curproc);
 1061         if (error) {
 1062                 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
 1063                     "failed\n", DEVNAME(sc));
 1064                 vput(vn);
 1065                 goto done;
 1066         }
 1067         vput(vn);
 1068 
 1069         md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT);
 1070         if (md == NULL) {
 1071                 sr_error(sc, "not enough memory for metadata buffer");
 1072                 goto done;
 1073         }
 1074 
 1075         /* create fake sd to use utility functions */
 1076         fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF,
 1077             M_ZERO | M_NOWAIT);
 1078         if (fake_sd == NULL) {
 1079                 sr_error(sc, "not enough memory for fake discipline");
 1080                 goto done;
 1081         }
 1082         fake_sd->sd_sc = sc;
 1083         fake_sd->sd_meta_type = SR_META_F_NATIVE;
 1084 
 1085         for (i = 0; i < MAXPARTITIONS; i++) {
 1086                 if (label.d_partitions[i].p_fstype != FS_RAID)
 1087                         continue;
 1088 
 1089                 /* open partition */
 1090                 rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i);
 1091                 if (bdevvp(rawdev, &vn)) {
 1092                         sr_error(sc, "sr_meta_native_bootprobe: cannot "
 1093                             "allocate vnode for partition");
 1094                         goto done;
 1095                 }
 1096                 error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
 1097                 if (error) {
 1098                         DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
 1099                             "open failed, partition %d\n",
 1100                             DEVNAME(sc), i);
 1101                         vput(vn);
 1102                         continue;
 1103                 }
 1104 
 1105                 if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) {
 1106                         sr_error(sc, "native bootprobe could not read native "
 1107                             "metadata");
 1108                         VOP_CLOSE(vn, FREAD, NOCRED, curproc);
 1109                         vput(vn);
 1110                         continue;
 1111                 }
 1112 
 1113                 /* are we a softraid partition? */
 1114                 if (md->ssdi.ssd_magic != SR_MAGIC) {
 1115                         VOP_CLOSE(vn, FREAD, NOCRED, curproc);
 1116                         vput(vn);
 1117                         continue;
 1118                 }
 1119 
 1120                 sr_meta_getdevname(sc, rawdev, devname, sizeof(devname));
 1121                 if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) {
 1122                         /* XXX fix M_WAITOK, this is boot time */
 1123                         bc = malloc(sizeof(struct sr_boot_chunk),
 1124                             M_DEVBUF, M_WAITOK | M_ZERO);
 1125                         bc->sbc_metadata = malloc(sizeof(struct sr_metadata),
 1126                             M_DEVBUF, M_WAITOK | M_ZERO);
 1127                         memcpy(bc->sbc_metadata, md, sizeof(struct sr_metadata));
 1128                         bc->sbc_mm = rawdev;
 1129                         SLIST_INSERT_HEAD(bch, bc, sbc_link);
 1130                         rv = SR_META_CLAIMED;
 1131                 }
 1132 
 1133                 /* we are done, close partition */
 1134                 VOP_CLOSE(vn, FREAD, NOCRED, curproc);
 1135                 vput(vn);
 1136         }
 1137 
 1138 done:
 1139         free(fake_sd, M_DEVBUF, sizeof(struct sr_discipline));
 1140         free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
 1141 
 1142         return (rv);
 1143 }
 1144 
 1145 int
 1146 sr_boot_assembly(struct sr_softc *sc)
 1147 {
 1148         struct sr_boot_volume_head bvh;
 1149         struct sr_boot_chunk_head bch, kdh;
 1150         struct sr_boot_volume   *bv, *bv1, *bv2;
 1151         struct sr_boot_chunk    *bc, *bcnext, *bc1, *bc2;
 1152         struct sr_disk_head     sdklist;
 1153         struct sr_disk          *sdk;
 1154         struct disk             *dk;
 1155         struct bioc_createraid  bcr;
 1156         struct sr_meta_chunk    *hm;
 1157         struct sr_chunk_head    *cl;
 1158         struct sr_chunk         *hotspare, *chunk, *last;
 1159         u_int64_t               *ondisk = NULL;
 1160         dev_t                   *devs = NULL;
 1161         void                    *data;
 1162         char                    devname[32];
 1163         int                     rv = 0, i;
 1164 
 1165         DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
 1166 
 1167         SLIST_INIT(&sdklist);
 1168         SLIST_INIT(&bvh);
 1169         SLIST_INIT(&bch);
 1170         SLIST_INIT(&kdh);
 1171 
 1172         dk = TAILQ_FIRST(&disklist);
 1173         while (dk != NULL) {
 1174 
 1175                 /* See if this disk has been checked. */
 1176                 SLIST_FOREACH(sdk, &sdklist, sdk_link)
 1177                         if (sdk->sdk_devno == dk->dk_devno)
 1178                                 break;
 1179 
 1180                 if (sdk != NULL || dk->dk_devno == NODEV) {
 1181                         dk = TAILQ_NEXT(dk, dk_link);
 1182                         continue;
 1183                 }
 1184 
 1185                 /* Add this disk to the list that we've checked. */
 1186                 sdk = malloc(sizeof(struct sr_disk), M_DEVBUF,
 1187                     M_NOWAIT | M_ZERO);
 1188                 if (sdk == NULL)
 1189                         goto unwind;
 1190                 sdk->sdk_devno = dk->dk_devno;
 1191                 SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link);
 1192 
 1193                 /* Only check sd(4) and wd(4) devices. */
 1194                 if (strncmp(dk->dk_name, "sd", 2) &&
 1195                     strncmp(dk->dk_name, "wd", 2)) {
 1196                         dk = TAILQ_NEXT(dk, dk_link);
 1197                         continue;
 1198                 }
 1199 
 1200                 /* native softraid uses partitions */
 1201                 rw_enter_write(&sc->sc_lock);
 1202                 bio_status_init(&sc->sc_status, &sc->sc_dev);
 1203                 sr_meta_native_bootprobe(sc, dk->dk_devno, &bch);
 1204                 rw_exit_write(&sc->sc_lock);
 1205 
 1206                 /* probe non-native disks if native failed. */
 1207 
 1208                 /* Restart scan since we may have slept. */
 1209                 dk = TAILQ_FIRST(&disklist);
 1210         }
 1211 
 1212         /*
 1213          * Create a list of volumes and associate chunks with each volume.
 1214          */
 1215         for (bc = SLIST_FIRST(&bch); bc != NULL; bc = bcnext) {
 1216 
 1217                 bcnext = SLIST_NEXT(bc, sbc_link);
 1218                 SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link);
 1219                 bc->sbc_chunk_id = bc->sbc_metadata->ssdi.ssd_chunk_id;
 1220 
 1221                 /* Handle key disks separately. */
 1222                 if (bc->sbc_metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) {
 1223                         SLIST_INSERT_HEAD(&kdh, bc, sbc_link);
 1224                         continue;
 1225                 }
 1226 
 1227                 SLIST_FOREACH(bv, &bvh, sbv_link) {
 1228                         if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid,
 1229                             &bv->sbv_uuid,
 1230                             sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) == 0)
 1231                                 break;
 1232                 }
 1233 
 1234                 if (bv == NULL) {
 1235                         bv = malloc(sizeof(struct sr_boot_volume),
 1236                             M_DEVBUF, M_NOWAIT | M_ZERO);
 1237                         if (bv == NULL) {
 1238                                 printf("%s: failed to allocate boot volume\n",
 1239                                     DEVNAME(sc));
 1240                                 goto unwind;
 1241                         }
 1242 
 1243                         bv->sbv_level = bc->sbc_metadata->ssdi.ssd_level;
 1244                         bv->sbv_volid = bc->sbc_metadata->ssdi.ssd_volid;
 1245                         bv->sbv_chunk_no = bc->sbc_metadata->ssdi.ssd_chunk_no;
 1246                         bv->sbv_flags = bc->sbc_metadata->ssdi.ssd_vol_flags;
 1247                         memcpy(&bv->sbv_uuid, &bc->sbc_metadata->ssdi.ssd_uuid,
 1248                             sizeof(bc->sbc_metadata->ssdi.ssd_uuid));
 1249                         SLIST_INIT(&bv->sbv_chunks);
 1250 
 1251                         /* Maintain volume order. */
 1252                         bv2 = NULL;
 1253                         SLIST_FOREACH(bv1, &bvh, sbv_link) {
 1254                                 if (bv1->sbv_volid > bv->sbv_volid)
 1255                                         break;
 1256                                 bv2 = bv1;
 1257                         }
 1258                         if (bv2 == NULL) {
 1259                                 DNPRINTF(SR_D_META, "%s: insert volume %u "
 1260                                     "at head\n", DEVNAME(sc), bv->sbv_volid);
 1261                                 SLIST_INSERT_HEAD(&bvh, bv, sbv_link);
 1262                         } else {
 1263                                 DNPRINTF(SR_D_META, "%s: insert volume %u "
 1264                                     "after %u\n", DEVNAME(sc), bv->sbv_volid,
 1265                                     bv2->sbv_volid);
 1266                                 SLIST_INSERT_AFTER(bv2, bv, sbv_link);
 1267                         }
 1268                 }
 1269 
 1270                 /* Maintain chunk order. */
 1271                 bc2 = NULL;
 1272                 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) {
 1273                         if (bc1->sbc_chunk_id > bc->sbc_chunk_id)
 1274                                 break;
 1275                         bc2 = bc1;
 1276                 }
 1277                 if (bc2 == NULL) {
 1278                         DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
 1279                             "at head\n", DEVNAME(sc), bv->sbv_volid,
 1280                             bc->sbc_chunk_id);
 1281                         SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link);
 1282                 } else {
 1283                         DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
 1284                             "after %u\n", DEVNAME(sc), bv->sbv_volid,
 1285                             bc->sbc_chunk_id, bc2->sbc_chunk_id);
 1286                         SLIST_INSERT_AFTER(bc2, bc, sbc_link);
 1287                 }
 1288 
 1289                 bv->sbv_chunks_found++;
 1290         }
 1291 
 1292         /* Allocate memory for device and ondisk version arrays. */
 1293         devs = mallocarray(BIOC_CRMAXLEN, sizeof(dev_t), M_DEVBUF,
 1294             M_NOWAIT);
 1295         if (devs == NULL) {
 1296                 printf("%s: failed to allocate device array\n", DEVNAME(sc));
 1297                 goto unwind;
 1298         }
 1299         ondisk = mallocarray(BIOC_CRMAXLEN, sizeof(u_int64_t), M_DEVBUF,
 1300             M_NOWAIT);
 1301         if (ondisk == NULL) {
 1302                 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
 1303                 goto unwind;
 1304         }
 1305 
 1306         /*
 1307          * Assemble hotspare "volumes".
 1308          */
 1309         SLIST_FOREACH(bv, &bvh, sbv_link) {
 1310 
 1311                 /* Check if this is a hotspare "volume". */
 1312                 if (bv->sbv_level != SR_HOTSPARE_LEVEL ||
 1313                     bv->sbv_chunk_no != 1)
 1314                         continue;
 1315 
 1316 #ifdef SR_DEBUG
 1317                 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ",
 1318                     DEVNAME(sc));
 1319                 if (sr_debug & SR_D_META)
 1320                         sr_uuid_print(&bv->sbv_uuid, 0);
 1321                 DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
 1322                     bv->sbv_volid, bv->sbv_chunk_no);
 1323 #endif
 1324 
 1325                 /* Create hotspare chunk metadata. */
 1326                 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF,
 1327                     M_NOWAIT | M_ZERO);
 1328                 if (hotspare == NULL) {
 1329                         printf("%s: failed to allocate hotspare\n",
 1330                             DEVNAME(sc));
 1331                         goto unwind;
 1332                 }
 1333 
 1334                 bc = SLIST_FIRST(&bv->sbv_chunks);
 1335                 sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname));
 1336                 hotspare->src_dev_mm = bc->sbc_mm;
 1337                 strlcpy(hotspare->src_devname, devname,
 1338                     sizeof(hotspare->src_devname));
 1339                 hotspare->src_size = bc->sbc_metadata->ssdi.ssd_size;
 1340 
 1341                 hm = &hotspare->src_meta;
 1342                 hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
 1343                 hm->scmi.scm_chunk_id = 0;
 1344                 hm->scmi.scm_size = bc->sbc_metadata->ssdi.ssd_size;
 1345                 hm->scmi.scm_coerced_size = bc->sbc_metadata->ssdi.ssd_size;
 1346                 strlcpy(hm->scmi.scm_devname, devname,
 1347                     sizeof(hm->scmi.scm_devname));
 1348                 memcpy(&hm->scmi.scm_uuid, &bc->sbc_metadata->ssdi.ssd_uuid,
 1349                     sizeof(struct sr_uuid));
 1350 
 1351                 sr_checksum(sc, hm, &hm->scm_checksum,
 1352                     sizeof(struct sr_meta_chunk_invariant));
 1353 
 1354                 hm->scm_status = BIOC_SDHOTSPARE;
 1355 
 1356                 /* Add chunk to hotspare list. */
 1357                 rw_enter_write(&sc->sc_hs_lock);
 1358                 cl = &sc->sc_hotspare_list;
 1359                 if (SLIST_EMPTY(cl))
 1360                         SLIST_INSERT_HEAD(cl, hotspare, src_link);
 1361                 else {
 1362                         SLIST_FOREACH(chunk, cl, src_link)
 1363                                 last = chunk;
 1364                         SLIST_INSERT_AFTER(last, hotspare, src_link);
 1365                 }
 1366                 sc->sc_hotspare_no++;
 1367                 rw_exit_write(&sc->sc_hs_lock);
 1368 
 1369         }
 1370 
 1371         /*
 1372          * Assemble RAID volumes.
 1373          */
 1374         SLIST_FOREACH(bv, &bvh, sbv_link) {
 1375 
 1376                 bzero(&bcr, sizeof(bcr));
 1377                 data = NULL;
 1378 
 1379                 /* Check if this is a hotspare "volume". */
 1380                 if (bv->sbv_level == SR_HOTSPARE_LEVEL &&
 1381                     bv->sbv_chunk_no == 1)
 1382                         continue;
 1383 
 1384                 /*
 1385                  * Skip volumes that are marked as no auto assemble, unless
 1386                  * this was the volume which we actually booted from.
 1387                  */
 1388                 if (bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) != 0)
 1389                         if (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE)
 1390                                 continue;
 1391 
 1392 #ifdef SR_DEBUG
 1393                 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc));
 1394                 if (sr_debug & SR_D_META)
 1395                         sr_uuid_print(&bv->sbv_uuid, 0);
 1396                 DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
 1397                     bv->sbv_volid, bv->sbv_chunk_no);
 1398 #endif
 1399 
 1400                 /*
 1401                  * If this is a crypto volume, try to find a matching
 1402                  * key disk...
 1403                  */
 1404                 bcr.bc_key_disk = NODEV;
 1405                 if (bv->sbv_level == 'C' || bv->sbv_level == 0x1C) {
 1406                         SLIST_FOREACH(bc, &kdh, sbc_link) {
 1407                                 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid,
 1408                                     &bv->sbv_uuid,
 1409                                     sizeof(bc->sbc_metadata->ssdi.ssd_uuid))
 1410                                     == 0)
 1411                                         bcr.bc_key_disk = bc->sbc_mm;
 1412                         }
 1413                 }
 1414 
 1415                 for (i = 0; i < BIOC_CRMAXLEN; i++) {
 1416                         devs[i] = NODEV; /* mark device as illegal */
 1417                         ondisk[i] = 0;
 1418                 }
 1419 
 1420                 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
 1421                         if (devs[bc->sbc_chunk_id] != NODEV) {
 1422                                 bv->sbv_chunks_found--;
 1423                                 sr_meta_getdevname(sc, bc->sbc_mm, devname,
 1424                                     sizeof(devname));
 1425                                 printf("%s: found duplicate chunk %u for "
 1426                                     "volume %u on device %s\n", DEVNAME(sc),
 1427                                     bc->sbc_chunk_id, bv->sbv_volid, devname);
 1428                         }
 1429 
 1430                         if (devs[bc->sbc_chunk_id] == NODEV ||
 1431                             bc->sbc_metadata->ssd_ondisk >
 1432                             ondisk[bc->sbc_chunk_id]) {
 1433                                 devs[bc->sbc_chunk_id] = bc->sbc_mm;
 1434                                 ondisk[bc->sbc_chunk_id] =
 1435                                     bc->sbc_metadata->ssd_ondisk;
 1436                                 DNPRINTF(SR_D_META, "%s: using ondisk "
 1437                                     "metadata version %llu for chunk %u\n",
 1438                                     DEVNAME(sc), ondisk[bc->sbc_chunk_id],
 1439                                     bc->sbc_chunk_id);
 1440                         }
 1441                 }
 1442 
 1443                 if (bv->sbv_chunk_no != bv->sbv_chunks_found) {
 1444                         printf("%s: not all chunks were provided; "
 1445                             "attempting to bring volume %d online\n",
 1446                             DEVNAME(sc), bv->sbv_volid);
 1447                 }
 1448 
 1449                 bcr.bc_level = bv->sbv_level;
 1450                 bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t);
 1451                 bcr.bc_dev_list = devs;
 1452                 bcr.bc_flags = BIOC_SCDEVT |
 1453                     (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE);
 1454 
 1455                 if ((bv->sbv_level == 'C' || bv->sbv_level == 0x1C) &&
 1456                     bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) == 0)
 1457                         data = sr_bootkey;
 1458 
 1459                 rw_enter_write(&sc->sc_lock);
 1460                 bio_status_init(&sc->sc_status, &sc->sc_dev);
 1461                 sr_ioctl_createraid(sc, &bcr, 0, data);
 1462                 rw_exit_write(&sc->sc_lock);
 1463 
 1464                 rv++;
 1465         }
 1466 
 1467         /* done with metadata */
 1468 unwind:
 1469         /* Free boot volumes and associated chunks. */
 1470         for (bv1 = SLIST_FIRST(&bvh); bv1 != NULL; bv1 = bv2) {
 1471                 bv2 = SLIST_NEXT(bv1, sbv_link);
 1472                 for (bc1 = SLIST_FIRST(&bv1->sbv_chunks); bc1 != NULL;
 1473                     bc1 = bc2) {
 1474                         bc2 = SLIST_NEXT(bc1, sbc_link);
 1475                         free(bc1->sbc_metadata, M_DEVBUF,
 1476                             sizeof(*bc1->sbc_metadata));
 1477                         free(bc1, M_DEVBUF, sizeof(*bc1));
 1478                 }
 1479                 free(bv1, M_DEVBUF, sizeof(*bv1));
 1480         }
 1481         /* Free keydisks chunks. */
 1482         for (bc1 = SLIST_FIRST(&kdh); bc1 != NULL; bc1 = bc2) {
 1483                 bc2 = SLIST_NEXT(bc1, sbc_link);
 1484                 free(bc1->sbc_metadata, M_DEVBUF, sizeof(*bc1->sbc_metadata));
 1485                 free(bc1, M_DEVBUF, sizeof(*bc1));
 1486         }
 1487         /* Free unallocated chunks. */
 1488         for (bc1 = SLIST_FIRST(&bch); bc1 != NULL; bc1 = bc2) {
 1489                 bc2 = SLIST_NEXT(bc1, sbc_link);
 1490                 free(bc1->sbc_metadata, M_DEVBUF, sizeof(*bc1->sbc_metadata));
 1491                 free(bc1, M_DEVBUF, sizeof(*bc1));
 1492         }
 1493 
 1494         while (!SLIST_EMPTY(&sdklist)) {
 1495                 sdk = SLIST_FIRST(&sdklist);
 1496                 SLIST_REMOVE_HEAD(&sdklist, sdk_link);
 1497                 free(sdk, M_DEVBUF, sizeof(*sdk));
 1498         }
 1499 
 1500         free(devs, M_DEVBUF, BIOC_CRMAXLEN * sizeof(dev_t));
 1501         free(ondisk, M_DEVBUF, BIOC_CRMAXLEN * sizeof(u_int64_t));
 1502 
 1503         return (rv);
 1504 }
 1505 
 1506 void
 1507 sr_map_root(void)
 1508 {
 1509         struct sr_softc         *sc = softraid0;
 1510         struct sr_discipline    *sd;
 1511         struct sr_meta_opt_item *omi;
 1512         struct sr_meta_boot     *sbm;
 1513         u_char                  duid[8];
 1514         int                     i;
 1515 
 1516         if (sc == NULL)
 1517                 return;
 1518 
 1519         DNPRINTF(SR_D_MISC, "%s: sr_map_root\n", DEVNAME(sc));
 1520 
 1521         bzero(duid, sizeof(duid));
 1522         if (bcmp(rootduid, duid, sizeof(duid)) == 0) {
 1523                 DNPRINTF(SR_D_MISC, "%s: root duid is zero\n", DEVNAME(sc));
 1524                 return;
 1525         }
 1526 
 1527         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 1528                 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
 1529                         if (omi->omi_som->som_type != SR_OPT_BOOT)
 1530                                 continue;
 1531                         sbm = (struct sr_meta_boot *)omi->omi_som;
 1532                         for (i = 0; i < SR_MAX_BOOT_DISKS; i++) {
 1533                                 if (bcmp(rootduid, sbm->sbm_boot_duid[i],
 1534                                     sizeof(rootduid)) == 0) {
 1535                                         memcpy(rootduid, sbm->sbm_root_duid,
 1536                                             sizeof(rootduid));
 1537                                         DNPRINTF(SR_D_MISC, "%s: root duid "
 1538                                             "mapped to %s\n", DEVNAME(sc),
 1539                                             duid_format(rootduid));
 1540                                         return;
 1541                                 }
 1542                         }
 1543                 }
 1544         }
 1545 }
 1546 
 1547 int
 1548 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
 1549 {
 1550         struct disklabel        label;
 1551         char                    *devname;
 1552         int                     error, part;
 1553         u_int64_t               size;
 1554 
 1555         DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
 1556            DEVNAME(sc), ch_entry->src_devname);
 1557 
 1558         devname = ch_entry->src_devname;
 1559         part = DISKPART(ch_entry->src_dev_mm);
 1560 
 1561         /* get disklabel */
 1562         error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD,
 1563             NOCRED, curproc);
 1564         if (error) {
 1565                 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
 1566                     DEVNAME(sc), devname);
 1567                 goto unwind;
 1568         }
 1569         memcpy(ch_entry->src_duid, label.d_uid, sizeof(ch_entry->src_duid));
 1570 
 1571         /* make sure the partition is of the right type */
 1572         if (label.d_partitions[part].p_fstype != FS_RAID) {
 1573                 DNPRINTF(SR_D_META,
 1574                     "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc),
 1575                     devname,
 1576                     label.d_partitions[part].p_fstype);
 1577                 goto unwind;
 1578         }
 1579 
 1580         size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part]));
 1581         if (size <= SR_DATA_OFFSET) {
 1582                 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
 1583                     devname);
 1584                 goto unwind;
 1585         }
 1586         size -= SR_DATA_OFFSET;
 1587         if (size > INT64_MAX) {
 1588                 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc),
 1589                     devname);
 1590                 goto unwind;
 1591         }
 1592         ch_entry->src_size = size;
 1593         ch_entry->src_secsize = label.d_secsize;
 1594 
 1595         DNPRINTF(SR_D_META, "%s: probe found %s size %lld\n", DEVNAME(sc),
 1596             devname, (long long)size);
 1597 
 1598         return (SR_META_F_NATIVE);
 1599 unwind:
 1600         DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
 1601             devname ? devname : "nodev");
 1602         return (SR_META_F_INVALID);
 1603 }
 1604 
 1605 int
 1606 sr_meta_native_attach(struct sr_discipline *sd, int force)
 1607 {
 1608         struct sr_softc         *sc = sd->sd_sc;
 1609         struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
 1610         struct sr_metadata      *md = NULL;
 1611         struct sr_chunk         *ch_entry, *ch_next;
 1612         struct sr_uuid          uuid;
 1613         u_int64_t               version = 0;
 1614         int                     sr, not_sr, rv = 1, d, expected = -1, old_meta = 0;
 1615 
 1616         DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
 1617 
 1618         md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT);
 1619         if (md == NULL) {
 1620                 sr_error(sc, "not enough memory for metadata buffer");
 1621                 goto bad;
 1622         }
 1623 
 1624         bzero(&uuid, sizeof uuid);
 1625 
 1626         sr = not_sr = d = 0;
 1627         SLIST_FOREACH(ch_entry, cl, src_link) {
 1628                 if (ch_entry->src_dev_mm == NODEV)
 1629                         continue;
 1630 
 1631                 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
 1632                         sr_error(sc, "could not read native metadata");
 1633                         goto bad;
 1634                 }
 1635 
 1636                 if (md->ssdi.ssd_magic == SR_MAGIC) {
 1637                         sr++;
 1638                         ch_entry->src_meta.scmi.scm_chunk_id =
 1639                             md->ssdi.ssd_chunk_id;
 1640                         if (d == 0) {
 1641                                 memcpy(&uuid, &md->ssdi.ssd_uuid, sizeof uuid);
 1642                                 expected = md->ssdi.ssd_chunk_no;
 1643                                 version = md->ssd_ondisk;
 1644                                 d++;
 1645                                 continue;
 1646                         } else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
 1647                             sizeof uuid)) {
 1648                                 sr_error(sc, "not part of the same volume");
 1649                                 goto bad;
 1650                         }
 1651                         if (md->ssd_ondisk != version) {
 1652                                 old_meta++;
 1653                                 version = MAX(md->ssd_ondisk, version);
 1654                         }
 1655                 } else
 1656                         not_sr++;
 1657         }
 1658 
 1659         if (sr && not_sr && !force) {
 1660                 sr_error(sc, "not all chunks are of the native metadata "
 1661                     "format");
 1662                 goto bad;
 1663         }
 1664 
 1665         /* mixed metadata versions; mark bad disks offline */
 1666         if (old_meta) {
 1667                 d = 0;
 1668                 for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL;
 1669                     ch_entry = ch_next, d++) {
 1670                         ch_next = SLIST_NEXT(ch_entry, src_link);
 1671 
 1672                         /* XXX do we want to read this again? */
 1673                         if (ch_entry->src_dev_mm == NODEV)
 1674                                 panic("src_dev_mm == NODEV");
 1675                         if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md,
 1676                             NULL))
 1677                                 sr_warn(sc, "could not read native metadata");
 1678                         if (md->ssd_ondisk != version)
 1679                                 sd->sd_vol.sv_chunks[d]->src_meta.scm_status =
 1680                                     BIOC_SDOFFLINE;
 1681                 }
 1682         }
 1683 
 1684         if (expected != sr && !force && expected != -1) {
 1685                 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying "
 1686                     "anyway\n", DEVNAME(sc));
 1687         }
 1688 
 1689         rv = 0;
 1690 bad:
 1691         free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
 1692         return (rv);
 1693 }
 1694 
 1695 int
 1696 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
 1697     struct sr_metadata *md, void *fm)
 1698 {
 1699 #ifdef SR_DEBUG
 1700         struct sr_softc         *sc = sd->sd_sc;
 1701 #endif
 1702         DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
 1703             DEVNAME(sc), dev, md);
 1704 
 1705         return (sr_meta_rw(sd, dev, md, B_READ));
 1706 }
 1707 
 1708 int
 1709 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
 1710     struct sr_metadata *md, void *fm)
 1711 {
 1712 #ifdef SR_DEBUG
 1713         struct sr_softc         *sc = sd->sd_sc;
 1714 #endif
 1715         DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
 1716             DEVNAME(sc), dev, md);
 1717 
 1718         return (sr_meta_rw(sd, dev, md, B_WRITE));
 1719 }
 1720 
 1721 void
 1722 sr_hotplug_register(struct sr_discipline *sd, void *func)
 1723 {
 1724         struct sr_hotplug_list  *mhe;
 1725 
 1726         DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n",
 1727             DEVNAME(sd->sd_sc), func);
 1728 
 1729         /* make sure we aren't on the list yet */
 1730         SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
 1731                 if (mhe->sh_hotplug == func)
 1732                         return;
 1733 
 1734         mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF,
 1735             M_WAITOK | M_ZERO);
 1736         mhe->sh_hotplug = func;
 1737         mhe->sh_sd = sd;
 1738         SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link);
 1739 }
 1740 
 1741 void
 1742 sr_hotplug_unregister(struct sr_discipline *sd, void *func)
 1743 {
 1744         struct sr_hotplug_list  *mhe;
 1745 
 1746         DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n",
 1747             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func);
 1748 
 1749         /* make sure we are on the list yet */
 1750         SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) {
 1751                 if (mhe->sh_hotplug == func)
 1752                         break;
 1753         }
 1754         if (mhe != NULL) {
 1755                 SLIST_REMOVE(&sr_hotplug_callbacks, mhe,
 1756                     sr_hotplug_list, shl_link);
 1757                 free(mhe, M_DEVBUF, sizeof(*mhe));
 1758         }
 1759 }
 1760 
 1761 void
 1762 sr_disk_attach(struct disk *diskp, int action)
 1763 {
 1764         struct sr_hotplug_list  *mhe;
 1765 
 1766         SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
 1767                 if (mhe->sh_sd->sd_ready)
 1768                         mhe->sh_hotplug(mhe->sh_sd, diskp, action);
 1769 }
 1770 
 1771 int
 1772 sr_match(struct device *parent, void *match, void *aux)
 1773 {
 1774         return (1);
 1775 }
 1776 
 1777 void
 1778 sr_attach(struct device *parent, struct device *self, void *aux)
 1779 {
 1780         struct sr_softc         *sc = (void *)self;
 1781         struct scsibus_attach_args saa;
 1782 
 1783         DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
 1784 
 1785         if (softraid0 == NULL)
 1786                 softraid0 = sc;
 1787 
 1788         rw_init(&sc->sc_lock, "sr_lock");
 1789         rw_init(&sc->sc_hs_lock, "sr_hs_lock");
 1790 
 1791         SLIST_INIT(&sr_hotplug_callbacks);
 1792         TAILQ_INIT(&sc->sc_dis_list);
 1793         SLIST_INIT(&sc->sc_hotspare_list);
 1794 
 1795 #if NBIO > 0
 1796         if (bio_register(&sc->sc_dev, sr_bio_ioctl) != 0)
 1797                 printf("%s: controller registration failed", DEVNAME(sc));
 1798 #endif /* NBIO > 0 */
 1799 
 1800 #ifndef SMALL_KERNEL
 1801         strlcpy(sc->sc_sensordev.xname, DEVNAME(sc),
 1802             sizeof(sc->sc_sensordev.xname));
 1803         sensordev_install(&sc->sc_sensordev);
 1804 #endif /* SMALL_KERNEL */
 1805 
 1806         printf("\n");
 1807 
 1808         saa.saa_adapter_softc = sc;
 1809         saa.saa_adapter = &sr_switch;
 1810         saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
 1811         saa.saa_adapter_buswidth = SR_MAX_LD;
 1812         saa.saa_luns = 1;
 1813         saa.saa_openings = 0;
 1814         saa.saa_pool = NULL;
 1815         saa.saa_quirks = saa.saa_flags = 0;
 1816         saa.saa_wwpn = saa.saa_wwnn = 0;
 1817 
 1818         sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, &saa,
 1819             scsiprint);
 1820 
 1821         softraid_disk_attach = sr_disk_attach;
 1822 
 1823         sr_boot_assembly(sc);
 1824 
 1825         explicit_bzero(sr_bootkey, sizeof(sr_bootkey));
 1826 }
 1827 
 1828 int
 1829 sr_detach(struct device *self, int flags)
 1830 {
 1831         struct sr_softc         *sc = (void *)self;
 1832         int                     rv;
 1833 
 1834         DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc));
 1835 
 1836         softraid_disk_attach = NULL;
 1837 
 1838         sr_shutdown(0);
 1839 
 1840 #ifndef SMALL_KERNEL
 1841         if (sc->sc_sensor_task != NULL)
 1842                 sensor_task_unregister(sc->sc_sensor_task);
 1843         sensordev_deinstall(&sc->sc_sensordev);
 1844 #endif /* SMALL_KERNEL */
 1845 
 1846         if (sc->sc_scsibus != NULL) {
 1847                 rv = config_detach((struct device *)sc->sc_scsibus, flags);
 1848                 if (rv != 0)
 1849                         return (rv);
 1850                 sc->sc_scsibus = NULL;
 1851         }
 1852 
 1853         return (0);
 1854 }
 1855 
 1856 void
 1857 sr_info(struct sr_softc *sc, const char *fmt, ...)
 1858 {
 1859         va_list                 ap;
 1860 
 1861         rw_assert_wrlock(&sc->sc_lock);
 1862 
 1863         va_start(ap, fmt);
 1864         bio_status(&sc->sc_status, 0, BIO_MSG_INFO, fmt, &ap);
 1865         va_end(ap);
 1866 }
 1867 
 1868 void
 1869 sr_warn(struct sr_softc *sc, const char *fmt, ...)
 1870 {
 1871         va_list                 ap;
 1872 
 1873         rw_assert_wrlock(&sc->sc_lock);
 1874 
 1875         va_start(ap, fmt);
 1876         bio_status(&sc->sc_status, 1, BIO_MSG_WARN, fmt, &ap);
 1877         va_end(ap);
 1878 }
 1879 
 1880 void
 1881 sr_error(struct sr_softc *sc, const char *fmt, ...)
 1882 {
 1883         va_list                 ap;
 1884 
 1885         rw_assert_wrlock(&sc->sc_lock);
 1886 
 1887         va_start(ap, fmt);
 1888         bio_status(&sc->sc_status, 1, BIO_MSG_ERROR, fmt, &ap);
 1889         va_end(ap);
 1890 }
 1891 
 1892 int
 1893 sr_ccb_alloc(struct sr_discipline *sd)
 1894 {
 1895         struct sr_ccb           *ccb;
 1896         int                     i;
 1897 
 1898         if (!sd)
 1899                 return (1);
 1900 
 1901         DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
 1902 
 1903         if (sd->sd_ccb)
 1904                 return (1);
 1905 
 1906         sd->sd_ccb = mallocarray(sd->sd_max_wu,
 1907             sd->sd_max_ccb_per_wu * sizeof(struct sr_ccb),
 1908             M_DEVBUF, M_WAITOK | M_ZERO);
 1909         TAILQ_INIT(&sd->sd_ccb_freeq);
 1910         for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
 1911                 ccb = &sd->sd_ccb[i];
 1912                 ccb->ccb_dis = sd;
 1913                 sr_ccb_put(ccb);
 1914         }
 1915 
 1916         DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
 1917             DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
 1918 
 1919         return (0);
 1920 }
 1921 
 1922 void
 1923 sr_ccb_free(struct sr_discipline *sd)
 1924 {
 1925         struct sr_ccb           *ccb;
 1926 
 1927         if (!sd)
 1928                 return;
 1929 
 1930         DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
 1931 
 1932         while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
 1933                 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
 1934 
 1935         free(sd->sd_ccb, M_DEVBUF, sd->sd_max_wu * sd->sd_max_ccb_per_wu *
 1936             sizeof(struct sr_ccb));
 1937 }
 1938 
 1939 struct sr_ccb *
 1940 sr_ccb_get(struct sr_discipline *sd)
 1941 {
 1942         struct sr_ccb           *ccb;
 1943         int                     s;
 1944 
 1945         s = splbio();
 1946 
 1947         ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
 1948         if (ccb) {
 1949                 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
 1950                 ccb->ccb_state = SR_CCB_INPROGRESS;
 1951         }
 1952 
 1953         splx(s);
 1954 
 1955         DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
 1956             ccb);
 1957 
 1958         return (ccb);
 1959 }
 1960 
 1961 void
 1962 sr_ccb_put(struct sr_ccb *ccb)
 1963 {
 1964         struct sr_discipline    *sd = ccb->ccb_dis;
 1965         int                     s;
 1966 
 1967         DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
 1968             ccb);
 1969 
 1970         s = splbio();
 1971 
 1972         ccb->ccb_wu = NULL;
 1973         ccb->ccb_state = SR_CCB_FREE;
 1974         ccb->ccb_target = -1;
 1975         ccb->ccb_opaque = NULL;
 1976 
 1977         TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
 1978 
 1979         splx(s);
 1980 }
 1981 
 1982 struct sr_ccb *
 1983 sr_ccb_rw(struct sr_discipline *sd, int chunk, daddr_t blkno,
 1984     long len, u_int8_t *data, int xsflags, int ccbflags)
 1985 {
 1986         struct sr_chunk         *sc = sd->sd_vol.sv_chunks[chunk];
 1987         struct sr_ccb           *ccb = NULL;
 1988         int                     s;
 1989 
 1990         ccb = sr_ccb_get(sd);
 1991         if (ccb == NULL)
 1992                 goto out;
 1993 
 1994         ccb->ccb_flags = ccbflags;
 1995         ccb->ccb_target = chunk;
 1996 
 1997         ccb->ccb_buf.b_flags = B_PHYS | B_CALL;
 1998         if (ISSET(xsflags, SCSI_DATA_IN))
 1999                 ccb->ccb_buf.b_flags |= B_READ;
 2000         else
 2001                 ccb->ccb_buf.b_flags |= B_WRITE;
 2002 
 2003         ccb->ccb_buf.b_blkno = blkno + sd->sd_meta->ssd_data_blkno;
 2004         ccb->ccb_buf.b_bcount = len;
 2005         ccb->ccb_buf.b_bufsize = len;
 2006         ccb->ccb_buf.b_resid = len;
 2007         ccb->ccb_buf.b_data = data;
 2008         ccb->ccb_buf.b_error = 0;
 2009         ccb->ccb_buf.b_iodone = sd->sd_scsi_intr;
 2010         ccb->ccb_buf.b_proc = curproc;
 2011         ccb->ccb_buf.b_dev = sc->src_dev_mm;
 2012         ccb->ccb_buf.b_vp = sc->src_vn;
 2013         ccb->ccb_buf.b_bq = NULL;
 2014 
 2015         if (!ISSET(ccb->ccb_buf.b_flags, B_READ)) {
 2016                 s = splbio();
 2017                 ccb->ccb_buf.b_vp->v_numoutput++;
 2018                 splx(s);
 2019         }
 2020 
 2021         LIST_INIT(&ccb->ccb_buf.b_dep);
 2022 
 2023         DNPRINTF(SR_D_DIS, "%s: %s %s ccb "
 2024             "b_bcount %ld b_blkno %lld b_flags 0x%0lx b_data %p\n",
 2025             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name,
 2026             ccb->ccb_buf.b_bcount, (long long)ccb->ccb_buf.b_blkno,
 2027             ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
 2028 
 2029 out:
 2030         return ccb;
 2031 }
 2032 
 2033 void
 2034 sr_ccb_done(struct sr_ccb *ccb)
 2035 {
 2036         struct sr_workunit      *wu = ccb->ccb_wu;
 2037         struct sr_discipline    *sd = wu->swu_dis;
 2038         struct sr_softc         *sc = sd->sd_sc;
 2039 
 2040         DNPRINTF(SR_D_INTR, "%s: %s %s ccb done b_bcount %ld b_resid %zu"
 2041             " b_flags 0x%0lx block %lld target %d\n",
 2042             DEVNAME(sc), sd->sd_meta->ssd_devname, sd->sd_name,
 2043             ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
 2044             (long long)ccb->ccb_buf.b_blkno, ccb->ccb_target);
 2045 
 2046         splassert(IPL_BIO);
 2047 
 2048         if (ccb->ccb_target == -1)
 2049                 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
 2050 
 2051         if (ccb->ccb_buf.b_flags & B_ERROR) {
 2052                 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target %d\n",
 2053                     DEVNAME(sc), (long long)ccb->ccb_buf.b_blkno,
 2054                     ccb->ccb_target);
 2055                 if (ISSET(sd->sd_capabilities, SR_CAP_REDUNDANT))
 2056                         sd->sd_set_chunk_state(sd, ccb->ccb_target,
 2057                             BIOC_SDOFFLINE);
 2058                 else
 2059                         printf("%s: %s: i/o error %d @ %s block %lld\n",
 2060                             DEVNAME(sc), sd->sd_meta->ssd_devname,
 2061                             ccb->ccb_buf.b_error, sd->sd_name,
 2062                             (long long)ccb->ccb_buf.b_blkno);
 2063                 ccb->ccb_state = SR_CCB_FAILED;
 2064                 wu->swu_ios_failed++;
 2065         } else {
 2066                 ccb->ccb_state = SR_CCB_OK;
 2067                 wu->swu_ios_succeeded++;
 2068         }
 2069 
 2070         wu->swu_ios_complete++;
 2071 }
 2072 
 2073 int
 2074 sr_wu_alloc(struct sr_discipline *sd)
 2075 {
 2076         struct sr_workunit      *wu;
 2077         int                     i, no_wu;
 2078 
 2079         DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
 2080             sd, sd->sd_max_wu);
 2081 
 2082         no_wu = sd->sd_max_wu;
 2083         sd->sd_wu_pending = no_wu;
 2084 
 2085         mtx_init(&sd->sd_wu_mtx, IPL_BIO);
 2086         TAILQ_INIT(&sd->sd_wu);
 2087         TAILQ_INIT(&sd->sd_wu_freeq);
 2088         TAILQ_INIT(&sd->sd_wu_pendq);
 2089         TAILQ_INIT(&sd->sd_wu_defq);
 2090 
 2091         for (i = 0; i < no_wu; i++) {
 2092                 wu = malloc(sd->sd_wu_size, M_DEVBUF, M_WAITOK | M_ZERO);
 2093                 TAILQ_INSERT_TAIL(&sd->sd_wu, wu, swu_next);
 2094                 TAILQ_INIT(&wu->swu_ccb);
 2095                 wu->swu_dis = sd;
 2096                 task_set(&wu->swu_task, sr_wu_done_callback, wu);
 2097                 sr_wu_put(sd, wu);
 2098         }
 2099 
 2100         return (0);
 2101 }
 2102 
 2103 void
 2104 sr_wu_free(struct sr_discipline *sd)
 2105 {
 2106         struct sr_workunit      *wu;
 2107 
 2108         DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
 2109 
 2110         while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
 2111                 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
 2112         while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
 2113                 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
 2114         while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
 2115                 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
 2116 
 2117         while ((wu = TAILQ_FIRST(&sd->sd_wu)) != NULL) {
 2118                 TAILQ_REMOVE(&sd->sd_wu, wu, swu_next);
 2119                 free(wu, M_DEVBUF, sd->sd_wu_size);
 2120         }
 2121 }
 2122 
 2123 void *
 2124 sr_wu_get(void *xsd)
 2125 {
 2126         struct sr_discipline    *sd = (struct sr_discipline *)xsd;
 2127         struct sr_workunit      *wu;
 2128 
 2129         mtx_enter(&sd->sd_wu_mtx);
 2130         wu = TAILQ_FIRST(&sd->sd_wu_freeq);
 2131         if (wu) {
 2132                 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
 2133                 sd->sd_wu_pending++;
 2134         }
 2135         mtx_leave(&sd->sd_wu_mtx);
 2136 
 2137         DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
 2138 
 2139         return (wu);
 2140 }
 2141 
 2142 void
 2143 sr_wu_put(void *xsd, void *xwu)
 2144 {
 2145         struct sr_discipline    *sd = (struct sr_discipline *)xsd;
 2146         struct sr_workunit      *wu = (struct sr_workunit *)xwu;
 2147 
 2148         DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
 2149 
 2150         sr_wu_release_ccbs(wu);
 2151         sr_wu_init(sd, wu);
 2152 
 2153         mtx_enter(&sd->sd_wu_mtx);
 2154         TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
 2155         sd->sd_wu_pending--;
 2156         mtx_leave(&sd->sd_wu_mtx);
 2157 }
 2158 
 2159 void
 2160 sr_wu_init(struct sr_discipline *sd, struct sr_workunit *wu)
 2161 {
 2162         int                     s;
 2163 
 2164         s = splbio();
 2165         if (wu->swu_cb_active == 1)
 2166                 panic("%s: sr_wu_init got active wu", DEVNAME(sd->sd_sc));
 2167         splx(s);
 2168 
 2169         wu->swu_xs = NULL;
 2170         wu->swu_state = SR_WU_FREE;
 2171         wu->swu_flags = 0;
 2172         wu->swu_blk_start = 0;
 2173         wu->swu_blk_end = 0;
 2174         wu->swu_collider = NULL;
 2175 }
 2176 
 2177 void
 2178 sr_wu_enqueue_ccb(struct sr_workunit *wu, struct sr_ccb *ccb)
 2179 {
 2180         struct sr_discipline    *sd = wu->swu_dis;
 2181         int                     s;
 2182 
 2183         s = splbio();
 2184         if (wu->swu_cb_active == 1)
 2185                 panic("%s: sr_wu_enqueue_ccb got active wu",
 2186                     DEVNAME(sd->sd_sc));
 2187         ccb->ccb_wu = wu;
 2188         wu->swu_io_count++;
 2189         TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
 2190         splx(s);
 2191 }
 2192 
 2193 void
 2194 sr_wu_release_ccbs(struct sr_workunit *wu)
 2195 {
 2196         struct sr_ccb           *ccb;
 2197 
 2198         /* Return all ccbs that are associated with this workunit. */
 2199         while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
 2200                 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
 2201                 sr_ccb_put(ccb);
 2202         }
 2203 
 2204         wu->swu_io_count = 0;
 2205         wu->swu_ios_complete = 0;
 2206         wu->swu_ios_failed = 0;
 2207         wu->swu_ios_succeeded = 0;
 2208 }
 2209 
 2210 void
 2211 sr_wu_done(struct sr_workunit *wu)
 2212 {
 2213         struct sr_discipline    *sd = wu->swu_dis;
 2214 
 2215         DNPRINTF(SR_D_INTR, "%s: sr_wu_done count %d completed %d failed %d\n",
 2216             DEVNAME(sd->sd_sc), wu->swu_io_count, wu->swu_ios_complete,
 2217             wu->swu_ios_failed);
 2218 
 2219         if (wu->swu_ios_complete < wu->swu_io_count)
 2220                 return;
 2221 
 2222         task_add(sd->sd_taskq, &wu->swu_task);
 2223 }
 2224 
 2225 void
 2226 sr_wu_done_callback(void *xwu)
 2227 {
 2228         struct sr_workunit      *wu = xwu;
 2229         struct sr_discipline    *sd = wu->swu_dis;
 2230         struct scsi_xfer        *xs = wu->swu_xs;
 2231         struct sr_workunit      *wup;
 2232         int                     s;
 2233 
 2234         /*
 2235          * The SR_WUF_DISCIPLINE or SR_WUF_REBUILD flag must be set if
 2236          * the work unit is not associated with a scsi_xfer.
 2237          */
 2238         KASSERT(xs != NULL ||
 2239             (wu->swu_flags & (SR_WUF_DISCIPLINE|SR_WUF_REBUILD)));
 2240 
 2241         s = splbio();
 2242 
 2243         if (xs != NULL) {
 2244                 if (wu->swu_ios_failed)
 2245                         xs->error = XS_DRIVER_STUFFUP;
 2246                 else
 2247                         xs->error = XS_NOERROR;
 2248         }
 2249 
 2250         if (sd->sd_scsi_wu_done) {
 2251                 if (sd->sd_scsi_wu_done(wu) == SR_WU_RESTART)
 2252                         goto done;
 2253         }
 2254 
 2255         /* Remove work unit from pending queue. */
 2256         TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link)
 2257                 if (wup == wu)
 2258                         break;
 2259         if (wup == NULL)
 2260                 panic("%s: wu %p not on pending queue",
 2261                     DEVNAME(sd->sd_sc), wu);
 2262         TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
 2263 
 2264         if (wu->swu_collider) {
 2265                 if (wu->swu_ios_failed)
 2266                         sr_raid_recreate_wu(wu->swu_collider);
 2267 
 2268                 /* XXX Should the collider be failed if this xs failed? */
 2269                 sr_raid_startwu(wu->swu_collider);
 2270         }
 2271 
 2272         /*
 2273          * If a discipline provides its own sd_scsi_done function, then it
 2274          * is responsible for calling sr_scsi_done() once I/O is complete.
 2275          */
 2276         if (wu->swu_flags & SR_WUF_REBUILD)
 2277                 wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
 2278         if (wu->swu_flags & SR_WUF_WAKEUP)
 2279                 wakeup(wu);
 2280         if (sd->sd_scsi_done)
 2281                 sd->sd_scsi_done(wu);
 2282         else if (wu->swu_flags & SR_WUF_DISCIPLINE)
 2283                 sr_scsi_wu_put(sd, wu);
 2284         else if (!(wu->swu_flags & SR_WUF_REBUILD))
 2285                 sr_scsi_done(sd, xs);
 2286 
 2287 done:
 2288         splx(s);
 2289 }
 2290 
 2291 struct sr_workunit *
 2292 sr_scsi_wu_get(struct sr_discipline *sd, int flags)
 2293 {
 2294         return scsi_io_get(&sd->sd_iopool, flags);
 2295 }
 2296 
 2297 void
 2298 sr_scsi_wu_put(struct sr_discipline *sd, struct sr_workunit *wu)
 2299 {
 2300         scsi_io_put(&sd->sd_iopool, wu);
 2301 
 2302         if (sd->sd_sync && sd->sd_wu_pending == 0)
 2303                 wakeup(sd);
 2304 }
 2305 
 2306 void
 2307 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs)
 2308 {
 2309         DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs);
 2310 
 2311         if (xs->error == XS_NOERROR)
 2312                 xs->resid = 0;
 2313 
 2314         scsi_done(xs);
 2315 
 2316         if (sd->sd_sync && sd->sd_wu_pending == 0)
 2317                 wakeup(sd);
 2318 }
 2319 
 2320 void
 2321 sr_scsi_cmd(struct scsi_xfer *xs)
 2322 {
 2323         struct scsi_link        *link = xs->sc_link;
 2324         struct sr_softc         *sc = link->bus->sb_adapter_softc;
 2325         struct sr_workunit      *wu = xs->io;
 2326         struct sr_discipline    *sd;
 2327 
 2328         DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd target %d xs %p flags %#x\n",
 2329             DEVNAME(sc), link->target, xs, xs->flags);
 2330 
 2331         sd = sc->sc_targets[link->target];
 2332         if (sd == NULL)
 2333                 panic("%s: sr_scsi_cmd NULL discipline", DEVNAME(sc));
 2334 
 2335         if (sd->sd_deleted) {
 2336                 printf("%s: %s device is being deleted, failing io\n",
 2337                     DEVNAME(sc), sd->sd_meta->ssd_devname);
 2338                 goto stuffup;
 2339         }
 2340 
 2341         /* scsi layer *can* re-send wu without calling sr_wu_put(). */
 2342         sr_wu_release_ccbs(wu);
 2343         sr_wu_init(sd, wu);
 2344         wu->swu_state = SR_WU_INPROGRESS;
 2345         wu->swu_xs = xs;
 2346 
 2347         switch (xs->cmd.opcode) {
 2348         case READ_COMMAND:
 2349         case READ_10:
 2350         case READ_16:
 2351         case WRITE_COMMAND:
 2352         case WRITE_10:
 2353         case WRITE_16:
 2354                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
 2355                     DEVNAME(sc), xs->cmd.opcode);
 2356                 if (sd->sd_scsi_rw(wu))
 2357                         goto stuffup;
 2358                 break;
 2359 
 2360         case SYNCHRONIZE_CACHE:
 2361                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
 2362                     DEVNAME(sc));
 2363                 if (sd->sd_scsi_sync(wu))
 2364                         goto stuffup;
 2365                 goto complete;
 2366 
 2367         case TEST_UNIT_READY:
 2368                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
 2369                     DEVNAME(sc));
 2370                 if (sd->sd_scsi_tur(wu))
 2371                         goto stuffup;
 2372                 goto complete;
 2373 
 2374         case START_STOP:
 2375                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
 2376                     DEVNAME(sc));
 2377                 if (sd->sd_scsi_start_stop(wu))
 2378                         goto stuffup;
 2379                 goto complete;
 2380 
 2381         case INQUIRY:
 2382                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
 2383                     DEVNAME(sc));
 2384                 if (sd->sd_scsi_inquiry(wu))
 2385                         goto stuffup;
 2386                 goto complete;
 2387 
 2388         case READ_CAPACITY:
 2389         case READ_CAPACITY_16:
 2390                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
 2391                     DEVNAME(sc), xs->cmd.opcode);
 2392                 if (sd->sd_scsi_read_cap(wu))
 2393                         goto stuffup;
 2394                 goto complete;
 2395 
 2396         case REQUEST_SENSE:
 2397                 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
 2398                     DEVNAME(sc));
 2399                 if (sd->sd_scsi_req_sense(wu))
 2400                         goto stuffup;
 2401                 goto complete;
 2402 
 2403         default:
 2404                 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
 2405                     DEVNAME(sc), xs->cmd.opcode);
 2406                 /* XXX might need to add generic function to handle others */
 2407                 goto stuffup;
 2408         }
 2409 
 2410         return;
 2411 stuffup:
 2412         if (sd->sd_scsi_sense.error_code) {
 2413                 xs->error = XS_SENSE;
 2414                 memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense));
 2415                 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
 2416         } else {
 2417                 xs->error = XS_DRIVER_STUFFUP;
 2418         }
 2419 complete:
 2420         sr_scsi_done(sd, xs);
 2421 }
 2422 
 2423 int
 2424 sr_scsi_probe(struct scsi_link *link)
 2425 {
 2426         struct sr_softc         *sc = link->bus->sb_adapter_softc;
 2427         struct sr_discipline    *sd;
 2428 
 2429         KASSERT(link->target < SR_MAX_LD && link->lun == 0);
 2430 
 2431         sd = sc->sc_targets[link->target];
 2432         if (sd == NULL)
 2433                 return (ENODEV);
 2434 
 2435         link->pool = &sd->sd_iopool;
 2436         if (sd->sd_openings)
 2437                 link->openings = sd->sd_openings(sd);
 2438         else
 2439                 link->openings = sd->sd_max_wu;
 2440 
 2441         return (0);
 2442 }
 2443 
 2444 int
 2445 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
 2446 {
 2447         struct sr_softc         *sc = link->bus->sb_adapter_softc;
 2448         struct sr_discipline    *sd;
 2449 
 2450         sd = sc->sc_targets[link->target];
 2451         if (sd == NULL)
 2452                 return (ENODEV);
 2453 
 2454         DNPRINTF(SR_D_IOCTL, "%s: %s sr_scsi_ioctl cmd: %#lx\n",
 2455             DEVNAME(sc), sd->sd_meta->ssd_devname, cmd);
 2456 
 2457         /* Pass bio ioctls through to the bio handler. */
 2458         if (IOCGROUP(cmd) == 'B')
 2459                 return (sr_bio_handler(sc, sd, cmd, (struct bio *)addr));
 2460 
 2461         switch (cmd) {
 2462         case DIOCGCACHE:
 2463         case DIOCSCACHE:
 2464                 return (EOPNOTSUPP);
 2465         default:
 2466                 return (ENOTTY);
 2467         }
 2468 }
 2469 
 2470 int
 2471 sr_bio_ioctl(struct device *dev, u_long cmd, caddr_t addr)
 2472 {
 2473         struct sr_softc *sc = (struct sr_softc *) dev;
 2474         DNPRINTF(SR_D_IOCTL, "%s: sr_bio_ioctl\n", DEVNAME(sc));
 2475 
 2476         return sr_bio_handler(sc, NULL, cmd, (struct bio *)addr);
 2477 }
 2478 
 2479 int
 2480 sr_bio_handler(struct sr_softc *sc, struct sr_discipline *sd, u_long cmd,
 2481     struct bio *bio)
 2482 {
 2483         int                     rv = 0;
 2484 
 2485         DNPRINTF(SR_D_IOCTL, "%s: sr_bio_handler ", DEVNAME(sc));
 2486 
 2487         rw_enter_write(&sc->sc_lock);
 2488 
 2489         bio_status_init(&sc->sc_status, &sc->sc_dev);
 2490 
 2491         switch (cmd) {
 2492         case BIOCINQ:
 2493                 DNPRINTF(SR_D_IOCTL, "inq\n");
 2494                 rv = sr_ioctl_inq(sc, (struct bioc_inq *)bio);
 2495                 break;
 2496 
 2497         case BIOCVOL:
 2498                 DNPRINTF(SR_D_IOCTL, "vol\n");
 2499                 rv = sr_ioctl_vol(sc, (struct bioc_vol *)bio);
 2500                 break;
 2501 
 2502         case BIOCDISK:
 2503                 DNPRINTF(SR_D_IOCTL, "disk\n");
 2504                 rv = sr_ioctl_disk(sc, (struct bioc_disk *)bio);
 2505                 break;
 2506 
 2507         case BIOCALARM:
 2508                 DNPRINTF(SR_D_IOCTL, "alarm\n");
 2509                 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)bio); */
 2510                 break;
 2511 
 2512         case BIOCBLINK:
 2513                 DNPRINTF(SR_D_IOCTL, "blink\n");
 2514                 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)bio); */
 2515                 break;
 2516 
 2517         case BIOCSETSTATE:
 2518                 DNPRINTF(SR_D_IOCTL, "setstate\n");
 2519                 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)bio);
 2520                 break;
 2521 
 2522         case BIOCCREATERAID:
 2523                 DNPRINTF(SR_D_IOCTL, "createraid\n");
 2524                 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)bio,
 2525                     1, NULL);
 2526                 break;
 2527 
 2528         case BIOCDELETERAID:
 2529                 DNPRINTF(SR_D_IOCTL, "deleteraid\n");
 2530                 rv = sr_ioctl_deleteraid(sc, sd, (struct bioc_deleteraid *)bio);
 2531                 break;
 2532 
 2533         case BIOCDISCIPLINE:
 2534                 DNPRINTF(SR_D_IOCTL, "discipline\n");
 2535                 rv = sr_ioctl_discipline(sc, sd, (struct bioc_discipline *)bio);
 2536                 break;
 2537 
 2538         case BIOCINSTALLBOOT:
 2539                 DNPRINTF(SR_D_IOCTL, "installboot\n");
 2540                 rv = sr_ioctl_installboot(sc, sd,
 2541                     (struct bioc_installboot *)bio);
 2542                 break;
 2543 
 2544         default:
 2545                 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
 2546                 rv = ENOTTY;
 2547         }
 2548 
 2549         sc->sc_status.bs_status = (rv ? BIO_STATUS_ERROR : BIO_STATUS_SUCCESS);
 2550 
 2551         if (sc->sc_status.bs_msg_count > 0)
 2552                 rv = 0;
 2553 
 2554         memcpy(&bio->bio_status, &sc->sc_status, sizeof(struct bio_status));
 2555 
 2556         rw_exit_write(&sc->sc_lock);
 2557 
 2558         return (rv);
 2559 }
 2560 
 2561 int
 2562 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
 2563 {
 2564         struct sr_discipline    *sd;
 2565         int                     vol = 0, disk = 0;
 2566 
 2567         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 2568                 vol++;
 2569                 disk += sd->sd_meta->ssdi.ssd_chunk_no;
 2570         }
 2571 
 2572         strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
 2573         bi->bi_novol = vol + sc->sc_hotspare_no;
 2574         bi->bi_nodisk = disk + sc->sc_hotspare_no;
 2575 
 2576         return (0);
 2577 }
 2578 
 2579 int
 2580 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
 2581 {
 2582         int                     vol = -1, rv = EINVAL;
 2583         struct sr_discipline    *sd;
 2584         struct sr_chunk         *hotspare;
 2585 
 2586         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 2587                 vol++;
 2588                 if (vol != bv->bv_volid)
 2589                         continue;
 2590 
 2591                 bv->bv_status = sd->sd_vol_status;
 2592                 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
 2593                 bv->bv_level = sd->sd_meta->ssdi.ssd_level;
 2594                 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
 2595 
 2596 #ifdef CRYPTO
 2597                 if (sd->sd_meta->ssdi.ssd_level == 'C' &&
 2598                     sd->mds.mdd_crypto.key_disk != NULL)
 2599                         bv->bv_nodisk++;
 2600                 else if (sd->sd_meta->ssdi.ssd_level == 0x1C &&
 2601                     sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL)
 2602                         bv->bv_nodisk++;
 2603 #endif
 2604                 if (bv->bv_status == BIOC_SVREBUILD)
 2605                         bv->bv_percent = sr_rebuild_percent(sd);
 2606 
 2607                 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
 2608                     sizeof(bv->bv_dev));
 2609                 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
 2610                     sizeof(bv->bv_vendor));
 2611                 rv = 0;
 2612                 goto done;
 2613         }
 2614 
 2615         /* Check hotspares list. */
 2616         SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
 2617                 vol++;
 2618                 if (vol != bv->bv_volid)
 2619                         continue;
 2620 
 2621                 bv->bv_status = BIOC_SVONLINE;
 2622                 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
 2623                 bv->bv_level = -1;      /* Hotspare. */
 2624                 bv->bv_nodisk = 1;
 2625                 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname,
 2626                     sizeof(bv->bv_dev));
 2627                 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname,
 2628                     sizeof(bv->bv_vendor));
 2629                 rv = 0;
 2630                 goto done;
 2631         }
 2632 
 2633 done:
 2634         return (rv);
 2635 }
 2636 
 2637 int
 2638 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
 2639 {
 2640         struct sr_discipline    *sd;
 2641         struct sr_chunk         *src, *hotspare;
 2642         int                     vol = -1, rv = EINVAL;
 2643 
 2644         if (bd->bd_diskid < 0)
 2645                 goto done;
 2646 
 2647         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 2648                 vol++;
 2649                 if (vol != bd->bd_volid)
 2650                         continue;
 2651 
 2652                 if (bd->bd_diskid < sd->sd_meta->ssdi.ssd_chunk_no)
 2653                         src = sd->sd_vol.sv_chunks[bd->bd_diskid];
 2654 #ifdef CRYPTO
 2655                 else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no &&
 2656                     sd->sd_meta->ssdi.ssd_level == 'C' &&
 2657                     sd->mds.mdd_crypto.key_disk != NULL)
 2658                         src = sd->mds.mdd_crypto.key_disk;
 2659                 else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no &&
 2660                     sd->sd_meta->ssdi.ssd_level == 0x1C &&
 2661                     sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL)
 2662                         src = sd->mds.mdd_crypto.key_disk;
 2663 #endif
 2664                 else
 2665                         break;
 2666 
 2667                 bd->bd_status = src->src_meta.scm_status;
 2668                 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
 2669                 bd->bd_channel = vol;
 2670                 bd->bd_target = bd->bd_diskid;
 2671                 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
 2672                     sizeof(bd->bd_vendor));
 2673                 rv = 0;
 2674                 goto done;
 2675         }
 2676 
 2677         /* Check hotspares list. */
 2678         SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
 2679                 vol++;
 2680                 if (vol != bd->bd_volid)
 2681                         continue;
 2682 
 2683                 if (bd->bd_diskid != 0)
 2684                         break;
 2685 
 2686                 bd->bd_status = hotspare->src_meta.scm_status;
 2687                 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
 2688                 bd->bd_channel = vol;
 2689                 bd->bd_target = bd->bd_diskid;
 2690                 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname,
 2691                     sizeof(bd->bd_vendor));
 2692                 rv = 0;
 2693                 goto done;
 2694         }
 2695 
 2696 done:
 2697         return (rv);
 2698 }
 2699 
 2700 int
 2701 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
 2702 {
 2703         int                     rv = EINVAL;
 2704         int                     vol = -1, found, c;
 2705         struct sr_discipline    *sd;
 2706         struct sr_chunk         *ch_entry;
 2707         struct sr_chunk_head    *cl;
 2708 
 2709         if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED)
 2710                 goto done;
 2711 
 2712         if (bs->bs_status == BIOC_SSHOTSPARE) {
 2713                 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id);
 2714                 goto done;
 2715         }
 2716 
 2717         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 2718                 vol++;
 2719                 if (vol == bs->bs_volid)
 2720                         break;
 2721         }
 2722         if (sd == NULL)
 2723                 goto done;
 2724 
 2725         switch (bs->bs_status) {
 2726         case BIOC_SSOFFLINE:
 2727                 /* Take chunk offline */
 2728                 found = c = 0;
 2729                 cl = &sd->sd_vol.sv_chunk_list;
 2730                 SLIST_FOREACH(ch_entry, cl, src_link) {
 2731                         if (ch_entry->src_dev_mm == bs->bs_other_id) {
 2732                                 found = 1;
 2733                                 break;
 2734                         }
 2735                         c++;
 2736                 }
 2737                 if (found == 0) {
 2738                         sr_error(sc, "chunk not part of array");
 2739                         goto done;
 2740                 }
 2741 
 2742                 /* XXX: check current state first */
 2743                 sd->sd_set_chunk_state(sd, c, BIOC_SDOFFLINE);
 2744 
 2745                 if (sr_meta_save(sd, SR_META_DIRTY)) {
 2746                         sr_error(sc, "could not save metadata for %s",
 2747                             sd->sd_meta->ssd_devname);
 2748                         goto done;
 2749                 }
 2750                 rv = 0;
 2751                 break;
 2752 
 2753         case BIOC_SDSCRUB:
 2754                 break;
 2755 
 2756         case BIOC_SSREBUILD:
 2757                 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0);
 2758                 break;
 2759 
 2760         default:
 2761                 sr_error(sc, "unsupported state request %d", bs->bs_status);
 2762         }
 2763 
 2764 done:
 2765         return (rv);
 2766 }
 2767 
 2768 int
 2769 sr_chunk_in_use(struct sr_softc *sc, dev_t dev)
 2770 {
 2771         struct sr_discipline    *sd;
 2772         struct sr_chunk         *chunk;
 2773         int                     i;
 2774 
 2775         DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev);
 2776 
 2777         if (dev == NODEV)
 2778                 return BIOC_SDINVALID;
 2779 
 2780         /* See if chunk is already in use. */
 2781         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 2782                 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
 2783                         chunk = sd->sd_vol.sv_chunks[i];
 2784                         if (chunk->src_dev_mm == dev)
 2785                                 return chunk->src_meta.scm_status;
 2786                 }
 2787         }
 2788 
 2789         /* Check hotspares list. */
 2790         SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link)
 2791                 if (chunk->src_dev_mm == dev)
 2792                         return chunk->src_meta.scm_status;
 2793 
 2794         return BIOC_SDINVALID;
 2795 }
 2796 
 2797 int
 2798 sr_hotspare(struct sr_softc *sc, dev_t dev)
 2799 {
 2800         struct sr_discipline    *sd = NULL;
 2801         struct sr_metadata      *sm = NULL;
 2802         struct sr_meta_chunk    *hm;
 2803         struct sr_chunk_head    *cl;
 2804         struct sr_chunk         *chunk, *last, *hotspare = NULL;
 2805         struct sr_uuid          uuid;
 2806         struct disklabel        label;
 2807         struct vnode            *vn;
 2808         u_int64_t               size;
 2809         char                    devname[32];
 2810         int                     rv = EINVAL;
 2811         int                     c, part, open = 0;
 2812 
 2813         /*
 2814          * Add device to global hotspares list.
 2815          */
 2816 
 2817         sr_meta_getdevname(sc, dev, devname, sizeof(devname));
 2818 
 2819         /* Make sure chunk is not already in use. */
 2820         c = sr_chunk_in_use(sc, dev);
 2821         if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
 2822                 if (c == BIOC_SDHOTSPARE)
 2823                         sr_error(sc, "%s is already a hotspare", devname);
 2824                 else
 2825                         sr_error(sc, "%s is already in use", devname);
 2826                 goto done;
 2827         }
 2828 
 2829         /* XXX - See if there is an existing degraded volume... */
 2830 
 2831         /* Open device. */
 2832         if (bdevvp(dev, &vn)) {
 2833                 sr_error(sc, "sr_hotspare: cannot allocate vnode");
 2834                 goto done;
 2835         }
 2836         if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
 2837                 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
 2838                     DEVNAME(sc), devname);
 2839                 vput(vn);
 2840                 goto fail;
 2841         }
 2842         open = 1; /* close dev on error */
 2843 
 2844         /* Get partition details. */
 2845         part = DISKPART(dev);
 2846         if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
 2847             NOCRED, curproc)) {
 2848                 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n",
 2849                     DEVNAME(sc));
 2850                 goto fail;
 2851         }
 2852         if (label.d_partitions[part].p_fstype != FS_RAID) {
 2853                 sr_error(sc, "%s partition not of type RAID (%d)",
 2854                     devname, label.d_partitions[part].p_fstype);
 2855                 goto fail;
 2856         }
 2857 
 2858         /* Calculate partition size. */
 2859         size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part]));
 2860         if (size <= SR_DATA_OFFSET) {
 2861                 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
 2862                     devname);
 2863                 goto fail;
 2864         }
 2865         size -= SR_DATA_OFFSET;
 2866         if (size > INT64_MAX) {
 2867                 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc),
 2868                     devname);
 2869                 goto fail;
 2870         }
 2871 
 2872         /*
 2873          * Create and populate chunk metadata.
 2874          */
 2875 
 2876         sr_uuid_generate(&uuid);
 2877         hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
 2878 
 2879         hotspare->src_dev_mm = dev;
 2880         hotspare->src_vn = vn;
 2881         strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname));
 2882         hotspare->src_size = size;
 2883 
 2884         hm = &hotspare->src_meta;
 2885         hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
 2886         hm->scmi.scm_chunk_id = 0;
 2887         hm->scmi.scm_size = size;
 2888         hm->scmi.scm_coerced_size = size;
 2889         strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname));
 2890         memcpy(&hm->scmi.scm_uuid, &uuid, sizeof(struct sr_uuid));
 2891 
 2892         sr_checksum(sc, hm, &hm->scm_checksum,
 2893             sizeof(struct sr_meta_chunk_invariant));
 2894 
 2895         hm->scm_status = BIOC_SDHOTSPARE;
 2896 
 2897         /*
 2898          * Create and populate our own discipline and metadata.
 2899          */
 2900 
 2901         sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO);
 2902         sm->ssdi.ssd_magic = SR_MAGIC;
 2903         sm->ssdi.ssd_version = SR_META_VERSION;
 2904         sm->ssd_ondisk = 0;
 2905         sm->ssdi.ssd_vol_flags = 0;
 2906         memcpy(&sm->ssdi.ssd_uuid, &uuid, sizeof(struct sr_uuid));
 2907         sm->ssdi.ssd_chunk_no = 1;
 2908         sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID;
 2909         sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL;
 2910         sm->ssdi.ssd_size = size;
 2911         sm->ssdi.ssd_secsize = label.d_secsize;
 2912         strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
 2913         snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
 2914             "SR %s", "HOTSPARE");
 2915         snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
 2916             "%03d", SR_META_VERSION);
 2917 
 2918         sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
 2919         sd->sd_sc = sc;
 2920         sd->sd_meta = sm;
 2921         sd->sd_meta_type = SR_META_F_NATIVE;
 2922         sd->sd_vol_status = BIOC_SVONLINE;
 2923         strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name));
 2924         SLIST_INIT(&sd->sd_meta_opt);
 2925 
 2926         /* Add chunk to volume. */
 2927         sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF,
 2928             M_WAITOK | M_ZERO);
 2929         sd->sd_vol.sv_chunks[0] = hotspare;
 2930         SLIST_INIT(&sd->sd_vol.sv_chunk_list);
 2931         SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link);
 2932 
 2933         /* Save metadata. */
 2934         if (sr_meta_save(sd, SR_META_DIRTY)) {
 2935                 sr_error(sc, "could not save metadata to %s", devname);
 2936                 goto fail;
 2937         }
 2938 
 2939         /*
 2940          * Add chunk to hotspare list.
 2941          */
 2942         rw_enter_write(&sc->sc_hs_lock);
 2943         cl = &sc->sc_hotspare_list;
 2944         if (SLIST_EMPTY(cl))
 2945                 SLIST_INSERT_HEAD(cl, hotspare, src_link);
 2946         else {
 2947                 SLIST_FOREACH(chunk, cl, src_link)
 2948                         last = chunk;
 2949                 SLIST_INSERT_AFTER(last, hotspare, src_link);
 2950         }
 2951         sc->sc_hotspare_no++;
 2952         rw_exit_write(&sc->sc_hs_lock);
 2953 
 2954         rv = 0;
 2955         goto done;
 2956 
 2957 fail:
 2958         free(hotspare, M_DEVBUF, sizeof(*hotspare));
 2959 
 2960 done:
 2961         if (sd)
 2962                 free(sd->sd_vol.sv_chunks, M_DEVBUF,
 2963                     sizeof(sd->sd_vol.sv_chunks));
 2964         free(sd, M_DEVBUF, sizeof(*sd));
 2965         free(sm, M_DEVBUF, sizeof(*sm));
 2966         if (open) {
 2967                 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
 2968                 vput(vn);
 2969         }
 2970 
 2971         return (rv);
 2972 }
 2973 
 2974 void
 2975 sr_hotspare_rebuild_callback(void *xsd)
 2976 {
 2977         struct sr_discipline *sd = xsd;
 2978         sr_hotspare_rebuild(sd);
 2979 }
 2980 
 2981 void
 2982 sr_hotspare_rebuild(struct sr_discipline *sd)
 2983 {
 2984         struct sr_softc         *sc = sd->sd_sc;
 2985         struct sr_chunk_head    *cl;
 2986         struct sr_chunk         *hotspare, *chunk = NULL;
 2987         struct sr_workunit      *wu;
 2988         struct sr_ccb           *ccb;
 2989         int                     i, s, cid, busy;
 2990 
 2991         /*
 2992          * Attempt to locate a hotspare and initiate rebuild.
 2993          */
 2994 
 2995         /* Find first offline chunk. */
 2996         for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) {
 2997                 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status ==
 2998                     BIOC_SDOFFLINE) {
 2999                         chunk = sd->sd_vol.sv_chunks[cid];
 3000                         break;
 3001                 }
 3002         }
 3003         if (chunk == NULL) {
 3004                 printf("%s: no offline chunk found on %s!\n",
 3005                     DEVNAME(sc), sd->sd_meta->ssd_devname);
 3006                 return;
 3007         }
 3008 
 3009         /* See if we have a suitable hotspare... */
 3010         rw_enter_write(&sc->sc_hs_lock);
 3011         cl = &sc->sc_hotspare_list;
 3012         SLIST_FOREACH(hotspare, cl, src_link)
 3013                 if (hotspare->src_size >= chunk->src_size &&
 3014                     hotspare->src_secsize <= sd->sd_meta->ssdi.ssd_secsize)
 3015                         break;
 3016 
 3017         if (hotspare != NULL) {
 3018 
 3019                 printf("%s: %s volume degraded, will attempt to "
 3020                     "rebuild on hotspare %s\n", DEVNAME(sc),
 3021                     sd->sd_meta->ssd_devname, hotspare->src_devname);
 3022 
 3023                 /*
 3024                  * Ensure that all pending I/O completes on the failed chunk
 3025                  * before trying to initiate a rebuild.
 3026                  */
 3027                 i = 0;
 3028                 do {
 3029                         busy = 0;
 3030 
 3031                         s = splbio();
 3032                         TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
 3033                                 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
 3034                                         if (ccb->ccb_target == cid)
 3035                                                 busy = 1;
 3036                                 }
 3037                         }
 3038                         TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) {
 3039                                 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
 3040                                         if (ccb->ccb_target == cid)
 3041                                                 busy = 1;
 3042                                 }
 3043                         }
 3044                         splx(s);
 3045 
 3046                         if (busy) {
 3047                                 tsleep_nsec(sd, PRIBIO, "sr_hotspare",
 3048                                     SEC_TO_NSEC(1));
 3049                                 i++;
 3050                         }
 3051 
 3052                 } while (busy && i < 120);
 3053 
 3054                 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to "
 3055                     "complete on failed chunk %s\n", DEVNAME(sc),
 3056                     i, chunk->src_devname);
 3057 
 3058                 if (busy) {
 3059                         printf("%s: pending I/O failed to complete on "
 3060                             "failed chunk %s, hotspare rebuild aborted...\n",
 3061                             DEVNAME(sc), chunk->src_devname);
 3062                         goto done;
 3063                 }
 3064 
 3065                 s = splbio();
 3066                 rw_enter_write(&sc->sc_lock);
 3067                 bio_status_init(&sc->sc_status, &sc->sc_dev);
 3068                 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) {
 3069 
 3070                         /* Remove hotspare from available list. */
 3071                         sc->sc_hotspare_no--;
 3072                         SLIST_REMOVE(cl, hotspare, sr_chunk, src_link);
 3073                         free(hotspare, M_DEVBUF, sizeof(*hotspare));
 3074 
 3075                 }
 3076                 rw_exit_write(&sc->sc_lock);
 3077                 splx(s);
 3078         }
 3079 done:
 3080         rw_exit_write(&sc->sc_hs_lock);
 3081 }
 3082 
 3083 int
 3084 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare)
 3085 {
 3086         struct sr_softc         *sc = sd->sd_sc;
 3087         struct sr_chunk         *chunk = NULL;
 3088         struct sr_meta_chunk    *meta;
 3089         struct disklabel        label;
 3090         struct vnode            *vn;
 3091         u_int64_t               size;
 3092         int64_t                 csize;
 3093         char                    devname[32];
 3094         int                     rv = EINVAL, open = 0;
 3095         int                     cid, i, part, status;
 3096 
 3097         /*
 3098          * Attempt to initiate a rebuild onto the specified device.
 3099          */
 3100 
 3101         if (!(sd->sd_capabilities & SR_CAP_REBUILD)) {
 3102                 sr_error(sc, "discipline does not support rebuild");
 3103                 goto done;
 3104         }
 3105 
 3106         /* make sure volume is in the right state */
 3107         if (sd->sd_vol_status == BIOC_SVREBUILD) {
 3108                 sr_error(sc, "rebuild already in progress");
 3109                 goto done;
 3110         }
 3111         if (sd->sd_vol_status != BIOC_SVDEGRADED) {
 3112                 sr_error(sc, "volume not degraded");
 3113                 goto done;
 3114         }
 3115 
 3116         /* Find first offline chunk. */
 3117         for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) {
 3118                 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status ==
 3119                     BIOC_SDOFFLINE) {
 3120                         chunk = sd->sd_vol.sv_chunks[cid];
 3121                         break;
 3122                 }
 3123         }
 3124         if (chunk == NULL) {
 3125                 sr_error(sc, "no offline chunks available to rebuild");
 3126                 goto done;
 3127         }
 3128 
 3129         /* Get coerced size from another online chunk. */
 3130         csize = 0;
 3131         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
 3132                 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
 3133                     BIOC_SDONLINE) {
 3134                         meta = &sd->sd_vol.sv_chunks[i]->src_meta;
 3135                         csize = meta->scmi.scm_coerced_size;
 3136                         break;
 3137                 }
 3138         }
 3139         if (csize == 0) {
 3140                 sr_error(sc, "no online chunks available for rebuild");
 3141                 goto done;
 3142         }
 3143 
 3144         sr_meta_getdevname(sc, dev, devname, sizeof(devname));
 3145         if (bdevvp(dev, &vn)) {
 3146                 printf("%s: sr_rebuild_init: can't allocate vnode\n",
 3147                     DEVNAME(sc));
 3148                 goto done;
 3149         }
 3150         if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
 3151                 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
 3152                     "open %s\n", DEVNAME(sc), devname);
 3153                 vput(vn);
 3154                 goto done;
 3155         }
 3156         open = 1; /* close dev on error */
 3157 
 3158         /* Get disklabel and check partition. */
 3159         part = DISKPART(dev);
 3160         if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
 3161             NOCRED, curproc)) {
 3162                 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n",
 3163                     DEVNAME(sc));
 3164                 goto done;
 3165         }
 3166         if (label.d_partitions[part].p_fstype != FS_RAID) {
 3167                 sr_error(sc, "%s partition not of type RAID (%d)",
 3168                     devname, label.d_partitions[part].p_fstype);
 3169                 goto done;
 3170         }
 3171 
 3172         /* Is the partition large enough? */
 3173         size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part]));
 3174         if (size <= sd->sd_meta->ssd_data_blkno) {
 3175                 sr_error(sc, "%s: %s partition too small", DEVNAME(sc),
 3176                     devname);
 3177                 goto done;
 3178         }
 3179         size -= sd->sd_meta->ssd_data_blkno;
 3180         if (size > INT64_MAX) {
 3181                 sr_error(sc, "%s: %s partition too large", DEVNAME(sc),
 3182                     devname);
 3183                 goto done;
 3184         }
 3185         if (size < csize) {
 3186                 sr_error(sc, "%s partition too small, at least %lld bytes "
 3187                     "required", devname, (long long)(csize << DEV_BSHIFT));
 3188                 goto done;
 3189         } else if (size > csize)
 3190                 sr_warn(sc, "%s partition too large, wasting %lld bytes",
 3191                     devname, (long long)((size - csize) << DEV_BSHIFT));
 3192         if (label.d_secsize > sd->sd_meta->ssdi.ssd_secsize) {
 3193                 sr_error(sc, "%s sector size too large, <= %u bytes "
 3194                     "required", devname, sd->sd_meta->ssdi.ssd_secsize);
 3195                 goto done;
 3196         }
 3197 
 3198         /* Ensure that this chunk is not already in use. */
 3199         status = sr_chunk_in_use(sc, dev);
 3200         if (status != BIOC_SDINVALID && status != BIOC_SDOFFLINE &&
 3201             !(hotspare && status == BIOC_SDHOTSPARE)) {
 3202                 sr_error(sc, "%s is already in use", devname);
 3203                 goto done;
 3204         }
 3205 
 3206         /* Reset rebuild counter since we rebuilding onto a new chunk. */
 3207         sd->sd_meta->ssd_rebuild = 0;
 3208 
 3209         open = 0; /* leave dev open from here on out */
 3210 
 3211         /* Fix up chunk. */
 3212         memcpy(chunk->src_duid, label.d_uid, sizeof(chunk->src_duid));
 3213         chunk->src_dev_mm = dev;
 3214         chunk->src_vn = vn;
 3215 
 3216         /* Reconstruct metadata. */
 3217         meta = &chunk->src_meta;
 3218         meta->scmi.scm_volid = sd->sd_meta->ssdi.ssd_volid;
 3219         meta->scmi.scm_chunk_id = cid;
 3220         strlcpy(meta->scmi.scm_devname, devname,
 3221             sizeof(meta->scmi.scm_devname));
 3222         meta->scmi.scm_size = size;
 3223         meta->scmi.scm_coerced_size = csize;
 3224         memcpy(&meta->scmi.scm_uuid, &sd->sd_meta->ssdi.ssd_uuid,
 3225             sizeof(meta->scmi.scm_uuid));
 3226         sr_checksum(sc, meta, &meta->scm_checksum,
 3227             sizeof(struct sr_meta_chunk_invariant));
 3228 
 3229         sd->sd_set_chunk_state(sd, cid, BIOC_SDREBUILD);
 3230 
 3231         if (sr_meta_save(sd, SR_META_DIRTY)) {
 3232                 sr_error(sc, "could not save metadata to %s", devname);
 3233                 open = 1;
 3234                 goto done;
 3235         }
 3236 
 3237         sr_warn(sc, "rebuild of %s started on %s",
 3238             sd->sd_meta->ssd_devname, devname);
 3239 
 3240         sd->sd_reb_abort = 0;
 3241         kthread_create_deferred(sr_rebuild_start, sd);
 3242 
 3243         rv = 0;
 3244 done:
 3245         if (open) {
 3246                 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
 3247                 vput(vn);
 3248         }
 3249 
 3250         return (rv);
 3251 }
 3252 
 3253 int
 3254 sr_rebuild_percent(struct sr_discipline *sd)
 3255 {
 3256         daddr_t                 rb, sz;
 3257 
 3258         sz = sd->sd_meta->ssdi.ssd_size;
 3259         rb = sd->sd_meta->ssd_rebuild;
 3260 
 3261         if (rb > 0)
 3262                 return (100 - ((sz * 100 - rb * 100) / sz) - 1);
 3263 
 3264         return (0);
 3265 }
 3266 
 3267 void
 3268 sr_roam_chunks(struct sr_discipline *sd)
 3269 {
 3270         struct sr_softc         *sc = sd->sd_sc;
 3271         struct sr_chunk         *chunk;
 3272         struct sr_meta_chunk    *meta;
 3273         int                     roamed = 0;
 3274 
 3275         /* Have any chunks roamed? */
 3276         SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) {
 3277                 meta = &chunk->src_meta;
 3278                 if (strncmp(meta->scmi.scm_devname, chunk->src_devname,
 3279                     sizeof(meta->scmi.scm_devname))) {
 3280 
 3281                         printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
 3282                             meta->scmi.scm_devname, chunk->src_devname);
 3283 
 3284                         strlcpy(meta->scmi.scm_devname, chunk->src_devname,
 3285                             sizeof(meta->scmi.scm_devname));
 3286 
 3287                         roamed++;
 3288                 }
 3289         }
 3290 
 3291         if (roamed)
 3292                 sr_meta_save(sd, SR_META_DIRTY);
 3293 }
 3294 
/*
 * Create a new softraid volume, or assemble an existing one, from the
 * device list supplied in `bc'.
 *
 * sc   - softraid controller softc.
 * bc   - request: device list (bc_dev_list, bc_dev_list_len bytes), RAID
 *        level (bc_level) and flags (bc_flags).
 * user - non-zero when bc->bc_dev_list has to be fetched with copyin();
 *        zero when it is copied with memcpy().  With user == 0 a volume
 *        whose sd_meta_flags carry BIOC_SCNOAUTOASSEMBLE is not assembled.
 * data - handed unchanged to the discipline's sd_assemble hook.
 *
 * On the normal path the return value is the result of the final
 * sr_meta_save().  Every failure jumps to `unwind', which releases the
 * device list, tears the discipline down via sr_discipline_shutdown() and
 * returns rv (an rv of EAGAIN is reported as 0).
 */
int
sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc,
    int user, void *data)
{
        struct sr_meta_opt_item *omi;
        struct sr_chunk_head    *cl;
        struct sr_discipline    *sd = NULL;
        struct sr_chunk         *ch_entry;
        struct scsi_link        *link;
        struct device           *dev;
        char                    *uuid, devname[32];
        dev_t                   *dt = NULL;
        int                     i, no_chunk, rv = EINVAL, target, vol;
        int                     no_meta;

        DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
            DEVNAME(sc), user);

        /* user input */
        if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
                goto unwind;

        /* Take a private copy of the device list. */
        dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
        if (user) {
                if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0)
                        goto unwind;
        } else
                memcpy(dt, bc->bc_dev_list, bc->bc_dev_list_len);

        /* Initialise discipline. */
        sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
        sd->sd_sc = sc;
        SLIST_INIT(&sd->sd_meta_opt);
        sd->sd_taskq = taskq_create("srdis", 1, IPL_BIO, 0);
        if (sd->sd_taskq == NULL) {
                sr_error(sc, "could not create discipline taskq");
                goto unwind;
        }
        if (sr_discipline_init(sd, bc->bc_level)) {
                sr_error(sc, "could not initialize discipline");
                goto unwind;
        }

        /* The list length is in bytes; convert it to a number of devices. */
        no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
        cl = &sd->sd_vol.sv_chunk_list;
        SLIST_INIT(cl);

        /* Ensure that chunks are not already in use. */
        for (i = 0; i < no_chunk; i++) {
                if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) {
                        sr_meta_getdevname(sc, dt[i], devname, sizeof(devname));
                        sr_error(sc, "chunk %s already in use", devname);
                        goto unwind;
                }
        }

        sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
        if (sd->sd_meta_type == SR_META_F_INVALID) {
                sr_error(sc, "invalid metadata format");
                goto unwind;
        }

        if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE))
                goto unwind;

        /* force the raid volume by clearing metadata region */
        if (bc->bc_flags & BIOC_SCFORCE) {
                /* make sure disk isn't up and running */
                if (sr_meta_read(sd))
                        if (sr_already_assembled(sd)) {
                                uuid = sr_uuid_format(
                                    &sd->sd_meta->ssdi.ssd_uuid);
                                sr_error(sc, "disk %s is currently in use; "
                                    "cannot force create", uuid);
                                free(uuid, M_DEVBUF, 37);
                                goto unwind;
                        }

                if (sr_meta_clear(sd)) {
                        sr_error(sc, "failed to clear metadata");
                        goto unwind;
                }
        }

        /*
         * sr_meta_read() result selects the path below: -1 is treated as
         * corrupt metadata, 0 as "no metadata" (create a new volume), and
         * any other value as existing metadata to assemble from.
         */
        no_meta = sr_meta_read(sd);
        if (no_meta == -1) {

                /* Corrupt metadata on one or more chunks. */
                sr_error(sc, "one of the chunks has corrupt metadata; "
                    "aborting assembly");
                goto unwind;

        } else if (no_meta == 0) {

                /* Initialise volume and chunk metadata. */
                sr_meta_init(sd, bc->bc_level, no_chunk);
                sd->sd_vol_status = BIOC_SVONLINE;
                sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
                if (sd->sd_create) {
                        /* A non-zero result from the hook becomes rv. */
                        if ((i = sd->sd_create(sd, bc, no_chunk,
                            sd->sd_vol.sv_chunk_minsz))) {
                                rv = i;
                                goto unwind;
                        }
                }
                sr_meta_init_complete(sd);

                DNPRINTF(SR_D_IOCTL,
                    "%s: sr_ioctl_createraid: vol_size: %lld\n",
                    DEVNAME(sc), sd->sd_meta->ssdi.ssd_size);

                /* Warn if we've wasted chunk space due to coercing. */
                if ((sd->sd_capabilities & SR_CAP_NON_COERCED) == 0 &&
                    sd->sd_vol.sv_chunk_minsz != sd->sd_vol.sv_chunk_maxsz)
                        sr_warn(sc, "chunk sizes are not equal; up to %llu "
                            "blocks wasted per chunk",
                            sd->sd_vol.sv_chunk_maxsz -
                            sd->sd_vol.sv_chunk_minsz);

        } else {

                /*
                 * Ensure we are assembling the correct # of chunks.
                 * NOTE(review): level 0x1C is allowed to come up with fewer
                 * chunks than the metadata records; presumably this is the
                 * RAID 1C discipline - confirm.
                 */
                if (bc->bc_level == 0x1C &&
                    sd->sd_meta->ssdi.ssd_chunk_no > no_chunk) {
                        sr_warn(sc, "trying to bring up %s degraded",
                            sd->sd_meta->ssd_devname);
                } else if (sd->sd_meta->ssdi.ssd_chunk_no != no_chunk) {
                        sr_error(sc, "volume chunk count does not match metadata "
                            "chunk count");
                        goto unwind;
                }

                /* Ensure metadata level matches requested assembly level. */
                if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) {
                        sr_error(sc, "volume level does not match metadata "
                            "level");
                        goto unwind;
                }

                if (sr_already_assembled(sd)) {
                        uuid = sr_uuid_format(&sd->sd_meta->ssdi.ssd_uuid);
                        sr_error(sc, "disk %s already assembled", uuid);
                        free(uuid, M_DEVBUF, 37);
                        goto unwind;
                }

                /* Non-user requests honour the "no auto assemble" flag. */
                if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
                        DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
                            "metadata\n", DEVNAME(sc));
                        goto unwind;
                }

                if (no_meta != no_chunk)
                        sr_warn(sc, "trying to bring up %s degraded",
                            sd->sd_meta->ssd_devname);

                if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
                        sr_warn(sc, "%s was not shutdown properly",
                            sd->sd_meta->ssd_devname);

                /*
                 * Offer each optional metadata item to the discipline's own
                 * handler first; fall back to the generic handler when there
                 * is none or it returns non-zero.
                 */
                SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link)
                        if (sd->sd_meta_opt_handler == NULL ||
                            sd->sd_meta_opt_handler(sd, omi->omi_som) != 0)
                                sr_meta_opt_handler(sd, omi->omi_som);

                if (sd->sd_assemble) {
                        /* A non-zero result from the hook becomes rv. */
                        if ((i = sd->sd_assemble(sd, bc, no_chunk, data))) {
                                rv = i;
                                goto unwind;
                        }
                }

                DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
                    DEVNAME(sc));

        }

        /* Metadata MUST be fully populated by this point. */
        TAILQ_INSERT_TAIL(&sc->sc_dis_list, sd, sd_link);

        /* Allocate all resources. */
        if ((rv = sd->sd_alloc_resources(sd)))
                goto unwind;

        /*
         * Adjust flags if necessary: make the stored "no auto assemble"
         * bit follow the one in the request.
         */
        if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) &&
            (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) !=
            (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) {
                sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE;
                sd->sd_meta->ssdi.ssd_vol_flags |=
                    bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
        }

        if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) {
                /* Initialise volume state. */
                sd->sd_set_vol_state(sd);
                if (sd->sd_vol_status == BIOC_SVOFFLINE) {
                        sr_error(sc, "%s is offline, will not be brought "
                            "online", sd->sd_meta->ssd_devname);
                        goto unwind;
                }

                /* Setup SCSI iopool. */
                scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put);

                /*
                 * All checks passed - return ENXIO if volume cannot be created.
                 */
                rv = ENXIO;

                /*
                 * Find a free target.
                 *
                 * XXX: We reserve sd_target == 0 to indicate the
                 * discipline is not linked into sc->sc_targets, so begin
                 * the search with target = 1.
                 */
                for (target = 1; target < SR_MAX_LD; target++)
                        if (sc->sc_targets[target] == NULL)
                                break;
                if (target == SR_MAX_LD) {
                        sr_error(sc, "no free target for %s",
                            sd->sd_meta->ssd_devname);
                        goto unwind;
                }

                /* Clear sense data. */
                bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));

                /* Attach discipline and get midlayer to probe it. */
                sd->sd_target = target;
                sc->sc_targets[target] = sd;
                if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) {
                        sr_error(sc, "scsi_probe_lun failed");
                        /* Undo the target registration before unwinding. */
                        sc->sc_targets[target] = NULL;
                        sd->sd_target = 0;
                        goto unwind;
                }

                link = scsi_get_link(sc->sc_scsibus, target, 0);
                if (link == NULL)
                        goto unwind;

                dev = link->device_softc;
                DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n",
                    DEVNAME(sc), dev->dv_xname, sd->sd_target);

                /*
                 * XXX - Count volumes, not targets.  vol ends up as the
                 * number of occupied target slots up to and including this
                 * one, minus one.
                 */
                for (i = 0, vol = -1; i <= sd->sd_target; i++)
                        if (sc->sc_targets[i])
                                vol++;

                rv = 0;

                /* Warn when the recorded volume name differs from the new one. */
                if (sd->sd_meta->ssd_devname[0] != '\0' &&
                    strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
                    sizeof(dev->dv_xname)))
                        sr_warn(sc, "volume %s is roaming, it used to be %s, "
                            "updating metadata", dev->dv_xname,
                            sd->sd_meta->ssd_devname);

                /* Populate remaining volume metadata. */
                sd->sd_meta->ssdi.ssd_volid = vol;
                strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
                    sizeof(sd->sd_meta->ssd_devname));

                sr_info(sc, "%s volume attached as %s",
                    sd->sd_name, sd->sd_meta->ssd_devname);

                /* Update device name on any roaming chunks. */
                sr_roam_chunks(sd);

#ifndef SMALL_KERNEL
                if (sr_sensors_create(sd))
                        sr_warn(sc, "unable to create sensor for %s",
                            dev->dv_xname);
#endif /* SMALL_KERNEL */
        } else {
                /*
                 * This volume does not attach as a system disk; it is
                 * named after the first chunk on its chunk list.
                 */
                ch_entry = SLIST_FIRST(cl); /* XXX */
                strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname,
                    sizeof(sd->sd_meta->ssd_devname));

                if (sd->sd_start_discipline(sd))
                        goto unwind;
        }

        /*
         * Save current metadata to disk.  The result becomes the return
         * value; a failure here does not unwind the volume.
         */
        rv = sr_meta_save(sd, SR_META_DIRTY);

        if (sd->sd_vol_status == BIOC_SVREBUILD)
                kthread_create_deferred(sr_rebuild_start, sd);

        sd->sd_ready = 1;

        free(dt, M_DEVBUF, bc->bc_dev_list_len);

        return (rv);

unwind:
        free(dt, M_DEVBUF, bc->bc_dev_list_len);

        /* sd may still be NULL here; sr_discipline_shutdown() is called anyway. */
        sr_discipline_shutdown(sd, 0, 0);

        /*
         * NOTE(review): EAGAIN is turned into success; presumably a
         * discipline hook uses it to abort without reporting an error -
         * confirm.
         */
        if (rv == EAGAIN)
                rv = 0;

        return (rv);
}
 3604 
 3605 int
 3606 sr_ioctl_deleteraid(struct sr_softc *sc, struct sr_discipline *sd,
 3607     struct bioc_deleteraid *bd)
 3608 {
 3609         int                     rv = 1;
 3610 
 3611         DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n",
 3612             DEVNAME(sc), bd->bd_dev);
 3613 
 3614         if (sd == NULL && (sd = sr_find_discipline(sc, bd->bd_dev)) == NULL) {
 3615                 sr_error(sc, "volume %s not found", bd->bd_dev);
 3616                 goto bad;
 3617         }
 3618 
 3619         /*
 3620          * XXX Better check for mounted file systems and refuse to detach any
 3621          * volume that is actively in use.
 3622          */
 3623         if (bcmp(&sr_bootuuid, &sd->sd_meta->ssdi.ssd_uuid,
 3624             sizeof(sr_bootuuid)) == 0) {
 3625                 sr_error(sc, "refusing to delete boot volume");
 3626                 goto bad;
 3627         }
 3628 
 3629         sd->sd_deleted = 1;
 3630         sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE;
 3631         sr_discipline_shutdown(sd, 1, 0);
 3632 
 3633         rv = 0;
 3634 bad:
 3635         return (rv);
 3636 }
 3637 
 3638 int
 3639 sr_ioctl_discipline(struct sr_softc *sc, struct sr_discipline *sd,
 3640     struct bioc_discipline *bd)
 3641 {
 3642         int                     rv = 1;
 3643 
 3644         /* Dispatch a discipline specific ioctl. */
 3645 
 3646         DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc),
 3647             bd->bd_dev);
 3648 
 3649         if (sd == NULL && (sd = sr_find_discipline(sc, bd->bd_dev)) == NULL) {
 3650                 sr_error(sc, "volume %s not found", bd->bd_dev);
 3651                 goto bad;
 3652         }
 3653 
 3654         if (sd->sd_ioctl_handler)
 3655                 rv = sd->sd_ioctl_handler(sd, bd);
 3656 
 3657 bad:
 3658         return (rv);
 3659 }
 3660 
 3661 int
 3662 sr_ioctl_installboot(struct sr_softc *sc, struct sr_discipline *sd,
 3663     struct bioc_installboot *bb)
 3664 {
 3665         void                    *bootblk = NULL, *bootldr = NULL;
 3666         struct sr_chunk         *chunk;
 3667         struct sr_meta_opt_item *omi;
 3668         struct sr_meta_boot     *sbm;
 3669         struct disk             *dk;
 3670         u_int32_t               bbs = 0, bls = 0, secsize;
 3671         u_char                  duid[8];
 3672         int                     rv = EINVAL;
 3673         int                     i;
 3674 
 3675         DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc),
 3676             bb->bb_dev);
 3677 
 3678         if (sd == NULL && (sd = sr_find_discipline(sc, bb->bb_dev)) == NULL) {
 3679                 sr_error(sc, "volume %s not found", bb->bb_dev);
 3680                 goto done;
 3681         }
 3682 
 3683         TAILQ_FOREACH(dk, &disklist,  dk_link)
 3684                 if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev)))
 3685                         break;
 3686         if (dk == NULL || dk->dk_label == NULL ||
 3687             duid_iszero(dk->dk_label->d_uid)) {
 3688                 sr_error(sc, "failed to get DUID for softraid volume");
 3689                 goto done;
 3690         }
 3691         memcpy(duid, dk->dk_label->d_uid, sizeof(duid));
 3692 
 3693         /* Ensure that boot storage area is large enough. */
 3694         if (sd->sd_meta->ssd_data_blkno < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) {
 3695                 sr_error(sc, "insufficient boot storage");
 3696                 goto done;
 3697         }
 3698 
 3699         if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * DEV_BSIZE) {
 3700                 sr_error(sc, "boot block too large (%d > %d)",
 3701                     bb->bb_bootblk_size, SR_BOOT_BLOCKS_SIZE * DEV_BSIZE);
 3702                 goto done;
 3703         }
 3704 
 3705         if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * DEV_BSIZE) {
 3706                 sr_error(sc, "boot loader too large (%d > %d)",
 3707                     bb->bb_bootldr_size, SR_BOOT_LOADER_SIZE * DEV_BSIZE);
 3708                 goto done;
 3709         }
 3710 
 3711         secsize = sd->sd_meta->ssdi.ssd_secsize;
 3712 
 3713         /* Copy in boot block. */
 3714         bbs = howmany(bb->bb_bootblk_size, secsize) * secsize;
 3715         bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO);
 3716         if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0)
 3717                 goto done;
 3718 
 3719         /* Copy in boot loader. */
 3720         bls = howmany(bb->bb_bootldr_size, secsize) * secsize;
 3721         bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO);
 3722         if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0)
 3723                 goto done;
 3724 
 3725         /* Create or update optional meta for bootable volumes. */
 3726         SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link)
 3727                 if (omi->omi_som->som_type == SR_OPT_BOOT)
 3728                         break;
 3729         if (omi == NULL) {
 3730                 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
 3731                     M_WAITOK | M_ZERO);
 3732                 omi->omi_som = malloc(sizeof(struct sr_meta_boot), M_DEVBUF,
 3733                     M_WAITOK | M_ZERO);
 3734                 omi->omi_som->som_type = SR_OPT_BOOT;
 3735                 omi->omi_som->som_length = sizeof(struct sr_meta_boot);
 3736                 SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link);
 3737                 sd->sd_meta->ssdi.ssd_opt_no++;
 3738         }
 3739         sbm = (struct sr_meta_boot *)omi->omi_som;
 3740 
 3741         memcpy(sbm->sbm_root_duid, duid, sizeof(sbm->sbm_root_duid));
 3742         bzero(&sbm->sbm_boot_duid, sizeof(sbm->sbm_boot_duid));
 3743         sbm->sbm_bootblk_size = bbs;
 3744         sbm->sbm_bootldr_size = bls;
 3745 
 3746         DNPRINTF(SR_D_IOCTL, "sr_ioctl_installboot: root duid is %s\n",
 3747             duid_format(sbm->sbm_root_duid));
 3748 
 3749         /* Save boot block and boot loader to each chunk. */
 3750         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
 3751 
 3752                 chunk = sd->sd_vol.sv_chunks[i];
 3753                 if (chunk->src_meta.scm_status != BIOC_SDONLINE &&
 3754                     chunk->src_meta.scm_status != BIOC_SDREBUILD)
 3755                         continue;
 3756 
 3757                 if (i < SR_MAX_BOOT_DISKS)
 3758                         memcpy(&sbm->sbm_boot_duid[i], chunk->src_duid,
 3759                             sizeof(sbm->sbm_boot_duid[i]));
 3760 
 3761                 /* Save boot blocks. */
 3762                 DNPRINTF(SR_D_IOCTL,
 3763                     "sr_ioctl_installboot: saving boot block to %s "
 3764                     "(%u bytes)\n", chunk->src_devname, bbs);
 3765 
 3766                 if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs,
 3767                     SR_BOOT_BLOCKS_OFFSET, B_WRITE)) {
 3768                         sr_error(sc, "failed to write boot block");
 3769                         goto done;
 3770                 }
 3771 
 3772                 /* Save boot loader.*/
 3773                 DNPRINTF(SR_D_IOCTL,
 3774                     "sr_ioctl_installboot: saving boot loader to %s "
 3775                     "(%u bytes)\n", chunk->src_devname, bls);
 3776 
 3777                 if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls,
 3778                     SR_BOOT_LOADER_OFFSET, B_WRITE)) {
 3779                         sr_error(sc, "failed to write boot loader");
 3780                         goto done;
 3781                 }
 3782         }
 3783 
 3784         /* XXX - Install boot block on disk - MD code. */
 3785 
 3786         /* Mark volume as bootable and save metadata. */
 3787         sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE;
 3788         if (sr_meta_save(sd, SR_META_DIRTY)) {
 3789                 sr_error(sc, "could not save metadata to %s", DEVNAME(sc));
 3790                 goto done;
 3791         }
 3792 
 3793         rv = 0;
 3794 
 3795 done:
 3796         free(bootblk, M_DEVBUF, bbs);
 3797         free(bootldr, M_DEVBUF, bls);
 3798 
 3799         return (rv);
 3800 }
 3801 
 3802 void
 3803 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
 3804 {
 3805         struct sr_chunk         *ch_entry, *ch_next;
 3806 
 3807         DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
 3808 
 3809         if (!cl)
 3810                 return;
 3811 
 3812         for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL; ch_entry = ch_next) {
 3813                 ch_next = SLIST_NEXT(ch_entry, src_link);
 3814 
 3815                 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
 3816                     DEVNAME(sc), ch_entry->src_devname);
 3817                 if (ch_entry->src_vn) {
 3818                         /*
 3819                          * XXX - explicitly lock the vnode until we can resolve
 3820                          * the problem introduced by vnode aliasing... specfs
 3821                          * has no locking, whereas ufs/ffs does!
 3822                          */
 3823                         vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | LK_RETRY);
 3824                         VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED,
 3825                             curproc);
 3826                         vput(ch_entry->src_vn);
 3827                 }
 3828                 free(ch_entry, M_DEVBUF, sizeof(*ch_entry));
 3829         }
 3830         SLIST_INIT(cl);
 3831 }
 3832 
 3833 void
 3834 sr_discipline_free(struct sr_discipline *sd)
 3835 {
 3836         struct sr_softc         *sc;
 3837         struct sr_discipline    *sdtmp1;
 3838         struct sr_meta_opt_head *som;
 3839         struct sr_meta_opt_item *omi, *omi_next;
 3840 
 3841         if (!sd)
 3842                 return;
 3843 
 3844         sc = sd->sd_sc;
 3845 
 3846         DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
 3847             DEVNAME(sc),
 3848             sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
 3849         if (sd->sd_free_resources)
 3850                 sd->sd_free_resources(sd);
 3851         free(sd->sd_vol.sv_chunks, M_DEVBUF, 0);
 3852         free(sd->sd_meta, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
 3853         free(sd->sd_meta_foreign, M_DEVBUF, smd[sd->sd_meta_type].smd_size);
 3854 
 3855         som = &sd->sd_meta_opt;
 3856         for (omi = SLIST_FIRST(som); omi != NULL; omi = omi_next) {
 3857                 omi_next = SLIST_NEXT(omi, omi_link);
 3858                 free(omi->omi_som, M_DEVBUF, 0);
 3859                 free(omi, M_DEVBUF, sizeof(*omi));
 3860         }
 3861 
 3862         if (sd->sd_target != 0) {
 3863                 KASSERT(sc->sc_targets[sd->sd_target] == sd);
 3864                 sc->sc_targets[sd->sd_target] = NULL;
 3865         }
 3866 
 3867         TAILQ_FOREACH(sdtmp1, &sc->sc_dis_list, sd_link) {
 3868                 if (sdtmp1 == sd)
 3869                         break;
 3870         }
 3871         if (sdtmp1 != NULL)
 3872                 TAILQ_REMOVE(&sc->sc_dis_list, sd, sd_link);
 3873 
 3874         explicit_bzero(sd, sizeof *sd);
 3875         free(sd, M_DEVBUF, sizeof(*sd));
 3876 }
 3877 
 3878 void
 3879 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save, int dying)
 3880 {
 3881         struct sr_softc         *sc;
 3882         int                     ret, s;
 3883 
 3884         if (!sd)
 3885                 return;
 3886         sc = sd->sd_sc;
 3887 
 3888         DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
 3889             sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
 3890 
 3891         /* If rebuilding, abort rebuild and drain I/O. */
 3892         if (sd->sd_reb_active) {
 3893                 sd->sd_reb_abort = 1;
 3894                 while (sd->sd_reb_active)
 3895                         tsleep_nsec(sd, PWAIT, "sr_shutdown", MSEC_TO_NSEC(1));
 3896         }
 3897 
 3898         if (meta_save)
 3899                 sr_meta_save(sd, 0);
 3900 
 3901         s = splbio();
 3902 
 3903         sd->sd_ready = 0;
 3904 
 3905         /* make sure there isn't a sync pending and yield */
 3906         wakeup(sd);
 3907         while (sd->sd_sync || sd->sd_must_flush) {
 3908                 ret = tsleep_nsec(&sd->sd_sync, MAXPRI, "sr_down",
 3909                     SEC_TO_NSEC(60));
 3910                 if (ret == EWOULDBLOCK)
 3911                         break;
 3912         }
 3913         if (dying == -1) {
 3914                 sd->sd_ready = 1;
 3915                 splx(s);
 3916                 return;
 3917         }
 3918 
 3919 #ifndef SMALL_KERNEL
 3920         sr_sensors_delete(sd);
 3921 #endif /* SMALL_KERNEL */
 3922 
 3923         if (sd->sd_target != 0)
 3924                 scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0,
 3925                     dying ? 0 : DETACH_FORCE);
 3926 
 3927         sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
 3928 
 3929         if (sd->sd_taskq)
 3930                 taskq_destroy(sd->sd_taskq);
 3931 
 3932         sr_discipline_free(sd);
 3933 
 3934         splx(s);
 3935 }
 3936 
 3937 int
 3938 sr_discipline_init(struct sr_discipline *sd, int level)
 3939 {
 3940         int                     rv = 1;
 3941 
 3942         /* Initialise discipline function pointers with defaults. */
 3943         sd->sd_alloc_resources = sr_alloc_resources;
 3944         sd->sd_assemble = NULL;
 3945         sd->sd_create = NULL;
 3946         sd->sd_free_resources = sr_free_resources;
 3947         sd->sd_ioctl_handler = NULL;
 3948         sd->sd_openings = NULL;
 3949         sd->sd_meta_opt_handler = NULL;
 3950         sd->sd_rebuild = sr_rebuild;
 3951         sd->sd_scsi_inquiry = sr_raid_inquiry;
 3952         sd->sd_scsi_read_cap = sr_raid_read_cap;
 3953         sd->sd_scsi_tur = sr_raid_tur;
 3954         sd->sd_scsi_req_sense = sr_raid_request_sense;
 3955         sd->sd_scsi_start_stop = sr_raid_start_stop;
 3956         sd->sd_scsi_sync = sr_raid_sync;
 3957         sd->sd_scsi_rw = NULL;
 3958         sd->sd_scsi_intr = sr_raid_intr;
 3959         sd->sd_scsi_wu_done = NULL;
 3960         sd->sd_scsi_done = NULL;
 3961         sd->sd_set_chunk_state = sr_set_chunk_state;
 3962         sd->sd_set_vol_state = sr_set_vol_state;
 3963         sd->sd_start_discipline = NULL;
 3964 
 3965         task_set(&sd->sd_meta_save_task, sr_meta_save_callback, sd);
 3966         task_set(&sd->sd_hotspare_rebuild_task, sr_hotspare_rebuild_callback,
 3967             sd);
 3968 
 3969         sd->sd_wu_size = sizeof(struct sr_workunit);
 3970         switch (level) {
 3971         case 0:
 3972                 sr_raid0_discipline_init(sd);
 3973                 break;
 3974         case 1:
 3975                 sr_raid1_discipline_init(sd);
 3976                 break;
 3977         case 5:
 3978                 sr_raid5_discipline_init(sd);
 3979                 break;
 3980         case 6:
 3981                 sr_raid6_discipline_init(sd);
 3982                 break;
 3983 #ifdef CRYPTO
 3984         case 'C':
 3985                 sr_crypto_discipline_init(sd);
 3986                 break;
 3987         case 0x1C:
 3988                 sr_raid1c_discipline_init(sd);
 3989                 break;
 3990 #endif
 3991         case 'c':
 3992                 sr_concat_discipline_init(sd);
 3993                 break;
 3994         default:
 3995                 goto bad;
 3996         }
 3997 
 3998         rv = 0;
 3999 bad:
 4000         return (rv);
 4001 }
 4002 
 4003 int
 4004 sr_raid_inquiry(struct sr_workunit *wu)
 4005 {
 4006         struct sr_discipline    *sd = wu->swu_dis;
 4007         struct scsi_xfer        *xs = wu->swu_xs;
 4008         struct scsi_inquiry     *cdb = (struct scsi_inquiry *)&xs->cmd;
 4009         struct scsi_inquiry_data inq;
 4010 
 4011         DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
 4012 
 4013         if (xs->cmdlen != sizeof(*cdb))
 4014                 return (EINVAL);
 4015 
 4016         if (ISSET(cdb->flags, SI_EVPD))
 4017                 return (EOPNOTSUPP);
 4018 
 4019         bzero(&inq, sizeof(inq));
 4020         inq.device = T_DIRECT;
 4021         inq.dev_qual2 = 0;
 4022         inq.version = SCSI_REV_2;
 4023         inq.response_format = SID_SCSI2_RESPONSE;
 4024         inq.additional_length = SID_SCSI2_ALEN;
 4025         inq.flags |= SID_CmdQue;
 4026         strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
 4027             sizeof(inq.vendor));
 4028         strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
 4029             sizeof(inq.product));
 4030         strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
 4031             sizeof(inq.revision));
 4032         scsi_copy_internal_data(xs, &inq, sizeof(inq));
 4033 
 4034         return (0);
 4035 }
 4036 
 4037 int
 4038 sr_raid_read_cap(struct sr_workunit *wu)
 4039 {
 4040         struct sr_discipline    *sd = wu->swu_dis;
 4041         struct scsi_xfer        *xs = wu->swu_xs;
 4042         struct scsi_read_cap_data rcd;
 4043         struct scsi_read_cap_data_16 rcd16;
 4044         u_int64_t               addr;
 4045         int                     rv = 1;
 4046         u_int32_t               secsize;
 4047 
 4048         DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
 4049 
 4050         secsize = sd->sd_meta->ssdi.ssd_secsize;
 4051 
 4052         addr = ((sd->sd_meta->ssdi.ssd_size * DEV_BSIZE) / secsize) - 1;
 4053         if (xs->cmd.opcode == READ_CAPACITY) {
 4054                 bzero(&rcd, sizeof(rcd));
 4055                 if (addr > 0xffffffffllu)
 4056                         _lto4b(0xffffffff, rcd.addr);
 4057                 else
 4058                         _lto4b(addr, rcd.addr);
 4059                 _lto4b(secsize, rcd.length);
 4060                 scsi_copy_internal_data(xs, &rcd, sizeof(rcd));
 4061                 rv = 0;
 4062         } else if (xs->cmd.opcode == READ_CAPACITY_16) {
 4063                 bzero(&rcd16, sizeof(rcd16));
 4064                 _lto8b(addr, rcd16.addr);
 4065                 _lto4b(secsize, rcd16.length);
 4066                 scsi_copy_internal_data(xs, &rcd16, sizeof(rcd16));
 4067                 rv = 0;
 4068         }
 4069 
 4070         return (rv);
 4071 }
 4072 
 4073 int
 4074 sr_raid_tur(struct sr_workunit *wu)
 4075 {
 4076         struct sr_discipline    *sd = wu->swu_dis;
 4077 
 4078         DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
 4079 
 4080         if (sd->sd_vol_status == BIOC_SVOFFLINE) {
 4081                 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
 4082                 sd->sd_scsi_sense.flags = SKEY_NOT_READY;
 4083                 sd->sd_scsi_sense.add_sense_code = 0x04;
 4084                 sd->sd_scsi_sense.add_sense_code_qual = 0x11;
 4085                 sd->sd_scsi_sense.extra_len = 4;
 4086                 return (1);
 4087         } else if (sd->sd_vol_status == BIOC_SVINVALID) {
 4088                 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
 4089                 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
 4090                 sd->sd_scsi_sense.add_sense_code = 0x05;
 4091                 sd->sd_scsi_sense.add_sense_code_qual = 0x00;
 4092                 sd->sd_scsi_sense.extra_len = 4;
 4093                 return (1);
 4094         }
 4095 
 4096         return (0);
 4097 }
 4098 
 4099 int
 4100 sr_raid_request_sense(struct sr_workunit *wu)
 4101 {
 4102         struct sr_discipline    *sd = wu->swu_dis;
 4103         struct scsi_xfer        *xs = wu->swu_xs;
 4104 
 4105         DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
 4106             DEVNAME(sd->sd_sc));
 4107 
 4108         /* use latest sense data */
 4109         memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense));
 4110 
 4111         /* clear sense data */
 4112         bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
 4113 
 4114         return (0);
 4115 }
 4116 
 4117 int
 4118 sr_raid_start_stop(struct sr_workunit *wu)
 4119 {
 4120         struct scsi_xfer        *xs = wu->swu_xs;
 4121         struct scsi_start_stop  *ss = (struct scsi_start_stop *)&xs->cmd;
 4122 
 4123         DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
 4124             DEVNAME(wu->swu_dis->sd_sc));
 4125 
 4126         if (!ss)
 4127                 return (1);
 4128 
 4129         /*
 4130          * do nothing!
 4131          * a softraid discipline should always reflect correct status
 4132          */
 4133         return (0);
 4134 }
 4135 
 4136 int
 4137 sr_raid_sync(struct sr_workunit *wu)
 4138 {
 4139         struct sr_discipline    *sd = wu->swu_dis;
 4140         int                     s, ret, rv = 0, ios;
 4141 
 4142         DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
 4143 
 4144         /* when doing a fake sync don't count the wu */
 4145         ios = (wu->swu_flags & SR_WUF_FAKE) ? 0 : 1;
 4146 
 4147         s = splbio();
 4148         sd->sd_sync = 1;
 4149         while (sd->sd_wu_pending > ios) {
 4150                 ret = tsleep_nsec(sd, PRIBIO, "sr_sync", SEC_TO_NSEC(15));
 4151                 if (ret == EWOULDBLOCK) {
 4152                         DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
 4153                             DEVNAME(sd->sd_sc));
 4154                         rv = 1;
 4155                         break;
 4156                 }
 4157         }
 4158         sd->sd_sync = 0;
 4159         splx(s);
 4160 
 4161         wakeup(&sd->sd_sync);
 4162 
 4163         return (rv);
 4164 }
 4165 
 4166 void
 4167 sr_raid_intr(struct buf *bp)
 4168 {
 4169         struct sr_ccb           *ccb = (struct sr_ccb *)bp;
 4170         struct sr_workunit      *wu = ccb->ccb_wu;
 4171 #ifdef SR_DEBUG
 4172         struct sr_discipline    *sd = wu->swu_dis;
 4173         struct scsi_xfer        *xs = wu->swu_xs;
 4174 #endif
 4175         int                     s;
 4176 
 4177         DNPRINTF(SR_D_INTR, "%s: %s %s intr bp %p xs %p\n",
 4178             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name, bp, xs);
 4179 
 4180         s = splbio();
 4181         sr_ccb_done(ccb);
 4182         sr_wu_done(wu);
 4183         splx(s);
 4184 }
 4185 
 4186 void
 4187 sr_schedule_wu(struct sr_workunit *wu)
 4188 {
 4189         struct sr_discipline    *sd = wu->swu_dis;
 4190         struct sr_workunit      *wup;
 4191         int                     s;
 4192 
 4193         DNPRINTF(SR_D_WU, "sr_schedule_wu: schedule wu %p state %i "
 4194             "flags 0x%x\n", wu, wu->swu_state, wu->swu_flags);
 4195 
 4196         KASSERT(wu->swu_io_count > 0);
 4197 
 4198         s = splbio();
 4199 
 4200         /* Construct the work unit, do not schedule it. */
 4201         if (wu->swu_state == SR_WU_CONSTRUCT)
 4202                 goto queued;
 4203 
 4204         /* Deferred work unit being reconstructed, do not start. */
 4205         if (wu->swu_state == SR_WU_REQUEUE)
 4206                 goto queued;
 4207 
 4208         /* Current work unit failed, restart. */
 4209         if (wu->swu_state == SR_WU_RESTART)
 4210                 goto start;
 4211 
 4212         if (wu->swu_state != SR_WU_INPROGRESS)
 4213                 panic("sr_schedule_wu: work unit not in progress (state %i)",
 4214                     wu->swu_state);
 4215 
 4216         /* Walk queue backwards and fill in collider if we have one. */
 4217         TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
 4218                 if (wu->swu_blk_end < wup->swu_blk_start ||
 4219                     wup->swu_blk_end < wu->swu_blk_start)
 4220                         continue;
 4221 
 4222                 /* Defer work unit due to LBA collision. */
 4223                 DNPRINTF(SR_D_WU, "sr_schedule_wu: deferring work unit %p\n",
 4224                     wu);
 4225                 wu->swu_state = SR_WU_DEFERRED;
 4226                 while (wup->swu_collider)
 4227                         wup = wup->swu_collider;
 4228                 wup->swu_collider = wu;
 4229                 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
 4230                 sd->sd_wu_collisions++;
 4231                 goto queued;
 4232         }
 4233 
 4234 start:
 4235         sr_raid_startwu(wu);
 4236 
 4237 queued:
 4238         splx(s);
 4239 }
 4240 
 4241 void
 4242 sr_raid_startwu(struct sr_workunit *wu)
 4243 {
 4244         struct sr_discipline    *sd = wu->swu_dis;
 4245         struct sr_ccb           *ccb;
 4246 
 4247         DNPRINTF(SR_D_WU, "sr_raid_startwu: start wu %p\n", wu);
 4248 
 4249         splassert(IPL_BIO);
 4250 
 4251         if (wu->swu_state == SR_WU_DEFERRED) {
 4252                 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
 4253                 wu->swu_state = SR_WU_INPROGRESS;
 4254         }
 4255 
 4256         if (wu->swu_state != SR_WU_RESTART)
 4257                 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
 4258 
 4259         /* Start all of the individual I/Os. */
 4260         if (wu->swu_cb_active == 1)
 4261                 panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc));
 4262         wu->swu_cb_active = 1;
 4263 
 4264         TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
 4265                 VOP_STRATEGY(ccb->ccb_buf.b_vp, &ccb->ccb_buf);
 4266 
 4267         wu->swu_cb_active = 0;
 4268 }
 4269 
 4270 void
 4271 sr_raid_recreate_wu(struct sr_workunit *wu)
 4272 {
 4273         struct sr_discipline    *sd = wu->swu_dis;
 4274         struct sr_workunit      *wup = wu;
 4275 
 4276         /*
 4277          * Recreate a work unit by releasing the associated CCBs and reissuing
 4278          * the SCSI I/O request. This process is then repeated for all of the
 4279          * colliding work units.
 4280          */
 4281         do {
 4282                 sr_wu_release_ccbs(wup);
 4283 
 4284                 wup->swu_state = SR_WU_REQUEUE;
 4285                 if (sd->sd_scsi_rw(wup))
 4286                         panic("could not requeue I/O");
 4287 
 4288                 wup = wup->swu_collider;
 4289         } while (wup);
 4290 }
 4291 
 4292 int
 4293 sr_alloc_resources(struct sr_discipline *sd)
 4294 {
 4295         if (sr_wu_alloc(sd)) {
 4296                 sr_error(sd->sd_sc, "unable to allocate work units");
 4297                 return (ENOMEM);
 4298         }
 4299         if (sr_ccb_alloc(sd)) {
 4300                 sr_error(sd->sd_sc, "unable to allocate ccbs");
 4301                 return (ENOMEM);
 4302         }
 4303 
 4304         return (0);
 4305 }
 4306 
 4307 void
 4308 sr_free_resources(struct sr_discipline *sd)
 4309 {
 4310         sr_wu_free(sd);
 4311         sr_ccb_free(sd);
 4312 }
 4313 
 4314 void
 4315 sr_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
 4316 {
 4317         int                     old_state, s;
 4318 
 4319         DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_set_chunk_state %d -> %d\n",
 4320             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 4321             sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 4322 
 4323         /* ok to go to splbio since this only happens in error path */
 4324         s = splbio();
 4325         old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
 4326 
 4327         /* multiple IOs to the same chunk that fail will come through here */
 4328         if (old_state == new_state)
 4329                 goto done;
 4330 
 4331         switch (old_state) {
 4332         case BIOC_SDONLINE:
 4333                 if (new_state == BIOC_SDOFFLINE)
 4334                         break;
 4335                 else
 4336                         goto die;
 4337                 break;
 4338 
 4339         case BIOC_SDOFFLINE:
 4340                 goto die;
 4341 
 4342         default:
 4343 die:
 4344                 splx(s); /* XXX */
 4345                 panic("%s: %s: %s: invalid chunk state transition %d -> %d",
 4346                     DEVNAME(sd->sd_sc),
 4347                     sd->sd_meta->ssd_devname,
 4348                     sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
 4349                     old_state, new_state);
 4350                 /* NOTREACHED */
 4351         }
 4352 
 4353         sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
 4354         sd->sd_set_vol_state(sd);
 4355 
 4356         sd->sd_must_flush = 1;
 4357         task_add(systq, &sd->sd_meta_save_task);
 4358 done:
 4359         splx(s);
 4360 }
 4361 
 4362 void
 4363 sr_set_vol_state(struct sr_discipline *sd)
 4364 {
 4365         int                     states[SR_MAX_STATES];
 4366         int                     new_state, i, nd;
 4367         int                     old_state = sd->sd_vol_status;
 4368         u_int32_t               s;
 4369 
 4370         DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state\n",
 4371             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 4372 
 4373         nd = sd->sd_meta->ssdi.ssd_chunk_no;
 4374 
 4375         for (i = 0; i < SR_MAX_STATES; i++)
 4376                 states[i] = 0;
 4377 
 4378         for (i = 0; i < nd; i++) {
 4379                 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
 4380                 if (s >= SR_MAX_STATES)
 4381                         panic("%s: %s: %s: invalid chunk state",
 4382                             DEVNAME(sd->sd_sc),
 4383                             sd->sd_meta->ssd_devname,
 4384                             sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
 4385                 states[s]++;
 4386         }
 4387 
 4388         if (states[BIOC_SDONLINE] == nd)
 4389                 new_state = BIOC_SVONLINE;
 4390         else
 4391                 new_state = BIOC_SVOFFLINE;
 4392 
 4393         DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state %d -> %d\n",
 4394             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 4395             old_state, new_state);
 4396 
 4397         switch (old_state) {
 4398         case BIOC_SVONLINE:
 4399                 if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE)
 4400                         break;
 4401                 else
 4402                         goto die;
 4403                 break;
 4404 
 4405         case BIOC_SVOFFLINE:
 4406                 /* XXX this might be a little too much */
 4407                 goto die;
 4408 
 4409         default:
 4410 die:
 4411                 panic("%s: %s: invalid volume state transition %d -> %d",
 4412                     DEVNAME(sd->sd_sc),
 4413                     sd->sd_meta->ssd_devname,
 4414                     old_state, new_state);
 4415                 /* NOTREACHED */
 4416         }
 4417 
 4418         sd->sd_vol_status = new_state;
 4419 }
 4420 
 4421 void *
 4422 sr_block_get(struct sr_discipline *sd, long length)
 4423 {
 4424         return dma_alloc(length, PR_NOWAIT | PR_ZERO);
 4425 }
 4426 
 4427 void
 4428 sr_block_put(struct sr_discipline *sd, void *ptr, int length)
 4429 {
 4430         dma_free(ptr, length);
 4431 }
 4432 
 4433 void
 4434 sr_checksum_print(u_int8_t *md5)
 4435 {
 4436         int                     i;
 4437 
 4438         for (i = 0; i < MD5_DIGEST_LENGTH; i++)
 4439                 printf("%02x", md5[i]);
 4440 }
 4441 
 4442 void
 4443 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
 4444 {
 4445         MD5_CTX                 ctx;
 4446 
 4447         DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
 4448             md5, len);
 4449 
 4450         MD5Init(&ctx);
 4451         MD5Update(&ctx, src, len);
 4452         MD5Final(md5, &ctx);
 4453 }
 4454 
 4455 void
 4456 sr_uuid_generate(struct sr_uuid *uuid)
 4457 {
 4458         arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
 4459         /* UUID version 4: random */
 4460         uuid->sui_id[6] &= 0x0f;
 4461         uuid->sui_id[6] |= 0x40;
 4462         /* RFC4122 variant */
 4463         uuid->sui_id[8] &= 0x3f;
 4464         uuid->sui_id[8] |= 0x80;
 4465 }
 4466 
 4467 char *
 4468 sr_uuid_format(struct sr_uuid *uuid)
 4469 {
 4470         char *uuidstr;
 4471 
 4472         uuidstr = malloc(37, M_DEVBUF, M_WAITOK);
 4473 
 4474         snprintf(uuidstr, 37,
 4475             "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
 4476             "%02x%02x%02x%02x%02x%02x",
 4477             uuid->sui_id[0], uuid->sui_id[1],
 4478             uuid->sui_id[2], uuid->sui_id[3],
 4479             uuid->sui_id[4], uuid->sui_id[5],
 4480             uuid->sui_id[6], uuid->sui_id[7],
 4481             uuid->sui_id[8], uuid->sui_id[9],
 4482             uuid->sui_id[10], uuid->sui_id[11],
 4483             uuid->sui_id[12], uuid->sui_id[13],
 4484             uuid->sui_id[14], uuid->sui_id[15]);
 4485 
 4486         return uuidstr;
 4487 }
 4488 
 4489 void
 4490 sr_uuid_print(struct sr_uuid *uuid, int cr)
 4491 {
 4492         char *uuidstr;
 4493 
 4494         uuidstr = sr_uuid_format(uuid);
 4495         printf("%s%s", uuidstr, (cr ? "\n" : ""));
 4496         free(uuidstr, M_DEVBUF, 37);
 4497 }
 4498 
 4499 int
 4500 sr_already_assembled(struct sr_discipline *sd)
 4501 {
 4502         struct sr_softc         *sc = sd->sd_sc;
 4503         struct sr_discipline    *sdtmp;
 4504 
 4505         TAILQ_FOREACH(sdtmp, &sc->sc_dis_list, sd_link) {
 4506                 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
 4507                     &sdtmp->sd_meta->ssdi.ssd_uuid,
 4508                     sizeof(sd->sd_meta->ssdi.ssd_uuid)))
 4509                         return (1);
 4510         }
 4511 
 4512         return (0);
 4513 }
 4514 
 4515 int32_t
 4516 sr_validate_stripsize(u_int32_t b)
 4517 {
 4518         int                     s = 0;
 4519 
 4520         if (b % DEV_BSIZE)
 4521                 return (-1);
 4522 
 4523         while ((b & 1) == 0) {
 4524                 b >>= 1;
 4525                 s++;
 4526         }
 4527 
 4528         /* only multiple of twos */
 4529         b >>= 1;
 4530         if (b)
 4531                 return(-1);
 4532 
 4533         return (s);
 4534 }
 4535 
 4536 void
 4537 sr_quiesce(void)
 4538 {
 4539         struct sr_softc         *sc = softraid0;
 4540         struct sr_discipline    *sd, *nsd;
 4541 
 4542         if (sc == NULL)
 4543                 return;
 4544 
 4545         /* Shutdown disciplines in reverse attach order. */
 4546         TAILQ_FOREACH_REVERSE_SAFE(sd, &sc->sc_dis_list,
 4547             sr_discipline_list, sd_link, nsd)
 4548                 sr_discipline_shutdown(sd, 1, -1);
 4549 }
 4550 
 4551 void
 4552 sr_shutdown(int dying)
 4553 {
 4554         struct sr_softc         *sc = softraid0;
 4555         struct sr_discipline    *sd;
 4556 
 4557         if (sc == NULL)
 4558                 return;
 4559 
 4560         DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc));
 4561 
 4562         /*
 4563          * Since softraid is not under mainbus, we have to explicitly
 4564          * notify its children that the power is going down, so they
 4565          * can execute their shutdown hooks.
 4566          */
 4567         config_suspend((struct device *)sc, DVACT_POWERDOWN);
 4568 
 4569         /* Shutdown disciplines in reverse attach order. */
 4570         while ((sd = TAILQ_LAST(&sc->sc_dis_list, sr_discipline_list)) != NULL)
 4571                 sr_discipline_shutdown(sd, 1, dying);
 4572 }
 4573 
 4574 int
 4575 sr_validate_io(struct sr_workunit *wu, daddr_t *blkno, char *func)
 4576 {
 4577         struct sr_discipline    *sd = wu->swu_dis;
 4578         struct scsi_xfer        *xs = wu->swu_xs;
 4579         int                     rv = 1;
 4580 
 4581         DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
 4582             xs->cmd.opcode);
 4583 
 4584         if (sd->sd_meta->ssd_data_blkno == 0)
 4585                 panic("invalid data blkno");
 4586 
 4587         if (sd->sd_vol_status == BIOC_SVOFFLINE) {
 4588                 DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
 4589                     DEVNAME(sd->sd_sc), func);
 4590                 goto bad;
 4591         }
 4592 
 4593         if (xs->datalen == 0) {
 4594                 printf("%s: %s: illegal block count for %s\n",
 4595                     DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
 4596                 goto bad;
 4597         }
 4598 
 4599         if (xs->cmdlen == 10)
 4600                 *blkno = _4btol(((struct scsi_rw_10 *)&xs->cmd)->addr);
 4601         else if (xs->cmdlen == 16)
 4602                 *blkno = _8btol(((struct scsi_rw_16 *)&xs->cmd)->addr);
 4603         else if (xs->cmdlen == 6)
 4604                 *blkno = _3btol(((struct scsi_rw *)&xs->cmd)->addr);
 4605         else {
 4606                 printf("%s: %s: illegal cmdlen for %s\n",
 4607                     DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
 4608                 goto bad;
 4609         }
 4610 
 4611         *blkno *= (sd->sd_meta->ssdi.ssd_secsize / DEV_BSIZE);
 4612 
 4613         wu->swu_blk_start = *blkno;
 4614         wu->swu_blk_end = *blkno + (xs->datalen >> DEV_BSHIFT) - 1;
 4615 
 4616         if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
 4617                 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
 4618                     "end: %lld length: %d\n",
 4619                     DEVNAME(sd->sd_sc), func, (long long)wu->swu_blk_start,
 4620                     (long long)wu->swu_blk_end, xs->datalen);
 4621 
 4622                 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
 4623                     SSD_ERRCODE_VALID;
 4624                 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
 4625                 sd->sd_scsi_sense.add_sense_code = 0x21;
 4626                 sd->sd_scsi_sense.add_sense_code_qual = 0x00;
 4627                 sd->sd_scsi_sense.extra_len = 4;
 4628                 goto bad;
 4629         }
 4630 
 4631         rv = 0;
 4632 bad:
 4633         return (rv);
 4634 }
 4635 
 4636 void
 4637 sr_rebuild_start(void *arg)
 4638 {
 4639         struct sr_discipline    *sd = arg;
 4640         struct sr_softc         *sc = sd->sd_sc;
 4641 
 4642         DNPRINTF(SR_D_REBUILD, "%s: %s starting rebuild thread\n",
 4643             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 4644 
 4645         if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc,
 4646             DEVNAME(sc)) != 0)
 4647                 printf("%s: unable to start background operation\n",
 4648                     DEVNAME(sc));
 4649 }
 4650 
 4651 void
 4652 sr_rebuild_thread(void *arg)
 4653 {
 4654         struct sr_discipline    *sd = arg;
 4655 
 4656         DNPRINTF(SR_D_REBUILD, "%s: %s rebuild thread started\n",
 4657             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 4658 
 4659         sd->sd_reb_active = 1;
 4660         sd->sd_rebuild(sd);
 4661         sd->sd_reb_active = 0;
 4662 
 4663         kthread_exit(0);
 4664 }
 4665 
 4666 void
 4667 sr_rebuild(struct sr_discipline *sd)
 4668 {
 4669         struct sr_softc         *sc = sd->sd_sc;
 4670         u_int64_t               sz, whole_blk, partial_blk, blk, restart;
 4671         daddr_t                 lba;
 4672         struct sr_workunit      *wu_r, *wu_w;
 4673         struct scsi_xfer        xs_r, xs_w;
 4674         struct scsi_rw_16       *cr, *cw;
 4675         int                     c, s, slept, percent = 0, old_percent = -1;
 4676         u_int8_t                *buf;
 4677 
 4678         whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
 4679         partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE;
 4680 
 4681         restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE;
 4682         if (restart > whole_blk) {
 4683                 printf("%s: bogus rebuild restart offset, starting from 0\n",
 4684                     DEVNAME(sc));
 4685                 restart = 0;
 4686         }
 4687         if (restart) {
 4688                 /*
 4689                  * XXX there is a hole here; there is a possibility that we
 4690                  * had a restart however the chunk that was supposed to
 4691                  * be rebuilt is no longer valid; we can reach this situation
 4692                  * when a rebuild is in progress and the box crashes and
 4693                  * on reboot the rebuild chunk is different (like zero'd or
 4694                  * replaced).  We need to check the uuid of the chunk that is
 4695                  * being rebuilt to assert this.
 4696                  */
 4697                 percent = sr_rebuild_percent(sd);
 4698                 printf("%s: resuming rebuild on %s at %d%%\n",
 4699                     DEVNAME(sc), sd->sd_meta->ssd_devname, percent);
 4700         }
 4701 
 4702         /* currently this is 64k therefore we can use dma_alloc */
 4703         buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK);
 4704         for (blk = restart; blk <= whole_blk; blk++) {
 4705                 lba = blk * SR_REBUILD_IO_SIZE;
 4706                 sz = SR_REBUILD_IO_SIZE;
 4707                 if (blk == whole_blk) {
 4708                         if (partial_blk == 0)
 4709                                 break;
 4710                         sz = partial_blk;
 4711                 }
 4712 
 4713                 /* get some wu */
 4714                 wu_r = sr_scsi_wu_get(sd, 0);
 4715                 wu_w = sr_scsi_wu_get(sd, 0);
 4716 
 4717                 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild wu_r %p, wu_w %p\n",
 4718                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r, wu_w);
 4719 
 4720                 /* setup read io */
 4721                 bzero(&xs_r, sizeof xs_r);
 4722                 xs_r.error = XS_NOERROR;
 4723                 xs_r.flags = SCSI_DATA_IN;
 4724                 xs_r.datalen = sz << DEV_BSHIFT;
 4725                 xs_r.data = buf;
 4726                 xs_r.cmdlen = sizeof(*cr);
 4727                 cr = (struct scsi_rw_16 *)&xs_r.cmd;
 4728                 cr->opcode = READ_16;
 4729                 _lto4b(sz, cr->length);
 4730                 _lto8b(lba, cr->addr);
 4731                 wu_r->swu_state = SR_WU_CONSTRUCT;
 4732                 wu_r->swu_flags |= SR_WUF_REBUILD;
 4733                 wu_r->swu_xs = &xs_r;
 4734                 if (sd->sd_scsi_rw(wu_r)) {
 4735                         printf("%s: could not create read io\n",
 4736                             DEVNAME(sc));
 4737                         goto fail;
 4738                 }
 4739 
 4740                 /* setup write io */
 4741                 bzero(&xs_w, sizeof xs_w);
 4742                 xs_w.error = XS_NOERROR;
 4743                 xs_w.flags = SCSI_DATA_OUT;
 4744                 xs_w.datalen = sz << DEV_BSHIFT;
 4745                 xs_w.data = buf;
 4746                 xs_w.cmdlen = sizeof(*cw);
 4747                 cw = (struct scsi_rw_16 *)&xs_w.cmd;
 4748                 cw->opcode = WRITE_16;
 4749                 _lto4b(sz, cw->length);
 4750                 _lto8b(lba, cw->addr);
 4751                 wu_w->swu_state = SR_WU_CONSTRUCT;
 4752                 wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP;
 4753                 wu_w->swu_xs = &xs_w;
 4754                 if (sd->sd_scsi_rw(wu_w)) {
 4755                         printf("%s: could not create write io\n",
 4756                             DEVNAME(sc));
 4757                         goto fail;
 4758                 }
 4759 
 4760                 /*
 4761                  * collide with the read io so that we get automatically
 4762                  * started when the read is done
 4763                  */
 4764                 wu_w->swu_state = SR_WU_DEFERRED;
 4765                 wu_r->swu_collider = wu_w;
 4766                 s = splbio();
 4767                 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
 4768                 splx(s);
 4769 
 4770                 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild scheduling wu_r %p\n",
 4771                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r);
 4772 
 4773                 wu_r->swu_state = SR_WU_INPROGRESS;
 4774                 sr_schedule_wu(wu_r);
 4775 
 4776                 /* wait for write completion */
 4777                 slept = 0;
 4778                 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
 4779                         tsleep_nsec(wu_w, PRIBIO, "sr_rebuild", INFSLP);
 4780                         slept = 1;
 4781                 }
 4782                 /* yield if we didn't sleep */
 4783                 if (slept == 0)
 4784                         tsleep_nsec(sc, PWAIT, "sr_yield", MSEC_TO_NSEC(1));
 4785 
 4786                 sr_scsi_wu_put(sd, wu_r);
 4787                 sr_scsi_wu_put(sd, wu_w);
 4788 
 4789                 sd->sd_meta->ssd_rebuild = lba;
 4790 
 4791                 /* XXX - this should be based on size, not percentage. */
 4792                 /* save metadata every percent */
 4793                 percent = sr_rebuild_percent(sd);
 4794                 if (percent != old_percent && blk != whole_blk) {
 4795                         if (sr_meta_save(sd, SR_META_DIRTY))
 4796                                 printf("%s: could not save metadata to %s\n",
 4797                                     DEVNAME(sc), sd->sd_meta->ssd_devname);
 4798                         old_percent = percent;
 4799                 }
 4800 
 4801                 if (sd->sd_reb_abort)
 4802                         goto abort;
 4803         }
 4804 
 4805         /* all done */
 4806         sd->sd_meta->ssd_rebuild = 0;
 4807         for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) {
 4808                 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
 4809                     BIOC_SDREBUILD) {
 4810                         sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE);
 4811                         break;
 4812                 }
 4813         }
 4814 
 4815 abort:
 4816         if (sr_meta_save(sd, SR_META_DIRTY))
 4817                 printf("%s: could not save metadata to %s\n",
 4818                     DEVNAME(sc), sd->sd_meta->ssd_devname);
 4819 fail:
 4820         dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT);
 4821 }
 4822 
 4823 struct sr_discipline *
 4824 sr_find_discipline(struct sr_softc *sc, const char *devname)
 4825 {
 4826         struct sr_discipline    *sd;
 4827 
 4828         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link)
 4829                 if (!strncmp(sd->sd_meta->ssd_devname, devname,
 4830                     sizeof(sd->sd_meta->ssd_devname)))
 4831                         break;
 4832         return sd;
 4833 }
 4834 
 4835 #ifndef SMALL_KERNEL
 4836 int
 4837 sr_sensors_create(struct sr_discipline *sd)
 4838 {
 4839         struct sr_softc         *sc = sd->sd_sc;
 4840         int                     rv = 1;
 4841 
 4842         DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
 4843             DEVNAME(sc), sd->sd_meta->ssd_devname);
 4844 
 4845         sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
 4846         sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
 4847         strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
 4848             sizeof(sd->sd_vol.sv_sensor.desc));
 4849 
 4850         sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor);
 4851         sd->sd_vol.sv_sensor_attached = 1;
 4852 
 4853         if (sc->sc_sensor_task == NULL) {
 4854                 sc->sc_sensor_task = sensor_task_register(sc,
 4855                     sr_sensors_refresh, 10);
 4856                 if (sc->sc_sensor_task == NULL)
 4857                         goto bad;
 4858         }
 4859 
 4860         rv = 0;
 4861 bad:
 4862         return (rv);
 4863 }
 4864 
 4865 void
 4866 sr_sensors_delete(struct sr_discipline *sd)
 4867 {
 4868         DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc));
 4869 
 4870         if (sd->sd_vol.sv_sensor_attached)
 4871                 sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor);
 4872 }
 4873 
 4874 void
 4875 sr_sensors_refresh(void *arg)
 4876 {
 4877         struct sr_softc         *sc = arg;
 4878         struct sr_volume        *sv;
 4879         struct sr_discipline    *sd;
 4880 
 4881         DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
 4882 
 4883         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 4884                 sv = &sd->sd_vol;
 4885 
 4886                 switch(sd->sd_vol_status) {
 4887                 case BIOC_SVOFFLINE:
 4888                         sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
 4889                         sv->sv_sensor.status = SENSOR_S_CRIT;
 4890                         break;
 4891 
 4892                 case BIOC_SVDEGRADED:
 4893                         sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
 4894                         sv->sv_sensor.status = SENSOR_S_WARN;
 4895                         break;
 4896 
 4897                 case BIOC_SVREBUILD:
 4898                         sv->sv_sensor.value = SENSOR_DRIVE_REBUILD;
 4899                         sv->sv_sensor.status = SENSOR_S_WARN;
 4900                         break;
 4901 
 4902                 case BIOC_SVSCRUB:
 4903                 case BIOC_SVONLINE:
 4904                         sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
 4905                         sv->sv_sensor.status = SENSOR_S_OK;
 4906                         break;
 4907 
 4908                 default:
 4909                         sv->sv_sensor.value = 0; /* unknown */
 4910                         sv->sv_sensor.status = SENSOR_S_UNKNOWN;
 4911                 }
 4912         }
 4913 }
 4914 #endif /* SMALL_KERNEL */
 4915 
 4916 #ifdef SR_FANCY_STATS
 4917 void                            sr_print_stats(void);
 4918 
 4919 void
 4920 sr_print_stats(void)
 4921 {
 4922         struct sr_softc         *sc = softraid0;
 4923         struct sr_discipline    *sd;
 4924 
 4925         if (sc == NULL) {
 4926                 printf("no softraid softc found\n");
 4927                 return;
 4928         }
 4929 
 4930         TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) {
 4931                 printf("%s: ios pending %d, collisions %llu\n",
 4932                     sd->sd_meta->ssd_devname,
 4933                     sd->sd_wu_pending,
 4934                     sd->sd_wu_collisions);
 4935         }
 4936 }
 4937 #endif /* SR_FANCY_STATS */
 4938 
 4939 #ifdef SR_DEBUG
 4940 void
 4941 sr_meta_print(struct sr_metadata *m)
 4942 {
 4943         int                     i;
 4944         struct sr_meta_chunk    *mc;
 4945         struct sr_meta_opt_hdr  *omh;
 4946 
 4947         if (!(sr_debug & SR_D_META))
 4948                 return;
 4949 
 4950         printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
 4951         printf("\tssd_version %d\n", m->ssdi.ssd_version);
 4952         printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags);
 4953         printf("\tssd_uuid ");
 4954         sr_uuid_print(&m->ssdi.ssd_uuid, 1);
 4955         printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
 4956         printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
 4957         printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
 4958         printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
 4959         printf("\tssd_level %d\n", m->ssdi.ssd_level);
 4960         printf("\tssd_size %lld\n", m->ssdi.ssd_size);
 4961         printf("\tssd_devname %s\n", m->ssd_devname);
 4962         printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
 4963         printf("\tssd_product %s\n", m->ssdi.ssd_product);
 4964         printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
 4965         printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
 4966         printf("\tssd_checksum ");
 4967         sr_checksum_print(m->ssd_checksum);
 4968         printf("\n");
 4969         printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
 4970         printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
 4971 
 4972         mc = (struct sr_meta_chunk *)(m + 1);
 4973         for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
 4974                 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
 4975                 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
 4976                 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
 4977                 printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
 4978                 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
 4979                 printf("\t\tscm_uuid ");
 4980                 sr_uuid_print(&mc->scmi.scm_uuid, 1);
 4981                 printf("\t\tscm_checksum ");
 4982                 sr_checksum_print(mc->scm_checksum);
 4983                 printf("\n");
 4984                 printf("\t\tscm_status %d\n", mc->scm_status);
 4985         }
 4986 
 4987         omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(m + 1) +
 4988             sizeof(struct sr_meta_chunk) * m->ssdi.ssd_chunk_no);
 4989         for (i = 0; i < m->ssdi.ssd_opt_no; i++) {
 4990                 printf("\t\t\tsom_type %d\n", omh->som_type);
 4991                 printf("\t\t\tsom_checksum ");
 4992                 sr_checksum_print(omh->som_checksum);
 4993                 printf("\n");
 4994                 omh = (struct sr_meta_opt_hdr *)((void *)omh +
 4995                     omh->som_length);
 4996         }
 4997 }
 4998 
 4999 void
 5000 sr_dump_block(void *blk, int len)
 5001 {
 5002         uint8_t                 *b = blk;
 5003         int                     i, j, c;
 5004 
 5005         for (i = 0; i < len; i += 16) {
 5006                 for (j = 0; j < 16; j++)
 5007                         printf("%.2x ", b[i + j]);
 5008                 printf("  ");
 5009                 for (j = 0; j < 16; j++) {
 5010                         c = b[i + j];
 5011                         if (c < ' ' || c > 'z' || i + j > len)
 5012                                 c = '.';
 5013                         printf("%c", c);
 5014                 }
 5015                 printf("\n");
 5016         }
 5017 }
 5018 
 5019 void
 5020 sr_dump_mem(u_int8_t *p, int len)
 5021 {
 5022         int                     i;
 5023 
 5024         for (i = 0; i < len; i++)
 5025                 printf("%02x ", *p++);
 5026         printf("\n");
 5027 }
 5028 
 5029 #endif /* SR_DEBUG */
 5030 
 5031 #ifdef HIBERNATE
 5032 /*
 5033  * Side-effect free (no malloc, printf, pool, splx) softraid crypto writer.
 5034  *
 5035  * This function must perform the following:
 5036  * 1. Determine the underlying device's own side-effect free I/O function
 5037  *    (eg, ahci_hibernate_io, wd_hibernate_io, etc).
 5038  * 2. Store enough information in the provided page argument for subsequent
 5039  *    I/O calls (such as the crypto discipline structure for the keys, the
 5040  *    offset of the softraid partition on the underlying disk, as well as
 5041  *    the offset of the swap partition within the crypto volume.
 5042  * 3. Encrypt the incoming data using the sr_discipline keys, then pass
 5043  *    the request to the underlying device's own I/O function.
 5044  */
 5045 int
 5046 sr_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, int op, void *page)
 5047 {
 5048         /* Struct for stashing data obtained on HIB_INIT.
 5049          * XXX
 5050          * We share the page with the underlying device's own
 5051          * side-effect free I/O function, so we pad our data to
 5052          * the end of the page. Presently this does not overlap
 5053          * with either of the two other side-effect free i/o
 5054          * functions (ahci/wd).
 5055          */
 5056         struct {
 5057                 char pad[3072];
 5058                 struct sr_discipline *srd;
 5059                 hibio_fn subfn;         /* underlying device i/o fn */
 5060                 dev_t subdev;           /* underlying device dev_t */
 5061                 daddr_t sr_swapoff;     /* ofs of swap part in sr volume */
 5062                 char buf[DEV_BSIZE];    /* encryption performed into this buf */
 5063         } *my = page;
 5064         extern struct cfdriver sd_cd;
 5065         char errstr[128], *dl_ret;
 5066         struct sr_chunk *schunk;
 5067         struct sd_softc *sd;
 5068         struct aes_xts_ctx ctx;
 5069         struct sr_softc *sc;
 5070         struct device *dv;
 5071         daddr_t key_blkno;
 5072         uint32_t sub_raidoff;  /* ofs of sr part in underlying dev */
 5073         struct disklabel dl;
 5074         struct partition *pp;
 5075         size_t i, j;
 5076         u_char iv[8];
 5077 
 5078         /*
 5079          * In HIB_INIT, we are passed the swap partition size and offset
 5080          * in 'size' and 'blkno' respectively. These are relative to the
 5081          * start of the softraid partition, and we need to save these
 5082          * for later translation to the underlying device's layout.
 5083          */
 5084         if (op == HIB_INIT) {
 5085                 dv = disk_lookup(&sd_cd, DISKUNIT(dev));
 5086                 sd = (struct sd_softc *)dv;
 5087                 sc = (struct sr_softc *)dv->dv_parent->dv_parent;
 5088 
 5089                 /*
 5090                  * Look up the sr discipline. This is used to determine
 5091                  * if we are SR crypto and what the underlying device is.
 5092                  */
 5093                 my->srd = sc->sc_targets[sd->sc_link->target];
 5094                 DNPRINTF(SR_D_MISC, "sr_hibernate_io: discipline is %s\n",
 5095                         my->srd->sd_name);
 5096                 if (strncmp(my->srd->sd_name, "CRYPTO",
 5097                     sizeof(my->srd->sd_name)))
 5098                         return (ENOTSUP);
 5099 
 5100                 /* Find the underlying device */
 5101                 schunk = my->srd->sd_vol.sv_chunks[0];
 5102                 my->subdev = schunk->src_dev_mm;
 5103 
 5104                 /*
 5105                  * Find the appropriate underlying device side effect free
 5106                  * I/O function, based on the type of device it is.
 5107                  */
 5108                 my->subfn = get_hibernate_io_function(my->subdev);
 5109                 if (!my->subfn)
 5110                         return (ENODEV);
 5111 
 5112                 /*
 5113                  * Find blkno where this raid partition starts on
 5114                  * the underlying disk.
 5115                  */
 5116                 dl_ret = disk_readlabel(&dl, my->subdev, errstr,
 5117                     sizeof(errstr));
 5118                 if (dl_ret) {
 5119                         printf("Hibernate error reading disklabel: %s\n", dl_ret);
 5120                         return (ENOTSUP);
 5121                 }
 5122 
 5123                 pp = &dl.d_partitions[DISKPART(my->subdev)];
 5124                 if (pp->p_fstype != FS_RAID || DL_GETPSIZE(pp) == 0)
 5125                         return (ENOTSUP);
 5126 
 5127                 /* Find the blkno of the SR part in the underlying device */
 5128                 sub_raidoff = my->srd->sd_meta->ssd_data_blkno +
 5129                     DL_SECTOBLK(&dl, DL_GETPOFFSET(pp));
 5130                 DNPRINTF(SR_D_MISC,"sr_hibernate_io: blk trans ofs: %d blks\n",
 5131                     sub_raidoff);
 5132 
 5133                 /* Save the blkno of the swap partition in the SR disk */
 5134                 my->sr_swapoff = blkno;
 5135 
 5136                 /* Initialize the sub-device */
 5137                 return my->subfn(my->subdev, sub_raidoff + blkno,
 5138                     addr, size, op, page);
 5139         }
 5140 
 5141         /* Hibernate only uses (and we only support) writes */
 5142         if (op != HIB_W)
 5143                 return (ENOTSUP);
 5144 
 5145         /*
 5146          * Blocks act as the IV for the encryption. These block numbers
 5147          * are relative to the start of the sr partition, but the 'blkno'
 5148          * passed above is relative to the start of the swap partition
 5149          * inside the sr partition, so bias appropriately.
 5150          */
 5151         key_blkno = my->sr_swapoff + blkno;
 5152 
 5153         /* Process each disk block one at a time. */
 5154         for (i = 0; i < size; i += DEV_BSIZE) {
 5155                 int res;
 5156 
 5157                 bzero(&ctx, sizeof(ctx));
 5158 
 5159                 /*
 5160                  * Set encryption key (from the sr discipline stashed
 5161                  * during HIB_INIT. This code is based on the softraid
 5162                  * bootblock code.
 5163                  */
 5164                 aes_xts_setkey(&ctx, my->srd->mds.mdd_crypto.scr_key[0], 64);
 5165                 /* We encrypt DEV_BSIZE bytes at a time in my->buf */
 5166                 memcpy(my->buf, ((char *)addr) + i, DEV_BSIZE);
 5167 
 5168                 /* Block number is the IV */
 5169                 memcpy(&iv, &key_blkno, sizeof(key_blkno));
 5170                 aes_xts_reinit(&ctx, iv);
 5171 
 5172                 /* Encrypt DEV_BSIZE bytes, AES_XTS_BLOCKSIZE bytes at a time */
 5173                 for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE)
 5174                         aes_xts_encrypt(&ctx, my->buf + j);
 5175 
 5176                 /*
 5177                  * Write one block out from my->buf to the underlying device
 5178                  * using its own side-effect free I/O function.
 5179                  */
 5180                 res = my->subfn(my->subdev, blkno + (i / DEV_BSIZE),
 5181                     (vaddr_t)(my->buf), DEV_BSIZE, op, page);
 5182                 if (res != 0)
 5183                         return (res);
 5184                 key_blkno++;
 5185         }
 5186         return (0);
 5187 }
 5188 #endif /* HIBERNATE */

Cache object: 08f8aa0d61fe19e94076ac51024f99b3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.