sys/geom/raid/g_raid.c
1 /*-
2 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: releng/9.0/sys/geom/raid/g_raid.c 223921 2011-07-11 05:22:31Z ae $");
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/module.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40 #include <sys/malloc.h>
41 #include <sys/eventhandler.h>
42 #include <vm/uma.h>
43 #include <geom/geom.h>
44 #include <sys/proc.h>
45 #include <sys/kthread.h>
46 #include <sys/sched.h>
47 #include <geom/raid/g_raid.h>
48 #include "g_raid_md_if.h"
49 #include "g_raid_tr_if.h"
50
51 static MALLOC_DEFINE(M_RAID, "raid_data", "GEOM_RAID Data");
52
53 SYSCTL_DECL(_kern_geom);
54 SYSCTL_NODE(_kern_geom, OID_AUTO, raid, CTLFLAG_RW, 0, "GEOM_RAID stuff");
55 u_int g_raid_aggressive_spare = 0;
56 TUNABLE_INT("kern.geom.raid.aggressive_spare", &g_raid_aggressive_spare);
57 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, aggressive_spare, CTLFLAG_RW,
58 &g_raid_aggressive_spare, 0, "Use disks without metadata as spares");
59 u_int g_raid_debug = 0;
60 TUNABLE_INT("kern.geom.raid.debug", &g_raid_debug);
61 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RW, &g_raid_debug, 0,
62 "Debug level");
63 int g_raid_read_err_thresh = 10;
64 TUNABLE_INT("kern.geom.raid.read_err_thresh", &g_raid_read_err_thresh);
65 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, read_err_thresh, CTLFLAG_RW,
66 &g_raid_read_err_thresh, 0,
67 "Number of read errors equated to disk failure");
68 u_int g_raid_start_timeout = 30;
69 TUNABLE_INT("kern.geom.raid.start_timeout", &g_raid_start_timeout);
70 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, start_timeout, CTLFLAG_RW,
71 &g_raid_start_timeout, 0,
72 "Time to wait for all array components");
73 static u_int g_raid_clean_time = 5;
74 TUNABLE_INT("kern.geom.raid.clean_time", &g_raid_clean_time);
75 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, clean_time, CTLFLAG_RW,
76 &g_raid_clean_time, 0, "Mark volume as clean when idling");
77 static u_int g_raid_disconnect_on_failure = 1;
78 TUNABLE_INT("kern.geom.raid.disconnect_on_failure",
79 &g_raid_disconnect_on_failure);
80 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
81 &g_raid_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
82 static u_int g_raid_name_format = 0;
83 TUNABLE_INT("kern.geom.raid.name_format", &g_raid_name_format);
84 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, name_format, CTLFLAG_RW,
85 &g_raid_name_format, 0, "Provider name format.");
86 static u_int g_raid_idle_threshold = 1000000;
87 TUNABLE_INT("kern.geom.raid.idle_threshold", &g_raid_idle_threshold);
88 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, idle_threshold, CTLFLAG_RW,
89 &g_raid_idle_threshold, 1000000,
90 "Time in microseconds to consider a volume idle.");
91
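/*
 * Wrapper around msleep(9) that logs both going to sleep and waking up
 * on the given wait channel at debug level 4.
 */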
92 #define MSLEEP(rv, ident, mtx, priority, wmesg, timeout) do { \
93 G_RAID_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \
94 rv = msleep((ident), (mtx), (priority), (wmesg), (timeout)); \
95 G_RAID_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \
96 } while (0)
97
98 LIST_HEAD(, g_raid_md_class) g_raid_md_classes =
99 LIST_HEAD_INITIALIZER(g_raid_md_classes);
100
101 LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes =
102 LIST_HEAD_INITIALIZER(g_raid_tr_classes);
103
104 LIST_HEAD(, g_raid_volume) g_raid_volumes =
105 LIST_HEAD_INITIALIZER(g_raid_volumes);
106
107 static eventhandler_tag g_raid_pre_sync = NULL;
108 static int g_raid_started = 0;
109
110 static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp,
111 struct g_geom *gp);
112 static g_taste_t g_raid_taste;
113 static void g_raid_init(struct g_class *mp);
114 static void g_raid_fini(struct g_class *mp);
115
116 struct g_class g_raid_class = {
117 .name = G_RAID_CLASS_NAME,
118 .version = G_VERSION,
119 .ctlreq = g_raid_ctl,
120 .taste = g_raid_taste,
121 .destroy_geom = g_raid_destroy_geom,
122 .init = g_raid_init,
123 .fini = g_raid_fini
124 };
125
126 static void g_raid_destroy_provider(struct g_raid_volume *vol);
127 static int g_raid_update_disk(struct g_raid_disk *disk, u_int event);
128 static int g_raid_update_subdisk(struct g_raid_subdisk *subdisk, u_int event);
129 static int g_raid_update_volume(struct g_raid_volume *vol, u_int event);
130 static int g_raid_update_node(struct g_raid_softc *sc, u_int event);
131 static void g_raid_dumpconf(struct sbuf *sb, const char *indent,
132 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
133 static void g_raid_start(struct bio *bp);
134 static void g_raid_start_request(struct bio *bp);
135 static void g_raid_disk_done(struct bio *bp);
136 static void g_raid_poll(struct g_raid_softc *sc);
137
138 static const char *
139 g_raid_node_event2str(int event)
140 {
141
142 switch (event) {
143 case G_RAID_NODE_E_WAKE:
144 return ("WAKE");
145 case G_RAID_NODE_E_START:
146 return ("START");
147 default:
148 return ("INVALID");
149 }
150 }
151
152 const char *
153 g_raid_disk_state2str(int state)
154 {
155
156 switch (state) {
157 case G_RAID_DISK_S_NONE:
158 return ("NONE");
159 case G_RAID_DISK_S_OFFLINE:
160 return ("OFFLINE");
161 case G_RAID_DISK_S_FAILED:
162 return ("FAILED");
163 case G_RAID_DISK_S_STALE_FAILED:
164 return ("STALE_FAILED");
165 case G_RAID_DISK_S_SPARE:
166 return ("SPARE");
167 case G_RAID_DISK_S_STALE:
168 return ("STALE");
169 case G_RAID_DISK_S_ACTIVE:
170 return ("ACTIVE");
171 default:
172 return ("INVALID");
173 }
174 }
175
176 static const char *
177 g_raid_disk_event2str(int event)
178 {
179
180 switch (event) {
181 case G_RAID_DISK_E_DISCONNECTED:
182 return ("DISCONNECTED");
183 default:
184 return ("INVALID");
185 }
186 }
187
188 const char *
189 g_raid_subdisk_state2str(int state)
190 {
191
192 switch (state) {
193 case G_RAID_SUBDISK_S_NONE:
194 return ("NONE");
195 case G_RAID_SUBDISK_S_FAILED:
196 return ("FAILED");
197 case G_RAID_SUBDISK_S_NEW:
198 return ("NEW");
199 case G_RAID_SUBDISK_S_REBUILD:
200 return ("REBUILD");
201 case G_RAID_SUBDISK_S_UNINITIALIZED:
202 return ("UNINITIALIZED");
203 case G_RAID_SUBDISK_S_STALE:
204 return ("STALE");
205 case G_RAID_SUBDISK_S_RESYNC:
206 return ("RESYNC");
207 case G_RAID_SUBDISK_S_ACTIVE:
208 return ("ACTIVE");
209 default:
210 return ("INVALID");
211 }
212 }
213
214 static const char *
215 g_raid_subdisk_event2str(int event)
216 {
217
218 switch (event) {
219 case G_RAID_SUBDISK_E_NEW:
220 return ("NEW");
221 case G_RAID_SUBDISK_E_DISCONNECTED:
222 return ("DISCONNECTED");
223 default:
224 return ("INVALID");
225 }
226 }
227
228 const char *
229 g_raid_volume_state2str(int state)
230 {
231
232 switch (state) {
233 case G_RAID_VOLUME_S_STARTING:
234 return ("STARTING");
235 case G_RAID_VOLUME_S_BROKEN:
236 return ("BROKEN");
237 case G_RAID_VOLUME_S_DEGRADED:
238 return ("DEGRADED");
239 case G_RAID_VOLUME_S_SUBOPTIMAL:
240 return ("SUBOPTIMAL");
241 case G_RAID_VOLUME_S_OPTIMAL:
242 return ("OPTIMAL");
243 case G_RAID_VOLUME_S_UNSUPPORTED:
244 return ("UNSUPPORTED");
245 case G_RAID_VOLUME_S_STOPPED:
246 return ("STOPPED");
247 default:
248 return ("INVALID");
249 }
250 }
251
252 static const char *
253 g_raid_volume_event2str(int event)
254 {
255
256 switch (event) {
257 case G_RAID_VOLUME_E_UP:
258 return ("UP");
259 case G_RAID_VOLUME_E_DOWN:
260 return ("DOWN");
261 case G_RAID_VOLUME_E_START:
262 return ("START");
263 case G_RAID_VOLUME_E_STARTMD:
264 return ("STARTMD");
265 default:
266 return ("INVALID");
267 }
268 }
269
270 const char *
271 g_raid_volume_level2str(int level, int qual)
272 {
273
274 switch (level) {
275 case G_RAID_VOLUME_RL_RAID0:
276 return ("RAID0");
277 case G_RAID_VOLUME_RL_RAID1:
278 return ("RAID1");
279 case G_RAID_VOLUME_RL_RAID3:
280 return ("RAID3");
281 case G_RAID_VOLUME_RL_RAID4:
282 return ("RAID4");
283 case G_RAID_VOLUME_RL_RAID5:
284 return ("RAID5");
285 case G_RAID_VOLUME_RL_RAID6:
286 return ("RAID6");
287 case G_RAID_VOLUME_RL_RAID1E:
288 return ("RAID1E");
289 case G_RAID_VOLUME_RL_SINGLE:
290 return ("SINGLE");
291 case G_RAID_VOLUME_RL_CONCAT:
292 return ("CONCAT");
293 case G_RAID_VOLUME_RL_RAID5E:
294 return ("RAID5E");
295 case G_RAID_VOLUME_RL_RAID5EE:
296 return ("RAID5EE");
297 default:
298 return ("UNKNOWN");
299 }
300 }
301
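/*
 * Parse a RAID level name (case-insensitive) into level and qualifier
 * constants; "RAID10" is accepted as an alias for RAID1E.  Returns -1
 * for unknown names, 0 on success.
 */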
302 int
303 g_raid_volume_str2level(const char *str, int *level, int *qual)
304 {
305
306 *level = G_RAID_VOLUME_RL_UNKNOWN;
307 *qual = G_RAID_VOLUME_RLQ_NONE;
308 if (strcasecmp(str, "RAID0") == 0)
309 *level = G_RAID_VOLUME_RL_RAID0;
310 else if (strcasecmp(str, "RAID1") == 0)
311 *level = G_RAID_VOLUME_RL_RAID1;
312 else if (strcasecmp(str, "RAID3") == 0)
313 *level = G_RAID_VOLUME_RL_RAID3;
314 else if (strcasecmp(str, "RAID4") == 0)
315 *level = G_RAID_VOLUME_RL_RAID4;
316 else if (strcasecmp(str, "RAID5") == 0)
317 *level = G_RAID_VOLUME_RL_RAID5;
318 else if (strcasecmp(str, "RAID6") == 0)
319 *level = G_RAID_VOLUME_RL_RAID6;
320 else if (strcasecmp(str, "RAID10") == 0 ||
321 strcasecmp(str, "RAID1E") == 0)
322 *level = G_RAID_VOLUME_RL_RAID1E;
323 else if (strcasecmp(str, "SINGLE") == 0)
324 *level = G_RAID_VOLUME_RL_SINGLE;
325 else if (strcasecmp(str, "CONCAT") == 0)
326 *level = G_RAID_VOLUME_RL_CONCAT;
327 else if (strcasecmp(str, "RAID5E") == 0)
328 *level = G_RAID_VOLUME_RL_RAID5E;
329 else if (strcasecmp(str, "RAID5EE") == 0)
330 *level = G_RAID_VOLUME_RL_RAID5EE;
331 else
332 return (-1);
333 return (0);
334 }
335
336 const char *
337 g_raid_get_diskname(struct g_raid_disk *disk)
338 {
339
340 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
341 return ("[unknown]");
342 return (disk->d_consumer->provider->name);
343 }
344
345 void
346 g_raid_report_disk_state(struct g_raid_disk *disk)
347 {
348 struct g_raid_subdisk *sd;
349 int len, state;
350 uint32_t s;
351
352 if (disk->d_consumer == NULL)
353 return;
354 if (disk->d_state == G_RAID_DISK_S_FAILED ||
355 disk->d_state == G_RAID_DISK_S_STALE_FAILED) {
356 s = G_STATE_FAILED;
357 } else {
358 state = G_RAID_SUBDISK_S_ACTIVE;
359 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
360 if (sd->sd_state < state)
361 state = sd->sd_state;
362 }
363 if (state == G_RAID_SUBDISK_S_FAILED)
364 s = G_STATE_FAILED;
365 else if (state == G_RAID_SUBDISK_S_NEW ||
366 state == G_RAID_SUBDISK_S_REBUILD)
367 s = G_STATE_REBUILD;
368 else if (state == G_RAID_SUBDISK_S_STALE ||
369 state == G_RAID_SUBDISK_S_RESYNC)
370 s = G_STATE_RESYNC;
371 else
372 s = G_STATE_ACTIVE;
373 }
374 len = sizeof(s);
375 g_io_getattr("GEOM::setstate", disk->d_consumer, &len, &s);
376 G_RAID_DEBUG1(2, disk->d_softc, "Disk %s state reported as %d.",
377 g_raid_get_diskname(disk), s);
378 }
379
380 void
381 g_raid_change_disk_state(struct g_raid_disk *disk, int state)
382 {
383
384 G_RAID_DEBUG1(0, disk->d_softc, "Disk %s state changed from %s to %s.",
385 g_raid_get_diskname(disk),
386 g_raid_disk_state2str(disk->d_state),
387 g_raid_disk_state2str(state));
388 disk->d_state = state;
389 g_raid_report_disk_state(disk);
390 }
391
392 void
393 g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state)
394 {
395
396 G_RAID_DEBUG1(0, sd->sd_softc,
397 "Subdisk %s:%d-%s state changed from %s to %s.",
398 sd->sd_volume->v_name, sd->sd_pos,
399 sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]",
400 g_raid_subdisk_state2str(sd->sd_state),
401 g_raid_subdisk_state2str(state));
402 sd->sd_state = state;
403 if (sd->sd_disk)
404 g_raid_report_disk_state(sd->sd_disk);
405 }
406
407 void
408 g_raid_change_volume_state(struct g_raid_volume *vol, int state)
409 {
410
411 G_RAID_DEBUG1(0, vol->v_softc,
412 "Volume %s state changed from %s to %s.",
413 vol->v_name,
414 g_raid_volume_state2str(vol->v_state),
415 g_raid_volume_state2str(state));
416 vol->v_state = state;
417 }
418
419 /*
420 * --- Events handling functions ---
421 * Events in geom_raid are used to update subdisk and volume status
422 * from a single worker thread, which simplifies locking.
423 */
424 static void
425 g_raid_event_free(struct g_raid_event *ep)
426 {
427
428 free(ep, M_RAID);
429 }
430
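/*
 * Queue an event for the worker thread.  With G_RAID_EVENT_WAIT the
 * caller (which must hold sc_lock exclusively) sleeps, dropping the
 * lock, until the worker marks the event done, and the handler's error
 * is returned.  For example (a sketch, mirroring the call made in
 * g_raid_orphan() but waiting for completion):
 *
 *	error = g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED,
 *	    G_RAID_EVENT_DISK | G_RAID_EVENT_WAIT);
 */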
431 int
432 g_raid_event_send(void *arg, int event, int flags)
433 {
434 struct g_raid_softc *sc;
435 struct g_raid_event *ep;
436 int error;
437
438 if ((flags & G_RAID_EVENT_VOLUME) != 0) {
439 sc = ((struct g_raid_volume *)arg)->v_softc;
440 } else if ((flags & G_RAID_EVENT_DISK) != 0) {
441 sc = ((struct g_raid_disk *)arg)->d_softc;
442 } else if ((flags & G_RAID_EVENT_SUBDISK) != 0) {
443 sc = ((struct g_raid_subdisk *)arg)->sd_softc;
444 } else {
445 sc = arg;
446 }
447 ep = malloc(sizeof(*ep), M_RAID,
448 sx_xlocked(&sc->sc_lock) ? M_WAITOK : M_NOWAIT);
449 if (ep == NULL)
450 return (ENOMEM);
451 ep->e_tgt = arg;
452 ep->e_event = event;
453 ep->e_flags = flags;
454 ep->e_error = 0;
455 G_RAID_DEBUG1(4, sc, "Sending event %p. Waking up %p.", ep, sc);
456 mtx_lock(&sc->sc_queue_mtx);
457 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
458 mtx_unlock(&sc->sc_queue_mtx);
459 wakeup(sc);
460
461 if ((flags & G_RAID_EVENT_WAIT) == 0)
462 return (0);
463
464 sx_assert(&sc->sc_lock, SX_XLOCKED);
465 G_RAID_DEBUG1(4, sc, "Sleeping on %p.", ep);
466 sx_xunlock(&sc->sc_lock);
467 while ((ep->e_flags & G_RAID_EVENT_DONE) == 0) {
468 mtx_lock(&sc->sc_queue_mtx);
469 MSLEEP(error, ep, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:event",
470 hz * 5);
471 }
472 error = ep->e_error;
473 g_raid_event_free(ep);
474 sx_xlock(&sc->sc_lock);
475 return (error);
476 }
477
478 static void
479 g_raid_event_cancel(struct g_raid_softc *sc, void *tgt)
480 {
481 struct g_raid_event *ep, *tmpep;
482
483 sx_assert(&sc->sc_lock, SX_XLOCKED);
484
485 mtx_lock(&sc->sc_queue_mtx);
486 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
487 if (ep->e_tgt != tgt)
488 continue;
489 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
490 if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0)
491 g_raid_event_free(ep);
492 else {
493 ep->e_error = ECANCELED;
494 wakeup(ep);
495 }
496 }
497 mtx_unlock(&sc->sc_queue_mtx);
498 }
499
500 static int
501 g_raid_event_check(struct g_raid_softc *sc, void *tgt)
502 {
503 struct g_raid_event *ep;
504 int res = 0;
505
506 sx_assert(&sc->sc_lock, SX_XLOCKED);
507
508 mtx_lock(&sc->sc_queue_mtx);
509 TAILQ_FOREACH(ep, &sc->sc_events, e_next) {
510 if (ep->e_tgt != tgt)
511 continue;
512 res = 1;
513 break;
514 }
515 mtx_unlock(&sc->sc_queue_mtx);
516 return (res);
517 }
518
519 /*
520 * Return the number of disks in given state.
521 * If state is equal to -1, count all connected disks.
522 */
523 u_int
524 g_raid_ndisks(struct g_raid_softc *sc, int state)
525 {
526 struct g_raid_disk *disk;
527 u_int n;
528
529 sx_assert(&sc->sc_lock, SX_LOCKED);
530
531 n = 0;
532 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
533 if (disk->d_state == state || state == -1)
534 n++;
535 }
536 return (n);
537 }
538
539 /*
540 * Return the number of subdisks in the given state.
541 * If state is equal to -1, count all connected subdisks.
542 */
543 u_int
544 g_raid_nsubdisks(struct g_raid_volume *vol, int state)
545 {
546 struct g_raid_subdisk *subdisk;
547 struct g_raid_softc *sc;
548 u_int i, n;
549
550 sc = vol->v_softc;
551 sx_assert(&sc->sc_lock, SX_LOCKED);
552
553 n = 0;
554 for (i = 0; i < vol->v_disks_count; i++) {
555 subdisk = &vol->v_subdisks[i];
556 if ((state == -1 &&
557 subdisk->sd_state != G_RAID_SUBDISK_S_NONE) ||
558 subdisk->sd_state == state)
559 n++;
560 }
561 return (n);
562 }
563
564 /*
565 * Return the first subdisk in the given state.
566 * If state is equal to -1, return the first connected subdisk.
567 */
568 struct g_raid_subdisk *
569 g_raid_get_subdisk(struct g_raid_volume *vol, int state)
570 {
571 struct g_raid_subdisk *sd;
572 struct g_raid_softc *sc;
573 u_int i;
574
575 sc = vol->v_softc;
576 sx_assert(&sc->sc_lock, SX_LOCKED);
577
578 for (i = 0; i < vol->v_disks_count; i++) {
579 sd = &vol->v_subdisks[i];
580 if ((state == -1 &&
581 sd->sd_state != G_RAID_SUBDISK_S_NONE) ||
582 sd->sd_state == state)
583 return (sd);
584 }
585 return (NULL);
586 }
587
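/*
 * Attach a new consumer to the named provider (an optional "/dev/"
 * prefix is stripped) and open it r1w1e1.  Returns NULL on any
 * failure; a successfully opened consumer is typically released later
 * via g_raid_kill_consumer().
 */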
588 struct g_consumer *
589 g_raid_open_consumer(struct g_raid_softc *sc, const char *name)
590 {
591 struct g_consumer *cp;
592 struct g_provider *pp;
593
594 g_topology_assert();
595
596 if (strncmp(name, "/dev/", 5) == 0)
597 name += 5;
598 pp = g_provider_by_name(name);
599 if (pp == NULL)
600 return (NULL);
601 cp = g_new_consumer(sc->sc_geom);
602 if (g_attach(cp, pp) != 0) {
603 g_destroy_consumer(cp);
604 return (NULL);
605 }
606 if (g_access(cp, 1, 1, 1) != 0) {
607 g_detach(cp);
608 g_destroy_consumer(cp);
609 return (NULL);
610 }
611 return (cp);
612 }
613
614 static u_int
615 g_raid_nrequests(struct g_raid_softc *sc, struct g_consumer *cp)
616 {
617 struct bio *bp;
618 u_int nreqs = 0;
619
620 mtx_lock(&sc->sc_queue_mtx);
621 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
622 if (bp->bio_from == cp)
623 nreqs++;
624 }
625 mtx_unlock(&sc->sc_queue_mtx);
626 return (nreqs);
627 }
628
629 u_int
630 g_raid_nopens(struct g_raid_softc *sc)
631 {
632 struct g_raid_volume *vol;
633 u_int opens;
634
635 opens = 0;
636 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
637 if (vol->v_provider_open != 0)
638 opens++;
639 }
640 return (opens);
641 }
642
643 static int
644 g_raid_consumer_is_busy(struct g_raid_softc *sc, struct g_consumer *cp)
645 {
646
647 if (cp->index > 0) {
648 G_RAID_DEBUG1(2, sc,
649 "I/O requests for %s exist, can't destroy it now.",
650 cp->provider->name);
651 return (1);
652 }
653 if (g_raid_nrequests(sc, cp) > 0) {
654 G_RAID_DEBUG1(2, sc,
655 "I/O requests for %s in queue, can't destroy it now.",
656 cp->provider->name);
657 return (1);
658 }
659 return (0);
660 }
661
662 static void
663 g_raid_destroy_consumer(void *arg, int flags __unused)
664 {
665 struct g_consumer *cp;
666
667 g_topology_assert();
668
669 cp = arg;
670 G_RAID_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
671 g_detach(cp);
672 g_destroy_consumer(cp);
673 }
674
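/*
 * Close and destroy a consumer, unless it still has I/O in flight or
 * queued, in which case it is left untouched.  If the consumer was
 * open for writing, detach is deferred to an event posted after
 * g_access() triggers the retaste; see the comment in the body below.
 */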
675 void
676 g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp)
677 {
678 struct g_provider *pp;
679 int retaste_wait;
680
681 g_topology_assert_not();
682
683 g_topology_lock();
684 cp->private = NULL;
685 if (g_raid_consumer_is_busy(sc, cp))
686 goto out;
687 pp = cp->provider;
688 retaste_wait = 0;
689 if (cp->acw == 1) {
690 if ((pp->geom->flags & G_GEOM_WITHER) == 0)
691 retaste_wait = 1;
692 }
693 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
694 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
695 if (retaste_wait) {
696 /*
697 * After the retaste event was sent (inside g_access()), we can
698 * post an event to detach and destroy the consumer.
699 * A class that still has a consumer attached to the given provider
700 * will not receive a retaste event for that provider.
701 * This is how retaste events are ignored when consumers opened
702 * for write are closed: the consumer is detached and destroyed
703 * after the retaste event has been sent.
704 */
705 g_post_event(g_raid_destroy_consumer, cp, M_WAITOK, NULL);
706 goto out;
707 }
708 G_RAID_DEBUG(1, "Consumer %s destroyed.", pp->name);
709 g_detach(cp);
710 g_destroy_consumer(cp);
711 out:
712 g_topology_unlock();
713 }
714
715 static void
716 g_raid_orphan(struct g_consumer *cp)
717 {
718 struct g_raid_disk *disk;
719
720 g_topology_assert();
721
722 disk = cp->private;
723 if (disk == NULL)
724 return;
725 g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED,
726 G_RAID_EVENT_DISK);
727 }
728
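/*
 * Mark the volume clean if it is dirty, has no writes in flight and,
 * while still open for writing, has been idle for
 * kern.geom.raid.clean_time seconds since the last write.  Returns the
 * number of seconds left to wait, or 0 when there is nothing to do.
 */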
729 static int
730 g_raid_clean(struct g_raid_volume *vol, int acw)
731 {
732 struct g_raid_softc *sc;
733 int timeout;
734
735 sc = vol->v_softc;
736 g_topology_assert_not();
737 sx_assert(&sc->sc_lock, SX_XLOCKED);
738
739 // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
740 // return (0);
741 if (!vol->v_dirty)
742 return (0);
743 if (vol->v_writes > 0)
744 return (0);
745 if (acw > 0 || (acw == -1 &&
746 vol->v_provider != NULL && vol->v_provider->acw > 0)) {
747 timeout = g_raid_clean_time - (time_uptime - vol->v_last_write);
748 if (timeout > 0)
749 return (timeout);
750 }
751 vol->v_dirty = 0;
752 G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.",
753 vol->v_name);
754 g_raid_write_metadata(sc, vol, NULL, NULL);
755 return (0);
756 }
757
758 static void
759 g_raid_dirty(struct g_raid_volume *vol)
760 {
761 struct g_raid_softc *sc;
762
763 sc = vol->v_softc;
764 g_topology_assert_not();
765 sx_assert(&sc->sc_lock, SX_XLOCKED);
766
767 // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
768 // return;
769 vol->v_dirty = 1;
770 G_RAID_DEBUG1(1, sc, "Volume %s marked as dirty.",
771 vol->v_name);
772 g_raid_write_metadata(sc, vol, NULL, NULL);
773 }
774
775 void
776 g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp)
777 {
778 struct g_raid_softc *sc;
779 struct g_raid_volume *vol;
780 struct g_raid_subdisk *sd;
781 struct bio_queue_head queue;
782 struct bio *cbp;
783 int i;
784
785 vol = tr->tro_volume;
786 sc = vol->v_softc;
787
788 /*
789 * Allocate all bios before sending any request, so we can return
790 * ENOMEM in a nice and clean way.
791 */
792 bioq_init(&queue);
793 for (i = 0; i < vol->v_disks_count; i++) {
794 sd = &vol->v_subdisks[i];
795 if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
796 sd->sd_state == G_RAID_SUBDISK_S_FAILED)
797 continue;
798 cbp = g_clone_bio(bp);
799 if (cbp == NULL)
800 goto failure;
801 cbp->bio_caller1 = sd;
802 bioq_insert_tail(&queue, cbp);
803 }
804 for (cbp = bioq_first(&queue); cbp != NULL;
805 cbp = bioq_first(&queue)) {
806 bioq_remove(&queue, cbp);
807 sd = cbp->bio_caller1;
808 cbp->bio_caller1 = NULL;
809 g_raid_subdisk_iostart(sd, cbp);
810 }
811 return;
812 failure:
813 for (cbp = bioq_first(&queue); cbp != NULL;
814 cbp = bioq_first(&queue)) {
815 bioq_remove(&queue, cbp);
816 g_destroy_bio(cbp);
817 }
818 if (bp->bio_error == 0)
819 bp->bio_error = ENOMEM;
820 g_raid_iodone(bp, bp->bio_error);
821 }
822
823 static void
824 g_raid_tr_kerneldump_common_done(struct bio *bp)
825 {
826
827 bp->bio_flags |= BIO_DONE;
828 }
829
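/*
 * Synchronous write used while the kernel is dumping, when sleeping is
 * not possible: the request is pushed through g_raid_poll() and
 * completion is busy-waited via the BIO_DONE flag set by the callback
 * above.
 */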
830 int
831 g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
832 void *virtual, vm_offset_t physical, off_t offset, size_t length)
833 {
834 struct g_raid_softc *sc;
835 struct g_raid_volume *vol;
836 struct bio bp;
837
838 vol = tr->tro_volume;
839 sc = vol->v_softc;
840
841 bzero(&bp, sizeof(bp));
842 bp.bio_cmd = BIO_WRITE;
843 bp.bio_done = g_raid_tr_kerneldump_common_done;
844 bp.bio_attribute = NULL;
845 bp.bio_offset = offset;
846 bp.bio_length = length;
847 bp.bio_data = virtual;
848 bp.bio_to = vol->v_provider;
849
850 g_raid_start(&bp);
851 while (!(bp.bio_flags & BIO_DONE)) {
852 G_RAID_DEBUG1(4, sc, "Poll...");
853 g_raid_poll(sc);
854 DELAY(10);
855 }
856
857 return (bp.bio_error != 0 ? EIO : 0);
858 }
859
860 static int
861 g_raid_dump(void *arg,
862 void *virtual, vm_offset_t physical, off_t offset, size_t length)
863 {
864 struct g_raid_volume *vol;
865 int error;
866
867 vol = (struct g_raid_volume *)arg;
868 G_RAID_DEBUG1(3, vol->v_softc, "Dumping at off %llu len %llu.",
869 (long long unsigned)offset, (long long unsigned)length);
870
871 error = G_RAID_TR_KERNELDUMP(vol->v_tr,
872 virtual, physical, offset, length);
873 return (error);
874 }
875
876 static void
877 g_raid_kerneldump(struct g_raid_softc *sc, struct bio *bp)
878 {
879 struct g_kerneldump *gkd;
880 struct g_provider *pp;
881 struct g_raid_volume *vol;
882
883 gkd = (struct g_kerneldump*)bp->bio_data;
884 pp = bp->bio_to;
885 vol = pp->private;
886 g_trace(G_T_TOPOLOGY, "g_raid_kerneldump(%s, %jd, %jd)",
887 pp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
888 gkd->di.dumper = g_raid_dump;
889 gkd->di.priv = vol;
890 gkd->di.blocksize = vol->v_sectorsize;
891 gkd->di.maxiosize = DFLTPHYS;
892 gkd->di.mediaoffset = gkd->offset;
893 if ((gkd->offset + gkd->length) > vol->v_mediasize)
894 gkd->length = vol->v_mediasize - gkd->offset;
895 gkd->di.mediasize = gkd->length;
896 g_io_deliver(bp, 0);
897 }
898
899 static void
900 g_raid_start(struct bio *bp)
901 {
902 struct g_raid_softc *sc;
903
904 sc = bp->bio_to->geom->softc;
905 /*
906 * If sc == NULL or there are no valid disks, provider's error
907 * should be set and g_raid_start() should not be called at all.
908 */
909 // KASSERT(sc != NULL && sc->sc_state == G_RAID_VOLUME_S_RUNNING,
910 // ("Provider's error should be set (error=%d)(mirror=%s).",
911 // bp->bio_to->error, bp->bio_to->name));
912 G_RAID_LOGREQ(3, bp, "Request received.");
913
914 switch (bp->bio_cmd) {
915 case BIO_READ:
916 case BIO_WRITE:
917 case BIO_DELETE:
918 case BIO_FLUSH:
919 break;
920 case BIO_GETATTR:
921 if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
922 g_raid_kerneldump(sc, bp);
923 else
924 g_io_deliver(bp, EOPNOTSUPP);
925 return;
926 default:
927 g_io_deliver(bp, EOPNOTSUPP);
928 return;
929 }
930 mtx_lock(&sc->sc_queue_mtx);
931 bioq_disksort(&sc->sc_queue, bp);
932 mtx_unlock(&sc->sc_queue_mtx);
933 if (!dumping) {
934 G_RAID_DEBUG1(4, sc, "Waking up %p.", sc);
935 wakeup(sc);
936 }
937 }
938
939 static int
940 g_raid_bio_overlaps(const struct bio *bp, off_t lstart, off_t len)
941 {
942 /*
943 * 5 cases:
944 * (1) bp entirely below NO
945 * (2) bp entirely above NO
946 * (3) bp start below, but end in range YES
947 * (4) bp entirely within YES
948 * (5) bp starts within, ends above YES
949 *
950 * lock range 10-19 (offset 10 length 10)
951 * (1) 1-5: first if kicks it out
952 * (2) 30-35: second if kicks it out
953 * (3) 5-15: passes both ifs
954 * (4) 12-14: passes both ifs
955 * (5) 19-20: passes both
956 */
957 off_t lend = lstart + len - 1;
958 off_t bstart = bp->bio_offset;
959 off_t bend = bp->bio_offset + bp->bio_length - 1;
960
961 if (bend < lstart)
962 return (0);
963 if (lend < bstart)
964 return (0);
965 return (1);
966 }
967
968 static int
969 g_raid_is_in_locked_range(struct g_raid_volume *vol, const struct bio *bp)
970 {
971 struct g_raid_lock *lp;
972
973 sx_assert(&vol->v_softc->sc_lock, SX_LOCKED);
974
975 LIST_FOREACH(lp, &vol->v_locks, l_next) {
976 if (g_raid_bio_overlaps(bp, lp->l_offset, lp->l_length))
977 return (1);
978 }
979 return (0);
980 }
981
982 static void
983 g_raid_start_request(struct bio *bp)
984 {
985 struct g_raid_softc *sc;
986 struct g_raid_volume *vol;
987
988 sc = bp->bio_to->geom->softc;
989 sx_assert(&sc->sc_lock, SX_LOCKED);
990 vol = bp->bio_to->private;
991
992 /*
993 * Check to see if this item is in a locked range. If so,
994 * queue it to our locked queue and return. We'll requeue
995 * it when the range is unlocked. Internal I/O for the
996 * rebuild/rescan/recovery process is excluded from this
997 * check so we can actually do the recovery.
998 */
999 if (!(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL) &&
1000 g_raid_is_in_locked_range(vol, bp)) {
1001 G_RAID_LOGREQ(3, bp, "Defer request.");
1002 bioq_insert_tail(&vol->v_locked, bp);
1003 return;
1004 }
1005
1006 /*
1007 * If we're actually going to do the write/delete, then
1008 * update the idle stats for the volume.
1009 */
1010 if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
1011 if (!vol->v_dirty)
1012 g_raid_dirty(vol);
1013 vol->v_writes++;
1014 }
1015
1016 /*
1017 * Put request onto inflight queue, so we can check if new
1018 * synchronization requests don't collide with it. Then tell
1019 * the transformation layer to start the I/O.
1020 */
1021 bioq_insert_tail(&vol->v_inflight, bp);
1022 G_RAID_LOGREQ(4, bp, "Request started");
1023 G_RAID_TR_IOSTART(vol->v_tr, bp);
1024 }
1025
1026 static void
1027 g_raid_finish_with_locked_ranges(struct g_raid_volume *vol, struct bio *bp)
1028 {
1029 off_t off, len;
1030 struct bio *nbp;
1031 struct g_raid_lock *lp;
1032
1033 vol->v_pending_lock = 0;
1034 LIST_FOREACH(lp, &vol->v_locks, l_next) {
1035 if (lp->l_pending) {
1036 off = lp->l_offset;
1037 len = lp->l_length;
1038 lp->l_pending = 0;
1039 TAILQ_FOREACH(nbp, &vol->v_inflight.queue, bio_queue) {
1040 if (g_raid_bio_overlaps(nbp, off, len))
1041 lp->l_pending++;
1042 }
1043 if (lp->l_pending) {
1044 vol->v_pending_lock = 1;
1045 G_RAID_DEBUG1(4, vol->v_softc,
1046 "Deferred lock(%jd, %jd) has %d pending",
1047 (intmax_t)off, (intmax_t)(off + len),
1048 lp->l_pending);
1049 continue;
1050 }
1051 G_RAID_DEBUG1(4, vol->v_softc,
1052 "Deferred lock of %jd to %jd completed",
1053 (intmax_t)off, (intmax_t)(off + len));
1054 G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
1055 }
1056 }
1057 }
1058
1059 void
1060 g_raid_iodone(struct bio *bp, int error)
1061 {
1062 struct g_raid_softc *sc;
1063 struct g_raid_volume *vol;
1064
1065 sc = bp->bio_to->geom->softc;
1066 sx_assert(&sc->sc_lock, SX_LOCKED);
1067 vol = bp->bio_to->private;
1068 G_RAID_LOGREQ(3, bp, "Request done: %d.", error);
1069
1070 /* Update stats if we have completed a write/delete. */
1071 if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
1072 vol->v_writes--;
1073 vol->v_last_write = time_uptime;
1074 }
1075
1076 bioq_remove(&vol->v_inflight, bp);
1077 if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp))
1078 g_raid_finish_with_locked_ranges(vol, bp);
1079 getmicrouptime(&vol->v_last_done);
1080 g_io_deliver(bp, error);
1081 }
1082
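/*
 * Lock a range of the volume for exclusive (synchronization) access.
 * If any in-flight requests other than "ignore" overlap the range,
 * EBUSY is returned and the transformation layer's LOCKED callback is
 * invoked later, from g_raid_iodone(), once the last overlapping
 * request completes.  A caller might use it like (a sketch):
 *
 *	if (g_raid_lock_range(vol, off, len, NULL, arg) == 0)
 *		... the range is locked, start copying ...
 *	else
 *		... wait for the LOCKED callback ...
 */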
1083 int
1084 g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
1085 struct bio *ignore, void *argp)
1086 {
1087 struct g_raid_softc *sc;
1088 struct g_raid_lock *lp;
1089 struct bio *bp;
1090
1091 sc = vol->v_softc;
1092 lp = malloc(sizeof(*lp), M_RAID, M_WAITOK | M_ZERO);
1093 LIST_INSERT_HEAD(&vol->v_locks, lp, l_next);
1094 lp->l_offset = off;
1095 lp->l_length = len;
1096 lp->l_callback_arg = argp;
1097
1098 lp->l_pending = 0;
1099 TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) {
1100 if (bp != ignore && g_raid_bio_overlaps(bp, off, len))
1101 lp->l_pending++;
1102 }
1103
1104 /*
1105 * If there are any writes that are pending, we return EBUSY. All
1106 * callers will have to wait until all pending writes clear.
1107 */
1108 if (lp->l_pending > 0) {
1109 vol->v_pending_lock = 1;
1110 G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd deferred %d pend",
1111 (intmax_t)off, (intmax_t)(off+len), lp->l_pending);
1112 return (EBUSY);
1113 }
1114 G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd",
1115 (intmax_t)off, (intmax_t)(off+len));
1116 G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
1117 return (0);
1118 }
1119
1120 int
1121 g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len)
1122 {
1123 struct g_raid_lock *lp;
1124 struct g_raid_softc *sc;
1125 struct bio *bp;
1126
1127 sc = vol->v_softc;
1128 LIST_FOREACH(lp, &vol->v_locks, l_next) {
1129 if (lp->l_offset == off && lp->l_length == len) {
1130 LIST_REMOVE(lp, l_next);
1131 /* XXX
1132 * Right now we just put them all back on the queue
1133 * and hope for the best. We hope this because any
1134 * locked ranges will go right back on this list
1135 * when the worker thread runs.
1136 * XXX
1137 */
1138 G_RAID_DEBUG1(4, sc, "Unlocked %jd to %jd",
1139 (intmax_t)lp->l_offset,
1140 (intmax_t)(lp->l_offset+lp->l_length));
1141 mtx_lock(&sc->sc_queue_mtx);
1142 while ((bp = bioq_takefirst(&vol->v_locked)) != NULL)
1143 bioq_disksort(&sc->sc_queue, bp);
1144 mtx_unlock(&sc->sc_queue_mtx);
1145 free(lp, M_RAID);
1146 return (0);
1147 }
1148 }
1149 return (EINVAL);
1150 }
1151
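/*
 * Route a request for a single subdisk to its physical disk, shifting
 * the offset by the subdisk's position on the disk.  Requests to
 * absent or inoperable disks are failed with ENXIO.  While the kernel
 * is dumping, writes are translated into synchronous dump_write()
 * calls instead of regular GEOM I/O.
 */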
1152 void
1153 g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp)
1154 {
1155 struct g_consumer *cp;
1156 struct g_raid_disk *disk, *tdisk;
1157
1158 bp->bio_caller1 = sd;
1159
1160 /*
1161 * Make sure that the disk is present.  Generally it is the task of
1162 * the transformation layers not to send requests to absent disks,
1163 * but it is better to be safe than sorry and report the situation.
1164 */
1165 if (sd->sd_disk == NULL) {
1166 G_RAID_LOGREQ(0, bp, "Warning! I/O request to an absent disk!");
1167 nodisk:
1168 bp->bio_from = NULL;
1169 bp->bio_to = NULL;
1170 bp->bio_error = ENXIO;
1171 g_raid_disk_done(bp);
1172 return;
1173 }
1174 disk = sd->sd_disk;
1175 if (disk->d_state != G_RAID_DISK_S_ACTIVE &&
1176 disk->d_state != G_RAID_DISK_S_FAILED) {
1177 G_RAID_LOGREQ(0, bp, "Warning! I/O request to a disk in a "
1178 "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
1179 goto nodisk;
1180 }
1181
1182 cp = disk->d_consumer;
1183 bp->bio_from = cp;
1184 bp->bio_to = cp->provider;
1185 cp->index++;
1186
1187 /* Update the average disk load. */
1188 TAILQ_FOREACH(tdisk, &sd->sd_softc->sc_disks, d_next) {
1189 if (tdisk->d_consumer == NULL)
1190 tdisk->d_load = 0;
1191 else
1192 tdisk->d_load = (tdisk->d_consumer->index *
1193 G_RAID_SUBDISK_LOAD_SCALE + tdisk->d_load * 7) / 8;
1194 }
1195
1196 disk->d_last_offset = bp->bio_offset + bp->bio_length;
1197 if (dumping) {
1198 G_RAID_LOGREQ(3, bp, "Sending dumping request.");
1199 if (bp->bio_cmd == BIO_WRITE) {
1200 bp->bio_error = g_raid_subdisk_kerneldump(sd,
1201 bp->bio_data, 0, bp->bio_offset, bp->bio_length);
1202 } else
1203 bp->bio_error = EOPNOTSUPP;
1204 g_raid_disk_done(bp);
1205 } else {
1206 bp->bio_done = g_raid_disk_done;
1207 bp->bio_offset += sd->sd_offset;
1208 G_RAID_LOGREQ(3, bp, "Sending request.");
1209 g_io_request(bp, cp);
1210 }
1211 }
1212
1213 int
1214 g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
1215 void *virtual, vm_offset_t physical, off_t offset, size_t length)
1216 {
1217
1218 if (sd->sd_disk == NULL)
1219 return (ENXIO);
1220 if (sd->sd_disk->d_kd.di.dumper == NULL)
1221 return (EOPNOTSUPP);
1222 return (dump_write(&sd->sd_disk->d_kd.di,
1223 virtual, physical,
1224 sd->sd_disk->d_kd.di.mediaoffset + sd->sd_offset + offset,
1225 length));
1226 }
1227
1228 static void
1229 g_raid_disk_done(struct bio *bp)
1230 {
1231 struct g_raid_softc *sc;
1232 struct g_raid_subdisk *sd;
1233
1234 sd = bp->bio_caller1;
1235 sc = sd->sd_softc;
1236 mtx_lock(&sc->sc_queue_mtx);
1237 bioq_disksort(&sc->sc_queue, bp);
1238 mtx_unlock(&sc->sc_queue_mtx);
1239 if (!dumping)
1240 wakeup(sc);
1241 }
1242
1243 static void
1244 g_raid_disk_done_request(struct bio *bp)
1245 {
1246 struct g_raid_softc *sc;
1247 struct g_raid_disk *disk;
1248 struct g_raid_subdisk *sd;
1249 struct g_raid_volume *vol;
1250
1251 g_topology_assert_not();
1252
1253 G_RAID_LOGREQ(3, bp, "Disk request done: %d.", bp->bio_error);
1254 sd = bp->bio_caller1;
1255 sc = sd->sd_softc;
1256 vol = sd->sd_volume;
1257 if (bp->bio_from != NULL) {
1258 bp->bio_from->index--;
1259 disk = bp->bio_from->private;
1260 if (disk == NULL)
1261 g_raid_kill_consumer(sc, bp->bio_from);
1262 }
1263 bp->bio_offset -= sd->sd_offset;
1264
1265 G_RAID_TR_IODONE(vol->v_tr, sd, bp);
1266 }
1267
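/*
 * Dispatch a dequeued event to the volume, disk, subdisk or node
 * handler.  Events nobody waits for are freed here; G_RAID_EVENT_WAIT
 * events are marked done and their sender is woken up to collect the
 * error and free them.
 */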
1268 static void
1269 g_raid_handle_event(struct g_raid_softc *sc, struct g_raid_event *ep)
1270 {
1271
1272 if ((ep->e_flags & G_RAID_EVENT_VOLUME) != 0)
1273 ep->e_error = g_raid_update_volume(ep->e_tgt, ep->e_event);
1274 else if ((ep->e_flags & G_RAID_EVENT_DISK) != 0)
1275 ep->e_error = g_raid_update_disk(ep->e_tgt, ep->e_event);
1276 else if ((ep->e_flags & G_RAID_EVENT_SUBDISK) != 0)
1277 ep->e_error = g_raid_update_subdisk(ep->e_tgt, ep->e_event);
1278 else
1279 ep->e_error = g_raid_update_node(ep->e_tgt, ep->e_event);
1280 if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) {
1281 KASSERT(ep->e_error == 0,
1282 ("Error cannot be handled."));
1283 g_raid_event_free(ep);
1284 } else {
1285 ep->e_flags |= G_RAID_EVENT_DONE;
1286 G_RAID_DEBUG1(4, sc, "Waking up %p.", ep);
1287 mtx_lock(&sc->sc_queue_mtx);
1288 wakeup(ep);
1289 mtx_unlock(&sc->sc_queue_mtx);
1290 }
1291 }
1292
1293 /*
1294 * Worker thread.
1295 */
1296 static void
1297 g_raid_worker(void *arg)
1298 {
1299 struct g_raid_softc *sc;
1300 struct g_raid_event *ep;
1301 struct g_raid_volume *vol;
1302 struct bio *bp;
1303 struct timeval now, t;
1304 int timeout, rv;
1305
1306 sc = arg;
1307 thread_lock(curthread);
1308 sched_prio(curthread, PRIBIO);
1309 thread_unlock(curthread);
1310
1311 sx_xlock(&sc->sc_lock);
1312 for (;;) {
1313 mtx_lock(&sc->sc_queue_mtx);
1314 /*
1315 * First take a look at events.
1316 * This is important to handle events before any I/O requests.
1317 */
1318 bp = NULL;
1319 vol = NULL;
1320 rv = 0;
1321 ep = TAILQ_FIRST(&sc->sc_events);
1322 if (ep != NULL)
1323 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
1324 else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
1325 ;
1326 else {
1327 getmicrouptime(&now);
1328 t = now;
1329 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1330 if (bioq_first(&vol->v_inflight) == NULL &&
1331 vol->v_tr &&
1332 timevalcmp(&vol->v_last_done, &t, < ))
1333 t = vol->v_last_done;
1334 }
1335 timevalsub(&t, &now);
1336 timeout = g_raid_idle_threshold +
1337 t.tv_sec * 1000000 + t.tv_usec;
1338 if (timeout > 0) {
1339 /*
1340 * Two steps to avoid overflows at HZ=1000
1341 * and idle timeouts > 2.1s. Some rounding
1342 * errors can occur, but they are < 1tick,
1343 * which is deemed to be close enough for
1344 * this purpose.
1345 */
1346 int micpertic = 1000000 / hz;
1347 timeout = (timeout + micpertic - 1) / micpertic;
1348 sx_xunlock(&sc->sc_lock);
1349 MSLEEP(rv, sc, &sc->sc_queue_mtx,
1350 PRIBIO | PDROP, "-", timeout);
1351 sx_xlock(&sc->sc_lock);
1352 goto process;
1353 } else
1354 rv = EWOULDBLOCK;
1355 }
1356 mtx_unlock(&sc->sc_queue_mtx);
1357 process:
1358 if (ep != NULL) {
1359 g_raid_handle_event(sc, ep);
1360 } else if (bp != NULL) {
1361 if (bp->bio_to != NULL &&
1362 bp->bio_to->geom == sc->sc_geom)
1363 g_raid_start_request(bp);
1364 else
1365 g_raid_disk_done_request(bp);
1366 } else if (rv == EWOULDBLOCK) {
1367 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1368 if (vol->v_writes == 0 && vol->v_dirty)
1369 g_raid_clean(vol, -1);
1370 if (bioq_first(&vol->v_inflight) == NULL &&
1371 vol->v_tr) {
1372 t.tv_sec = g_raid_idle_threshold / 1000000;
1373 t.tv_usec = g_raid_idle_threshold % 1000000;
1374 timevaladd(&t, &vol->v_last_done);
1375 getmicrouptime(&now);
1376 if (timevalcmp(&t, &now, <= )) {
1377 G_RAID_TR_IDLE(vol->v_tr);
1378 vol->v_last_done = now;
1379 }
1380 }
1381 }
1382 }
1383 if (sc->sc_stopping == G_RAID_DESTROY_HARD)
1384 g_raid_destroy_node(sc, 1); /* May not return. */
1385 }
1386 }
1387
1388 static void
1389 g_raid_poll(struct g_raid_softc *sc)
1390 {
1391 struct g_raid_event *ep;
1392 struct bio *bp;
1393
1394 sx_xlock(&sc->sc_lock);
1395 mtx_lock(&sc->sc_queue_mtx);
1396 /*
1397 * First take a look at events.
1398 * This is important to handle events before any I/O requests.
1399 */
1400 ep = TAILQ_FIRST(&sc->sc_events);
1401 if (ep != NULL) {
1402 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
1403 mtx_unlock(&sc->sc_queue_mtx);
1404 g_raid_handle_event(sc, ep);
1405 goto out;
1406 }
1407 bp = bioq_takefirst(&sc->sc_queue);
1408 if (bp != NULL) {
1409 mtx_unlock(&sc->sc_queue_mtx);
1410 if (bp->bio_from == NULL ||
1411 bp->bio_from->geom != sc->sc_geom)
1412 g_raid_start_request(bp);
1413 else
1414 g_raid_disk_done_request(bp);
1415 }
1416 out:
1417 sx_xunlock(&sc->sc_lock);
1418 }
1419
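/*
 * Create the GEOM provider for a volume.  By default
 * (kern.geom.raid.name_format == 0) providers are named
 * raid/r<global id>; a non-zero name_format requests raid/<volume
 * name> when that name is non-empty and not already taken.  Stripe
 * size and offset are derived from the components where that makes
 * sense for the RAID level.
 */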
1420 static void
1421 g_raid_launch_provider(struct g_raid_volume *vol)
1422 {
1423 struct g_raid_disk *disk;
1424 struct g_raid_softc *sc;
1425 struct g_provider *pp;
1426 char name[G_RAID_MAX_VOLUMENAME];
1427 off_t off;
1428
1429 sc = vol->v_softc;
1430 sx_assert(&sc->sc_lock, SX_LOCKED);
1431
1432 g_topology_lock();
1433 /* Try to name provider with volume name. */
1434 snprintf(name, sizeof(name), "raid/%s", vol->v_name);
1435 if (g_raid_name_format == 0 || vol->v_name[0] == 0 ||
1436 g_provider_by_name(name) != NULL) {
1437 /* Otherwise use sequential volume number. */
1438 snprintf(name, sizeof(name), "raid/r%d", vol->v_global_id);
1439 }
1440 pp = g_new_providerf(sc->sc_geom, "%s", name);
1441 pp->private = vol;
1442 pp->mediasize = vol->v_mediasize;
1443 pp->sectorsize = vol->v_sectorsize;
1444 pp->stripesize = 0;
1445 pp->stripeoffset = 0;
1446 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
1447 vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 ||
1448 vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE ||
1449 vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) {
1450 if ((disk = vol->v_subdisks[0].sd_disk) != NULL &&
1451 disk->d_consumer != NULL &&
1452 disk->d_consumer->provider != NULL) {
1453 pp->stripesize = disk->d_consumer->provider->stripesize;
1454 off = disk->d_consumer->provider->stripeoffset;
1455 pp->stripeoffset = off + vol->v_subdisks[0].sd_offset;
1456 if (off > 0)
1457 pp->stripeoffset %= off;
1458 }
1459 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) {
1460 pp->stripesize *= (vol->v_disks_count - 1);
1461 pp->stripeoffset *= (vol->v_disks_count - 1);
1462 }
1463 } else
1464 pp->stripesize = vol->v_strip_size;
1465 vol->v_provider = pp;
1466 g_error_provider(pp, 0);
1467 g_topology_unlock();
1468 G_RAID_DEBUG1(0, sc, "Provider %s for volume %s created.",
1469 pp->name, vol->v_name);
1470 }
1471
1472 static void
1473 g_raid_destroy_provider(struct g_raid_volume *vol)
1474 {
1475 struct g_raid_softc *sc;
1476 struct g_provider *pp;
1477 struct bio *bp, *tmp;
1478
1479 g_topology_assert_not();
1480 sc = vol->v_softc;
1481 pp = vol->v_provider;
1482 KASSERT(pp != NULL, ("NULL provider (volume=%s).", vol->v_name));
1483
1484 g_topology_lock();
1485 g_error_provider(pp, ENXIO);
1486 mtx_lock(&sc->sc_queue_mtx);
1487 TAILQ_FOREACH_SAFE(bp, &sc->sc_queue.queue, bio_queue, tmp) {
1488 if (bp->bio_to != pp)
1489 continue;
1490 bioq_remove(&sc->sc_queue, bp);
1491 g_io_deliver(bp, ENXIO);
1492 }
1493 mtx_unlock(&sc->sc_queue_mtx);
1494 G_RAID_DEBUG1(0, sc, "Provider %s for volume %s destroyed.",
1495 pp->name, vol->v_name);
1496 g_wither_provider(pp, ENXIO);
1497 g_topology_unlock();
1498 vol->v_provider = NULL;
1499 }
1500
1501 /*
1502 * Update device state.
1503 */
1504 static int
1505 g_raid_update_volume(struct g_raid_volume *vol, u_int event)
1506 {
1507 struct g_raid_softc *sc;
1508
1509 sc = vol->v_softc;
1510 sx_assert(&sc->sc_lock, SX_XLOCKED);
1511
1512 G_RAID_DEBUG1(2, sc, "Event %s for volume %s.",
1513 g_raid_volume_event2str(event),
1514 vol->v_name);
1515 switch (event) {
1516 case G_RAID_VOLUME_E_DOWN:
1517 if (vol->v_provider != NULL)
1518 g_raid_destroy_provider(vol);
1519 break;
1520 case G_RAID_VOLUME_E_UP:
1521 if (vol->v_provider == NULL)
1522 g_raid_launch_provider(vol);
1523 break;
1524 case G_RAID_VOLUME_E_START:
1525 if (vol->v_tr)
1526 G_RAID_TR_START(vol->v_tr);
1527 return (0);
1528 default:
1529 if (sc->sc_md)
1530 G_RAID_MD_VOLUME_EVENT(sc->sc_md, vol, event);
1531 return (0);
1532 }
1533
1534 /* Manage root mount release. */
1535 if (vol->v_starting) {
1536 vol->v_starting = 0;
1537 G_RAID_DEBUG1(1, sc, "root_mount_rel %p", vol->v_rootmount);
1538 root_mount_rel(vol->v_rootmount);
1539 vol->v_rootmount = NULL;
1540 }
1541 if (vol->v_stopping && vol->v_provider_open == 0)
1542 g_raid_destroy_volume(vol);
1543 return (0);
1544 }
1545
1546 /*
1547 * Update subdisk state.
1548 */
1549 static int
1550 g_raid_update_subdisk(struct g_raid_subdisk *sd, u_int event)
1551 {
1552 struct g_raid_softc *sc;
1553 struct g_raid_volume *vol;
1554
1555 sc = sd->sd_softc;
1556 vol = sd->sd_volume;
1557 sx_assert(&sc->sc_lock, SX_XLOCKED);
1558
1559 G_RAID_DEBUG1(2, sc, "Event %s for subdisk %s:%d-%s.",
1560 g_raid_subdisk_event2str(event),
1561 vol->v_name, sd->sd_pos,
1562 sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]");
1563 if (vol->v_tr)
1564 G_RAID_TR_EVENT(vol->v_tr, sd, event);
1565
1566 return (0);
1567 }
1568
1569 /*
1570 * Update disk state.
1571 */
1572 static int
1573 g_raid_update_disk(struct g_raid_disk *disk, u_int event)
1574 {
1575 struct g_raid_softc *sc;
1576
1577 sc = disk->d_softc;
1578 sx_assert(&sc->sc_lock, SX_XLOCKED);
1579
1580 G_RAID_DEBUG1(2, sc, "Event %s for disk %s.",
1581 g_raid_disk_event2str(event),
1582 g_raid_get_diskname(disk));
1583
1584 if (sc->sc_md)
1585 G_RAID_MD_EVENT(sc->sc_md, disk, event);
1586 return (0);
1587 }
1588
1589 /*
1590 * Node event.
1591 */
1592 static int
1593 g_raid_update_node(struct g_raid_softc *sc, u_int event)
1594 {
1595 sx_assert(&sc->sc_lock, SX_XLOCKED);
1596
1597 G_RAID_DEBUG1(2, sc, "Event %s for the array.",
1598 g_raid_node_event2str(event));
1599
1600 if (event == G_RAID_NODE_E_WAKE)
1601 return (0);
1602 if (sc->sc_md)
1603 G_RAID_MD_EVENT(sc->sc_md, NULL, event);
1604 return (0);
1605 }
1606
1607 static int
1608 g_raid_access(struct g_provider *pp, int acr, int acw, int ace)
1609 {
1610 struct g_raid_volume *vol;
1611 struct g_raid_softc *sc;
1612 int dcw, opens, error = 0;
1613
1614 g_topology_assert();
1615 sc = pp->geom->softc;
1616 vol = pp->private;
1617 KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
1618 KASSERT(vol != NULL, ("NULL volume (provider=%s).", pp->name));
1619
1620 G_RAID_DEBUG1(2, sc, "Access request for %s: r%dw%de%d.", pp->name,
1621 acr, acw, ace);
1622 dcw = pp->acw + acw;
1623
1624 g_topology_unlock();
1625 sx_xlock(&sc->sc_lock);
1626 /* Deny new opens while dying. */
1627 if (sc->sc_stopping != 0 && (acr > 0 || acw > 0 || ace > 0)) {
1628 error = ENXIO;
1629 goto out;
1630 }
1631 if (dcw == 0 && vol->v_dirty)
1632 g_raid_clean(vol, dcw);
1633 vol->v_provider_open += acr + acw + ace;
1634 /* Handle delayed node destruction. */
1635 if (sc->sc_stopping == G_RAID_DESTROY_DELAYED &&
1636 vol->v_provider_open == 0) {
1637 /* Count open volumes. */
1638 opens = g_raid_nopens(sc);
1639 if (opens == 0) {
1640 sc->sc_stopping = G_RAID_DESTROY_HARD;
1641 /* Wake up worker to make it selfdestruct. */
1642 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
1643 }
1644 }
1645 /* Handle open volume destruction. */
1646 if (vol->v_stopping && vol->v_provider_open == 0)
1647 g_raid_destroy_volume(vol);
1648 out:
1649 sx_xunlock(&sc->sc_lock);
1650 g_topology_lock();
1651 return (error);
1652 }
1653
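/*
 * Allocate and initialize the softc for a new array, create its geom
 * and start the per-array worker thread.  Returns NULL if the worker
 * thread cannot be created.
 */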
1654 struct g_raid_softc *
1655 g_raid_create_node(struct g_class *mp,
1656 const char *name, struct g_raid_md_object *md)
1657 {
1658 struct g_raid_softc *sc;
1659 struct g_geom *gp;
1660 int error;
1661
1662 g_topology_assert();
1663 G_RAID_DEBUG(1, "Creating array %s.", name);
1664
1665 gp = g_new_geomf(mp, "%s", name);
1666 sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO);
1667 gp->start = g_raid_start;
1668 gp->orphan = g_raid_orphan;
1669 gp->access = g_raid_access;
1670 gp->dumpconf = g_raid_dumpconf;
1671
1672 sc->sc_md = md;
1673 sc->sc_geom = gp;
1674 sc->sc_flags = 0;
1675 TAILQ_INIT(&sc->sc_volumes);
1676 TAILQ_INIT(&sc->sc_disks);
1677 sx_init(&sc->sc_lock, "graid:lock");
1678 mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF);
1679 TAILQ_INIT(&sc->sc_events);
1680 bioq_init(&sc->sc_queue);
1681 gp->softc = sc;
1682 error = kproc_create(g_raid_worker, sc, &sc->sc_worker, 0, 0,
1683 "g_raid %s", name);
1684 if (error != 0) {
1685 G_RAID_DEBUG(0, "Cannot create kernel thread for %s.", name);
1686 mtx_destroy(&sc->sc_queue_mtx);
1687 sx_destroy(&sc->sc_lock);
1688 g_destroy_geom(sc->sc_geom);
1689 free(sc, M_RAID);
1690 return (NULL);
1691 }
1692
1693 G_RAID_DEBUG1(0, sc, "Array %s created.", name);
1694 return (sc);
1695 }
1696
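/*
 * Create a volume within the array.  If the requested global id is
 * negative or already taken, the smallest free id is allocated
 * instead.  Root mounting is delayed (root_mount_hold()) until the
 * volume either starts or is destroyed.
 */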
1697 struct g_raid_volume *
1698 g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id)
1699 {
1700 struct g_raid_volume *vol, *vol1;
1701 int i;
1702
1703 G_RAID_DEBUG1(1, sc, "Creating volume %s.", name);
1704 vol = malloc(sizeof(*vol), M_RAID, M_WAITOK | M_ZERO);
1705 vol->v_softc = sc;
1706 strlcpy(vol->v_name, name, G_RAID_MAX_VOLUMENAME);
1707 vol->v_state = G_RAID_VOLUME_S_STARTING;
1708 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
1709 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN;
1710 bioq_init(&vol->v_inflight);
1711 bioq_init(&vol->v_locked);
1712 LIST_INIT(&vol->v_locks);
1713 for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) {
1714 vol->v_subdisks[i].sd_softc = sc;
1715 vol->v_subdisks[i].sd_volume = vol;
1716 vol->v_subdisks[i].sd_pos = i;
1717 vol->v_subdisks[i].sd_state = G_RAID_SUBDISK_S_NONE;
1718 }
1719
1720 /* Find free ID for this volume. */
1721 g_topology_lock();
1722 vol1 = vol;
1723 if (id >= 0) {
1724 LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) {
1725 if (vol1->v_global_id == id)
1726 break;
1727 }
1728 }
1729 if (vol1 != NULL) {
1730 for (id = 0; ; id++) {
1731 LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) {
1732 if (vol1->v_global_id == id)
1733 break;
1734 }
1735 if (vol1 == NULL)
1736 break;
1737 }
1738 }
1739 vol->v_global_id = id;
1740 LIST_INSERT_HEAD(&g_raid_volumes, vol, v_global_next);
1741 g_topology_unlock();
1742
1743 /* Delay root mounting. */
1744 vol->v_rootmount = root_mount_hold("GRAID");
1745 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", vol->v_rootmount);
1746 vol->v_starting = 1;
1747 TAILQ_INSERT_TAIL(&sc->sc_volumes, vol, v_next);
1748 return (vol);
1749 }
1750
1751 struct g_raid_disk *
1752 g_raid_create_disk(struct g_raid_softc *sc)
1753 {
1754 struct g_raid_disk *disk;
1755
1756 G_RAID_DEBUG1(1, sc, "Creating disk.");
1757 disk = malloc(sizeof(*disk), M_RAID, M_WAITOK | M_ZERO);
1758 disk->d_softc = sc;
1759 disk->d_state = G_RAID_DISK_S_NONE;
1760 TAILQ_INIT(&disk->d_subdisks);
1761 TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next);
1762 return (disk);
1763 }
1764
1765 int g_raid_start_volume(struct g_raid_volume *vol)
1766 {
1767 struct g_raid_tr_class *class;
1768 struct g_raid_tr_object *obj;
1769 int status;
1770
1771 G_RAID_DEBUG1(2, vol->v_softc, "Starting volume %s.", vol->v_name);
1772 LIST_FOREACH(class, &g_raid_tr_classes, trc_list) {
1773 G_RAID_DEBUG1(2, vol->v_softc,
1774 "Tasting volume %s for %s transformation.",
1775 vol->v_name, class->name);
1776 obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
1777 M_WAITOK);
1778 obj->tro_class = class;
1779 obj->tro_volume = vol;
1780 status = G_RAID_TR_TASTE(obj, vol);
1781 if (status != G_RAID_TR_TASTE_FAIL)
1782 break;
1783 kobj_delete((kobj_t)obj, M_RAID);
1784 }
1785 if (class == NULL) {
1786 G_RAID_DEBUG1(0, vol->v_softc,
1787 "No transformation module found for %s.",
1788 vol->v_name);
1789 vol->v_tr = NULL;
1790 g_raid_change_volume_state(vol, G_RAID_VOLUME_S_UNSUPPORTED);
1791 g_raid_event_send(vol, G_RAID_VOLUME_E_DOWN,
1792 G_RAID_EVENT_VOLUME);
1793 return (-1);
1794 }
1795 G_RAID_DEBUG1(2, vol->v_softc,
1796 "Transformation module %s chosen for %s.",
1797 class->name, vol->v_name);
1798 vol->v_tr = obj;
1799 return (0);
1800 }
1801
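/*
 * Tear down the whole array: all volumes and disks, the metadata
 * object and the geom.  When called from the worker thread itself
 * (worker != 0) this also frees the softc and exits the thread, so it
 * may not return.
 */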
1802 int
1803 g_raid_destroy_node(struct g_raid_softc *sc, int worker)
1804 {
1805 struct g_raid_volume *vol, *tmpv;
1806 struct g_raid_disk *disk, *tmpd;
1807 int error = 0;
1808
1809 sc->sc_stopping = G_RAID_DESTROY_HARD;
1810 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tmpv) {
1811 if (g_raid_destroy_volume(vol))
1812 error = EBUSY;
1813 }
1814 if (error)
1815 return (error);
1816 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tmpd) {
1817 if (g_raid_destroy_disk(disk))
1818 error = EBUSY;
1819 }
1820 if (error)
1821 return (error);
1822 if (sc->sc_md) {
1823 G_RAID_MD_FREE(sc->sc_md);
1824 kobj_delete((kobj_t)sc->sc_md, M_RAID);
1825 sc->sc_md = NULL;
1826 }
1827 if (sc->sc_geom != NULL) {
1828 G_RAID_DEBUG1(0, sc, "Array %s destroyed.", sc->sc_name);
1829 g_topology_lock();
1830 sc->sc_geom->softc = NULL;
1831 g_wither_geom(sc->sc_geom, ENXIO);
1832 g_topology_unlock();
1833 sc->sc_geom = NULL;
1834 } else
1835 G_RAID_DEBUG(1, "Array destroyed.");
1836 if (worker) {
1837 g_raid_event_cancel(sc, sc);
1838 mtx_destroy(&sc->sc_queue_mtx);
1839 sx_xunlock(&sc->sc_lock);
1840 sx_destroy(&sc->sc_lock);
1841 wakeup(&sc->sc_stopping);
1842 free(sc, M_RAID);
1843 curthread->td_pflags &= ~TDP_GEOM;
1844 G_RAID_DEBUG(1, "Thread exiting.");
1845 kproc_exit(0);
1846 } else {
1847 /* Wake up worker to make it selfdestruct. */
1848 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
1849 }
1850 return (0);
1851 }
1852
1853 int
1854 g_raid_destroy_volume(struct g_raid_volume *vol)
1855 {
1856 struct g_raid_softc *sc;
1857 struct g_raid_disk *disk;
1858 int i;
1859
1860 sc = vol->v_softc;
1861 G_RAID_DEBUG1(2, sc, "Destroying volume %s.", vol->v_name);
1862 vol->v_stopping = 1;
1863 if (vol->v_state != G_RAID_VOLUME_S_STOPPED) {
1864 if (vol->v_tr) {
1865 G_RAID_TR_STOP(vol->v_tr);
1866 return (EBUSY);
1867 } else
1868 vol->v_state = G_RAID_VOLUME_S_STOPPED;
1869 }
1870 if (g_raid_event_check(sc, vol) != 0)
1871 return (EBUSY);
1872 if (vol->v_provider != NULL)
1873 return (EBUSY);
1874 if (vol->v_provider_open != 0)
1875 return (EBUSY);
1876 if (vol->v_tr) {
1877 G_RAID_TR_FREE(vol->v_tr);
1878 kobj_delete((kobj_t)vol->v_tr, M_RAID);
1879 vol->v_tr = NULL;
1880 }
1881 if (vol->v_rootmount)
1882 root_mount_rel(vol->v_rootmount);
1883 g_topology_lock();
1884 LIST_REMOVE(vol, v_global_next);
1885 g_topology_unlock();
1886 TAILQ_REMOVE(&sc->sc_volumes, vol, v_next);
1887 for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) {
1888 g_raid_event_cancel(sc, &vol->v_subdisks[i]);
1889 disk = vol->v_subdisks[i].sd_disk;
1890 if (disk == NULL)
1891 continue;
1892 TAILQ_REMOVE(&disk->d_subdisks, &vol->v_subdisks[i], sd_next);
1893 }
1894 G_RAID_DEBUG1(2, sc, "Volume %s destroyed.", vol->v_name);
1895 if (sc->sc_md)
1896 G_RAID_MD_FREE_VOLUME(sc->sc_md, vol);
1897 g_raid_event_cancel(sc, vol);
1898 free(vol, M_RAID);
1899 if (sc->sc_stopping == G_RAID_DESTROY_HARD) {
1900 /* Wake up worker to let it selfdestruct. */
1901 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
1902 }
1903 return (0);
1904 }
1905
1906 int
1907 g_raid_destroy_disk(struct g_raid_disk *disk)
1908 {
1909 struct g_raid_softc *sc;
1910 struct g_raid_subdisk *sd, *tmp;
1911
1912 sc = disk->d_softc;
1913 G_RAID_DEBUG1(2, sc, "Destroying disk.");
1914 if (disk->d_consumer) {
1915 g_raid_kill_consumer(sc, disk->d_consumer);
1916 disk->d_consumer = NULL;
1917 }
1918 TAILQ_FOREACH_SAFE(sd, &disk->d_subdisks, sd_next, tmp) {
1919 g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE);
1920 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1921 G_RAID_EVENT_SUBDISK);
1922 TAILQ_REMOVE(&disk->d_subdisks, sd, sd_next);
1923 sd->sd_disk = NULL;
1924 }
1925 TAILQ_REMOVE(&sc->sc_disks, disk, d_next);
1926 if (sc->sc_md)
1927 G_RAID_MD_FREE_DISK(sc->sc_md, disk);
1928 g_raid_event_cancel(sc, disk);
1929 free(disk, M_RAID);
1930 return (0);
1931 }
1932
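/*
 * Request array destruction.  G_RAID_DESTROY_SOFT fails with EBUSY if
 * any volume is open, G_RAID_DESTROY_DELAYED defers destruction until
 * the last close, and G_RAID_DESTROY_HARD proceeds regardless.  On
 * success the caller sleeps, dropping sc_lock, until the worker thread
 * has destroyed the node.
 */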
1933 int
1934 g_raid_destroy(struct g_raid_softc *sc, int how)
1935 {
1936 int opens;
1937
1938 g_topology_assert_not();
1939 if (sc == NULL)
1940 return (ENXIO);
1941 sx_assert(&sc->sc_lock, SX_XLOCKED);
1942
1943 /* Count open volumes. */
1944 opens = g_raid_nopens(sc);
1945
1946 /* React on some opened volumes. */
1947 if (opens > 0) {
1948 switch (how) {
1949 case G_RAID_DESTROY_SOFT:
1950 G_RAID_DEBUG1(1, sc,
1951 "%d volumes are still open.",
1952 opens);
1953 return (EBUSY);
1954 case G_RAID_DESTROY_DELAYED:
1955 G_RAID_DEBUG1(1, sc,
1956 "Array will be destroyed on last close.");
1957 sc->sc_stopping = G_RAID_DESTROY_DELAYED;
1958 return (EBUSY);
1959 case G_RAID_DESTROY_HARD:
1960 G_RAID_DEBUG1(1, sc,
1961 "%d volumes are still open.",
1962 opens);
1963 }
1964 }
1965
1966 /* Mark node for destruction. */
1967 sc->sc_stopping = G_RAID_DESTROY_HARD;
1968 /* Wake up worker to let it selfdestruct. */
1969 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
1970 /* Sleep until node destroyed. */
1971 sx_sleep(&sc->sc_stopping, &sc->sc_lock,
1972 PRIBIO | PDROP, "r:destroy", 0);
1973 return (0);
1974 }
1975
1976 static void
1977 g_raid_taste_orphan(struct g_consumer *cp)
1978 {
1979
1980 KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
1981 cp->provider->name));
1982 }
1983
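/*
 * GEOM taste method: offer the provider to every registered metadata
 * class in turn until one of them recognizes it and either attaches it
 * to an existing node or creates a new one.
 */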
1984 static struct g_geom *
1985 g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
1986 {
1987 struct g_consumer *cp;
1988 struct g_geom *gp, *geom;
1989 struct g_raid_md_class *class;
1990 struct g_raid_md_object *obj;
1991 int status;
1992
1993 g_topology_assert();
1994 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
1995 G_RAID_DEBUG(2, "Tasting provider %s.", pp->name);
1996
1997 	gp = g_new_geomf(mp, "raid:taste");
1998 /*
1999 	 * This orphan function should never be called.
2000 */
2001 gp->orphan = g_raid_taste_orphan;
2002 cp = g_new_consumer(gp);
2003 g_attach(cp, pp);
2004
2005 geom = NULL;
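	/*
	 * Try each registered metadata class in priority order.  On
	 * G_RAID_MD_TASTE_NEW the kobj is kept by the new node; otherwise
	 * it is deleted.  Any status other than G_RAID_MD_TASTE_FAIL ends
	 * the search.
	 */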
2006 LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
2007 G_RAID_DEBUG(2, "Tasting provider %s for %s metadata.",
2008 pp->name, class->name);
2009 obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
2010 M_WAITOK);
2011 obj->mdo_class = class;
2012 status = G_RAID_MD_TASTE(obj, mp, cp, &geom);
2013 if (status != G_RAID_MD_TASTE_NEW)
2014 kobj_delete((kobj_t)obj, M_RAID);
2015 if (status != G_RAID_MD_TASTE_FAIL)
2016 break;
2017 }
2018
2019 g_detach(cp);
2020 g_destroy_consumer(cp);
2021 g_destroy_geom(gp);
2022 G_RAID_DEBUG(2, "Tasting provider %s done.", pp->name);
2023 return (geom);
2024 }
2025
2026 int
2027 g_raid_create_node_format(const char *format, struct g_geom **gp)
2028 {
2029 struct g_raid_md_class *class;
2030 struct g_raid_md_object *obj;
2031 int status;
2032
2033 G_RAID_DEBUG(2, "Creating array for %s metadata.", format);
2034 LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
2035 if (strcasecmp(class->name, format) == 0)
2036 break;
2037 }
2038 if (class == NULL) {
2039 G_RAID_DEBUG(1, "No support for %s metadata.", format);
2040 return (G_RAID_MD_TASTE_FAIL);
2041 }
2042 obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
2043 M_WAITOK);
2044 obj->mdo_class = class;
2045 status = G_RAID_MD_CREATE(obj, &g_raid_class, gp);
2046 if (status != G_RAID_MD_TASTE_NEW)
2047 kobj_delete((kobj_t)obj, M_RAID);
2048 return (status);
2049 }
2050
2051 static int
2052 g_raid_destroy_geom(struct gctl_req *req __unused,
2053 struct g_class *mp __unused, struct g_geom *gp)
2054 {
2055 struct g_raid_softc *sc;
2056 int error;
2057
2058 g_topology_unlock();
2059 sc = gp->softc;
2060 sx_xlock(&sc->sc_lock);
2061 g_cancel_event(sc);
2062 	error = g_raid_destroy(sc, G_RAID_DESTROY_SOFT);
2063 if (error != 0)
2064 sx_xunlock(&sc->sc_lock);
2065 g_topology_lock();
2066 return (error);
2067 }
2068
2069 void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
2070 struct g_raid_subdisk *sd, struct g_raid_disk *disk)
2071 {
2072
2073 if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2074 return;
2075 if (sc->sc_md)
2076 G_RAID_MD_WRITE(sc->sc_md, vol, sd, disk);
2077 }
2078
2079 void g_raid_fail_disk(struct g_raid_softc *sc,
2080 struct g_raid_subdisk *sd, struct g_raid_disk *disk)
2081 {
2082
2083 if (disk == NULL)
2084 disk = sd->sd_disk;
2085 if (disk == NULL) {
2086 G_RAID_DEBUG1(0, sc, "Warning! Fail request to an absent disk!");
2087 return;
2088 }
2089 if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2090 		G_RAID_DEBUG1(0, sc, "Warning! Fail request to a disk in the "
2091 		    "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
2092 return;
2093 }
2094 if (sc->sc_md)
2095 G_RAID_MD_FAIL_DISK(sc->sc_md, sd, disk);
2096 }
2097
2098 static void
2099 g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2100 struct g_consumer *cp, struct g_provider *pp)
2101 {
2102 struct g_raid_softc *sc;
2103 struct g_raid_volume *vol;
2104 struct g_raid_subdisk *sd;
2105 struct g_raid_disk *disk;
2106 int i, s;
2107
2108 g_topology_assert();
2109
2110 sc = gp->softc;
2111 if (sc == NULL)
2112 return;
2113 if (pp != NULL) {
2114 vol = pp->private;
2115 g_topology_unlock();
2116 sx_xlock(&sc->sc_lock);
2117 sbuf_printf(sb, "%s<Label>%s</Label>\n", indent,
2118 vol->v_name);
2119 sbuf_printf(sb, "%s<RAIDLevel>%s</RAIDLevel>\n", indent,
2120 g_raid_volume_level2str(vol->v_raid_level,
2121 vol->v_raid_level_qualifier));
2122 sbuf_printf(sb,
2123 "%s<Transformation>%s</Transformation>\n", indent,
2124 vol->v_tr ? vol->v_tr->tro_class->name : "NONE");
2125 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2126 vol->v_disks_count);
2127 sbuf_printf(sb, "%s<Strip>%u</Strip>\n", indent,
2128 vol->v_strip_size);
2129 sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2130 g_raid_volume_state2str(vol->v_state));
2131 sbuf_printf(sb, "%s<Dirty>%s</Dirty>\n", indent,
2132 vol->v_dirty ? "Yes" : "No");
2133 sbuf_printf(sb, "%s<Subdisks>", indent);
2134 for (i = 0; i < vol->v_disks_count; i++) {
2135 sd = &vol->v_subdisks[i];
2136 if (sd->sd_disk != NULL &&
2137 sd->sd_disk->d_consumer != NULL) {
2138 sbuf_printf(sb, "%s ",
2139 g_raid_get_diskname(sd->sd_disk));
2140 } else {
2141 sbuf_printf(sb, "NONE ");
2142 }
2143 sbuf_printf(sb, "(%s",
2144 g_raid_subdisk_state2str(sd->sd_state));
2145 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2146 sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2147 sbuf_printf(sb, " %d%%",
2148 (int)(sd->sd_rebuild_pos * 100 /
2149 sd->sd_size));
2150 }
2151 sbuf_printf(sb, ")");
2152 if (i + 1 < vol->v_disks_count)
2153 sbuf_printf(sb, ", ");
2154 }
2155 sbuf_printf(sb, "</Subdisks>\n");
2156 sx_xunlock(&sc->sc_lock);
2157 g_topology_lock();
2158 } else if (cp != NULL) {
2159 disk = cp->private;
2160 if (disk == NULL)
2161 return;
2162 g_topology_unlock();
2163 sx_xlock(&sc->sc_lock);
2164 sbuf_printf(sb, "%s<State>%s", indent,
2165 g_raid_disk_state2str(disk->d_state));
2166 if (!TAILQ_EMPTY(&disk->d_subdisks)) {
2167 sbuf_printf(sb, " (");
2168 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2169 sbuf_printf(sb, "%s",
2170 g_raid_subdisk_state2str(sd->sd_state));
2171 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2172 sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2173 sbuf_printf(sb, " %d%%",
2174 (int)(sd->sd_rebuild_pos * 100 /
2175 sd->sd_size));
2176 }
2177 if (TAILQ_NEXT(sd, sd_next))
2178 sbuf_printf(sb, ", ");
2179 }
2180 sbuf_printf(sb, ")");
2181 }
2182 sbuf_printf(sb, "</State>\n");
2183 sbuf_printf(sb, "%s<Subdisks>", indent);
2184 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2185 sbuf_printf(sb, "r%d(%s):%d@%ju",
2186 sd->sd_volume->v_global_id,
2187 sd->sd_volume->v_name,
2188 			    sd->sd_pos, (uintmax_t)sd->sd_offset);
2189 if (TAILQ_NEXT(sd, sd_next))
2190 sbuf_printf(sb, ", ");
2191 }
2192 sbuf_printf(sb, "</Subdisks>\n");
2193 sbuf_printf(sb, "%s<ReadErrors>%d</ReadErrors>\n", indent,
2194 disk->d_read_errs);
2195 sx_xunlock(&sc->sc_lock);
2196 g_topology_lock();
2197 } else {
2198 g_topology_unlock();
2199 sx_xlock(&sc->sc_lock);
2200 if (sc->sc_md) {
2201 sbuf_printf(sb, "%s<Metadata>%s</Metadata>\n", indent,
2202 sc->sc_md->mdo_class->name);
2203 }
2204 if (!TAILQ_EMPTY(&sc->sc_volumes)) {
2205 s = 0xff;
2206 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2207 if (vol->v_state < s)
2208 s = vol->v_state;
2209 }
2210 sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2211 g_raid_volume_state2str(s));
2212 }
2213 sx_xunlock(&sc->sc_lock);
2214 g_topology_lock();
2215 }
2216 }
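/*
 * For illustration only (names and values below are hypothetical), the
 * provider branch above emits a confxml fragment along these lines:
 *
 *	<Label>vol0</Label>
 *	<RAIDLevel>RAID1</RAIDLevel>
 *	<Transformation>RAID1</Transformation>
 *	<Components>2</Components>
 *	<Strip>65536</Strip>
 *	<State>OPTIMAL</State>
 *	<Dirty>No</Dirty>
 *	<Subdisks>ada0 (ACTIVE), ada1 (ACTIVE)</Subdisks>
 */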
2217
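/*
 * Shutdown hook: request delayed destruction of every array node so each
 * one is torn down cleanly when its last consumer closes, before the
 * final filesystem sync.
 */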
2218 static void
2219 g_raid_shutdown_pre_sync(void *arg, int howto)
2220 {
2221 struct g_class *mp;
2222 struct g_geom *gp, *gp2;
2223 struct g_raid_softc *sc;
2224 int error;
2225
2226 mp = arg;
2227 DROP_GIANT();
2228 g_topology_lock();
2229 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2230 if ((sc = gp->softc) == NULL)
2231 continue;
2232 g_topology_unlock();
2233 sx_xlock(&sc->sc_lock);
2234 g_cancel_event(sc);
2235 error = g_raid_destroy(sc, G_RAID_DESTROY_DELAYED);
2236 if (error != 0)
2237 sx_xunlock(&sc->sc_lock);
2238 g_topology_lock();
2239 }
2240 g_topology_unlock();
2241 PICKUP_GIANT();
2242 }
2243
2244 static void
2245 g_raid_init(struct g_class *mp)
2246 {
2247
2248 g_raid_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
2249 g_raid_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
2250 if (g_raid_pre_sync == NULL)
2251 G_RAID_DEBUG(0, "Warning! Cannot register shutdown event.");
2252 g_raid_started = 1;
2253 }
2254
2255 static void
2256 g_raid_fini(struct g_class *mp)
2257 {
2258
2259 if (g_raid_pre_sync != NULL)
2260 EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid_pre_sync);
2261 g_raid_started = 0;
2262 }
2263
2264 int
2265 g_raid_md_modevent(module_t mod, int type, void *arg)
2266 {
2267 struct g_raid_md_class *class, *c, *nc;
2268 int error;
2269
2270 error = 0;
2271 class = arg;
2272 switch (type) {
2273 case MOD_LOAD:
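		/* Keep g_raid_md_classes sorted by ascending mdc_priority. */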
2274 c = LIST_FIRST(&g_raid_md_classes);
2275 if (c == NULL || c->mdc_priority > class->mdc_priority)
2276 LIST_INSERT_HEAD(&g_raid_md_classes, class, mdc_list);
2277 else {
2278 while ((nc = LIST_NEXT(c, mdc_list)) != NULL &&
2279 nc->mdc_priority < class->mdc_priority)
2280 c = nc;
2281 LIST_INSERT_AFTER(c, class, mdc_list);
2282 }
2283 if (g_raid_started)
2284 g_retaste(&g_raid_class);
2285 break;
2286 case MOD_UNLOAD:
2287 LIST_REMOVE(class, mdc_list);
2288 break;
2289 default:
2290 error = EOPNOTSUPP;
2291 break;
2292 }
2293
2294 return (error);
2295 }
2296
2297 int
2298 g_raid_tr_modevent(module_t mod, int type, void *arg)
2299 {
2300 struct g_raid_tr_class *class, *c, *nc;
2301 int error;
2302
2303 error = 0;
2304 class = arg;
2305 switch (type) {
2306 case MOD_LOAD:
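		/* Keep g_raid_tr_classes sorted by ascending trc_priority. */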
2307 c = LIST_FIRST(&g_raid_tr_classes);
2308 if (c == NULL || c->trc_priority > class->trc_priority)
2309 LIST_INSERT_HEAD(&g_raid_tr_classes, class, trc_list);
2310 else {
2311 while ((nc = LIST_NEXT(c, trc_list)) != NULL &&
2312 nc->trc_priority < class->trc_priority)
2313 c = nc;
2314 LIST_INSERT_AFTER(c, class, trc_list);
2315 }
2316 break;
2317 case MOD_UNLOAD:
2318 LIST_REMOVE(class, trc_list);
2319 break;
2320 default:
2321 error = EOPNOTSUPP;
2322 break;
2323 }
2324
2325 return (error);
2326 }
2327
2328 /*
2329  * Use a local implementation of DECLARE_GEOM_CLASS(g_raid_class, g_raid)
2330  * to reduce module priority, allowing submodules to register themselves first.
2331 */
2332 static moduledata_t g_raid_mod = {
2333 "g_raid",
2334 g_modevent,
2335 &g_raid_class
2336 };
2337 DECLARE_MODULE(g_raid, g_raid_mod, SI_SUB_DRIVERS, SI_ORDER_THIRD);
2338 MODULE_VERSION(geom_raid, 0);
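/*
 * For comparison, a rough sketch of what the stock macro in <geom/geom.h>
 * expands to on this branch (treat as an approximation); only the SYSINIT
 * order differs, SI_ORDER_FIRST there versus SI_ORDER_THIRD here:
 *
 *	#define DECLARE_GEOM_CLASS(class, name)				\
 *		static moduledata_t name##_mod = {			\
 *			#name, g_modevent, &class			\
 *		};							\
 *		DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
 */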