The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/g_raid.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/8.3/sys/geom/raid/g_raid.c 223177 2011-06-17 06:59:49Z mav $");
   29 
   30 #include <sys/param.h>
   31 #include <sys/systm.h>
   32 #include <sys/kernel.h>
   33 #include <sys/module.h>
   34 #include <sys/limits.h>
   35 #include <sys/lock.h>
   36 #include <sys/mutex.h>
   37 #include <sys/bio.h>
   38 #include <sys/sysctl.h>
   39 #include <sys/malloc.h>
   40 #include <sys/eventhandler.h>
   41 #include <vm/uma.h>
   42 #include <geom/geom.h>
   43 #include <sys/proc.h>
   44 #include <sys/kthread.h>
   45 #include <sys/sched.h>
   46 #include <geom/raid/g_raid.h>
   47 #include "g_raid_md_if.h"
   48 #include "g_raid_tr_if.h"
   49 
/* Malloc type for memory allocated in this file with M_RAID. */
static MALLOC_DEFINE(M_RAID, "raid_data", "GEOM_RAID Data");

/*
 * kern.geom.raid sysctl tree.  Each knob below is registered both as a
 * read-write sysctl and, through TUNABLE_INT(), as a tunable of the same
 * name.
 */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, raid, CTLFLAG_RW, 0, "GEOM_RAID stuff");
/* Use disks without metadata as spare (off by default). */
u_int g_raid_aggressive_spare = 0;
TUNABLE_INT("kern.geom.raid.aggressive_spare", &g_raid_aggressive_spare);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, aggressive_spare, CTLFLAG_RW,
    &g_raid_aggressive_spare, 0, "Use disks without metadata as spare");
/* Debug level compared against by the G_RAID_DEBUG*() macros' callers. */
u_int g_raid_debug = 0;
TUNABLE_INT("kern.geom.raid.debug", &g_raid_debug);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RW, &g_raid_debug, 0,
    "Debug level");
/*
 * Number of read errors equated to disk failure.
 * NOTE(review): declared as int but exported with SYSCTL_UINT -- confirm
 * the signedness mismatch is intentional.
 */
int g_raid_read_err_thresh = 10;
TUNABLE_INT("kern.geom.raid.read_err_thresh", &g_raid_read_err_thresh);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, read_err_thresh, CTLFLAG_RW,
    &g_raid_read_err_thresh, 0,
    "Number of read errors equated to disk failure");
/* Time to wait for all array components (default 30). */
u_int g_raid_start_timeout = 30;
TUNABLE_INT("kern.geom.raid.start_timeout", &g_raid_start_timeout);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, start_timeout, CTLFLAG_RW,
    &g_raid_start_timeout, 0,
    "Time to wait for all array components");
/*
 * Idle time after which a volume may be marked clean; compared against
 * time_uptime deltas in g_raid_clean().
 */
static u_int g_raid_clean_time = 5;
TUNABLE_INT("kern.geom.raid.clean_time", &g_raid_clean_time);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, clean_time, CTLFLAG_RW,
    &g_raid_clean_time, 0, "Mark volume as clean when idling");
/* Disconnect a component on I/O failure (on by default). */
static u_int g_raid_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.raid.disconnect_on_failure",
    &g_raid_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
    &g_raid_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
/* Selects the provider name format. */
static u_int g_raid_name_format = 0;
TUNABLE_INT("kern.geom.raid.name_format", &g_raid_name_format);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, name_format, CTLFLAG_RW,
    &g_raid_name_format, 0, "Providers name format.");
/*
 * Time in microseconds to consider a volume idle.
 * NOTE(review): unlike the other knobs, the SYSCTL_UINT value argument
 * here is 1000000 rather than 0 -- presumably unused because a pointer
 * is supplied; confirm.
 */
static u_int g_raid_idle_threshold = 1000000;
TUNABLE_INT("kern.geom.raid.idle_threshold", &g_raid_idle_threshold);
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, idle_threshold, CTLFLAG_RW,
    &g_raid_idle_threshold, 1000000,
    "Time in microseconds to consider a volume idle.");
   90 
/*
 * msleep() wrapper that emits a level-4 debug message before going to
 * sleep on 'ident' and another one after waking up.  The msleep() return
 * value is stored in 'rv'.
 */
#define MSLEEP(rv, ident, mtx, priority, wmesg, timeout)        do {    \
        G_RAID_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));         \
        rv = msleep((ident), (mtx), (priority), (wmesg), (timeout));    \
        G_RAID_DEBUG(4, "%s: Woken up %p.", __func__, (ident));         \
} while (0)
   96 
/* List of metadata (md) classes. */
LIST_HEAD(, g_raid_md_class) g_raid_md_classes =
    LIST_HEAD_INITIALIZER(g_raid_md_classes);

/* List of transformation (tr) classes. */
LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes =
    LIST_HEAD_INITIALIZER(g_raid_tr_classes);

/* Global list of volumes. */
LIST_HEAD(, g_raid_volume) g_raid_volumes =
    LIST_HEAD_INITIALIZER(g_raid_volumes);

/* Event handler tag; NULL until registered (presumably in g_raid_init()). */
static eventhandler_tag g_raid_pre_sync = NULL;
/* Start-up flag, initially 0; set elsewhere in this file. */
static int g_raid_started = 0;
  108 
/* GEOM class methods implemented in this file. */
static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_raid_taste;
static void g_raid_init(struct g_class *mp);
static void g_raid_fini(struct g_class *mp);

/*
 * GEOM class descriptor for GEOM_RAID: wires the class name and version
 * to the control-request, taste, destroy, init and fini entry points.
 */
struct g_class g_raid_class = {
        .name = G_RAID_CLASS_NAME,
        .version = G_VERSION,
        .ctlreq = g_raid_ctl,
        .taste = g_raid_taste,
        .destroy_geom = g_raid_destroy_geom,
        .init = g_raid_init,
        .fini = g_raid_fini
};
  124 
/* Forward declarations of file-local helpers defined later in this file. */
static void g_raid_destroy_provider(struct g_raid_volume *vol);
static int g_raid_update_disk(struct g_raid_disk *disk, u_int event);
static int g_raid_update_subdisk(struct g_raid_subdisk *subdisk, u_int event);
static int g_raid_update_volume(struct g_raid_volume *vol, u_int event);
static int g_raid_update_node(struct g_raid_softc *sc, u_int event);
static void g_raid_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_raid_start(struct bio *bp);
static void g_raid_start_request(struct bio *bp);
static void g_raid_disk_done(struct bio *bp);
static void g_raid_poll(struct g_raid_softc *sc);
  136 
  137 static const char *
  138 g_raid_node_event2str(int event)
  139 {
  140 
  141         switch (event) {
  142         case G_RAID_NODE_E_WAKE:
  143                 return ("WAKE");
  144         case G_RAID_NODE_E_START:
  145                 return ("START");
  146         default:
  147                 return ("INVALID");
  148         }
  149 }
  150 
  151 const char *
  152 g_raid_disk_state2str(int state)
  153 {
  154 
  155         switch (state) {
  156         case G_RAID_DISK_S_NONE:
  157                 return ("NONE");
  158         case G_RAID_DISK_S_OFFLINE:
  159                 return ("OFFLINE");
  160         case G_RAID_DISK_S_FAILED:
  161                 return ("FAILED");
  162         case G_RAID_DISK_S_STALE_FAILED:
  163                 return ("STALE_FAILED");
  164         case G_RAID_DISK_S_SPARE:
  165                 return ("SPARE");
  166         case G_RAID_DISK_S_STALE:
  167                 return ("STALE");
  168         case G_RAID_DISK_S_ACTIVE:
  169                 return ("ACTIVE");
  170         default:
  171                 return ("INVALID");
  172         }
  173 }
  174 
  175 static const char *
  176 g_raid_disk_event2str(int event)
  177 {
  178 
  179         switch (event) {
  180         case G_RAID_DISK_E_DISCONNECTED:
  181                 return ("DISCONNECTED");
  182         default:
  183                 return ("INVALID");
  184         }
  185 }
  186 
  187 const char *
  188 g_raid_subdisk_state2str(int state)
  189 {
  190 
  191         switch (state) {
  192         case G_RAID_SUBDISK_S_NONE:
  193                 return ("NONE");
  194         case G_RAID_SUBDISK_S_FAILED:
  195                 return ("FAILED");
  196         case G_RAID_SUBDISK_S_NEW:
  197                 return ("NEW");
  198         case G_RAID_SUBDISK_S_REBUILD:
  199                 return ("REBUILD");
  200         case G_RAID_SUBDISK_S_UNINITIALIZED:
  201                 return ("UNINITIALIZED");
  202         case G_RAID_SUBDISK_S_STALE:
  203                 return ("STALE");
  204         case G_RAID_SUBDISK_S_RESYNC:
  205                 return ("RESYNC");
  206         case G_RAID_SUBDISK_S_ACTIVE:
  207                 return ("ACTIVE");
  208         default:
  209                 return ("INVALID");
  210         }
  211 }
  212 
  213 static const char *
  214 g_raid_subdisk_event2str(int event)
  215 {
  216 
  217         switch (event) {
  218         case G_RAID_SUBDISK_E_NEW:
  219                 return ("NEW");
  220         case G_RAID_SUBDISK_E_DISCONNECTED:
  221                 return ("DISCONNECTED");
  222         default:
  223                 return ("INVALID");
  224         }
  225 }
  226 
  227 const char *
  228 g_raid_volume_state2str(int state)
  229 {
  230 
  231         switch (state) {
  232         case G_RAID_VOLUME_S_STARTING:
  233                 return ("STARTING");
  234         case G_RAID_VOLUME_S_BROKEN:
  235                 return ("BROKEN");
  236         case G_RAID_VOLUME_S_DEGRADED:
  237                 return ("DEGRADED");
  238         case G_RAID_VOLUME_S_SUBOPTIMAL:
  239                 return ("SUBOPTIMAL");
  240         case G_RAID_VOLUME_S_OPTIMAL:
  241                 return ("OPTIMAL");
  242         case G_RAID_VOLUME_S_UNSUPPORTED:
  243                 return ("UNSUPPORTED");
  244         case G_RAID_VOLUME_S_STOPPED:
  245                 return ("STOPPED");
  246         default:
  247                 return ("INVALID");
  248         }
  249 }
  250 
  251 static const char *
  252 g_raid_volume_event2str(int event)
  253 {
  254 
  255         switch (event) {
  256         case G_RAID_VOLUME_E_UP:
  257                 return ("UP");
  258         case G_RAID_VOLUME_E_DOWN:
  259                 return ("DOWN");
  260         case G_RAID_VOLUME_E_START:
  261                 return ("START");
  262         case G_RAID_VOLUME_E_STARTMD:
  263                 return ("STARTMD");
  264         default:
  265                 return ("INVALID");
  266         }
  267 }
  268 
  269 const char *
  270 g_raid_volume_level2str(int level, int qual)
  271 {
  272 
  273         switch (level) {
  274         case G_RAID_VOLUME_RL_RAID0:
  275                 return ("RAID0");
  276         case G_RAID_VOLUME_RL_RAID1:
  277                 return ("RAID1");
  278         case G_RAID_VOLUME_RL_RAID3:
  279                 return ("RAID3");
  280         case G_RAID_VOLUME_RL_RAID4:
  281                 return ("RAID4");
  282         case G_RAID_VOLUME_RL_RAID5:
  283                 return ("RAID5");
  284         case G_RAID_VOLUME_RL_RAID6:
  285                 return ("RAID6");
  286         case G_RAID_VOLUME_RL_RAID1E:
  287                 return ("RAID1E");
  288         case G_RAID_VOLUME_RL_SINGLE:
  289                 return ("SINGLE");
  290         case G_RAID_VOLUME_RL_CONCAT:
  291                 return ("CONCAT");
  292         case G_RAID_VOLUME_RL_RAID5E:
  293                 return ("RAID5E");
  294         case G_RAID_VOLUME_RL_RAID5EE:
  295                 return ("RAID5EE");
  296         default:
  297                 return ("UNKNOWN");
  298         }
  299 }
  300 
  301 int
  302 g_raid_volume_str2level(const char *str, int *level, int *qual)
  303 {
  304 
  305         *level = G_RAID_VOLUME_RL_UNKNOWN;
  306         *qual = G_RAID_VOLUME_RLQ_NONE;
  307         if (strcasecmp(str, "RAID0") == 0)
  308                 *level = G_RAID_VOLUME_RL_RAID0;
  309         else if (strcasecmp(str, "RAID1") == 0)
  310                 *level = G_RAID_VOLUME_RL_RAID1;
  311         else if (strcasecmp(str, "RAID3") == 0)
  312                 *level = G_RAID_VOLUME_RL_RAID3;
  313         else if (strcasecmp(str, "RAID4") == 0)
  314                 *level = G_RAID_VOLUME_RL_RAID4;
  315         else if (strcasecmp(str, "RAID5") == 0)
  316                 *level = G_RAID_VOLUME_RL_RAID5;
  317         else if (strcasecmp(str, "RAID6") == 0)
  318                 *level = G_RAID_VOLUME_RL_RAID6;
  319         else if (strcasecmp(str, "RAID10") == 0 ||
  320                  strcasecmp(str, "RAID1E") == 0)
  321                 *level = G_RAID_VOLUME_RL_RAID1E;
  322         else if (strcasecmp(str, "SINGLE") == 0)
  323                 *level = G_RAID_VOLUME_RL_SINGLE;
  324         else if (strcasecmp(str, "CONCAT") == 0)
  325                 *level = G_RAID_VOLUME_RL_CONCAT;
  326         else if (strcasecmp(str, "RAID5E") == 0)
  327                 *level = G_RAID_VOLUME_RL_RAID5E;
  328         else if (strcasecmp(str, "RAID5EE") == 0)
  329                 *level = G_RAID_VOLUME_RL_RAID5EE;
  330         else
  331                 return (-1);
  332         return (0);
  333 }
  334 
  335 const char *
  336 g_raid_get_diskname(struct g_raid_disk *disk)
  337 {
  338 
  339         if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
  340                 return ("[unknown]");
  341         return (disk->d_consumer->provider->name);
  342 }
  343 
  344 void
  345 g_raid_report_disk_state(struct g_raid_disk *disk)
  346 {
  347         struct g_raid_subdisk *sd;
  348         int len, state;
  349         uint32_t s;
  350 
  351         if (disk->d_consumer == NULL)
  352                 return;
  353         if (disk->d_state == G_RAID_DISK_S_FAILED ||
  354             disk->d_state == G_RAID_DISK_S_STALE_FAILED) {
  355                 s = G_STATE_FAILED;
  356         } else {
  357                 state = G_RAID_SUBDISK_S_ACTIVE;
  358                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
  359                         if (sd->sd_state < state)
  360                                 state = sd->sd_state;
  361                 }
  362                 if (state == G_RAID_SUBDISK_S_FAILED)
  363                         s = G_STATE_FAILED;
  364                 else if (state == G_RAID_SUBDISK_S_NEW ||
  365                     state == G_RAID_SUBDISK_S_REBUILD)
  366                         s = G_STATE_REBUILD;
  367                 else if (state == G_RAID_SUBDISK_S_STALE ||
  368                     state == G_RAID_SUBDISK_S_RESYNC)
  369                         s = G_STATE_RESYNC;
  370                 else
  371                         s = G_STATE_ACTIVE;
  372         }
  373         len = sizeof(s);
  374         g_io_getattr("GEOM::setstate", disk->d_consumer, &len, &s);
  375         G_RAID_DEBUG1(2, disk->d_softc, "Disk %s state reported as %d.",
  376             g_raid_get_diskname(disk), s);
  377 }
  378 
  379 void
  380 g_raid_change_disk_state(struct g_raid_disk *disk, int state)
  381 {
  382 
  383         G_RAID_DEBUG1(0, disk->d_softc, "Disk %s state changed from %s to %s.",
  384             g_raid_get_diskname(disk),
  385             g_raid_disk_state2str(disk->d_state),
  386             g_raid_disk_state2str(state));
  387         disk->d_state = state;
  388         g_raid_report_disk_state(disk);
  389 }
  390 
  391 void
  392 g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state)
  393 {
  394 
  395         G_RAID_DEBUG1(0, sd->sd_softc,
  396             "Subdisk %s:%d-%s state changed from %s to %s.",
  397             sd->sd_volume->v_name, sd->sd_pos,
  398             sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]",
  399             g_raid_subdisk_state2str(sd->sd_state),
  400             g_raid_subdisk_state2str(state));
  401         sd->sd_state = state;
  402         if (sd->sd_disk)
  403                 g_raid_report_disk_state(sd->sd_disk);
  404 }
  405 
  406 void
  407 g_raid_change_volume_state(struct g_raid_volume *vol, int state)
  408 {
  409 
  410         G_RAID_DEBUG1(0, vol->v_softc,
  411             "Volume %s state changed from %s to %s.",
  412             vol->v_name,
  413             g_raid_volume_state2str(vol->v_state),
  414             g_raid_volume_state2str(state));
  415         vol->v_state = state;
  416 }
  417 
  418 /*
  419  * --- Events handling functions ---
  420  * Events in geom_raid are used to maintain subdisks and volumes status
  421  * from one thread to simplify locking.
  422  */
/*
 * Release an event structure allocated from M_RAID (see
 * g_raid_event_send()).
 */
static void
g_raid_event_free(struct g_raid_event *ep)
{

        free(ep, M_RAID);
}
  429 
  430 int
  431 g_raid_event_send(void *arg, int event, int flags)
  432 {
  433         struct g_raid_softc *sc;
  434         struct g_raid_event *ep;
  435         int error;
  436 
  437         if ((flags & G_RAID_EVENT_VOLUME) != 0) {
  438                 sc = ((struct g_raid_volume *)arg)->v_softc;
  439         } else if ((flags & G_RAID_EVENT_DISK) != 0) {
  440                 sc = ((struct g_raid_disk *)arg)->d_softc;
  441         } else if ((flags & G_RAID_EVENT_SUBDISK) != 0) {
  442                 sc = ((struct g_raid_subdisk *)arg)->sd_softc;
  443         } else {
  444                 sc = arg;
  445         }
  446         ep = malloc(sizeof(*ep), M_RAID,
  447             sx_xlocked(&sc->sc_lock) ? M_WAITOK : M_NOWAIT);
  448         if (ep == NULL)
  449                 return (ENOMEM);
  450         ep->e_tgt = arg;
  451         ep->e_event = event;
  452         ep->e_flags = flags;
  453         ep->e_error = 0;
  454         G_RAID_DEBUG1(4, sc, "Sending event %p. Waking up %p.", ep, sc);
  455         mtx_lock(&sc->sc_queue_mtx);
  456         TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
  457         mtx_unlock(&sc->sc_queue_mtx);
  458         wakeup(sc);
  459 
  460         if ((flags & G_RAID_EVENT_WAIT) == 0)
  461                 return (0);
  462 
  463         sx_assert(&sc->sc_lock, SX_XLOCKED);
  464         G_RAID_DEBUG1(4, sc, "Sleeping on %p.", ep);
  465         sx_xunlock(&sc->sc_lock);
  466         while ((ep->e_flags & G_RAID_EVENT_DONE) == 0) {
  467                 mtx_lock(&sc->sc_queue_mtx);
  468                 MSLEEP(error, ep, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:event",
  469                     hz * 5);
  470         }
  471         error = ep->e_error;
  472         g_raid_event_free(ep);
  473         sx_xlock(&sc->sc_lock);
  474         return (error);
  475 }
  476 
  477 static void
  478 g_raid_event_cancel(struct g_raid_softc *sc, void *tgt)
  479 {
  480         struct g_raid_event *ep, *tmpep;
  481 
  482         sx_assert(&sc->sc_lock, SX_XLOCKED);
  483 
  484         mtx_lock(&sc->sc_queue_mtx);
  485         TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
  486                 if (ep->e_tgt != tgt)
  487                         continue;
  488                 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
  489                 if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0)
  490                         g_raid_event_free(ep);
  491                 else {
  492                         ep->e_error = ECANCELED;
  493                         wakeup(ep);
  494                 }
  495         }
  496         mtx_unlock(&sc->sc_queue_mtx);
  497 }
  498 
  499 static int
  500 g_raid_event_check(struct g_raid_softc *sc, void *tgt)
  501 {
  502         struct g_raid_event *ep;
  503         int     res = 0;
  504 
  505         sx_assert(&sc->sc_lock, SX_XLOCKED);
  506 
  507         mtx_lock(&sc->sc_queue_mtx);
  508         TAILQ_FOREACH(ep, &sc->sc_events, e_next) {
  509                 if (ep->e_tgt != tgt)
  510                         continue;
  511                 res = 1;
  512                 break;
  513         }
  514         mtx_unlock(&sc->sc_queue_mtx);
  515         return (res);
  516 }
  517 
  518 /*
  519  * Return the number of disks in given state.
  520  * If state is equal to -1, count all connected disks.
  521  */
  522 u_int
  523 g_raid_ndisks(struct g_raid_softc *sc, int state)
  524 {
  525         struct g_raid_disk *disk;
  526         u_int n;
  527 
  528         sx_assert(&sc->sc_lock, SX_LOCKED);
  529 
  530         n = 0;
  531         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
  532                 if (disk->d_state == state || state == -1)
  533                         n++;
  534         }
  535         return (n);
  536 }
  537 
  538 /*
  539  * Return the number of subdisks in given state.
  540  * If state is equal to -1, count all connected disks.
  541  */
  542 u_int
  543 g_raid_nsubdisks(struct g_raid_volume *vol, int state)
  544 {
  545         struct g_raid_subdisk *subdisk;
  546         struct g_raid_softc *sc;
  547         u_int i, n ;
  548 
  549         sc = vol->v_softc;
  550         sx_assert(&sc->sc_lock, SX_LOCKED);
  551 
  552         n = 0;
  553         for (i = 0; i < vol->v_disks_count; i++) {
  554                 subdisk = &vol->v_subdisks[i];
  555                 if ((state == -1 &&
  556                      subdisk->sd_state != G_RAID_SUBDISK_S_NONE) ||
  557                     subdisk->sd_state == state)
  558                         n++;
  559         }
  560         return (n);
  561 }
  562 
  563 /*
  564  * Return the first subdisk in given state.
  565  * If state is equal to -1, then the first connected disks.
  566  */
  567 struct g_raid_subdisk *
  568 g_raid_get_subdisk(struct g_raid_volume *vol, int state)
  569 {
  570         struct g_raid_subdisk *sd;
  571         struct g_raid_softc *sc;
  572         u_int i;
  573 
  574         sc = vol->v_softc;
  575         sx_assert(&sc->sc_lock, SX_LOCKED);
  576 
  577         for (i = 0; i < vol->v_disks_count; i++) {
  578                 sd = &vol->v_subdisks[i];
  579                 if ((state == -1 &&
  580                      sd->sd_state != G_RAID_SUBDISK_S_NONE) ||
  581                     sd->sd_state == state)
  582                         return (sd);
  583         }
  584         return (NULL);
  585 }
  586 
  587 struct g_consumer *
  588 g_raid_open_consumer(struct g_raid_softc *sc, const char *name)
  589 {
  590         struct g_consumer *cp;
  591         struct g_provider *pp;
  592 
  593         g_topology_assert();
  594 
  595         if (strncmp(name, "/dev/", 5) == 0)
  596                 name += 5;
  597         pp = g_provider_by_name(name);
  598         if (pp == NULL)
  599                 return (NULL);
  600         cp = g_new_consumer(sc->sc_geom);
  601         if (g_attach(cp, pp) != 0) {
  602                 g_destroy_consumer(cp);
  603                 return (NULL);
  604         }
  605         if (g_access(cp, 1, 1, 1) != 0) {
  606                 g_detach(cp);
  607                 g_destroy_consumer(cp);
  608                 return (NULL);
  609         }
  610         return (cp);
  611 }
  612 
  613 static u_int
  614 g_raid_nrequests(struct g_raid_softc *sc, struct g_consumer *cp)
  615 {
  616         struct bio *bp;
  617         u_int nreqs = 0;
  618 
  619         mtx_lock(&sc->sc_queue_mtx);
  620         TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
  621                 if (bp->bio_from == cp)
  622                         nreqs++;
  623         }
  624         mtx_unlock(&sc->sc_queue_mtx);
  625         return (nreqs);
  626 }
  627 
  628 u_int
  629 g_raid_nopens(struct g_raid_softc *sc)
  630 {
  631         struct g_raid_volume *vol;
  632         u_int opens;
  633 
  634         opens = 0;
  635         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
  636                 if (vol->v_provider_open != 0)
  637                         opens++;
  638         }
  639         return (opens);
  640 }
  641 
  642 static int
  643 g_raid_consumer_is_busy(struct g_raid_softc *sc, struct g_consumer *cp)
  644 {
  645 
  646         if (cp->index > 0) {
  647                 G_RAID_DEBUG1(2, sc,
  648                     "I/O requests for %s exist, can't destroy it now.",
  649                     cp->provider->name);
  650                 return (1);
  651         }
  652         if (g_raid_nrequests(sc, cp) > 0) {
  653                 G_RAID_DEBUG1(2, sc,
  654                     "I/O requests for %s in queue, can't destroy it now.",
  655                     cp->provider->name);
  656                 return (1);
  657         }
  658         return (0);
  659 }
  660 
  661 static void
  662 g_raid_destroy_consumer(void *arg, int flags __unused)
  663 {
  664         struct g_consumer *cp;
  665 
  666         g_topology_assert();
  667 
  668         cp = arg;
  669         G_RAID_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
  670         g_detach(cp);
  671         g_destroy_consumer(cp);
  672 }
  673 
  674 void
  675 g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp)
  676 {
  677         struct g_provider *pp;
  678         int retaste_wait;
  679 
  680         g_topology_assert_not();
  681 
  682         g_topology_lock();
  683         cp->private = NULL;
  684         if (g_raid_consumer_is_busy(sc, cp))
  685                 goto out;
  686         pp = cp->provider;
  687         retaste_wait = 0;
  688         if (cp->acw == 1) {
  689                 if ((pp->geom->flags & G_GEOM_WITHER) == 0)
  690                         retaste_wait = 1;
  691         }
  692         if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
  693                 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
  694         if (retaste_wait) {
  695                 /*
  696                  * After retaste event was send (inside g_access()), we can send
  697                  * event to detach and destroy consumer.
  698                  * A class, which has consumer to the given provider connected
  699                  * will not receive retaste event for the provider.
  700                  * This is the way how I ignore retaste events when I close
  701                  * consumers opened for write: I detach and destroy consumer
  702                  * after retaste event is sent.
  703                  */
  704                 g_post_event(g_raid_destroy_consumer, cp, M_WAITOK, NULL);
  705                 goto out;
  706         }
  707         G_RAID_DEBUG(1, "Consumer %s destroyed.", pp->name);
  708         g_detach(cp);
  709         g_destroy_consumer(cp);
  710 out:
  711         g_topology_unlock();
  712 }
  713 
  714 static void
  715 g_raid_orphan(struct g_consumer *cp)
  716 {
  717         struct g_raid_disk *disk;
  718 
  719         g_topology_assert();
  720 
  721         disk = cp->private;
  722         if (disk == NULL)
  723                 return;
  724         g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED,
  725             G_RAID_EVENT_DISK);
  726 }
  727 
  728 static int
  729 g_raid_clean(struct g_raid_volume *vol, int acw)
  730 {
  731         struct g_raid_softc *sc;
  732         int timeout;
  733 
  734         sc = vol->v_softc;
  735         g_topology_assert_not();
  736         sx_assert(&sc->sc_lock, SX_XLOCKED);
  737 
  738 //      if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
  739 //              return (0);
  740         if (!vol->v_dirty)
  741                 return (0);
  742         if (vol->v_writes > 0)
  743                 return (0);
  744         if (acw > 0 || (acw == -1 &&
  745             vol->v_provider != NULL && vol->v_provider->acw > 0)) {
  746                 timeout = g_raid_clean_time - (time_uptime - vol->v_last_write);
  747                 if (timeout > 0)
  748                         return (timeout);
  749         }
  750         vol->v_dirty = 0;
  751         G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.",
  752             vol->v_name);
  753         g_raid_write_metadata(sc, vol, NULL, NULL);
  754         return (0);
  755 }
  756 
/*
 * Mark the volume as dirty and write its metadata.
 * Must be called with sc_lock held exclusively and without the topology
 * lock.
 */
static void
g_raid_dirty(struct g_raid_volume *vol)
{
        struct g_raid_softc *sc;

        sc = vol->v_softc;
        g_topology_assert_not();
        sx_assert(&sc->sc_lock, SX_XLOCKED);

        /* Disabled check of the NOFAILSYNC device flag, kept for reference. */
//      if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
//              return;
        vol->v_dirty = 1;
        G_RAID_DEBUG1(1, sc, "Volume %s marked as dirty.",
            vol->v_name);
        g_raid_write_metadata(sc, vol, NULL, NULL);
}
  773 
  774 void
  775 g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp)
  776 {
  777         struct g_raid_softc *sc;
  778         struct g_raid_volume *vol;
  779         struct g_raid_subdisk *sd;
  780         struct bio_queue_head queue;
  781         struct bio *cbp;
  782         int i;
  783 
  784         vol = tr->tro_volume;
  785         sc = vol->v_softc;
  786 
  787         /*
  788          * Allocate all bios before sending any request, so we can return
  789          * ENOMEM in nice and clean way.
  790          */
  791         bioq_init(&queue);
  792         for (i = 0; i < vol->v_disks_count; i++) {
  793                 sd = &vol->v_subdisks[i];
  794                 if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
  795                     sd->sd_state == G_RAID_SUBDISK_S_FAILED)
  796                         continue;
  797                 cbp = g_clone_bio(bp);
  798                 if (cbp == NULL)
  799                         goto failure;
  800                 cbp->bio_caller1 = sd;
  801                 bioq_insert_tail(&queue, cbp);
  802         }
  803         for (cbp = bioq_first(&queue); cbp != NULL;
  804             cbp = bioq_first(&queue)) {
  805                 bioq_remove(&queue, cbp);
  806                 sd = cbp->bio_caller1;
  807                 cbp->bio_caller1 = NULL;
  808                 g_raid_subdisk_iostart(sd, cbp);
  809         }
  810         return;
  811 failure:
  812         for (cbp = bioq_first(&queue); cbp != NULL;
  813             cbp = bioq_first(&queue)) {
  814                 bioq_remove(&queue, cbp);
  815                 g_destroy_bio(cbp);
  816         }
  817         if (bp->bio_error == 0)
  818                 bp->bio_error = ENOMEM;
  819         g_raid_iodone(bp, bp->bio_error);
  820 }
  821 
  822 static void
  823 g_raid_tr_kerneldump_common_done(struct bio *bp)
  824 {
  825 
  826         bp->bio_flags |= BIO_DONE;
  827 }
  828 
  829 int
  830 g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
  831     void *virtual, vm_offset_t physical, off_t offset, size_t length)
  832 {
  833         struct g_raid_softc *sc;
  834         struct g_raid_volume *vol;
  835         struct bio bp;
  836 
  837         vol = tr->tro_volume;
  838         sc = vol->v_softc;
  839 
  840         bzero(&bp, sizeof(bp));
  841         bp.bio_cmd = BIO_WRITE;
  842         bp.bio_done = g_raid_tr_kerneldump_common_done;
  843         bp.bio_attribute = NULL;
  844         bp.bio_offset = offset;
  845         bp.bio_length = length;
  846         bp.bio_data = virtual;
  847         bp.bio_to = vol->v_provider;
  848 
  849         g_raid_start(&bp);
  850         while (!(bp.bio_flags & BIO_DONE)) {
  851                 G_RAID_DEBUG1(4, sc, "Poll...");
  852                 g_raid_poll(sc);
  853                 DELAY(10);
  854         }
  855 
  856         return (bp.bio_error != 0 ? EIO : 0);
  857 }
  858 
  859 static int
  860 g_raid_dump(void *arg,
  861     void *virtual, vm_offset_t physical, off_t offset, size_t length)
  862 {
  863         struct g_raid_volume *vol;
  864         int error;
  865 
  866         vol = (struct g_raid_volume *)arg;
  867         G_RAID_DEBUG1(3, vol->v_softc, "Dumping at off %llu len %llu.",
  868             (long long unsigned)offset, (long long unsigned)length);
  869 
  870         error = G_RAID_TR_KERNELDUMP(vol->v_tr,
  871             virtual, physical, offset, length);
  872         return (error);
  873 }
  874 
  875 static void
  876 g_raid_kerneldump(struct g_raid_softc *sc, struct bio *bp)
  877 {
  878         struct g_kerneldump *gkd;
  879         struct g_provider *pp;
  880         struct g_raid_volume *vol;
  881 
  882         gkd = (struct g_kerneldump*)bp->bio_data;
  883         pp = bp->bio_to;
  884         vol = pp->private;
  885         g_trace(G_T_TOPOLOGY, "g_raid_kerneldump(%s, %jd, %jd)",
  886                 pp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
  887         gkd->di.dumper = g_raid_dump;
  888         gkd->di.priv = vol;
  889         gkd->di.blocksize = vol->v_sectorsize;
  890         gkd->di.maxiosize = DFLTPHYS;
  891         gkd->di.mediaoffset = gkd->offset;
  892         if ((gkd->offset + gkd->length) > vol->v_mediasize)
  893                 gkd->length = vol->v_mediasize - gkd->offset;
  894         gkd->di.mediasize = gkd->length;
  895         g_io_deliver(bp, 0);
  896 }
  897 
  898 static void
  899 g_raid_start(struct bio *bp)
  900 {
  901         struct g_raid_softc *sc;
  902 
  903         sc = bp->bio_to->geom->softc;
  904         /*
  905          * If sc == NULL or there are no valid disks, provider's error
  906          * should be set and g_raid_start() should not be called at all.
  907          */
  908 //      KASSERT(sc != NULL && sc->sc_state == G_RAID_VOLUME_S_RUNNING,
  909 //          ("Provider's error should be set (error=%d)(mirror=%s).",
  910 //          bp->bio_to->error, bp->bio_to->name));
  911         G_RAID_LOGREQ(3, bp, "Request received.");
  912 
  913         switch (bp->bio_cmd) {
  914         case BIO_READ:
  915         case BIO_WRITE:
  916         case BIO_DELETE:
  917         case BIO_FLUSH:
  918                 break;
  919         case BIO_GETATTR:
  920                 if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
  921                         g_raid_kerneldump(sc, bp);
  922                 else
  923                         g_io_deliver(bp, EOPNOTSUPP);
  924                 return;
  925         default:
  926                 g_io_deliver(bp, EOPNOTSUPP);
  927                 return;
  928         }
  929         mtx_lock(&sc->sc_queue_mtx);
  930         bioq_disksort(&sc->sc_queue, bp);
  931         mtx_unlock(&sc->sc_queue_mtx);
  932         if (!dumping) {
  933                 G_RAID_DEBUG1(4, sc, "Waking up %p.", sc);
  934                 wakeup(sc);
  935         }
  936 }
  937 
  938 static int
  939 g_raid_bio_overlaps(const struct bio *bp, off_t lstart, off_t len)
  940 {
  941         /*
  942          * 5 cases:
  943          * (1) bp entirely below NO
  944          * (2) bp entirely above NO
  945          * (3) bp start below, but end in range YES
  946          * (4) bp entirely within YES
  947          * (5) bp starts within, ends above YES
  948          *
  949          * lock range 10-19 (offset 10 length 10)
  950          * (1) 1-5: first if kicks it out
  951          * (2) 30-35: second if kicks it out
  952          * (3) 5-15: passes both ifs
  953          * (4) 12-14: passes both ifs
  954          * (5) 19-20: passes both
  955          */
  956         off_t lend = lstart + len - 1;
  957         off_t bstart = bp->bio_offset;
  958         off_t bend = bp->bio_offset + bp->bio_length - 1;
  959 
  960         if (bend < lstart)
  961                 return (0);
  962         if (lend < bstart)
  963                 return (0);
  964         return (1);
  965 }
  966 
  967 static int
  968 g_raid_is_in_locked_range(struct g_raid_volume *vol, const struct bio *bp)
  969 {
  970         struct g_raid_lock *lp;
  971 
  972         sx_assert(&vol->v_softc->sc_lock, SX_LOCKED);
  973 
  974         LIST_FOREACH(lp, &vol->v_locks, l_next) {
  975                 if (g_raid_bio_overlaps(bp, lp->l_offset, lp->l_length))
  976                         return (1);
  977         }
  978         return (0);
  979 }
  980 
  981 static void
  982 g_raid_start_request(struct bio *bp)
  983 {
  984         struct g_raid_softc *sc;
  985         struct g_raid_volume *vol;
  986 
  987         sc = bp->bio_to->geom->softc;
  988         sx_assert(&sc->sc_lock, SX_LOCKED);
  989         vol = bp->bio_to->private;
  990 
  991         /*
  992          * Check to see if this item is in a locked range.  If so,
  993          * queue it to our locked queue and return.  We'll requeue
  994          * it when the range is unlocked.  Internal I/O for the
  995          * rebuild/rescan/recovery process is excluded from this
  996          * check so we can actually do the recovery.
  997          */
  998         if (!(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL) &&
  999             g_raid_is_in_locked_range(vol, bp)) {
 1000                 G_RAID_LOGREQ(3, bp, "Defer request.");
 1001                 bioq_insert_tail(&vol->v_locked, bp);
 1002                 return;
 1003         }
 1004 
 1005         /*
 1006          * If we're actually going to do the write/delete, then
 1007          * update the idle stats for the volume.
 1008          */
 1009         if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
 1010                 if (!vol->v_dirty)
 1011                         g_raid_dirty(vol);
 1012                 vol->v_writes++;
 1013         }
 1014 
 1015         /*
 1016          * Put request onto inflight queue, so we can check if new
 1017          * synchronization requests don't collide with it.  Then tell
 1018          * the transformation layer to start the I/O.
 1019          */
 1020         bioq_insert_tail(&vol->v_inflight, bp);
 1021         G_RAID_LOGREQ(4, bp, "Request started");
 1022         G_RAID_TR_IOSTART(vol->v_tr, bp);
 1023 }
 1024 
 1025 static void
 1026 g_raid_finish_with_locked_ranges(struct g_raid_volume *vol, struct bio *bp)
 1027 {
 1028         off_t off, len;
 1029         struct bio *nbp;
 1030         struct g_raid_lock *lp;
 1031 
 1032         vol->v_pending_lock = 0;
 1033         LIST_FOREACH(lp, &vol->v_locks, l_next) {
 1034                 if (lp->l_pending) {
 1035                         off = lp->l_offset;
 1036                         len = lp->l_length;
 1037                         lp->l_pending = 0;
 1038                         TAILQ_FOREACH(nbp, &vol->v_inflight.queue, bio_queue) {
 1039                                 if (g_raid_bio_overlaps(nbp, off, len))
 1040                                         lp->l_pending++;
 1041                         }
 1042                         if (lp->l_pending) {
 1043                                 vol->v_pending_lock = 1;
 1044                                 G_RAID_DEBUG1(4, vol->v_softc,
 1045                                     "Deferred lock(%jd, %jd) has %d pending",
 1046                                     (intmax_t)off, (intmax_t)(off + len),
 1047                                     lp->l_pending);
 1048                                 continue;
 1049                         }
 1050                         G_RAID_DEBUG1(4, vol->v_softc,
 1051                             "Deferred lock of %jd to %jd completed",
 1052                             (intmax_t)off, (intmax_t)(off + len));
 1053                         G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
 1054                 }
 1055         }
 1056 }
 1057 
 1058 void
 1059 g_raid_iodone(struct bio *bp, int error)
 1060 {
 1061         struct g_raid_softc *sc;
 1062         struct g_raid_volume *vol;
 1063 
 1064         sc = bp->bio_to->geom->softc;
 1065         sx_assert(&sc->sc_lock, SX_LOCKED);
 1066         vol = bp->bio_to->private;
 1067         G_RAID_LOGREQ(3, bp, "Request done: %d.", error);
 1068 
 1069         /* Update stats if we done write/delete. */
 1070         if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
 1071                 vol->v_writes--;
 1072                 vol->v_last_write = time_uptime;
 1073         }
 1074 
 1075         bioq_remove(&vol->v_inflight, bp);
 1076         if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp))
 1077                 g_raid_finish_with_locked_ranges(vol, bp);
 1078         getmicrouptime(&vol->v_last_done);
 1079         g_io_deliver(bp, error);
 1080 }
 1081 
 1082 int
 1083 g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
 1084     struct bio *ignore, void *argp)
 1085 {
 1086         struct g_raid_softc *sc;
 1087         struct g_raid_lock *lp;
 1088         struct bio *bp;
 1089 
 1090         sc = vol->v_softc;
 1091         lp = malloc(sizeof(*lp), M_RAID, M_WAITOK | M_ZERO);
 1092         LIST_INSERT_HEAD(&vol->v_locks, lp, l_next);
 1093         lp->l_offset = off;
 1094         lp->l_length = len;
 1095         lp->l_callback_arg = argp;
 1096 
 1097         lp->l_pending = 0;
 1098         TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) {
 1099                 if (bp != ignore && g_raid_bio_overlaps(bp, off, len))
 1100                         lp->l_pending++;
 1101         }       
 1102 
 1103         /*
 1104          * If there are any writes that are pending, we return EBUSY.  All
 1105          * callers will have to wait until all pending writes clear.
 1106          */
 1107         if (lp->l_pending > 0) {
 1108                 vol->v_pending_lock = 1;
 1109                 G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd deferred %d pend",
 1110                     (intmax_t)off, (intmax_t)(off+len), lp->l_pending);
 1111                 return (EBUSY);
 1112         }
 1113         G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd",
 1114             (intmax_t)off, (intmax_t)(off+len));
 1115         G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
 1116         return (0);
 1117 }
 1118 
 1119 int
 1120 g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len)
 1121 {
 1122         struct g_raid_lock *lp;
 1123         struct g_raid_softc *sc;
 1124         struct bio *bp;
 1125 
 1126         sc = vol->v_softc;
 1127         LIST_FOREACH(lp, &vol->v_locks, l_next) {
 1128                 if (lp->l_offset == off && lp->l_length == len) {
 1129                         LIST_REMOVE(lp, l_next);
 1130                         /* XXX
 1131                          * Right now we just put them all back on the queue
 1132                          * and hope for the best.  We hope this because any
 1133                          * locked ranges will go right back on this list
 1134                          * when the worker thread runs.
 1135                          * XXX
 1136                          */
 1137                         G_RAID_DEBUG1(4, sc, "Unlocked %jd to %jd",
 1138                             (intmax_t)lp->l_offset,
 1139                             (intmax_t)(lp->l_offset+lp->l_length));
 1140                         mtx_lock(&sc->sc_queue_mtx);
 1141                         while ((bp = bioq_takefirst(&vol->v_locked)) != NULL)
 1142                                 bioq_disksort(&sc->sc_queue, bp);
 1143                         mtx_unlock(&sc->sc_queue_mtx);
 1144                         free(lp, M_RAID);
 1145                         return (0);
 1146                 }
 1147         }
 1148         return (EINVAL);
 1149 }
 1150 
 1151 void
 1152 g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp)
 1153 {
 1154         struct g_consumer *cp;
 1155         struct g_raid_disk *disk, *tdisk;
 1156 
 1157         bp->bio_caller1 = sd;
 1158 
 1159         /*
 1160          * Make sure that the disk is present. Generally it is a task of
 1161          * transformation layers to not send requests to absent disks, but
 1162          * it is better to be safe and report situation then sorry.
 1163          */
 1164         if (sd->sd_disk == NULL) {
 1165                 G_RAID_LOGREQ(0, bp, "Warning! I/O request to an absent disk!");
 1166 nodisk:
 1167                 bp->bio_from = NULL;
 1168                 bp->bio_to = NULL;
 1169                 bp->bio_error = ENXIO;
 1170                 g_raid_disk_done(bp);
 1171                 return;
 1172         }
 1173         disk = sd->sd_disk;
 1174         if (disk->d_state != G_RAID_DISK_S_ACTIVE &&
 1175             disk->d_state != G_RAID_DISK_S_FAILED) {
 1176                 G_RAID_LOGREQ(0, bp, "Warning! I/O request to a disk in a "
 1177                     "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
 1178                 goto nodisk;
 1179         }
 1180 
 1181         cp = disk->d_consumer;
 1182         bp->bio_from = cp;
 1183         bp->bio_to = cp->provider;
 1184         cp->index++;
 1185 
 1186         /* Update average disks load. */
 1187         TAILQ_FOREACH(tdisk, &sd->sd_softc->sc_disks, d_next) {
 1188                 if (tdisk->d_consumer == NULL)
 1189                         tdisk->d_load = 0;
 1190                 else
 1191                         tdisk->d_load = (tdisk->d_consumer->index *
 1192                             G_RAID_SUBDISK_LOAD_SCALE + tdisk->d_load * 7) / 8;
 1193         }
 1194 
 1195         disk->d_last_offset = bp->bio_offset + bp->bio_length;
 1196         if (dumping) {
 1197                 G_RAID_LOGREQ(3, bp, "Sending dumping request.");
 1198                 if (bp->bio_cmd == BIO_WRITE) {
 1199                         bp->bio_error = g_raid_subdisk_kerneldump(sd,
 1200                             bp->bio_data, 0, bp->bio_offset, bp->bio_length);
 1201                 } else
 1202                         bp->bio_error = EOPNOTSUPP;
 1203                 g_raid_disk_done(bp);
 1204         } else {
 1205                 bp->bio_done = g_raid_disk_done;
 1206                 bp->bio_offset += sd->sd_offset;
 1207                 G_RAID_LOGREQ(3, bp, "Sending request.");
 1208                 g_io_request(bp, cp);
 1209         }
 1210 }
 1211 
 1212 int
 1213 g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
 1214     void *virtual, vm_offset_t physical, off_t offset, size_t length)
 1215 {
 1216 
 1217         if (sd->sd_disk == NULL)
 1218                 return (ENXIO);
 1219         if (sd->sd_disk->d_kd.di.dumper == NULL)
 1220                 return (EOPNOTSUPP);
 1221         return (dump_write(&sd->sd_disk->d_kd.di,
 1222             virtual, physical,
 1223             sd->sd_disk->d_kd.di.mediaoffset + sd->sd_offset + offset,
 1224             length));
 1225 }
 1226 
 1227 static void
 1228 g_raid_disk_done(struct bio *bp)
 1229 {
 1230         struct g_raid_softc *sc;
 1231         struct g_raid_subdisk *sd;
 1232 
 1233         sd = bp->bio_caller1;
 1234         sc = sd->sd_softc;
 1235         mtx_lock(&sc->sc_queue_mtx);
 1236         bioq_disksort(&sc->sc_queue, bp);
 1237         mtx_unlock(&sc->sc_queue_mtx);
 1238         if (!dumping)
 1239                 wakeup(sc);
 1240 }
 1241 
 1242 static void
 1243 g_raid_disk_done_request(struct bio *bp)
 1244 {
 1245         struct g_raid_softc *sc;
 1246         struct g_raid_disk *disk;
 1247         struct g_raid_subdisk *sd;
 1248         struct g_raid_volume *vol;
 1249 
 1250         g_topology_assert_not();
 1251 
 1252         G_RAID_LOGREQ(3, bp, "Disk request done: %d.", bp->bio_error);
 1253         sd = bp->bio_caller1;
 1254         sc = sd->sd_softc;
 1255         vol = sd->sd_volume;
 1256         if (bp->bio_from != NULL) {
 1257                 bp->bio_from->index--;
 1258                 disk = bp->bio_from->private;
 1259                 if (disk == NULL)
 1260                         g_raid_kill_consumer(sc, bp->bio_from);
 1261         }
 1262         bp->bio_offset -= sd->sd_offset;
 1263 
 1264         G_RAID_TR_IODONE(vol->v_tr, sd, bp);
 1265 }
 1266 
 1267 static void
 1268 g_raid_handle_event(struct g_raid_softc *sc, struct g_raid_event *ep)
 1269 {
 1270 
 1271         if ((ep->e_flags & G_RAID_EVENT_VOLUME) != 0)
 1272                 ep->e_error = g_raid_update_volume(ep->e_tgt, ep->e_event);
 1273         else if ((ep->e_flags & G_RAID_EVENT_DISK) != 0)
 1274                 ep->e_error = g_raid_update_disk(ep->e_tgt, ep->e_event);
 1275         else if ((ep->e_flags & G_RAID_EVENT_SUBDISK) != 0)
 1276                 ep->e_error = g_raid_update_subdisk(ep->e_tgt, ep->e_event);
 1277         else
 1278                 ep->e_error = g_raid_update_node(ep->e_tgt, ep->e_event);
 1279         if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) {
 1280                 KASSERT(ep->e_error == 0,
 1281                     ("Error cannot be handled."));
 1282                 g_raid_event_free(ep);
 1283         } else {
 1284                 ep->e_flags |= G_RAID_EVENT_DONE;
 1285                 G_RAID_DEBUG1(4, sc, "Waking up %p.", ep);
 1286                 mtx_lock(&sc->sc_queue_mtx);
 1287                 wakeup(ep);
 1288                 mtx_unlock(&sc->sc_queue_mtx);
 1289         }
 1290 }
 1291 
/*
 * Worker thread.
 *
 * Main loop of a node (arg is its softc).  Each iteration handles at most
 * one item, in this order of preference: a pending event, a queued bio,
 * or - when both queues are empty - idle processing.  The node lock
 * (sc_lock) is held exclusively while an item is processed and is
 * released only around the sleep; the queue mutex protects the event list
 * and the bio queue.
 */
static void
g_raid_worker(void *arg)
{
        struct g_raid_softc *sc;
        struct g_raid_event *ep;
        struct g_raid_volume *vol;
        struct bio *bp;
        struct timeval now, t;
        int timeout, rv;

        sc = arg;
        /* Run at block I/O priority. */
        thread_lock(curthread);
        sched_prio(curthread, PRIBIO);
        thread_unlock(curthread);

        sx_xlock(&sc->sc_lock);
        for (;;) {
                mtx_lock(&sc->sc_queue_mtx);
                /*
                 * First take a look at events.
                 * This is important to handle events before any I/O requests.
                 */
                bp = NULL;
                vol = NULL;
                rv = 0;
                ep = TAILQ_FIRST(&sc->sc_events);
                if (ep != NULL)
                        TAILQ_REMOVE(&sc->sc_events, ep, e_next);
                else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
                        ;
                else {
                        /*
                         * Nothing to do.  Find the oldest completion time
                         * among volumes that have a transformation module
                         * and no request in flight; t ends up as that time
                         * minus now (zero or negative).
                         */
                        getmicrouptime(&now);
                        t = now;
                        TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
                                if (bioq_first(&vol->v_inflight) == NULL &&
                                    vol->v_tr &&
                                    timevalcmp(&vol->v_last_done, &t, < ))
                                        t = vol->v_last_done;
                        }
                        timevalsub(&t, &now);
                        /* Microseconds left until the idle threshold. */
                        timeout = g_raid_idle_threshold +
                            t.tv_sec * 1000000 + t.tv_usec;
                        if (timeout > 0) {
                                /*
                                 * Two steps to avoid overflows at HZ=1000
                                 * and idle timeouts > 2.1s.  Some rounding
                                 * errors can occur, but they are < 1tick,
                                 * which is deemed to be close enough for
                                 * this purpose.
                                 */
                                int micpertic = 1000000 / hz;
                                timeout = (timeout + micpertic - 1) / micpertic;
                                /*
                                 * Let others take the node lock while we
                                 * sleep.  PDROP is passed, so the queue
                                 * mutex is presumably not held when the
                                 * sleep returns - hence the jump past the
                                 * mtx_unlock() below.
                                 */
                                sx_xunlock(&sc->sc_lock);
                                MSLEEP(rv, sc, &sc->sc_queue_mtx,
                                    PRIBIO | PDROP, "-", timeout);
                                sx_xlock(&sc->sc_lock);
                                goto process;
                        } else
                                /* Threshold already passed: go idle now. */
                                rv = EWOULDBLOCK;
                }
                mtx_unlock(&sc->sc_queue_mtx);
process:
                if (ep != NULL) {
                        g_raid_handle_event(sc, ep);
                } else if (bp != NULL) {
                        /*
                         * A bio addressed to one of our own providers is a
                         * new request; anything else is a completion of a
                         * request we sent to a disk.
                         */
                        if (bp->bio_to != NULL &&
                            bp->bio_to->geom == sc->sc_geom)
                                g_raid_start_request(bp);
                        else
                                g_raid_disk_done_request(bp);
                } else if (rv == EWOULDBLOCK) {
                        /* Idle processing. */
                        TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
                                /* No writers left: the volume can go clean. */
                                if (vol->v_writes == 0 && vol->v_dirty)
                                        g_raid_clean(vol, -1);
                                /*
                                 * Notify the transformation module once the
                                 * volume has had nothing in flight for the
                                 * idle threshold, then restart the timer.
                                 */
                                if (bioq_first(&vol->v_inflight) == NULL &&
                                    vol->v_tr) {
                                        t.tv_sec = g_raid_idle_threshold / 1000000;
                                        t.tv_usec = g_raid_idle_threshold % 1000000;
                                        timevaladd(&t, &vol->v_last_done);
                                        getmicrouptime(&now);
                                        if (timevalcmp(&t, &now, <= )) {
                                                G_RAID_TR_IDLE(vol->v_tr);
                                                vol->v_last_done = now;
                                        }
                                }
                        }
                }
                if (sc->sc_stopping == G_RAID_DESTROY_HARD)
                        g_raid_destroy_node(sc, 1);     /* May not return. */
        }
}
 1386 
 1387 static void
 1388 g_raid_poll(struct g_raid_softc *sc)
 1389 {
 1390         struct g_raid_event *ep;
 1391         struct bio *bp;
 1392 
 1393         sx_xlock(&sc->sc_lock);
 1394         mtx_lock(&sc->sc_queue_mtx);
 1395         /*
 1396          * First take a look at events.
 1397          * This is important to handle events before any I/O requests.
 1398          */
 1399         ep = TAILQ_FIRST(&sc->sc_events);
 1400         if (ep != NULL) {
 1401                 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 1402                 mtx_unlock(&sc->sc_queue_mtx);
 1403                 g_raid_handle_event(sc, ep);
 1404                 goto out;
 1405         }
 1406         bp = bioq_takefirst(&sc->sc_queue);
 1407         if (bp != NULL) {
 1408                 mtx_unlock(&sc->sc_queue_mtx);
 1409                 if (bp->bio_from == NULL ||
 1410                     bp->bio_from->geom != sc->sc_geom)
 1411                         g_raid_start_request(bp);
 1412                 else
 1413                         g_raid_disk_done_request(bp);
 1414         }
 1415 out:
 1416         sx_xunlock(&sc->sc_lock);
 1417 }
 1418 
 1419 static void
 1420 g_raid_launch_provider(struct g_raid_volume *vol)
 1421 {
 1422         struct g_raid_disk *disk;
 1423         struct g_raid_softc *sc;
 1424         struct g_provider *pp;
 1425         char name[G_RAID_MAX_VOLUMENAME];
 1426         off_t off;
 1427 
 1428         sc = vol->v_softc;
 1429         sx_assert(&sc->sc_lock, SX_LOCKED);
 1430 
 1431         g_topology_lock();
 1432         /* Try to name provider with volume name. */
 1433         snprintf(name, sizeof(name), "raid/%s", vol->v_name);
 1434         if (g_raid_name_format == 0 || vol->v_name[0] == 0 ||
 1435             g_provider_by_name(name) != NULL) {
 1436                 /* Otherwise use sequential volume number. */
 1437                 snprintf(name, sizeof(name), "raid/r%d", vol->v_global_id);
 1438         }
 1439         pp = g_new_providerf(sc->sc_geom, "%s", name);
 1440         pp->private = vol;
 1441         pp->mediasize = vol->v_mediasize;
 1442         pp->sectorsize = vol->v_sectorsize;
 1443         pp->stripesize = 0;
 1444         pp->stripeoffset = 0;
 1445         if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 1446             vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 ||
 1447             vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE ||
 1448             vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) {
 1449                 if ((disk = vol->v_subdisks[0].sd_disk) != NULL &&
 1450                     disk->d_consumer != NULL &&
 1451                     disk->d_consumer->provider != NULL) {
 1452                         pp->stripesize = disk->d_consumer->provider->stripesize;
 1453                         off = disk->d_consumer->provider->stripeoffset;
 1454                         pp->stripeoffset = off + vol->v_subdisks[0].sd_offset;
 1455                         if (off > 0)
 1456                                 pp->stripeoffset %= off;
 1457                 }
 1458                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) {
 1459                         pp->stripesize *= (vol->v_disks_count - 1);
 1460                         pp->stripeoffset *= (vol->v_disks_count - 1);
 1461                 }
 1462         } else
 1463                 pp->stripesize = vol->v_strip_size;
 1464         vol->v_provider = pp;
 1465         g_error_provider(pp, 0);
 1466         g_topology_unlock();
 1467         G_RAID_DEBUG1(0, sc, "Provider %s for volume %s created.",
 1468             pp->name, vol->v_name);
 1469 }
 1470 
 1471 static void
 1472 g_raid_destroy_provider(struct g_raid_volume *vol)
 1473 {
 1474         struct g_raid_softc *sc;
 1475         struct g_provider *pp;
 1476         struct bio *bp, *tmp;
 1477 
 1478         g_topology_assert_not();
 1479         sc = vol->v_softc;
 1480         pp = vol->v_provider;
 1481         KASSERT(pp != NULL, ("NULL provider (volume=%s).", vol->v_name));
 1482 
 1483         g_topology_lock();
 1484         g_error_provider(pp, ENXIO);
 1485         mtx_lock(&sc->sc_queue_mtx);
 1486         TAILQ_FOREACH_SAFE(bp, &sc->sc_queue.queue, bio_queue, tmp) {
 1487                 if (bp->bio_to != pp)
 1488                         continue;
 1489                 bioq_remove(&sc->sc_queue, bp);
 1490                 g_io_deliver(bp, ENXIO);
 1491         }
 1492         mtx_unlock(&sc->sc_queue_mtx);
 1493         G_RAID_DEBUG1(0, sc, "Provider %s for volume %s destroyed.",
 1494             pp->name, vol->v_name);
 1495         g_wither_provider(pp, ENXIO);
 1496         g_topology_unlock();
 1497         vol->v_provider = NULL;
 1498 }
 1499 
 1500 /*
 1501  * Update device state.
 1502  */
 1503 static int
 1504 g_raid_update_volume(struct g_raid_volume *vol, u_int event)
 1505 {
 1506         struct g_raid_softc *sc;
 1507 
 1508         sc = vol->v_softc;
 1509         sx_assert(&sc->sc_lock, SX_XLOCKED);
 1510 
 1511         G_RAID_DEBUG1(2, sc, "Event %s for volume %s.",
 1512             g_raid_volume_event2str(event),
 1513             vol->v_name);
 1514         switch (event) {
 1515         case G_RAID_VOLUME_E_DOWN:
 1516                 if (vol->v_provider != NULL)
 1517                         g_raid_destroy_provider(vol);
 1518                 break;
 1519         case G_RAID_VOLUME_E_UP:
 1520                 if (vol->v_provider == NULL)
 1521                         g_raid_launch_provider(vol);
 1522                 break;
 1523         case G_RAID_VOLUME_E_START:
 1524                 if (vol->v_tr)
 1525                         G_RAID_TR_START(vol->v_tr);
 1526                 return (0);
 1527         default:
 1528                 if (sc->sc_md)
 1529                         G_RAID_MD_VOLUME_EVENT(sc->sc_md, vol, event);
 1530                 return (0);
 1531         }
 1532 
 1533         /* Manage root mount release. */
 1534         if (vol->v_starting) {
 1535                 vol->v_starting = 0;
 1536                 G_RAID_DEBUG1(1, sc, "root_mount_rel %p", vol->v_rootmount);
 1537                 root_mount_rel(vol->v_rootmount);
 1538                 vol->v_rootmount = NULL;
 1539         }
 1540         if (vol->v_stopping && vol->v_provider_open == 0)
 1541                 g_raid_destroy_volume(vol);
 1542         return (0);
 1543 }
 1544 
 1545 /*
 1546  * Update subdisk state.
 1547  */
 1548 static int
 1549 g_raid_update_subdisk(struct g_raid_subdisk *sd, u_int event)
 1550 {
 1551         struct g_raid_softc *sc;
 1552         struct g_raid_volume *vol;
 1553 
 1554         sc = sd->sd_softc;
 1555         vol = sd->sd_volume;
 1556         sx_assert(&sc->sc_lock, SX_XLOCKED);
 1557 
 1558         G_RAID_DEBUG1(2, sc, "Event %s for subdisk %s:%d-%s.",
 1559             g_raid_subdisk_event2str(event),
 1560             vol->v_name, sd->sd_pos,
 1561             sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]");
 1562         if (vol->v_tr)
 1563                 G_RAID_TR_EVENT(vol->v_tr, sd, event);
 1564 
 1565         return (0);
 1566 }
 1567 
 1568 /*
 1569  * Update disk state.
 1570  */
 1571 static int
 1572 g_raid_update_disk(struct g_raid_disk *disk, u_int event)
 1573 {
 1574         struct g_raid_softc *sc;
 1575 
 1576         sc = disk->d_softc;
 1577         sx_assert(&sc->sc_lock, SX_XLOCKED);
 1578 
 1579         G_RAID_DEBUG1(2, sc, "Event %s for disk %s.",
 1580             g_raid_disk_event2str(event),
 1581             g_raid_get_diskname(disk));
 1582 
 1583         if (sc->sc_md)
 1584                 G_RAID_MD_EVENT(sc->sc_md, disk, event);
 1585         return (0);
 1586 }
 1587 
 1588 /*
 1589  * Node event.
 1590  */
 1591 static int
 1592 g_raid_update_node(struct g_raid_softc *sc, u_int event)
 1593 {
 1594         sx_assert(&sc->sc_lock, SX_XLOCKED);
 1595 
 1596         G_RAID_DEBUG1(2, sc, "Event %s for the array.",
 1597             g_raid_node_event2str(event));
 1598 
 1599         if (event == G_RAID_NODE_E_WAKE)
 1600                 return (0);
 1601         if (sc->sc_md)
 1602                 G_RAID_MD_EVENT(sc->sc_md, NULL, event);
 1603         return (0);
 1604 }
 1605 
 1606 static int
 1607 g_raid_access(struct g_provider *pp, int acr, int acw, int ace)
 1608 {
 1609         struct g_raid_volume *vol;
 1610         struct g_raid_softc *sc;
 1611         int dcw, opens, error = 0;
 1612 
 1613         g_topology_assert();
 1614         sc = pp->geom->softc;
 1615         vol = pp->private;
 1616         KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
 1617         KASSERT(vol != NULL, ("NULL volume (provider=%s).", pp->name));
 1618 
 1619         G_RAID_DEBUG1(2, sc, "Access request for %s: r%dw%de%d.", pp->name,
 1620             acr, acw, ace);
 1621         dcw = pp->acw + acw;
 1622 
 1623         g_topology_unlock();
 1624         sx_xlock(&sc->sc_lock);
 1625         /* Deny new opens while dying. */
 1626         if (sc->sc_stopping != 0 && (acr > 0 || acw > 0 || ace > 0)) {
 1627                 error = ENXIO;
 1628                 goto out;
 1629         }
 1630         if (dcw == 0 && vol->v_dirty)
 1631                 g_raid_clean(vol, dcw);
 1632         vol->v_provider_open += acr + acw + ace;
 1633         /* Handle delayed node destruction. */
 1634         if (sc->sc_stopping == G_RAID_DESTROY_DELAYED &&
 1635             vol->v_provider_open == 0) {
 1636                 /* Count open volumes. */
 1637                 opens = g_raid_nopens(sc);
 1638                 if (opens == 0) {
 1639                         sc->sc_stopping = G_RAID_DESTROY_HARD;
 1640                         /* Wake up worker to make it selfdestruct. */
 1641                         g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
 1642                 }
 1643         }
 1644         /* Handle open volume destruction. */
 1645         if (vol->v_stopping && vol->v_provider_open == 0)
 1646                 g_raid_destroy_volume(vol);
 1647 out:
 1648         sx_xunlock(&sc->sc_lock);
 1649         g_topology_lock();
 1650         return (error);
 1651 }
 1652 
 1653 struct g_raid_softc *
 1654 g_raid_create_node(struct g_class *mp,
 1655     const char *name, struct g_raid_md_object *md)
 1656 {
 1657         struct g_raid_softc *sc;
 1658         struct g_geom *gp;
 1659         int error;
 1660 
 1661         g_topology_assert();
 1662         G_RAID_DEBUG(1, "Creating array %s.", name);
 1663 
 1664         gp = g_new_geomf(mp, "%s", name);
 1665         sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO);
 1666         gp->start = g_raid_start;
 1667         gp->orphan = g_raid_orphan;
 1668         gp->access = g_raid_access;
 1669         gp->dumpconf = g_raid_dumpconf;
 1670 
 1671         sc->sc_md = md;
 1672         sc->sc_geom = gp;
 1673         sc->sc_flags = 0;
 1674         TAILQ_INIT(&sc->sc_volumes);
 1675         TAILQ_INIT(&sc->sc_disks);
 1676         sx_init(&sc->sc_lock, "gmirror:lock");
 1677         mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
 1678         TAILQ_INIT(&sc->sc_events);
 1679         bioq_init(&sc->sc_queue);
 1680         gp->softc = sc;
 1681         error = kproc_create(g_raid_worker, sc, &sc->sc_worker, 0, 0,
 1682             "g_raid %s", name);
 1683         if (error != 0) {
 1684                 G_RAID_DEBUG(0, "Cannot create kernel thread for %s.", name);
 1685                 mtx_destroy(&sc->sc_queue_mtx);
 1686                 sx_destroy(&sc->sc_lock);
 1687                 g_destroy_geom(sc->sc_geom);
 1688                 free(sc, M_RAID);
 1689                 return (NULL);
 1690         }
 1691 
 1692         G_RAID_DEBUG1(0, sc, "Array %s created.", name);
 1693         return (sc);
 1694 }
 1695 
 1696 struct g_raid_volume *
 1697 g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id)
 1698 {
 1699         struct g_raid_volume    *vol, *vol1;
 1700         int i;
 1701 
 1702         G_RAID_DEBUG1(1, sc, "Creating volume %s.", name);
 1703         vol = malloc(sizeof(*vol), M_RAID, M_WAITOK | M_ZERO);
 1704         vol->v_softc = sc;
 1705         strlcpy(vol->v_name, name, G_RAID_MAX_VOLUMENAME);
 1706         vol->v_state = G_RAID_VOLUME_S_STARTING;
 1707         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 1708         vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN;
 1709         bioq_init(&vol->v_inflight);
 1710         bioq_init(&vol->v_locked);
 1711         LIST_INIT(&vol->v_locks);
 1712         for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) {
 1713                 vol->v_subdisks[i].sd_softc = sc;
 1714                 vol->v_subdisks[i].sd_volume = vol;
 1715                 vol->v_subdisks[i].sd_pos = i;
 1716                 vol->v_subdisks[i].sd_state = G_RAID_DISK_S_NONE;
 1717         }
 1718 
 1719         /* Find free ID for this volume. */
 1720         g_topology_lock();
 1721         vol1 = vol;
 1722         if (id >= 0) {
 1723                 LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) {
 1724                         if (vol1->v_global_id == id)
 1725                                 break;
 1726                 }
 1727         }
 1728         if (vol1 != NULL) {
 1729                 for (id = 0; ; id++) {
 1730                         LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) {
 1731                                 if (vol1->v_global_id == id)
 1732                                         break;
 1733                         }
 1734                         if (vol1 == NULL)
 1735                                 break;
 1736                 }
 1737         }
 1738         vol->v_global_id = id;
 1739         LIST_INSERT_HEAD(&g_raid_volumes, vol, v_global_next);
 1740         g_topology_unlock();
 1741 
 1742         /* Delay root mounting. */
 1743         vol->v_rootmount = root_mount_hold("GRAID");
 1744         G_RAID_DEBUG1(1, sc, "root_mount_hold %p", vol->v_rootmount);
 1745         vol->v_starting = 1;
 1746         TAILQ_INSERT_TAIL(&sc->sc_volumes, vol, v_next);
 1747         return (vol);
 1748 }
 1749 
 1750 struct g_raid_disk *
 1751 g_raid_create_disk(struct g_raid_softc *sc)
 1752 {
 1753         struct g_raid_disk      *disk;
 1754 
 1755         G_RAID_DEBUG1(1, sc, "Creating disk.");
 1756         disk = malloc(sizeof(*disk), M_RAID, M_WAITOK | M_ZERO);
 1757         disk->d_softc = sc;
 1758         disk->d_state = G_RAID_DISK_S_NONE;
 1759         TAILQ_INIT(&disk->d_subdisks);
 1760         TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next);
 1761         return (disk);
 1762 }
 1763 
 1764 int g_raid_start_volume(struct g_raid_volume *vol)
 1765 {
 1766         struct g_raid_tr_class *class;
 1767         struct g_raid_tr_object *obj;
 1768         int status;
 1769 
 1770         G_RAID_DEBUG1(2, vol->v_softc, "Starting volume %s.", vol->v_name);
 1771         LIST_FOREACH(class, &g_raid_tr_classes, trc_list) {
 1772                 G_RAID_DEBUG1(2, vol->v_softc,
 1773                     "Tasting volume %s for %s transformation.",
 1774                     vol->v_name, class->name);
 1775                 obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
 1776                     M_WAITOK);
 1777                 obj->tro_class = class;
 1778                 obj->tro_volume = vol;
 1779                 status = G_RAID_TR_TASTE(obj, vol);
 1780                 if (status != G_RAID_TR_TASTE_FAIL)
 1781                         break;
 1782                 kobj_delete((kobj_t)obj, M_RAID);
 1783         }
 1784         if (class == NULL) {
 1785                 G_RAID_DEBUG1(0, vol->v_softc,
 1786                     "No transformation module found for %s.",
 1787                     vol->v_name);
 1788                 vol->v_tr = NULL;
 1789                 g_raid_change_volume_state(vol, G_RAID_VOLUME_S_UNSUPPORTED);
 1790                 g_raid_event_send(vol, G_RAID_VOLUME_E_DOWN,
 1791                     G_RAID_EVENT_VOLUME);
 1792                 return (-1);
 1793         }
 1794         G_RAID_DEBUG1(2, vol->v_softc,
 1795             "Transformation module %s chosen for %s.",
 1796             class->name, vol->v_name);
 1797         vol->v_tr = obj;
 1798         return (0);
 1799 }
 1800 
 1801 int
 1802 g_raid_destroy_node(struct g_raid_softc *sc, int worker)
 1803 {
 1804         struct g_raid_volume *vol, *tmpv;
 1805         struct g_raid_disk *disk, *tmpd;
 1806         int error = 0;
 1807 
 1808         sc->sc_stopping = G_RAID_DESTROY_HARD;
 1809         TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tmpv) {
 1810                 if (g_raid_destroy_volume(vol))
 1811                         error = EBUSY;
 1812         }
 1813         if (error)
 1814                 return (error);
 1815         TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tmpd) {
 1816                 if (g_raid_destroy_disk(disk))
 1817                         error = EBUSY;
 1818         }
 1819         if (error)
 1820                 return (error);
 1821         if (sc->sc_md) {
 1822                 G_RAID_MD_FREE(sc->sc_md);
 1823                 kobj_delete((kobj_t)sc->sc_md, M_RAID);
 1824                 sc->sc_md = NULL;
 1825         }
 1826         if (sc->sc_geom != NULL) {
 1827                 G_RAID_DEBUG1(0, sc, "Array %s destroyed.", sc->sc_name);
 1828                 g_topology_lock();
 1829                 sc->sc_geom->softc = NULL;
 1830                 g_wither_geom(sc->sc_geom, ENXIO);
 1831                 g_topology_unlock();
 1832                 sc->sc_geom = NULL;
 1833         } else
 1834                 G_RAID_DEBUG(1, "Array destroyed.");
 1835         if (worker) {
 1836                 g_raid_event_cancel(sc, sc);
 1837                 mtx_destroy(&sc->sc_queue_mtx);
 1838                 sx_xunlock(&sc->sc_lock);
 1839                 sx_destroy(&sc->sc_lock);
 1840                 wakeup(&sc->sc_stopping);
 1841                 free(sc, M_RAID);
 1842                 curthread->td_pflags &= ~TDP_GEOM;
 1843                 G_RAID_DEBUG(1, "Thread exiting.");
 1844                 kproc_exit(0);
 1845         } else {
 1846                 /* Wake up worker to make it selfdestruct. */
 1847                 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
 1848         }
 1849         return (0);
 1850 }
 1851 
 1852 int
 1853 g_raid_destroy_volume(struct g_raid_volume *vol)
 1854 {
 1855         struct g_raid_softc *sc;
 1856         struct g_raid_disk *disk;
 1857         int i;
 1858 
 1859         sc = vol->v_softc;
 1860         G_RAID_DEBUG1(2, sc, "Destroying volume %s.", vol->v_name);
 1861         vol->v_stopping = 1;
 1862         if (vol->v_state != G_RAID_VOLUME_S_STOPPED) {
 1863                 if (vol->v_tr) {
 1864                         G_RAID_TR_STOP(vol->v_tr);
 1865                         return (EBUSY);
 1866                 } else
 1867                         vol->v_state = G_RAID_VOLUME_S_STOPPED;
 1868         }
 1869         if (g_raid_event_check(sc, vol) != 0)
 1870                 return (EBUSY);
 1871         if (vol->v_provider != NULL)
 1872                 return (EBUSY);
 1873         if (vol->v_provider_open != 0)
 1874                 return (EBUSY);
 1875         if (vol->v_tr) {
 1876                 G_RAID_TR_FREE(vol->v_tr);
 1877                 kobj_delete((kobj_t)vol->v_tr, M_RAID);
 1878                 vol->v_tr = NULL;
 1879         }
 1880         if (vol->v_rootmount)
 1881                 root_mount_rel(vol->v_rootmount);
 1882         g_topology_lock();
 1883         LIST_REMOVE(vol, v_global_next);
 1884         g_topology_unlock();
 1885         TAILQ_REMOVE(&sc->sc_volumes, vol, v_next);
 1886         for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) {
 1887                 g_raid_event_cancel(sc, &vol->v_subdisks[i]);
 1888                 disk = vol->v_subdisks[i].sd_disk;
 1889                 if (disk == NULL)
 1890                         continue;
 1891                 TAILQ_REMOVE(&disk->d_subdisks, &vol->v_subdisks[i], sd_next);
 1892         }
 1893         G_RAID_DEBUG1(2, sc, "Volume %s destroyed.", vol->v_name);
 1894         if (sc->sc_md)
 1895                 G_RAID_MD_FREE_VOLUME(sc->sc_md, vol);
 1896         g_raid_event_cancel(sc, vol);
 1897         free(vol, M_RAID);
 1898         if (sc->sc_stopping == G_RAID_DESTROY_HARD) {
 1899                 /* Wake up worker to let it selfdestruct. */
 1900                 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
 1901         }
 1902         return (0);
 1903 }
 1904 
 1905 int
 1906 g_raid_destroy_disk(struct g_raid_disk *disk)
 1907 {
 1908         struct g_raid_softc *sc;
 1909         struct g_raid_subdisk *sd, *tmp;
 1910 
 1911         sc = disk->d_softc;
 1912         G_RAID_DEBUG1(2, sc, "Destroying disk.");
 1913         if (disk->d_consumer) {
 1914                 g_raid_kill_consumer(sc, disk->d_consumer);
 1915                 disk->d_consumer = NULL;
 1916         }
 1917         TAILQ_FOREACH_SAFE(sd, &disk->d_subdisks, sd_next, tmp) {
 1918                 g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE);
 1919                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 1920                     G_RAID_EVENT_SUBDISK);
 1921                 TAILQ_REMOVE(&disk->d_subdisks, sd, sd_next);
 1922                 sd->sd_disk = NULL;
 1923         }
 1924         TAILQ_REMOVE(&sc->sc_disks, disk, d_next);
 1925         if (sc->sc_md)
 1926                 G_RAID_MD_FREE_DISK(sc->sc_md, disk);
 1927         g_raid_event_cancel(sc, disk);
 1928         free(disk, M_RAID);
 1929         return (0);
 1930 }
 1931 
 1932 int
 1933 g_raid_destroy(struct g_raid_softc *sc, int how)
 1934 {
 1935         int opens;
 1936 
 1937         g_topology_assert_not();
 1938         if (sc == NULL)
 1939                 return (ENXIO);
 1940         sx_assert(&sc->sc_lock, SX_XLOCKED);
 1941 
 1942         /* Count open volumes. */
 1943         opens = g_raid_nopens(sc);
 1944 
 1945         /* React on some opened volumes. */
 1946         if (opens > 0) {
 1947                 switch (how) {
 1948                 case G_RAID_DESTROY_SOFT:
 1949                         G_RAID_DEBUG1(1, sc,
 1950                             "%d volumes are still open.",
 1951                             opens);
 1952                         return (EBUSY);
 1953                 case G_RAID_DESTROY_DELAYED:
 1954                         G_RAID_DEBUG1(1, sc,
 1955                             "Array will be destroyed on last close.");
 1956                         sc->sc_stopping = G_RAID_DESTROY_DELAYED;
 1957                         return (EBUSY);
 1958                 case G_RAID_DESTROY_HARD:
 1959                         G_RAID_DEBUG1(1, sc,
 1960                             "%d volumes are still open.",
 1961                             opens);
 1962                 }
 1963         }
 1964 
 1965         /* Mark node for destruction. */
 1966         sc->sc_stopping = G_RAID_DESTROY_HARD;
 1967         /* Wake up worker to let it selfdestruct. */
 1968         g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
 1969         /* Sleep until node destroyed. */
 1970         sx_sleep(&sc->sc_stopping, &sc->sc_lock,
 1971             PRIBIO | PDROP, "r:destroy", 0);
 1972         return (0);
 1973 }
 1974 
 1975 static void
 1976 g_raid_taste_orphan(struct g_consumer *cp)
 1977 {
 1978 
 1979         KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
 1980             cp->provider->name));
 1981 }
 1982 
 1983 static struct g_geom *
 1984 g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 1985 {
 1986         struct g_consumer *cp;
 1987         struct g_geom *gp, *geom;
 1988         struct g_raid_md_class *class;
 1989         struct g_raid_md_object *obj;
 1990         int status;
 1991 
 1992         g_topology_assert();
 1993         g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 1994         G_RAID_DEBUG(2, "Tasting provider %s.", pp->name);
 1995 
 1996         gp = g_new_geomf(mp, "mirror:taste");
 1997         /*
 1998          * This orphan function should be never called.
 1999          */
 2000         gp->orphan = g_raid_taste_orphan;
 2001         cp = g_new_consumer(gp);
 2002         g_attach(cp, pp);
 2003 
 2004         geom = NULL;
 2005         LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
 2006                 G_RAID_DEBUG(2, "Tasting provider %s for %s metadata.",
 2007                     pp->name, class->name);
 2008                 obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
 2009                     M_WAITOK);
 2010                 obj->mdo_class = class;
 2011                 status = G_RAID_MD_TASTE(obj, mp, cp, &geom);
 2012                 if (status != G_RAID_MD_TASTE_NEW)
 2013                         kobj_delete((kobj_t)obj, M_RAID);
 2014                 if (status != G_RAID_MD_TASTE_FAIL)
 2015                         break;
 2016         }
 2017 
 2018         g_detach(cp);
 2019         g_destroy_consumer(cp);
 2020         g_destroy_geom(gp);
 2021         G_RAID_DEBUG(2, "Tasting provider %s done.", pp->name);
 2022         return (geom);
 2023 }
 2024 
 2025 int
 2026 g_raid_create_node_format(const char *format, struct g_geom **gp)
 2027 {
 2028         struct g_raid_md_class *class;
 2029         struct g_raid_md_object *obj;
 2030         int status;
 2031 
 2032         G_RAID_DEBUG(2, "Creating array for %s metadata.", format);
 2033         LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
 2034                 if (strcasecmp(class->name, format) == 0)
 2035                         break;
 2036         }
 2037         if (class == NULL) {
 2038                 G_RAID_DEBUG(1, "No support for %s metadata.", format);
 2039                 return (G_RAID_MD_TASTE_FAIL);
 2040         }
 2041         obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
 2042             M_WAITOK);
 2043         obj->mdo_class = class;
 2044         status = G_RAID_MD_CREATE(obj, &g_raid_class, gp);
 2045         if (status != G_RAID_MD_TASTE_NEW)
 2046                 kobj_delete((kobj_t)obj, M_RAID);
 2047         return (status);
 2048 }
 2049 
 2050 static int
 2051 g_raid_destroy_geom(struct gctl_req *req __unused,
 2052     struct g_class *mp __unused, struct g_geom *gp)
 2053 {
 2054         struct g_raid_softc *sc;
 2055         int error;
 2056 
 2057         g_topology_unlock();
 2058         sc = gp->softc;
 2059         sx_xlock(&sc->sc_lock);
 2060         g_cancel_event(sc);
 2061         error = g_raid_destroy(gp->softc, G_RAID_DESTROY_SOFT);
 2062         if (error != 0)
 2063                 sx_xunlock(&sc->sc_lock);
 2064         g_topology_lock();
 2065         return (error);
 2066 }
 2067 
 2068 void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
 2069     struct g_raid_subdisk *sd, struct g_raid_disk *disk)
 2070 {
 2071 
 2072         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 2073                 return;
 2074         if (sc->sc_md)
 2075                 G_RAID_MD_WRITE(sc->sc_md, vol, sd, disk);
 2076 }
 2077 
 2078 void g_raid_fail_disk(struct g_raid_softc *sc,
 2079     struct g_raid_subdisk *sd, struct g_raid_disk *disk)
 2080 {
 2081 
 2082         if (disk == NULL)
 2083                 disk = sd->sd_disk;
 2084         if (disk == NULL) {
 2085                 G_RAID_DEBUG1(0, sc, "Warning! Fail request to an absent disk!");
 2086                 return;
 2087         }
 2088         if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 2089                 G_RAID_DEBUG1(0, sc, "Warning! Fail request to a disk in a "
 2090                     "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
 2091                 return;
 2092         }
 2093         if (sc->sc_md)
 2094                 G_RAID_MD_FAIL_DISK(sc->sc_md, sd, disk);
 2095 }
 2096 
 2097 static void
 2098 g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 2099     struct g_consumer *cp, struct g_provider *pp)
 2100 {
 2101         struct g_raid_softc *sc;
 2102         struct g_raid_volume *vol;
 2103         struct g_raid_subdisk *sd;
 2104         struct g_raid_disk *disk;
 2105         int i, s;
 2106 
 2107         g_topology_assert();
 2108 
 2109         sc = gp->softc;
 2110         if (sc == NULL)
 2111                 return;
 2112         if (pp != NULL) {
 2113                 vol = pp->private;
 2114                 g_topology_unlock();
 2115                 sx_xlock(&sc->sc_lock);
 2116                 sbuf_printf(sb, "%s<Label>%s</Label>\n", indent,
 2117                     vol->v_name);
 2118                 sbuf_printf(sb, "%s<RAIDLevel>%s</RAIDLevel>\n", indent,
 2119                     g_raid_volume_level2str(vol->v_raid_level,
 2120                     vol->v_raid_level_qualifier));
 2121                 sbuf_printf(sb,
 2122                     "%s<Transformation>%s</Transformation>\n", indent,
 2123                     vol->v_tr ? vol->v_tr->tro_class->name : "NONE");
 2124                 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 2125                     vol->v_disks_count);
 2126                 sbuf_printf(sb, "%s<Strip>%u</Strip>\n", indent,
 2127                     vol->v_strip_size);
 2128                 sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 2129                     g_raid_volume_state2str(vol->v_state));
 2130                 sbuf_printf(sb, "%s<Dirty>%s</Dirty>\n", indent,
 2131                     vol->v_dirty ? "Yes" : "No");
 2132                 sbuf_printf(sb, "%s<Subdisks>", indent);
 2133                 for (i = 0; i < vol->v_disks_count; i++) {
 2134                         sd = &vol->v_subdisks[i];
 2135                         if (sd->sd_disk != NULL &&
 2136                             sd->sd_disk->d_consumer != NULL) {
 2137                                 sbuf_printf(sb, "%s ",
 2138                                     g_raid_get_diskname(sd->sd_disk));
 2139                         } else {
 2140                                 sbuf_printf(sb, "NONE ");
 2141                         }
 2142                         sbuf_printf(sb, "(%s",
 2143                             g_raid_subdisk_state2str(sd->sd_state));
 2144                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2145                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 2146                                 sbuf_printf(sb, " %d%%",
 2147                                     (int)(sd->sd_rebuild_pos * 100 /
 2148                                      sd->sd_size));
 2149                         }
 2150                         sbuf_printf(sb, ")");
 2151                         if (i + 1 < vol->v_disks_count)
 2152                                 sbuf_printf(sb, ", ");
 2153                 }
 2154                 sbuf_printf(sb, "</Subdisks>\n");
 2155                 sx_xunlock(&sc->sc_lock);
 2156                 g_topology_lock();
 2157         } else if (cp != NULL) {
 2158                 disk = cp->private;
 2159                 if (disk == NULL)
 2160                         return;
 2161                 g_topology_unlock();
 2162                 sx_xlock(&sc->sc_lock);
 2163                 sbuf_printf(sb, "%s<State>%s", indent,
 2164                     g_raid_disk_state2str(disk->d_state));
 2165                 if (!TAILQ_EMPTY(&disk->d_subdisks)) {
 2166                         sbuf_printf(sb, " (");
 2167                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 2168                                 sbuf_printf(sb, "%s",
 2169                                     g_raid_subdisk_state2str(sd->sd_state));
 2170                                 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
 2171                                     sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 2172                                         sbuf_printf(sb, " %d%%",
 2173                                             (int)(sd->sd_rebuild_pos * 100 /
 2174                                              sd->sd_size));
 2175                                 }
 2176                                 if (TAILQ_NEXT(sd, sd_next))
 2177                                         sbuf_printf(sb, ", ");
 2178                         }
 2179                         sbuf_printf(sb, ")");
 2180                 }
 2181                 sbuf_printf(sb, "</State>\n");
 2182                 sbuf_printf(sb, "%s<Subdisks>", indent);
 2183                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 2184                         sbuf_printf(sb, "r%d(%s):%d@%ju",
 2185                             sd->sd_volume->v_global_id,
 2186                             sd->sd_volume->v_name,
 2187                             sd->sd_pos, sd->sd_offset);
 2188                         if (TAILQ_NEXT(sd, sd_next))
 2189                                 sbuf_printf(sb, ", ");
 2190                 }
 2191                 sbuf_printf(sb, "</Subdisks>\n");
 2192                 sbuf_printf(sb, "%s<ReadErrors>%d</ReadErrors>\n", indent,
 2193                     disk->d_read_errs);
 2194                 sx_xunlock(&sc->sc_lock);
 2195                 g_topology_lock();
 2196         } else {
 2197                 g_topology_unlock();
 2198                 sx_xlock(&sc->sc_lock);
 2199                 if (sc->sc_md) {
 2200                         sbuf_printf(sb, "%s<Metadata>%s</Metadata>\n", indent,
 2201                             sc->sc_md->mdo_class->name);
 2202                 }
 2203                 if (!TAILQ_EMPTY(&sc->sc_volumes)) {
 2204                         s = 0xff;
 2205                         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 2206                                 if (vol->v_state < s)
 2207                                         s = vol->v_state;
 2208                         }
 2209                         sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 2210                             g_raid_volume_state2str(s));
 2211                 }
 2212                 sx_xunlock(&sc->sc_lock);
 2213                 g_topology_lock();
 2214         }
 2215 }
 2216 
 2217 static void
 2218 g_raid_shutdown_pre_sync(void *arg, int howto)
 2219 {
 2220         struct g_class *mp;
 2221         struct g_geom *gp, *gp2;
 2222         struct g_raid_softc *sc;
 2223         int error;
 2224 
 2225         mp = arg;
 2226         DROP_GIANT();
 2227         g_topology_lock();
 2228         LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 2229                 if ((sc = gp->softc) == NULL)
 2230                         continue;
 2231                 g_topology_unlock();
 2232                 sx_xlock(&sc->sc_lock);
 2233                 g_cancel_event(sc);
 2234                 error = g_raid_destroy(sc, G_RAID_DESTROY_DELAYED);
 2235                 if (error != 0)
 2236                         sx_xunlock(&sc->sc_lock);
 2237                 g_topology_lock();
 2238         }
 2239         g_topology_unlock();
 2240         PICKUP_GIANT();
 2241 }
 2242 
 2243 static void
 2244 g_raid_init(struct g_class *mp)
 2245 {
 2246 
 2247         g_raid_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
 2248             g_raid_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
 2249         if (g_raid_pre_sync == NULL)
 2250                 G_RAID_DEBUG(0, "Warning! Cannot register shutdown event.");
 2251         g_raid_started = 1;
 2252 }
 2253 
 2254 static void
 2255 g_raid_fini(struct g_class *mp)
 2256 {
 2257 
 2258         if (g_raid_pre_sync != NULL)
 2259                 EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid_pre_sync);
 2260         g_raid_started = 0;
 2261 }
 2262 
 2263 int
 2264 g_raid_md_modevent(module_t mod, int type, void *arg)
 2265 {
 2266         struct g_raid_md_class *class, *c, *nc;
 2267         int error;
 2268 
 2269         error = 0;
 2270         class = arg;
 2271         switch (type) {
 2272         case MOD_LOAD:
 2273                 c = LIST_FIRST(&g_raid_md_classes);
 2274                 if (c == NULL || c->mdc_priority > class->mdc_priority)
 2275                         LIST_INSERT_HEAD(&g_raid_md_classes, class, mdc_list);
 2276                 else {
 2277                         while ((nc = LIST_NEXT(c, mdc_list)) != NULL &&
 2278                             nc->mdc_priority < class->mdc_priority)
 2279                                 c = nc;
 2280                         LIST_INSERT_AFTER(c, class, mdc_list);
 2281                 }
 2282                 if (g_raid_started)
 2283                         g_retaste(&g_raid_class);
 2284                 break;
 2285         case MOD_UNLOAD:
 2286                 LIST_REMOVE(class, mdc_list);
 2287                 break;
 2288         default:
 2289                 error = EOPNOTSUPP;
 2290                 break;
 2291         }
 2292 
 2293         return (error);
 2294 }
 2295 
 2296 int
 2297 g_raid_tr_modevent(module_t mod, int type, void *arg)
 2298 {
 2299         struct g_raid_tr_class *class, *c, *nc;
 2300         int error;
 2301 
 2302         error = 0;
 2303         class = arg;
 2304         switch (type) {
 2305         case MOD_LOAD:
 2306                 c = LIST_FIRST(&g_raid_tr_classes);
 2307                 if (c == NULL || c->trc_priority > class->trc_priority)
 2308                         LIST_INSERT_HEAD(&g_raid_tr_classes, class, trc_list);
 2309                 else {
 2310                         while ((nc = LIST_NEXT(c, trc_list)) != NULL &&
 2311                             nc->trc_priority < class->trc_priority)
 2312                                 c = nc;
 2313                         LIST_INSERT_AFTER(c, class, trc_list);
 2314                 }
 2315                 break;
 2316         case MOD_UNLOAD:
 2317                 LIST_REMOVE(class, trc_list);
 2318                 break;
 2319         default:
 2320                 error = EOPNOTSUPP;
 2321                 break;
 2322         }
 2323 
 2324         return (error);
 2325 }
 2326 
 2327 /*
 2328  * Use local implementation of DECLARE_GEOM_CLASS(g_raid_class, g_raid)
 2329  * to reduce module priority, allowing submodules to register them first.
 2330  */
 2331 static moduledata_t g_raid_mod = {
 2332         "g_raid",
 2333         g_modevent,
 2334         &g_raid_class
 2335 };
 2336 DECLARE_MODULE(g_raid, g_raid_mod, SI_SUB_DRIVERS, SI_ORDER_THIRD);
 2337 MODULE_VERSION(geom_raid, 0);

Cache object: 3fef83b253464477d79c013dcf741f62


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.