FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/g_raid.h

/*-
 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/9.0/sys/geom/raid/g_raid.h 219974 2011-03-24 21:31:32Z mav $
 */

#ifndef _G_RAID_H_
#define _G_RAID_H_

#include <sys/param.h>
#include <sys/kobj.h>
#include <sys/bio.h>
#include <sys/time.h>

#define G_RAID_CLASS_NAME       "RAID"

#define G_RAID_MAGIC            "GEOM::RAID"

#define G_RAID_VERSION          0

struct g_raid_md_object;
struct g_raid_tr_object;

#define G_RAID_DEVICE_FLAG_NOAUTOSYNC   0x0000000000000001ULL
#define G_RAID_DEVICE_FLAG_NOFAILSYNC   0x0000000000000002ULL
#define G_RAID_DEVICE_FLAG_MASK (G_RAID_DEVICE_FLAG_NOAUTOSYNC | \
                                         G_RAID_DEVICE_FLAG_NOFAILSYNC)

#ifdef _KERNEL
extern u_int g_raid_aggressive_spare;
extern u_int g_raid_debug;
extern int g_raid_read_err_thresh;
extern u_int g_raid_start_timeout;
extern struct g_class g_raid_class;

#define G_RAID_DEBUG(lvl, fmt, ...)     do {                            \
        if (g_raid_debug >= (lvl)) {                                    \
                if (g_raid_debug > 0) {                                 \
                        printf("GEOM_RAID[%u]: " fmt "\n",              \
                            lvl, ## __VA_ARGS__);                       \
                } else {                                                \
                        printf("GEOM_RAID: " fmt "\n",                  \
                            ## __VA_ARGS__);                            \
                }                                                       \
        }                                                               \
} while (0)
#define G_RAID_DEBUG1(lvl, sc, fmt, ...)        do {                    \
        if (g_raid_debug >= (lvl)) {                                    \
                if (g_raid_debug > 0) {                                 \
                        printf("GEOM_RAID[%u]: %s: " fmt "\n",          \
                            lvl, (sc)->sc_name, ## __VA_ARGS__);        \
                } else {                                                \
                        printf("GEOM_RAID: %s: " fmt "\n",              \
                            (sc)->sc_name, ## __VA_ARGS__);             \
                }                                                       \
        }                                                               \
} while (0)
#define G_RAID_LOGREQ(lvl, bp, fmt, ...)        do {                    \
        if (g_raid_debug >= (lvl)) {                                    \
                if (g_raid_debug > 0) {                                 \
                        printf("GEOM_RAID[%u]: " fmt " ",               \
                            lvl, ## __VA_ARGS__);                       \
                } else                                                  \
                        printf("GEOM_RAID: " fmt " ", ## __VA_ARGS__);  \
                g_print_bio(bp);                                        \
                printf("\n");                                           \
        }                                                               \
} while (0)

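/*
 * Illustrative usage of the logging macros above (not part of the original
 * header; the `sc', `vol' and `bp' variables are assumed to exist in the
 * caller).  The level argument is compared against the g_raid_debug sysctl;
 * when the sysctl is positive the level (and, for G_RAID_DEBUG1, the node
 * name) is included in the "GEOM_RAID" prefix.
 */
#if 0
        G_RAID_DEBUG(1, "Force array start due to timeout.");
        G_RAID_DEBUG1(2, sc, "Volume %s state changed to %s.",
            vol->v_name, g_raid_volume_state2str(vol->v_state));
        G_RAID_LOGREQ(3, bp, "Request done: %d.", bp->bio_error);
#endif
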
/*
 * Flags we use to distinguish I/O initiated by the TR layer to maintain
 * the volume's characteristics, fix subdisks, keep extra copies of data, etc.
 *
 * G_RAID_BIO_FLAG_SYNC         I/O to update an extra copy of the data
 *                              for RAID volumes that maintain extra data
 *                              and need to rebuild that data.
 * G_RAID_BIO_FLAG_REMAP        I/O done to try to provoke a subdisk into
 *                              doing some desirable action such as bad
 *                              block remapping after we detect a bad part
 *                              of the disk.
 * G_RAID_BIO_FLAG_LOCKED       I/O holds a range lock that must be released.
 *
 * and the following meta item:
 * G_RAID_BIO_FLAG_SPECIAL      Any of the I/O flags that need to make it
 *                              through the range locking, which would
 *                              otherwise defer the I/O until after that
 *                              range is unlocked.
 */
#define G_RAID_BIO_FLAG_SYNC            0x01
#define G_RAID_BIO_FLAG_REMAP           0x02
#define G_RAID_BIO_FLAG_SPECIAL \
                (G_RAID_BIO_FLAG_SYNC|G_RAID_BIO_FLAG_REMAP)
#define G_RAID_BIO_FLAG_LOCKED          0x80

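/*
 * Illustrative sketch (not part of the original header): how a
 * transformation module might mark one of its own copy-update reads with
 * G_RAID_BIO_FLAG_SYNC before handing it to a subdisk.  The function and
 * buffer names are invented for the example.
 */
#if 0
static void
example_tr_read_some(struct g_raid_subdisk *sd, void *buf, off_t pos,
    off_t len)
{
        struct bio *bp;

        bp = g_alloc_bio();
        bp->bio_cmd = BIO_READ;
        bp->bio_offset = pos;                   /* Offset within the subdisk. */
        bp->bio_length = len;
        bp->bio_data = buf;
        bp->bio_cflags = G_RAID_BIO_FLAG_SYNC;  /* TR-initiated copy update. */
        g_raid_subdisk_iostart(sd, bp);
}
#endif
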
struct g_raid_lock {
        off_t                    l_offset;
        off_t                    l_length;
        void                    *l_callback_arg;
        int                      l_pending;
        LIST_ENTRY(g_raid_lock)  l_next;
};

#define G_RAID_EVENT_WAIT       0x01
#define G_RAID_EVENT_VOLUME     0x02
#define G_RAID_EVENT_SUBDISK    0x04
#define G_RAID_EVENT_DISK       0x08
#define G_RAID_EVENT_DONE       0x10
struct g_raid_event {
        void                    *e_tgt;
        int                      e_event;
        int                      e_flags;
        int                      e_error;
        TAILQ_ENTRY(g_raid_event) e_next;
};
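
/*
 * Illustrative note (not part of the original header): events are queued to
 * the per-node worker thread with g_raid_event_send(), declared below.  The
 * G_RAID_EVENT_* flags say what kind of object e_tgt points to and whether
 * the caller wants to sleep until the event has been processed.  For
 * example, a module noticing a broken subdisk might post an event like the
 * following (G_RAID_SUBDISK_E_FAILED is defined further down):
 */
#if 0
        g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK);
#endif
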
#define G_RAID_DISK_S_NONE              0x00    /* State is unknown. */
#define G_RAID_DISK_S_OFFLINE           0x01    /* Missing disk placeholder. */
#define G_RAID_DISK_S_FAILED            0x02    /* Failed. */
#define G_RAID_DISK_S_STALE_FAILED      0x03    /* Old failed. */
#define G_RAID_DISK_S_SPARE             0x04    /* Hot-spare. */
#define G_RAID_DISK_S_STALE             0x05    /* Old disk, unused now. */
#define G_RAID_DISK_S_ACTIVE            0x06    /* Operational. */

#define G_RAID_DISK_E_DISCONNECTED      0x01

struct g_raid_disk {
        struct g_raid_softc     *d_softc;       /* Back-pointer to softc. */
        struct g_consumer       *d_consumer;    /* GEOM disk consumer. */
        void                    *d_md_data;     /* Disk's metadata storage. */
        struct g_kerneldump      d_kd;          /* Kernel dumping method/args. */
        uint64_t                 d_flags;       /* Additional flags. */
        u_int                    d_state;       /* Disk state. */
        u_int                    d_load;        /* Disk average load. */
        off_t                    d_last_offset; /* Last head offset. */
        int                      d_read_errs;   /* Count of the read errors */
        TAILQ_HEAD(, g_raid_subdisk)     d_subdisks; /* List of subdisks. */
        TAILQ_ENTRY(g_raid_disk)         d_next;        /* Next disk in the node. */
};

#define G_RAID_SUBDISK_S_NONE           0x00    /* Absent. */
#define G_RAID_SUBDISK_S_FAILED         0x01    /* Failed. */
#define G_RAID_SUBDISK_S_NEW            0x02    /* Blank. */
#define G_RAID_SUBDISK_S_REBUILD        0x03    /* Blank + rebuild. */
#define G_RAID_SUBDISK_S_UNINITIALIZED  0x04    /* Disk of the new volume. */
#define G_RAID_SUBDISK_S_STALE          0x05    /* Dirty. */
#define G_RAID_SUBDISK_S_RESYNC         0x06    /* Dirty + check/repair. */
#define G_RAID_SUBDISK_S_ACTIVE         0x07    /* Usable. */

#define G_RAID_SUBDISK_E_NEW            0x01    /* A new subdisk has arrived */
#define G_RAID_SUBDISK_E_FAILED         0x02    /* A subdisk failed, but remains in volume */
#define G_RAID_SUBDISK_E_DISCONNECTED   0x03    /* A subdisk removed from volume. */
#define G_RAID_SUBDISK_E_FIRST_TR_PRIVATE 0x80  /* translation private events */

#define G_RAID_SUBDISK_POS(sd)                                          \
    ((sd)->sd_disk ? ((sd)->sd_disk->d_last_offset - (sd)->sd_offset) : 0)
#define G_RAID_SUBDISK_TRACK_SIZE       (1 * 1024 * 1024)
#define G_RAID_SUBDISK_LOAD(sd)                                         \
    ((sd)->sd_disk ? ((sd)->sd_disk->d_load) : 0)
#define G_RAID_SUBDISK_LOAD_SCALE       256

struct g_raid_subdisk {
        struct g_raid_softc     *sd_softc;      /* Back-pointer to softc. */
        struct g_raid_disk      *sd_disk;       /* Where this subdisk lives. */
        struct g_raid_volume    *sd_volume;     /* Volume, sd is a part of. */
        off_t                    sd_offset;     /* Offset on the disk. */
        off_t                    sd_size;       /* Size on the disk. */
        u_int                    sd_pos;        /* Position in volume. */
        u_int                    sd_state;      /* Subdisk state. */
        off_t                    sd_rebuild_pos; /* Rebuild position. */
        int                      sd_recovery;   /* Count of recovery reqs. */
        TAILQ_ENTRY(g_raid_subdisk)      sd_next; /* Next subdisk on disk. */
};

#define G_RAID_MAX_SUBDISKS     16
#define G_RAID_MAX_VOLUMENAME   32

#define G_RAID_VOLUME_S_STARTING        0x00
#define G_RAID_VOLUME_S_BROKEN          0x01
#define G_RAID_VOLUME_S_DEGRADED        0x02
#define G_RAID_VOLUME_S_SUBOPTIMAL      0x03
#define G_RAID_VOLUME_S_OPTIMAL         0x04
#define G_RAID_VOLUME_S_UNSUPPORTED     0x05
#define G_RAID_VOLUME_S_STOPPED         0x06

#define G_RAID_VOLUME_S_ALIVE(s)                        \
    ((s) == G_RAID_VOLUME_S_DEGRADED ||                 \
     (s) == G_RAID_VOLUME_S_SUBOPTIMAL ||               \
     (s) == G_RAID_VOLUME_S_OPTIMAL)

#define G_RAID_VOLUME_E_DOWN            0x00
#define G_RAID_VOLUME_E_UP              0x01
#define G_RAID_VOLUME_E_START           0x10
#define G_RAID_VOLUME_E_STARTMD         0x11

#define G_RAID_VOLUME_RL_RAID0          0x00
#define G_RAID_VOLUME_RL_RAID1          0x01
#define G_RAID_VOLUME_RL_RAID3          0x03
#define G_RAID_VOLUME_RL_RAID4          0x04
#define G_RAID_VOLUME_RL_RAID5          0x05
#define G_RAID_VOLUME_RL_RAID6          0x06
#define G_RAID_VOLUME_RL_RAID1E         0x11
#define G_RAID_VOLUME_RL_SINGLE         0x0f
#define G_RAID_VOLUME_RL_CONCAT         0x1f
#define G_RAID_VOLUME_RL_RAID5E         0x15
#define G_RAID_VOLUME_RL_RAID5EE        0x25
#define G_RAID_VOLUME_RL_UNKNOWN        0xff

#define G_RAID_VOLUME_RLQ_NONE          0x00
#define G_RAID_VOLUME_RLQ_UNKNOWN       0xff

struct g_raid_volume;

struct g_raid_volume {
        struct g_raid_softc     *v_softc;       /* Back-pointer to softc. */
        struct g_provider       *v_provider;    /* GEOM provider. */
        struct g_raid_subdisk    v_subdisks[G_RAID_MAX_SUBDISKS];
                                                /* Subdisks of this volume. */
        void                    *v_md_data;     /* Volume's metadata storage. */
        struct g_raid_tr_object *v_tr;          /* Transformation object. */
        char                     v_name[G_RAID_MAX_VOLUMENAME];
                                                /* Volume name. */
        u_int                    v_state;       /* Volume state. */
        u_int                    v_raid_level;  /* Array RAID level. */
        u_int                    v_raid_level_qualifier; /* RAID level det. */
        u_int                    v_disks_count; /* Number of disks in array. */
        u_int                    v_strip_size;  /* Array strip size. */
        u_int                    v_sectorsize;  /* Volume sector size. */
        off_t                    v_mediasize;   /* Volume media size. */
        struct bio_queue_head    v_inflight;    /* In-flight write requests. */
        struct bio_queue_head    v_locked;      /* Blocked I/O requests. */
        LIST_HEAD(, g_raid_lock) v_locks;       /* List of locked regions. */
        int                      v_pending_lock; /* Writes to locked region. */
        int                      v_dirty;       /* Volume is DIRTY. */
        struct timeval           v_last_done;   /* Time of the last I/O. */
        time_t                   v_last_write;  /* Time of the last write. */
        u_int                    v_writes;      /* Number of active writes. */
        struct root_hold_token  *v_rootmount;   /* Root mount delay token. */
        int                      v_starting;    /* Volume is starting. */
        int                      v_stopping;    /* Volume is stopping. */
        int                      v_provider_open; /* Number of opens. */
        int                      v_global_id;   /* Global volume ID (rX). */
        TAILQ_ENTRY(g_raid_volume)       v_next; /* List of volumes entry. */
        LIST_ENTRY(g_raid_volume)        v_global_next; /* Global list entry. */
};

#define G_RAID_NODE_E_WAKE      0x00
#define G_RAID_NODE_E_START     0x01

struct g_raid_softc {
        struct g_raid_md_object *sc_md;         /* Metadata object. */
        struct g_geom           *sc_geom;       /* GEOM class instance. */
        uint64_t                 sc_flags;      /* Additional flags. */
        TAILQ_HEAD(, g_raid_volume)      sc_volumes;    /* List of volumes. */
        TAILQ_HEAD(, g_raid_disk)        sc_disks;      /* List of disks. */
        struct sx                sc_lock;       /* Main node lock. */
        struct proc             *sc_worker;     /* Worker process. */
        struct mtx               sc_queue_mtx;  /* Worker queues lock. */
        TAILQ_HEAD(, g_raid_event) sc_events;   /* Worker events queue. */
        struct bio_queue_head    sc_queue;      /* Worker I/O queue. */
        int                      sc_stopping;   /* Node is stopping. */
};
#define sc_name sc_geom->name

/*
 * KOBJ parent class of metadata processing modules.
 */
struct g_raid_md_class {
        KOBJ_CLASS_FIELDS;
        int              mdc_priority;
        LIST_ENTRY(g_raid_md_class) mdc_list;
};

/*
 * KOBJ instance of metadata processing module.
 */
struct g_raid_md_object {
        KOBJ_FIELDS;
        struct g_raid_md_class  *mdo_class;
        struct g_raid_softc     *mdo_softc;     /* Back-pointer to softc. */
};

int g_raid_md_modevent(module_t, int, void *);

#define G_RAID_MD_DECLARE(name)                                 \
    static moduledata_t name##_mod = {                          \
        #name,                                                  \
        g_raid_md_modevent,                                     \
        &name##_class                                           \
    };                                                          \
    DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);  \
    MODULE_DEPEND(name, geom_raid, 0, 0, 0)

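/*
 * Illustrative sketch (not part of the original header): a metadata module
 * defines a g_raid_md_class with a KOBJ method table for the g_raid_md
 * interface and registers it with G_RAID_MD_DECLARE().  All "example" names
 * below are invented; real metadata modules follow this pattern.
 */
#if 0
static kobj_method_t g_raid_md_example_methods[] = {
        /* KOBJMETHOD() entries for the g_raid_md interface go here. */
        { 0, 0 }                                /* Terminator. */
};

static struct g_raid_md_class g_raid_md_example_class = {
        "Example",                              /* Class name. */
        g_raid_md_example_methods,              /* Method table. */
        sizeof(struct g_raid_md_object),        /* Instance size. */
        .mdc_priority = 100                     /* Taste priority. */
};

G_RAID_MD_DECLARE(g_raid_md_example);
#endif
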
/*
 * KOBJ parent class of data transformation modules.
 */
struct g_raid_tr_class {
        KOBJ_CLASS_FIELDS;
        int              trc_priority;
        LIST_ENTRY(g_raid_tr_class) trc_list;
};

/*
 * KOBJ instance of data transformation module.
 */
struct g_raid_tr_object {
        KOBJ_FIELDS;
        struct g_raid_tr_class  *tro_class;
        struct g_raid_volume    *tro_volume;    /* Back-pointer to volume. */
};

int g_raid_tr_modevent(module_t, int, void *);

#define G_RAID_TR_DECLARE(name)                                 \
    static moduledata_t name##_mod = {                          \
        #name,                                                  \
        g_raid_tr_modevent,                                     \
        &name##_class                                           \
    };                                                          \
    DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);   \
    MODULE_DEPEND(name, geom_raid, 0, 0, 0)

const char * g_raid_volume_level2str(int level, int qual);
int g_raid_volume_str2level(const char *str, int *level, int *qual);
const char * g_raid_volume_state2str(int state);
const char * g_raid_subdisk_state2str(int state);
const char * g_raid_disk_state2str(int state);

struct g_raid_softc * g_raid_create_node(struct g_class *mp,
    const char *name, struct g_raid_md_object *md);
int g_raid_create_node_format(const char *format, struct g_geom **gp);
struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc,
    const char *name, int id);
struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc);
const char * g_raid_get_diskname(struct g_raid_disk *disk);

int g_raid_start_volume(struct g_raid_volume *vol);

int g_raid_destroy_node(struct g_raid_softc *sc, int worker);
int g_raid_destroy_volume(struct g_raid_volume *vol);
int g_raid_destroy_disk(struct g_raid_disk *disk);

void g_raid_iodone(struct bio *bp, int error);
void g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp);
int g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
    void *virtual, vm_offset_t physical, off_t offset, size_t length);

struct g_consumer *g_raid_open_consumer(struct g_raid_softc *sc,
    const char *name);
void g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp);

void g_raid_report_disk_state(struct g_raid_disk *disk);
void g_raid_change_disk_state(struct g_raid_disk *disk, int state);
void g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state);
void g_raid_change_volume_state(struct g_raid_volume *vol, int state);

void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk);
void g_raid_fail_disk(struct g_raid_softc *sc,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk);

void g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp);
int g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
    void *virtual, vm_offset_t physical, off_t offset, size_t length);

u_int g_raid_ndisks(struct g_raid_softc *sc, int state);
u_int g_raid_nsubdisks(struct g_raid_volume *vol, int state);
u_int g_raid_nopens(struct g_raid_softc *sc);
struct g_raid_subdisk * g_raid_get_subdisk(struct g_raid_volume *vol,
    int state);
#define G_RAID_DESTROY_SOFT             0
#define G_RAID_DESTROY_DELAYED          1
#define G_RAID_DESTROY_HARD             2
int g_raid_destroy(struct g_raid_softc *sc, int how);
int g_raid_event_send(void *arg, int event, int flags);
int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
    struct bio *ignore, void *argp);
int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len);
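
/*
 * Illustrative sketch (not part of the original header): recovery I/O is
 * serialized against regular writes with the range-lock calls above.  A
 * transformation module typically locks the region it is about to rebuild;
 * once no overlapping regular requests remain in flight, its "locked" KOBJ
 * method is called with the argument passed here and the deferred request
 * can be started; the range is unlocked after that I/O completes.  The
 * helper name below is invented.
 */
#if 0
static void
example_tr_rebuild_some(struct g_raid_subdisk *sd, struct bio *bp)
{
        /* Let this request pass through the range lock it is about to hold. */
        bp->bio_cflags |= G_RAID_BIO_FLAG_SYNC;

        /*
         * Reserve [bio_offset, bio_offset + bio_length); regular writes to
         * this range are deferred until g_raid_unlock_range() is called.
         * The bio is passed as the callback argument so the "locked" method
         * can start it once the range is free.
         */
        g_raid_lock_range(sd->sd_volume, bp->bio_offset, bp->bio_length,
            NULL, bp);
}
#endif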

g_ctl_req_t g_raid_ctl;
#endif  /* _KERNEL */

#endif  /* !_G_RAID_H_ */

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.