FreeBSD/Linux Kernel Cross Reference
sys/geom/raid/g_raid.h
/*-
 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/9.0/sys/geom/raid/g_raid.h 219974 2011-03-24 21:31:32Z mav $
 */

#ifndef _G_RAID_H_
#define _G_RAID_H_

#include <sys/param.h>
#include <sys/kobj.h>
#include <sys/bio.h>
#include <sys/time.h>

#define G_RAID_CLASS_NAME	"RAID"

#define G_RAID_MAGIC		"GEOM::RAID"

#define G_RAID_VERSION		0

struct g_raid_md_object;
struct g_raid_tr_object;

#define G_RAID_DEVICE_FLAG_NOAUTOSYNC	0x0000000000000001ULL
#define G_RAID_DEVICE_FLAG_NOFAILSYNC	0x0000000000000002ULL
#define G_RAID_DEVICE_FLAG_MASK		(G_RAID_DEVICE_FLAG_NOAUTOSYNC | \
					 G_RAID_DEVICE_FLAG_NOFAILSYNC)

#ifdef _KERNEL
extern u_int g_raid_aggressive_spare;
extern u_int g_raid_debug;
extern int g_raid_read_err_thresh;
extern u_int g_raid_start_timeout;
extern struct g_class g_raid_class;

#define G_RAID_DEBUG(lvl, fmt, ...)	do {				\
	if (g_raid_debug >= (lvl)) {					\
		if (g_raid_debug > 0) {					\
			printf("GEOM_RAID[%u]: " fmt "\n",		\
			    lvl, ## __VA_ARGS__);			\
		} else {						\
			printf("GEOM_RAID: " fmt "\n",			\
			    ## __VA_ARGS__);				\
		}							\
	}								\
} while (0)
#define G_RAID_DEBUG1(lvl, sc, fmt, ...) do {				\
	if (g_raid_debug >= (lvl)) {					\
		if (g_raid_debug > 0) {					\
			printf("GEOM_RAID[%u]: %s: " fmt "\n",		\
			    lvl, (sc)->sc_name, ## __VA_ARGS__);	\
		} else {						\
			printf("GEOM_RAID: %s: " fmt "\n",		\
			    (sc)->sc_name, ## __VA_ARGS__);		\
		}							\
	}								\
} while (0)
#define G_RAID_LOGREQ(lvl, bp, fmt, ...) do {				\
	if (g_raid_debug >= (lvl)) {					\
		if (g_raid_debug > 0) {					\
			printf("GEOM_RAID[%u]: " fmt " ",		\
			    lvl, ## __VA_ARGS__);			\
		} else							\
			printf("GEOM_RAID: " fmt " ", ## __VA_ARGS__);	\
		g_print_bio(bp);					\
		printf("\n");						\
	}								\
} while (0)

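/*
 * Usage sketch (not part of the original header; "sc" is a hypothetical
 * softc pointer): callers log through these macros, and messages are
 * printed only when g_raid_debug is at least the requested level.
 *
 *	G_RAID_DEBUG(1, "Module loaded, debug level %u.", g_raid_debug);
 *	G_RAID_DEBUG1(2, sc, "Worker thread started.");
 *
 * With a non-zero debug level the level itself is included in the
 * "GEOM_RAID[n]:" prefix; G_RAID_LOGREQ additionally prints the bio.
 */
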
/*
 * Flags we use to distinguish I/O initiated by the TR layer to maintain
 * the volume's characteristics: fixing subdisks, updating extra copies
 * of data, etc.
 *
 * G_RAID_BIO_FLAG_SYNC		I/O to update an extra copy of the data
 *				for RAID volumes that maintain extra data
 *				and need to rebuild that data.
 * G_RAID_BIO_FLAG_REMAP	I/O done to try to provoke a subdisk into
 *				doing some desirable action such as bad
 *				block remapping after we detect a bad part
 *				of the disk.
 * G_RAID_BIO_FLAG_LOCKED	I/O holds a range lock that should be released.
 *
 * and the following meta item:
 * G_RAID_BIO_FLAG_SPECIAL	Any of the I/O flags that need to make it
 *				through the range locking which would
 *				otherwise defer the I/O until after that
 *				range is unlocked.
 */
#define G_RAID_BIO_FLAG_SYNC		0x01
#define G_RAID_BIO_FLAG_REMAP		0x02
#define G_RAID_BIO_FLAG_SPECIAL \
		(G_RAID_BIO_FLAG_SYNC|G_RAID_BIO_FLAG_REMAP)
#define G_RAID_BIO_FLAG_LOCKED		0x80

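/*
 * Illustrative sketch (not code from this file): a transformation module
 * marking a rebuild copy request would typically keep these flags in the
 * request's bio_cflags before dispatching it, e.g.
 *
 *	cbp->bio_cflags = G_RAID_BIO_FLAG_SYNC | G_RAID_BIO_FLAG_LOCKED;
 *	g_raid_subdisk_iostart(sd, cbp);
 *
 * where "cbp" and "sd" stand for a cloned bio and the target subdisk;
 * g_raid_subdisk_iostart() is declared later in this header.
 */
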
struct g_raid_lock {
	off_t			 l_offset;
	off_t			 l_length;
	void			*l_callback_arg;
	int			 l_pending;
	LIST_ENTRY(g_raid_lock)	 l_next;
};

#define G_RAID_EVENT_WAIT	0x01
#define G_RAID_EVENT_VOLUME	0x02
#define G_RAID_EVENT_SUBDISK	0x04
#define G_RAID_EVENT_DISK	0x08
#define G_RAID_EVENT_DONE	0x10
struct g_raid_event {
	void			*e_tgt;
	int			 e_event;
	int			 e_flags;
	int			 e_error;
	TAILQ_ENTRY(g_raid_event) e_next;
};
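
/*
 * Usage sketch (an assumption, not from this file): events are posted to
 * the node's worker thread with g_raid_event_send(), declared below.
 * A volume start request might look like
 *
 *	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
 *
 * A caller that must observe the result would add G_RAID_EVENT_WAIT to
 * the flags and check the returned error.
 */
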
#define G_RAID_DISK_S_NONE		0x00	/* State is unknown. */
#define G_RAID_DISK_S_OFFLINE		0x01	/* Missing disk placeholder. */
#define G_RAID_DISK_S_FAILED		0x02	/* Failed. */
#define G_RAID_DISK_S_STALE_FAILED	0x03	/* Old failed. */
#define G_RAID_DISK_S_SPARE		0x04	/* Hot-spare. */
#define G_RAID_DISK_S_STALE		0x05	/* Old disk, unused now. */
#define G_RAID_DISK_S_ACTIVE		0x06	/* Operational. */

#define G_RAID_DISK_E_DISCONNECTED	0x01

struct g_raid_disk {
	struct g_raid_softc	*d_softc;	/* Back-pointer to softc. */
	struct g_consumer	*d_consumer;	/* GEOM disk consumer. */
	void			*d_md_data;	/* Disk's metadata storage. */
	struct g_kerneldump	 d_kd;		/* Kernel dumping method/args. */
	uint64_t		 d_flags;	/* Additional flags. */
	u_int			 d_state;	/* Disk state. */
	u_int			 d_load;	/* Disk average load. */
	off_t			 d_last_offset;	/* Last head offset. */
	int			 d_read_errs;	/* Count of the read errors */
	TAILQ_HEAD(, g_raid_subdisk)	 d_subdisks; /* List of subdisks. */
	TAILQ_ENTRY(g_raid_disk)	 d_next; /* Next disk in the node. */
};

#define G_RAID_SUBDISK_S_NONE		0x00	/* Absent. */
#define G_RAID_SUBDISK_S_FAILED		0x01	/* Failed. */
#define G_RAID_SUBDISK_S_NEW		0x02	/* Blank. */
#define G_RAID_SUBDISK_S_REBUILD	0x03	/* Blank + rebuild. */
#define G_RAID_SUBDISK_S_UNINITIALIZED	0x04	/* Disk of the new volume. */
#define G_RAID_SUBDISK_S_STALE		0x05	/* Dirty. */
#define G_RAID_SUBDISK_S_RESYNC		0x06	/* Dirty + check/repair. */
#define G_RAID_SUBDISK_S_ACTIVE		0x07	/* Usable. */

#define G_RAID_SUBDISK_E_NEW		0x01	/* A new subdisk has arrived */
#define G_RAID_SUBDISK_E_FAILED		0x02	/* A subdisk failed, but remains in volume */
#define G_RAID_SUBDISK_E_DISCONNECTED	0x03	/* A subdisk removed from volume. */
#define G_RAID_SUBDISK_E_FIRST_TR_PRIVATE 0x80	/* translation private events */

#define G_RAID_SUBDISK_POS(sd)						\
    ((sd)->sd_disk ? ((sd)->sd_disk->d_last_offset - (sd)->sd_offset) : 0)
#define G_RAID_SUBDISK_TRACK_SIZE	(1 * 1024 * 1024)
#define G_RAID_SUBDISK_LOAD(sd)						\
    ((sd)->sd_disk ? ((sd)->sd_disk->d_load) : 0)
#define G_RAID_SUBDISK_LOAD_SCALE	256

struct g_raid_subdisk {
	struct g_raid_softc	*sd_softc;	/* Back-pointer to softc. */
	struct g_raid_disk	*sd_disk;	/* Where this subdisk lives. */
	struct g_raid_volume	*sd_volume;	/* Volume this subdisk is part of. */
	off_t			 sd_offset;	/* Offset on the disk. */
	off_t			 sd_size;	/* Size on the disk. */
	u_int			 sd_pos;	/* Position in volume. */
	u_int			 sd_state;	/* Subdisk state. */
	off_t			 sd_rebuild_pos; /* Rebuild position. */
	int			 sd_recovery;	/* Count of recovery reqs. */
	TAILQ_ENTRY(g_raid_subdisk)	 sd_next; /* Next subdisk on disk. */
};

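/*
 * Sketch (an assumption, not code from this file): a transformation
 * module balancing reads across mirrored subdisks could combine the
 * helpers above, e.g. prefer the active subdisk with the smallest
 * G_RAID_SUBDISK_LOAD() value:
 *
 *	best = NULL;
 *	for (i = 0; i < vol->v_disks_count; i++) {
 *		sd = &vol->v_subdisks[i];
 *		if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE)
 *			continue;
 *		if (best == NULL ||
 *		    G_RAID_SUBDISK_LOAD(sd) < G_RAID_SUBDISK_LOAD(best))
 *			best = sd;
 *	}
 *
 * G_RAID_SUBDISK_POS() and G_RAID_SUBDISK_TRACK_SIZE similarly allow
 * preferring a head that is already near the requested offset.
 */
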
#define G_RAID_MAX_SUBDISKS	16
#define G_RAID_MAX_VOLUMENAME	32

#define G_RAID_VOLUME_S_STARTING	0x00
#define G_RAID_VOLUME_S_BROKEN		0x01
#define G_RAID_VOLUME_S_DEGRADED	0x02
#define G_RAID_VOLUME_S_SUBOPTIMAL	0x03
#define G_RAID_VOLUME_S_OPTIMAL		0x04
#define G_RAID_VOLUME_S_UNSUPPORTED	0x05
#define G_RAID_VOLUME_S_STOPPED		0x06

#define G_RAID_VOLUME_S_ALIVE(s)			\
	((s) == G_RAID_VOLUME_S_DEGRADED ||		\
	 (s) == G_RAID_VOLUME_S_SUBOPTIMAL ||		\
	 (s) == G_RAID_VOLUME_S_OPTIMAL)

#define G_RAID_VOLUME_E_DOWN		0x00
#define G_RAID_VOLUME_E_UP		0x01
#define G_RAID_VOLUME_E_START		0x10
#define G_RAID_VOLUME_E_STARTMD		0x11

#define G_RAID_VOLUME_RL_RAID0		0x00
#define G_RAID_VOLUME_RL_RAID1		0x01
#define G_RAID_VOLUME_RL_RAID3		0x03
#define G_RAID_VOLUME_RL_RAID4		0x04
#define G_RAID_VOLUME_RL_RAID5		0x05
#define G_RAID_VOLUME_RL_RAID6		0x06
#define G_RAID_VOLUME_RL_RAID1E		0x11
#define G_RAID_VOLUME_RL_SINGLE		0x0f
#define G_RAID_VOLUME_RL_CONCAT		0x1f
#define G_RAID_VOLUME_RL_RAID5E		0x15
#define G_RAID_VOLUME_RL_RAID5EE	0x25
#define G_RAID_VOLUME_RL_UNKNOWN	0xff

#define G_RAID_VOLUME_RLQ_NONE		0x00
#define G_RAID_VOLUME_RLQ_UNKNOWN	0xff

struct g_raid_volume;

struct g_raid_volume {
	struct g_raid_softc	*v_softc;	/* Back-pointer to softc. */
	struct g_provider	*v_provider;	/* GEOM provider. */
	struct g_raid_subdisk	 v_subdisks[G_RAID_MAX_SUBDISKS];
						/* Subdisks of this volume. */
	void			*v_md_data;	/* Volume's metadata storage. */
	struct g_raid_tr_object	*v_tr;		/* Transformation object. */
	char			 v_name[G_RAID_MAX_VOLUMENAME];
						/* Volume name. */
	u_int			 v_state;	/* Volume state. */
	u_int			 v_raid_level;	/* Array RAID level. */
	u_int			 v_raid_level_qualifier; /* Array RAID level qualifier. */
	u_int			 v_disks_count;	/* Number of disks in array. */
	u_int			 v_strip_size;	/* Array strip size. */
	u_int			 v_sectorsize;	/* Volume sector size. */
	off_t			 v_mediasize;	/* Volume media size. */
	struct bio_queue_head	 v_inflight;	/* In-flight write requests. */
	struct bio_queue_head	 v_locked;	/* Blocked I/O requests. */
	LIST_HEAD(, g_raid_lock) v_locks;	/* List of locked regions. */
	int			 v_pending_lock; /* Writes to locked region. */
	int			 v_dirty;	/* Volume is DIRTY. */
	struct timeval		 v_last_done;	/* Time of the last I/O. */
	time_t			 v_last_write;	/* Time of the last write. */
	u_int			 v_writes;	/* Number of active writes. */
	struct root_hold_token	*v_rootmount;	/* Root mount delay token. */
	int			 v_starting;	/* Volume is starting. */
	int			 v_stopping;	/* Volume is stopping. */
	int			 v_provider_open; /* Number of opens. */
	int			 v_global_id;	/* Global volume ID (rX). */
	TAILQ_ENTRY(g_raid_volume)	 v_next; /* List of volumes entry. */
	LIST_ENTRY(g_raid_volume)	 v_global_next; /* Global list entry. */
};

#define G_RAID_NODE_E_WAKE	0x00
#define G_RAID_NODE_E_START	0x01

struct g_raid_softc {
	struct g_raid_md_object	*sc_md;		/* Metadata object. */
	struct g_geom		*sc_geom;	/* GEOM class instance. */
	uint64_t		 sc_flags;	/* Additional flags. */
	TAILQ_HEAD(, g_raid_volume)	 sc_volumes;	/* List of volumes. */
	TAILQ_HEAD(, g_raid_disk)	 sc_disks;	/* List of disks. */
	struct sx		 sc_lock;	/* Main node lock. */
	struct proc		*sc_worker;	/* Worker process. */
	struct mtx		 sc_queue_mtx;	/* Worker queues lock. */
	TAILQ_HEAD(, g_raid_event)	 sc_events;	/* Worker events queue. */
	struct bio_queue_head	 sc_queue;	/* Worker I/O queue. */
	int			 sc_stopping;	/* Node is stopping */
};
#define sc_name	sc_geom->name

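/*
 * Sketch, offered as an assumption rather than code from this file:
 * per-node state above is guarded by the "main node lock" sc_lock,
 * while the worker queues use sc_queue_mtx, so a typical caller
 * pattern with a struct g_raid_softc pointer "sc" is
 *
 *	sx_xlock(&sc->sc_lock);
 *	... inspect or modify volumes/disks ...
 *	sx_xunlock(&sc->sc_lock);
 */
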
/*
 * KOBJ parent class of metadata processing modules.
 */
struct g_raid_md_class {
	KOBJ_CLASS_FIELDS;
	int		 mdc_priority;
	LIST_ENTRY(g_raid_md_class) mdc_list;
};

/*
 * KOBJ instance of metadata processing module.
 */
struct g_raid_md_object {
	KOBJ_FIELDS;
	struct g_raid_md_class	*mdo_class;
	struct g_raid_softc	*mdo_softc;	/* Back-pointer to softc. */
};

int g_raid_md_modevent(module_t, int, void *);

#define G_RAID_MD_DECLARE(name)					\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_raid_md_modevent,				\
		&name##_class					\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); \
	MODULE_DEPEND(name, geom_raid, 0, 0, 0)

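/*
 * Sketch of how a metadata module hooks itself in (hypothetical module
 * name "g_raid_md_foo"; the KOBJ method names come from g_raid_md_if.m
 * and are an assumption here, not declared in this file).
 * G_RAID_MD_DECLARE() expects a <name>_class variable to exist:
 *
 *	static kobj_method_t g_raid_md_foo_methods[] = {
 *		KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_foo),
 *		KOBJMETHOD(g_raid_md_free,	g_raid_md_free_foo),
 *		{ 0, 0 }
 *	};
 *
 *	static struct g_raid_md_class g_raid_md_foo_class = {
 *		"FOO",
 *		g_raid_md_foo_methods,
 *		sizeof(struct g_raid_md_object),
 *		.mdc_priority = 100
 *	};
 *
 *	G_RAID_MD_DECLARE(g_raid_md_foo);
 */
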
/*
 * KOBJ parent class of data transformation modules.
 */
struct g_raid_tr_class {
	KOBJ_CLASS_FIELDS;
	int		 trc_priority;
	LIST_ENTRY(g_raid_tr_class) trc_list;
};

/*
 * KOBJ instance of data transformation module.
 */
struct g_raid_tr_object {
	KOBJ_FIELDS;
	struct g_raid_tr_class	*tro_class;
	struct g_raid_volume	*tro_volume;	/* Back-pointer to volume. */
};

int g_raid_tr_modevent(module_t, int, void *);

#define G_RAID_TR_DECLARE(name)					\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_raid_tr_modevent,				\
		&name##_class					\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); \
	MODULE_DEPEND(name, geom_raid, 0, 0, 0)

const char * g_raid_volume_level2str(int level, int qual);
int g_raid_volume_str2level(const char *str, int *level, int *qual);
const char * g_raid_volume_state2str(int state);
const char * g_raid_subdisk_state2str(int state);
const char * g_raid_disk_state2str(int state);

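/*
 * Usage sketch (an illustration, not taken from the sources; the
 * success return value of 0 is an assumption): converting between the
 * symbolic RAID level names and the numeric constants above:
 *
 *	int level, qual;
 *
 *	if (g_raid_volume_str2level("RAID1", &level, &qual) == 0)
 *		printf("%s\n", g_raid_volume_level2str(level, qual));
 */
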
struct g_raid_softc * g_raid_create_node(struct g_class *mp,
    const char *name, struct g_raid_md_object *md);
int g_raid_create_node_format(const char *format, struct g_geom **gp);
struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc,
    const char *name, int id);
struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc);
const char * g_raid_get_diskname(struct g_raid_disk *disk);

int g_raid_start_volume(struct g_raid_volume *vol);

int g_raid_destroy_node(struct g_raid_softc *sc, int worker);
int g_raid_destroy_volume(struct g_raid_volume *vol);
int g_raid_destroy_disk(struct g_raid_disk *disk);

void g_raid_iodone(struct bio *bp, int error);
void g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp);
int g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
    void *virtual, vm_offset_t physical, off_t offset, size_t length);

struct g_consumer *g_raid_open_consumer(struct g_raid_softc *sc,
    const char *name);
void g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp);

void g_raid_report_disk_state(struct g_raid_disk *disk);
void g_raid_change_disk_state(struct g_raid_disk *disk, int state);
void g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state);
void g_raid_change_volume_state(struct g_raid_volume *vol, int state);

void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk);
void g_raid_fail_disk(struct g_raid_softc *sc,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk);

void g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp);
int g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
    void *virtual, vm_offset_t physical, off_t offset, size_t length);

u_int g_raid_ndisks(struct g_raid_softc *sc, int state);
u_int g_raid_nsubdisks(struct g_raid_volume *vol, int state);
u_int g_raid_nopens(struct g_raid_softc *sc);
struct g_raid_subdisk * g_raid_get_subdisk(struct g_raid_volume *vol,
    int state);
#define G_RAID_DESTROY_SOFT	0
#define G_RAID_DESTROY_DELAYED	1
#define G_RAID_DESTROY_HARD	2
int g_raid_destroy(struct g_raid_softc *sc, int how);
int g_raid_event_send(void *arg, int event, int flags);
int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
    struct bio *ignore, void *argp);
int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len);

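/*
 * Sketch of the expected range-locking pattern (an assumption based on
 * the declarations above, not code from this file): before touching a
 * region, e.g. during rebuild, a transformation module requests a range
 * lock and passes a callback argument identifying the deferred work;
 * once the copying is done, the range is unlocked:
 *
 *	g_raid_lock_range(sd->sd_volume, bp->bio_offset,
 *	    bp->bio_length, NULL, bp);
 *	...
 *	g_raid_unlock_range(sd->sd_volume, bp->bio_offset, bp->bio_length);
 *
 * Regular writes that overlap a locked region are held on the volume's
 * v_locked queue until the lock is dropped.
 */
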
g_ctl_req_t g_raid_ctl;
#endif	/* _KERNEL */

#endif	/* !_G_RAID_H_ */