1 /*-
2 * Copyright (c) 1997, 1998, 1999
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6 *
7 * Written by Greg Lehey
8 *
9 * This software is distributed under the so-called ``Berkeley
10 * License'':
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Nan Yang Computer
23 * Services Limited.
24 * 4. Neither the name of the Company nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided ``as is'', and any express or implied
29 * warranties, including, but not limited to, the implied warranties of
30 * merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall the company or contributors be liable for any
32 * direct, indirect, incidental, special, exemplary, or consequential
33 * damages (including, but not limited to, procurement of substitute
34 * goods or services; loss of use, data, or profits; or business
35 * interruption) however caused and on any theory of liability, whether
36 * in contract, strict liability, or tort (including negligence or
37 * otherwise) arising in any way out of the use of this software, even if
38 * advised of the possibility of such damage.
39 *
40 * $Id: vinumvar.h,v 1.33 2003/05/23 01:09:23 grog Exp grog $
41 * $FreeBSD$
42 */
43
44 #include <sys/time.h>
45 #include <dev/vinum/vinumstate.h>
46 #include <sys/mutex.h>
47
/* Directory that holds the vinum device nodes. */
#define VINUM_DIR "/dev/vinum"
50
51 /*
52 * Some configuration maxima. They're an enum because
53 * we can't define global constants. Sorry about that.
54 *
55 * These aren't as bad as they look: most of them are soft limits.
56 */
57
58 #define VINUMROOT
59 enum constants {
60 /*
61 * Current version of the data structures. This
62 * is used to ensure synchronization between
63 * kernel module and userland vinum(8).
64 */
65 VINUMVERSION = 1,
66 VINUM_HEADER = 512, /* size of header on disk */
67 MAXCONFIGLINE = 1024, /* maximum size of a single config line */
68 MINVINUMSLICE = 1048576, /* minimum size of a slice */
69
70 ROUND_ROBIN_READPOL = -1, /* round robin read policy */
71
72 /*
73 * Type field in high-order two bits of minor
74 * number. Subdisks are in fact both type 2 and
75 * type 3, giving twice the number of subdisks.
76 * This causes some ugliness in the code.
77 */
78 VINUM_VOLUME_TYPE = 0,
79 VINUM_PLEX_TYPE = 1,
80 VINUM_SD_TYPE = 2,
81 VINUM_SD2_TYPE = 3,
82
83
84 /*
85 * Define a minor device number.
86 * This is not used directly; instead, it's
87 * called by the other macros.
88 */
89 #define VINUMMINOR(o,t) ((o & 0xff) | ((o & 0x3fff00) << 8) | (t << VINUM_TYPE_SHIFT))
90
91 VINUM_TYPE_SHIFT = 30,
92 VINUM_MAXVOL = 0x3ffffd, /* highest numbered volume */
93
94 /*
95 * The super device and the daemon device are
96 * magic: they're the two highest-numbered
97 * volumes.
98 */
99 VINUM_SUPERDEV_VOL = 0x3ffffe,
100 VINUM_DAEMON_VOL = 0x3fffff,
101 VINUM_MAXPLEX = 0x3fffff,
102 VINUM_MAXSD = 0x7fffff,
103
104 #define VINUM_SUPERDEV_MINOR VINUMMINOR (VINUM_SUPERDEV_VOL, VINUM_VOLUME_TYPE)
105 #define VINUM_DAEMON_MINOR VINUMMINOR (VINUM_DAEMON_VOL, VINUM_VOLUME_TYPE)
106
107 /*
108 * Mask for the number part of each object.
109 * Plexes and volumes are the same, subdisks use
110 * the low-order bit of the type field and thus
111 * have twice the number.
112 */
113
114 MAJORDEV_SHIFT = 8,
115
116 MAXPLEX = 8, /* maximum number of plexes in a volume */
117 MAXSD = 256, /* maximum number of subdisks in a plex */
118 MAXDRIVENAME = 32, /* maximum length of a device name */
119 MAXSDNAME = 64, /* maximum length of a subdisk name */
120 MAXPLEXNAME = 64, /* maximum length of a plex name */
121 MAXVOLNAME = 64, /* maximum length of a volume name */
122 MAXNAME = 64, /* maximum length of any name */
123
124
125 #define OBJTYPE(x) ((minor(x) >> VINUM_TYPE_SHIFT) & 3)
126
127 /* extract device type */
128 #define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 3)
129
130 #define VINUM_SUPERDEV_NAME VINUM_DIR"/control" /* normal super device */
131 #define VINUM_DAEMON_DEV_NAME VINUM_DIR"/controld" /* super device for daemon only */
132
133 /*
134 * the number of object entries to cater for initially, and also the
135 * value by which they are incremented. It doesn't take long
136 * to extend them, so theoretically we could start with 1 of each, but
137 * it's untidy to allocate such small areas. These values are
138 * probably too small.
139 */
140
141 INITIAL_DRIVES = 4,
142 INITIAL_VOLUMES = 4,
143 INITIAL_PLEXES = 8,
144 INITIAL_SUBDISKS = 16,
145 INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
146 INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
147 INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
148 PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
149 PLEX_LOCKS = 256, /* number of locks to allocate to a plex */
150 PLEXMUTEXES = 32,
151 MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
152 DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */
153 VINUMHOSTNAMELEN = 32, /* host name field in label */
154 };
155
156 /*
157 * Slice header
158 *
159 * Vinum drives start with this structure:
160 *
161 *\ Sector
162 * |--------------------------------------|
163 * | PDP-11 memorial boot block | 0
164 * |--------------------------------------|
165 * | Disk label, maybe | 1
166 * |--------------------------------------|
167 * | Slice definition (vinum_hdr) | 8
168 * |--------------------------------------|
169 * | |
170 * | Configuration info, first copy | 9
171 * | |
172 * |--------------------------------------|
173 * | |
174 * | Configuration info, second copy | 9 + size of config
175 * | |
176 * |--------------------------------------|
177 */
178
/*
 * Sizes and offsets of our information.  DATASTART is the sum of
 * the areas in front of the data, divided by DEV_BSIZE.
 */
enum {
    VINUM_LABEL_OFFSET = 4096,              /* offset of vinum label */
    VINUMHEADERLEN = 512,                   /* size of vinum label */
    /* the first config copy follows the label directly: 4608 */
    VINUM_CONFIG_OFFSET = VINUM_LABEL_OFFSET + VINUMHEADERLEN,
    MAXCONFIG = 65536,                      /* size of each config copy */
    /* this is where the data starts: after both config copies */
    DATASTART = (VINUM_CONFIG_OFFSET + 2 * MAXCONFIG) / DEV_BSIZE
};
187
188 /*
189 * hostname is 256 bytes long, but we don't need to shlep
190 * multiple copies in vinum. We use the host name just
191 * to identify this system, and 32 bytes should be ample
192 * for that purpose
193 */
194
195 struct vinum_label {
196 char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
197 char name[MAXDRIVENAME]; /* our name of the drive */
198 struct timeval date_of_birth; /* the time it was created */
199 struct timeval last_update; /* and the time of last update */
200 /*
201 * total size in bytes of the drive. This value
202 * includes the headers.
203 */
204 off_t drive_size;
205 };
206
207 struct vinum_hdr {
208 uint64_t magic; /* we're long on magic numbers */
209 #define VINUM_MAGIC 22322600044678729LL /* should be this */
210 #define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
211 /*
212 * Size in bytes of each copy of the
213 * configuration info. This must be a multiple
214 * of the sector size.
215 */
216 int config_length;
217 struct vinum_label label; /* unique label */
218 };
219
/* Result codes returned from read_drive_label */
enum drive_label_info {
    DL_CANT_OPEN = 0,                       /* invalid partition */
    DL_NOT_OURS = 1,                        /* valid partition, but no vinum label */
    DL_DELETED_LABEL = 2,                   /* valid partition, deleted label found */
    DL_WRONG_DRIVE = 3,                     /* drive name doesn't match */
    DL_OURS = 4                             /* valid partition and label found */
};
228
/*
 * Kinds of plex organization.  The numeric order matters: the
 * values are compared with >= elsewhere in this header.
 */
enum plexorg {
    plex_disorg = 0,                        /* disorganized */
    plex_concat = 1,                        /* concatenated plex */
    plex_striped = 2,                       /* striped plex */
    plex_raid4 = 3,                         /* RAID4 plex */
    plex_raid5 = 4                          /* RAID5 plex */
};
237
/*
 * Recognize plex organizations.  p is a pointer to an object with
 * an `organization' member holding an enum plexorg value; the
 * tests rely on the numeric order of enum plexorg.  The argument is
 * parenthesized so that expressions such as `&plex' or `p + 1'
 * work as expected.
 */
#define isstriped(p) ((p)->organization >= plex_striped)    /* striped (RAID-0), RAID-4 or RAID-5 */
#define isparity(p) ((p)->organization >= plex_raid4)       /* RAID-4 or RAID-5 */
241
242 /* Address range definitions, for locking volumes */
243 struct rangelock {
244 daddr_t stripe; /* address + 1 of the range being locked */
245 struct buf *bp; /* user's buffer pointer */
246 };
247
/* One entry of the sorted list of free space on a drive. */
struct drive_freelist {
    uint64_t offset;                        /* offset of entry */
    uint64_t sectors;                       /* and length in sectors */
};
252
253 /*
254 * Include the structure definitions shared
255 * between userland and kernel.
256 */
257
258 #ifdef _KERNEL
259 #include <dev/vinum/vinumobj.h>
260 #undef _KERNEL
261 #include <dev/vinum/vinumobj.h>
262 #define _KERNEL
263 #else
264 #include <dev/vinum/vinumobj.h>
265 #endif
266
/*
 * Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries via
 * expand_table (), and add increment to oldcount.
 *
 * table and oldcount must be lvalues.  oldcount and increment are
 * evaluated more than once, so they must not have side effects.
 * All value arguments are parenthesized, so an increment such as
 * `1 << 2' or `c ? a : b' yields the intended sizes.
 *
 * With VINUMDEBUG, the file name and line number of the call are
 * passed to expand_table () as well.
 *
 * NOTE(review): the expansion is a bare brace block rather than
 * do { } while (0), so `if (x) EXPAND (...); else ...' does not
 * compile.  Kept as is, since changing it would require every
 * call site to end in a semicolon -- check the callers first.
 */
#ifdef VINUMDEBUG
#define EXPAND(table, element, oldcount, increment)                 \
{                                                                   \
    expand_table ((void **) &(table),                               \
        (oldcount) * sizeof (element),                              \
        ((oldcount) + (increment)) * sizeof (element),              \
        __FILE__,                                                   \
        __LINE__ );                                                 \
    (oldcount) += (increment);                                      \
}
#else
#define EXPAND(table, element, oldcount, increment)                 \
{                                                                   \
    expand_table ((void **) &(table),                               \
        (oldcount) * sizeof (element),                              \
        ((oldcount) + (increment)) * sizeof (element));             \
    (oldcount) += (increment);                                      \
}
#endif
291
/* Information on vinum's memory usage */
struct meminfo {
    /* number of malloced blocks */
    int mallocs;
    /* total amount malloced */
    int total_malloced;
    /* maximum number of mallocs */
    int highwater;
    /* pointer to kernel table */
    struct mc *malloced;
};
299
300 #define MCFILENAMELEN 16
301 struct mc {
302 struct timeval time;
303 int seq;
304 int size;
305 short line;
306 caddr_t address;
307 char file[MCFILENAMELEN];
308 };
309
/*
 * These enums are used by the state transition
 * routines.  They're in bit map format:
 *
 * Bit 0: Other plexes in the volume are down
 * Bit 1: Other plexes in the volume are up
 * Bit 2: The current plex is up
 *
 * Maybe they should be local to state.c
 */
enum volplexstate {
    volplex_onlyusdown = 0,                 /* we're the only plex, and we're down */
    volplex_alldown = 1,                    /* another plex is down, and so are we */
    volplex_otherup = 2,                    /* another plex is up */
    volplex_otherupdown = 3,                /* other plexes are up and down */
    volplex_onlyus = 4,                     /* we're up and alone */
    volplex_onlyusup = 5,                   /* only we are up, others are down */
    volplex_allup = 6,                      /* all plexes are up */
    volplex_someup = 7                      /* some plexes are up, including us */
};
330
/* state map for plex: one bit per subdisk (SD) state */
enum sdstates {
    sd_emptystate = 1 << 0,                 /* SD is empty */
    sd_downstate = 1 << 1,                  /* SD is down */
    sd_crashedstate = 1 << 2,               /* SD is crashed */
    sd_obsoletestate = 1 << 3,              /* SD is obsolete */
    sd_stalestate = 1 << 4,                 /* SD is stale */
    sd_rebornstate = 1 << 5,                /* SD is reborn */
    sd_upstate = 1 << 6,                    /* SD is up */
    sd_initstate = 1 << 7,                  /* SD is initializing */
    sd_initializedstate = 1 << 8,           /* SD is initialized */
    sd_otherstate = 1 << 9,                 /* SD is in some other state */
};
344
/*
 * Flags passed to set_<foo>_state.  They are only a
 * parameter to those functions, but the external
 * definitions need to know them, so they are
 * defined here.
 */
enum setstateflags {
    setstate_none = 0,                      /* no flags */
    setstate_force = 1 << 0,                /* force the state change */
    setstate_configuring = 1 << 1,          /* we're currently configuring, don't save */
};
356
/* Operations for parityops to perform. */
enum parityop {
    checkparity = 0,
    rebuildparity = 1,
    rebuildandcheckparity = 2,              /* rebuildparity with the -v option */
};
363
/*
 * Threshold for round-robin reads from a multi-plex volume: switch
 * to the next plex if the last read sector and the next sector to
 * be read are this many sectors apart.
 */
#define ROUNDROBIN_SWITCH 128               /* 64k */
370
371 #ifdef VINUMDEBUG
372 /* Debugging stuff */
/* Debug flag bits; each value is a distinct bit so they can be combined. */
enum debugflags {
    DEBUG_ADDRESSES = 1 << 0,               /* show buffer information during requests */
    DEBUG_NUMOUTPUT = 1 << 1,               /* show the value of vp->v_numoutput */
    DEBUG_RESID = 1 << 2,                   /* go into debugger in complete_rqe */
    DEBUG_LASTREQS = 1 << 3,                /* keep a circular buffer of last requests */
    DEBUG_REVIVECONFLICT = 1 << 4,          /* print info about revive conflicts */
    DEBUG_EOFINFO = 1 << 5,                 /* print info about EOF detection */
    DEBUG_MEMFREE = 1 << 6,                 /* keep info about Frees */
    DEBUG_BIGDRIVE = 1 << 7,                /* pretend our drives are 100 times the size */
    DEBUG_REMOTEGDB = 1 << 8,               /* go into remote gdb */
    DEBUG_WARNINGS = 1 << 9,                /* log various relatively harmless warnings */
    DEBUG_LOCKREQS = 1 << 10,               /* log locking requests */
};
386
/* In an i386 kernel build, redirect longjmp to LongJmp to test our longjmps. */
#if defined(_KERNEL) && defined(__i386__)
#define longjmp LongJmp
#endif
392 #endif
393 /* Local Variables: */
394 /* fill-column: 50 */
395 /* End: */
/* Cache object: 98d8db142440e8bfe700b6d2ed55910b */