1 /*-
2 * Copyright (c) 1997, 1998, 1999
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6 *
7 * Written by Greg Lehey
8 *
9 * This software is distributed under the so-called ``Berkeley
10 * License'':
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Nan Yang Computer
23 * Services Limited.
24 * 4. Neither the name of the Company nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided ``as is'', and any express or implied
29 * warranties, including, but not limited to, the implied warranties of
30 * merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall the company or contributors be liable for any
32 * direct, indirect, incidental, special, exemplary, or consequential
33 * damages (including, but not limited to, procurement of substitute
34 * goods or services; loss of use, data, or profits; or business
35 * interruption) however caused and on any theory of liability, whether
36 * in contract, strict liability, or tort (including negligence or
37 * otherwise) arising in any way out of the use of this software, even if
38 * advised of the possibility of such damage.
39 *
40 * $Id: vinumvar.h,v 1.27 2001/05/22 04:07:22 grog Exp grog $
41 * $FreeBSD: releng/5.0/sys/dev/vinum/vinumvar.h 84811 2001-10-11 17:52:20Z jhb $
42 */
43
44 #include <sys/time.h>
45 #include <dev/vinum/vinumstate.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48
49 /*
50 * Some configuration maxima. They're an enum because
51 * we can't define global constants. Sorry about that.
52 *
53 * These aren't as bad as they look: most of them are soft limits.
54 */
55
56 #define VINUMROOT
57 enum constants {
58 /*
59 * Current version of the data structures. This
60 * is used to ensure synchronization between
61 * kernel module and userland vinum(8).
62 */
63 VINUMVERSION = 1,
64 VINUM_HEADER = 512, /* size of header on disk */
65 MAXCONFIGLINE = 1024, /* maximum size of a single config line */
66 MINVINUMSLICE = 1048576, /* minimum size of a slice */
67
68 VINUM_CDEV_MAJOR = 91, /* major number for character device */
69
70 ROUND_ROBIN_READPOL = -1, /* round robin read policy */
71
72 /* type field in minor number */
73 VINUM_VOLUME_TYPE = 0,
74 VINUM_PLEX_TYPE = 1,
75 VINUM_SD_TYPE = 2,
76 VINUM_DRIVE_TYPE = 3,
77 VINUM_SUPERDEV_TYPE = 4, /* super device. */
78 VINUM_RAWPLEX_TYPE = 5, /* anonymous plex */
79 VINUM_RAWSD_TYPE = 6, /* anonymous subdisk */
80
81 /* Shifts for the individual fields in the device */
82 VINUM_TYPE_SHIFT = 28,
83 VINUM_VOL_SHIFT = 0,
84 VINUM_PLEX_SHIFT = 16,
85 VINUM_SD_SHIFT = 20,
86 VINUM_VOL_WIDTH = 8,
87 VINUM_PLEX_WIDTH = 3,
88 VINUM_SD_WIDTH = 8,
89
90 /*
91 * Shifts for the second half of raw plex and
92 * subdisk numbers
93 */
94 VINUM_RAWPLEX_SHIFT = 8, /* shift the second half this much */
95 VINUM_RAWPLEX_WIDTH = 12, /* width of second half */
96
97 MAJORDEV_SHIFT = 8,
98
99 MAXPLEX = 8, /* maximum number of plexes in a volume */
100 MAXSD = 256, /* maximum number of subdisks in a plex */
101 MAXDRIVENAME = 32, /* maximum length of a device name */
102 MAXSDNAME = 64, /* maximum length of a subdisk name */
103 MAXPLEXNAME = 64, /* maximum length of a plex name */
104 MAXVOLNAME = 64, /* maximum length of a volume name */
105 MAXNAME = 64, /* maximum length of any name */
106
107
108 /*
109 * Define a minor device number.
110 * This is not used directly; instead, it's
111 * called by the other macros.
112 */
113 #define VINUMMINOR(v,p,s,t) ( (v << VINUM_VOL_SHIFT) \
114 | (p << VINUM_PLEX_SHIFT) \
115 | (s << VINUM_SD_SHIFT) \
116 | (t << VINUM_TYPE_SHIFT) )
117
118 /* Create device minor numbers */
119 #define VINUMDEV(v,p,s,t) makedev (VINUM_CDEV_MAJOR, VINUMMINOR (v, p, s, t))
120
121 #define VINUM_PLEX(p) makedev (VINUM_CDEV_MAJOR, \
122 (VINUM_RAWPLEX_TYPE << VINUM_TYPE_SHIFT) \
123 | (p & 0xff) \
124 | ((p & ~0xff) << 8) )
125
126 #define VINUM_SD(s) makedev (VINUM_CDEV_MAJOR, \
127 (VINUM_RAWSD_TYPE << VINUM_TYPE_SHIFT) \
128 | (s & 0xff) \
129 | ((s & ~0xff) << 8) )
130
131 /* Create a bit mask for x bits */
132 #define MASK(x) ((1 << (x)) - 1)
133
134 /* Create a raw block device minor number */
135 #define VINUMRMINOR(d,t) ( ((d & MASK (VINUM_VOL_WIDTH)) << VINUM_VOL_SHIFT) \
136 | ((d & ~MASK (VINUM_VOL_WIDTH)) \
137 << (VINUM_PLEX_SHIFT + VINUM_VOL_WIDTH)) \
138 | (t << VINUM_TYPE_SHIFT) )
139
140
141 /* extract device type */
142 #define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 7)
143
144 /*
145 * This mess is used to catch people who compile
146 * a debug vinum(8) and non-debug kernel module,
147 * or the other way round.
148 */
149
150 #define VINUM_SUPERDEV VINUMMINOR (1, 0, 0, VINUM_SUPERDEV_TYPE) /* superdevice number */
151 #define VINUM_DAEMON_DEV VINUMMINOR (0, 0, 0, VINUM_SUPERDEV_TYPE) /* daemon superdevice number */
152
153 /*
154 * the number of object entries to cater for initially, and also the
155 * value by which they are incremented. It doesn't take long
156 * to extend them, so theoretically we could start with 1 of each, but
157 * it's untidy to allocate such small areas. These values are
158 * probably too small.
159 */
160
161 INITIAL_DRIVES = 4,
162 INITIAL_VOLUMES = 4,
163 INITIAL_PLEXES = 8,
164 INITIAL_SUBDISKS = 16,
165 INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
166 INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
167 INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
168 PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
169 PLEX_LOCKS = 256, /* number of locks to allocate to a plex */
170 MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
171 DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */
172 VINUMHOSTNAMELEN = 32, /* host name field in label */
173 };
174
175 /* device numbers */
176
177 /*
178 * 31 30 28 27 20 19 18 16 15 8 7 0
179 * |-----------------------------------------------------------------------------------------------|
180 * |X | Type | Subdisk number | X| Plex | Major number | volume number |
181 * |-----------------------------------------------------------------------------------------------|
182 *
183 * 0x2 03 1 19 06
184 *
185 * The fields in the minor number are interpreted as follows:
186 *
187 * Volume: Only type and volume number are relevant
188 * Plex in volume: type, plex number in volume and volume number are relevant
189 * raw plex: type, plex number is made of bits 27-16 and 7-0
190 * raw subdisk: type, subdisk number is made of bits 27-16 and 7-0
191 */
192
/*
 * Bit-field picture of a vinum device number.
 * This doesn't get used.  Consider removing it.
 */
struct devcode {
    /*
     * CARE.  These fields assume a big-endian word.  On a
     * little-endian system, they're the wrong way around.
     * (The order in which bit-fields are allocated within
     * a word is implementation-defined in C.)
     */
    unsigned int volume:8;      /* up to 256 volumes */
    unsigned int major:8;       /* this is where the major number fits */
    unsigned int plex:3;        /* up to 8 plexes per volume */
    unsigned int unused:1;      /* up for grabs */
    unsigned int sd:8;          /* up to 256 subdisks per plex */
    /*
     * Type of object.  The values listed with this field are:
     *   VINUM_VOLUME   = 0
     *   VINUM_PLEX     = 1
     *   VINUM_SUBDISK  = 2
     *   VINUM_DRIVE    = 3
     *   VINUM_SUPERDEV = 4
     *   VINUM_RAWPLEX  = 5
     *   VINUM_RAWSD    = 6
     */
    unsigned int type:3;
    unsigned int signbit:1;     /* to make 32 bits */
};
216
/* Common prefix of the device names defined below */
#define VINUM_DIR               "/dev/vinum"

/*
 * Names of the super devices, formed by string literal
 * concatenation onto VINUM_DIR.  These definitions help
 * catch userland/kernel mismatches.
 */
#define VINUM_SUPERDEV_NAME     VINUM_DIR "/control"    /* normal super device */
#define VINUM_DAEMON_DEV_NAME   VINUM_DIR "/controld"   /* super device for daemon only */
225
226 /*
227 * Slice header
228 *
229 * Vinum drives start with this structure:
230 *
231 *\ Sector
232 * |--------------------------------------|
233 * | PDP-11 memorial boot block | 0
234 * |--------------------------------------|
235 * | Disk label, maybe | 1
236 * |--------------------------------------|
237 * | Slice definition (vinum_hdr) | 8
238 * |--------------------------------------|
239 * | |
240 * | Configuration info, first copy | 9
241 * | |
242 * |--------------------------------------|
243 * | |
244 * | Configuration info, second copy | 9 + size of config
245 * | |
246 * |--------------------------------------|
247 */
248
/*
 * Sizes and offsets of our information.  The first four
 * values are in bytes; DATASTART is divided by DEV_BSIZE.
 */
enum {
    /* offset of vinum label */
    VINUM_LABEL_OFFSET = 4096,
    /* size of vinum label */
    VINUMHEADERLEN = 512,
    /* offset of first config copy: directly after the label (4608) */
    VINUM_CONFIG_OFFSET = VINUM_LABEL_OFFSET + VINUMHEADERLEN,
    /* and size of config copy (65536) */
    MAXCONFIG = 64 * 1024,
    /* this is where the data starts: after both config copies */
    DATASTART = (VINUM_CONFIG_OFFSET + 2 * MAXCONFIG) / DEV_BSIZE
};
257
258 /*
259 * hostname is 256 bytes long, but we don't need to shlep
260 * multiple copies in vinum. We use the host name just
261 * to identify this system, and 32 bytes should be ample
262 * for that purpose
263 */
264
265 struct vinum_label {
266 char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
267 char name[MAXDRIVENAME]; /* our name of the drive */
268 struct timeval date_of_birth; /* the time it was created */
269 struct timeval last_update; /* and the time of last update */
270 /*
271 * total size in bytes of the drive. This value
272 * includes the headers.
273 */
274 off_t drive_size;
275 };
276
277 struct vinum_hdr {
278 uint64_t magic; /* we're long on magic numbers */
279 #define VINUM_MAGIC 22322600044678729LL /* should be this */
280 #define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
281 /*
282 * Size in bytes of each copy of the
283 * configuration info. This must be a multiple
284 * of the sector size.
285 */
286 int config_length;
287 struct vinum_label label; /* unique label */
288 };
289
/*
 * Information returned from read_drive_label.
 * The values are consecutive, starting at 0.
 */
enum drive_label_info {
    DL_CANT_OPEN     = 0,   /* invalid partition */
    DL_NOT_OURS      = 1,   /* valid partition, but no vinum label */
    DL_DELETED_LABEL = 2,   /* valid partition, deleted label found */
    DL_WRONG_DRIVE   = 3,   /* drive name doesn't match */
    DL_OURS          = 4    /* valid partition and label found */
};
298
/*
 * kinds of plex organization.  The numeric order matters:
 * isstriped() and isparity() compare against it with >=.
 */
enum plexorg {
    plex_disorg  = 0,   /* disorganized */
    plex_concat  = 1,   /* concatenated plex */
    plex_striped = 2,   /* striped plex */
    plex_raid4   = 3,   /* RAID4 plex */
    plex_raid5   = 4    /* RAID5 plex */
};
307
/*
 * Recognize plex organizations.  p is a pointer to an
 * object with an `organization' member; the argument is
 * parenthesized so that pointer expressions such as
 * "plexes + i" work.  Both tests depend on the ordering
 * of enum plexorg.
 */
#define isstriped(p) ((p)->organization >= plex_striped)    /* striped, RAID 4 or 5 */
#define isparity(p)  ((p)->organization >= plex_raid4)      /* RAID 4 or 5 */
311
312 /* Address range definitions, for locking volumes */
313 struct rangelock {
314 daddr_t stripe; /* address + 1 of the range being locked */
315 struct buf *bp; /* user's buffer pointer */
316 };
317
/*
 * One entry of the sorted list of free space on a drive.
 * The fields use the C99 uint64_t spelling, matching
 * struct vinum_hdr in this file, instead of the BSD-only
 * u_int64_t; the type is the same width.
 */
struct drive_freelist {
    uint64_t offset;    /* offset of entry */
    uint64_t sectors;   /* and length in sectors */
};
322
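/*
 * Include the structure definitions shared
 * between userland and kernel.  In a kernel build
 * the header is included twice: once with _KERNEL
 * defined and once with it temporarily undefined.
 * Userland includes it once.
 */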
327
328 #ifdef _KERNEL
329 #include <dev/vinum/vinumobj.h>
330 #undef _KERNEL
331 #include <dev/vinum/vinumobj.h>
332 #define _KERNEL
333 #else
334 #include <dev/vinum/vinumobj.h>
335 #endif
336
/*
 * Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 *   table      lvalue; its address is passed to
 *              expand_table as a void **
 *   element    type of one table entry
 *   oldcount   lvalue holding the current number of
 *              entries; increment is added to it after
 *              the expand_table call
 *   increment  number of entries to add
 *
 * All arguments are parenthesized in the expansion, so
 * expressions such as "cond ? 2 : 1" may be passed.
 * oldcount is evaluated three times and increment twice;
 * do not pass expressions with side effects.
 *
 * The expansion is a bare compound statement, not a
 * do { } while (0), so "if (x) EXPAND (...); else ..."
 * will not compile.
 */
#define EXPAND(table, element, oldcount, increment)                 \
{                                                                   \
    expand_table ((void **) &(table),                               \
                  (oldcount) * sizeof (element),                    \
                  ((oldcount) + (increment)) * sizeof (element) );  \
    (oldcount) += (increment);                                      \
}
349
/* Information on vinum's memory usage */
struct meminfo {
    /* number of malloced blocks */
    int mallocs;
    /* total amount malloced */
    int total_malloced;
    /* maximum number of mallocs */
    int highwater;
    /* pointer to kernel table */
    struct mc *malloced;
};
357
358 #define MCFILENAMELEN 16
359 struct mc {
360 struct timeval time;
361 int seq;
362 int size;
363 short line;
364 caddr_t address;
365 char file[MCFILENAMELEN];
366 };
367
368 /*
369 * These enums are used by the state transition
370 * routines. They're in bit map format:
371 *
372 * Bit 0: Other plexes in the volume are down
373 * Bit 1: Other plexes in the volume are up
374 * Bit 2: The current plex is up
375 * Maybe they should be local to
376 * state.c
377 */
enum volplexstate {
    volplex_onlyusdown  = 0,    /* 0: we're the only plex, and we're down */
    volplex_alldown     = 1,    /* 1: another plex is down, and so are we */
    volplex_otherup     = 2,    /* 2: another plex is up */
    volplex_otherupdown = 3,    /* 3: other plexes are up and down */
    volplex_onlyus      = 4,    /* 4: we're up and alone */
    volplex_onlyusup    = 5,    /* 5: only we are up, others are down */
    volplex_allup       = 6,    /* 6: all plexes are up */
    volplex_someup      = 7     /* 7: some plexes are up, including us */
};
388
/*
 * state map for plex.  Each value is a distinct single
 * bit, so the values can be combined with |.
 */
enum sdstates {
    sd_emptystate       = 1 << 0,
    sd_downstate        = 1 << 1,   /* SD is down */
    sd_crashedstate     = 1 << 2,   /* SD is crashed */
    sd_obsoletestate    = 1 << 3,   /* SD is obsolete */
    sd_stalestate       = 1 << 4,   /* SD is stale */
    sd_rebornstate      = 1 << 5,   /* SD is reborn */
    sd_upstate          = 1 << 6,   /* SD is up */
    sd_initstate        = 1 << 7,   /* SD is initializing */
    sd_initializedstate = 1 << 8,   /* SD is initialized */
    sd_otherstate       = 1 << 9    /* SD is in some other state */
};
402
403 /*
404 * This is really just a parameter to pass to
405 * set_<foo>_state, but since it needs to be known
406 * in the external definitions, we need to define
407 * it here
408 */
enum setstateflags {
    /* no flags */
    setstate_none = 0,
    /* force the state change */
    setstate_force = 1 << 0,
    /* we're currently configuring, don't save */
    setstate_configuring = 1 << 1
};
414
/* Operations for parityops to perform. */
enum parityop {
    checkparity           = 0,
    rebuildparity         = 1,
    rebuildandcheckparity = 2   /* rebuildparity with the -v option */
};
421
422 #ifdef VINUMDEBUG
/* Debugging stuff: one bit per debug option */
enum debugflags {
    DEBUG_ADDRESSES      = 1 << 0,  /* show buffer information during requests */
    DEBUG_NUMOUTPUT      = 1 << 1,  /* show the value of vp->v_numoutput */
    DEBUG_RESID          = 1 << 2,  /* go into debugger in complete_rqe */
    DEBUG_LASTREQS       = 1 << 3,  /* keep a circular buffer of last requests */
    DEBUG_REVIVECONFLICT = 1 << 4,  /* print info about revive conflicts */
    DEBUG_EOFINFO        = 1 << 5,  /* print info about EOF detection */
    DEBUG_MEMFREE        = 1 << 6,  /* keep info about Frees */
    DEBUG_BIGDRIVE       = 1 << 7,  /* pretend our drives are 100 times the size */
    DEBUG_REMOTEGDB      = 1 << 8,  /* go into remote gdb */
    DEBUG_WARNINGS       = 1 << 9,  /* log various relatively harmless warnings */
    DEBUG_LOCKREQS       = 1 << 10  /* log locking requests */
};
437
/* In an i386 kernel build only, redirect longjmp to LongJmp. */
#if defined(_KERNEL) && defined(__i386__)
#define longjmp LongJmp                     /* test our longjmps */
#endif
443 #endif
444 /* Local Variables: */
445 /* fill-column: 50 */
446 /* End: */
Cache object: 95e3cd365d47fa480a0299eb0da2031a
|