1 /*-
2 * Copyright (c) 2004, 2007 Lukas Ertl
3 * Copyright (c) 2007, 2009 Ulf Lilleengen
4 * Copyright (c) 1997, 1998, 1999
5 * Nan Yang Computer Services Limited. All rights reserved.
6 *
7 * Parts written by Greg Lehey
8 *
9 * This software is distributed under the so-called ``Berkeley
10 * License'':
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Nan Yang Computer
23 * Services Limited.
24 * 4. Neither the name of the Company nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided ``as is'', and any express or implied
29 * warranties, including, but not limited to, the implied warranties of
30 * merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall the company or contributors be liable for any
32 * direct, indirect, incidental, special, exemplary, or consequential
33 * damages (including, but not limited to, procurement of substitute
34 * goods or services; loss of use, data, or profits; or business
35 * interruption) however caused and on any theory of liability, whether
36 * in contract, strict liability, or tort (including negligence or
37 * otherwise) arising in any way out of the use of this software, even if
38 * advised of the possibility of such damage.
39 *
40 */
41
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD: releng/8.3/sys/geom/vinum/geom_vinum_subr.c 191855 2009-05-06 19:18:19Z lulf $");
44
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/systm.h>
48
49 #include <geom/geom.h>
50 #include <geom/vinum/geom_vinum_var.h>
51 #include <geom/vinum/geom_vinum.h>
52 #include <geom/vinum/geom_vinum_share.h>
53
54 int gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
55 static off_t gv_plex_smallest_sd(struct gv_plex *);
56
57 void
58 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
59 {
60 char *aptr, *bptr, *cptr;
61 struct gv_volume *v, *v2;
62 struct gv_plex *p, *p2;
63 struct gv_sd *s, *s2;
64 int error, is_newer, tokens;
65 char *token[GV_MAXARGS];
66
67 is_newer = gv_drive_is_newer(sc, d);
68
69 /* Until the end of the string *buf. */
70 for (aptr = buf; *aptr != '\0'; aptr = bptr) {
71 bptr = aptr;
72 cptr = aptr;
73
74 /* Seperate input lines. */
75 while (*bptr != '\n')
76 bptr++;
77 *bptr = '\0';
78 bptr++;
79
80 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
81
82 if (tokens <= 0)
83 continue;
84
85 if (!strcmp(token[0], "volume")) {
86 v = gv_new_volume(tokens, token);
87 if (v == NULL) {
88 G_VINUM_DEBUG(0, "config parse failed volume");
89 break;
90 }
91
92 v2 = gv_find_vol(sc, v->name);
93 if (v2 != NULL) {
94 if (is_newer) {
95 v2->state = v->state;
96 G_VINUM_DEBUG(2, "newer volume found!");
97 }
98 g_free(v);
99 continue;
100 }
101
102 gv_create_volume(sc, v);
103
104 } else if (!strcmp(token[0], "plex")) {
105 p = gv_new_plex(tokens, token);
106 if (p == NULL) {
107 G_VINUM_DEBUG(0, "config parse failed plex");
108 break;
109 }
110
111 p2 = gv_find_plex(sc, p->name);
112 if (p2 != NULL) {
113 /* XXX */
114 if (is_newer) {
115 p2->state = p->state;
116 G_VINUM_DEBUG(2, "newer plex found!");
117 }
118 g_free(p);
119 continue;
120 }
121
122 error = gv_create_plex(sc, p);
123 if (error)
124 continue;
125 /*
126 * These flags were set in gv_create_plex() and are not
127 * needed here (on-disk config parsing).
128 */
129 p->flags &= ~GV_PLEX_ADDED;
130
131 } else if (!strcmp(token[0], "sd")) {
132 s = gv_new_sd(tokens, token);
133
134 if (s == NULL) {
135 G_VINUM_DEBUG(0, "config parse failed subdisk");
136 break;
137 }
138
139 s2 = gv_find_sd(sc, s->name);
140 if (s2 != NULL) {
141 /* XXX */
142 if (is_newer) {
143 s2->state = s->state;
144 G_VINUM_DEBUG(2, "newer subdisk found!");
145 }
146 g_free(s);
147 continue;
148 }
149
150 /*
151 * Signal that this subdisk was tasted, and could
152 * possibly reference a drive that isn't in our config
153 * yet.
154 */
155 s->flags |= GV_SD_TASTED;
156
157 if (s->state == GV_SD_UP)
158 s->flags |= GV_SD_CANGOUP;
159
160 error = gv_create_sd(sc, s);
161 if (error)
162 continue;
163
164 /*
165 * This flag was set in gv_create_sd() and is not
166 * needed here (on-disk config parsing).
167 */
168 s->flags &= ~GV_SD_NEWBORN;
169 s->flags &= ~GV_SD_GROW;
170 }
171 }
172 }
173
174 /*
175 * Format the vinum configuration properly. If ondisk is non-zero then the
176 * configuration is intended to be written to disk later.
177 */
178 void
179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
180 {
181 struct gv_drive *d;
182 struct gv_sd *s;
183 struct gv_plex *p;
184 struct gv_volume *v;
185
186 /*
187 * We don't need the drive configuration if we're not writing the
188 * config to disk.
189 */
190 if (!ondisk) {
191 LIST_FOREACH(d, &sc->drives, drive) {
192 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
193 d->name, d->device);
194 }
195 }
196
197 LIST_FOREACH(v, &sc->volumes, volume) {
198 if (!ondisk)
199 sbuf_printf(sb, "%s", prefix);
200 sbuf_printf(sb, "volume %s", v->name);
201 if (ondisk)
202 sbuf_printf(sb, " state %s", gv_volstate(v->state));
203 sbuf_printf(sb, "\n");
204 }
205
206 LIST_FOREACH(p, &sc->plexes, plex) {
207 if (!ondisk)
208 sbuf_printf(sb, "%s", prefix);
209 sbuf_printf(sb, "plex name %s org %s ", p->name,
210 gv_plexorg(p->org));
211 if (gv_is_striped(p))
212 sbuf_printf(sb, "%ds ", p->stripesize / 512);
213 if (p->vol_sc != NULL)
214 sbuf_printf(sb, "vol %s", p->volume);
215 if (ondisk)
216 sbuf_printf(sb, " state %s", gv_plexstate(p->state));
217 sbuf_printf(sb, "\n");
218 }
219
220 LIST_FOREACH(s, &sc->subdisks, sd) {
221 if (!ondisk)
222 sbuf_printf(sb, "%s", prefix);
223 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
224 "%jds", s->name, s->drive, s->size / 512,
225 s->drive_offset / 512);
226 if (s->plex_sc != NULL) {
227 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
228 s->plex_offset / 512);
229 }
230 if (ondisk)
231 sbuf_printf(sb, " state %s", gv_sdstate(s->state));
232 sbuf_printf(sb, "\n");
233 }
234 }
235
236 static off_t
237 gv_plex_smallest_sd(struct gv_plex *p)
238 {
239 struct gv_sd *s;
240 off_t smallest;
241
242 KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
243
244 s = LIST_FIRST(&p->subdisks);
245 if (s == NULL)
246 return (-1);
247 smallest = s->size;
248 LIST_FOREACH(s, &p->subdisks, in_plex) {
249 if (s->size < smallest)
250 smallest = s->size;
251 }
252 return (smallest);
253 }
254
255 /* Walk over plexes in a volume and count how many are down. */
256 int
257 gv_plexdown(struct gv_volume *v)
258 {
259 int plexdown;
260 struct gv_plex *p;
261
262 KASSERT(v != NULL, ("gv_plexdown: NULL v"));
263
264 plexdown = 0;
265
266 LIST_FOREACH(p, &v->plexes, plex) {
267 if (p->state == GV_PLEX_DOWN)
268 plexdown++;
269 }
270 return (plexdown);
271 }
272
273 int
274 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
275 {
276 struct gv_sd *s2;
277 off_t psizeorig, remainder, smallest;
278
279 /* If this subdisk was already given to this plex, do nothing. */
280 if (s->plex_sc == p)
281 return (0);
282
283 /* Check correct size of this subdisk. */
284 s2 = LIST_FIRST(&p->subdisks);
285 /* Adjust the subdisk-size if necessary. */
286 if (s2 != NULL && gv_is_striped(p)) {
287 /* First adjust to the stripesize. */
288 remainder = s->size % p->stripesize;
289
290 if (remainder) {
291 G_VINUM_DEBUG(1, "size of sd %s is not a "
292 "multiple of plex stripesize, taking off "
293 "%jd bytes", s->name,
294 (intmax_t)remainder);
295 gv_adjust_freespace(s, remainder);
296 }
297
298 smallest = gv_plex_smallest_sd(p);
299 /* Then take off extra if other subdisks are smaller. */
300 remainder = s->size - smallest;
301
302 /*
303 * Don't allow a remainder below zero for running plexes, it's too
304 * painful, and if someone were to accidentally do this, the
305 * resulting array might be smaller than the original... not god
306 */
307 if (remainder < 0) {
308 if (!(p->flags & GV_PLEX_NEWBORN)) {
309 G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
310 s->name, p->name);
311 return (GV_ERR_BADSIZE);
312 }
313 /* Adjust other subdisks. */
314 LIST_FOREACH(s2, &p->subdisks, in_plex) {
315 G_VINUM_DEBUG(1, "size of sd %s is to big, "
316 "taking off %jd bytes", s->name,
317 (intmax_t)remainder);
318 gv_adjust_freespace(s2, (remainder * -1));
319 }
320 } else if (remainder > 0) {
321 G_VINUM_DEBUG(1, "size of sd %s is to big, "
322 "taking off %jd bytes", s->name,
323 (intmax_t)remainder);
324 gv_adjust_freespace(s, remainder);
325 }
326 }
327
328 /* Find the correct plex offset for this subdisk, if needed. */
329 if (s->plex_offset == -1) {
330 /*
331 * First set it to 0 to catch the case where we had a detached
332 * subdisk that didn't get any good offset.
333 */
334 s->plex_offset = 0;
335 if (p->sdcount) {
336 LIST_FOREACH(s2, &p->subdisks, in_plex) {
337 if (gv_is_striped(p))
338 s->plex_offset = p->sdcount *
339 p->stripesize;
340 else
341 s->plex_offset = s2->plex_offset +
342 s2->size;
343 }
344 }
345 }
346
347 /* There are no subdisks for this plex yet, just insert it. */
348 if (LIST_EMPTY(&p->subdisks)) {
349 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
350
351 /* Insert in correct order, depending on plex_offset. */
352 } else {
353 LIST_FOREACH(s2, &p->subdisks, in_plex) {
354 if (s->plex_offset < s2->plex_offset) {
355 LIST_INSERT_BEFORE(s2, s, in_plex);
356 break;
357 } else if (LIST_NEXT(s2, in_plex) == NULL) {
358 LIST_INSERT_AFTER(s2, s, in_plex);
359 break;
360 }
361 }
362 }
363
364 s->plex_sc = p;
365 /* Adjust the size of our plex. We check if the plex misses a subdisk,
366 * so we don't make the plex smaller than it actually should be.
367 */
368 psizeorig = p->size;
369 p->size = gv_plex_size(p);
370 /* Make sure the size is not changed. */
371 if (p->sddetached > 0) {
372 if (p->size < psizeorig) {
373 p->size = psizeorig;
374 /* We make sure wee need another subdisk. */
375 if (p->sddetached == 1)
376 p->sddetached++;
377 }
378 p->sddetached--;
379 } else {
380 if ((p->org == GV_PLEX_RAID5 ||
381 p->org == GV_PLEX_STRIPED) &&
382 !(p->flags & GV_PLEX_NEWBORN) &&
383 p->state == GV_PLEX_UP) {
384 s->flags |= GV_SD_GROW;
385 }
386 p->sdcount++;
387 }
388
389 return (0);
390 }
391
392 void
393 gv_update_vol_size(struct gv_volume *v, off_t size)
394 {
395 if (v == NULL)
396 return;
397 if (v->provider != NULL) {
398 g_topology_lock();
399 v->provider->mediasize = size;
400 g_topology_unlock();
401 }
402 v->size = size;
403 }
404
405 /* Return how many subdisks that constitute the original plex. */
406 int
407 gv_sdcount(struct gv_plex *p, int growing)
408 {
409 struct gv_sd *s;
410 int sdcount;
411
412 sdcount = p->sdcount;
413 if (growing) {
414 LIST_FOREACH(s, &p->subdisks, in_plex) {
415 if (s->flags & GV_SD_GROW)
416 sdcount--;
417 }
418 }
419
420 return (sdcount);
421 }
422
423 /* Calculates the plex size. */
424 off_t
425 gv_plex_size(struct gv_plex *p)
426 {
427 struct gv_sd *s;
428 off_t size;
429 int sdcount;
430
431 KASSERT(p != NULL, ("gv_plex_size: NULL p"));
432
433 /* Adjust the size of our plex. */
434 size = 0;
435 sdcount = gv_sdcount(p, 1);
436 switch (p->org) {
437 case GV_PLEX_CONCAT:
438 LIST_FOREACH(s, &p->subdisks, in_plex)
439 size += s->size;
440 break;
441 case GV_PLEX_STRIPED:
442 s = LIST_FIRST(&p->subdisks);
443 size = ((s != NULL) ? (sdcount * s->size) : 0);
444 break;
445 case GV_PLEX_RAID5:
446 s = LIST_FIRST(&p->subdisks);
447 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
448 break;
449 }
450
451 return (size);
452 }
453
454 /* Returns the size of a volume. */
455 off_t
456 gv_vol_size(struct gv_volume *v)
457 {
458 struct gv_plex *p;
459 off_t minplexsize;
460
461 KASSERT(v != NULL, ("gv_vol_size: NULL v"));
462
463 p = LIST_FIRST(&v->plexes);
464 if (p == NULL)
465 return (0);
466
467 minplexsize = p->size;
468 LIST_FOREACH(p, &v->plexes, in_volume) {
469 if (p->size < minplexsize) {
470 minplexsize = p->size;
471 }
472 }
473 return (minplexsize);
474 }
475
476 void
477 gv_update_plex_config(struct gv_plex *p)
478 {
479 struct gv_sd *s, *s2;
480 off_t remainder;
481 int required_sds, state;
482
483 KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
484
485 /* The plex was added to an already running volume. */
486 if (p->flags & GV_PLEX_ADDED)
487 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
488
489 switch (p->org) {
490 case GV_PLEX_STRIPED:
491 required_sds = 2;
492 break;
493 case GV_PLEX_RAID5:
494 required_sds = 3;
495 break;
496 case GV_PLEX_CONCAT:
497 default:
498 required_sds = 0;
499 break;
500 }
501
502 if (required_sds) {
503 if (p->sdcount < required_sds) {
504 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
505 }
506
507 /*
508 * The subdisks in striped plexes must all have the same size.
509 */
510 s = LIST_FIRST(&p->subdisks);
511 LIST_FOREACH(s2, &p->subdisks, in_plex) {
512 if (s->size != s2->size) {
513 G_VINUM_DEBUG(0, "subdisk size mismatch %s"
514 "(%jd) <> %s (%jd)", s->name, s->size,
515 s2->name, s2->size);
516 gv_set_plex_state(p, GV_PLEX_DOWN,
517 GV_SETSTATE_FORCE);
518 }
519 }
520
521 LIST_FOREACH(s, &p->subdisks, in_plex) {
522 /* Trim subdisk sizes to match the stripe size. */
523 remainder = s->size % p->stripesize;
524 if (remainder) {
525 G_VINUM_DEBUG(1, "size of sd %s is not a "
526 "multiple of plex stripesize, taking off "
527 "%jd bytes", s->name, (intmax_t)remainder);
528 gv_adjust_freespace(s, remainder);
529 }
530 }
531 }
532
533 p->size = gv_plex_size(p);
534 if (p->sdcount == 0)
535 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
536 else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
537 LIST_FOREACH(s, &p->subdisks, in_plex)
538 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
539 /* If added to a volume, we want the plex to be down. */
540 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
541 gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
542 p->flags &= ~GV_PLEX_ADDED;
543 } else if (p->flags & GV_PLEX_ADDED) {
544 LIST_FOREACH(s, &p->subdisks, in_plex)
545 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
546 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
547 p->flags &= ~GV_PLEX_ADDED;
548 } else if (p->state == GV_PLEX_UP) {
549 LIST_FOREACH(s, &p->subdisks, in_plex) {
550 if (s->flags & GV_SD_GROW) {
551 gv_set_plex_state(p, GV_PLEX_GROWABLE,
552 GV_SETSTATE_FORCE);
553 break;
554 }
555 }
556 }
557 /* Our plex is grown up now. */
558 p->flags &= ~GV_PLEX_NEWBORN;
559 }
560
561 /*
562 * Give a subdisk to a drive, check and adjust several parameters, adjust
563 * freelist.
564 */
565 int
566 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
567 {
568 struct gv_sd *s2;
569 struct gv_freelist *fl, *fl2;
570 off_t tmp;
571 int i;
572
573 fl2 = NULL;
574
575 /* Shortcut for "referenced" drives. */
576 if (d->flags & GV_DRIVE_REFERENCED) {
577 s->drive_sc = d;
578 return (0);
579 }
580
581 /* Check if this subdisk was already given to this drive. */
582 if (s->drive_sc != NULL) {
583 if (s->drive_sc == d) {
584 if (!(s->flags & GV_SD_TASTED)) {
585 return (0);
586 }
587 } else {
588 G_VINUM_DEBUG(0, "can't give sd '%s' to '%s' "
589 "(already on '%s')", s->name, d->name,
590 s->drive_sc->name);
591 return (GV_ERR_ISATTACHED);
592 }
593 }
594
595 /* Preliminary checks. */
596 if ((s->size > d->avail) || (d->freelist_entries == 0)) {
597 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
598 s->name);
599 return (GV_ERR_NOSPACE);
600 }
601
602 /* If no size was given for this subdisk, try to auto-size it... */
603 if (s->size == -1) {
604 /* Find the largest available slot. */
605 LIST_FOREACH(fl, &d->freelist, freelist) {
606 if (fl->size < s->size)
607 continue;
608 s->size = fl->size;
609 s->drive_offset = fl->offset;
610 fl2 = fl;
611 }
612
613 /* No good slot found? */
614 if (s->size == -1) {
615 G_VINUM_DEBUG(0, "couldn't autosize '%s' on '%s'",
616 s->name, d->name);
617 return (GV_ERR_BADSIZE);
618 }
619
620 /*
621 * ... or check if we have a free slot that's large enough for the
622 * given size.
623 */
624 } else {
625 i = 0;
626 LIST_FOREACH(fl, &d->freelist, freelist) {
627 if (fl->size < s->size)
628 continue;
629 /* Assign drive offset, if not given. */
630 if (s->drive_offset == -1)
631 s->drive_offset = fl->offset;
632 fl2 = fl;
633 i++;
634 break;
635 }
636
637 /* Couldn't find a good free slot. */
638 if (i == 0) {
639 G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
640 s->name, d->name);
641 return (GV_ERR_NOSPACE);
642 }
643 }
644
645 /* No drive offset given, try to calculate it. */
646 if (s->drive_offset == -1) {
647
648 /* Add offsets and sizes from other subdisks on this drive. */
649 LIST_FOREACH(s2, &d->subdisks, from_drive) {
650 s->drive_offset = s2->drive_offset + s2->size;
651 }
652
653 /*
654 * If there are no other subdisks yet, then set the default
655 * offset to GV_DATA_START.
656 */
657 if (s->drive_offset == -1)
658 s->drive_offset = GV_DATA_START;
659
660 /* Check if we have a free slot at the given drive offset. */
661 } else {
662 i = 0;
663 LIST_FOREACH(fl, &d->freelist, freelist) {
664 /* Yes, this subdisk fits. */
665 if ((fl->offset <= s->drive_offset) &&
666 (fl->offset + fl->size >=
667 s->drive_offset + s->size)) {
668 i++;
669 fl2 = fl;
670 break;
671 }
672 }
673
674 /* Couldn't find a good free slot. */
675 if (i == 0) {
676 G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677 "on '%s'", s->name, d->name);
678 return (GV_ERR_NOSPACE);
679 }
680 }
681
682 /*
683 * Now that all parameters are checked and set up, we can give the
684 * subdisk to the drive and adjust the freelist.
685 */
686
687 /* First, adjust the freelist. */
688 LIST_FOREACH(fl, &d->freelist, freelist) {
689 /* Look for the free slot that we have found before. */
690 if (fl != fl2)
691 continue;
692
693 /* The subdisk starts at the beginning of the free slot. */
694 if (fl->offset == s->drive_offset) {
695 fl->offset += s->size;
696 fl->size -= s->size;
697
698 /* The subdisk uses the whole slot, so remove it. */
699 if (fl->size == 0) {
700 d->freelist_entries--;
701 LIST_REMOVE(fl, freelist);
702 }
703 /*
704 * The subdisk does not start at the beginning of the free
705 * slot.
706 */
707 } else {
708 tmp = fl->offset + fl->size;
709 fl->size = s->drive_offset - fl->offset;
710
711 /*
712 * The subdisk didn't use the complete rest of the free
713 * slot, so we need to split it.
714 */
715 if (s->drive_offset + s->size != tmp) {
716 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717 fl2->offset = s->drive_offset + s->size;
718 fl2->size = tmp - fl2->offset;
719 LIST_INSERT_AFTER(fl, fl2, freelist);
720 d->freelist_entries++;
721 }
722 }
723 break;
724 }
725
726 /*
727 * This is the first subdisk on this drive, just insert it into the
728 * list.
729 */
730 if (LIST_EMPTY(&d->subdisks)) {
731 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732
733 /* There are other subdisks, so insert this one in correct order. */
734 } else {
735 LIST_FOREACH(s2, &d->subdisks, from_drive) {
736 if (s->drive_offset < s2->drive_offset) {
737 LIST_INSERT_BEFORE(s2, s, from_drive);
738 break;
739 } else if (LIST_NEXT(s2, from_drive) == NULL) {
740 LIST_INSERT_AFTER(s2, s, from_drive);
741 break;
742 }
743 }
744 }
745
746 d->sdcount++;
747 d->avail -= s->size;
748
749 s->flags &= ~GV_SD_TASTED;
750
751 /* Link back from the subdisk to this drive. */
752 s->drive_sc = d;
753
754 return (0);
755 }
756
757 void
758 gv_free_sd(struct gv_sd *s)
759 {
760 struct gv_drive *d;
761 struct gv_freelist *fl, *fl2;
762
763 KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764
765 d = s->drive_sc;
766 if (d == NULL)
767 return;
768
769 /*
770 * First, find the free slot that's immediately before or after this
771 * subdisk.
772 */
773 fl = NULL;
774 LIST_FOREACH(fl, &d->freelist, freelist) {
775 if (fl->offset == s->drive_offset + s->size)
776 break;
777 if (fl->offset + fl->size == s->drive_offset)
778 break;
779 }
780
781 /* If there is no free slot behind this subdisk, so create one. */
782 if (fl == NULL) {
783
784 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
785 fl->size = s->size;
786 fl->offset = s->drive_offset;
787
788 if (d->freelist_entries == 0) {
789 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
790 } else {
791 LIST_FOREACH(fl2, &d->freelist, freelist) {
792 if (fl->offset < fl2->offset) {
793 LIST_INSERT_BEFORE(fl2, fl, freelist);
794 break;
795 } else if (LIST_NEXT(fl2, freelist) == NULL) {
796 LIST_INSERT_AFTER(fl2, fl, freelist);
797 break;
798 }
799 }
800 }
801
802 d->freelist_entries++;
803
804 /* Expand the free slot we just found. */
805 } else {
806 fl->size += s->size;
807 if (fl->offset > s->drive_offset)
808 fl->offset = s->drive_offset;
809 }
810
811 d->avail += s->size;
812 d->sdcount--;
813 }
814
815 void
816 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
817 {
818 struct gv_drive *d;
819 struct gv_freelist *fl, *fl2;
820
821 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
822 d = s->drive_sc;
823 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
824
825 /* First, find the free slot that's immediately after this subdisk. */
826 fl = NULL;
827 LIST_FOREACH(fl, &d->freelist, freelist) {
828 if (fl->offset == s->drive_offset + s->size)
829 break;
830 }
831
832 /* If there is no free slot behind this subdisk, so create one. */
833 if (fl == NULL) {
834
835 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
836 fl->size = remainder;
837 fl->offset = s->drive_offset + s->size - remainder;
838
839 if (d->freelist_entries == 0) {
840 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
841 } else {
842 LIST_FOREACH(fl2, &d->freelist, freelist) {
843 if (fl->offset < fl2->offset) {
844 LIST_INSERT_BEFORE(fl2, fl, freelist);
845 break;
846 } else if (LIST_NEXT(fl2, freelist) == NULL) {
847 LIST_INSERT_AFTER(fl2, fl, freelist);
848 break;
849 }
850 }
851 }
852
853 d->freelist_entries++;
854
855 /* Expand the free slot we just found. */
856 } else {
857 fl->offset -= remainder;
858 fl->size += remainder;
859 }
860
861 s->size -= remainder;
862 d->avail += remainder;
863 }
864
865 /* Check if the given plex is a striped one. */
866 int
867 gv_is_striped(struct gv_plex *p)
868 {
869 KASSERT(p != NULL, ("gv_is_striped: NULL p"));
870 switch(p->org) {
871 case GV_PLEX_STRIPED:
872 case GV_PLEX_RAID5:
873 return (1);
874 default:
875 return (0);
876 }
877 }
878
879 /* Find a volume by name. */
880 struct gv_volume *
881 gv_find_vol(struct gv_softc *sc, char *name)
882 {
883 struct gv_volume *v;
884
885 LIST_FOREACH(v, &sc->volumes, volume) {
886 if (!strncmp(v->name, name, GV_MAXVOLNAME))
887 return (v);
888 }
889
890 return (NULL);
891 }
892
893 /* Find a plex by name. */
894 struct gv_plex *
895 gv_find_plex(struct gv_softc *sc, char *name)
896 {
897 struct gv_plex *p;
898
899 LIST_FOREACH(p, &sc->plexes, plex) {
900 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
901 return (p);
902 }
903
904 return (NULL);
905 }
906
907 /* Find a subdisk by name. */
908 struct gv_sd *
909 gv_find_sd(struct gv_softc *sc, char *name)
910 {
911 struct gv_sd *s;
912
913 LIST_FOREACH(s, &sc->subdisks, sd) {
914 if (!strncmp(s->name, name, GV_MAXSDNAME))
915 return (s);
916 }
917
918 return (NULL);
919 }
920
921 /* Find a drive by name. */
922 struct gv_drive *
923 gv_find_drive(struct gv_softc *sc, char *name)
924 {
925 struct gv_drive *d;
926
927 LIST_FOREACH(d, &sc->drives, drive) {
928 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
929 return (d);
930 }
931
932 return (NULL);
933 }
934
935 /* Find a drive given a device. */
936 struct gv_drive *
937 gv_find_drive_device(struct gv_softc *sc, char *device)
938 {
939 struct gv_drive *d;
940
941 LIST_FOREACH(d, &sc->drives, drive) {
942 if(!strcmp(d->device, device))
943 return (d);
944 }
945
946 return (NULL);
947 }
948
949 /* Check if any consumer of the given geom is open. */
950 int
951 gv_consumer_is_open(struct g_consumer *cp)
952 {
953 if (cp == NULL)
954 return (0);
955
956 if (cp->acr || cp->acw || cp->ace)
957 return (1);
958
959 return (0);
960 }
961
962 int
963 gv_provider_is_open(struct g_provider *pp)
964 {
965 if (pp == NULL)
966 return (0);
967
968 if (pp->acr || pp->acw || pp->ace)
969 return (1);
970
971 return (0);
972 }
973
974 /*
975 * Compare the modification dates of the drives.
976 * Return 1 if a > b, 0 otherwise.
977 */
978 int
979 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
980 {
981 struct gv_drive *d2;
982 struct timeval *a, *b;
983
984 KASSERT(!LIST_EMPTY(&sc->drives),
985 ("gv_is_drive_newer: empty drive list"));
986
987 a = &d->hdr->label.last_update;
988 LIST_FOREACH(d2, &sc->drives, drive) {
989 if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
990 (d2->hdr == NULL))
991 continue;
992 b = &d2->hdr->label.last_update;
993 if (timevalcmp(a, b, >))
994 return (1);
995 }
996
997 return (0);
998 }
999
1000 /* Return the type of object identified by string 'name'. */
1001 int
1002 gv_object_type(struct gv_softc *sc, char *name)
1003 {
1004 struct gv_drive *d;
1005 struct gv_plex *p;
1006 struct gv_sd *s;
1007 struct gv_volume *v;
1008
1009 LIST_FOREACH(v, &sc->volumes, volume) {
1010 if (!strncmp(v->name, name, GV_MAXVOLNAME))
1011 return (GV_TYPE_VOL);
1012 }
1013
1014 LIST_FOREACH(p, &sc->plexes, plex) {
1015 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1016 return (GV_TYPE_PLEX);
1017 }
1018
1019 LIST_FOREACH(s, &sc->subdisks, sd) {
1020 if (!strncmp(s->name, name, GV_MAXSDNAME))
1021 return (GV_TYPE_SD);
1022 }
1023
1024 LIST_FOREACH(d, &sc->drives, drive) {
1025 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1026 return (GV_TYPE_DRIVE);
1027 }
1028
1029 return (GV_ERR_NOTFOUND);
1030 }
1031
1032 void
1033 gv_setup_objects(struct gv_softc *sc)
1034 {
1035 struct g_provider *pp;
1036 struct gv_volume *v;
1037 struct gv_plex *p;
1038 struct gv_sd *s;
1039 struct gv_drive *d;
1040
1041 LIST_FOREACH(s, &sc->subdisks, sd) {
1042 d = gv_find_drive(sc, s->drive);
1043 if (d != NULL)
1044 gv_sd_to_drive(s, d);
1045 p = gv_find_plex(sc, s->plex);
1046 if (p != NULL)
1047 gv_sd_to_plex(s, p);
1048 gv_update_sd_state(s);
1049 }
1050
1051 LIST_FOREACH(p, &sc->plexes, plex) {
1052 gv_update_plex_config(p);
1053 v = gv_find_vol(sc, p->volume);
1054 if (v != NULL && p->vol_sc != v) {
1055 p->vol_sc = v;
1056 v->plexcount++;
1057 LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1058 }
1059 gv_update_plex_config(p);
1060 }
1061
1062 LIST_FOREACH(v, &sc->volumes, volume) {
1063 v->size = gv_vol_size(v);
1064 if (v->provider == NULL) {
1065 g_topology_lock();
1066 pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1067 pp->mediasize = v->size;
1068 pp->sectorsize = 512; /* XXX */
1069 g_error_provider(pp, 0);
1070 v->provider = pp;
1071 pp->private = v;
1072 g_topology_unlock();
1073 } else if (v->provider->mediasize != v->size) {
1074 g_topology_lock();
1075 v->provider->mediasize = v->size;
1076 g_topology_unlock();
1077 }
1078 v->flags &= ~GV_VOL_NEWBORN;
1079 gv_update_vol_state(v);
1080 }
1081 }
1082
1083 void
1084 gv_cleanup(struct gv_softc *sc)
1085 {
1086 struct gv_volume *v, *v2;
1087 struct gv_plex *p, *p2;
1088 struct gv_sd *s, *s2;
1089 struct gv_drive *d, *d2;
1090 struct gv_freelist *fl, *fl2;
1091
1092 mtx_lock(&sc->config_mtx);
1093 LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1094 LIST_REMOVE(v, volume);
1095 g_free(v->wqueue);
1096 g_free(v);
1097 }
1098 LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1099 LIST_REMOVE(p, plex);
1100 g_free(p->bqueue);
1101 g_free(p->rqueue);
1102 g_free(p->wqueue);
1103 g_free(p);
1104 }
1105 LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1106 LIST_REMOVE(s, sd);
1107 g_free(s);
1108 }
1109 LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1110 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1111 LIST_REMOVE(fl, freelist);
1112 g_free(fl);
1113 }
1114 LIST_REMOVE(d, drive);
1115 g_free(d->hdr);
1116 g_free(d);
1117 }
1118 mtx_destroy(&sc->config_mtx);
1119 }
1120
1121 /* General 'attach' routine. */
1122 int
1123 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1124 {
1125 struct gv_sd *s;
1126 struct gv_softc *sc;
1127
1128 g_topology_assert();
1129
1130 sc = p->vinumconf;
1131 KASSERT(sc != NULL, ("NULL sc"));
1132
1133 if (p->vol_sc != NULL) {
1134 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1135 p->name, p->volume);
1136 return (GV_ERR_ISATTACHED);
1137 }
1138
1139 /* Stale all subdisks of this plex. */
1140 LIST_FOREACH(s, &p->subdisks, in_plex) {
1141 if (s->state != GV_SD_STALE)
1142 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1143 }
1144 /* Attach to volume. Make sure volume is not up and running. */
1145 if (gv_provider_is_open(v->provider)) {
1146 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1147 p->name, v->name);
1148 return (GV_ERR_ISBUSY);
1149 }
1150 p->vol_sc = v;
1151 strlcpy(p->volume, v->name, sizeof(p->volume));
1152 v->plexcount++;
1153 if (rename) {
1154 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1155 v->plexcount);
1156 }
1157 LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1158
1159 /* Get plex up again. */
1160 gv_update_vol_size(v, gv_vol_size(v));
1161 gv_set_plex_state(p, GV_PLEX_UP, 0);
1162 gv_save_config(p->vinumconf);
1163 return (0);
1164 }
1165
1166 int
1167 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1168 {
1169 struct gv_sd *s2;
1170 int error, sdcount;
1171
1172 g_topology_assert();
1173
1174 /* If subdisk is attached, don't do it. */
1175 if (s->plex_sc != NULL) {
1176 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1177 s->name, s->plex);
1178 return (GV_ERR_ISATTACHED);
1179 }
1180
1181 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1182 /* First check that this subdisk has a correct offset. If none other
1183 * starts at the same, and it's correct module stripesize, it is */
1184 if (offset != -1 && offset % p->stripesize != 0)
1185 return (GV_ERR_BADOFFSET);
1186 LIST_FOREACH(s2, &p->subdisks, in_plex) {
1187 if (s2->plex_offset == offset)
1188 return (GV_ERR_BADOFFSET);
1189 }
1190
1191 /* Attach the subdisk to the plex at given offset. */
1192 s->plex_offset = offset;
1193 strlcpy(s->plex, p->name, sizeof(s->plex));
1194
1195 sdcount = p->sdcount;
1196 error = gv_sd_to_plex(s, p);
1197 if (error)
1198 return (error);
1199 gv_update_plex_config(p);
1200
1201 if (rename) {
1202 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1203 p->sdcount);
1204 }
1205 if (p->vol_sc != NULL)
1206 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1207 gv_save_config(p->vinumconf);
1208 /* We don't update the subdisk state since the user might have to
1209 * initiate a rebuild/sync first. */
1210 return (0);
1211 }
1212
1213 /* Detach a plex from a volume. */
1214 int
1215 gv_detach_plex(struct gv_plex *p, int flags)
1216 {
1217 struct gv_volume *v;
1218
1219 g_topology_assert();
1220 v = p->vol_sc;
1221
1222 if (v == NULL) {
1223 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1224 p->name);
1225 return (0); /* Not an error. */
1226 }
1227
1228 /*
1229 * Only proceed if forced or volume inactive.
1230 */
1231 if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1232 p->state == GV_PLEX_UP)) {
1233 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1234 p->name, p->volume);
1235 return (GV_ERR_ISBUSY);
1236 }
1237 v->plexcount--;
1238 /* Make sure someone don't read us when gone. */
1239 v->last_read_plex = NULL;
1240 LIST_REMOVE(p, in_volume);
1241 p->vol_sc = NULL;
1242 memset(p->volume, 0, GV_MAXVOLNAME);
1243 gv_update_vol_size(v, gv_vol_size(v));
1244 gv_save_config(p->vinumconf);
1245 return (0);
1246 }
1247
1248 /* Detach a subdisk from a plex. */
1249 int
1250 gv_detach_sd(struct gv_sd *s, int flags)
1251 {
1252 struct gv_plex *p;
1253
1254 g_topology_assert();
1255 p = s->plex_sc;
1256
1257 if (p == NULL) {
1258 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1259 s->name);
1260 return (0); /* Not an error. */
1261 }
1262
1263 /*
1264 * Don't proceed if we're not forcing, and the plex is up, or degraded
1265 * with this subdisk up.
1266 */
1267 if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1268 ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1269 G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1270 s->name, s->plex);
1271 return (GV_ERR_ISBUSY);
1272 }
1273
1274 LIST_REMOVE(s, in_plex);
1275 s->plex_sc = NULL;
1276 memset(s->plex, 0, GV_MAXPLEXNAME);
1277 p->sddetached++;
1278 gv_save_config(s->vinumconf);
1279 return (0);
1280 }
Cache object: 2cb42571ac58207f0c100343eaa81d95
|