1 /*-
2 * Copyright (c) 2004 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/kthread.h>
34 #include <sys/libkern.h>
35 #include <sys/malloc.h>
36 #include <sys/queue.h>
37
38 #include <geom/geom.h>
39 #include <geom/vinum/geom_vinum_var.h>
40 #include <geom/vinum/geom_vinum.h>
41 #include <geom/vinum/geom_vinum_share.h>
42
/* Thread bodies handed to kthread_create(). */
void		gv_init_td(void *arg);
void		gv_rebuild_td(void *arg);
void		gv_sync_td(void *arg);

/* File-local helpers reached from gv_start_obj(). */
static int	gv_init_plex(struct gv_plex *p);
static int	gv_rebuild_plex(struct gv_plex *p);
static int	gv_start_plex(struct gv_plex *p);
static int	gv_start_vol(struct gv_volume *v);
static int	gv_sync(struct gv_volume *v);
51
52 struct gv_sync_args {
53 struct gv_volume *v;
54 struct gv_plex *from;
55 struct gv_plex *to;
56 off_t syncsize;
57 };
58
59 void
60 gv_parityop(struct g_geom *gp, struct gctl_req *req)
61 {
62 struct gv_softc *sc;
63 struct gv_plex *p;
64 struct bio *bp;
65 struct g_consumer *cp;
66 int error, *flags, type, *rebuild, rv;
67 char *plex;
68
69 rv = -1;
70
71 plex = gctl_get_param(req, "plex", NULL);
72 if (plex == NULL) {
73 gctl_error(req, "no plex given");
74 goto out;
75 }
76
77 flags = gctl_get_paraml(req, "flags", sizeof(*flags));
78 if (flags == NULL) {
79 gctl_error(req, "no flags given");
80 goto out;
81 }
82
83 rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
84 if (rebuild == NULL) {
85 gctl_error(req, "no rebuild op given");
86 goto out;
87 }
88
89 sc = gp->softc;
90 type = gv_object_type(sc, plex);
91 switch (type) {
92 case GV_TYPE_PLEX:
93 break;
94 case GV_TYPE_VOL:
95 case GV_TYPE_SD:
96 case GV_TYPE_DRIVE:
97 default:
98 gctl_error(req, "'%s' is not a plex", plex);
99 goto out;
100 }
101
102 p = gv_find_plex(sc, plex);
103 if (p->state != GV_PLEX_UP) {
104 gctl_error(req, "plex %s is not completely accessible",
105 p->name);
106 goto out;
107 }
108 if (p->org != GV_PLEX_RAID5) {
109 gctl_error(req, "plex %s is not a RAID5 plex", p->name);
110 goto out;
111 }
112
113 cp = p->consumer;
114 error = g_access(cp, 1, 1, 0);
115 if (error) {
116 gctl_error(req, "cannot access consumer");
117 goto out;
118 }
119 g_topology_unlock();
120
121 /* Reset the check pointer when using -f. */
122 if (*flags & GV_FLAG_F)
123 p->synced = 0;
124
125 bp = g_new_bio();
126 if (bp == NULL) {
127 gctl_error(req, "cannot create BIO - out of memory");
128 g_topology_lock();
129 error = g_access(cp, -1, -1, 0);
130 goto out;
131 }
132 bp->bio_cmd = BIO_WRITE;
133 bp->bio_done = NULL;
134 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
135 bp->bio_cflags |= GV_BIO_CHECK;
136 if (*rebuild)
137 bp->bio_cflags |= GV_BIO_PARITY;
138 bp->bio_offset = p->synced;
139 bp->bio_length = p->stripesize;
140
141 /* Schedule it down ... */
142 g_io_request(bp, cp);
143
144 /* ... and wait for the result. */
145 error = biowait(bp, "gwrite");
146 g_free(bp->bio_data);
147 g_destroy_bio(bp);
148
149 if (error) {
150 /* Incorrect parity. */
151 if (error == EAGAIN)
152 rv = 1;
153
154 /* Some other error happened. */
155 else
156 gctl_error(req, "Parity check failed at offset 0x%jx, "
157 "errno %d", (intmax_t)p->synced, error);
158
159 /* Correct parity. */
160 } else
161 rv = 0;
162
163 gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));
164
165 /* Advance the checkpointer if there was no error. */
166 if (rv == 0)
167 p->synced += p->stripesize;
168
169 /* End of plex; reset the check pointer and signal it to the caller. */
170 if (p->synced >= p->size) {
171 p->synced = 0;
172 rv = -2;
173 }
174
175 g_topology_lock();
176 error = g_access(cp, -1, -1, 0);
177
178 out:
179 gctl_set_param(req, "rv", &rv, sizeof(rv));
180 }
181
182 void
183 gv_start_obj(struct g_geom *gp, struct gctl_req *req)
184 {
185 struct gv_softc *sc;
186 struct gv_volume *v;
187 struct gv_plex *p;
188 int *argc, *initsize;
189 char *argv, buf[20];
190 int err, i, type;
191
192 argc = gctl_get_paraml(req, "argc", sizeof(*argc));
193 initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
194
195 if (argc == NULL || *argc == 0) {
196 gctl_error(req, "no arguments given");
197 return;
198 }
199
200 sc = gp->softc;
201
202 for (i = 0; i < *argc; i++) {
203 snprintf(buf, sizeof(buf), "argv%d", i);
204 argv = gctl_get_param(req, buf, NULL);
205 if (argv == NULL)
206 continue;
207 type = gv_object_type(sc, argv);
208 switch (type) {
209 case GV_TYPE_VOL:
210 v = gv_find_vol(sc, argv);
211 err = gv_start_vol(v);
212 if (err) {
213 if (err == EINPROGRESS) {
214 gctl_error(req, "cannot start volume "
215 "'%s': already in progress", argv);
216 } else {
217 gctl_error(req, "cannot start volume "
218 "'%s'; errno: %d", argv, err);
219 }
220 return;
221 }
222 break;
223
224 case GV_TYPE_PLEX:
225 p = gv_find_plex(sc, argv);
226 err = gv_start_plex(p);
227 if (err) {
228 if (err == EINPROGRESS) {
229 gctl_error(req, "cannot start plex "
230 "'%s': already in progress", argv);
231 } else {
232 gctl_error(req, "cannot start plex "
233 "'%s'; errno: %d", argv, err);
234 }
235 return;
236 }
237 break;
238
239 case GV_TYPE_SD:
240 case GV_TYPE_DRIVE:
241 /* XXX not yet */
242 gctl_error(req, "cannot start '%s' - not yet supported",
243 argv);
244 return;
245 default:
246 gctl_error(req, "unknown object '%s'", argv);
247 return;
248 }
249 }
250 }
251
252 static int
253 gv_start_plex(struct gv_plex *p)
254 {
255 struct gv_volume *v;
256 int error;
257
258 KASSERT(p != NULL, ("gv_start_plex: NULL p"));
259
260 if (p->state == GV_PLEX_UP)
261 return (0);
262
263 error = 0;
264 v = p->vol_sc;
265 if ((v != NULL) && (v->plexcount > 1))
266 error = gv_sync(v);
267 else if (p->org == GV_PLEX_RAID5) {
268 if (p->state == GV_PLEX_DEGRADED)
269 error = gv_rebuild_plex(p);
270 else
271 error = gv_init_plex(p);
272 }
273
274 return (error);
275 }
276
277 static int
278 gv_start_vol(struct gv_volume *v)
279 {
280 struct gv_plex *p;
281 struct gv_sd *s;
282 int error;
283
284 KASSERT(v != NULL, ("gv_start_vol: NULL v"));
285
286 error = 0;
287
288 if (v->plexcount == 0)
289 return (ENXIO);
290
291 else if (v->plexcount == 1) {
292 p = LIST_FIRST(&v->plexes);
293 KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
294 if (p->org == GV_PLEX_RAID5) {
295 switch (p->state) {
296 case GV_PLEX_DOWN:
297 error = gv_init_plex(p);
298 break;
299 case GV_PLEX_DEGRADED:
300 error = gv_rebuild_plex(p);
301 break;
302 default:
303 return (0);
304 }
305 } else {
306 LIST_FOREACH(s, &p->subdisks, in_plex) {
307 gv_set_sd_state(s, GV_SD_UP,
308 GV_SETSTATE_CONFIG);
309 }
310 }
311 } else
312 error = gv_sync(v);
313
314 return (error);
315 }
316
317 static int
318 gv_sync(struct gv_volume *v)
319 {
320 struct gv_softc *sc;
321 struct gv_plex *p, *up;
322 struct gv_sync_args *sync;
323
324 KASSERT(v != NULL, ("gv_sync: NULL v"));
325 sc = v->vinumconf;
326 KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
327
328 /* Find the plex that's up. */
329 up = NULL;
330 LIST_FOREACH(up, &v->plexes, in_volume) {
331 if (up->state == GV_PLEX_UP)
332 break;
333 }
334
335 /* Didn't find a good plex. */
336 if (up == NULL)
337 return (ENXIO);
338
339 LIST_FOREACH(p, &v->plexes, in_volume) {
340 if ((p == up) || (p->state == GV_PLEX_UP))
341 continue;
342 if (p->flags & GV_PLEX_SYNCING) {
343 return (EINPROGRESS);
344 }
345 p->flags |= GV_PLEX_SYNCING;
346 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
347 sync->v = v;
348 sync->from = up;
349 sync->to = p;
350 sync->syncsize = GV_DFLT_SYNCSIZE;
351 kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'",
352 p->name);
353 }
354
355 return (0);
356 }
357
358 static int
359 gv_rebuild_plex(struct gv_plex *p)
360 {
361 struct gv_sync_args *sync;
362
363 if (gv_is_open(p->geom))
364 return (EBUSY);
365
366 if (p->flags & GV_PLEX_SYNCING)
367 return (EINPROGRESS);
368 p->flags |= GV_PLEX_SYNCING;
369
370 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
371 sync->to = p;
372 sync->syncsize = GV_DFLT_SYNCSIZE;
373
374 kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
375 p->name);
376
377 return (0);
378 }
379
380 static int
381 gv_init_plex(struct gv_plex *p)
382 {
383 struct gv_sd *s;
384
385 KASSERT(p != NULL, ("gv_init_plex: NULL p"));
386
387 LIST_FOREACH(s, &p->subdisks, in_plex) {
388 if (s->state == GV_SD_INITIALIZING)
389 return (EINPROGRESS);
390 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
391 s->init_size = GV_DFLT_SYNCSIZE;
392 kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s",
393 s->name);
394 }
395
396 return (0);
397 }
398
399 /* This thread is responsible for rebuilding a degraded RAID5 plex. */
400 void
401 gv_rebuild_td(void *arg)
402 {
403 struct bio *bp;
404 struct gv_plex *p;
405 struct g_consumer *cp;
406 struct gv_sync_args *sync;
407 u_char *buf;
408 off_t i;
409 int error;
410
411 buf = NULL;
412 bp = NULL;
413
414 sync = arg;
415 p = sync->to;
416 p->synced = 0;
417 cp = p->consumer;
418
419 g_topology_lock();
420 error = g_access(cp, 1, 1, 0);
421 if (error) {
422 g_topology_unlock();
423 printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
424 "%d\n", p->name, error);
425 kthread_exit(error);
426 }
427 g_topology_unlock();
428
429 buf = g_malloc(sync->syncsize, M_WAITOK);
430
431 printf("GEOM_VINUM: rebuild of %s started\n", p->name);
432 i = 0;
433 for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
434 /*
435 if (i + sync->syncsize > p->size)
436 sync->syncsize = p->size - i;
437 */
438 bp = g_new_bio();
439 if (bp == NULL) {
440 printf("GEOM_VINUM: rebuild of %s failed creating bio: "
441 "out of memory\n", p->name);
442 break;
443 }
444 bp->bio_cmd = BIO_WRITE;
445 bp->bio_done = NULL;
446 bp->bio_data = buf;
447 bp->bio_cflags |= GV_BIO_REBUILD;
448 bp->bio_offset = i;
449 bp->bio_length = p->stripesize;
450
451 /* Schedule it down ... */
452 g_io_request(bp, cp);
453
454 /* ... and wait for the result. */
455 error = biowait(bp, "gwrite");
456 if (error) {
457 printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
458 "errno: %d\n", p->name, i, error);
459 break;
460 }
461 g_destroy_bio(bp);
462 bp = NULL;
463 }
464
465 if (bp != NULL)
466 g_destroy_bio(bp);
467 if (buf != NULL)
468 g_free(buf);
469
470 g_topology_lock();
471 g_access(cp, -1, -1, 0);
472 gv_save_config_all(p->vinumconf);
473 g_topology_unlock();
474
475 p->flags &= ~GV_PLEX_SYNCING;
476 p->synced = 0;
477
478 /* Successful initialization. */
479 if (!error)
480 printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
481
482 g_free(sync);
483 kthread_exit(error);
484 }
485
486 void
487 gv_sync_td(void *arg)
488 {
489 struct bio *bp;
490 struct gv_plex *p;
491 struct g_consumer *from, *to;
492 struct gv_sync_args *sync;
493 u_char *buf;
494 off_t i;
495 int error;
496
497 sync = arg;
498
499 from = sync->from->consumer;
500 to = sync->to->consumer;
501
502 p = sync->to;
503 p->synced = 0;
504
505 error = 0;
506
507 g_topology_lock();
508 error = g_access(from, 1, 0, 0);
509 if (error) {
510 g_topology_unlock();
511 printf("GEOM_VINUM: sync from '%s' failed to access "
512 "consumer: %d\n", sync->from->name, error);
513 g_free(sync);
514 kthread_exit(error);
515 }
516 error = g_access(to, 0, 1, 0);
517 if (error) {
518 g_access(from, -1, 0, 0);
519 g_topology_unlock();
520 printf("GEOM_VINUM: sync to '%s' failed to access "
521 "consumer: %d\n", p->name, error);
522 g_free(sync);
523 kthread_exit(error);
524 }
525 g_topology_unlock();
526
527 printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
528 sync->to->name);
529 for (i = 0; i < p->size; i+= sync->syncsize) {
530 /* Read some bits from the good plex. */
531 buf = g_read_data(from, i, sync->syncsize, &error);
532 if (buf == NULL) {
533 printf("GEOM_VINUM: sync read from '%s' failed at "
534 "offset %jd; errno: %d\n", sync->from->name, i,
535 error);
536 break;
537 }
538
539 /*
540 * Create a bio and schedule it down on the 'bad' plex. We
541 * cannot simply use g_write_data() because we have to let the
542 * lower parts know that we are an initialization process and
543 * not a 'normal' request.
544 */
545 bp = g_new_bio();
546 if (bp == NULL) {
547 printf("GEOM_VINUM: sync write to '%s' failed at "
548 "offset %jd; out of memory\n", p->name, i);
549 g_free(buf);
550 break;
551 }
552 bp->bio_cmd = BIO_WRITE;
553 bp->bio_offset = i;
554 bp->bio_length = sync->syncsize;
555 bp->bio_data = buf;
556 bp->bio_done = NULL;
557
558 /*
559 * This hack declare this bio as part of an initialization
560 * process, so that the lower levels allow it to get through.
561 */
562 bp->bio_cflags |= GV_BIO_SYNCREQ;
563
564 /* Schedule it down ... */
565 g_io_request(bp, to);
566
567 /* ... and wait for the result. */
568 error = biowait(bp, "gwrite");
569 g_destroy_bio(bp);
570 g_free(buf);
571 if (error) {
572 printf("GEOM_VINUM: sync write to '%s' failed at "
573 "offset %jd; errno: %d\n", p->name, i, error);
574 break;
575 }
576
577 /* Note that we have synced a little bit more. */
578 p->synced += sync->syncsize;
579 }
580
581 g_topology_lock();
582 g_access(from, -1, 0, 0);
583 g_access(to, 0, -1, 0);
584 gv_save_config_all(p->vinumconf);
585 g_topology_unlock();
586
587 /* Successful initialization. */
588 if (!error)
589 printf("GEOM_VINUM: plex sync %s -> %s finished\n",
590 sync->from->name, sync->to->name);
591
592 p->flags &= ~GV_PLEX_SYNCING;
593 p->synced = 0;
594
595 g_free(sync);
596 kthread_exit(error);
597 }
598
599 void
600 gv_init_td(void *arg)
601 {
602 struct gv_sd *s;
603 struct gv_drive *d;
604 struct g_geom *gp;
605 struct g_consumer *cp;
606 int error;
607 off_t i, init_size, start, offset, length;
608 u_char *buf;
609
610 s = arg;
611 KASSERT(s != NULL, ("gv_init_td: NULL s"));
612 d = s->drive_sc;
613 KASSERT(d != NULL, ("gv_init_td: NULL d"));
614 gp = d->geom;
615 KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
616
617 cp = LIST_FIRST(&gp->consumer);
618 KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
619
620 s->init_error = 0;
621 init_size = s->init_size;
622 start = s->drive_offset + s->initialized;
623 offset = s->drive_offset;
624 length = s->size;
625
626 buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
627
628 g_topology_lock();
629 error = g_access(cp, 0, 1, 0);
630 if (error) {
631 s->init_error = error;
632 g_topology_unlock();
633 printf("GEOM_VINUM: subdisk '%s' init: failed to access "
634 "consumer; error: %d\n", s->name, error);
635 kthread_exit(error);
636 }
637 g_topology_unlock();
638
639 for (i = start; i < offset + length; i += init_size) {
640 error = g_write_data(cp, i, buf, init_size);
641 if (error) {
642 printf("GEOM_VINUM: subdisk '%s' init: write failed"
643 " at offset %jd (drive offset %jd); error %d\n",
644 s->name, (intmax_t)s->initialized, (intmax_t)i,
645 error);
646 break;
647 }
648 s->initialized += init_size;
649 }
650
651 g_free(buf);
652
653 g_topology_lock();
654 g_access(cp, 0, -1, 0);
655 g_topology_unlock();
656 if (error) {
657 s->init_error = error;
658 g_topology_lock();
659 gv_set_sd_state(s, GV_SD_STALE,
660 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
661 g_topology_unlock();
662 } else {
663 g_topology_lock();
664 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
665 g_topology_unlock();
666 s->initialized = 0;
667 printf("GEOM_VINUM: subdisk '%s' init: finished successfully\n",
668 s->name);
669 }
670 kthread_exit(error);
671 }
Cache object: 0e20d176be9afcfee0e7ced142d2d820
|