/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004, 2007 Lukas Ertl
 * Copyright (c) 2007, 2009 Ulf Lilleengen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int	gv_check_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static int	gv_normal_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static void	gv_plex_flush(struct gv_plex *);
static int	gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
		    int *, int);
static int	gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
		    off_t, caddr_t);
static void	gv_post_bio(struct gv_softc *, struct bio *);

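/*
 * Handle a BIO that the volume layer sent down to this plex.  The request is
 * split into one or more sub-requests (per subdisk, or per RAID-5 packet),
 * which are then issued to the consumers of the underlying drives.
 */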
void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp;
	struct gv_sd *s;
	struct gv_raid5_packet *wp;
	caddr_t addr;
	off_t bcount, boff, len;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request; we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single request
		 * might involve several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = gv_raid5_start(p, bp, addr, boff, bcount);
			if (wp == NULL)
				return;

			len = wp->length;

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			len = gv_plex_normal_request(p, bp, boff, bcount, addr);
		}
		if (len < 0)
			return;

		bcount -= len;
		addr += len;
		boff += len;
	}

	/*
	 * Fire off all sub-requests.  We get the correct consumer (== drive)
	 * to send each request to via the subdisk that was stored in
	 * cbp->bio_caller1.
	 */
	cbp = bioq_takefirst(p->bqueue);
	while (cbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in correct order, otherwise
		 * we trip over the parity, as it might be overwritten by
		 * another sub-request.  We abuse cbp->bio_caller2 to mark
		 * potential overlap situations.
		 */
		if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
			/* Park the bio on the waiting queue. */
			cbp->bio_pflags |= GV_BIO_ONHOLD;
			bioq_disksort(p->wqueue, cbp);
		} else {
			s = cbp->bio_caller1;
			g_io_request(cbp, s->drive_sc->consumer);
		}
		cbp = bioq_takefirst(p->bqueue);
	}
}

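/*
 * Translate a plex-relative offset and byte count into the corresponding
 * offset and length on one subdisk, and report which subdisk that is.
 */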
static int
gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int growing)
{
	struct gv_sd *s;
	int i, sdcount;
	off_t len_left, stripeend, stripeno, stripestart;

	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff) {
				*sdno = i;
				break;
			}
			i++;
		}
		if (s == NULL || s->drive_sc == NULL)
			return (GV_ERR_NOTFOUND);

		/* Calculate corresponding offsets on disk. */
		*real_off = boff - s->plex_offset;
		len_left = s->size - (*real_off);
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
		*real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;
		KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));

		/* Take growing subdisks into account when calculating. */
		sdcount = gv_sdcount(p, (boff >= p->synced));

		if (!(boff + bcount <= p->synced) &&
		    (p->flags & GV_PLEX_GROWING) &&
		    !growing)
			return (GV_ERR_ISBUSY);
		*sdno = stripeno % sdcount;

		KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
		stripestart = (stripeno / sdcount) *
		    p->stripesize;
		KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
		stripeend = stripestart + p->stripesize;
		*real_off = boff - (stripeno * p->stripesize) +
		    stripestart;
		len_left = stripeend - *real_off;
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));

		*real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (GV_ERR_PLEXORG);
	}
	return (0);
}

/*
 * Prepare a normal plex request.
 */
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount, caddr_t addr)
{
	struct gv_sd *s;
	struct bio *cbp;
	off_t real_len, real_off;
	int i, err, sdno;

	s = NULL;
	sdno = -1;
	real_len = real_off = 0;

	err = ENXIO;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		goto bad;

	err = gv_plex_offset(p, boff, bcount, &real_off,
	    &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
	/* If the request was blocked, put it on the delayed-request queue. */
	if (err == GV_ERR_ISBUSY) {
		bioq_disksort(p->rqueue, bp);
		return (-1); /* "Fail", and delay request. */
	}
	if (err) {
		err = ENXIO;
		goto bad;
	}
	err = ENXIO;

	/* Find the right subdisk. */
	i = 0;
	LIST_FOREACH(s, &p->subdisks, in_plex) {
		if (i == sdno)
			break;
		i++;
	}

	/* Subdisk not found. */
	if (s == NULL || s->drive_sc == NULL)
		goto bad;

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;
	case GV_SD_DOWN:
		if (bp->bio_pflags & GV_BIO_INTERNAL)
			G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
			    " order to perform administrative requests");
		goto bad;
	case GV_SD_STALE:
		if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
			G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
			    "regular requests");
			goto bad;
		}

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;
	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			goto bad;
		break;
	default:
		/* All other subdisk states mean it's not accessible. */
		goto bad;
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		err = ENOMEM;
		goto bad;
	}
	cbp->bio_offset = real_off + s->drive_offset;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	s->drive_sc->active++;

	/* Store the sub-requests now and let others issue them. */
	bioq_insert_tail(p->bqueue, cbp);
	return (real_len);
bad:
	G_VINUM_LOGREQ(0, bp, "plex request failed.");
	/* Building the sub-request failed.  If internal BIO, do not deliver. */
	if (bp->bio_pflags & GV_BIO_INTERNAL) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		g_destroy_bio(bp);
		p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
		    GV_PLEX_GROWING);
		return (-1);
	}
	g_io_deliver(bp, err);
	return (-1);
}

/*
 * Handle a completed request to a striped or concatenated plex.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
	struct bio *pbp;

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		/*
		 * Just set it to length since multiple plexes will
		 * screw things up.
		 */
		pbp->bio_completed = pbp->bio_length;
		if (pbp->bio_pflags & GV_BIO_SYNCREQ)
			gv_sync_complete(p, pbp);
		else if (pbp->bio_pflags & GV_BIO_GROW)
			gv_grow_complete(p, pbp);
		else
			g_io_deliver(pbp, pbp->bio_error);
	}
}

/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	off_t completed;
	int i;

	completed = 0;
	sc = p->vinumconf;
	wp = bp->bio_caller2;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			/*
			 * XOR this sub-request into the packet buffer to
			 * reconstruct the missing data.
			 */
			for (i = 0; i < wp->length; i++)
				wp->data[i] ^= bp->bio_data[i];
			break;
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			completed = wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		/* XXX can this ever happen? */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			cbp = wp->parity;
			if (cbp != NULL) {
				/* Fold the written data into the parity buffer. */
				for (i = 0; i < wp->length; i++)
					cbp->bio_data[i] ^= bp->bio_data[i];
			}
			break;
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				completed = wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += completed;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* Hand it over for checking or delivery. */
		if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_CHECK)) {
			gv_parity_complete(p, pbp);
		} else if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_REBUILD)) {
			gv_rebuild_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_INIT) {
			gv_init_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
			gv_sync_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_GROW) {
			gv_grow_complete(p, pbp);
		} else {
			g_io_deliver(pbp, pbp->bio_error);
		}
	}

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

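/*
 * A parity check or rebuild packet has collected its sub-requests.  Compare
 * the freshly computed parity with the parity read from disk and, if they
 * differ, flag the error and, when a rebuild was requested, write the
 * corrected parity back.  Returns whether the packet is finished.
 */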
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}
	}

	return (finished);
}

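/*
 * Handle the parity of a regular RAID-5 write packet: first send down the
 * sub-request that was held back, folding its data into the parity buffer,
 * then write out the parity itself.  Returns whether the packet is finished.
 */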
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	struct gv_sd *s;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		s = cbp->bio_caller1;
		g_io_request(cbp, s->drive_sc->consumer);
		finished = 0;
	}

	return (finished);
}

/* Flush the queue with delayed requests. */
static void
gv_plex_flush(struct gv_plex *p)
{
	struct bio *bp;

	bp = bioq_takefirst(p->rqueue);
	while (bp != NULL) {
		gv_plex_start(p, bp);
		bp = bioq_takefirst(p->rqueue);
	}
}

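/* Hand a BIO to the vinum worker thread by queuing it on the down queue. */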
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(bp != NULL, ("NULL bp"));
	mtx_lock(&sc->bqueue_mtx);
	bioq_disksort(sc->bqueue_down, bp);
	wakeup(sc);
	mtx_unlock(&sc->bqueue_mtx);
}

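/*
 * Create one step of a plex synchronization: a read from the source plex
 * 'from' or a write to the target plex 'to' of 'length' bytes at 'offset',
 * and queue it for the worker thread.
 */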
int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
    off_t length, int type, caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(from != NULL, ("NULL from"));
	KASSERT(to != NULL, ("NULL to"));
	sc = from->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
		    "%jd; out of memory", from->name, offset);
		return (ENOMEM);
	}
	bp->bio_length = length;
	bp->bio_done = NULL;
	bp->bio_pflags |= GV_BIO_SYNCREQ;
	bp->bio_offset = offset;
	bp->bio_caller1 = from;
	bp->bio_caller2 = to;
	bp->bio_cmd = type;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
	bp->bio_data = data;

	/* Send down next. */
	gv_post_bio(sc, bp);
	//gv_plex_start(from, bp);
	return (0);
}

/*
 * Handle a finished plex sync bio.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
	struct gv_plex *from, *p;
	struct gv_sd *s;
	struct gv_volume *v;
	struct gv_softc *sc;
	off_t offset;
	int err;

	g_topology_assert_not();

	err = 0;
	KASSERT(to != NULL, ("NULL to"));
	KASSERT(bp != NULL, ("NULL bp"));
	from = bp->bio_caller2;
	KASSERT(from != NULL, ("NULL from"));
	v = to->vol_sc;
	KASSERT(v != NULL, ("NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read the next one. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		to->synced += bp->bio_length;
		/* If we're finished, clean up. */
		if (bp->bio_offset + bp->bio_length >= from->size) {
			G_VINUM_DEBUG(1, "syncing of %s from %s completed",
			    to->name, from->name);
			/* Update our state. */
			LIST_FOREACH(s, &to->subdisks, in_plex)
				gv_set_sd_state(s, GV_SD_UP, 0);
			gv_update_plex_state(to);
			to->flags &= ~GV_PLEX_SYNCING;
			to->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_sync_request(from, to, offset,
			    MIN(bp->bio_length, from->size - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);
	/* Clean up if there was an error. */
	if (err) {
		to->flags &= ~GV_PLEX_SYNCING;
		G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
	}

	/* Check if all plexes are synced, and lower refcounts. */
	g_topology_lock();
	LIST_FOREACH(p, &v->plexes, in_volume) {
		if (p->flags & GV_PLEX_SYNCING) {
			g_topology_unlock();
			return (-1);
		}
	}
	/* If we came here, all plexes are synced, and we're free. */
	gv_access(v->provider, -1, -1, 0);
	g_topology_unlock();
	G_VINUM_DEBUG(1, "plex sync completed");
	gv_volume_flush(v);
	return (0);
}

/*
 * Create a new bio struct for the next grow request.
 */
int
gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
    caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_grow_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
		    "out of memory", p->name);
		return (ENOMEM);
	}

	bp->bio_cmd = type;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_caller1 = p;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_GROW;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC;
	bp->bio_data = data;

	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp);
	return (0);
}

/*
 * Finish handling of a bio to a growing plex.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	struct gv_volume *v;
	off_t origsize, offset;
	int sdcount, err;

	v = p->vol_sc;
	KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
	err = 0;

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		p->synced += bp->bio_length;
		err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read next. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);

		/* Find the real size of the plex. */
		sdcount = gv_sdcount(p, 1);
		s = LIST_FIRST(&p->subdisks);
		KASSERT(s != NULL, ("NULL s"));
		origsize = (s->size * (sdcount - 1));
		if (bp->bio_offset + bp->bio_length >= origsize) {
			G_VINUM_DEBUG(1, "growing of %s completed", p->name);
			p->flags &= ~GV_PLEX_GROWING;
			LIST_FOREACH(s, &p->subdisks, in_plex) {
				s->flags &= ~GV_SD_GROW;
				gv_set_sd_state(s, GV_SD_UP, 0);
			}
			p->size = gv_plex_size(p);
			gv_update_vol_size(v, gv_vol_size(v));
			gv_set_plex_state(p, GV_PLEX_UP, 0);
			g_topology_lock();
			gv_access(v->provider, -1, -1, 0);
			g_topology_unlock();
			p->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			/* Issue delayed requests. */
			gv_plex_flush(p);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_grow_request(p, offset,
			    MIN(bp->bio_length, origsize - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);

	if (err) {
		p->flags &= ~GV_PLEX_GROWING;
		G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
	}
}

/*
 * Create an initialization BIO and send it off to the consumer.  Assume that
 * we are handed the initialization data as a parameter.
 */
void
gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
{
	struct gv_drive *d;
	struct g_consumer *cp;
	struct bio *bp, *cbp;

	KASSERT(s != NULL, ("gv_init_request: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_request: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_request: NULL cp"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	bp->bio_cmd = BIO_WRITE;
	bp->bio_data = data;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_INIT;
	bp->bio_offset = start;
	bp->bio_caller1 = s;

	/* Then, of course, we have to clone it. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	d->active++;
	/* Send it off to the consumer. */
	g_io_request(cbp, cp);
}

/*
 * Handle a finished initialization BIO.
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_drive *d;
	struct g_consumer *cp;
	struct gv_sd *s;
	off_t start, length;
	caddr_t data;
	int error;

	s = bp->bio_caller1;
	start = bp->bio_offset;
	length = bp->bio_length;
	error = bp->bio_error;
	data = bp->bio_data;

	KASSERT(s != NULL, ("gv_init_complete: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_complete: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

	g_destroy_bio(bp);

	/*
	 * First we need to find out if it was okay, and abort if it's not.
	 * Then we need to free previous buffers, find out the correct subdisk,
	 * and get the correct starting point and length for the next BIO.
	 */
	if (start >= s->drive_offset + s->size) {
		/* Free the data we initialized. */
		g_free(data);
		g_topology_assert_not();
		g_topology_lock();
		g_access(cp, 0, -1, 0);
		g_topology_unlock();
		if (error) {
			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
			    GV_SETSTATE_CONFIG);
		} else {
			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
			s->initialized = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
			    "successfully", s->name);
		}
		return;
	}
	s->initialized += length;
	start += length;
	gv_init_request(s, start, data, length);
}

/*
 * Create a new bio struct for the next parity rebuild.  Used both by the
 * internal rebuild of degraded plexes and by user-initiated rebuilds/checks.
 */
void
gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_parity_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
		    "out of memory", p->name);
		return;
	}

	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = p->stripesize;
	bp->bio_caller1 = p;

	/*
	 * Check if it's a rebuild of a degraded plex or a user request of
	 * parity rebuild.
	 */
	if (flags & GV_BIO_REBUILD)
		bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
	else if (flags & GV_BIO_CHECK)
		bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
	else {
		G_VINUM_DEBUG(0, "invalid flags given in rebuild");
		return;
	}

	bp->bio_pflags = flags;
	bp->bio_pflags |= GV_BIO_MALLOC;

	/* We still have more parity to build. */
	bp->bio_offset = offset;
	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp); /* Send it down to the plex. */
}

/*
 * Handle a finished parity write.
 */
void
gv_parity_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	int error, flags;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	flags &= ~GV_BIO_MALLOC;

	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error == EAGAIN) {
		G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
		    (intmax_t)p->synced);
	}

	/* Any error is fatal, except EAGAIN when we're rebuilding. */
	if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
		    "errno %d", p->name, (intmax_t)p->synced, error);
		return;
	} else {
		p->synced += p->stripesize;
	}

	if (p->synced >= p->size) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		/* We're finished. */
		G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
		p->synced = 0;
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		return;
	}

	/* Send down the next request; it decides itself whether more are needed. */
	gv_parity_request(p, flags, p->synced);
}

/*
 * Handle a finished plex rebuild bio.
 */
void
gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	int error, flags;
	off_t offset;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	offset = bp->bio_offset;
	flags &= ~GV_BIO_MALLOC;
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error) {
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
		    p->name, (intmax_t)offset, error);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
	if (offset >= p->size) {
		/* We're finished. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
		gv_save_config(p->vinumconf);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		/* Try to up all subdisks. */
		LIST_FOREACH(s, &p->subdisks, in_plex)
			gv_update_sd_state(s);
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	/* Send down the next request; it decides itself whether more are needed. */
	gv_parity_request(p, flags, offset);
}