/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/geom/bde/g_bde_work.c,v 1.24.2.1 2005/01/31 23:26:01 imp Exp $
 */

/*
 * This source file contains the state-engine which makes things happen in the
 * right order.
 *
 * Outline:
 *	1) g_bde_start1()
 *	   Break the struct bio into multiple work packets, one per zone.
 *	2) g_bde_start2()
 *	   Set up the necessary sector buffers, start whatever read operations
 *	   we can start at this time, and put the item on the work-list.
 *	3) g_bde_worker()
 *	   Scan the work-list for items which are ready for crypto processing,
 *	   call the matching crypto function in g_bde_crypt.c, and schedule
 *	   any writes needed.  Read operations finish here by releasing the
 *	   sector buffers and delivering the original bio request.
 *	4) g_bde_write_done()
 *	   Release sector buffers and deliver the original bio request.
 *
 * Because of the C-scope rules, the functions are almost perfectly in the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
 * XXX: additional states to this state-engine.  Since no hardware available
 * XXX: at this time has AES support, implementing this has been postponed
 * XXX: until such time as it would result in a benefit.
 */
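
/*
 * Roughly speaking (a summary derived from the code below, not a contract):
 * work packets move SETUP -> WAIT -> FINISH, while sector buffers move
 * JUNK -> IO -> VALID.  The worker thread only acts on packets in WAIT
 * whose sector buffers have left the IO state.
 */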

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael.h>
#include <crypto/sha2/sha2.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_keysector(struct g_bde_work *wp);
static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");

static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

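	/*
	 * M_NOWAIT because we may be called with the worklist mutex held
	 * (see g_bde_start1()); callers deal with a NULL return.
	 */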
	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	free(wp, M_GBDE);
}

/*
 * Sector buffer allocation
 *
 * These two functions allocate and free variable-sized sector buffers.
 */
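
/*
 * A note derived from the code below: sp->malloc encodes who owns sp->data.
 * 0 means the buffer is borrowed (e.g. from the original bio), 1 means it
 * was allocated here and is freed with the sector, and 2 marks a cached
 * key sector (see g_bde_get_keysector()).
 */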

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		free(sp->data, M_GBDE);
	free(sp, M_GBDE);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			free(sp, M_GBDE);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although how much it actually helps has not yet been measured.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */
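
/*
 * Concretely (as implemented below), the cache is the softc freelist: an
 * LRU-ordered TAILQ of refcounted sectors.  A nonzero ref means the sector
 * is attached to one or more work packets, serialized through sp->owner.
 */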

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;
	off_t offset;

	offset = wp->kso;
	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

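	/* Retire the coldest cached sector if it has idled for 300 seconds. */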
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

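	/* First see if we already have this key sector cached. */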
	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
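	/* A used sector moves to the tail (MRU end) of the LRU freelist. */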
	if (sp != NULL) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return (sp);
}

static void
g_bde_release_keysector(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;
	struct g_bde_sector *sp;

	sp = wp->ksp;
	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
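	/*
	 * If other work packets still hold a reference, hand ownership to
	 * the first of them on the worklist and wake it up.
	 */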
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
	} else if (sp->error != 0) {
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
	}
}

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

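	/*
	 * fraction > 0 drops roughly 1/fraction of the cached sectors.
	 * Otherwise this is a memory-pressure purge: malloc_last_fail()
	 * reports seconds since the last malloc(9) failure, so the more
	 * recent the failure, the more sectors get dropped.
	 */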
	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while (n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}

static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
	sp = g_bde_get_keysector(wp);
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_keysector(wp);
	}
	if (sp == NULL)
		return (sp);
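	/*
	 * Not owner: somebody else has a read in flight; VALID: the cached
	 * content is ready.  Either way there is nothing to start here.
	 */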
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_keysector(wp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random bits here
 * and there may be completed and returning a number of completed bytes does
 * not convey any useful information about which bytes they were.  If some
 * piece of broken code somewhere interprets this to mean that nothing has
 * changed on the underlying media they deserve the lossage headed for them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * This is the common case "we're done with this work package" function
 */

static void
g_bde_work_done(struct g_bde_work *wp, int error)
{

	g_bde_contribute(wp->bp, wp->length, error);
	if (wp->sp != NULL)
		g_bde_delete_sector(wp->softc, wp->sp);
	if (wp->ksp != NULL)
		g_bde_release_keysector(wp);
	g_bde_delete_work(wp);
}

/*
 * A write operation has finished.  When we have all expected cows in the
 * barn, close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_work_done(wp, wp->error);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

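	/*
	 * A write work packet owns two sectors: the encrypted payload
	 * (wp->sp) and its key sector (wp->ksp).  The packet is finished
	 * only when both writes have come home.
	 */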
	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
		g_bde_work_done(wp, wp->error);
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * A read operation has finished.  Mark the sector as no longer busy with
 * I/O and wake up the worker thread to let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	if (sp->error == 0)
		sp->state = VALID;
	else
		sp->state = JUNK;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the hardware assisted crypto in src/sys/opencrypto,
 * XXX: using a thread here is probably not needed.
 */

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp, *twp;
	struct g_geom *gp;
	int restart, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		restart = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (%d)",
				    wp->ksp->state));
			}

			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
				continue;

			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_work_done(wp, wp->ksp->error);
				continue;
			}
			switch (wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_work_done(wp, wp->error);
					break;
				}
				if (wp->sp->error != 0) {
					g_bde_work_done(wp, wp->sp->error);
					break;
				}
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_read(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_work_done(wp, wp->sp->error);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp,
				    ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp,
				    ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_work_done(wp, error);
					break;
				}
				error = g_bde_start_write(wp->ksp);
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_start_write(wp->sp);
				break;
			}
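			/*
			 * The crypto calls above dropped the mutex, so the
			 * worklist may have changed; restart the scan.
			 */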
			if (restart)
				break;
		}
		if (!restart) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "-", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	kthread_exit(0);
}

/*
 * g_bde_start1() has chopped the incoming request up so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need, and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
	switch (wp->bp->bio_cmd) {
	case BIO_READ:
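		/*
		 * Reads decrypt in place: the data sector borrows the
		 * bio's own buffer rather than allocating its own.
		 */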
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
		break;
	case BIO_DELETE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	case BIO_WRITE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	default:
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector()
 * determine how long each one can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

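	/*
	 * Chop the request into per-zone work packets; g_bde_map_sector()
	 * trims wp->length so that each packet stays inside one zone.
	 */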
	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			wp->length = bp->bio_length - done;
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}