/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/5.1/sys/geom/bde/g_bde_work.c 114715 2003-05-05 08:37:07Z phk $
 *
 * This source file contains the state-engine which makes things happen in the
 * right order.
 *
 * Outline:
 *	1) g_bde_start1()
 *	   Break the struct bio into multiple work packets, one per zone.
 *	2) g_bde_start2()
 *	   Set up the necessary sector buffers, start the read operations
 *	   which can be started at this time, and put the item on the
 *	   work-list.
 *	3) g_bde_worker()
 *	   Scan the work-list for items which are ready for crypto processing,
 *	   call the matching crypto function in g_bde_crypt.c and schedule
 *	   any writes needed.  Read operations finish here by releasing the
 *	   sector buffers and delivering the original bio request.
 *	4) g_bde_write_done()
 *	   Release sector buffers and deliver the original bio request.
 *
 * Because of C's scope rules, the functions appear in almost exactly the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will
 * XXX: add additional states to this state-engine.  Since no hardware
 * XXX: available at this time has AES support, implementing this has been
 * XXX: postponed until such time as it would result in a benefit.
 */
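
/*
 * A rough map of the states used below: work items progress
 * SETUP -> WAIT -> FINISH, while sector buffers progress
 * JUNK -> IO -> VALID as their I/O completes.
 */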

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael.h>
#include <crypto/sha2/sha2.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_keysector(struct g_bde_work *wp);
static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");
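
/*
 * Both constructors allocate with M_NOWAIT and can therefore fail and
 * return NULL; all callers must, and do, handle this.
 */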

static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	free(wp, M_GBDE);
}

/*
 * Sector buffer allocation
 *
 * These two functions allocate and free variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
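
/*
 * NB: sp->malloc is nonzero only when we allocated sp->data ourselves
 * (1 for plain sector buffers, 2 for cached key sectors); buffers
 * borrowed from the original bio (see g_bde_start2()) are not freed here.
 */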

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		free(sp->data, M_GBDE);
	free(sp, M_GBDE);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			free(sp, M_GBDE);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although the extent of the benefit has not been determined
 * at this point.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
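	/*
	 * Scrub the buffer before it is freed; cached key sector contents
	 * should not linger in recycled memory.
	 */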
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;
	off_t offset;

	offset = wp->kso;
	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

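	/*
	 * Heuristic: if malloc(9) failed more recently than g_bde_ncache
	 * seconds ago, memory is tight; trim the cache instead of growing it.
	 */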
	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

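	/* Expire the coldest sector if it is unreferenced and 300 s idle. */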
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

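	/* See if the wanted key sector is already in the cache. */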
	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
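		/*
		 * Cache miss.  Try to recycle an unreferenced cached
		 * sector; a new one is allocated only when nothing can
		 * be recycled.
		 */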
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
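	/* Move the sector to the tail of the LRU freelist and timestamp it. */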
	if (sp != NULL) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return (sp);
}

static void
g_bde_release_keysector(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;
	struct g_bde_sector *sp;

	sp = wp->ksp;
	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
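	/*
	 * If other work items still hold references, hand ownership to the
	 * first one on the worklist waiting for this key sector and wake
	 * up the worker thread.
	 */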
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
	} else if (sp->error != 0) {
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
	}
}
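
/*
 * Purge unreferenced sectors from the cache.  A positive fraction purges
 * roughly 1/fraction of the cache; a non-positive fraction sizes the purge
 * to the current memory pressure as indicated by malloc_last_fail().
 */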

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while (n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}
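
/*
 * Get the key sector for a work item, retrying once after a purge if
 * memory is tight.  If another work item owns the sector, or it is
 * already VALID, there is nothing to start; otherwise kick off the read.
 */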

static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
	sp = g_bde_get_keysector(wp);
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_keysector(wp);
	}
	if (sp == NULL)
		return (sp);
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_keysector(wp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random bits here
 * and there may be completed and returning a number of completed bytes does
 * not convey any useful information about which bytes they were.  If some
 * piece of broken code somewhere interprets this to mean that nothing has
 * changed on the underlying media they deserve the lossage headed for them.
 *
 * A single mutex per g_bde instance is used to serialize these updates.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	sc = bp->bio_driver1;
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * A write operation has finished.  When we have all expected cows in the
 * barn, close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_delete_sector(sc, sp);
		g_bde_delete_work(wp);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

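	/*
	 * A write work item issues two writes, one for the data sector and
	 * one for the key sector; only when both have completed can we
	 * contribute the result and retire the work item.
	 */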
	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_release_keysector(wp);
		g_bde_delete_work(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * A read operation has finished.  Mark the sector as no longer busy with
 * I/O, wake up the worker thread and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	if (sp->error == 0)
		sp->state = VALID;
	else
		sp->state = JUNK;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption
 * XXX: using a thread here is probably not needed.
 */

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	struct g_geom *gp;
	int busy, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		busy = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH(wp, &sc->worklist, list) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

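			/*
			 * If the key sector is owned by another work item
			 * or its I/O is still in flight, this item cannot
			 * be touched yet; it must wait until the key
			 * sector is VALID.
			 */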
			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (JUNK ?)"));
			}

			if (wp->bp->bio_cmd == BIO_READ &&
			    wp->sp->state == IO)
				continue;

			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_contribute(wp->bp, wp->length,
				    wp->ksp->error);
				g_bde_delete_sector(sc, wp->sp);
				g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				busy++;
				break;
			}
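			/*
			 * Dispatch on the original request type.  The
			 * worklist mutex is dropped around the crypto
			 * operations, which may be slow.
			 */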
			switch (wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_contribute(wp->bp, wp->length,
					    wp->error);
				} else {
					if (wp->sp->error == 0) {
						mtx_unlock(&sc->worklist_mutex);
						g_bde_crypt_read(wp);
						mtx_lock(&sc->worklist_mutex);
					}
					g_bde_contribute(wp->bp, wp->length,
					    wp->sp->error);
				}
				g_bde_delete_sector(sc, wp->sp);
				if (wp->ksp != NULL)
					g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_contribute(wp->bp, wp->length, error);
					g_bde_release_keysector(wp);
					g_bde_delete_sector(sc, wp->sp);
					g_bde_delete_work(wp);
					break;
				}
				error = g_bde_start_write(wp->ksp);
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				break;
			}
			busy++;
			break;
		}
		if (!busy) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "g_bde", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	mtx_lock(&Giant);
	kthread_exit(0);
}

/*
 * g_bde_start1 has chopped the incoming request up so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
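	/*
	 * Note: for BIO_READ the data sector is created with len 0 below
	 * and borrows the original bio's buffer, so the ciphertext is read
	 * directly into place.
	 */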
	if (wp->bp->bio_cmd == BIO_READ) {
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
	} else if (wp->bp->bio_cmd == BIO_DELETE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
	} else if (wp->bp->bio_cmd == BIO_WRITE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
	} else {
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector() determine
 * how long each one can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			wp->length = bp->bio_length - done;
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}