1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2002 Poul-Henning Kamp
5 * Copyright (c) 2002 Networks Associates Technology, Inc.
6 * All rights reserved.
7 *
8 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
9 * and NAI Labs, the Security Research Division of Network Associates, Inc.
10 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
11 * DARPA CHATS research program.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $FreeBSD: releng/12.0/sys/geom/bde/g_bde_work.c 326270 2017-11-27 15:17:37Z pfg $
35 */
36 /*
37 * This source file contains the state-engine which makes things happen in the
38 * right order.
39 *
40 * Outline:
41 * 1) g_bde_start1()
42 * Break the struct bio into multiple work packets one per zone.
43 * 2) g_bde_start2()
44 * Setup the necessary sector buffers and start those read operations
45 * which we can start at this time and put the item on the work-list.
46 * 3) g_bde_worker()
47 * Scan the work-list for items which are ready for crypto processing
48 * and call the matching crypto function in g_bde_crypt.c and schedule
49 * any writes needed. Read operations finish here by releasing the
50 * sector buffers and delivering the original bio request.
51 * 4) g_bde_write_done()
52 * Release sector buffers and deliver the original bio request.
53 *
54 * Because of the C-scope rules, the functions are almost perfectly in the
55 * opposite order in this source file.
56 *
57 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
58 * XXX: additional states to this state-engine. Since no hardware available
59 * XXX: at this time has AES support, implementing this has been postponed
60 * XXX: until such time as it would result in a benefit.
61 */
62
63 #include <sys/param.h>
64 #include <sys/bio.h>
65 #include <sys/lock.h>
66 #include <sys/mutex.h>
67 #include <sys/queue.h>
68 #include <sys/malloc.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 #include <sys/proc.h>
73 #include <sys/kthread.h>
74
75 #include <crypto/rijndael/rijndael-api-fst.h>
76 #include <crypto/sha2/sha512.h>
77 #include <geom/geom.h>
78 #include <geom/bde/g_bde.h>
79
80 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
81 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
82 static void g_bde_release_keysector(struct g_bde_work *wp);
83 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
84 static int g_bde_start_read(struct g_bde_sector *sp);
85 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
86
87 /*
88 * Work item allocation.
89 *
90 * C++ would call these constructors and destructors.
91 */
92 static u_int g_bde_nwork;
93 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
94
95 static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");
96
97 static struct g_bde_work *
98 g_bde_new_work(struct g_bde_softc *sc)
99 {
100 struct g_bde_work *wp;
101
102 wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
103 if (wp == NULL)
104 return (wp);
105 wp->state = SETUP;
106 wp->softc = sc;
107 g_bde_nwork++;
108 sc->nwork++;
109 TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
110 return (wp);
111 }
112
113 static void
114 g_bde_delete_work(struct g_bde_work *wp)
115 {
116 struct g_bde_softc *sc;
117
118 sc = wp->softc;
119 g_bde_nwork--;
120 sc->nwork--;
121 TAILQ_REMOVE(&sc->worklist, wp, list);
122 free(wp, M_GBDE);
123 }
124
125 /*
126 * Sector buffer allocation
127 *
128 * These two functions allocate and free variable-sized sector buffers.
129 */
130
131 static u_int g_bde_nsect;
132 SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
133
134 static void
135 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
136 {
137
138 g_bde_nsect--;
139 sc->nsect--;
140 if (sp->malloc)
141 free(sp->data, M_GBDE);
142 free(sp, M_GBDE);
143 }
144
145 static struct g_bde_sector *
146 g_bde_new_sector(struct g_bde_work *wp, u_int len)
147 {
148 struct g_bde_sector *sp;
149
150 sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
151 if (sp == NULL)
152 return (sp);
153 if (len > 0) {
154 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
155 if (sp->data == NULL) {
156 free(sp, M_GBDE);
157 return (NULL);
158 }
159 sp->malloc = 1;
160 }
161 g_bde_nsect++;
162 wp->softc->nsect++;
163 sp->size = len;
164 sp->softc = wp->softc;
165 sp->ref = 1;
166 sp->owner = wp;
167 sp->offset = wp->so;
168 sp->state = JUNK;
169 return (sp);
170 }
171
172 /*
173 * Skey sector cache.
174 *
175 * Nothing prevents two separate I/O requests from addressing the same zone
176 * and thereby needing the same skey sector. We therefore need to sequence
177 * I/O operations to the skey sectors. A certain amount of caching is also
178 * desirable, although the extent of benefit from this is not at this point
179 * determined.
180 *
181 * XXX: GEOM may be able to grow a generic caching facility at some point
182 * XXX: to support such needs.
183 */
184
185 static u_int g_bde_ncache;
186 SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
187
188 static void
189 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
190 {
191
192 g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
193 if (sp->ref != 0)
194 return;
195 TAILQ_REMOVE(&sc->freelist, sp, list);
196 g_bde_ncache--;
197 sc->ncache--;
198 bzero(sp->data, sp->size);
199 g_bde_delete_sector(sc, sp);
200 }
201
202 static struct g_bde_sector *
203 g_bde_get_keysector(struct g_bde_work *wp)
204 {
205 struct g_bde_sector *sp;
206 struct g_bde_softc *sc;
207 off_t offset;
208
209 offset = wp->kso;
210 g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
211 sc = wp->softc;
212
213 if (malloc_last_fail() < g_bde_ncache)
214 g_bde_purge_sector(sc, -1);
215
216 sp = TAILQ_FIRST(&sc->freelist);
217 if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
218 g_bde_purge_one_sector(sc, sp);
219
220 TAILQ_FOREACH(sp, &sc->freelist, list) {
221 if (sp->offset == offset)
222 break;
223 }
224 if (sp != NULL) {
225 sp->ref++;
226 KASSERT(sp->offset == offset, ("wrong offset"));
227 KASSERT(sp->softc == wp->softc, ("wrong softc"));
228 if (sp->ref == 1)
229 sp->owner = wp;
230 } else {
231 if (malloc_last_fail() < g_bde_ncache) {
232 TAILQ_FOREACH(sp, &sc->freelist, list)
233 if (sp->ref == 0)
234 break;
235 }
236 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
237 sp = TAILQ_FIRST(&sc->freelist);
238 if (sp != NULL && sp->ref > 0)
239 sp = NULL;
240 if (sp == NULL) {
241 sp = g_bde_new_sector(wp, sc->sectorsize);
242 if (sp != NULL) {
243 g_bde_ncache++;
244 sc->ncache++;
245 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
246 sp->malloc = 2;
247 }
248 }
249 if (sp != NULL) {
250 sp->offset = offset;
251 sp->softc = wp->softc;
252 sp->ref = 1;
253 sp->owner = wp;
254 sp->state = JUNK;
255 sp->error = 0;
256 }
257 }
258 if (sp != NULL) {
259 TAILQ_REMOVE(&sc->freelist, sp, list);
260 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
261 sp->used = time_uptime;
262 }
263 wp->ksp = sp;
264 return(sp);
265 }
266
267 static void
268 g_bde_release_keysector(struct g_bde_work *wp)
269 {
270 struct g_bde_softc *sc;
271 struct g_bde_work *wp2;
272 struct g_bde_sector *sp;
273
274 sp = wp->ksp;
275 g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
276 KASSERT(sp->malloc == 2, ("Wrong sector released"));
277 sc = sp->softc;
278 KASSERT(sc != NULL, ("NULL sp->softc"));
279 KASSERT(wp == sp->owner, ("Releasing, not owner"));
280 sp->owner = NULL;
281 wp->ksp = NULL;
282 sp->ref--;
283 if (sp->ref > 0) {
284 TAILQ_REMOVE(&sc->freelist, sp, list);
285 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
286 TAILQ_FOREACH(wp2, &sc->worklist, list) {
287 if (wp2->ksp == sp) {
288 KASSERT(wp2 != wp, ("Self-reowning"));
289 sp->owner = wp2;
290 wakeup(sp->softc);
291 break;
292 }
293 }
294 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
295 } else if (sp->error != 0) {
296 sp->offset = ~0;
297 sp->error = 0;
298 sp->state = JUNK;
299 }
300 TAILQ_REMOVE(&sc->freelist, sp, list);
301 TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
302 }
303
304 static void
305 g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
306 {
307 struct g_bde_sector *sp;
308 int n;
309
310 g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
311 if (fraction > 0)
312 n = sc->ncache / fraction + 1;
313 else
314 n = g_bde_ncache - malloc_last_fail();
315 if (n < 0)
316 return;
317 if (n > sc->ncache)
318 n = sc->ncache;
319 while(n--) {
320 TAILQ_FOREACH(sp, &sc->freelist, list) {
321 if (sp->ref != 0)
322 continue;
323 TAILQ_REMOVE(&sc->freelist, sp, list);
324 g_bde_ncache--;
325 sc->ncache--;
326 bzero(sp->data, sp->size);
327 g_bde_delete_sector(sc, sp);
328 break;
329 }
330 }
331 }
332
333 static struct g_bde_sector *
334 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
335 {
336 struct g_bde_sector *sp;
337
338 g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
339 sp = g_bde_get_keysector(wp);
340 if (sp == NULL) {
341 g_bde_purge_sector(sc, -1);
342 sp = g_bde_get_keysector(wp);
343 }
344 if (sp == NULL)
345 return (sp);
346 if (sp->owner != wp)
347 return (sp);
348 if (sp->state == VALID)
349 return (sp);
350 if (g_bde_start_read(sp) == 0)
351 return (sp);
352 g_bde_release_keysector(wp);
353 return (NULL);
354 }
355
356 /*
357 * Contribute to the completion of the original bio request.
358 *
359 * We have no simple way to tell how many bits the original bio request has
360 * been segmented into, so the easiest way to determine when we can deliver
361 * it is to keep track of the number of bytes we have completed. We keep
362 * track of any errors underway and latch onto the first one.
363 *
364 * We always report "nothing done" in case of error, because random bits here
365 * and there may be completed and returning a number of completed bytes does
366 * not convey any useful information about which bytes they were. If some
367 * piece of broken code somewhere interprets this to mean that nothing has
368 * changed on the underlying media they deserve the lossage headed for them.
369 *
370 * A single mutex per g_bde instance is used to prevent contention.
371 */
372
373 static void
374 g_bde_contribute(struct bio *bp, off_t bytes, int error)
375 {
376
377 g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
378 bp, (intmax_t)bytes, error);
379 if (bp->bio_error == 0)
380 bp->bio_error = error;
381 bp->bio_completed += bytes;
382 KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
383 if (bp->bio_completed == bp->bio_length) {
384 if (bp->bio_error != 0)
385 bp->bio_completed = 0;
386 g_io_deliver(bp, bp->bio_error);
387 }
388 }
389
390 /*
391 * This is the common case "we're done with this work package" function
392 */
393
394 static void
395 g_bde_work_done(struct g_bde_work *wp, int error)
396 {
397
398 g_bde_contribute(wp->bp, wp->length, error);
399 if (wp->sp != NULL)
400 g_bde_delete_sector(wp->softc, wp->sp);
401 if (wp->ksp != NULL)
402 g_bde_release_keysector(wp);
403 g_bde_delete_work(wp);
404 }
405
406 /*
407 * A write operation has finished. When we have all expected cows in the
408 * barn close the door and call it a day.
409 */
410
411 static void
412 g_bde_write_done(struct bio *bp)
413 {
414 struct g_bde_sector *sp;
415 struct g_bde_work *wp;
416 struct g_bde_softc *sc;
417
418 sp = bp->bio_caller1;
419 sc = bp->bio_caller2;
420 mtx_lock(&sc->worklist_mutex);
421 KASSERT(sp != NULL, ("NULL sp"));
422 KASSERT(sc != NULL, ("NULL sc"));
423 KASSERT(sp->owner != NULL, ("NULL sp->owner"));
424 g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
425 if (bp->bio_error == 0 && bp->bio_completed != sp->size)
426 bp->bio_error = EIO;
427 sp->error = bp->bio_error;
428 g_destroy_bio(bp);
429 wp = sp->owner;
430 if (wp->error == 0)
431 wp->error = sp->error;
432
433 if (wp->bp->bio_cmd == BIO_DELETE) {
434 KASSERT(sp == wp->sp, ("trashed delete op"));
435 g_bde_work_done(wp, wp->error);
436 mtx_unlock(&sc->worklist_mutex);
437 return;
438 }
439
440 KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
441 KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
442 if (wp->sp == sp) {
443 g_bde_delete_sector(sc, wp->sp);
444 wp->sp = NULL;
445 } else {
446 sp->state = VALID;
447 }
448 if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
449 g_bde_work_done(wp, wp->error);
450 mtx_unlock(&sc->worklist_mutex);
451 return;
452 }
453
454 /*
455 * Send a write request for the given sector down the pipeline.
456 */
457
458 static int
459 g_bde_start_write(struct g_bde_sector *sp)
460 {
461 struct bio *bp;
462 struct g_bde_softc *sc;
463
464 g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
465 sc = sp->softc;
466 KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
467 KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
468 bp = g_new_bio();
469 if (bp == NULL)
470 return (ENOMEM);
471 bp->bio_cmd = BIO_WRITE;
472 bp->bio_offset = sp->offset;
473 bp->bio_data = sp->data;
474 bp->bio_length = sp->size;
475 bp->bio_done = g_bde_write_done;
476 bp->bio_caller1 = sp;
477 bp->bio_caller2 = sc;
478 sp->state = IO;
479 g_io_request(bp, sc->consumer);
480 return(0);
481 }
482
483 /*
484 * A read operation has finished. Mark the sector no longer iobusy and
485 * wake up the worker thread and let it do its thing.
486 */
487
488 static void
489 g_bde_read_done(struct bio *bp)
490 {
491 struct g_bde_sector *sp;
492 struct g_bde_softc *sc;
493
494 sp = bp->bio_caller1;
495 g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
496 sc = bp->bio_caller2;
497 mtx_lock(&sc->worklist_mutex);
498 if (bp->bio_error == 0 && bp->bio_completed != sp->size)
499 bp->bio_error = EIO;
500 sp->error = bp->bio_error;
501 if (sp->error == 0)
502 sp->state = VALID;
503 else
504 sp->state = JUNK;
505 wakeup(sc);
506 g_destroy_bio(bp);
507 mtx_unlock(&sc->worklist_mutex);
508 }
509
510 /*
511 * Send a read request for the given sector down the pipeline.
512 */
513
514 static int
515 g_bde_start_read(struct g_bde_sector *sp)
516 {
517 struct bio *bp;
518 struct g_bde_softc *sc;
519
520 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
521 sc = sp->softc;
522 KASSERT(sc != NULL, ("Null softc in sp %p", sp));
523 bp = g_new_bio();
524 if (bp == NULL)
525 return (ENOMEM);
526 bp->bio_cmd = BIO_READ;
527 bp->bio_offset = sp->offset;
528 bp->bio_data = sp->data;
529 bp->bio_length = sp->size;
530 bp->bio_done = g_bde_read_done;
531 bp->bio_caller1 = sp;
532 bp->bio_caller2 = sc;
533 sp->state = IO;
534 g_io_request(bp, sc->consumer);
535 return(0);
536 }
537
538 /*
539 * The worker thread.
540 *
541 * The up/down path of GEOM is not allowed to sleep or do any major work
542 * so we use this thread to do the actual crypto operations and to push
543 * the state engine onwards.
544 *
545 * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption
546 * XXX: using a thread here is probably not needed.
547 */
548
549 void
550 g_bde_worker(void *arg)
551 {
552 struct g_bde_softc *sc;
553 struct g_bde_work *wp, *twp;
554 struct g_geom *gp;
555 int restart, error;
556
557 gp = arg;
558 sc = gp->softc;
559
560 mtx_lock(&sc->worklist_mutex);
561 for (;;) {
562 restart = 0;
563 g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
564 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
565 KASSERT(wp != NULL, ("NULL wp"));
566 KASSERT(wp->softc != NULL, ("NULL wp->softc"));
567 if (wp->state != WAIT)
568 continue; /* Not interesting here */
569
570 KASSERT(wp->bp != NULL, ("NULL wp->bp"));
571 KASSERT(wp->sp != NULL, ("NULL wp->sp"));
572
573 if (wp->ksp != NULL) {
574 if (wp->ksp->owner != wp)
575 continue;
576 if (wp->ksp->state == IO)
577 continue;
578 KASSERT(wp->ksp->state == VALID,
579 ("Illegal sector state (%d)",
580 wp->ksp->state));
581 }
582
583 if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
584 continue;
585
586 if (wp->ksp != NULL && wp->ksp->error != 0) {
587 g_bde_work_done(wp, wp->ksp->error);
588 continue;
589 }
590 switch(wp->bp->bio_cmd) {
591 case BIO_READ:
592 if (wp->ksp == NULL) {
593 KASSERT(wp->error != 0,
594 ("BIO_READ, no ksp and no error"));
595 g_bde_work_done(wp, wp->error);
596 break;
597 }
598 if (wp->sp->error != 0) {
599 g_bde_work_done(wp, wp->sp->error);
600 break;
601 }
602 mtx_unlock(&sc->worklist_mutex);
603 g_bde_crypt_read(wp);
604 mtx_lock(&sc->worklist_mutex);
605 restart++;
606 g_bde_work_done(wp, wp->sp->error);
607 break;
608 case BIO_WRITE:
609 wp->state = FINISH;
610 KASSERT(wp->sp->owner == wp,
611 ("Write not owner sp"));
612 KASSERT(wp->ksp->owner == wp,
613 ("Write not owner ksp"));
614 mtx_unlock(&sc->worklist_mutex);
615 g_bde_crypt_write(wp);
616 mtx_lock(&sc->worklist_mutex);
617 restart++;
618 error = g_bde_start_write(wp->sp);
619 if (error) {
620 g_bde_work_done(wp, error);
621 break;
622 }
623 error = g_bde_start_write(wp->ksp);
624 if (wp->error != 0)
625 wp->error = error;
626 break;
627 case BIO_DELETE:
628 wp->state = FINISH;
629 mtx_unlock(&sc->worklist_mutex);
630 g_bde_crypt_delete(wp);
631 mtx_lock(&sc->worklist_mutex);
632 restart++;
633 g_bde_start_write(wp->sp);
634 break;
635 }
636 if (restart)
637 break;
638 }
639 if (!restart) {
640 /*
641 * We don't look for our death-warrant until we are
642 * idle. Shouldn't make a difference in practice.
643 */
644 if (sc->dead)
645 break;
646 g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
647 error = msleep(sc, &sc->worklist_mutex,
648 PRIBIO, "-", hz);
649 if (error == EWOULDBLOCK) {
650 /*
651 * Lose our skey cache in an orderly fashion.
652 * The exact rate can be tuned to be less
653 * aggressive if this is desirable. 10% per
654 * second means that the cache is gone in a
655 * few minutes.
656 */
657 g_bde_purge_sector(sc, 10);
658 }
659 }
660 }
661 g_trace(G_T_TOPOLOGY, "g_bde_worker die");
662 g_bde_purge_sector(sc, 1);
663 KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
664 KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
665 KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
666 mtx_unlock(&sc->worklist_mutex);
667 sc->dead = 2;
668 wakeup(sc);
669 kproc_exit(0);
670 }
671
672 /*
673 * g_bde_start1 has chopped the incoming request up so all the requests
674 * we see here are inside a single zone. Map the data and key locations
675 * grab the buffers we need and fire off the first volley of read requests.
676 */
677
678 static void
679 g_bde_start2(struct g_bde_work *wp)
680 {
681 struct g_bde_softc *sc;
682
683 KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
684 KASSERT(wp->softc != NULL, ("NULL wp->softc"));
685 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
686 sc = wp->softc;
687 switch (wp->bp->bio_cmd) {
688 case BIO_READ:
689 wp->sp = g_bde_new_sector(wp, 0);
690 if (wp->sp == NULL) {
691 g_bde_work_done(wp, ENOMEM);
692 return;
693 }
694 wp->sp->size = wp->length;
695 wp->sp->data = wp->data;
696 if (g_bde_start_read(wp->sp) != 0) {
697 g_bde_work_done(wp, ENOMEM);
698 return;
699 }
700 g_bde_read_keysector(sc, wp);
701 if (wp->ksp == NULL)
702 wp->error = ENOMEM;
703 break;
704 case BIO_DELETE:
705 wp->sp = g_bde_new_sector(wp, wp->length);
706 if (wp->sp == NULL) {
707 g_bde_work_done(wp, ENOMEM);
708 return;
709 }
710 break;
711 case BIO_WRITE:
712 wp->sp = g_bde_new_sector(wp, wp->length);
713 if (wp->sp == NULL) {
714 g_bde_work_done(wp, ENOMEM);
715 return;
716 }
717 g_bde_read_keysector(sc, wp);
718 if (wp->ksp == NULL) {
719 g_bde_work_done(wp, ENOMEM);
720 return;
721 }
722 break;
723 default:
724 KASSERT(0 == 1,
725 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
726 }
727
728 wp->state = WAIT;
729 wakeup(sc);
730 }
731
732 /*
733 * Create a sequence of work structures, and have g_bde_map_sector() determine
734 * how long they each can be. Feed them to g_bde_start2().
735 */
736
737 void
738 g_bde_start1(struct bio *bp)
739 {
740 struct g_bde_softc *sc;
741 struct g_bde_work *wp;
742 off_t done;
743
744 sc = bp->bio_to->geom->softc;
745 bp->bio_driver1 = sc;
746
747 mtx_lock(&sc->worklist_mutex);
748 for(done = 0; done < bp->bio_length; ) {
749 wp = g_bde_new_work(sc);
750 if (wp != NULL) {
751 wp->bp = bp;
752 wp->offset = bp->bio_offset + done;
753 wp->data = bp->bio_data + done;
754 wp->length = bp->bio_length - done;
755 g_bde_map_sector(wp);
756 done += wp->length;
757 g_bde_start2(wp);
758 }
759 if (wp == NULL || bp->bio_error != 0) {
760 g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
761 break;
762 }
763 }
764 mtx_unlock(&sc->worklist_mutex);
765 return;
766 }
Cache object: 0a8f397a034cdebc5150cc34838fb10e
|