/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/5.0/sys/geom/bde/g_bde_work.c 108891 2003-01-07 18:21:39Z phk $
 *
 * This source file contains the state engine which makes things happen in the
 * right order.
 *
 * Outline:
 * 1) g_bde_start1()
 *    Break the struct bio into multiple work packets, one per zone.
 * 2) g_bde_start2()
 *    Set up the necessary sector buffers, start the read operations which
 *    can be started at this time, and put the item on the work-list.
 * 3) g_bde_worker()
 *    Scan the work-list for items which are ready for crypto processing,
 *    call the matching crypto function in g_bde_crypt.c and schedule any
 *    writes needed.  Read operations finish here by releasing the
 *    sector buffers and delivering the original bio request.
 * 4) g_bde_write_done()
 *    Release sector buffers and deliver the original bio request.
 *
 * Because of the C scope rules, the functions appear in almost exactly the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware-assisted crypto in src/sys/opencrypto will add
 * XXX: additional states to this state engine.  Since no hardware available
 * XXX: at this time has AES support, implementing this has been postponed
 * XXX: until such time as it would result in a benefit.
 */
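
/*
 * A rough sketch of the states used in this file (the authoritative
 * definitions live in g_bde.h):
 *
 *	Work packets:	SETUP (g_bde_new_work) -> WAIT (g_bde_start2) ->
 *			FINISH (g_bde_worker; writes and deletes only).
 *	Sector buffers:	JUNK (contents undefined) -> IO (request in
 *			flight) -> VALID (contents usable).
 */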

#include <sys/param.h>
#include <sys/stdint.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael.h>
#include <crypto/sha2/sha2.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
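
/*
 * The g_bde_nwork, g_bde_nsect and g_bde_ncache counters in this file are
 * read-only sysctls and can be inspected from userland, for instance:
 *
 *	sysctl debug.gbde_nwork debug.gbde_nsect debug.gbde_ncache
 */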

static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

	wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	g_free(wp);
}

/*
 * Sector buffer allocation.
 *
 * These two functions allocate and free variable-sized sector buffers.
 */
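
/*
 * A note on the sp->malloc field as used below: g_bde_new_sector() sets it
 * to 1 when it allocates the data buffer itself (len > 0) and leaves it 0
 * when the caller supplies the buffer, as g_bde_start2() does for reads.
 * g_bde_get_sector() sets it to 2 to mark cached skey sectors, which is
 * why g_bde_release_sector() asserts sp->malloc == 2.
 */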

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		g_free(sp->data);
	g_free(sp);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			g_free(sp);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although the extent of the benefit has not been determined
 * at this point.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */
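
/*
 * The cache lives on sc->freelist.  g_bde_get_sector() moves the sector it
 * returns to the tail of the list, and g_bde_release_sector() moves released
 * sectors to the head; the expiry and purge scans below start from the head.
 */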

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

static struct g_bde_sector *
g_bde_get_sector(struct g_bde_work *wp, off_t offset)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

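	/*
	 * malloc_last_fail() returns the number of seconds since kernel
	 * malloc last failed; comparing it against the cache size is a
	 * heuristic which shrinks the cache when allocations have failed
	 * recently.
	 */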
	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

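	/*
	 * Expire the sector at the head of the free-list if it has sat
	 * unused for five minutes.
	 */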
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

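	/* See if the wanted skey sector is already cached. */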
	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			g_bde_ncache++;
			sc->ncache++;
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
	}
	wp->ksp = sp;
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_sector(wp, offset);
	}
	if (sp != NULL)
		sp->used = time_uptime;
	KASSERT(sp != NULL, ("get_sector failed"));
	return (sp);
}

static void
g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;

	g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
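	/*
	 * If other work items still hold references, hand ownership of the
	 * sector to the next work item waiting for it and wake the worker
	 * thread so it can make progress.
	 */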
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p", sp));
	} else if (sp->error != 0) {
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
	}
	TAILQ_REMOVE(&sc->freelist, sp, list);
	TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
}

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
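	/*
	 * A positive fraction purges roughly 1/fraction of the unreferenced
	 * cache sectors; a non-positive fraction purges as many as the
	 * malloc_last_fail() heuristic calls for.
	 */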
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while (n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}

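/*
 * Get the skey sector for a work item, starting a read of it if necessary.
 * The sector returned may still be owned by another work item or have a
 * read in flight; the worker loop waits for it to become VALID before
 * using it.  NULL is returned only if the read could not be started.
 */
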
static struct g_bde_sector *
g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
	sp = g_bde_get_sector(wp, offset);
	if (sp == NULL)
		return (sp);
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_sector(wp, sp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random bits
 * here and there may have completed and returning a number of completed
 * bytes does not convey any useful information about which bytes they
 * were.  If some piece of broken code somewhere interprets this to mean
 * that nothing has changed on the underlying media, they deserve the
 * lossage headed for them.
 *
 * A single mutex per g_bde instance protects these updates.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	sc = bp->bio_driver1;
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * A write operation has finished.  When we have all the expected cows back
 * in the barn, we close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_delete_sector(sc, sp);
		g_bde_delete_work(wp);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_release_sector(wp, wp->ksp);
		g_bde_delete_work(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * A read operation has finished.  Mark the sector valid, wake up the
 * worker thread and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	sp->error = bp->bio_error;
	sp->state = VALID;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware-assisted encryption,
 * XXX: using a thread here is probably not needed.
 */
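
/*
 * Note the locking discipline in the loop below: the worklist mutex is
 * dropped around the calls into g_bde_crypt.c, so that the CPU-intensive
 * crypto work is done without blocking the completion routines, which
 * also take the mutex.
 */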

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	struct g_geom *gp;
	int busy, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		busy = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH(wp, &sc->worklist, list) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (JUNK ?)"));
			}

			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
				continue;

			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_contribute(wp->bp, wp->length,
				    wp->ksp->error);
				g_bde_delete_sector(sc, wp->sp);
				g_bde_release_sector(wp, wp->ksp);
				g_bde_delete_work(wp);
				busy++;
				break;
			}
			switch (wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp != NULL && wp->sp->error == 0) {
					mtx_unlock(&sc->worklist_mutex);
					g_bde_crypt_read(wp);
					mtx_lock(&sc->worklist_mutex);
				}
				g_bde_contribute(wp->bp, wp->length,
				    wp->sp->error);
				g_bde_delete_sector(sc, wp->sp);
				if (wp->ksp != NULL)
					g_bde_release_sector(wp, wp->ksp);
				g_bde_delete_work(wp);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				g_bde_start_write(wp->ksp);
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				break;
			}
			busy++;
			break;
		}
		if (!busy) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "g_bde", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	mtx_lock(&Giant);
	kthread_exit(0);
}

/*
 * g_bde_start1() has chopped the incoming request up, so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need, and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
	if (wp->bp->bio_cmd == BIO_READ) {
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_sector(sc, wp, wp->kso);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
	} else if (wp->bp->bio_cmd == BIO_DELETE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
	} else if (wp->bp->bio_cmd == BIO_WRITE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_sector(sc, wp, wp->kso);
		if (wp->ksp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
	} else {
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector()
 * determine how long each of them can be.  Feed them to g_bde_start2().
 */
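
/*
 * For instance, a bio whose range crosses a zone boundary is split into
 * one work packet per zone touched, and each packet contributes its byte
 * count back to the original bio as it completes.
 */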

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp == NULL) {
			g_io_deliver(bp, ENOMEM);
			mtx_unlock(&sc->worklist_mutex);
			return;
		}
		wp->bp = bp;
		wp->offset = bp->bio_offset + done;
		wp->data = bp->bio_data + done;
		wp->length = bp->bio_length - done;
		g_bde_map_sector(wp);
		done += wp->length;
		g_bde_start2(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}