The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/bde/g_bde_work.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2002 Poul-Henning Kamp
    5  * Copyright (c) 2002 Networks Associates Technology, Inc.
    6  * All rights reserved.
    7  *
    8  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
    9  * and NAI Labs, the Security Research Division of Network Associates, Inc.
   10  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
   11  * DARPA CHATS research program.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  * $FreeBSD$
   35  */
   36 /*
   37  * This source file contains the state-engine which makes things happen in the
   38  * right order.
   39  *
   40  * Outline:
   41  *   1) g_bde_start1()
   42  *      Break the struct bio into multiple work packets one per zone.
   43  *   2) g_bde_start2()
   44  *      Setup the necessary sector buffers and start those read operations
   45  *      which we can start at this time and put the item on the work-list.
   46  *   3) g_bde_worker()
   47  *      Scan the work-list for items which are ready for crypto processing
   48  *      and call the matching crypto function in g_bde_crypt.c and schedule
   49  *      any writes needed.  Read operations finish here by releasing the
   50  *      sector buffers and delivering the original bio request.
   51  *   4) g_bde_write_done()
   52  *      Release sector buffers and deliver the original bio request.
   53  *
   54  * Because of the C-scope rules, the functions are almost perfectly in the
   55  * opposite order in this source file.
   56  *
   57  * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
   58  * XXX: additional states to this state-engine.  Since no hardware available
   59  * XXX: at this time has AES support, implementing this has been postponed
   60  * XXX: until such time as it would result in a benefit.
   61  */
   62 
   63 #include <sys/param.h>
   64 #include <sys/bio.h>
   65 #include <sys/lock.h>
   66 #include <sys/mutex.h>
   67 #include <sys/queue.h>
   68 #include <sys/malloc.h>
   69 #include <sys/systm.h>
   70 #include <sys/kernel.h>
   71 #include <sys/sysctl.h>
   72 #include <sys/proc.h>
   73 #include <sys/kthread.h>
   74 
   75 #include <crypto/rijndael/rijndael-api-fst.h>
   76 #include <crypto/sha2/sha512.h>
   77 #include <geom/geom.h>
   78 #include <geom/bde/g_bde.h>
   79 
   80 /*
   81  * FIXME: This used to call malloc_last_fail which in practice was almost
   82  * guaranteed to return time_uptime even in face of severe memory shortage.
   83  * As GBDE is the only consumer the kludge below was added to facilitate the
   84  * removal with minimial changes. The code should be fixed to respond to memory
   85  * pressure (e.g., by using lowmem eventhandler) instead.
   86  */
   87 static int
   88 g_bde_malloc_last_fail(void)
   89 {
   90 
   91         return (time_uptime);
   92 }
   93 
   94 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
   95 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
   96 static void g_bde_release_keysector(struct g_bde_work *wp);
   97 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
   98 static int g_bde_start_read(struct g_bde_sector *sp);
   99 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
  100 
  101 /*
  102  * Work item allocation.
  103  *
  104  * C++ would call these constructors and destructors.
  105  */
  106 static u_int g_bde_nwork;
  107 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
  108 
  109 static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");
  110 
  111 static struct g_bde_work *
  112 g_bde_new_work(struct g_bde_softc *sc)
  113 {
  114         struct g_bde_work *wp;
  115 
  116         wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
  117         if (wp == NULL)
  118                 return (wp);
  119         wp->state = SETUP;
  120         wp->softc = sc;
  121         g_bde_nwork++;
  122         sc->nwork++;
  123         TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
  124         return (wp);
  125 }
  126 
  127 static void
  128 g_bde_delete_work(struct g_bde_work *wp)
  129 {
  130         struct g_bde_softc *sc;
  131 
  132         sc = wp->softc;
  133         g_bde_nwork--;
  134         sc->nwork--;
  135         TAILQ_REMOVE(&sc->worklist, wp, list);
  136         free(wp, M_GBDE);
  137 }
  138 
  139 /*
  140  * Sector buffer allocation
  141  *
  142  * These two functions allocate and free back variable sized sector buffers
  143  */
  144 
  145 static u_int g_bde_nsect;
  146 SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
  147 
  148 static void
  149 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  150 {
  151 
  152         g_bde_nsect--;
  153         sc->nsect--;
  154         if (sp->malloc)
  155                 free(sp->data, M_GBDE);
  156         free(sp, M_GBDE);
  157 }
  158 
  159 static struct g_bde_sector *
  160 g_bde_new_sector(struct g_bde_work *wp, u_int len)
  161 {
  162         struct g_bde_sector *sp;
  163 
  164         sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
  165         if (sp == NULL)
  166                 return (sp);
  167         if (len > 0) {
  168                 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
  169                 if (sp->data == NULL) {
  170                         free(sp, M_GBDE);
  171                         return (NULL);
  172                 }
  173                 sp->malloc = 1;
  174         }
  175         g_bde_nsect++;
  176         wp->softc->nsect++;
  177         sp->size = len;
  178         sp->softc = wp->softc;
  179         sp->ref = 1;
  180         sp->owner = wp;
  181         sp->offset = wp->so;
  182         sp->state = JUNK;
  183         return (sp);
  184 }
  185 
  186 /*
  187  * Skey sector cache.
  188  *
  189  * Nothing prevents two separate I/O requests from addressing the same zone
  190  * and thereby needing the same skey sector.  We therefore need to sequence
  191  * I/O operations to the skey sectors.  A certain amount of caching is also
  192  * desirable, although the extent of benefit from this is not at this point
  193  * determined.
  194  *
  195  * XXX: GEOM may be able to grow a generic caching facility at some point
  196  * XXX: to support such needs.
  197  */
  198 
  199 static u_int g_bde_ncache;
  200 SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
  201 
  202 static void
  203 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  204 {
  205 
  206         g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
  207         if (sp->ref != 0)
  208                 return;
  209         TAILQ_REMOVE(&sc->freelist, sp, list);
  210         g_bde_ncache--;
  211         sc->ncache--;
  212         bzero(sp->data, sp->size);
  213         g_bde_delete_sector(sc, sp);
  214 }
  215 
/*
 * Find or create the cached key sector for the zone addressed by wp->kso
 * and take a reference on it.
 *
 * The freelist doubles as an LRU list: sectors in use are moved to the
 * tail and their "used" timestamp refreshed, so the head holds the
 * coldest entry.  Lookup order:
 *   1) under (pretend) memory pressure, purge the whole cache;
 *   2) opportunistically evict the head entry if unreferenced and more
 *      than 300 seconds old;
 *   3) reuse an entry with a matching offset, else recycle an
 *      unreferenced entry, else allocate a brand-new sector.
 *
 * On success wp->ksp points at the sector; returns NULL when nothing
 * could be obtained (out of memory and nothing recyclable).
 */
static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
        struct g_bde_sector *sp;
        struct g_bde_softc *sc;
        off_t offset;

        offset = wp->kso;
        g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
        sc = wp->softc;

        /* See g_bde_malloc_last_fail(): this effectively compares uptime
         * against the cache size, a kludge kept from malloc_last_fail(). */
        if (g_bde_malloc_last_fail() < g_bde_ncache)
                g_bde_purge_sector(sc, -1);

        /* Age out the coldest (head) entry if idle for > 300 seconds. */
        sp = TAILQ_FIRST(&sc->freelist);
        if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
                g_bde_purge_one_sector(sc, sp);

        TAILQ_FOREACH(sp, &sc->freelist, list) {
                if (sp->offset == offset)
                        break;
        }
        if (sp != NULL) {
                /* Cache hit: share the sector; first taker becomes owner. */
                sp->ref++;
                KASSERT(sp->offset == offset, ("wrong offset"));
                KASSERT(sp->softc == wp->softc, ("wrong softc"));
                if (sp->ref == 1)
                        sp->owner = wp;
        } else {
                /* Miss: under pressure, look for an idle entry to recycle. */
                if (g_bde_malloc_last_fail() < g_bde_ncache) {
                        TAILQ_FOREACH(sp, &sc->freelist, list)
                                if (sp->ref == 0)
                                        break;
                }
                if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
                        sp = TAILQ_FIRST(&sc->freelist);
                /* Only an unreferenced sector may be recycled. */
                if (sp != NULL && sp->ref > 0)
                        sp = NULL;
                if (sp == NULL) {
                        sp = g_bde_new_sector(wp, sc->sectorsize);
                        if (sp != NULL) {
                                g_bde_ncache++;
                                sc->ncache++;
                                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                                /* malloc == 2 marks cache-owned key sectors
                                 * (checked in g_bde_release_keysector()). */
                                sp->malloc = 2;
                        }
                }
                if (sp != NULL) {
                        /* (Re)target the sector; contents must be read in
                         * before use, hence JUNK. */
                        sp->offset = offset;
                        sp->softc = wp->softc;
                        sp->ref = 1;
                        sp->owner = wp;
                        sp->state = JUNK;
                        sp->error = 0;
                }
        }
        if (sp != NULL) {
                /* Move to the MRU end and refresh the LRU timestamp. */
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                sp->used = time_uptime;
        }
        wp->ksp = sp;
        return(sp);
}
  280 
  281 static void
  282 g_bde_release_keysector(struct g_bde_work *wp)
  283 {
  284         struct g_bde_softc *sc;
  285         struct g_bde_work *wp2;
  286         struct g_bde_sector *sp;
  287 
  288         sp = wp->ksp;
  289         g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
  290         KASSERT(sp->malloc == 2, ("Wrong sector released"));
  291         sc = sp->softc;
  292         KASSERT(sc != NULL, ("NULL sp->softc"));
  293         KASSERT(wp == sp->owner, ("Releasing, not owner"));
  294         sp->owner = NULL;
  295         wp->ksp = NULL;
  296         sp->ref--;
  297         if (sp->ref > 0) {
  298                 TAILQ_REMOVE(&sc->freelist, sp, list);
  299                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  300                 TAILQ_FOREACH(wp2, &sc->worklist, list) {
  301                         if (wp2->ksp == sp) {
  302                                 KASSERT(wp2 != wp, ("Self-reowning"));
  303                                 sp->owner = wp2;
  304                                 wakeup(sp->softc);
  305                                 break;
  306                         }
  307                 }
  308                 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
  309         } else if (sp->error != 0) {
  310                 sp->offset = ~0;
  311                 sp->error = 0;
  312                 sp->state = JUNK;
  313         }
  314         TAILQ_REMOVE(&sc->freelist, sp, list);
  315         TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
  316 }
  317 
  318 static void
  319 g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
  320 {
  321         struct g_bde_sector *sp;
  322         int n;
  323 
  324         g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
  325         if (fraction > 0)
  326                 n = sc->ncache / fraction + 1;
  327         else 
  328                 n = g_bde_ncache - g_bde_malloc_last_fail();
  329         if (n < 0)
  330                 return;
  331         if (n > sc->ncache)
  332                 n = sc->ncache;
  333         while(n--) {
  334                 TAILQ_FOREACH(sp, &sc->freelist, list) {
  335                         if (sp->ref != 0)
  336                                 continue;
  337                         TAILQ_REMOVE(&sc->freelist, sp, list);
  338                         g_bde_ncache--;
  339                         sc->ncache--;
  340                         bzero(sp->data, sp->size);
  341                         g_bde_delete_sector(sc, sp);
  342                         break;
  343                 }
  344         }
  345 }
  346 
  347 static struct g_bde_sector *
  348 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
  349 {
  350         struct g_bde_sector *sp;
  351 
  352         g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
  353         sp = g_bde_get_keysector(wp);
  354         if (sp == NULL) {
  355                 g_bde_purge_sector(sc, -1);
  356                 sp = g_bde_get_keysector(wp);
  357         }
  358         if (sp == NULL)
  359                 return (sp);
  360         if (sp->owner != wp)
  361                 return (sp);
  362         if (sp->state == VALID)
  363                 return (sp);
  364         if (g_bde_start_read(sp) == 0)
  365                 return (sp);
  366         g_bde_release_keysector(wp);
  367         return (NULL);
  368 }
  369 
  370 /*
  371  * Contribute to the completion of the original bio request.
  372  *
  373  * We have no simple way to tell how many bits the original bio request has
  374  * been segmented into, so the easiest way to determine when we can deliver
  375  * it is to keep track of the number of bytes we have completed.  We keep
  376  * track of any errors underway and latch onto the first one.
  377  *
  378  * We always report "nothing done" in case of error, because random bits here
  379  * and there may be completed and returning a number of completed bytes does
  380  * not convey any useful information about which bytes they were.  If some
  381  * piece of broken code somewhere interprets this to mean that nothing has
  382  * changed on the underlying media they deserve the lossage headed for them.
  383  *
  384  * A single mutex per g_bde instance is used to prevent contention.
  385  */
  386 
  387 static void
  388 g_bde_contribute(struct bio *bp, off_t bytes, int error)
  389 {
  390 
  391         g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
  392              bp, (intmax_t)bytes, error);
  393         if (bp->bio_error == 0)
  394                 bp->bio_error = error;
  395         bp->bio_completed += bytes;
  396         KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
  397         if (bp->bio_completed == bp->bio_length) {
  398                 if (bp->bio_error != 0)
  399                         bp->bio_completed = 0;
  400                 g_io_deliver(bp, bp->bio_error);
  401         }
  402 }
  403 
  404 /*
  405  * This is the common case "we're done with this work package" function
  406  */
  407 
  408 static void
  409 g_bde_work_done(struct g_bde_work *wp, int error)
  410 {
  411 
  412         g_bde_contribute(wp->bp, wp->length, error);
  413         if (wp->sp != NULL)
  414                 g_bde_delete_sector(wp->softc, wp->sp);
  415         if (wp->ksp != NULL)
  416                 g_bde_release_keysector(wp);
  417         g_bde_delete_work(wp);
  418 }
  419 
  420 /*
  421  * A write operation has finished.  When we have all expected cows in the
  422  * barn close the door and call it a day.
  423  */
  424 
static void
g_bde_write_done(struct bio *bp)
{
        struct g_bde_sector *sp;
        struct g_bde_work *wp;
        struct g_bde_softc *sc;

        /* bio_caller1/2 were set up in g_bde_start_write(). */
        sp = bp->bio_caller1;
        sc = bp->bio_caller2;
        mtx_lock(&sc->worklist_mutex);
        KASSERT(sp != NULL, ("NULL sp"));
        KASSERT(sc != NULL, ("NULL sc"));
        KASSERT(sp->owner != NULL, ("NULL sp->owner"));
        g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
        /* A short write without an error code is still a failure. */
        if (bp->bio_error == 0 && bp->bio_completed != sp->size)
                bp->bio_error = EIO;
        sp->error = bp->bio_error;
        g_destroy_bio(bp);
        wp = sp->owner;
        /* Latch only the first error seen for this work packet. */
        if (wp->error == 0)
                wp->error = sp->error;

        if (wp->bp->bio_cmd == BIO_DELETE) {
                /* BIO_DELETE writes only the data sector; we are done. */
                KASSERT(sp == wp->sp, ("trashed delete op"));
                g_bde_work_done(wp, wp->error);
                mtx_unlock(&sc->worklist_mutex);
                return;
        }

        /* BIO_WRITE issues two writes: the data sector and the key
         * sector.  Only when both have landed is the packet finished. */
        KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
        KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
        if (wp->sp == sp) {
                /* Data sector completed: drop the buffer. */
                g_bde_delete_sector(sc, wp->sp);
                wp->sp = NULL;
        } else {
                /* Key sector completed: its cached contents are valid. */
                sp->state = VALID;
        }
        if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
                g_bde_work_done(wp, wp->error);
        mtx_unlock(&sc->worklist_mutex);
        return;
}
  467 
  468 /*
  469  * Send a write request for the given sector down the pipeline.
  470  */
  471 
  472 static int
  473 g_bde_start_write(struct g_bde_sector *sp)
  474 {
  475         struct bio *bp;
  476         struct g_bde_softc *sc;
  477 
  478         g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
  479         sc = sp->softc;
  480         KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
  481         KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
  482         bp = g_new_bio();
  483         if (bp == NULL)
  484                 return (ENOMEM);
  485         bp->bio_cmd = BIO_WRITE;
  486         bp->bio_offset = sp->offset;
  487         bp->bio_data = sp->data;
  488         bp->bio_length = sp->size;
  489         bp->bio_done = g_bde_write_done;
  490         bp->bio_caller1 = sp;
  491         bp->bio_caller2 = sc;
  492         sp->state = IO;
  493         g_io_request(bp, sc->consumer);
  494         return(0);
  495 }
  496 
  497 /*
  498  * A read operation has finished.  Mark the sector no longer iobusy and
  499  * wake up the worker thread and let it do its thing.
  500  */
  501 
  502 static void
  503 g_bde_read_done(struct bio *bp)
  504 {
  505         struct g_bde_sector *sp;
  506         struct g_bde_softc *sc;
  507 
  508         sp = bp->bio_caller1;
  509         g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
  510         sc = bp->bio_caller2;
  511         mtx_lock(&sc->worklist_mutex);
  512         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  513                 bp->bio_error = EIO;
  514         sp->error = bp->bio_error;
  515         if (sp->error == 0)
  516                 sp->state = VALID;
  517         else
  518                 sp->state = JUNK;
  519         wakeup(sc);
  520         g_destroy_bio(bp);
  521         mtx_unlock(&sc->worklist_mutex);
  522 }
  523 
  524 /*
  525  * Send a read request for the given sector down the pipeline.
  526  */
  527 
  528 static int
  529 g_bde_start_read(struct g_bde_sector *sp)
  530 {
  531         struct bio *bp;
  532         struct g_bde_softc *sc;
  533 
  534         g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
  535         sc = sp->softc;
  536         KASSERT(sc != NULL, ("Null softc in sp %p", sp));
  537         bp = g_new_bio();
  538         if (bp == NULL)
  539                 return (ENOMEM);
  540         bp->bio_cmd = BIO_READ;
  541         bp->bio_offset = sp->offset;
  542         bp->bio_data = sp->data;
  543         bp->bio_length = sp->size;
  544         bp->bio_done = g_bde_read_done;
  545         bp->bio_caller1 = sp;
  546         bp->bio_caller2 = sc;
  547         sp->state = IO;
  548         g_io_request(bp, sc->consumer);
  549         return(0);
  550 }
  551 
  552 /*
  553  * The worker thread.
  554  *
  555  * The up/down path of GEOM is not allowed to sleep or do any major work
  556  * so we use this thread to do the actual crypto operations and to push
  557  * the state engine onwards.
  558  *
  559  * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
  560  * XXX: using a thread here is probably not needed.
  561  */
  562 
  563 void
  564 g_bde_worker(void *arg)
  565 {
  566         struct g_bde_softc *sc;
  567         struct g_bde_work *wp, *twp;
  568         struct g_geom *gp;
  569         int restart, error;
  570 
  571         gp = arg;
  572         sc = gp->softc;
  573 
  574         mtx_lock(&sc->worklist_mutex);
  575         for (;;) {
  576                 restart = 0;
  577                 g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
  578                 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
  579                         KASSERT(wp != NULL, ("NULL wp"));
  580                         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  581                         if (wp->state != WAIT)
  582                                 continue;       /* Not interesting here */
  583 
  584                         KASSERT(wp->bp != NULL, ("NULL wp->bp"));
  585                         KASSERT(wp->sp != NULL, ("NULL wp->sp"));
  586 
  587                         if (wp->ksp != NULL) {
  588                                 if (wp->ksp->owner != wp)
  589                                         continue;
  590                                 if (wp->ksp->state == IO)
  591                                         continue;
  592                                 KASSERT(wp->ksp->state == VALID,
  593                                     ("Illegal sector state (%d)",
  594                                     wp->ksp->state));
  595                         }
  596 
  597                         if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
  598                                 continue;
  599 
  600                         if (wp->ksp != NULL && wp->ksp->error != 0) {
  601                                 g_bde_work_done(wp, wp->ksp->error);
  602                                 continue;
  603                         } 
  604                         switch(wp->bp->bio_cmd) {
  605                         case BIO_READ:
  606                                 if (wp->ksp == NULL) {
  607                                         KASSERT(wp->error != 0,
  608                                             ("BIO_READ, no ksp and no error"));
  609                                         g_bde_work_done(wp, wp->error);
  610                                         break;
  611                                 }
  612                                 if (wp->sp->error != 0) {
  613                                         g_bde_work_done(wp, wp->sp->error);
  614                                         break;
  615                                 }
  616                                 mtx_unlock(&sc->worklist_mutex);
  617                                 g_bde_crypt_read(wp);
  618                                 mtx_lock(&sc->worklist_mutex);
  619                                 restart++;
  620                                 g_bde_work_done(wp, wp->sp->error);
  621                                 break;
  622                         case BIO_WRITE:
  623                                 wp->state = FINISH;
  624                                 KASSERT(wp->sp->owner == wp,
  625                                     ("Write not owner sp"));
  626                                 KASSERT(wp->ksp->owner == wp,
  627                                     ("Write not owner ksp"));
  628                                 mtx_unlock(&sc->worklist_mutex);
  629                                 g_bde_crypt_write(wp);
  630                                 mtx_lock(&sc->worklist_mutex);
  631                                 restart++;
  632                                 error = g_bde_start_write(wp->sp);
  633                                 if (error) {
  634                                         g_bde_work_done(wp, error);
  635                                         break;
  636                                 }
  637                                 error = g_bde_start_write(wp->ksp);
  638                                 if (wp->error != 0)
  639                                         wp->error = error;
  640                                 break;
  641                         case BIO_DELETE:
  642                                 wp->state = FINISH;
  643                                 mtx_unlock(&sc->worklist_mutex);
  644                                 g_bde_crypt_delete(wp);
  645                                 mtx_lock(&sc->worklist_mutex);
  646                                 restart++;
  647                                 g_bde_start_write(wp->sp);
  648                                 break;
  649                         }
  650                         if (restart)
  651                                 break;
  652                 }
  653                 if (!restart) {
  654                         /*
  655                          * We don't look for our death-warrant until we are
  656                          * idle.  Shouldn't make a difference in practice.
  657                          */
  658                         if (sc->dead)
  659                                 break;
  660                         g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
  661                         error = msleep(sc, &sc->worklist_mutex,
  662                             PRIBIO, "-", hz);
  663                         if (error == EWOULDBLOCK) {
  664                                 /*
  665                                  * Lose our skey cache in an orderly fashion.
  666                                  * The exact rate can be tuned to be less
  667                                  * aggressive if this is desirable.  10% per
  668                                  * second means that the cache is gone in a
  669                                  * few minutes.
  670                                  */
  671                                 g_bde_purge_sector(sc, 10);
  672                         }
  673                 }
  674         }
  675         g_trace(G_T_TOPOLOGY, "g_bde_worker die");
  676         g_bde_purge_sector(sc, 1);
  677         KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
  678         KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
  679         KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
  680         mtx_unlock(&sc->worklist_mutex);
  681         sc->dead = 2;
  682         wakeup(sc);
  683         kproc_exit(0);
  684 }
  685 
  686 /*
  687  * g_bde_start1 has chopped the incoming request up so all the requests
  688  * we see here are inside a single zone.  Map the data and key locations
  689  * grab the buffers we need and fire off the first volley of read requests.
  690  */
  691 
static void
g_bde_start2(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;

        KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
        KASSERT(wp->softc != NULL, ("NULL wp->softc"));
        g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
        sc = wp->softc;
        switch (wp->bp->bio_cmd) {
        case BIO_READ:
                /* len == 0: the sector borrows the caller's data buffer. */
                wp->sp = g_bde_new_sector(wp, 0);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                wp->sp->size = wp->length;
                wp->sp->data = wp->data;
                if (g_bde_start_read(wp->sp) != 0) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                g_bde_read_keysector(sc, wp);
                /* NB: unlike BIO_WRITE we do not finish the packet here;
                 * the data read is already in flight, so just record the
                 * error and let the worker thread report it. */
                if (wp->ksp == NULL)
                        wp->error = ENOMEM;
                break;
        case BIO_DELETE:
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                break;
        case BIO_WRITE:
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                g_bde_read_keysector(sc, wp);
                if (wp->ksp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                break;
        default:
                KASSERT(0 == 1, 
                    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
        }

        /* Hand the packet to the worker thread (see g_bde_worker()). */
        wp->state = WAIT;
        wakeup(sc);
}
  745 
  746 /*
  747  * Create a sequence of work structures, and have g_bde_map_sector() determine
  748  * how long they each can be.  Feed them to g_bde_start2().
  749  */
  750 
void
g_bde_start1(struct bio *bp)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp;
        off_t done;

        sc = bp->bio_to->geom->softc;
        bp->bio_driver1 = sc;

        mtx_lock(&sc->worklist_mutex);
        /* Chop the request into zone-sized work packets; g_bde_map_sector()
         * trims wp->length to what fits in the current zone. */
        for(done = 0; done < bp->bio_length; ) {
                wp = g_bde_new_work(sc);
                if (wp != NULL) {
                        wp->bp = bp;
                        wp->offset = bp->bio_offset + done;
                        wp->data = bp->bio_data + done;
                        wp->length = bp->bio_length - done;
                        g_bde_map_sector(wp);
                        done += wp->length;
                        g_bde_start2(wp);
                }
                /* On allocation failure (or an error already latched on the
                 * bio by a failed packet) account the remaining bytes so the
                 * request can still be delivered. */
                if (wp == NULL || bp->bio_error != 0) {
                        g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
                        break;
                }
        }
        mtx_unlock(&sc->worklist_mutex);
        return;
}

Cache object: 58d436ad5c64538c7a61f0c407505a81


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.