The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/bde/g_bde_work.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2002 Poul-Henning Kamp
    3  * Copyright (c) 2002 Networks Associates Technology, Inc.
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
    7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
    8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
    9  * DARPA CHATS research program.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  * $FreeBSD: releng/5.1/sys/geom/bde/g_bde_work.c 114715 2003-05-05 08:37:07Z phk $
   33  *
   34  * This source file contains the state-engine which makes things happen in the
   35  * right order.
   36  *
   37  * Outline:
   38  *   1) g_bde_start1()
   39  *      Break the struct bio into multiple work packets one per zone.
   40  *   2) g_bde_start2()
   41  *      Setup the necessary sector buffers and start those read operations
   42  *      which we can start at this time and put the item on the work-list.
   43  *   3) g_bde_worker()
   44  *      Scan the work-list for items which are ready for crypto processing
   45  *      and call the matching crypto function in g_bde_crypt.c and schedule
   46  *      any writes needed.  Read operations finish here by releasing the
   47  *      sector buffers and delivering the original bio request.
   48  *   4) g_bde_write_done()
   49  *      Release sector buffers and deliver the original bio request.
   50  *
   51  * Because of the C-scope rules, the functions are almost perfectly in the
   52  * opposite order in this source file.
   53  *
   54  * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
   55  * XXX: additional states to this state-engine.  Since no hardware available
   56  * XXX: at this time has AES support, implementing this has been postponed
   57  * XXX: until such time as it would result in a benefit.
   58  */
   59 
   60 #include <sys/param.h>
   61 #include <sys/bio.h>
   62 #include <sys/lock.h>
   63 #include <sys/mutex.h>
   64 #include <sys/queue.h>
   65 #include <sys/malloc.h>
   66 #include <sys/systm.h>
   67 #include <sys/kernel.h>
   68 #include <sys/sysctl.h>
   69 #include <sys/proc.h>
   70 #include <sys/kthread.h>
   71 
   72 #include <crypto/rijndael/rijndael.h>
   73 #include <crypto/sha2/sha2.h>
   74 #include <geom/geom.h>
   75 #include <geom/bde/g_bde.h>
   76 
   77 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
   78 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
   79 static void g_bde_release_keysector(struct g_bde_work *wp);
   80 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
   81 static int g_bde_start_read(struct g_bde_sector *sp);
   82 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
   83 
   84 /*
   85  * Work item allocation.
   86  *
   87  * C++ would call these constructors and destructors.
   88  */
/* Global count of live work packets, exported read-only via sysctl. */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

/* Malloc type under which all GBDE allocations are accounted. */
static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");
   93 
   94 static struct g_bde_work *
   95 g_bde_new_work(struct g_bde_softc *sc)
   96 {
   97         struct g_bde_work *wp;
   98 
   99         wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
  100         if (wp == NULL)
  101                 return (wp);
  102         wp->state = SETUP;
  103         wp->softc = sc;
  104         g_bde_nwork++;
  105         sc->nwork++;
  106         TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
  107         return (wp);
  108 }
  109 
  110 static void
  111 g_bde_delete_work(struct g_bde_work *wp)
  112 {
  113         struct g_bde_softc *sc;
  114 
  115         sc = wp->softc;
  116         g_bde_nwork--;
  117         sc->nwork--;
  118         TAILQ_REMOVE(&sc->worklist, wp, list);
  119         free(wp, M_GBDE);
  120 }
  121 
  122 /*
  123  * Sector buffer allocation
  124  *
  125  * These two functions allocate and free back variable sized sector buffers
  126  */
  127 
/* Global count of live sector buffers, exported read-only via sysctl. */
static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
  130 
  131 static void
  132 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  133 {
  134 
  135         g_bde_nsect--;
  136         sc->nsect--;
  137         if (sp->malloc)
  138                 free(sp->data, M_GBDE);
  139         free(sp, M_GBDE);
  140 }
  141 
  142 static struct g_bde_sector *
  143 g_bde_new_sector(struct g_bde_work *wp, u_int len)
  144 {
  145         struct g_bde_sector *sp;
  146 
  147         sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
  148         if (sp == NULL)
  149                 return (sp);
  150         if (len > 0) {
  151                 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
  152                 if (sp->data == NULL) {
  153                         free(sp, M_GBDE);
  154                         return (NULL);
  155                 }
  156                 sp->malloc = 1;
  157         }
  158         g_bde_nsect++;
  159         wp->softc->nsect++;
  160         sp->size = len;
  161         sp->softc = wp->softc;
  162         sp->ref = 1;
  163         sp->owner = wp;
  164         sp->offset = wp->so;
  165         sp->state = JUNK;
  166         return (sp);
  167 }
  168 
  169 /*
  170  * Skey sector cache.
  171  *
  172  * Nothing prevents two separate I/O requests from addressing the same zone
  173  * and thereby needing the same skey sector.  We therefore need to sequence
  174  * I/O operations to the skey sectors.  A certain amount of caching is also
  175  * desirable, although the extent of benefit from this is not at this point
  176  * determined.
  177  *
  178  * XXX: GEOM may be able to grow a generic caching facility at some point
  179  * XXX: to support such needs.
  180  */
  181 
/* Global count of cached skey sectors, exported read-only via sysctl. */
static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
  184 
  185 static void
  186 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  187 {
  188 
  189         g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
  190         if (sp->ref != 0)
  191                 return;
  192         TAILQ_REMOVE(&sc->freelist, sp, list);
  193         g_bde_ncache--;
  194         sc->ncache--;
  195         bzero(sp->data, sp->size);
  196         g_bde_delete_sector(sc, sp);
  197 }
  198 
/*
 * Find the cached skey sector for wp->kso, or set one up, and take a
 * reference on it.  The resulting sector (also stored in wp->ksp) is
 * moved to the tail of the freelist, which is kept in LRU order.
 * Returns NULL if no memory was available.
 *
 * NB: sectors on "freelist" may still be referenced; ref == 0 is what
 * makes one eligible for reuse or purging.
 */
static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;
	off_t offset;

	offset = wp->kso;
	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	/*
	 * NOTE(review): malloc_last_fail() presumably reports how recently
	 * an allocation failed — confirm.  If memory seems tight, trim the
	 * cache before doing anything else.
	 */
	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

	/* Opportunistically expire one idle entry (unused for 300 s). */
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

	/* Cache lookup by key sector offset. */
	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		/* Cache hit: take a reference; first holder becomes owner. */
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		/*
		 * Cache miss: under memory pressure try to recycle an
		 * unreferenced cached sector instead of allocating anew.
		 */
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			/* Nothing to recycle: allocate a fresh cache sector. */
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				/* malloc == 2 marks skey cache sectors. */
				sp->malloc = 2;
			}
		}
		/* (Re)initialize the sector for this offset. */
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		/* Move to tail: most recently used; stamp the access time. */
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return(sp);
}
  263 
  264 static void
  265 g_bde_release_keysector(struct g_bde_work *wp)
  266 {
  267         struct g_bde_softc *sc;
  268         struct g_bde_work *wp2;
  269         struct g_bde_sector *sp;
  270 
  271         sp = wp->ksp;
  272         g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
  273         KASSERT(sp->malloc == 2, ("Wrong sector released"));
  274         sc = sp->softc;
  275         KASSERT(sc != NULL, ("NULL sp->softc"));
  276         KASSERT(wp == sp->owner, ("Releasing, not owner"));
  277         sp->owner = NULL;
  278         wp->ksp = NULL;
  279         sp->ref--;
  280         if (sp->ref > 0) {
  281                 TAILQ_REMOVE(&sc->freelist, sp, list);
  282                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  283                 TAILQ_FOREACH(wp2, &sc->worklist, list) {
  284                         if (wp2->ksp == sp) {
  285                                 KASSERT(wp2 != wp, ("Self-reowning"));
  286                                 sp->owner = wp2;
  287                                 wakeup(sp->softc);
  288                                 break;
  289                         }
  290                 }
  291                 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
  292         } else if (sp->error != 0) {
  293                 sp->offset = ~0;
  294                 sp->error = 0;
  295                 sp->state = JUNK;
  296         }
  297         TAILQ_REMOVE(&sc->freelist, sp, list);
  298         TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
  299 }
  300 
/*
 * Purge unreferenced skey sectors from the cache.
 *
 * fraction > 0: drop roughly 1/fraction of this instance's cache (+1),
 * used for gradual decay and for the full purge at shutdown (fraction 1).
 * fraction <= 0: drop enough entries to relieve memory pressure, sized
 * by malloc_last_fail() (NOTE(review): presumably seconds since the last
 * failed allocation — confirm).
 */
static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else 
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while(n--) {
		/* Destroy the first unreferenced entry, if any. */
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			/* Zeroize key material before freeing. */
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}
  329 
  330 static struct g_bde_sector *
  331 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
  332 {
  333         struct g_bde_sector *sp;
  334 
  335         g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
  336         sp = g_bde_get_keysector(wp);
  337         if (sp == NULL) {
  338                 g_bde_purge_sector(sc, -1);
  339                 sp = g_bde_get_keysector(wp);
  340         }
  341         if (sp == NULL)
  342                 return (sp);
  343         if (sp->owner != wp)
  344                 return (sp);
  345         if (sp->state == VALID)
  346                 return (sp);
  347         if (g_bde_start_read(sp) == 0)
  348                 return (sp);
  349         g_bde_release_keysector(wp);
  350         return (NULL);
  351 }
  352 
  353 /*
  354  * Contribute to the completion of the original bio request.
  355  *
  356  * We have no simple way to tell how many bits the original bio request has
  357  * been segmented into, so the easiest way to determine when we can deliver
  358  * it is to keep track of the number of bytes we have completed.  We keep
  359  * track of any errors underway and latch onto the first one.
  360  *
  361  * We always report "nothing done" in case of error, because random bits here
  362  * and there may be completed and returning a number of completed bytes does
  363  * not convey any useful information about which bytes they were.  If some
  364  * piece of broken code somewhere interprets this to mean that nothing has
  365  * changed on the underlying media they deserve the lossage headed for them.
  366  *
  367  * A single mutex per g_bde instance is used to prevent contention.
  368  */
  369 
  370 static void
  371 g_bde_contribute(struct bio *bp, off_t bytes, int error)
  372 {
  373         struct g_bde_softc *sc;
  374 
  375         g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
  376              bp, (intmax_t)bytes, error);
  377         sc = bp->bio_driver1;
  378         if (bp->bio_error == 0)
  379                 bp->bio_error = error;
  380         bp->bio_completed += bytes;
  381         KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
  382         if (bp->bio_completed == bp->bio_length) {
  383                 if (bp->bio_error != 0)
  384                         bp->bio_completed = 0;
  385                 g_io_deliver(bp, bp->bio_error);
  386         }
  387 }
  388 
/*
 * A write operation has finished.  When we have all expected cows in the
 * barn close the door and call it a day.
 *
 * bio_caller1 carries the sector that was written, bio_caller2 the softc.
 * The first error is latched into the work item.  For BIO_DELETE only one
 * sector is ever written, so the whole work item finishes here.  For
 * BIO_WRITE two writes are outstanding (data sector and key sector); the
 * original request completes only once the data sector has been freed and
 * the key sector has reached VALID.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	/* A short write without an error code still counts as an error. */
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_delete_sector(sc, sp);
		g_bde_delete_work(wp);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		/* The data sector is no longer needed once written. */
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		/* The key sector stays cached; mark it usable again. */
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
		/* Both writes accounted for: finish the original request. */
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_release_keysector(wp);
		g_bde_delete_work(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}
  441 
  442 /*
  443  * Send a write request for the given sector down the pipeline.
  444  */
  445 
  446 static int
  447 g_bde_start_write(struct g_bde_sector *sp)
  448 {
  449         struct bio *bp;
  450         struct g_bde_softc *sc;
  451 
  452         g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
  453         sc = sp->softc;
  454         KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
  455         KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
  456         bp = g_new_bio();
  457         if (bp == NULL)
  458                 return (ENOMEM);
  459         bp->bio_cmd = BIO_WRITE;
  460         bp->bio_offset = sp->offset;
  461         bp->bio_data = sp->data;
  462         bp->bio_length = sp->size;
  463         bp->bio_done = g_bde_write_done;
  464         bp->bio_caller1 = sp;
  465         bp->bio_caller2 = sc;
  466         sp->state = IO;
  467         g_io_request(bp, sc->consumer);
  468         return(0);
  469 }
  470 
  471 /*
  472  * A read operation has finished.  Mark the sector no longer iobusy and
  473  * wake up the worker thread and let it do its thing.
  474  */
  475 
  476 static void
  477 g_bde_read_done(struct bio *bp)
  478 {
  479         struct g_bde_sector *sp;
  480         struct g_bde_softc *sc;
  481 
  482         sp = bp->bio_caller1;
  483         g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
  484         sc = bp->bio_caller2;
  485         mtx_lock(&sc->worklist_mutex);
  486         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  487                 bp->bio_error = EIO;
  488         sp->error = bp->bio_error;
  489         if (sp->error == 0)
  490                 sp->state = VALID;
  491         else
  492                 sp->state = JUNK;
  493         wakeup(sc);
  494         g_destroy_bio(bp);
  495         mtx_unlock(&sc->worklist_mutex);
  496 }
  497 
  498 /*
  499  * Send a read request for the given sector down the pipeline.
  500  */
  501 
  502 static int
  503 g_bde_start_read(struct g_bde_sector *sp)
  504 {
  505         struct bio *bp;
  506         struct g_bde_softc *sc;
  507 
  508         g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
  509         sc = sp->softc;
  510         KASSERT(sc != NULL, ("Null softc in sp %p", sp));
  511         bp = g_new_bio();
  512         if (bp == NULL)
  513                 return (ENOMEM);
  514         bp->bio_cmd = BIO_READ;
  515         bp->bio_offset = sp->offset;
  516         bp->bio_data = sp->data;
  517         bp->bio_length = sp->size;
  518         bp->bio_done = g_bde_read_done;
  519         bp->bio_caller1 = sp;
  520         bp->bio_caller2 = sc;
  521         sp->state = IO;
  522         g_io_request(bp, sc->consumer);
  523         return(0);
  524 }
  525 
/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * All state transitions happen with sc->worklist_mutex held; it is
 * dropped only around the crypto operations themselves.
 *
 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
 * XXX: using a thread here is probably not needed.
 */

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	struct g_geom *gp;
	int busy, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		busy = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH(wp, &sc->worklist, list) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;		/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			/* The keysector must be ours and done reading. */
			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (JUNK ?)"));
			}

			/* For reads, the data sector must have arrived too. */
			if (wp->bp->bio_cmd == BIO_READ &&
			     wp->sp->state == IO)
				continue;

			/* A failed keysector read fails the whole work item. */
			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_contribute(wp->bp, wp->length,
				    wp->ksp->error);
				g_bde_delete_sector(sc, wp->sp);
				g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				busy++;
				break;
			} 
			switch(wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					/* g_bde_start2() could not get a
					 * keysector; wp->error says why. */
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_contribute(wp->bp, wp->length,
						    wp->error);
				} else {
					if (wp->sp->error == 0) {
						/* Decrypt outside the lock. */
						mtx_unlock(&sc->worklist_mutex);
						g_bde_crypt_read(wp);
						mtx_lock(&sc->worklist_mutex);
					}
					g_bde_contribute(wp->bp, wp->length,
						    wp->sp->error);
				}
				g_bde_delete_sector(sc, wp->sp);
				if (wp->ksp != NULL)
					g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
				/* Encrypt outside the lock. */
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_contribute(wp->bp, wp->length, error);
					g_bde_release_keysector(wp);
					g_bde_delete_sector(sc, wp->sp);
					g_bde_delete_work(wp);
					break;
				}
				/* Remaining completion handling happens in
				 * g_bde_write_done(). */
				error = g_bde_start_write(wp->ksp);
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				break;
			}
			/* We did something; rescan the list from the top. */
			busy++;
			break;
		}
		if (!busy) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "g_bde", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	/* Drop the whole cache and verify all accounting reached zero. */
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	/* Signal the destroy path that we are gone, then exit the thread. */
	sc->dead = 2;
	wakeup(sc);
	mtx_lock(&Giant);
	kthread_exit(0);
}
  667 
  668 /*
  669  * g_bde_start1 has chopped the incoming request up so all the requests
  670  * we see here are inside a single zone.  Map the data and key locations
  671  * grab the buffers we need and fire off the first volley of read requests.
  672  */
  673 
  674 static void
  675 g_bde_start2(struct g_bde_work *wp)
  676 {
  677         struct g_bde_softc *sc;
  678 
  679         KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
  680         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  681         g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
  682         sc = wp->softc;
  683         if (wp->bp->bio_cmd == BIO_READ) {
  684                 wp->sp = g_bde_new_sector(wp, 0);
  685                 if (wp->sp == NULL) {
  686                         g_bde_contribute(wp->bp, wp->length, ENOMEM);
  687                         g_bde_delete_work(wp);
  688                         return;
  689                 }
  690                 wp->sp->size = wp->length;
  691                 wp->sp->data = wp->data;
  692                 if (g_bde_start_read(wp->sp) != 0) {
  693                         g_bde_contribute(wp->bp, wp->length, ENOMEM);
  694                         g_bde_delete_sector(sc, wp->sp);
  695                         g_bde_delete_work(wp);
  696                         return;
  697                 }
  698                 g_bde_read_keysector(sc, wp);
  699                 if (wp->ksp == NULL)
  700                         wp->error = ENOMEM;
  701         } else if (wp->bp->bio_cmd == BIO_DELETE) {
  702                 wp->sp = g_bde_new_sector(wp, wp->length);
  703                 if (wp->sp == NULL) {
  704                         g_bde_contribute(wp->bp, wp->length, ENOMEM);
  705                         g_bde_delete_work(wp);
  706                         return;
  707                 }
  708         } else if (wp->bp->bio_cmd == BIO_WRITE) {
  709                 wp->sp = g_bde_new_sector(wp, wp->length);
  710                 if (wp->sp == NULL) {
  711                         g_bde_contribute(wp->bp, wp->length, ENOMEM);
  712                         g_bde_delete_work(wp);
  713                         return;
  714                 }
  715                 g_bde_read_keysector(sc, wp);
  716                 if (wp->ksp == NULL) {
  717                         g_bde_contribute(wp->bp, wp->length, ENOMEM);
  718                         g_bde_delete_sector(sc, wp->sp);
  719                         g_bde_delete_work(wp);
  720                         return;
  721                 }
  722         } else {
  723                 KASSERT(0 == 1, 
  724                     ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
  725         }
  726 
  727         wp->state = WAIT;
  728         wakeup(sc);
  729 }
  730 
  731 /*
  732  * Create a sequence of work structures, and have g_bde_map_sector() determine
  733  * how long they each can be.  Feed them to g_bde_start2().
  734  */
  735 
  736 void
  737 g_bde_start1(struct bio *bp)
  738 {
  739         struct g_bde_softc *sc;
  740         struct g_bde_work *wp;
  741         off_t done;
  742 
  743         sc = bp->bio_to->geom->softc;
  744         bp->bio_driver1 = sc;
  745 
  746         mtx_lock(&sc->worklist_mutex);
  747         for(done = 0; done < bp->bio_length; ) {
  748                 wp = g_bde_new_work(sc);
  749                 if (wp != NULL) {
  750                         wp->bp = bp;
  751                         wp->offset = bp->bio_offset + done;
  752                         wp->data = bp->bio_data + done;
  753                         wp->length = bp->bio_length - done;
  754                         g_bde_map_sector(wp);
  755                         done += wp->length;
  756                         g_bde_start2(wp);
  757                 }
  758                 if (wp == NULL || bp->bio_error != 0) {
  759                         g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
  760                         break;
  761                 }
  762         }
  763         mtx_unlock(&sc->worklist_mutex);
  764         return;
  765 }

Cache object: 70c4e8d216e34af0ab7e0e6525177b96


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.