g_bde_work.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2002 Poul-Henning Kamp
    5  * Copyright (c) 2002 Networks Associates Technology, Inc.
    6  * All rights reserved.
    7  *
    8  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
    9  * and NAI Labs, the Security Research Division of Network Associates, Inc.
   10  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
   11  * DARPA CHATS research program.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  * $FreeBSD: releng/12.0/sys/geom/bde/g_bde_work.c 326270 2017-11-27 15:17:37Z pfg $
   35  */
   36 /*
   37  * This source file contains the state-engine which makes things happen in the
   38  * right order.
   39  *
   40  * Outline:
   41  *   1) g_bde_start1()
   42  *      Break the struct bio into multiple work packets one per zone.
   43  *   2) g_bde_start2()
   44  *      Setup the necessary sector buffers and start those read operations
   45  *      which we can start at this time and put the item on the work-list.
   46  *   3) g_bde_worker()
   47  *      Scan the work-list for items which are ready for crypto processing
   48  *      and call the matching crypto function in g_bde_crypt.c and schedule
   49  *      any writes needed.  Read operations finish here by releasing the
   50  *      sector buffers and delivering the original bio request.
   51  *   4) g_bde_write_done()
   52  *      Release sector buffers and deliver the original bio request.
   53  *
   54  * Because of the C-scope rules, the functions are almost perfectly in the
   55  * opposite order in this source file.
   56  *
   57  * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
   58  * XXX: additional states to this state-engine.  Since no hardware available
   59  * XXX: at this time has AES support, implementing this has been postponed
   60  * XXX: until such time as it would result in a benefit.
   61  */
   62 
   63 #include <sys/param.h>
   64 #include <sys/bio.h>
   65 #include <sys/lock.h>
   66 #include <sys/mutex.h>
   67 #include <sys/queue.h>
   68 #include <sys/malloc.h>
   69 #include <sys/systm.h>
   70 #include <sys/kernel.h>
   71 #include <sys/sysctl.h>
   72 #include <sys/proc.h>
   73 #include <sys/kthread.h>
   74 
   75 #include <crypto/rijndael/rijndael-api-fst.h>
   76 #include <crypto/sha2/sha512.h>
   77 #include <geom/geom.h>
   78 #include <geom/bde/g_bde.h>
   79 
   80 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
   81 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
   82 static void g_bde_release_keysector(struct g_bde_work *wp);
   83 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
   84 static int g_bde_start_read(struct g_bde_sector *sp);
   85 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
   86 
   87 /*
   88  * Work item allocation.
   89  *
   90  * C++ would call these constructors and destructors.
   91  */
   92 static u_int g_bde_nwork;
   93 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
   94 
   95 static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");
   96 
   97 static struct g_bde_work *
   98 g_bde_new_work(struct g_bde_softc *sc)
   99 {
  100         struct g_bde_work *wp;
  101 
  102         wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
  103         if (wp == NULL)
  104                 return (wp);
  105         wp->state = SETUP;
  106         wp->softc = sc;
  107         g_bde_nwork++;
  108         sc->nwork++;
  109         TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
  110         return (wp);
  111 }
  112 
  113 static void
  114 g_bde_delete_work(struct g_bde_work *wp)
  115 {
  116         struct g_bde_softc *sc;
  117 
  118         sc = wp->softc;
  119         g_bde_nwork--;
  120         sc->nwork--;
  121         TAILQ_REMOVE(&sc->worklist, wp, list);
  122         free(wp, M_GBDE);
  123 }
  124 
  125 /*
  126  * Sector buffer allocation
  127  *
  128  * These two functions allocate and free variable-sized sector buffers.
  129  */
  130 
  131 static u_int g_bde_nsect;
  132 SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
  133 
  134 static void
  135 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  136 {
  137 
  138         g_bde_nsect--;
  139         sc->nsect--;
  140         if (sp->malloc)
  141                 free(sp->data, M_GBDE);
  142         free(sp, M_GBDE);
  143 }
  144 
  145 static struct g_bde_sector *
  146 g_bde_new_sector(struct g_bde_work *wp, u_int len)
  147 {
  148         struct g_bde_sector *sp;
  149 
  150         sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
  151         if (sp == NULL)
  152                 return (sp);
  153         if (len > 0) {
  154                 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
  155                 if (sp->data == NULL) {
  156                         free(sp, M_GBDE);
  157                         return (NULL);
  158                 }
  159                 sp->malloc = 1;
  160         }
  161         g_bde_nsect++;
  162         wp->softc->nsect++;
  163         sp->size = len;
  164         sp->softc = wp->softc;
  165         sp->ref = 1;
  166         sp->owner = wp;
  167         sp->offset = wp->so;
  168         sp->state = JUNK;
  169         return (sp);
  170 }
  171 
  172 /*
  173  * Skey sector cache.
  174  *
  175  * Nothing prevents two separate I/O requests from addressing the same zone
  176  * and thereby needing the same skey sector.  We therefore need to sequence
  177  * I/O operations to the skey sectors.  A certain amount of caching is also
  178  * desirable, although the extent of benefit from this is not at this point
  179  * determined.
  180  *
  181  * XXX: GEOM may be able to grow a generic caching facility at some point
  182  * XXX: to support such needs.
  183  */
  184 
  185 static u_int g_bde_ncache;
  186 SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
  187 
  188 static void
  189 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  190 {
  191 
  192         g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
  193         if (sp->ref != 0)
  194                 return;
  195         TAILQ_REMOVE(&sc->freelist, sp, list);
  196         g_bde_ncache--;
  197         sc->ncache--;
  198         bzero(sp->data, sp->size);
  199         g_bde_delete_sector(sc, sp);
  200 }
  201 
  202 static struct g_bde_sector *
  203 g_bde_get_keysector(struct g_bde_work *wp)
  204 {
  205         struct g_bde_sector *sp;
  206         struct g_bde_softc *sc;
  207         off_t offset;
  208 
  209         offset = wp->kso;
  210         g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
  211         sc = wp->softc;
  212 
  213         if (malloc_last_fail() < g_bde_ncache)
  214                 g_bde_purge_sector(sc, -1);
  215 
  216         sp = TAILQ_FIRST(&sc->freelist);
  217         if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
  218                 g_bde_purge_one_sector(sc, sp);
  219 
  220         TAILQ_FOREACH(sp, &sc->freelist, list) {
  221                 if (sp->offset == offset)
  222                         break;
  223         }
  224         if (sp != NULL) {
  225                 sp->ref++;
  226                 KASSERT(sp->offset == offset, ("wrong offset"));
  227                 KASSERT(sp->softc == wp->softc, ("wrong softc"));
  228                 if (sp->ref == 1)
  229                         sp->owner = wp;
  230         } else {
  231                 if (malloc_last_fail() < g_bde_ncache) {
  232                         TAILQ_FOREACH(sp, &sc->freelist, list)
  233                                 if (sp->ref == 0)
  234                                         break;
  235                 }
  236                 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
  237                         sp = TAILQ_FIRST(&sc->freelist);
  238                 if (sp != NULL && sp->ref > 0)
  239                         sp = NULL;
  240                 if (sp == NULL) {
  241                         sp = g_bde_new_sector(wp, sc->sectorsize);
  242                         if (sp != NULL) {
  243                                 g_bde_ncache++;
  244                                 sc->ncache++;
  245                                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  246                                 sp->malloc = 2;
  247                         }
  248                 }
  249                 if (sp != NULL) {
  250                         sp->offset = offset;
  251                         sp->softc = wp->softc;
  252                         sp->ref = 1;
  253                         sp->owner = wp;
  254                         sp->state = JUNK;
  255                         sp->error = 0;
  256                 }
  257         }
  258         if (sp != NULL) {
  259                 TAILQ_REMOVE(&sc->freelist, sp, list);
  260                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  261                 sp->used = time_uptime;
  262         }
  263         wp->ksp = sp;
  264         return(sp);
  265 }
  266 
  267 static void
  268 g_bde_release_keysector(struct g_bde_work *wp)
  269 {
  270         struct g_bde_softc *sc;
  271         struct g_bde_work *wp2;
  272         struct g_bde_sector *sp;
  273 
  274         sp = wp->ksp;
  275         g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
  276         KASSERT(sp->malloc == 2, ("Wrong sector released"));
  277         sc = sp->softc;
  278         KASSERT(sc != NULL, ("NULL sp->softc"));
  279         KASSERT(wp == sp->owner, ("Releasing, not owner"));
  280         sp->owner = NULL;
  281         wp->ksp = NULL;
  282         sp->ref--;
  283         if (sp->ref > 0) {
  284                 TAILQ_REMOVE(&sc->freelist, sp, list);
  285                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  286                 TAILQ_FOREACH(wp2, &sc->worklist, list) {
  287                         if (wp2->ksp == sp) {
  288                                 KASSERT(wp2 != wp, ("Self-reowning"));
  289                                 sp->owner = wp2;
  290                                 wakeup(sp->softc);
  291                                 break;
  292                         }
  293                 }
  294                 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
  295         } else if (sp->error != 0) {
  296                 sp->offset = ~0;
  297                 sp->error = 0;
  298                 sp->state = JUNK;
  299         }
  300         TAILQ_REMOVE(&sc->freelist, sp, list);
  301         TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
  302 }
  303 
  304 static void
  305 g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
  306 {
  307         struct g_bde_sector *sp;
  308         int n;
  309 
  310         g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
  311         if (fraction > 0)
  312                 n = sc->ncache / fraction + 1;
  313         else 
  314                 n = g_bde_ncache - malloc_last_fail();
  315         if (n < 0)
  316                 return;
  317         if (n > sc->ncache)
  318                 n = sc->ncache;
  319         while(n--) {
  320                 TAILQ_FOREACH(sp, &sc->freelist, list) {
  321                         if (sp->ref != 0)
  322                                 continue;
  323                         TAILQ_REMOVE(&sc->freelist, sp, list);
  324                         g_bde_ncache--;
  325                         sc->ncache--;
  326                         bzero(sp->data, sp->size);
  327                         g_bde_delete_sector(sc, sp);
  328                         break;
  329                 }
  330         }
  331 }
  332 
  333 static struct g_bde_sector *
  334 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
  335 {
  336         struct g_bde_sector *sp;
  337 
  338         g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
  339         sp = g_bde_get_keysector(wp);
  340         if (sp == NULL) {
  341                 g_bde_purge_sector(sc, -1);
  342                 sp = g_bde_get_keysector(wp);
  343         }
  344         if (sp == NULL)
  345                 return (sp);
  346         if (sp->owner != wp)
  347                 return (sp);
  348         if (sp->state == VALID)
  349                 return (sp);
  350         if (g_bde_start_read(sp) == 0)
  351                 return (sp);
  352         g_bde_release_keysector(wp);
  353         return (NULL);
  354 }
  355 
  356 /*
  357  * Contribute to the completion of the original bio request.
  358  *
  359  * We have no simple way to tell how many bits the original bio request has
  360  * been segmented into, so the easiest way to determine when we can deliver
  361  * it is to keep track of the number of bytes we have completed.  We keep
  362  * track of any errors underway and latch onto the first one.
  363  *
  364  * We always report "nothing done" in case of error, because random bits here
  365  * and there may be completed and returning a number of completed bytes does
  366  * not convey any useful information about which bytes they were.  If some
  367  * piece of broken code somewhere interprets this to mean that nothing has
  368  * changed on the underlying media they deserve the lossage headed for them.
  369  *
  370  * A single mutex per g_bde instance is used to prevent contention.
  371  */
  372 
  373 static void
  374 g_bde_contribute(struct bio *bp, off_t bytes, int error)
  375 {
  376 
  377         g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
  378              bp, (intmax_t)bytes, error);
  379         if (bp->bio_error == 0)
  380                 bp->bio_error = error;
  381         bp->bio_completed += bytes;
  382         KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
  383         if (bp->bio_completed == bp->bio_length) {
  384                 if (bp->bio_error != 0)
  385                         bp->bio_completed = 0;
  386                 g_io_deliver(bp, bp->bio_error);
  387         }
  388 }
  389 
  390 /*
  391  * This is the common case "we're done with this work package" function
  392  */
  393 
  394 static void
  395 g_bde_work_done(struct g_bde_work *wp, int error)
  396 {
  397 
  398         g_bde_contribute(wp->bp, wp->length, error);
  399         if (wp->sp != NULL)
  400                 g_bde_delete_sector(wp->softc, wp->sp);
  401         if (wp->ksp != NULL)
  402                 g_bde_release_keysector(wp);
  403         g_bde_delete_work(wp);
  404 }
  405 
  406 /*
  407  * A write operation has finished.  When we have all expected cows in the
  408  * barn close the door and call it a day.
  409  */
  410 
  411 static void
  412 g_bde_write_done(struct bio *bp)
  413 {
  414         struct g_bde_sector *sp;
  415         struct g_bde_work *wp;
  416         struct g_bde_softc *sc;
  417 
  418         sp = bp->bio_caller1;
  419         sc = bp->bio_caller2;
  420         mtx_lock(&sc->worklist_mutex);
  421         KASSERT(sp != NULL, ("NULL sp"));
  422         KASSERT(sc != NULL, ("NULL sc"));
  423         KASSERT(sp->owner != NULL, ("NULL sp->owner"));
  424         g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
  425         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  426                 bp->bio_error = EIO;
  427         sp->error = bp->bio_error;
  428         g_destroy_bio(bp);
  429         wp = sp->owner;
  430         if (wp->error == 0)
  431                 wp->error = sp->error;
  432 
  433         if (wp->bp->bio_cmd == BIO_DELETE) {
  434                 KASSERT(sp == wp->sp, ("trashed delete op"));
  435                 g_bde_work_done(wp, wp->error);
  436                 mtx_unlock(&sc->worklist_mutex);
  437                 return;
  438         }
  439 
  440         KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
  441         KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
  442         if (wp->sp == sp) {
  443                 g_bde_delete_sector(sc, wp->sp);
  444                 wp->sp = NULL;
  445         } else {
  446                 sp->state = VALID;
  447         }
  448         if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
  449                 g_bde_work_done(wp, wp->error);
  450         mtx_unlock(&sc->worklist_mutex);
  451         return;
  452 }
  453 
  454 /*
  455  * Send a write request for the given sector down the pipeline.
  456  */
  457 
  458 static int
  459 g_bde_start_write(struct g_bde_sector *sp)
  460 {
  461         struct bio *bp;
  462         struct g_bde_softc *sc;
  463 
  464         g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
  465         sc = sp->softc;
  466         KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
  467         KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
  468         bp = g_new_bio();
  469         if (bp == NULL)
  470                 return (ENOMEM);
  471         bp->bio_cmd = BIO_WRITE;
  472         bp->bio_offset = sp->offset;
  473         bp->bio_data = sp->data;
  474         bp->bio_length = sp->size;
  475         bp->bio_done = g_bde_write_done;
  476         bp->bio_caller1 = sp;
  477         bp->bio_caller2 = sc;
  478         sp->state = IO;
  479         g_io_request(bp, sc->consumer);
  480         return(0);
  481 }
  482 
  483 /*
  484  * A read operation has finished.  Mark the sector no longer iobusy and
  485  * wake up the worker thread and let it do its thing.
  486  */
  487 
  488 static void
  489 g_bde_read_done(struct bio *bp)
  490 {
  491         struct g_bde_sector *sp;
  492         struct g_bde_softc *sc;
  493 
  494         sp = bp->bio_caller1;
  495         g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
  496         sc = bp->bio_caller2;
  497         mtx_lock(&sc->worklist_mutex);
  498         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  499                 bp->bio_error = EIO;
  500         sp->error = bp->bio_error;
  501         if (sp->error == 0)
  502                 sp->state = VALID;
  503         else
  504                 sp->state = JUNK;
  505         wakeup(sc);
  506         g_destroy_bio(bp);
  507         mtx_unlock(&sc->worklist_mutex);
  508 }
  509 
  510 /*
  511  * Send a read request for the given sector down the pipeline.
  512  */
  513 
  514 static int
  515 g_bde_start_read(struct g_bde_sector *sp)
  516 {
  517         struct bio *bp;
  518         struct g_bde_softc *sc;
  519 
  520         g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
  521         sc = sp->softc;
  522         KASSERT(sc != NULL, ("Null softc in sp %p", sp));
  523         bp = g_new_bio();
  524         if (bp == NULL)
  525                 return (ENOMEM);
  526         bp->bio_cmd = BIO_READ;
  527         bp->bio_offset = sp->offset;
  528         bp->bio_data = sp->data;
  529         bp->bio_length = sp->size;
  530         bp->bio_done = g_bde_read_done;
  531         bp->bio_caller1 = sp;
  532         bp->bio_caller2 = sc;
  533         sp->state = IO;
  534         g_io_request(bp, sc->consumer);
  535         return(0);
  536 }
  537 
  538 /*
  539  * The worker thread.
  540  *
  541  * The up/down path of GEOM is not allowed to sleep or do any major work
  542  * so we use this thread to do the actual crypto operations and to push
  543  * the state engine onwards.
  544  *
  545  * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption
  546  * XXX: using a thread here is probably not needed.
  547  */
  548 
  549 void
  550 g_bde_worker(void *arg)
  551 {
  552         struct g_bde_softc *sc;
  553         struct g_bde_work *wp, *twp;
  554         struct g_geom *gp;
  555         int restart, error;
  556 
  557         gp = arg;
  558         sc = gp->softc;
  559 
  560         mtx_lock(&sc->worklist_mutex);
  561         for (;;) {
  562                 restart = 0;
  563                 g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
  564                 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
  565                         KASSERT(wp != NULL, ("NULL wp"));
  566                         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  567                         if (wp->state != WAIT)
  568                                 continue;       /* Not interesting here */
  569 
  570                         KASSERT(wp->bp != NULL, ("NULL wp->bp"));
  571                         KASSERT(wp->sp != NULL, ("NULL wp->sp"));
  572 
  573                         if (wp->ksp != NULL) {
  574                                 if (wp->ksp->owner != wp)
  575                                         continue;
  576                                 if (wp->ksp->state == IO)
  577                                         continue;
  578                                 KASSERT(wp->ksp->state == VALID,
  579                                     ("Illegal sector state (%d)",
  580                                     wp->ksp->state));
  581                         }
  582 
  583                         if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
  584                                 continue;
  585 
  586                         if (wp->ksp != NULL && wp->ksp->error != 0) {
  587                                 g_bde_work_done(wp, wp->ksp->error);
  588                                 continue;
  589                         } 
  590                         switch(wp->bp->bio_cmd) {
  591                         case BIO_READ:
  592                                 if (wp->ksp == NULL) {
  593                                         KASSERT(wp->error != 0,
  594                                             ("BIO_READ, no ksp and no error"));
  595                                         g_bde_work_done(wp, wp->error);
  596                                         break;
  597                                 }
  598                                 if (wp->sp->error != 0) {
  599                                         g_bde_work_done(wp, wp->sp->error);
  600                                         break;
  601                                 }
  602                                 mtx_unlock(&sc->worklist_mutex);
  603                                 g_bde_crypt_read(wp);
  604                                 mtx_lock(&sc->worklist_mutex);
  605                                 restart++;
  606                                 g_bde_work_done(wp, wp->sp->error);
  607                                 break;
  608                         case BIO_WRITE:
  609                                 wp->state = FINISH;
  610                                 KASSERT(wp->sp->owner == wp,
  611                                     ("Write not owner sp"));
  612                                 KASSERT(wp->ksp->owner == wp,
  613                                     ("Write not owner ksp"));
  614                                 mtx_unlock(&sc->worklist_mutex);
  615                                 g_bde_crypt_write(wp);
  616                                 mtx_lock(&sc->worklist_mutex);
  617                                 restart++;
  618                                 error = g_bde_start_write(wp->sp);
  619                                 if (error) {
  620                                         g_bde_work_done(wp, error);
  621                                         break;
  622                                 }
  623                                 error = g_bde_start_write(wp->ksp);
  624                                 if (wp->error != 0)
  625                                         wp->error = error;
  626                                 break;
  627                         case BIO_DELETE:
  628                                 wp->state = FINISH;
  629                                 mtx_unlock(&sc->worklist_mutex);
  630                                 g_bde_crypt_delete(wp);
  631                                 mtx_lock(&sc->worklist_mutex);
  632                                 restart++;
  633                                 g_bde_start_write(wp->sp);
  634                                 break;
  635                         }
  636                         if (restart)
  637                                 break;
  638                 }
  639                 if (!restart) {
  640                         /*
  641                          * We don't look for our death-warrant until we are
  642                          * idle.  Shouldn't make a difference in practice.
  643                          */
  644                         if (sc->dead)
  645                                 break;
  646                         g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
  647                         error = msleep(sc, &sc->worklist_mutex,
  648                             PRIBIO, "-", hz);
  649                         if (error == EWOULDBLOCK) {
  650                                 /*
  651                                  * Lose our skey cache in an orderly fashion.
  652                                  * The exact rate can be tuned to be less
  653                                  * aggressive if this is desirable.  10% per
  654                                  * second means that the cache is gone in a
  655                                  * few minutes.
  656                                  */
  657                                 g_bde_purge_sector(sc, 10);
  658                         }
  659                 }
  660         }
  661         g_trace(G_T_TOPOLOGY, "g_bde_worker die");
  662         g_bde_purge_sector(sc, 1);
  663         KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
  664         KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
  665         KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
  666         mtx_unlock(&sc->worklist_mutex);
  667         sc->dead = 2;
  668         wakeup(sc);
  669         kproc_exit(0);
  670 }
  671 
  672 /*
  673  * g_bde_start1 has chopped the incoming request up so all the requests
  674  * we see here are inside a single zone.  Map the data and key locations
  675  * grab the buffers we need and fire off the first volley of read requests.
  676  */
  677 
  678 static void
  679 g_bde_start2(struct g_bde_work *wp)
  680 {
  681         struct g_bde_softc *sc;
  682 
  683         KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
  684         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  685         g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
  686         sc = wp->softc;
  687         switch (wp->bp->bio_cmd) {
  688         case BIO_READ:
  689                 wp->sp = g_bde_new_sector(wp, 0);
  690                 if (wp->sp == NULL) {
  691                         g_bde_work_done(wp, ENOMEM);
  692                         return;
  693                 }
  694                 wp->sp->size = wp->length;
  695                 wp->sp->data = wp->data;
  696                 if (g_bde_start_read(wp->sp) != 0) {
  697                         g_bde_work_done(wp, ENOMEM);
  698                         return;
  699                 }
  700                 g_bde_read_keysector(sc, wp);
  701                 if (wp->ksp == NULL)
  702                         wp->error = ENOMEM;
  703                 break;
  704         case BIO_DELETE:
  705                 wp->sp = g_bde_new_sector(wp, wp->length);
  706                 if (wp->sp == NULL) {
  707                         g_bde_work_done(wp, ENOMEM);
  708                         return;
  709                 }
  710                 break;
  711         case BIO_WRITE:
  712                 wp->sp = g_bde_new_sector(wp, wp->length);
  713                 if (wp->sp == NULL) {
  714                         g_bde_work_done(wp, ENOMEM);
  715                         return;
  716                 }
  717                 g_bde_read_keysector(sc, wp);
  718                 if (wp->ksp == NULL) {
  719                         g_bde_work_done(wp, ENOMEM);
  720                         return;
  721                 }
  722                 break;
  723         default:
  724                 KASSERT(0 == 1, 
  725                     ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
  726         }
  727 
  728         wp->state = WAIT;
  729         wakeup(sc);
  730 }
  731 
  732 /*
  733  * Create a sequence of work structures, and have g_bde_map_sector() determine
  734  * how long they each can be.  Feed them to g_bde_start2().
  735  */
  736 
  737 void
  738 g_bde_start1(struct bio *bp)
  739 {
  740         struct g_bde_softc *sc;
  741         struct g_bde_work *wp;
  742         off_t done;
  743 
  744         sc = bp->bio_to->geom->softc;
  745         bp->bio_driver1 = sc;
  746 
  747         mtx_lock(&sc->worklist_mutex);
  748         for(done = 0; done < bp->bio_length; ) {
  749                 wp = g_bde_new_work(sc);
  750                 if (wp != NULL) {
  751                         wp->bp = bp;
  752                         wp->offset = bp->bio_offset + done;
  753                         wp->data = bp->bio_data + done;
  754                         wp->length = bp->bio_length - done;
  755                         g_bde_map_sector(wp);
  756                         done += wp->length;
  757                         g_bde_start2(wp);
  758                 }
  759                 if (wp == NULL || bp->bio_error != 0) {
  760                         g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
  761                         break;
  762                 }
  763         }
  764         mtx_unlock(&sc->worklist_mutex);
  765         return;
  766 }
Cache object: 0a8f397a034cdebc5150cc34838fb10e
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/geom/bde/g_bde_work.c

FreeBSD/Linux Kernel Cross Reference
sys/geom/bde/g_bde_work.c