The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/bde/g_bde_work.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2002 Poul-Henning Kamp
    3  * Copyright (c) 2002 Networks Associates Technology, Inc.
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
    7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
    8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
    9  * DARPA CHATS research program.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  * $FreeBSD: src/sys/geom/bde/g_bde_work.c,v 1.24.2.1 2005/01/31 23:26:01 imp Exp $
   33  */
   34 
   35 /*
   36  * This source file contains the state-engine which makes things happen in the
   37  * right order.
   38  *
   39  * Outline:
   40  *   1) g_bde_start1()
   41  *      Break the struct bio into multiple work packets one per zone.
   42  *   2) g_bde_start2()
   43  *      Setup the necessary sector buffers and start those read operations
   44  *      which we can start at this time and put the item on the work-list.
   45  *   3) g_bde_worker()
   46  *      Scan the work-list for items which are ready for crypto processing
   47  *      and call the matching crypto function in g_bde_crypt.c and schedule
   48  *      any writes needed.  Read operations finish here by releasing the
   49  *      sector buffers and delivering the original bio request.
   50  *   4) g_bde_write_done()
   51  *      Release sector buffers and deliver the original bio request.
   52  *
   53  * Because of the C-scope rules, the functions are almost perfectly in the
   54  * opposite order in this source file.
   55  *
   56  * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
   57  * XXX: additional states to this state-engine.  Since no hardware available
   58  * XXX: at this time has AES support, implementing this has been postponed
   59  * XXX: until such time as it would result in a benefit.
   60  */
   61 
   62 #include <sys/param.h>
   63 #include <sys/bio.h>
   64 #include <sys/lock.h>
   65 #include <sys/mutex.h>
   66 #include <sys/queue.h>
   67 #include <sys/malloc.h>
   68 #include <sys/systm.h>
   69 #include <sys/kernel.h>
   70 #include <sys/sysctl.h>
   71 #include <sys/proc.h>
   72 #include <sys/kthread.h>
   73 
   74 #include <crypto/rijndael/rijndael.h>
   75 #include <crypto/sha2/sha2.h>
   76 #include <geom/geom.h>
   77 #include <geom/bde/g_bde.h>
   78 
   79 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
   80 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
   81 static void g_bde_release_keysector(struct g_bde_work *wp);
   82 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
   83 static int g_bde_start_read(struct g_bde_sector *sp);
   84 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
   85 
   86 /*
   87  * Work item allocation.
   88  *
   89  * C++ would call these constructors and destructors.
   90  */
   91 static u_int g_bde_nwork;
   92 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
   93 
   94 static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");
   95 
   96 static struct g_bde_work *
   97 g_bde_new_work(struct g_bde_softc *sc)
   98 {
   99         struct g_bde_work *wp;
  100 
  101         wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
  102         if (wp == NULL)
  103                 return (wp);
  104         wp->state = SETUP;
  105         wp->softc = sc;
  106         g_bde_nwork++;
  107         sc->nwork++;
  108         TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
  109         return (wp);
  110 }
  111 
  112 static void
  113 g_bde_delete_work(struct g_bde_work *wp)
  114 {
  115         struct g_bde_softc *sc;
  116 
  117         sc = wp->softc;
  118         g_bde_nwork--;
  119         sc->nwork--;
  120         TAILQ_REMOVE(&sc->worklist, wp, list);
  121         free(wp, M_GBDE);
  122 }
  123 
  124 /*
  125  * Sector buffer allocation
  126  *
  127  * These two functions allocate and free variable-sized sector buffers.
  128  */
  129 
  130 static u_int g_bde_nsect;
  131 SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
  132 
  133 static void
  134 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  135 {
  136 
  137         g_bde_nsect--;
  138         sc->nsect--;
  139         if (sp->malloc)
  140                 free(sp->data, M_GBDE);
  141         free(sp, M_GBDE);
  142 }
  143 
  144 static struct g_bde_sector *
  145 g_bde_new_sector(struct g_bde_work *wp, u_int len)
  146 {
  147         struct g_bde_sector *sp;
  148 
  149         sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
  150         if (sp == NULL)
  151                 return (sp);
  152         if (len > 0) {
  153                 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
  154                 if (sp->data == NULL) {
  155                         free(sp, M_GBDE);
  156                         return (NULL);
  157                 }
  158                 sp->malloc = 1;
  159         }
  160         g_bde_nsect++;
  161         wp->softc->nsect++;
  162         sp->size = len;
  163         sp->softc = wp->softc;
  164         sp->ref = 1;
  165         sp->owner = wp;
  166         sp->offset = wp->so;
  167         sp->state = JUNK;
  168         return (sp);
  169 }
  170 
  171 /*
  172  * Skey sector cache.
  173  *
  174  * Nothing prevents two separate I/O requests from addressing the same zone
  175  * and thereby needing the same skey sector.  We therefore need to sequence
  176  * I/O operations to the skey sectors.  A certain amount of caching is also
  177  * desirable, although the extent of benefit from this is not at this point
  178  * determined.
  179  *
  180  * XXX: GEOM may be able to grow a generic caching facility at some point
  181  * XXX: to support such needs.
  182  */
  183 
  184 static u_int g_bde_ncache;
  185 SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
  186 
  187 static void
  188 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
  189 {
  190 
  191         g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
  192         if (sp->ref != 0)
  193                 return;
  194         TAILQ_REMOVE(&sc->freelist, sp, list);
  195         g_bde_ncache--;
  196         sc->ncache--;
  197         bzero(sp->data, sp->size);
  198         g_bde_delete_sector(sc, sp);
  199 }
  200 
  201 static struct g_bde_sector *
  202 g_bde_get_keysector(struct g_bde_work *wp)
  203 {
  204         struct g_bde_sector *sp;
  205         struct g_bde_softc *sc;
  206         off_t offset;
  207 
  208         offset = wp->kso;
  209         g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
  210         sc = wp->softc;
  211 
  212         if (malloc_last_fail() < g_bde_ncache)
  213                 g_bde_purge_sector(sc, -1);
  214 
  215         sp = TAILQ_FIRST(&sc->freelist);
  216         if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
  217                 g_bde_purge_one_sector(sc, sp);
  218 
  219         TAILQ_FOREACH(sp, &sc->freelist, list) {
  220                 if (sp->offset == offset)
  221                         break;
  222         }
  223         if (sp != NULL) {
  224                 sp->ref++;
  225                 KASSERT(sp->offset == offset, ("wrong offset"));
  226                 KASSERT(sp->softc == wp->softc, ("wrong softc"));
  227                 if (sp->ref == 1)
  228                         sp->owner = wp;
  229         } else {
  230                 if (malloc_last_fail() < g_bde_ncache) {
  231                         TAILQ_FOREACH(sp, &sc->freelist, list)
  232                                 if (sp->ref == 0)
  233                                         break;
  234                 }
  235                 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
  236                         sp = TAILQ_FIRST(&sc->freelist);
  237                 if (sp != NULL && sp->ref > 0)
  238                         sp = NULL;
  239                 if (sp == NULL) {
  240                         sp = g_bde_new_sector(wp, sc->sectorsize);
  241                         if (sp != NULL) {
  242                                 g_bde_ncache++;
  243                                 sc->ncache++;
  244                                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  245                                 sp->malloc = 2;
  246                         }
  247                 }
  248                 if (sp != NULL) {
  249                         sp->offset = offset;
  250                         sp->softc = wp->softc;
  251                         sp->ref = 1;
  252                         sp->owner = wp;
  253                         sp->state = JUNK;
  254                         sp->error = 0;
  255                 }
  256         }
  257         if (sp != NULL) {
  258                 TAILQ_REMOVE(&sc->freelist, sp, list);
  259                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  260                 sp->used = time_uptime;
  261         }
  262         wp->ksp = sp;
  263         return(sp);
  264 }
  265 
  266 static void
  267 g_bde_release_keysector(struct g_bde_work *wp)
  268 {
  269         struct g_bde_softc *sc;
  270         struct g_bde_work *wp2;
  271         struct g_bde_sector *sp;
  272 
  273         sp = wp->ksp;
  274         g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
  275         KASSERT(sp->malloc == 2, ("Wrong sector released"));
  276         sc = sp->softc;
  277         KASSERT(sc != NULL, ("NULL sp->softc"));
  278         KASSERT(wp == sp->owner, ("Releasing, not owner"));
  279         sp->owner = NULL;
  280         wp->ksp = NULL;
  281         sp->ref--;
  282         if (sp->ref > 0) {
  283                 TAILQ_REMOVE(&sc->freelist, sp, list);
  284                 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
  285                 TAILQ_FOREACH(wp2, &sc->worklist, list) {
  286                         if (wp2->ksp == sp) {
  287                                 KASSERT(wp2 != wp, ("Self-reowning"));
  288                                 sp->owner = wp2;
  289                                 wakeup(sp->softc);
  290                                 break;
  291                         }
  292                 }
  293                 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
  294         } else if (sp->error != 0) {
  295                 sp->offset = ~0;
  296                 sp->error = 0;
  297                 sp->state = JUNK;
  298         }
  299         TAILQ_REMOVE(&sc->freelist, sp, list);
  300         TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
  301 }
  302 
  303 static void
  304 g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
  305 {
  306         struct g_bde_sector *sp;
  307         int n;
  308 
  309         g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
  310         if (fraction > 0)
  311                 n = sc->ncache / fraction + 1;
  312         else 
  313                 n = g_bde_ncache - malloc_last_fail();
  314         if (n < 0)
  315                 return;
  316         if (n > sc->ncache)
  317                 n = sc->ncache;
  318         while(n--) {
  319                 TAILQ_FOREACH(sp, &sc->freelist, list) {
  320                         if (sp->ref != 0)
  321                                 continue;
  322                         TAILQ_REMOVE(&sc->freelist, sp, list);
  323                         g_bde_ncache--;
  324                         sc->ncache--;
  325                         bzero(sp->data, sp->size);
  326                         g_bde_delete_sector(sc, sp);
  327                         break;
  328                 }
  329         }
  330 }
  331 
  332 static struct g_bde_sector *
  333 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
  334 {
  335         struct g_bde_sector *sp;
  336 
  337         g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
  338         sp = g_bde_get_keysector(wp);
  339         if (sp == NULL) {
  340                 g_bde_purge_sector(sc, -1);
  341                 sp = g_bde_get_keysector(wp);
  342         }
  343         if (sp == NULL)
  344                 return (sp);
  345         if (sp->owner != wp)
  346                 return (sp);
  347         if (sp->state == VALID)
  348                 return (sp);
  349         if (g_bde_start_read(sp) == 0)
  350                 return (sp);
  351         g_bde_release_keysector(wp);
  352         return (NULL);
  353 }
  354 
  355 /*
  356  * Contribute to the completion of the original bio request.
  357  *
  358  * We have no simple way to tell how many bits the original bio request has
  359  * been segmented into, so the easiest way to determine when we can deliver
  360  * it is to keep track of the number of bytes we have completed.  We keep
  361  * track of any errors underway and latch onto the first one.
  362  *
  363  * We always report "nothing done" in case of error, because random bits here
  364  * and there may be completed and returning a number of completed bytes does
  365  * not convey any useful information about which bytes they were.  If some
  366  * piece of broken code somewhere interprets this to mean that nothing has
  367  * changed on the underlying media they deserve the lossage headed for them.
  368  *
  369  * A single mutex per g_bde instance is used to prevent contention.
  370  */
  371 
  372 static void
  373 g_bde_contribute(struct bio *bp, off_t bytes, int error)
  374 {
  375 
  376         g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
  377              bp, (intmax_t)bytes, error);
  378         if (bp->bio_error == 0)
  379                 bp->bio_error = error;
  380         bp->bio_completed += bytes;
  381         KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
  382         if (bp->bio_completed == bp->bio_length) {
  383                 if (bp->bio_error != 0)
  384                         bp->bio_completed = 0;
  385                 g_io_deliver(bp, bp->bio_error);
  386         }
  387 }
  388 
  389 /*
  390  * This is the common case "we're done with this work package" function
  391  */
  392 
  393 static void
  394 g_bde_work_done(struct g_bde_work *wp, int error)
  395 {
  396 
  397         g_bde_contribute(wp->bp, wp->length, error);
  398         if (wp->sp != NULL)
  399                 g_bde_delete_sector(wp->softc, wp->sp);
  400         if (wp->ksp != NULL)
  401                 g_bde_release_keysector(wp);
  402         g_bde_delete_work(wp);
  403 }
  404 
  405 /*
  406  * A write operation has finished.  When we have all expected cows in the
  407  * barn close the door and call it a day.
  408  */
  409 
  410 static void
  411 g_bde_write_done(struct bio *bp)
  412 {
  413         struct g_bde_sector *sp;
  414         struct g_bde_work *wp;
  415         struct g_bde_softc *sc;
  416 
  417         sp = bp->bio_caller1;
  418         sc = bp->bio_caller2;
  419         mtx_lock(&sc->worklist_mutex);
  420         KASSERT(sp != NULL, ("NULL sp"));
  421         KASSERT(sc != NULL, ("NULL sc"));
  422         KASSERT(sp->owner != NULL, ("NULL sp->owner"));
  423         g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
  424         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  425                 bp->bio_error = EIO;
  426         sp->error = bp->bio_error;
  427         g_destroy_bio(bp);
  428         wp = sp->owner;
  429         if (wp->error == 0)
  430                 wp->error = sp->error;
  431 
  432         if (wp->bp->bio_cmd == BIO_DELETE) {
  433                 KASSERT(sp == wp->sp, ("trashed delete op"));
  434                 g_bde_work_done(wp, wp->error);
  435                 mtx_unlock(&sc->worklist_mutex);
  436                 return;
  437         }
  438 
  439         KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
  440         KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
  441         if (wp->sp == sp) {
  442                 g_bde_delete_sector(sc, wp->sp);
  443                 wp->sp = NULL;
  444         } else {
  445                 sp->state = VALID;
  446         }
  447         if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
  448                 g_bde_work_done(wp, wp->error);
  449         mtx_unlock(&sc->worklist_mutex);
  450         return;
  451 }
  452 
  453 /*
  454  * Send a write request for the given sector down the pipeline.
  455  */
  456 
  457 static int
  458 g_bde_start_write(struct g_bde_sector *sp)
  459 {
  460         struct bio *bp;
  461         struct g_bde_softc *sc;
  462 
  463         g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
  464         sc = sp->softc;
  465         KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
  466         KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
  467         bp = g_new_bio();
  468         if (bp == NULL)
  469                 return (ENOMEM);
  470         bp->bio_cmd = BIO_WRITE;
  471         bp->bio_offset = sp->offset;
  472         bp->bio_data = sp->data;
  473         bp->bio_length = sp->size;
  474         bp->bio_done = g_bde_write_done;
  475         bp->bio_caller1 = sp;
  476         bp->bio_caller2 = sc;
  477         sp->state = IO;
  478         g_io_request(bp, sc->consumer);
  479         return(0);
  480 }
  481 
  482 /*
  483  * A read operation has finished.  Mark the sector no longer iobusy and
  484  * wake up the worker thread and let it do its thing.
  485  */
  486 
  487 static void
  488 g_bde_read_done(struct bio *bp)
  489 {
  490         struct g_bde_sector *sp;
  491         struct g_bde_softc *sc;
  492 
  493         sp = bp->bio_caller1;
  494         g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
  495         sc = bp->bio_caller2;
  496         mtx_lock(&sc->worklist_mutex);
  497         if (bp->bio_error == 0 && bp->bio_completed != sp->size)
  498                 bp->bio_error = EIO;
  499         sp->error = bp->bio_error;
  500         if (sp->error == 0)
  501                 sp->state = VALID;
  502         else
  503                 sp->state = JUNK;
  504         wakeup(sc);
  505         g_destroy_bio(bp);
  506         mtx_unlock(&sc->worklist_mutex);
  507 }
  508 
  509 /*
  510  * Send a read request for the given sector down the pipeline.
  511  */
  512 
  513 static int
  514 g_bde_start_read(struct g_bde_sector *sp)
  515 {
  516         struct bio *bp;
  517         struct g_bde_softc *sc;
  518 
  519         g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
  520         sc = sp->softc;
  521         KASSERT(sc != NULL, ("Null softc in sp %p", sp));
  522         bp = g_new_bio();
  523         if (bp == NULL)
  524                 return (ENOMEM);
  525         bp->bio_cmd = BIO_READ;
  526         bp->bio_offset = sp->offset;
  527         bp->bio_data = sp->data;
  528         bp->bio_length = sp->size;
  529         bp->bio_done = g_bde_read_done;
  530         bp->bio_caller1 = sp;
  531         bp->bio_caller2 = sc;
  532         sp->state = IO;
  533         g_io_request(bp, sc->consumer);
  534         return(0);
  535 }
  536 
  537 /*
  538  * The worker thread.
  539  *
  540  * The up/down path of GEOM is not allowed to sleep or do any major work
  541  * so we use this thread to do the actual crypto operations and to push
  542  * the state engine onwards.
  543  *
  544  * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
  545  * XXX: using a thread here is probably not needed.
  546  */
  547 
  548 void
  549 g_bde_worker(void *arg)
  550 {
  551         struct g_bde_softc *sc;
  552         struct g_bde_work *wp, *twp;
  553         struct g_geom *gp;
  554         int restart, error;
  555 
  556         gp = arg;
  557         sc = gp->softc;
  558 
  559         mtx_lock(&sc->worklist_mutex);
  560         for (;;) {
  561                 restart = 0;
  562                 g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
  563                 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
  564                         KASSERT(wp != NULL, ("NULL wp"));
  565                         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  566                         if (wp->state != WAIT)
  567                                 continue;       /* Not interesting here */
  568 
  569                         KASSERT(wp->bp != NULL, ("NULL wp->bp"));
  570                         KASSERT(wp->sp != NULL, ("NULL wp->sp"));
  571 
  572                         if (wp->ksp != NULL) {
  573                                 if (wp->ksp->owner != wp)
  574                                         continue;
  575                                 if (wp->ksp->state == IO)
  576                                         continue;
  577                                 KASSERT(wp->ksp->state == VALID,
  578                                     ("Illegal sector state (%d)",
  579                                     wp->ksp->state));
  580                         }
  581 
  582                         if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
  583                                 continue;
  584 
  585                         if (wp->ksp != NULL && wp->ksp->error != 0) {
  586                                 g_bde_work_done(wp, wp->ksp->error);
  587                                 continue;
  588                         } 
  589                         switch(wp->bp->bio_cmd) {
  590                         case BIO_READ:
  591                                 if (wp->ksp == NULL) {
  592                                         KASSERT(wp->error != 0,
  593                                             ("BIO_READ, no ksp and no error"));
  594                                         g_bde_work_done(wp, wp->error);
  595                                         break;
  596                                 }
  597                                 if (wp->sp->error != 0) {
  598                                         g_bde_work_done(wp, wp->sp->error);
  599                                         break;
  600                                 }
  601                                 mtx_unlock(&sc->worklist_mutex);
  602                                 g_bde_crypt_read(wp);
  603                                 mtx_lock(&sc->worklist_mutex);
  604                                 restart++;
  605                                 g_bde_work_done(wp, wp->sp->error);
  606                                 break;
  607                         case BIO_WRITE:
  608                                 wp->state = FINISH;
  609                                 KASSERT(wp->sp->owner == wp,
  610                                     ("Write not owner sp"));
  611                                 KASSERT(wp->ksp->owner == wp,
  612                                     ("Write not owner ksp"));
  613                                 mtx_unlock(&sc->worklist_mutex);
  614                                 g_bde_crypt_write(wp);
  615                                 mtx_lock(&sc->worklist_mutex);
  616                                 restart++;
  617                                 error = g_bde_start_write(wp->sp);
  618                                 if (error) {
  619                                         g_bde_work_done(wp, error);
  620                                         break;
  621                                 }
  622                                 error = g_bde_start_write(wp->ksp);
  623                                 if (wp->error != 0)
  624                                         wp->error = error;
  625                                 break;
  626                         case BIO_DELETE:
  627                                 wp->state = FINISH;
  628                                 mtx_unlock(&sc->worklist_mutex);
  629                                 g_bde_crypt_delete(wp);
  630                                 mtx_lock(&sc->worklist_mutex);
  631                                 restart++;
  632                                 g_bde_start_write(wp->sp);
  633                                 break;
  634                         }
  635                         if (restart)
  636                                 break;
  637                 }
  638                 if (!restart) {
  639                         /*
  640                          * We don't look for our death-warrant until we are
  641                          * idle.  Shouldn't make a difference in practice.
  642                          */
  643                         if (sc->dead)
  644                                 break;
  645                         g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
  646                         error = msleep(sc, &sc->worklist_mutex,
  647                             PRIBIO, "-", hz);
  648                         if (error == EWOULDBLOCK) {
  649                                 /*
  650                                  * Loose our skey cache in an orderly fashion.
  651                                  * The exact rate can be tuned to be less
  652                                  * aggressive if this is desirable.  10% per
  653                                  * second means that the cache is gone in a
  654                                  * few minutes.
  655                                  */
  656                                 g_bde_purge_sector(sc, 10);
  657                         }
  658                 }
  659         }
  660         g_trace(G_T_TOPOLOGY, "g_bde_worker die");
  661         g_bde_purge_sector(sc, 1);
  662         KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
  663         KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
  664         KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
  665         mtx_unlock(&sc->worklist_mutex);
  666         sc->dead = 2;
  667         wakeup(sc);
  668         kthread_exit(0);
  669 }
  670 
  671 /*
  672  * g_bde_start1 has chopped the incoming request up so all the requests
  673  * we see here are inside a single zone.  Map the data and key locations
  674  * grab the buffers we need and fire off the first volley of read requests.
  675  */
  676 
  677 static void
  678 g_bde_start2(struct g_bde_work *wp)
  679 {
  680         struct g_bde_softc *sc;
  681 
  682         KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
  683         KASSERT(wp->softc != NULL, ("NULL wp->softc"));
  684         g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
  685         sc = wp->softc;
  686         switch (wp->bp->bio_cmd) {
  687         case BIO_READ:
  688                 wp->sp = g_bde_new_sector(wp, 0);
  689                 if (wp->sp == NULL) {
  690                         g_bde_work_done(wp, ENOMEM);
  691                         return;
  692                 }
  693                 wp->sp->size = wp->length;
  694                 wp->sp->data = wp->data;
  695                 if (g_bde_start_read(wp->sp) != 0) {
  696                         g_bde_work_done(wp, ENOMEM);
  697                         return;
  698                 }
  699                 g_bde_read_keysector(sc, wp);
  700                 if (wp->ksp == NULL)
  701                         wp->error = ENOMEM;
  702                 break;
  703         case BIO_DELETE:
  704                 wp->sp = g_bde_new_sector(wp, wp->length);
  705                 if (wp->sp == NULL) {
  706                         g_bde_work_done(wp, ENOMEM);
  707                         return;
  708                 }
  709                 break;
  710         case BIO_WRITE:
  711                 wp->sp = g_bde_new_sector(wp, wp->length);
  712                 if (wp->sp == NULL) {
  713                         g_bde_work_done(wp, ENOMEM);
  714                         return;
  715                 }
  716                 g_bde_read_keysector(sc, wp);
  717                 if (wp->ksp == NULL) {
  718                         g_bde_work_done(wp, ENOMEM);
  719                         return;
  720                 }
  721                 break;
  722         default:
  723                 KASSERT(0 == 1, 
  724                     ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
  725         }
  726 
  727         wp->state = WAIT;
  728         wakeup(sc);
  729 }
  730 
  731 /*
  732  * Create a sequence of work structures, and have g_bde_map_sector() determine
  733  * how long they each can be.  Feed them to g_bde_start2().
  734  */
  735 
  736 void
  737 g_bde_start1(struct bio *bp)
  738 {
  739         struct g_bde_softc *sc;
  740         struct g_bde_work *wp;
  741         off_t done;
  742 
  743         sc = bp->bio_to->geom->softc;
  744         bp->bio_driver1 = sc;
  745 
  746         mtx_lock(&sc->worklist_mutex);
  747         for(done = 0; done < bp->bio_length; ) {
  748                 wp = g_bde_new_work(sc);
  749                 if (wp != NULL) {
  750                         wp->bp = bp;
  751                         wp->offset = bp->bio_offset + done;
  752                         wp->data = bp->bio_data + done;
  753                         wp->length = bp->bio_length - done;
  754                         g_bde_map_sector(wp);
  755                         done += wp->length;
  756                         g_bde_start2(wp);
  757                 }
  758                 if (wp == NULL || bp->bio_error != 0) {
  759                         g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
  760                         break;
  761                 }
  762         }
  763         mtx_unlock(&sc->worklist_mutex);
  764         return;
  765 }

Cache object: 98db283655268f84ef159cd7199fc0bf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.