FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_wapbl.c


    1 /*      $NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $    */
    2 
    3 /*-
    4  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Wasabi Systems, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
    33  * This implements file-system-independent write-ahead logging (WAPBL).
   34  */
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $");
   37 
   38 #include <sys/param.h>
   39 
   40 #ifdef _KERNEL
   41 #include <sys/param.h>
   42 #include <sys/namei.h>
   43 #include <sys/proc.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/uio.h>
   46 #include <sys/vnode.h>
   47 #include <sys/file.h>
   48 #include <sys/malloc.h>
   49 #include <sys/resourcevar.h>
   50 #include <sys/conf.h>
   51 #include <sys/mount.h>
   52 #include <sys/kernel.h>
   53 #include <sys/kauth.h>
   54 #include <sys/mutex.h>
   55 #include <sys/atomic.h>
   56 #include <sys/wapbl.h>
   57 
   58 #if WAPBL_UVM_ALLOC
   59 #include <uvm/uvm.h>
   60 #endif
   61 
   62 #include <miscfs/specfs/specdev.h>
   63 
   64 MALLOC_JUSTDEFINE(M_WAPBL, "wapbl", "write-ahead physical block logging");
   65 #define wapbl_malloc(s) malloc((s), M_WAPBL, M_WAITOK)
   66 #define wapbl_free(a) free((a), M_WAPBL)
   67 #define wapbl_calloc(n, s) malloc((n)*(s), M_WAPBL, M_WAITOK | M_ZERO)
   68 
   69 static struct sysctllog *wapbl_sysctl;
   70 static int wapbl_flush_disk_cache = 1;
   71 static int wapbl_verbose_commit = 0;
   72 
   73 #else /* !_KERNEL */
   74 #include <assert.h>
   75 #include <errno.h>
   76 #include <stdio.h>
   77 #include <stdbool.h>
   78 #include <stdlib.h>
   79 #include <string.h>
   80 
   81 #include <sys/time.h>
   82 #include <sys/wapbl.h>
   83 
   84 #define KDASSERT(x) assert(x)
   85 #define KASSERT(x) assert(x)
   86 #define wapbl_malloc(s) malloc(s)
   87 #define wapbl_free(a) free(a)
   88 #define wapbl_calloc(n, s) calloc((n), (s))
   89 
   90 #endif /* !_KERNEL */
   91 
   92 /*
   93  * INTERNAL DATA STRUCTURES
   94  */
   95 
   96 /* 
   97  * This structure holds per-mount log information.
   98  *
   99  * Legend:      a = atomic access only
  100  *              r = read-only after init
  101  *              l = rwlock held
  102  *              m = mutex held
  103  *              lm = rwlock held writing or mutex held
  104  *              u = unlocked access ok
  105  *              b = bufcache_lock held
  106  */
  107 struct wapbl {
  108         struct vnode *wl_logvp; /* r:   log here */
  109         struct vnode *wl_devvp; /* r:   log on this device */
  110         struct mount *wl_mount; /* r:   mountpoint wl is associated with */
  111         daddr_t wl_logpbn;      /* r:   Physical block number of start of log */
  112         int wl_log_dev_bshift;  /* r:   logarithm of device block size of log
  113                                         device */
  114         int wl_fs_dev_bshift;   /* r:   logarithm of device block size of
  115                                         filesystem device */
  116 
  117         unsigned wl_lock_count; /* m:   Count of transactions in progress */
  118 
  119         size_t wl_circ_size;    /* r:   Number of bytes in buffer of log */
  120         size_t wl_circ_off;     /* r:   Number of bytes reserved at start */
  121 
  122         size_t wl_bufcount_max; /* r:   Number of buffers reserved for log */
  123         size_t wl_bufbytes_max; /* r:   Number of buf bytes reserved for log */
  124 
  125         off_t wl_head;          /* l:   Byte offset of log head */
  126         off_t wl_tail;          /* l:   Byte offset of log tail */
  127         /*
  128          * head == tail == 0 means log is empty
  129          * head == tail != 0 means log is full
  130          * see assertions in wapbl_advance() for other boundary conditions.
  131          * Only truncate moves the tail, except when flush sets it to
  132          * wl_header_size.  Only flush moves the head, except when truncate
  133          * sets it to 0.
  134          */
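        /*
         * For example (illustrative numbers only): with wl_circ_off = 1024
         * and wl_circ_size = 8192, the usable log region is byte offsets
         * [1024, 9216).  head = tail = 0 is the empty log; appending 2048
         * bytes gives head = 3072, tail = 1024; if head later wraps around
         * and catches up with tail (head == tail != 0), the log is full.
         */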
  135 
  136         struct wapbl_wc_header *wl_wc_header;   /* l    */
  137         void *wl_wc_scratch;    /* l:   scratch space (XXX: why?!?) */
  138 
  139         kmutex_t wl_mtx;        /* u:   short-term lock */
  140         krwlock_t wl_rwlock;    /* u:   File system transaction lock */
  141 
  142         /*
  143          * Must be held while accessing
  144          * wl_count or wl_bufs or head or tail
  145          */
  146 
  147         /*
  148          * Callback called from within the flush routine to flush any extra
  149          * bits.  Note that flush may be skipped without calling this if
  150          * there are no outstanding buffers in the transaction.
  151          */
  152         wapbl_flush_fn_t wl_flush;      /* r    */
  153         wapbl_flush_fn_t wl_flush_abort;/* r    */
  154 
  155         size_t wl_bufbytes;     /* m:   Byte count of pages in wl_bufs */
  156         size_t wl_bufcount;     /* m:   Count of buffers in wl_bufs */
  157         size_t wl_bcount;       /* m:   Total bcount of wl_bufs */
  158 
  159         LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */
  160 
  161         kcondvar_t wl_reclaimable_cv;   /* m (obviously) */
  162         size_t wl_reclaimable_bytes; /* m:      Amount of space available for
  163                                                 reclamation by truncate */
  164         int wl_error_count;     /* m:   # of wl_entries with errors */
  165         size_t wl_reserved_bytes; /* never truncate log smaller than this */
  166 
  167 #ifdef WAPBL_DEBUG_BUFBYTES
  168         size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */
  169 #endif
  170 
  171         daddr_t *wl_deallocblks;/* lm:  address of block */
  172         int *wl_dealloclens;    /* lm:  size of block */
  173         int wl_dealloccnt;      /* lm:  total count */
  174         int wl_dealloclim;      /* l:   max count */
  175 
  176         /* hashtable of inode numbers for allocated but unlinked inodes */
  177         /* synch ??? */
  178         LIST_HEAD(wapbl_ino_head, wapbl_ino) *wl_inohash;
  179         u_long wl_inohashmask;
  180         int wl_inohashcnt;
  181 
  182         SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
  183                                                    accounting */
  184 };
  185 
  186 #ifdef WAPBL_DEBUG_PRINT
  187 int wapbl_debug_print = WAPBL_DEBUG_PRINT;
  188 #endif
  189 
  190 /****************************************************************/
  191 #ifdef _KERNEL
  192 
  193 #ifdef WAPBL_DEBUG
  194 struct wapbl *wapbl_debug_wl;
  195 #endif
  196 
  197 static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail);
  198 static int wapbl_write_blocks(struct wapbl *wl, off_t *offp);
  199 static int wapbl_write_revocations(struct wapbl *wl, off_t *offp);
  200 static int wapbl_write_inodes(struct wapbl *wl, off_t *offp);
  201 #endif /* _KERNEL */
  202 
  203 static int wapbl_replay_prescan(struct wapbl_replay *wr);
  204 static int wapbl_replay_get_inodes(struct wapbl_replay *wr);
  205 
  206 static __inline size_t wapbl_space_free(size_t avail, off_t head,
  207         off_t tail);
  208 static __inline size_t wapbl_space_used(size_t avail, off_t head,
  209         off_t tail);
  210 
  211 #ifdef _KERNEL
  212 
  213 #define WAPBL_INODETRK_SIZE 83
  214 static int wapbl_ino_pool_refcount;
  215 static struct pool wapbl_ino_pool;
  216 struct wapbl_ino {
  217         LIST_ENTRY(wapbl_ino) wi_hash;
  218         ino_t wi_ino;
  219         mode_t wi_mode;
  220 };
  221 
  222 static void wapbl_inodetrk_init(struct wapbl *wl, u_int size);
  223 static void wapbl_inodetrk_free(struct wapbl *wl);
  224 static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino);
  225 
  226 static size_t wapbl_transaction_len(struct wapbl *wl);
  227 static __inline size_t wapbl_transaction_inodes_len(struct wapbl *wl);
  228 
  229 /*
  230  * This is useful for debugging.  If set, the log will
  231  * only be truncated when necessary.
  232  */
  233 int wapbl_lazy_truncate = 0;
  234 
  235 struct wapbl_ops wapbl_ops = {
  236         .wo_wapbl_discard       = wapbl_discard,
  237         .wo_wapbl_replay_isopen = wapbl_replay_isopen1,
  238         .wo_wapbl_replay_read   = wapbl_replay_read,
  239         .wo_wapbl_add_buf       = wapbl_add_buf,
  240         .wo_wapbl_remove_buf    = wapbl_remove_buf,
  241         .wo_wapbl_resize_buf    = wapbl_resize_buf,
  242         .wo_wapbl_begin         = wapbl_begin,
  243         .wo_wapbl_end           = wapbl_end,
  244         .wo_wapbl_junlock_assert= wapbl_junlock_assert,
  245 
  246         /* XXX: the following is only used to say "this is a wapbl buf" */
  247         .wo_wapbl_biodone       = wapbl_biodone,
  248 };
  249 
  250 void
  251 wapbl_init()
  252 {
  253         int rv;
  254         const struct sysctlnode *rnode, *cnode;
  255 
  256         malloc_type_attach(M_WAPBL);
  257 
  258         wapbl_sysctl = NULL;
  259 
  260         rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode,
  261                        CTLFLAG_PERMANENT,
  262                        CTLTYPE_NODE, "vfs", NULL,
  263                        NULL, 0, NULL, 0,
  264                        CTL_VFS, CTL_EOL);
  265         if (rv)
  266                 return;
  267 
  268         rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &rnode,
  269                        CTLFLAG_PERMANENT,
  270                        CTLTYPE_NODE, "wapbl",
  271                        SYSCTL_DESCR("WAPBL journaling options"),
  272                        NULL, 0, NULL, 0,
  273                        CTL_CREATE, CTL_EOL);
  274         if (rv)
  275                 return;
  276 
  277         rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
  278                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
  279                        CTLTYPE_INT, "flush_disk_cache",
  280                        SYSCTL_DESCR("flush disk cache"),
  281                        NULL, 0, &wapbl_flush_disk_cache, 0,
  282                        CTL_CREATE, CTL_EOL);
  283         if (rv)
  284                 return;
  285 
  286         rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
  287                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
  288                        CTLTYPE_INT, "verbose_commit",
  289                        SYSCTL_DESCR("show time and size of wapbl log commits"),
  290                        NULL, 0, &wapbl_verbose_commit, 0,
  291                        CTL_CREATE, CTL_EOL);
  292 }
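
/*
 * When registration succeeds, the knobs above appear as the
 * vfs.wapbl.flush_disk_cache and vfs.wapbl.verbose_commit sysctls; e.g.
 * "sysctl -w vfs.wapbl.verbose_commit=1" should make wapbl_flush() report
 * the time and size of each log commit.
 */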
  293 
  294 int
  295 wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp,
  296         daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr,
  297         wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn)
  298 {
  299         struct wapbl *wl;
  300         struct vnode *devvp;
  301         daddr_t logpbn;
  302         int error;
  303         int log_dev_bshift = DEV_BSHIFT;
  304         int fs_dev_bshift = DEV_BSHIFT;
  305         int run;
  306 
  307         WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64
  308             " count=%zu blksize=%zu\n", vp, off, count, blksize));
  309 
  310         if (log_dev_bshift > fs_dev_bshift) {
  311                 WAPBL_PRINTF(WAPBL_PRINT_OPEN,
  312                         ("wapbl: log device's block size cannot be larger "
  313                          "than filesystem's\n"));
  314                 /*
  315                  * Not currently implemented, although it could be if
  316                  * needed someday.
  317                  */
  318                 return ENOSYS;
  319         }
  320 
  321         if (off < 0)
  322                 return EINVAL;
  323 
  324         if (blksize < DEV_BSIZE)
  325                 return EINVAL;
  326         if (blksize % DEV_BSIZE)
  327                 return EINVAL;
  328 
  329         /* XXXTODO: verify that the full load is writable */
  330 
  331         /*
  332          * XXX check for minimum log size
  333          * minimum is governed by minimum amount of space
  334          * to complete a transaction. (probably truncate)
  335          */
  336         /* XXX for now pick something minimal */
  337         if ((count * blksize) < MAXPHYS) {
  338                 return ENOSPC;
  339         }
  340 
  341         if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) {
  342                 return error;
  343         }
  344 
  345         wl = wapbl_calloc(1, sizeof(*wl));
  346         rw_init(&wl->wl_rwlock);
  347         mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE);
  348         cv_init(&wl->wl_reclaimable_cv, "wapblrec");
  349         LIST_INIT(&wl->wl_bufs);
  350         SIMPLEQ_INIT(&wl->wl_entries);
  351 
  352         wl->wl_logvp = vp;
  353         wl->wl_devvp = devvp;
  354         wl->wl_mount = mp;
  355         wl->wl_logpbn = logpbn;
  356         wl->wl_log_dev_bshift = log_dev_bshift;
  357         wl->wl_fs_dev_bshift = fs_dev_bshift;
  358 
  359         wl->wl_flush = flushfn;
  360         wl->wl_flush_abort = flushabortfn;
  361 
  362         /* Reserve two log device blocks for the commit headers */
  363         wl->wl_circ_off = 2<<wl->wl_log_dev_bshift;
  364         wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off);
  365         /* truncate the log usage to a multiple of 1<<log_dev_bshift */
  366         wl->wl_circ_size >>= wl->wl_log_dev_bshift;
  367         wl->wl_circ_size <<= wl->wl_log_dev_bshift;
  368 
  369         /*
  370          * wl_bufbytes_max limits the size of the in memory transaction space.
  371          * - Since buffers are allocated and accounted for in units of
  372          *   PAGE_SIZE it is required to be a multiple of PAGE_SIZE
  373          *   (i.e. 1<<PAGE_SHIFT)
  374          * - Since the log device has to be written in units of
  375          *   1<<wl_log_dev_bshift it is required to be a multiple of
  376          *   1<<wl_log_dev_bshift.
  377          * - Since the filesystem will provide data in units of 1<<wl_fs_dev_bshift,
  378          *   it is convenient to be a multiple of 1<<wl_fs_dev_bshift.
  379          * Therefore it must be a multiple of the least common multiple of those
  380          * three quantities.  Fortunately, all of those quantities are
  381          * guaranteed to be a power of two, and the least common multiple of
  382          * a set of numbers which are all powers of two is simply the maximum
  383          * of those numbers.  Finally, the maximum logarithm of a power of two
  384          * is the same as the log of the maximum power of two.  So we can do
  385          * the following operations to size wl_bufbytes_max:
  386          */
  387 
  388         /* XXX fix actual number of pages reserved per filesystem. */
  389         wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2);
  390 
  391         /* Round wl_bufbytes_max down to the largest power-of-two constraint */
  392         wl->wl_bufbytes_max >>= PAGE_SHIFT;
  393         wl->wl_bufbytes_max <<= PAGE_SHIFT;
  394         wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift;
  395         wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift;
  396         wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift;
  397         wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift;
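
        /*
         * For example (illustrative numbers only): with PAGE_SHIFT = 12 and
         * both bshift values equal to 9, a wl_bufbytes_max of 1234567 from
         * the MIN() above is rounded down by the shifts to 1232896, a
         * multiple of 4096 (and therefore also of 512), satisfying all
         * three constraints described above.
         */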
  398 
  399         /* XXX maybe use filesystem fragment size instead of 1024 */
  400         /* XXX fix actual number of buffers reserved per filesystem. */
  401         wl->wl_bufcount_max = (nbuf / 2) * 1024;
  402 
  403         /* XXX tie this into resource estimation */
  404         wl->wl_dealloclim = 2 * btodb(wl->wl_bufbytes_max);
  405         
  406 #if WAPBL_UVM_ALLOC
  407         wl->wl_deallocblks = (void *) uvm_km_zalloc(kernel_map,
  408             round_page(sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim));
  409         KASSERT(wl->wl_deallocblks != NULL);
  410         wl->wl_dealloclens = (void *) uvm_km_zalloc(kernel_map,
  411             round_page(sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim));
  412         KASSERT(wl->wl_dealloclens != NULL);
  413 #else
  414         wl->wl_deallocblks = wapbl_malloc(sizeof(*wl->wl_deallocblks) *
  415             wl->wl_dealloclim);
  416         wl->wl_dealloclens = wapbl_malloc(sizeof(*wl->wl_dealloclens) *
  417             wl->wl_dealloclim);
  418 #endif
  419 
  420         wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
  421 
  422         /* Initialize the commit header */
  423         {
  424                 struct wapbl_wc_header *wc;
  425                 size_t len = 1<<wl->wl_log_dev_bshift;
  426                 wc = wapbl_calloc(1, len);
  427                 wc->wc_type = WAPBL_WC_HEADER;
  428                 wc->wc_len = len;
  429                 wc->wc_circ_off = wl->wl_circ_off;
  430                 wc->wc_circ_size = wl->wl_circ_size;
  431                 /* XXX wc->wc_fsid */
  432                 wc->wc_log_dev_bshift = wl->wl_log_dev_bshift;
  433                 wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift;
  434                 wl->wl_wc_header = wc;
  435                 wl->wl_wc_scratch = wapbl_malloc(len);
  436         }
  437 
  438         /*
  439          * if there was an existing set of unlinked but
  440          * allocated inodes, preserve it in the new
  441          * log.
  442          */
  443         if (wr && wr->wr_inodescnt) {
  444                 int i;
  445 
  446                 WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
  447                     ("wapbl_start: reusing log with %d inodes\n",
  448                     wr->wr_inodescnt));
  449 
  450                 /*
  451                  * It's only valid to reuse the replay log if it's
  452                  * the same as the new log we just opened.
  453                  */
  454                 KDASSERT(!wapbl_replay_isopen(wr));
  455                 KASSERT(devvp->v_rdev == wr->wr_devvp->v_rdev);
  456                 KASSERT(logpbn == wr->wr_logpbn);
  457                 KASSERT(wl->wl_circ_size == wr->wr_wc_header.wc_circ_size);
  458                 KASSERT(wl->wl_circ_off == wr->wr_wc_header.wc_circ_off);
  459                 KASSERT(wl->wl_log_dev_bshift ==
  460                     wr->wr_wc_header.wc_log_dev_bshift);
  461                 KASSERT(wl->wl_fs_dev_bshift ==
  462                     wr->wr_wc_header.wc_fs_dev_bshift);
  463 
  464                 wl->wl_wc_header->wc_generation =
  465                     wr->wr_wc_header.wc_generation + 1;
  466 
  467                 for (i = 0; i < wr->wr_inodescnt; i++)
  468                         wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber,
  469                             wr->wr_inodes[i].wr_imode);
  470 
  471                 /* Make sure new transaction won't overwrite old inodes list */
  472                 KDASSERT(wapbl_transaction_len(wl) <= 
  473                     wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead,
  474                       wr->wr_inodestail));
  475 
  476                 wl->wl_head = wl->wl_tail = wr->wr_inodeshead;
  477                 wl->wl_reclaimable_bytes = wl->wl_reserved_bytes =
  478                         wapbl_transaction_len(wl);
  479 
  480                 error = wapbl_write_inodes(wl, &wl->wl_head);
  481                 if (error)
  482                         goto errout;
  483 
  484                 KASSERT(wl->wl_head != wl->wl_tail);
  485                 KASSERT(wl->wl_head != 0);
  486         }
  487 
  488         error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail);
  489         if (error) {
  490                 goto errout;
  491         }
  492 
  493         *wlp = wl;
  494 #if defined(WAPBL_DEBUG)
  495         wapbl_debug_wl = wl;
  496 #endif
  497 
  498         return 0;
  499  errout:
  500         wapbl_discard(wl);
  501         wapbl_free(wl->wl_wc_scratch);
  502         wapbl_free(wl->wl_wc_header);
  503 #if WAPBL_UVM_ALLOC
  504         uvm_km_free_wakeup(kernel_map, (vaddr_t) wl->wl_deallocblks,
  505                            round_page(sizeof(*wl->wl_deallocblks) *
  506                                       wl->wl_dealloclim));
  507         uvm_km_free_wakeup(kernel_map, (vaddr_t) wl->wl_dealloclens,
  508                            round_page(sizeof(*wl->wl_dealloclens) *
  509                                       wl->wl_dealloclim));
  510 #else
  511         wapbl_free(wl->wl_deallocblks);
  512         wapbl_free(wl->wl_dealloclens);
  513 #endif
  514         wapbl_inodetrk_free(wl);
  515         wapbl_free(wl);
  516 
  517         return error;
  518 }
  519 
  520 /*
  521  * Like wapbl_flush, except it discards the transaction
  522  * completely.
  523  */
  524 
  525 void
  526 wapbl_discard(struct wapbl *wl)
  527 {
  528         struct wapbl_entry *we;
  529         struct buf *bp;
  530         int i;
  531 
  532         /*
  533          * XXX we may consider using upgrade here
  534          * if we want to call flush from inside a transaction
  535          */
  536         rw_enter(&wl->wl_rwlock, RW_WRITER);
  537         wl->wl_flush(wl->wl_mount, wl->wl_deallocblks, wl->wl_dealloclens,
  538             wl->wl_dealloccnt);
  539 
  540 #ifdef WAPBL_DEBUG_PRINT
  541         {
  542                 struct wapbl_entry *we;
  543                 pid_t pid = -1;
  544                 lwpid_t lid = -1;
  545                 if (curproc)
  546                         pid = curproc->p_pid;
  547                 if (curlwp)
  548                         lid = curlwp->l_lid;
  549 #ifdef WAPBL_DEBUG_BUFBYTES
  550                 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
  551                     ("wapbl_discard: thread %d.%d discarding "
  552                     "transaction\n"
  553                     "\tbufcount=%zu bufbytes=%zu bcount=%zu "
  554                     "deallocs=%d inodes=%d\n"
  555                     "\terrcnt = %u, reclaimable=%zu reserved=%zu "
  556                     "unsynced=%zu\n",
  557                     pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
  558                     wl->wl_bcount, wl->wl_dealloccnt,
  559                     wl->wl_inohashcnt, wl->wl_error_count,
  560                     wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
  561                     wl->wl_unsynced_bufbytes));
  562                 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
  563                         WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
  564                             ("\tentry: bufcount = %zu, reclaimable = %zu, "
  565                              "error = %d, unsynced = %zu\n",
  566                              we->we_bufcount, we->we_reclaimable_bytes,
  567                              we->we_error, we->we_unsynced_bufbytes));
  568                 }
  569 #else /* !WAPBL_DEBUG_BUFBYTES */
  570                 WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
  571                     ("wapbl_discard: thread %d.%d discarding transaction\n"
  572                     "\tbufcount=%zu bufbytes=%zu bcount=%zu "
  573                     "deallocs=%d inodes=%d\n"
  574                     "\terrcnt = %u, reclaimable=%zu reserved=%zu\n",
  575                     pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
  576                     wl->wl_bcount, wl->wl_dealloccnt,
  577                     wl->wl_inohashcnt, wl->wl_error_count,
  578                     wl->wl_reclaimable_bytes, wl->wl_reserved_bytes));
  579                 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
  580                         WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
  581                             ("\tentry: bufcount = %zu, reclaimable = %zu, "
  582                              "error = %d\n",
  583                              we->we_bufcount, we->we_reclaimable_bytes,
  584                              we->we_error));
  585                 }
  586 #endif /* !WAPBL_DEBUG_BUFBYTES */
  587         }
  588 #endif /* WAPBL_DEBUG_PRINT */
  589 
  590         for (i = 0; i <= wl->wl_inohashmask; i++) {
  591                 struct wapbl_ino_head *wih;
  592                 struct wapbl_ino *wi;
  593 
  594                 wih = &wl->wl_inohash[i];
  595                 while ((wi = LIST_FIRST(wih)) != NULL) {
  596                         LIST_REMOVE(wi, wi_hash);
  597                         pool_put(&wapbl_ino_pool, wi);
  598                         KASSERT(wl->wl_inohashcnt > 0);
  599                         wl->wl_inohashcnt--;
  600                 }
  601         }
  602 
  603         /*
  604          * clean buffer list
  605          */
  606         mutex_enter(&bufcache_lock);
  607         mutex_enter(&wl->wl_mtx);
  608         while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
  609                 if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) {
  610                         /*
  611                          * The buffer will be unlocked and
  612                          * removed from the transaction in brelse
  613                          */
  614                         mutex_exit(&wl->wl_mtx);
  615                         brelsel(bp, 0);
  616                         mutex_enter(&wl->wl_mtx);
  617                 }
  618         }
  619         mutex_exit(&wl->wl_mtx);
  620         mutex_exit(&bufcache_lock);
  621 
  622         /*
  623          * Remove references to this wl from wl_entries; free any that
  624          * no longer have buffers.  The others will be freed in wapbl_biodone
  625          * when they no longer have any buffers.
  626          */
  627         while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) {
  628                 SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
  629                 /* XXX should we be accumulating wl_error_count
  630                  * and increasing reclaimable bytes ? */
  631                 we->we_wapbl = NULL;
  632                 if (we->we_bufcount == 0) {
  633 #ifdef WAPBL_DEBUG_BUFBYTES
  634                         KASSERT(we->we_unsynced_bufbytes == 0);
  635 #endif
  636                         wapbl_free(we);
  637                 }
  638         }
  639 
  640         /* Discard list of deallocs */
  641         wl->wl_dealloccnt = 0;
  642         /* XXX should we clear wl_reserved_bytes? */
  643 
  644         KASSERT(wl->wl_bufbytes == 0);
  645         KASSERT(wl->wl_bcount == 0);
  646         KASSERT(wl->wl_bufcount == 0);
  647         KASSERT(LIST_EMPTY(&wl->wl_bufs));
  648         KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
  649         KASSERT(wl->wl_inohashcnt == 0);
  650 
  651         rw_exit(&wl->wl_rwlock);
  652 }
  653 
  654 int
  655 wapbl_stop(struct wapbl *wl, int force)
  656 {
  657         struct vnode *vp;
  658         int error;
  659 
  660         WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n"));
  661         error = wapbl_flush(wl, 1);
  662         if (error) {
  663                 if (force)
  664                         wapbl_discard(wl);
  665                 else
  666                         return error;
  667         }
  668 
  669         /* Unlinked inodes persist after a flush */
  670         if (wl->wl_inohashcnt) {
  671                 if (force) {
  672                         wapbl_discard(wl);
  673                 } else {
  674                         return EBUSY;
  675                 }
  676         }
  677 
  678         KASSERT(wl->wl_bufbytes == 0);
  679         KASSERT(wl->wl_bcount == 0);
  680         KASSERT(wl->wl_bufcount == 0);
  681         KASSERT(LIST_EMPTY(&wl->wl_bufs));
  682         KASSERT(wl->wl_dealloccnt == 0);
  683         KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
  684         KASSERT(wl->wl_inohashcnt == 0);
  685 
  686         vp = wl->wl_logvp;
  687 
  688         wapbl_free(wl->wl_wc_scratch);
  689         wapbl_free(wl->wl_wc_header);
  690 #if WAPBL_UVM_ALLOC
  691         uvm_km_free_wakeup(kernel_map, (vaddr_t) wl->wl_deallocblks,
  692                            round_page(sizeof(*wl->wl_deallocblks) *
  693                                       wl->wl_dealloclim));
  694         uvm_km_free_wakeup(kernel_map, (vaddr_t) wl->wl_dealloclens,
  695                            round_page(sizeof(*wl->wl_dealloclens) *
  696                                       wl->wl_dealloclim));
  697 #else
  698         wapbl_free(wl->wl_deallocblks);
  699         wapbl_free(wl->wl_dealloclens);
  700 #endif
  701         wapbl_inodetrk_free(wl);
  702 
  703         cv_destroy(&wl->wl_reclaimable_cv);
  704         mutex_destroy(&wl->wl_mtx);
  705         rw_destroy(&wl->wl_rwlock);
  706         wapbl_free(wl);
  707 
  708         return 0;
  709 }
  710 
  711 static int
  712 wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags)
  713 {
  714         struct pstats *pstats = curlwp->l_proc->p_stats;
  715         struct buf *bp;
  716         int error;
  717 
  718         KASSERT((flags & ~(B_WRITE | B_READ)) == 0);
  719         KASSERT(devvp->v_type == VBLK);
  720 
  721         if ((flags & (B_WRITE | B_READ)) == B_WRITE) {
  722                 mutex_enter(&devvp->v_interlock);
  723                 devvp->v_numoutput++;
  724                 mutex_exit(&devvp->v_interlock);
  725                 pstats->p_ru.ru_oublock++;
  726         } else {
  727                 pstats->p_ru.ru_inblock++;
  728         }
  729 
  730         bp = getiobuf(devvp, true);
  731         bp->b_flags = flags;
  732         bp->b_cflags = BC_BUSY; /* silly & dubious */
  733         bp->b_dev = devvp->v_rdev;
  734         bp->b_data = data;
  735         bp->b_bufsize = bp->b_resid = bp->b_bcount = len;
  736         bp->b_blkno = pbn;
  737 
  738         WAPBL_PRINTF(WAPBL_PRINT_IO,
  739             ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%x\n",
  740             BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount,
  741             bp->b_blkno, bp->b_dev));
  742 
  743         VOP_STRATEGY(devvp, bp);
  744 
  745         error = biowait(bp);
  746         putiobuf(bp);
  747 
  748         if (error) {
  749                 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
  750                     ("wapbl_doio: %s %zu bytes at block %" PRId64
  751                     " on dev 0x%x failed with error %d\n",
  752                     (((flags & (B_WRITE | B_READ)) == B_WRITE) ?
  753                      "write" : "read"),
  754                     len, pbn, devvp->v_rdev, error));
  755         }
  756 
  757         return error;
  758 }
  759 
  760 int
  761 wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
  762 {
  763 
  764         return wapbl_doio(data, len, devvp, pbn, B_WRITE);
  765 }
  766 
  767 int
  768 wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
  769 {
  770 
  771         return wapbl_doio(data, len, devvp, pbn, B_READ);
  772 }
  773 
  774 /*
  775  * Off is the byte offset into the log; returns the new offset for the
  776  * next write.  Handles log wraparound.
  777  */
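/*
 * For example (illustrative numbers only): with wl_circ_off = 1024 and
 * wl_circ_size = 8192, the log occupies byte offsets [1024, 9216).  A
 * 1024-byte write starting at *offp = 8704 is split: 512 bytes are written
 * at offset 8704, the remaining 512 bytes wrap to offset 1024, and *offp
 * is updated to 1536.
 */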
  778 static int
  779 wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp)
  780 {
  781         size_t slen;
  782         off_t off = *offp;
  783         int error;
  784 
  785         KDASSERT(((len >> wl->wl_log_dev_bshift) <<
  786             wl->wl_log_dev_bshift) == len);
  787 
  788         if (off < wl->wl_circ_off)
  789                 off = wl->wl_circ_off;
  790         slen = wl->wl_circ_off + wl->wl_circ_size - off;
  791         if (slen < len) {
  792                 error = wapbl_write(data, slen, wl->wl_devvp,
  793                     wl->wl_logpbn + (off >> wl->wl_log_dev_bshift));
  794                 if (error)
  795                         return error;
  796                 data = (uint8_t *)data + slen;
  797                 len -= slen;
  798                 off = wl->wl_circ_off;
  799         }
  800         error = wapbl_write(data, len, wl->wl_devvp,
  801                             wl->wl_logpbn + (off >> wl->wl_log_dev_bshift));
  802         if (error)
  803                 return error;
  804         off += len;
  805         if (off >= wl->wl_circ_off + wl->wl_circ_size)
  806                 off = wl->wl_circ_off;
  807         *offp = off;
  808         return 0;
  809 }
  810 
  811 /****************************************************************/
  812 
  813 int
  814 wapbl_begin(struct wapbl *wl, const char *file, int line)
  815 {
  816         int doflush;
  817         unsigned lockcount;
  818 
  819         KDASSERT(wl);
  820 
  821         /*
  822          * XXX this needs to be made much more sophisticated.
  823          * perhaps each wapbl_begin could reserve a specified
  824          * number of buffers and bytes.
  825          */
  826         mutex_enter(&wl->wl_mtx);
  827         lockcount = wl->wl_lock_count;
  828         doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
  829                    wl->wl_bufbytes_max / 2) ||
  830                   ((wl->wl_bufcount + (lockcount * 10)) >
  831                    wl->wl_bufcount_max / 2) ||
  832                   (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
  833                   (wl->wl_dealloccnt >=
  834                    (wl->wl_dealloclim - (wl->wl_dealloclim >> 8)));
  835         mutex_exit(&wl->wl_mtx);
  836 
  837         if (doflush) {
  838                 WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
  839                     ("force flush lockcnt=%d bufbytes=%zu "
  840                     "(max=%zu) bufcount=%zu (max=%zu) "
  841                     "dealloccnt %d (lim=%d)\n",
  842                     lockcount, wl->wl_bufbytes,
  843                     wl->wl_bufbytes_max, wl->wl_bufcount,
  844                     wl->wl_bufcount_max,
  845                     wl->wl_dealloccnt, wl->wl_dealloclim));
  846         }
  847 
  848         if (doflush) {
  849                 int error = wapbl_flush(wl, 0);
  850                 if (error)
  851                         return error;
  852         }
  853 
  854         rw_enter(&wl->wl_rwlock, RW_READER);
  855         mutex_enter(&wl->wl_mtx);
  856         wl->wl_lock_count++;
  857         mutex_exit(&wl->wl_mtx);
  858 
  859 #if defined(WAPBL_DEBUG_PRINT)
  860         WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
  861             ("wapbl_begin thread %d.%d with bufcount=%zu "
  862             "bufbytes=%zu bcount=%zu at %s:%d\n",
  863             curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
  864             wl->wl_bufbytes, wl->wl_bcount, file, line));
  865 #endif
  866 
  867         return 0;
  868 }
  869 
  870 void
  871 wapbl_end(struct wapbl *wl)
  872 {
  873 
  874 #if defined(WAPBL_DEBUG_PRINT)
  875         WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
  876              ("wapbl_end thread %d.%d with bufcount=%zu "
  877               "bufbytes=%zu bcount=%zu\n",
  878               curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
  879               wl->wl_bufbytes, wl->wl_bcount));
  880 #endif
  881 
  882         mutex_enter(&wl->wl_mtx);
  883         KASSERT(wl->wl_lock_count > 0);
  884         wl->wl_lock_count--;
  885         mutex_exit(&wl->wl_mtx);
  886 
  887         rw_exit(&wl->wl_rwlock);
  888 }
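
/*
 * A typical transaction, sketched (a minimal illustration only; real
 * callers normally go through their file system's own wrappers around
 * these entry points):
 *
 *      error = wapbl_begin(wl, __FILE__, __LINE__);
 *      if (error)
 *              return error;
 *      ...dirty the metadata buffers for the operation; on a journaled
 *      ...mount the buffer cache hands each one to wapbl_add_buf()...
 *      wapbl_end(wl);
 */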
  889 
  890 void
  891 wapbl_add_buf(struct wapbl *wl, struct buf * bp)
  892 {
  893 
  894         KASSERT(bp->b_cflags & BC_BUSY);
  895         KASSERT(bp->b_vp);
  896 
  897         wapbl_jlock_assert(wl);
  898 
  899 #if 0
  900         /*
  901          * XXX this might be an issue for swapfiles.
  902          * see uvm_swap.c:1702
  903          *
  904          * XXX2 why require it then?  leap of semantics?
  905          */
  906         KASSERT((bp->b_cflags & BC_NOCACHE) == 0);
  907 #endif
  908 
  909         mutex_enter(&wl->wl_mtx);
  910         if (bp->b_flags & B_LOCKED) {
  911                 LIST_REMOVE(bp, b_wapbllist);
  912                 WAPBL_PRINTF(WAPBL_PRINT_BUFFER2,
  913                    ("wapbl_add_buf thread %d.%d re-adding buf %p "
  914                     "with %d bytes %d bcount\n",
  915                     curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
  916                     bp->b_bcount));
  917         } else {
  918                 /* unlocked but dirty buffers shouldn't exist */
  919                 KASSERT(!(bp->b_oflags & BO_DELWRI));
  920                 wl->wl_bufbytes += bp->b_bufsize;
  921                 wl->wl_bcount += bp->b_bcount;
  922                 wl->wl_bufcount++;
  923                 WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
  924                    ("wapbl_add_buf thread %d.%d adding buf %p "
  925                     "with %d bytes %d bcount\n",
  926                     curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
  927                     bp->b_bcount));
  928         }
  929         LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist);
  930         mutex_exit(&wl->wl_mtx);
  931 
  932         bp->b_flags |= B_LOCKED;
  933 }
  934 
  935 static void
  936 wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp)
  937 {
  938 
  939         KASSERT(mutex_owned(&wl->wl_mtx));
  940         KASSERT(bp->b_cflags & BC_BUSY);
  941         wapbl_jlock_assert(wl);
  942 
  943 #if 0
  944         /*
  945          * XXX this might be an issue for swapfiles.
  946          * see uvm_swap.c:1725
  947          *
  948          * XXXdeux: see above
  949          */
  950         KASSERT((bp->b_flags & BC_NOCACHE) == 0);
  951 #endif
  952         KASSERT(bp->b_flags & B_LOCKED);
  953 
  954         WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
  955            ("wapbl_remove_buf thread %d.%d removing buf %p with "
  956             "%d bytes %d bcount\n",
  957             curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount));
  958 
  959         KASSERT(wl->wl_bufbytes >= bp->b_bufsize);
  960         wl->wl_bufbytes -= bp->b_bufsize;
  961         KASSERT(wl->wl_bcount >= bp->b_bcount);
  962         wl->wl_bcount -= bp->b_bcount;
  963         KASSERT(wl->wl_bufcount > 0);
  964         wl->wl_bufcount--;
  965         KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
  966         KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
  967         LIST_REMOVE(bp, b_wapbllist);
  968 
  969         bp->b_flags &= ~B_LOCKED;
  970 }
  971 
  972 /* called from brelsel() in vfs_bio among other places */
  973 void
  974 wapbl_remove_buf(struct wapbl * wl, struct buf *bp)
  975 {
  976 
  977         mutex_enter(&wl->wl_mtx);
  978         wapbl_remove_buf_locked(wl, bp);
  979         mutex_exit(&wl->wl_mtx);
  980 }
  981 
  982 void
  983 wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt)
  984 {
  985 
  986         KASSERT(bp->b_cflags & BC_BUSY);
  987 
  988         /*
  989          * XXX: why does this depend on B_LOCKED?  otherwise the buf
  990          * is not for a transaction?  if so, why is this called in the
  991          * first place?
  992          */
  993         if (bp->b_flags & B_LOCKED) {
  994                 mutex_enter(&wl->wl_mtx);
  995                 wl->wl_bufbytes += bp->b_bufsize - oldsz;
  996                 wl->wl_bcount += bp->b_bcount - oldcnt;
  997                 mutex_exit(&wl->wl_mtx);
  998         }
  999 }
 1000 
 1001 #endif /* _KERNEL */
 1002 
 1003 /****************************************************************/
 1004 /* Some utility inlines */
 1005 
 1006 /* Advance the log offset 'old' by 'delta' bytes, wrapping within [off, size + off). */
 1007 static __inline off_t
 1008 wapbl_advance(size_t size, size_t off, off_t old, size_t delta)
 1009 {
 1010         off_t new;
 1011 
 1012         /* Define acceptable ranges for inputs. */
 1013         KASSERT(delta <= size);
 1014         KASSERT((old == 0) || (old >= off));
 1015         KASSERT(old < (size + off));
 1016 
 1017         if ((old == 0) && (delta != 0))
 1018                 new = off + delta;
 1019         else if ((old + delta) < (size + off))
 1020                 new = old + delta;
 1021         else
 1022                 new = (old + delta) - size;
 1023 
 1024         /* Note some interesting axioms */
 1025         KASSERT((delta != 0) || (new == old));
 1026         KASSERT((delta == 0) || (new != 0));
 1027         KASSERT((delta != (size)) || (new == old));
 1028 
 1029         /* Define acceptable ranges for output. */
 1030         KASSERT((new == 0) || (new >= off));
 1031         KASSERT(new < (size + off));
 1032         return new;
 1033 }
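
/*
 * For example (illustrative numbers only): with size = 8192 and off = 1024,
 * valid offsets are 0 (the empty marker) and [1024, 9216).  Then
 * wapbl_advance(8192, 1024, 0, 512) == 1536 (leaving the empty state),
 * wapbl_advance(8192, 1024, 9000, 500) == 1308 (wrapping past the end),
 * and any call with delta == 0 returns old unchanged.
 */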
 1034 
 1035 static __inline size_t
 1036 wapbl_space_used(size_t avail, off_t head, off_t tail)
 1037 {
 1038 
 1039         if (tail == 0) {
 1040                 KASSERT(head == 0);
 1041                 return 0;
 1042         }
 1043         return ((head + (avail - 1) - tail) % avail) + 1;
 1044 }
 1045 
 1046 static __inline size_t
 1047 wapbl_space_free(size_t avail, off_t head, off_t tail)
 1048 {
 1049 
 1050         return avail - wapbl_space_used(avail, head, tail);
 1051 }
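
/*
 * For example (illustrative numbers only): with avail = 8192, head = 3072
 * and tail = 1024, wapbl_space_used() is 2048 and wapbl_space_free() is
 * 6144.  The degenerate cases match the head/tail convention above:
 * head == tail == 0 uses nothing, head == tail != 0 uses all avail bytes.
 */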
 1052 
 1053 static __inline void
 1054 wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp,
 1055                    off_t *tailp)
 1056 {
 1057         off_t head = *headp;
 1058         off_t tail = *tailp;
 1059 
 1060         KASSERT(delta <= wapbl_space_free(size, head, tail));
 1061         head = wapbl_advance(size, off, head, delta);
 1062         if ((tail == 0) && (head != 0))
 1063                 tail = off;
 1064         *headp = head;
 1065         *tailp = tail;
 1066 }
 1067 
 1068 static __inline void
 1069 wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp,
 1070                    off_t *tailp)
 1071 {
 1072         off_t head = *headp;
 1073         off_t tail = *tailp;
 1074 
 1075         KASSERT(delta <= wapbl_space_used(size, head, tail));
 1076         tail = wapbl_advance(size, off, tail, delta);
 1077         if (head == tail) {
 1078                 head = tail = 0;
 1079         }
 1080         *headp = head;
 1081         *tailp = tail;
 1082 }
 1083 
 1084 #ifdef _KERNEL
 1085 
 1086 /****************************************************************/
 1087 
 1088 /*
 1089  * Remove transactions whose buffers are completely flushed to disk.
 1090  * Will block until at least minfree space is available.
 1091  * Only intended to be called from inside wapbl_flush and therefore
 1092  * does not protect against commit races with itself or with flush.
 1093  */
 1094 static int
 1095 wapbl_truncate(struct wapbl *wl, size_t minfree, int waitonly)
 1096 {
 1097         size_t delta;
 1098         size_t avail;
 1099         off_t head;
 1100         off_t tail;
 1101         int error = 0;
 1102 
 1103         KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes));
 1104         KASSERT(rw_write_held(&wl->wl_rwlock));
 1105 
 1106         mutex_enter(&wl->wl_mtx);
 1107 
 1108         /*
 1109          * First check to see if we have to do a commit
 1110          * at all.
 1111          */
 1112         avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail);
 1113         if (minfree < avail) {
 1114                 mutex_exit(&wl->wl_mtx);
 1115                 return 0;
 1116         }
 1117         minfree -= avail;
 1118         while ((wl->wl_error_count == 0) &&
 1119             (wl->wl_reclaimable_bytes < minfree)) {
 1120                 WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
 1121                    ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd "
 1122                     "minfree=%zd\n",
 1123                     &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes,
 1124                     minfree));
 1125 
 1126                 cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx);
 1127         }
 1128         if (wl->wl_reclaimable_bytes < minfree) {
 1129                 KASSERT(wl->wl_error_count);
 1130                 /* XXX maybe get actual error from buffer instead someday? */
 1131                 error = EIO;
 1132         }
 1133         head = wl->wl_head;
 1134         tail = wl->wl_tail;
 1135         delta = wl->wl_reclaimable_bytes;
 1136 
 1137         /* If all of the entries are flushed, then be sure to keep
 1138          * the reserved bytes reserved.  Watch out for discarded transactions,
 1139          * which could leave more bytes reserved than are reclaimable.
 1140          */
 1141         if (SIMPLEQ_EMPTY(&wl->wl_entries) && 
 1142             (delta >= wl->wl_reserved_bytes)) {
 1143                 delta -= wl->wl_reserved_bytes;
 1144         }
 1145         wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head,
 1146                            &tail);
 1147         KDASSERT(wl->wl_reserved_bytes <=
 1148                 wapbl_space_used(wl->wl_circ_size, head, tail));
 1149         mutex_exit(&wl->wl_mtx);
 1150 
 1151         if (error)
 1152                 return error;
 1153 
 1154         if (waitonly)
 1155                 return 0;
 1156 
 1157         /*
 1158          * This is where head, tail and delta are unprotected
 1159          * from races against itself or flush.  This is ok since
 1160          * we only call this routine from inside flush itself.
 1161          *
 1162          * XXX: how can it race against itself when accessed only
 1163          * from behind the write-locked rwlock?
 1164          */
 1165         error = wapbl_write_commit(wl, head, tail);
 1166         if (error)
 1167                 return error;
 1168 
 1169         wl->wl_head = head;
 1170         wl->wl_tail = tail;
 1171 
 1172         mutex_enter(&wl->wl_mtx);
 1173         KASSERT(wl->wl_reclaimable_bytes >= delta);
 1174         wl->wl_reclaimable_bytes -= delta;
 1175         mutex_exit(&wl->wl_mtx);
 1176         WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
 1177             ("wapbl_truncate thread %d.%d truncating %zu bytes\n",
 1178             curproc->p_pid, curlwp->l_lid, delta));
 1179 
 1180         return 0;
 1181 }
 1182 
 1183 /****************************************************************/
 1184 
 1185 void
 1186 wapbl_biodone(struct buf *bp)
 1187 {
 1188         struct wapbl_entry *we = bp->b_private;
 1189         struct wapbl *wl = we->we_wapbl;
 1190 
 1191         /*
 1192          * Handle possible flushing of buffers after the log has been
 1193          * decommissioned.
 1194          */
 1195         if (!wl) {
 1196                 KASSERT(we->we_bufcount > 0);
 1197                 we->we_bufcount--;
 1198 #ifdef WAPBL_DEBUG_BUFBYTES
 1199                 KASSERT(we->we_unsynced_bufbytes >= bp->b_bufsize);
 1200                 we->we_unsynced_bufbytes -= bp->b_bufsize;
 1201 #endif
 1202 
 1203                 if (we->we_bufcount == 0) {
 1204 #ifdef WAPBL_DEBUG_BUFBYTES
 1205                         KASSERT(we->we_unsynced_bufbytes == 0);
 1206 #endif
 1207                         wapbl_free(we);
 1208                 }
 1209 
 1210                 brelse(bp, 0);
 1211                 return;
 1212         }
 1213 
 1214 #ifdef ohbother
 1215         KDASSERT(bp->b_flags & B_DONE);
 1216         KDASSERT(!(bp->b_flags & B_DELWRI));
 1217         KDASSERT(bp->b_flags & B_ASYNC);
 1218         KDASSERT(bp->b_flags & B_BUSY);
 1219         KDASSERT(!(bp->b_flags & B_LOCKED));
 1220         KDASSERT(!(bp->b_flags & B_READ));
 1221         KDASSERT(!(bp->b_flags & B_INVAL));
 1222         KDASSERT(!(bp->b_flags & B_NOCACHE));
 1223 #endif
 1224 
 1225         if (bp->b_error) {
 1226 #ifdef notyet /* Can't currently handle possible dirty buffer reuse */
 1227                 XXXpooka: interfaces not fully updated
 1228                 Note: this was not enabled in the original patch
 1229                 against netbsd4 either.  I don't know if comment
 1230                 above is true or not.
 1231 
 1232                 /*
 1233                  * If an error occurs, report the error and leave the
 1234                  * buffer as a delayed write on the LRU queue.
 1235                  * restarting the write would likely result in
 1236                  * an error spinloop, so let it be done harmlessly
 1237                  * by the syncer.
 1238                  */
 1239                 bp->b_flags &= ~(B_DONE);
 1240                 simple_unlock(&bp->b_interlock);
 1241 
 1242                 if (we->we_error == 0) {
 1243                         mutex_enter(&wl->wl_mtx);
 1244                         wl->wl_error_count++;
 1245                         mutex_exit(&wl->wl_mtx);
 1246                         cv_broadcast(&wl->wl_reclaimable_cv);
 1247                 }
 1248                 we->we_error = bp->b_error;
 1249                 bp->b_error = 0;
 1250                 brelse(bp);
 1251                 return;
 1252 #else
 1253                 /* For now, just mark the log permanently errored out */
 1254 
 1255                 mutex_enter(&wl->wl_mtx);
 1256                 if (wl->wl_error_count == 0) {
 1257                         wl->wl_error_count++;
 1258                         cv_broadcast(&wl->wl_reclaimable_cv);
 1259                 }
 1260                 mutex_exit(&wl->wl_mtx);
 1261 #endif
 1262         }
 1263 
 1264         mutex_enter(&wl->wl_mtx);
 1265 
 1266         KASSERT(we->we_bufcount > 0);
 1267         we->we_bufcount--;
 1268 #ifdef WAPBL_DEBUG_BUFBYTES
 1269         KASSERT(we->we_unsynced_bufbytes >= bp->b_bufsize);
 1270         we->we_unsynced_bufbytes -= bp->b_bufsize;
 1271         KASSERT(wl->wl_unsynced_bufbytes >= bp->b_bufsize);
 1272         wl->wl_unsynced_bufbytes -= bp->b_bufsize;
 1273 #endif
 1274 
 1275         /*
 1276          * If the current transaction can be reclaimed, start
 1277          * at the beginning and reclaim any consecutive reclaimable
 1278          * transactions.  If we successfully reclaim anything,
 1279          * then wakeup anyone waiting for the reclaim.
 1280          */
 1281         if (we->we_bufcount == 0) {
 1282                 size_t delta = 0;
 1283                 int errcnt = 0;
 1284 #ifdef WAPBL_DEBUG_BUFBYTES
 1285                 KDASSERT(we->we_unsynced_bufbytes == 0);
 1286 #endif
 1287                 /*
 1288                  * clear any posted error, since the buffer it came from
 1289                  * has been successfully flushed by now
 1290                  */
 1291                 while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) &&
 1292                        (we->we_bufcount == 0)) {
 1293                         delta += we->we_reclaimable_bytes;
 1294                         if (we->we_error)
 1295                                 errcnt++;
 1296                         SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
 1297                         wapbl_free(we);
 1298                 }
 1299 
 1300                 if (delta) {
 1301                         wl->wl_reclaimable_bytes += delta;
 1302                         KASSERT(wl->wl_error_count >= errcnt);
 1303                         wl->wl_error_count -= errcnt;
 1304                         cv_broadcast(&wl->wl_reclaimable_cv);
 1305                 }
 1306         }
 1307 
 1308         mutex_exit(&wl->wl_mtx);
 1309         brelse(bp, 0);
 1310 }
 1311 
 1312 /*
 1313  * Write transactions to disk + start I/O for contents
 1314  */
 1315 int
 1316 wapbl_flush(struct wapbl *wl, int waitfor)
 1317 {
 1318         struct buf *bp;
 1319         struct wapbl_entry *we;
 1320         off_t off;
 1321         off_t head;
 1322         off_t tail;
 1323         size_t delta = 0;
 1324         size_t flushsize;
 1325         size_t reserved;
 1326         int error = 0;
 1327 
 1328         /*
 1329          * Do a quick check to see if a full flush can be skipped.
 1330          * This assumes that the flush callback does not need to be called
 1331          * unless there are other outstanding bufs.
 1332          */
 1333         if (!waitfor) {
 1334                 size_t nbufs;
 1335                 mutex_enter(&wl->wl_mtx);       /* XXX need mutex here to
 1336                                                    protect the KASSERTS */
 1337                 nbufs = wl->wl_bufcount;
 1338                 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
 1339                 KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
 1340                 mutex_exit(&wl->wl_mtx);
 1341                 if (nbufs == 0)
 1342                         return 0;
 1343         }
 1344 
 1345         /*
 1346          * XXX we may consider using LK_UPGRADE here
 1347          * if we want to call flush from inside a transaction
 1348          */
 1349         rw_enter(&wl->wl_rwlock, RW_WRITER);
 1350         wl->wl_flush(wl->wl_mount, wl->wl_deallocblks, wl->wl_dealloclens,
 1351             wl->wl_dealloccnt);
 1352 
 1353         /*
 1354          * Now that we are fully locked and flushed,
 1355          * do another check for nothing to do.
 1356          */
 1357         if (wl->wl_bufcount == 0) {
 1358                 goto out;
 1359         }
 1360 
 1361 #if 0
 1362         WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
 1363                      ("wapbl_flush thread %d.%d flushing entries with "
 1364                       "bufcount=%zu bufbytes=%zu\n",
 1365                       curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
 1366                       wl->wl_bufbytes));
 1367 #endif
 1368 
 1369         /* Calculate amount of space needed to flush */
 1370         flushsize = wapbl_transaction_len(wl);
 1371         if (wapbl_verbose_commit) {
 1372                 struct timespec ts;
 1373                 getnanotime(&ts);
 1374                 printf("%s: %lld.%06ld this transaction = %zu bytes\n",
 1375                     __func__, (long long)ts.tv_sec,
 1376                     (long)ts.tv_nsec, flushsize);
 1377         }
 1378 
 1379         if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) {
 1380                 /*
 1381                  * XXX this could be handled more gracefully, perhaps place
 1382                  * only a partial transaction in the log and allow the
 1383                  * remaining to flush without the protection of the journal.
 1384                  */
 1385                 panic("wapbl_flush: current transaction too big to flush\n");
 1386         }
 1387 
 1388         error = wapbl_truncate(wl, flushsize, 0);
 1389         if (error)
 1390                 goto out2;
 1391 
 1392         off = wl->wl_head;
 1393         KASSERT((off == 0) || ((off >= wl->wl_circ_off) && 
 1394                               (off < wl->wl_circ_off + wl->wl_circ_size)));
 1395         error = wapbl_write_blocks(wl, &off);
 1396         if (error)
 1397                 goto out2;
 1398         error = wapbl_write_revocations(wl, &off);
 1399         if (error)
 1400                 goto out2;
 1401         error = wapbl_write_inodes(wl, &off);
 1402         if (error)
 1403                 goto out2;
 1404 
 1405         reserved = 0;
 1406         if (wl->wl_inohashcnt)
 1407                 reserved = wapbl_transaction_inodes_len(wl);
 1408 
 1409         head = wl->wl_head;
 1410         tail = wl->wl_tail;
 1411 
 1412         wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize,
 1413             &head, &tail);
 1414 #ifdef WAPBL_DEBUG
 1415         if (head != off) {
 1416                 panic("lost head! head=%"PRIdMAX" tail=%" PRIdMAX
 1417                       " off=%"PRIdMAX" flush=%zu\n",
 1418                       (intmax_t)head, (intmax_t)tail, (intmax_t)off,
 1419                       flushsize);
 1420         }
 1421 #else
 1422         KASSERT(head == off);
 1423 #endif
 1424 
 1425         /* Opportunistically move the tail forward if we can */
 1426         if (!wapbl_lazy_truncate) {
 1427                 mutex_enter(&wl->wl_mtx);
 1428                 delta = wl->wl_reclaimable_bytes;
 1429                 mutex_exit(&wl->wl_mtx);
 1430                 wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta,
 1431                     &head, &tail);
 1432         }
 1433 
 1434         error = wapbl_write_commit(wl, head, tail);
 1435         if (error)
 1436                 goto out2;
 1437 
  1438         /* XXX should this come from a pool or from kmem? */
 1439         we = wapbl_calloc(1, sizeof(*we));
 1440 
 1441 #ifdef WAPBL_DEBUG_BUFBYTES
 1442         WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
 1443                 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
 1444                  " unsynced=%zu"
 1445                  "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
 1446                  "inodes=%d\n",
 1447                  curproc->p_pid, curlwp->l_lid, flushsize, delta,
 1448                  wapbl_space_used(wl->wl_circ_size, head, tail),
 1449                  wl->wl_unsynced_bufbytes, wl->wl_bufcount,
 1450                  wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt,
 1451                  wl->wl_inohashcnt));
 1452 #else
 1453         WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
 1454                 ("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
 1455                  "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
 1456                  "inodes=%d\n",
 1457                  curproc->p_pid, curlwp->l_lid, flushsize, delta,
 1458                  wapbl_space_used(wl->wl_circ_size, head, tail),
 1459                  wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
 1460                  wl->wl_dealloccnt, wl->wl_inohashcnt));
 1461 #endif
 1462 
 1463 
 1464         mutex_enter(&bufcache_lock);
 1465         mutex_enter(&wl->wl_mtx);
 1466 
 1467         wl->wl_reserved_bytes = reserved;
 1468         wl->wl_head = head;
 1469         wl->wl_tail = tail;
 1470         KASSERT(wl->wl_reclaimable_bytes >= delta);
 1471         wl->wl_reclaimable_bytes -= delta;
 1472         wl->wl_dealloccnt = 0;
 1473 #ifdef WAPBL_DEBUG_BUFBYTES
 1474         wl->wl_unsynced_bufbytes += wl->wl_bufbytes;
 1475 #endif
 1476 
 1477         we->we_wapbl = wl;
 1478         we->we_bufcount = wl->wl_bufcount;
 1479 #ifdef WAPBL_DEBUG_BUFBYTES
 1480         we->we_unsynced_bufbytes = wl->wl_bufbytes;
 1481 #endif
 1482         we->we_reclaimable_bytes = flushsize;
 1483         we->we_error = 0;
 1484         SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries);
 1485 
  1486         /*
  1487          * This flushes bufs in the reverse order from which they were queued.
  1488          * It shouldn't matter, but if we care we could use a TAILQ instead.
  1489          * XXX Note they will get put on the LRU queue when they flush,
  1490          * so we might actually want to change this to preserve order.
  1491          */
 1492         while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
 1493                 if (bbusy(bp, 0, 0, &wl->wl_mtx)) {
 1494                         continue;
 1495                 }
 1496                 bp->b_iodone = wapbl_biodone;
 1497                 bp->b_private = we;
 1498                 bremfree(bp);
 1499                 wapbl_remove_buf_locked(wl, bp);
 1500                 mutex_exit(&wl->wl_mtx);
 1501                 mutex_exit(&bufcache_lock);
 1502                 bawrite(bp);
 1503                 mutex_enter(&bufcache_lock);
 1504                 mutex_enter(&wl->wl_mtx);
 1505         }
 1506         mutex_exit(&wl->wl_mtx);
 1507         mutex_exit(&bufcache_lock);
 1508 
 1509 #if 0
 1510         WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
 1511                      ("wapbl_flush thread %d.%d done flushing entries...\n",
 1512                      curproc->p_pid, curlwp->l_lid));
 1513 #endif
 1514 
 1515  out:
 1516 
 1517         /*
 1518          * If the waitfor flag is set, don't return until everything is
 1519          * fully flushed and the on disk log is empty.
 1520          */
 1521         if (waitfor) {
 1522                 error = wapbl_truncate(wl, wl->wl_circ_size - 
 1523                         wl->wl_reserved_bytes, wapbl_lazy_truncate);
 1524         }
 1525 
 1526  out2:
 1527         if (error) {
 1528                 wl->wl_flush_abort(wl->wl_mount, wl->wl_deallocblks,
 1529                     wl->wl_dealloclens, wl->wl_dealloccnt);
 1530         }
 1531 
 1532 #ifdef WAPBL_DEBUG_PRINT
 1533         if (error) {
 1534                 pid_t pid = -1;
 1535                 lwpid_t lid = -1;
 1536                 if (curproc)
 1537                         pid = curproc->p_pid;
 1538                 if (curlwp)
 1539                         lid = curlwp->l_lid;
 1540                 mutex_enter(&wl->wl_mtx);
 1541 #ifdef WAPBL_DEBUG_BUFBYTES
 1542                 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1543                     ("wapbl_flush: thread %d.%d aborted flush: "
 1544                     "error = %d\n"
 1545                     "\tbufcount=%zu bufbytes=%zu bcount=%zu "
 1546                     "deallocs=%d inodes=%d\n"
 1547                     "\terrcnt = %d, reclaimable=%zu reserved=%zu "
 1548                     "unsynced=%zu\n",
 1549                     pid, lid, error, wl->wl_bufcount,
 1550                     wl->wl_bufbytes, wl->wl_bcount,
 1551                     wl->wl_dealloccnt, wl->wl_inohashcnt,
 1552                     wl->wl_error_count, wl->wl_reclaimable_bytes,
 1553                     wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes));
 1554                 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
 1555                         WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1556                             ("\tentry: bufcount = %zu, reclaimable = %zu, "
 1557                              "error = %d, unsynced = %zu\n",
 1558                              we->we_bufcount, we->we_reclaimable_bytes,
 1559                              we->we_error, we->we_unsynced_bufbytes));
 1560                 }
 1561 #else
 1562                 WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1563                     ("wapbl_flush: thread %d.%d aborted flush: "
 1564                      "error = %d\n"
 1565                      "\tbufcount=%zu bufbytes=%zu bcount=%zu "
 1566                      "deallocs=%d inodes=%d\n"
 1567                      "\terrcnt = %d, reclaimable=%zu reserved=%zu\n",
 1568                      pid, lid, error, wl->wl_bufcount,
 1569                      wl->wl_bufbytes, wl->wl_bcount,
 1570                      wl->wl_dealloccnt, wl->wl_inohashcnt,
 1571                      wl->wl_error_count, wl->wl_reclaimable_bytes,
 1572                      wl->wl_reserved_bytes));
 1573                 SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
 1574                         WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1575                             ("\tentry: bufcount = %zu, reclaimable = %zu, "
 1576                              "error = %d\n", we->we_bufcount,
 1577                              we->we_reclaimable_bytes, we->we_error));
 1578                 }
 1579 #endif
 1580                 mutex_exit(&wl->wl_mtx);
 1581         }
 1582 #endif
 1583 
 1584         rw_exit(&wl->wl_rwlock);
 1585         return error;
 1586 }
 1587 
 1588 /****************************************************************/
 1589 
 1590 void
 1591 wapbl_jlock_assert(struct wapbl *wl)
 1592 {
 1593 
 1594         KASSERT(rw_lock_held(&wl->wl_rwlock));
 1595 }
 1596 
 1597 void
 1598 wapbl_junlock_assert(struct wapbl *wl)
 1599 {
 1600 
 1601         KASSERT(!rw_write_held(&wl->wl_rwlock));
 1602 }
 1603 
 1604 /****************************************************************/
 1605 
 1606 /* locks missing */
 1607 void
 1608 wapbl_print(struct wapbl *wl,
 1609                 int full,
 1610                 void (*pr)(const char *, ...))
 1611 {
 1612         struct buf *bp;
 1613         struct wapbl_entry *we;
 1614         (*pr)("wapbl %p", wl);
 1615         (*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n",
 1616               wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn);
 1617         (*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n",
 1618               wl->wl_circ_size, wl->wl_circ_off,
 1619               (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail);
  1620         (*pr)("log_dev_bshift = %d, fs_dev_bshift = %d\n",
 1621               wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift);
 1622 #ifdef WAPBL_DEBUG_BUFBYTES
 1623         (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
 1624               "reserved = %zu errcnt = %d unsynced = %zu\n",
 1625               wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
 1626               wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
 1627                                 wl->wl_error_count, wl->wl_unsynced_bufbytes);
 1628 #else
 1629         (*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
 1630               "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes,
 1631               wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
 1632                                 wl->wl_error_count);
 1633 #endif
 1634         (*pr)("\tdealloccnt = %d, dealloclim = %d\n",
 1635               wl->wl_dealloccnt, wl->wl_dealloclim);
 1636         (*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n",
 1637               wl->wl_inohashcnt, wl->wl_inohashmask);
 1638         (*pr)("entries:\n");
 1639         SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
 1640 #ifdef WAPBL_DEBUG_BUFBYTES
 1641                 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, "
 1642                       "unsynced = %zu\n",
 1643                       we->we_bufcount, we->we_reclaimable_bytes,
 1644                       we->we_error, we->we_unsynced_bufbytes);
 1645 #else
 1646                 (*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n",
 1647                       we->we_bufcount, we->we_reclaimable_bytes, we->we_error);
 1648 #endif
 1649         }
 1650         if (full) {
 1651                 int cnt = 0;
 1652                 (*pr)("bufs =");
 1653                 LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) {
 1654                         if (!LIST_NEXT(bp, b_wapbllist)) {
 1655                                 (*pr)(" %p", bp);
 1656                         } else if ((++cnt % 6) == 0) {
 1657                                 (*pr)(" %p,\n\t", bp);
 1658                         } else {
 1659                                 (*pr)(" %p,", bp);
 1660                         }
 1661                 }
 1662                 (*pr)("\n");
 1663 
 1664                 (*pr)("dealloced blks = ");
 1665                 {
 1666                         int i;
 1667                         cnt = 0;
 1668                         for (i = 0; i < wl->wl_dealloccnt; i++) {
 1669                                 (*pr)(" %"PRId64":%d,",
 1670                                       wl->wl_deallocblks[i],
 1671                                       wl->wl_dealloclens[i]);
 1672                                 if ((++cnt % 4) == 0) {
 1673                                         (*pr)("\n\t");
 1674                                 }
 1675                         }
 1676                 }
 1677                 (*pr)("\n");
 1678 
 1679                 (*pr)("registered inodes = ");
 1680                 {
 1681                         int i;
 1682                         cnt = 0;
 1683                         for (i = 0; i <= wl->wl_inohashmask; i++) {
 1684                                 struct wapbl_ino_head *wih;
 1685                                 struct wapbl_ino *wi;
 1686 
 1687                                 wih = &wl->wl_inohash[i];
 1688                                 LIST_FOREACH(wi, wih, wi_hash) {
 1689                                         if (wi->wi_ino == 0)
 1690                                                 continue;
 1691                                         (*pr)(" %"PRId32"/0%06"PRIo32",",
 1692                                             wi->wi_ino, wi->wi_mode);
 1693                                         if ((++cnt % 4) == 0) {
 1694                                                 (*pr)("\n\t");
 1695                                         }
 1696                                 }
 1697                         }
 1698                         (*pr)("\n");
 1699                 }
 1700         }
 1701 }
 1702 
 1703 #if defined(WAPBL_DEBUG) || defined(DDB)
 1704 void
 1705 wapbl_dump(struct wapbl *wl)
 1706 {
 1707 #if defined(WAPBL_DEBUG)
 1708         if (!wl)
 1709                 wl = wapbl_debug_wl;
 1710 #endif
 1711         if (!wl)
 1712                 return;
 1713         wapbl_print(wl, 1, printf);
 1714 }
 1715 #endif
 1716 
 1717 /****************************************************************/
 1718 
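       /*
        * Record a deallocated block range for the current transaction.  The
        * pending deallocations are handed to the file system's flush callback
        * and written to the log as revocation records at commit time.
        */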
 1719 void
 1720 wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len)
 1721 {
 1722 
 1723         wapbl_jlock_assert(wl);
 1724 
 1725         mutex_enter(&wl->wl_mtx);
 1726         /* XXX should eventually instead tie this into resource estimation */
 1727         /*
 1728          * XXX this panic needs locking/mutex analysis and the
 1729          * ability to cope with the failure.
 1730          */
 1731         /* XXX this XXX doesn't have enough XXX */
 1732         if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim))
 1733                 panic("wapbl_register_deallocation: out of resources");
 1734 
 1735         wl->wl_deallocblks[wl->wl_dealloccnt] = blk;
 1736         wl->wl_dealloclens[wl->wl_dealloccnt] = len;
 1737         wl->wl_dealloccnt++;
 1738         WAPBL_PRINTF(WAPBL_PRINT_ALLOC,
 1739             ("wapbl_register_deallocation: blk=%"PRId64" len=%d\n", blk, len));
 1740         mutex_exit(&wl->wl_mtx);
 1741 }
 1742 
 1743 /****************************************************************/
 1744 
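       /*
        * Inode tracking: a hash table of inode number/mode pairs that is
        * written to the log on every commit (see wapbl_write_inodes()) so
        * that replay can recover the registered inodes.  The backing pool is
        * reference counted and shared by all logs.
        */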
 1745 static void
 1746 wapbl_inodetrk_init(struct wapbl *wl, u_int size)
 1747 {
 1748 
 1749         wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask);
 1750         if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) {
 1751                 pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0,
 1752                     "wapblinopl", &pool_allocator_nointr, IPL_NONE);
 1753         }
 1754 }
 1755 
 1756 static void
 1757 wapbl_inodetrk_free(struct wapbl *wl)
 1758 {
 1759 
 1760         /* XXX this KASSERT needs locking/mutex analysis */
 1761         KASSERT(wl->wl_inohashcnt == 0);
 1762         hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask);
 1763         if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) {
 1764                 pool_destroy(&wapbl_ino_pool);
 1765         }
 1766 }
 1767 
 1768 static struct wapbl_ino *
 1769 wapbl_inodetrk_get(struct wapbl *wl, ino_t ino)
 1770 {
 1771         struct wapbl_ino_head *wih;
 1772         struct wapbl_ino *wi;
 1773 
 1774         KASSERT(mutex_owned(&wl->wl_mtx));
 1775 
 1776         wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
 1777         LIST_FOREACH(wi, wih, wi_hash) {
 1778                 if (ino == wi->wi_ino)
 1779                         return wi;
 1780         }
 1781         return 0;
 1782 }
 1783 
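       /*
        * Register an inode with the log.  If the inode is already registered
        * the preallocated entry is returned to the pool; otherwise it is
        * added to the inode hash and included in every subsequent commit
        * until it is unregistered.
        */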
 1784 void
 1785 wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode)
 1786 {
 1787         struct wapbl_ino_head *wih;
 1788         struct wapbl_ino *wi;
 1789 
 1790         wi = pool_get(&wapbl_ino_pool, PR_WAITOK);
 1791 
 1792         mutex_enter(&wl->wl_mtx);
 1793         if (wapbl_inodetrk_get(wl, ino) == NULL) {
 1794                 wi->wi_ino = ino;
 1795                 wi->wi_mode = mode;
 1796                 wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
 1797                 LIST_INSERT_HEAD(wih, wi, wi_hash);
 1798                 wl->wl_inohashcnt++;
 1799                 WAPBL_PRINTF(WAPBL_PRINT_INODE,
 1800                     ("wapbl_register_inode: ino=%"PRId64"\n", ino));
 1801                 mutex_exit(&wl->wl_mtx);
 1802         } else {
 1803                 mutex_exit(&wl->wl_mtx);
 1804                 pool_put(&wapbl_ino_pool, wi);
 1805         }
 1806 }
 1807 
 1808 void
 1809 wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode)
 1810 {
 1811         struct wapbl_ino *wi;
 1812 
 1813         mutex_enter(&wl->wl_mtx);
 1814         wi = wapbl_inodetrk_get(wl, ino);
 1815         if (wi) {
 1816                 WAPBL_PRINTF(WAPBL_PRINT_INODE,
 1817                     ("wapbl_unregister_inode: ino=%"PRId64"\n", ino));
 1818                 KASSERT(wl->wl_inohashcnt > 0);
 1819                 wl->wl_inohashcnt--;
 1820                 LIST_REMOVE(wi, wi_hash);
 1821                 mutex_exit(&wl->wl_mtx);
 1822 
 1823                 pool_put(&wapbl_ino_pool, wi);
 1824         } else {
 1825                 mutex_exit(&wl->wl_mtx);
 1826         }
 1827 }
 1828 
 1829 /****************************************************************/
 1830 
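       /*
        * Calculate the space (in log blocks) needed to record the currently
        * registered inodes.  At least one block is counted even with no
        * inodes, since wapbl_write_inodes() always emits at least one
        * (possibly empty) inode list record.
        */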
 1831 static __inline size_t
 1832 wapbl_transaction_inodes_len(struct wapbl *wl)
 1833 {
 1834         int blocklen = 1<<wl->wl_log_dev_bshift;
 1835         int iph;
 1836 
  1837         /* Calculate the number of inodes described in an inodelist header */
 1838         iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
 1839             sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
 1840 
 1841         KASSERT(iph > 0);
 1842 
 1843         return MAX(1, howmany(wl->wl_inohashcnt, iph))*blocklen;
 1844 }
 1845 
 1846 
 1847 /* Calculate amount of space a transaction will take on disk */
 1848 static size_t
 1849 wapbl_transaction_len(struct wapbl *wl)
 1850 {
 1851         int blocklen = 1<<wl->wl_log_dev_bshift;
 1852         size_t len;
 1853         int bph;
 1854 
 1855         /* Calculate number of blocks described in a blocklist header */
 1856         bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
 1857             sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
 1858 
 1859         KASSERT(bph > 0);
 1860 
 1861         len = wl->wl_bcount;
 1862         len += howmany(wl->wl_bufcount, bph)*blocklen;
 1863         len += howmany(wl->wl_dealloccnt, bph)*blocklen;
 1864         len += wapbl_transaction_inodes_len(wl);
 1865 
 1866         return len;
 1867 }
 1868 
 1869 /*
 1870  * Perform commit operation
 1871  *
  1872  * Note that incrementing the generation number must be
  1873  * protected against racing with other invocations of
  1874  * wapbl_write_commit.  This is OK since this routine
  1875  * is only invoked from wapbl_flush.
 1876  */
 1877 static int
 1878 wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail)
 1879 {
 1880         struct wapbl_wc_header *wc = wl->wl_wc_header;
 1881         struct timespec ts;
 1882         int error;
 1883         int force = 1;
 1884 
 1885         if (wapbl_flush_disk_cache) {
  1886                 /* XXX Calculate a checksum here; for now we just flush the disk cache */
 1887                 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force,
 1888                     FWRITE, FSCRED);
 1889                 if (error) {
 1890                         WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1891                             ("wapbl_write_commit: DIOCCACHESYNC on dev 0x%x "
 1892                             "returned %d\n", wl->wl_devvp->v_rdev, error));
 1893                 }
 1894         }
 1895 
 1896         wc->wc_head = head;
 1897         wc->wc_tail = tail;
 1898         wc->wc_checksum = 0;
 1899         wc->wc_version = 1;
 1900         getnanotime(&ts);
  1901         wc->wc_time = ts.tv_sec;
 1902         wc->wc_timensec = ts.tv_nsec;
 1903 
 1904         WAPBL_PRINTF(WAPBL_PRINT_WRITE,
  1905             ("wapbl_write_commit: head = %"PRIdMAX" tail = %"PRIdMAX"\n",
 1906             (intmax_t)head, (intmax_t)tail));
 1907 
 1908         /*
  1909          * XXX if the generation number is about to roll over, first zero
  1910          * the second commit header before trying to write both headers.
 1911          */
 1912 
 1913         error = wapbl_write(wc, wc->wc_len, wl->wl_devvp,
 1914             wl->wl_logpbn + wc->wc_generation % 2);
 1915         if (error)
 1916                 return error;
 1917 
 1918         if (wapbl_flush_disk_cache) {
 1919                 error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force,
 1920                     FWRITE, FSCRED);
 1921                 if (error) {
 1922                         WAPBL_PRINTF(WAPBL_PRINT_ERROR,
 1923                             ("wapbl_write_commit: DIOCCACHESYNC on dev 0x%x "
 1924                             "returned %d\n", wl->wl_devvp->v_rdev, error));
 1925                 }
 1926         }
 1927 
 1928         /*
 1929          * If the generation number was zero, write it out a second time.
  1930          * This handles initialization and generation number rollover.
 1931          */
 1932         if (wc->wc_generation++ == 0) {
 1933                 error = wapbl_write_commit(wl, head, tail);
 1934                 /*
  1935                  * This panic could be removed if we do the zeroing
  1936                  * mentioned above and are certain to roll back the
  1937                  * generation number on failure.
 1938                  */
 1939                 if (error)
 1940                         panic("wapbl_write_commit: error writing duplicate "
 1941                               "log header: %d\n", error);
 1942         }
 1943         return 0;
 1944 }
 1945 
 1946 /* Returns new offset value */
 1947 static int
 1948 wapbl_write_blocks(struct wapbl *wl, off_t *offp)
 1949 {
 1950         struct wapbl_wc_blocklist *wc =
 1951             (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
 1952         int blocklen = 1<<wl->wl_log_dev_bshift;
 1953         int bph;
 1954         struct buf *bp;
 1955         off_t off = *offp;
 1956         int error;
 1957 
 1958         KASSERT(rw_write_held(&wl->wl_rwlock));
 1959 
 1960         bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
 1961             sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
 1962 
 1963         bp = LIST_FIRST(&wl->wl_bufs);
 1964 
 1965         while (bp) {
 1966                 int cnt;
 1967                 struct buf *obp = bp;
 1968 
 1969                 KASSERT(bp->b_flags & B_LOCKED);
 1970 
 1971                 wc->wc_type = WAPBL_WC_BLOCKS;
 1972                 wc->wc_len = blocklen;
 1973                 wc->wc_blkcount = 0;
 1974                 while (bp && (wc->wc_blkcount < bph)) {
 1975                         /*
 1976                          * Make sure all the physical block numbers are up to
 1977                          * date.  If this is not always true on a given
 1978                          * filesystem, then VOP_BMAP must be called.  We
 1979                          * could call VOP_BMAP here, or else in the filesystem
 1980                          * specific flush callback, although neither of those
 1981                          * solutions allow us to take the vnode lock.  If a
 1982                          * filesystem requires that we must take the vnode lock
 1983                          * to call VOP_BMAP, then we can probably do it in
 1984                          * bwrite when the vnode lock should already be held
 1985                          * by the invoking code.
 1986                          */
 1987                         KASSERT((bp->b_vp->v_type == VBLK) ||
 1988                                  (bp->b_blkno != bp->b_lblkno));
 1989                         KASSERT(bp->b_blkno > 0);
 1990 
 1991                         wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno;
 1992                         wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount;
 1993                         wc->wc_len += bp->b_bcount;
 1994                         wc->wc_blkcount++;
 1995                         bp = LIST_NEXT(bp, b_wapbllist);
 1996                 }
 1997                 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
 1998                     ("wapbl_write_blocks: len = %u off = %"PRIdMAX"\n",
 1999                     wc->wc_len, (intmax_t)off));
 2000 
 2001                 error = wapbl_circ_write(wl, wc, blocklen, &off);
 2002                 if (error)
 2003                         return error;
 2004                 bp = obp;
 2005                 cnt = 0;
 2006                 while (bp && (cnt++ < bph)) {
 2007                         error = wapbl_circ_write(wl, bp->b_data,
 2008                             bp->b_bcount, &off);
 2009                         if (error)
 2010                                 return error;
 2011                         bp = LIST_NEXT(bp, b_wapbllist);
 2012                 }
 2013         }
 2014         *offp = off;
 2015         return 0;
 2016 }
 2017 
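       /*
        * Write the pending deallocations to the log as WAPBL_WC_REVOCATIONS
        * records, splitting them across as many blocklist headers as needed.
        * Returns the new log offset through *offp.
        */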
 2018 static int
 2019 wapbl_write_revocations(struct wapbl *wl, off_t *offp)
 2020 {
 2021         struct wapbl_wc_blocklist *wc =
 2022             (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
 2023         int i;
 2024         int blocklen = 1<<wl->wl_log_dev_bshift;
 2025         int bph;
 2026         off_t off = *offp;
 2027         int error;
 2028 
 2029         if (wl->wl_dealloccnt == 0)
 2030                 return 0;
 2031 
 2032         bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
 2033             sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
 2034 
 2035         i = 0;
 2036         while (i < wl->wl_dealloccnt) {
 2037                 wc->wc_type = WAPBL_WC_REVOCATIONS;
 2038                 wc->wc_len = blocklen;
 2039                 wc->wc_blkcount = 0;
 2040                 while ((i < wl->wl_dealloccnt) && (wc->wc_blkcount < bph)) {
 2041                         wc->wc_blocks[wc->wc_blkcount].wc_daddr =
 2042                             wl->wl_deallocblks[i];
 2043                         wc->wc_blocks[wc->wc_blkcount].wc_dlen =
 2044                             wl->wl_dealloclens[i];
 2045                         wc->wc_blkcount++;
 2046                         i++;
 2047                 }
 2048                 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
 2049                     ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n",
 2050                     wc->wc_len, (intmax_t)off));
 2051                 error = wapbl_circ_write(wl, wc, blocklen, &off);
 2052                 if (error)
 2053                         return error;
 2054         }
 2055         *offp = off;
 2056         return 0;
 2057 }
 2058 
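       /*
        * Write the registered inodes to the log as WAPBL_WC_INODES records.
        * At least one record is always written; the first carries wc_clear
        * so that replay discards any inode list from an earlier commit.
        * Returns the new log offset through *offp.
        */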
 2059 static int
 2060 wapbl_write_inodes(struct wapbl *wl, off_t *offp)
 2061 {
 2062         struct wapbl_wc_inodelist *wc =
 2063             (struct wapbl_wc_inodelist *)wl->wl_wc_scratch;
 2064         int i;
 2065         int blocklen = 1<<wl->wl_log_dev_bshift;
 2066         off_t off = *offp;
 2067         int error;
 2068 
 2069         struct wapbl_ino_head *wih;
 2070         struct wapbl_ino *wi;
 2071         int iph;
 2072 
 2073         iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
 2074             sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);
 2075 
 2076         i = 0;
 2077         wih = &wl->wl_inohash[0];
 2078         wi = 0;
 2079         do {
 2080                 wc->wc_type = WAPBL_WC_INODES;
 2081                 wc->wc_len = blocklen;
 2082                 wc->wc_inocnt = 0;
 2083                 wc->wc_clear = (i == 0);
 2084                 while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) {
 2085                         while (!wi) {
 2086                                 KASSERT((wih - &wl->wl_inohash[0])
 2087                                     <= wl->wl_inohashmask);
 2088                                 wi = LIST_FIRST(wih++);
 2089                         }
 2090                         wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino;
 2091                         wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode;
 2092                         wc->wc_inocnt++;
 2093                         i++;
 2094                         wi = LIST_NEXT(wi, wi_hash);
 2095                 }
 2096                 WAPBL_PRINTF(WAPBL_PRINT_WRITE,
 2097                     ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n",
 2098                     wc->wc_len, (intmax_t)off));
 2099                 error = wapbl_circ_write(wl, wc, blocklen, &off);
 2100                 if (error)
 2101                         return error;
 2102         } while (i < wl->wl_inohashcnt);
 2103         
 2104         *offp = off;
 2105         return 0;
 2106 }
 2107 
 2108 #endif /* _KERNEL */
 2109 
 2110 /****************************************************************/
 2111 
 2112 #ifdef _KERNEL
 2113 static struct pool wapbl_blk_pool;
 2114 static int wapbl_blk_pool_refcount;
 2115 #endif
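       /*
        * Replay block hash: maps each filesystem block number found in the
        * log to the log offset of its most recent copy.  Replay uses it to
        * decide which logged blocks still need to be written back and to
        * skip blocks that were later revoked.
        */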
 2116 struct wapbl_blk {
 2117         LIST_ENTRY(wapbl_blk) wb_hash;
 2118         daddr_t wb_blk;
 2119         off_t wb_off; /* Offset of this block in the log */
 2120 };
 2121 #define WAPBL_BLKPOOL_MIN 83
 2122 
 2123 static void
 2124 wapbl_blkhash_init(struct wapbl_replay *wr, u_int size)
 2125 {
 2126         if (size < WAPBL_BLKPOOL_MIN)
 2127                 size = WAPBL_BLKPOOL_MIN;
 2128         KASSERT(wr->wr_blkhash == 0);
 2129 #ifdef _KERNEL
 2130         wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask);
 2131         if (atomic_inc_uint_nv(&wapbl_blk_pool_refcount) == 1) {
 2132                 pool_init(&wapbl_blk_pool, sizeof(struct wapbl_blk), 0, 0, 0,
 2133                     "wapblblkpl", &pool_allocator_nointr, IPL_NONE);
 2134         }
 2135 #else /* ! _KERNEL */
 2136         /* Manually implement hashinit */
 2137         {
 2138                 int i;
 2139                 unsigned long hashsize;
 2140                 for (hashsize = 1; hashsize < size; hashsize <<= 1)
 2141                         continue;
 2142                 wr->wr_blkhash = wapbl_malloc(hashsize * sizeof(*wr->wr_blkhash));
 2143                 for (i = 0; i < hashsize; i++)
 2144                         LIST_INIT(&wr->wr_blkhash[i]);
 2145                 wr->wr_blkhashmask = hashsize - 1;
 2146         }
 2147 #endif /* ! _KERNEL */
 2148 }
 2149 
 2150 static void
 2151 wapbl_blkhash_free(struct wapbl_replay *wr)
 2152 {
 2153         KASSERT(wr->wr_blkhashcnt == 0);
 2154 #ifdef _KERNEL
 2155         hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask);
 2156         if (atomic_dec_uint_nv(&wapbl_blk_pool_refcount) == 0) {
 2157                 pool_destroy(&wapbl_blk_pool);
 2158         }
 2159 #else /* ! _KERNEL */
 2160         wapbl_free(wr->wr_blkhash);
 2161 #endif /* ! _KERNEL */
 2162 }
 2163 
 2164 static struct wapbl_blk *
 2165 wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk)
 2166 {
 2167         struct wapbl_blk_head *wbh;
 2168         struct wapbl_blk *wb;
 2169         wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
 2170         LIST_FOREACH(wb, wbh, wb_hash) {
 2171                 if (blk == wb->wb_blk)
 2172                         return wb;
 2173         }
 2174         return 0;
 2175 }
 2176 
 2177 static void
 2178 wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off)
 2179 {
 2180         struct wapbl_blk_head *wbh;
 2181         struct wapbl_blk *wb;
 2182         wb = wapbl_blkhash_get(wr, blk);
 2183         if (wb) {
 2184                 KASSERT(wb->wb_blk == blk);
 2185                 wb->wb_off = off;
 2186         } else {
 2187 #ifdef _KERNEL
 2188                 wb = pool_get(&wapbl_blk_pool, PR_WAITOK);
 2189 #else /* ! _KERNEL */
 2190                 wb = wapbl_malloc(sizeof(*wb));
 2191 #endif /* ! _KERNEL */
 2192                 wb->wb_blk = blk;
 2193                 wb->wb_off = off;
 2194                 wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
 2195                 LIST_INSERT_HEAD(wbh, wb, wb_hash);
 2196                 wr->wr_blkhashcnt++;
 2197         }
 2198 }
 2199 
 2200 static void
 2201 wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk)
 2202 {
 2203         struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
 2204         if (wb) {
 2205                 KASSERT(wr->wr_blkhashcnt > 0);
 2206                 wr->wr_blkhashcnt--;
 2207                 LIST_REMOVE(wb, wb_hash);
 2208 #ifdef _KERNEL
 2209                 pool_put(&wapbl_blk_pool, wb);
 2210 #else /* ! _KERNEL */
 2211                 wapbl_free(wb);
 2212 #endif /* ! _KERNEL */
 2213         }
 2214 }
 2215 
 2216 static void
 2217 wapbl_blkhash_clear(struct wapbl_replay *wr)
 2218 {
 2219         int i;
 2220         for (i = 0; i <= wr->wr_blkhashmask; i++) {
 2221                 struct wapbl_blk *wb;
 2222 
 2223                 while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) {
 2224                         KASSERT(wr->wr_blkhashcnt > 0);
 2225                         wr->wr_blkhashcnt--;
 2226                         LIST_REMOVE(wb, wb_hash);
 2227 #ifdef _KERNEL
 2228                         pool_put(&wapbl_blk_pool, wb);
 2229 #else /* ! _KERNEL */
 2230                         wapbl_free(wb);
 2231 #endif /* ! _KERNEL */
 2232                 }
 2233         }
 2234         KASSERT(wr->wr_blkhashcnt == 0);
 2235 }
 2236 
 2237 /****************************************************************/
 2238 
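       /*
        * Read len bytes from the circular log region into data, starting at
        * *offp and wrapping around at the end of the region; *offp is
        * advanced past the data read.
        */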
 2239 static int
 2240 wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp)
 2241 {
 2242         size_t slen;
 2243         struct wapbl_wc_header *wc = &wr->wr_wc_header;
 2244         off_t off = *offp;
 2245         int error;
 2246 
 2247         KASSERT(((len >> wc->wc_log_dev_bshift) <<
 2248             wc->wc_log_dev_bshift) == len);
 2249         if (off < wc->wc_circ_off)
 2250                 off = wc->wc_circ_off;
 2251         slen = wc->wc_circ_off + wc->wc_circ_size - off;
 2252         if (slen < len) {
 2253                 error = wapbl_read(data, slen, wr->wr_devvp,
 2254                     wr->wr_logpbn + (off >> wc->wc_log_dev_bshift));
 2255                 if (error)
 2256                         return error;
 2257                 data = (uint8_t *)data + slen;
 2258                 len -= slen;
 2259                 off = wc->wc_circ_off;
 2260         }
 2261         error = wapbl_read(data, len, wr->wr_devvp,
 2262             wr->wr_logpbn + (off >> wc->wc_log_dev_bshift));
 2263         if (error)
 2264                 return error;
 2265         off += len;
 2266         if (off >= wc->wc_circ_off + wc->wc_circ_size)
 2267                 off = wc->wc_circ_off;
 2268         *offp = off;
 2269         return 0;
 2270 }
 2271 
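       /*
        * Advance *offp by len bytes within the circular log region, wrapping
        * as needed, without reading any data.
        */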
 2272 static void
 2273 wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp)
 2274 {
 2275         size_t slen;
 2276         struct wapbl_wc_header *wc = &wr->wr_wc_header;
 2277         off_t off = *offp;
 2278 
 2279         KASSERT(((len >> wc->wc_log_dev_bshift) <<
 2280             wc->wc_log_dev_bshift) == len);
 2281 
 2282         if (off < wc->wc_circ_off)
 2283                 off = wc->wc_circ_off;
 2284         slen = wc->wc_circ_off + wc->wc_circ_size - off;
 2285         if (slen < len) {
 2286                 len -= slen;
 2287                 off = wc->wc_circ_off;
 2288         }
 2289         off += len;
 2290         if (off >= wc->wc_circ_off + wc->wc_circ_size)
 2291                 off = wc->wc_circ_off;
 2292         *offp = off;
 2293 }
 2294 
 2295 /****************************************************************/
 2296 
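       /*
        * Open a log for replay: read the two commit headers, pick the one
        * with the newer generation, then prescan the journal to build the
        * replay block hash and collect the registered inodes.
        */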
 2297 int
 2298 wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp,
 2299         daddr_t off, size_t count, size_t blksize)
 2300 {
 2301         struct wapbl_replay *wr;
 2302         int error;
 2303         struct vnode *devvp;
 2304         daddr_t logpbn;
 2305         uint8_t *scratch;
 2306         struct wapbl_wc_header *wch;
 2307         struct wapbl_wc_header *wch2;
 2308         /* Use this until we read the actual log header */
 2309         int log_dev_bshift = DEV_BSHIFT;
 2310         size_t used;
 2311 
 2312         WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
 2313             ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n",
 2314             vp, off, count, blksize));
 2315 
 2316         if (off < 0)
 2317                 return EINVAL;
 2318 
 2319         if (blksize < DEV_BSIZE)
 2320                 return EINVAL;
 2321         if (blksize % DEV_BSIZE)
 2322                 return EINVAL;
 2323 
 2324 #ifdef _KERNEL
 2325 #if 0
 2326         /* XXX vp->v_size isn't reliably set for VBLK devices,
 2327          * especially root.  However, we might still want to verify
 2328          * that the full load is readable */
 2329         if ((off + count) * blksize > vp->v_size)
 2330                 return EINVAL;
 2331 #endif
 2332 
 2333         if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) {
 2334                 return error;
 2335         }
 2336 #else /* ! _KERNEL */
 2337         devvp = vp;
 2338         logpbn = off;
 2339 #endif /* ! _KERNEL */
 2340 
 2341         scratch = wapbl_malloc(MAXBSIZE);
 2342 
 2343         error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, logpbn);
 2344         if (error)
 2345                 goto errout;
 2346 
 2347         wch = (struct wapbl_wc_header *)scratch;
 2348         wch2 =
 2349             (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift));
 2350         /* XXX verify checksums and magic numbers */
 2351         if (wch->wc_type != WAPBL_WC_HEADER) {
 2352                 printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type);
 2353                 error = EFTYPE;
 2354                 goto errout;
 2355         }
 2356 
 2357         if (wch2->wc_generation > wch->wc_generation)
 2358                 wch = wch2;
 2359 
 2360         wr = wapbl_calloc(1, sizeof(*wr));
 2361 
 2362         wr->wr_logvp = vp;
 2363         wr->wr_devvp = devvp;
 2364         wr->wr_logpbn = logpbn;
 2365 
 2366         wr->wr_scratch = scratch;
 2367 
 2368         memcpy(&wr->wr_wc_header, wch, sizeof(wr->wr_wc_header));
 2369 
 2370         used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail);
 2371 
 2372         WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
 2373             ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64
 2374             " len=%"PRId64" used=%zu\n",
 2375             wch->wc_head, wch->wc_tail, wch->wc_circ_off,
 2376             wch->wc_circ_size, used));
 2377 
 2378         wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift));
 2379         error = wapbl_replay_prescan(wr);
 2380         if (error) {
 2381                 wapbl_replay_stop(wr);
 2382                 wapbl_replay_free(wr);
 2383                 return error;
 2384         }
 2385 
 2386         error = wapbl_replay_get_inodes(wr);
 2387         if (error) {
 2388                 wapbl_replay_stop(wr);
 2389                 wapbl_replay_free(wr);
 2390                 return error;
 2391         }
 2392 
 2393         *wrp = wr;
 2394         return 0;
 2395 
 2396  errout:
 2397         wapbl_free(scratch);
 2398         return error;
 2399 }
 2400 
 2401 void
 2402 wapbl_replay_stop(struct wapbl_replay *wr)
 2403 {
 2404 
 2405         WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n"));
 2406 
 2407         KDASSERT(wapbl_replay_isopen(wr));
 2408 
 2409         wapbl_free(wr->wr_scratch);
 2410         wr->wr_scratch = 0;
 2411 
 2412         wr->wr_logvp = 0;
 2413 
 2414         wapbl_blkhash_clear(wr);
 2415         wapbl_blkhash_free(wr);
 2416 }
 2417 
 2418 void
 2419 wapbl_replay_free(struct wapbl_replay *wr)
 2420 {
 2421 
 2422         KDASSERT(!wapbl_replay_isopen(wr));
 2423 
 2424         if (wr->wr_inodes)
 2425                 wapbl_free(wr->wr_inodes);
 2426         wapbl_free(wr);
 2427 }
 2428 
 2429 int
 2430 wapbl_replay_isopen1(struct wapbl_replay *wr)
 2431 {
 2432 
 2433         return wapbl_replay_isopen(wr);
 2434 }
 2435 
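       /*
        * Walk the log from tail to head, entering each logged data block
        * into the block hash, removing revoked blocks, and remembering where
        * the most recent inode list starts and ends.
        */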
 2436 static int
 2437 wapbl_replay_prescan(struct wapbl_replay *wr)
 2438 {
 2439         off_t off;
 2440         struct wapbl_wc_header *wch = &wr->wr_wc_header;
 2441         int error;
 2442 
 2443         int logblklen = 1<<wch->wc_log_dev_bshift;
 2444         int fsblklen = 1<<wch->wc_fs_dev_bshift;
 2445 
 2446         wapbl_blkhash_clear(wr);
 2447 
 2448         off = wch->wc_tail;
 2449         while (off != wch->wc_head) {
 2450                 struct wapbl_wc_null *wcn;
 2451                 off_t saveoff = off;
 2452                 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
 2453                 if (error)
 2454                         goto errout;
 2455                 wcn = (struct wapbl_wc_null *)wr->wr_scratch;
 2456                 switch (wcn->wc_type) {
 2457                 case WAPBL_WC_BLOCKS:
 2458                         {
 2459                                 struct wapbl_wc_blocklist *wc =
 2460                                     (struct wapbl_wc_blocklist *)wr->wr_scratch;
 2461                                 int i;
 2462                                 for (i = 0; i < wc->wc_blkcount; i++) {
 2463                                         int j, n;
 2464                                         /*
 2465                                          * Enter each physical block into the
 2466                                          * hashtable independently
 2467                                          */
 2468                                         n = wc->wc_blocks[i].wc_dlen >>
 2469                                             wch->wc_fs_dev_bshift;
 2470                                         for (j = 0; j < n; j++) {
 2471                                                 wapbl_blkhash_ins(wr,
 2472                                                     wc->wc_blocks[i].wc_daddr + j,
 2473                                                     off);
 2474                                                 wapbl_circ_advance(wr,
 2475                                                     fsblklen, &off);
 2476                                         }
 2477                                 }
 2478                         }
 2479                         break;
 2480 
 2481                 case WAPBL_WC_REVOCATIONS:
 2482                         {
 2483                                 struct wapbl_wc_blocklist *wc =
 2484                                     (struct wapbl_wc_blocklist *)wr->wr_scratch;
 2485                                 int i;
 2486                                 for (i = 0; i < wc->wc_blkcount; i++) {
 2487                                         int j, n;
 2488                                         /*
 2489                                          * Remove any blocks found from the
 2490                                          * hashtable
 2491                                          */
 2492                                         n = wc->wc_blocks[i].wc_dlen >>
 2493                                             wch->wc_fs_dev_bshift;
 2494                                         for (j = 0; j < n; j++) {
 2495                                                 wapbl_blkhash_rem(wr,
 2496                                                    wc->wc_blocks[i].wc_daddr + j);
 2497                                         }
 2498                                 }
 2499                         }
 2500                         break;
 2501 
 2502                 case WAPBL_WC_INODES:
 2503                         {
 2504                                 struct wapbl_wc_inodelist *wc =
 2505                                     (struct wapbl_wc_inodelist *)wr->wr_scratch;
 2506                                 /*
 2507                                  * Keep track of where we found this so we
 2508                                  * can use it later
 2509                                  */
 2510                                 if (wc->wc_clear) {
 2511                                         wr->wr_inodestail = saveoff;
 2512                                         wr->wr_inodescnt = 0;
 2513                                 }
 2514                                 if (wr->wr_inodestail)
 2515                                         wr->wr_inodeshead = off;
 2516                                 wr->wr_inodescnt += wc->wc_inocnt;
 2517                         }
 2518                         break;
 2519                 default:
 2520                         printf("Unrecognized wapbl type: 0x%08x\n",
 2521                                wcn->wc_type);
 2522                         error = EFTYPE;
 2523                         goto errout;
 2524                 }
 2525                 wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
 2526                 if (off != saveoff) {
 2527                         printf("wapbl_replay: corrupted records\n");
 2528                         error = EFTYPE;
 2529                         goto errout;
 2530                 }
 2531         }
 2532         return 0;
 2533 
 2534  errout:
 2535         wapbl_blkhash_clear(wr);
 2536         return error;
 2537 }
 2538 
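       /*
        * Walk the log again and copy the most recent inode list (located by
        * the prescan) into wr->wr_inodes.
        */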
 2539 static int
 2540 wapbl_replay_get_inodes(struct wapbl_replay *wr)
 2541 {
 2542         off_t off;
 2543         struct wapbl_wc_header *wch = &wr->wr_wc_header;
 2544         int logblklen = 1<<wch->wc_log_dev_bshift;
  2545         int cnt = 0;
 2546 
 2547         KDASSERT(wapbl_replay_isopen(wr));
 2548 
 2549         if (wr->wr_inodescnt == 0)
 2550                 return 0;
 2551 
 2552         KASSERT(!wr->wr_inodes);
 2553 
 2554         wr->wr_inodes = wapbl_malloc(wr->wr_inodescnt*sizeof(wr->wr_inodes[0]));
 2555 
 2556         off = wr->wr_inodestail;
 2557 
 2558         while (off != wr->wr_inodeshead) {
 2559                 struct wapbl_wc_null *wcn;
 2560                 int error;
 2561                 off_t saveoff = off;
 2562                 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
 2563                 if (error) {
 2564                         wapbl_free(wr->wr_inodes);
 2565                         wr->wr_inodes = 0;
 2566                         return error;
 2567                 }
 2568                 wcn = (struct wapbl_wc_null *)wr->wr_scratch;
 2569                 switch (wcn->wc_type) {
 2570                 case WAPBL_WC_BLOCKS:
 2571                 case WAPBL_WC_REVOCATIONS:
 2572                         break;
 2573                 case WAPBL_WC_INODES:
 2574                         {
 2575                                 struct wapbl_wc_inodelist *wc =
 2576                                     (struct wapbl_wc_inodelist *)wr->wr_scratch;
 2577                                 /*
 2578                                  * Keep track of where we found this so we
 2579                                  * can use it later
 2580                                  */
 2581                                 if (wc->wc_clear) {
 2582                                         cnt = 0;
 2583                                 }
 2584                                 /* This memcpy assumes that wr_inodes is
 2585                                  * laid out the same as wc_inodes. */
 2586                                 memcpy(&wr->wr_inodes[cnt], wc->wc_inodes,
 2587                                        wc->wc_inocnt*sizeof(wc->wc_inodes[0]));
 2588                                 cnt += wc->wc_inocnt;
 2589                         }
 2590                         break;
 2591                 default:
 2592                         KASSERT(0);
 2593                 }
 2594                 off = saveoff;
 2595                 wapbl_circ_advance(wr, wcn->wc_len, &off);
 2596         }
 2597         KASSERT(cnt == wr->wr_inodescnt);
 2598         return 0;
 2599 }
 2600 
 2601 #ifdef DEBUG
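       /*
        * Debug check: compare each logged block that is still current in the
        * block hash against the corresponding block on the file system
        * device and report any mismatches.
        */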
 2602 int
 2603 wapbl_replay_verify(struct wapbl_replay *wr, struct vnode *fsdevvp)
 2604 {
 2605         off_t off;
 2606         struct wapbl_wc_header *wch = &wr->wr_wc_header;
 2607         int mismatchcnt = 0;
 2608         int logblklen = 1<<wch->wc_log_dev_bshift;
 2609         int fsblklen = 1<<wch->wc_fs_dev_bshift;
 2610         void *scratch1 = wapbl_malloc(MAXBSIZE);
 2611         void *scratch2 = wapbl_malloc(MAXBSIZE);
 2612         int error = 0;
 2613 
 2614         KDASSERT(wapbl_replay_isopen(wr));
 2615 
 2616         off = wch->wc_tail;
 2617         while (off != wch->wc_head) {
 2618                 struct wapbl_wc_null *wcn;
 2619 #ifdef DEBUG
 2620                 off_t saveoff = off;
 2621 #endif
 2622                 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
 2623                 if (error)
 2624                         goto out;
 2625                 wcn = (struct wapbl_wc_null *)wr->wr_scratch;
 2626                 switch (wcn->wc_type) {
 2627                 case WAPBL_WC_BLOCKS:
 2628                         {
 2629                                 struct wapbl_wc_blocklist *wc =
 2630                                     (struct wapbl_wc_blocklist *)wr->wr_scratch;
 2631                                 int i;
 2632                                 for (i = 0; i < wc->wc_blkcount; i++) {
 2633                                         int foundcnt = 0;
 2634                                         int dirtycnt = 0;
 2635                                         int j, n;
 2636                                         /*
  2637                                          * Check each physical block against
  2638                                          * the hashtable independently
 2639                                          */
 2640                                         n = wc->wc_blocks[i].wc_dlen >>
 2641                                             wch->wc_fs_dev_bshift;
 2642                                         for (j = 0; j < n; j++) {
 2643                                                 struct wapbl_blk *wb =
 2644                                                    wapbl_blkhash_get(wr,
 2645                                                    wc->wc_blocks[i].wc_daddr + j);
 2646                                                 if (wb && (wb->wb_off == off)) {
 2647                                                         foundcnt++;
 2648                                                         error =
 2649                                                             wapbl_circ_read(wr,
 2650                                                             scratch1, fsblklen,
 2651                                                             &off);
 2652                                                         if (error)
 2653                                                                 goto out;
 2654                                                         error =
 2655                                                             wapbl_read(scratch2,
 2656                                                             fsblklen, fsdevvp,
 2657                                                             wb->wb_blk);
 2658                                                         if (error)
 2659                                                                 goto out;
 2660                                                         if (memcmp(scratch1,
 2661                                                                    scratch2,
 2662                                                                    fsblklen)) {
 2663                                                                 printf(
 2664                 "wapbl_verify: mismatch block %"PRId64" at off %"PRIdMAX"\n",
 2665                 wb->wb_blk, (intmax_t)off);
 2666                                                                 dirtycnt++;
 2667                                                                 mismatchcnt++;
 2668                                                         }
 2669                                                 } else {
 2670                                                         wapbl_circ_advance(wr,
 2671                                                             fsblklen, &off);
 2672                                                 }
 2673                                         }
 2674 #if 0
 2675                                         /*
 2676                                          * If all of the blocks in an entry
 2677                                          * are clean, then remove all of its
 2678                                          * blocks from the hashtable since they
 2679                                          * never will need replay.
 2680                                          */
 2681                                         if ((foundcnt != 0) &&
 2682                                             (dirtycnt == 0)) {
 2683                                                 off = saveoff;
 2684                                                 wapbl_circ_advance(wr,
 2685                                                     logblklen, &off);
 2686                                                 for (j = 0; j < n; j++) {
 2687                                                         struct wapbl_blk *wb =
 2688                                                            wapbl_blkhash_get(wr,
 2689                                                            wc->wc_blocks[i].wc_daddr + j);
 2690                                                         if (wb &&
 2691                                                           (wb->wb_off == off)) {
 2692                                                                 wapbl_blkhash_rem(wr, wb->wb_blk);
 2693                                                         }
 2694                                                         wapbl_circ_advance(wr,
 2695                                                             fsblklen, &off);
 2696                                                 }
 2697                                         }
 2698 #endif
 2699                                 }
 2700                         }
 2701                         break;
 2702                 case WAPBL_WC_REVOCATIONS:
 2703                 case WAPBL_WC_INODES:
 2704                         break;
 2705                 default:
 2706                         KASSERT(0);
 2707                 }
 2708 #ifdef DEBUG
 2709                 wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
 2710                 KASSERT(off == saveoff);
 2711 #endif
 2712         }
 2713  out:
 2714         wapbl_free(scratch1);
 2715         wapbl_free(scratch2);
 2716         if (!error && mismatchcnt)
 2717                 error = EFTYPE;
 2718         return error;
 2719 }
 2720 #endif
 2721 
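       /*
        * Replay the log: for each logged block whose most recent copy is the
        * one recorded in the block hash, write that copy out to the file
        * system device.
        */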
 2722 int
 2723 wapbl_replay_write(struct wapbl_replay *wr, struct vnode *fsdevvp)
 2724 {
 2725         off_t off;
 2726         struct wapbl_wc_header *wch = &wr->wr_wc_header;
 2727         int logblklen = 1<<wch->wc_log_dev_bshift;
 2728         int fsblklen = 1<<wch->wc_fs_dev_bshift;
 2729         void *scratch1 = wapbl_malloc(MAXBSIZE);
 2730         int error = 0;
 2731 
 2732         KDASSERT(wapbl_replay_isopen(wr));
 2733 
 2734         /*
 2735          * This parses the journal for replay, although it could
 2736          * just as easily walk the hashtable instead.
 2737          */
 2738 
 2739         off = wch->wc_tail;
 2740         while (off != wch->wc_head) {
 2741                 struct wapbl_wc_null *wcn;
 2742 #ifdef DEBUG
 2743                 off_t saveoff = off;
 2744 #endif
 2745                 error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
 2746                 if (error)
 2747                         goto out;
 2748                 wcn = (struct wapbl_wc_null *)wr->wr_scratch;
 2749                 switch (wcn->wc_type) {
 2750                 case WAPBL_WC_BLOCKS:
 2751                         {
 2752                                 struct wapbl_wc_blocklist *wc =
 2753                                     (struct wapbl_wc_blocklist *)wr->wr_scratch;
 2754                                 int i;
 2755                                 for (i = 0; i < wc->wc_blkcount; i++) {
 2756                                         int j, n;
 2757                                         /*
 2758                                          * Check each physical block against
 2759                                          * the hashtable independently
 2760                                          */
 2761                                         n = wc->wc_blocks[i].wc_dlen >>
 2762                                             wch->wc_fs_dev_bshift;
 2763                                         for (j = 0; j < n; j++) {
 2764                                                 struct wapbl_blk *wb =
 2765                                                    wapbl_blkhash_get(wr,
 2766                                                    wc->wc_blocks[i].wc_daddr + j);
 2767                                                 if (wb && (wb->wb_off == off)) {
 2768                                                         error = wapbl_circ_read(
 2769                                                             wr, scratch1,
 2770                                                             fsblklen, &off);
 2771                                                         if (error)
 2772                                                                 goto out;
 2773                                                         error =
 2774                                                            wapbl_write(scratch1,
 2775                                                            fsblklen, fsdevvp,
 2776                                                            wb->wb_blk);
 2777                                                         if (error)
 2778                                                                 goto out;
 2779                                                 } else {
 2780                                                         wapbl_circ_advance(wr,
 2781                                                             fsblklen, &off);
 2782                                                 }
 2783                                         }
 2784                                 }
 2785                         }
 2786                         break;
 2787                 case WAPBL_WC_REVOCATIONS:
 2788                 case WAPBL_WC_INODES:
 2789                         break;
 2790                 default:
 2791                         KASSERT(0);
 2792                 }
 2793 #ifdef DEBUG
 2794                 wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
 2795                 KASSERT(off == saveoff);
 2796 #endif
 2797         }
 2798  out:
 2799         wapbl_free(scratch1);
 2800         return error;
 2801 }
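/*
 * A minimal sketch of the hashtable-walking alternative that the comment
 * in wapbl_replay_write() above alludes to ("it could just as easily walk
 * the hashtable instead"): rather than re-parsing the journal records,
 * visit every block still registered in the replay block hash and copy
 * its journalled image back to the file system device.  This is not code
 * from this file; WAPBL_BLKHASH_FOREACH() is a hypothetical iterator
 * standing in for a walk over the private wr_blkhash structure.
 */
#if 0
static int
wapbl_replay_write_by_hash(struct wapbl_replay *wr, struct vnode *fsdevvp)
{
        struct wapbl_wc_header *wch = &wr->wr_wc_header;
        int fsblklen = 1 << wch->wc_fs_dev_bshift;
        void *scratch = wapbl_malloc(fsblklen);
        struct wapbl_blk *wb;
        int error = 0;

        KDASSERT(wapbl_replay_isopen(wr));

        /* Visit every block still registered for replay ... */
        WAPBL_BLKHASH_FOREACH(wb, wr) {         /* hypothetical iterator */
                off_t off = wb->wb_off;

                /* ... and copy its journalled image back to its home block. */
                error = wapbl_circ_read(wr, scratch, fsblklen, &off);
                if (error)
                        break;
                error = wapbl_write(scratch, fsblklen, fsdevvp, wb->wb_blk);
                if (error)
                        break;
        }
        wapbl_free(scratch);
        return error;
}
#endif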
 2802 
 2803 int
 2804 wapbl_replay_read(struct wapbl_replay *wr, void *data, daddr_t blk, long len)
 2805 {
 2806         struct wapbl_wc_header *wch = &wr->wr_wc_header;
 2807         int fsblklen = 1<<wch->wc_fs_dev_bshift;
 2808 
 2809         KDASSERT(wapbl_replay_isopen(wr));
 2810 
 2811         KASSERT((len % fsblklen) == 0);
 2812 
 2813         while (len != 0) {
 2814                 struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
 2815                 if (wb) {
 2816                         off_t off = wb->wb_off;
 2817                         int error;
 2818                         error = wapbl_circ_read(wr, data, fsblklen, &off);
 2819                         if (error)
 2820                                 return error;
 2821                 }
 2822                 data = (uint8_t *)data + fsblklen;
 2823                 len -= fsblklen;
 2824                 blk++;
 2825         }
 2826         return 0;
 2827 }
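/*
 * A minimal usage sketch for wapbl_replay_read() (hypothetical caller,
 * not from this file): during mount-time recovery a file system first
 * reads a metadata block from the raw device and then overlays any newer
 * copy still sitting in the journal.  Per the KASSERT above, 'len' must
 * be a multiple of the fs device block size; example_device_read() is an
 * assumed helper standing in for the caller's normal block read path.
 */
#if 0
static int
example_read_with_replay(struct wapbl_replay *wr, struct vnode *devvp,
    void *buf, daddr_t blk, long len)
{
        int error;

        /* Read the on-disk copy first (assumed helper). */
        error = example_device_read(devvp, buf, blk, len);
        if (error)
                return error;

        /* Replace any of these blocks that the journal holds newer copies of. */
        if (wr != NULL && wapbl_replay_isopen(wr))
                error = wapbl_replay_read(wr, buf, blk, len);
        return error;
}
#endif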
