The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/sys/wapbl.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: wapbl.h,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Wasabi Systems, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 #ifndef _SYS_WAPBL_H
   33 #define _SYS_WAPBL_H
   34 
   35 #include <sys/mutex.h>
   36 
   37 #include <miscfs/specfs/specdev.h>
   38 
   39 /* This header file describes the API and data structures for
   40  * write-ahead physical block logging (WAPBL) support.
   41  */
   42 
   43 #if defined(_KERNEL_OPT)
   44 #include "opt_wapbl.h"
   45 #endif
   46 
   47 #ifdef WAPBL_DEBUG
      /*
       * Debug build: unless a mask was already chosen, default
       * WAPBL_DEBUG_PRINT to the replay and open message classes.
       */
   48 #ifndef WAPBL_DEBUG_PRINT
   49 #define WAPBL_DEBUG_PRINT (WAPBL_PRINT_REPLAY | WAPBL_PRINT_OPEN)
   50 #endif /* !WAPBL_DEBUG_PRINT */
   51 
      /*
       * Additional debug switches, compiled out by the "#if 0" below.
       * WAPBL_DEBUG_BUFBYTES adds we_unsynced_bufbytes to struct wapbl_entry.
       */
   52 #if 0
   53 #define WAPBL_DEBUG_BUFBYTES
   54 #define WAPBL_DEBUG_SERIALIZE
   55 #endif
   56 
   57 #endif /* WAPBL_DEBUG */
   58 
   59 #ifdef WAPBL_DEBUG_PRINT
   60 
   61 enum {
   62         WAPBL_PRINT_OPEN = 0x1,
   63         WAPBL_PRINT_FLUSH = 0x2,
   64         WAPBL_PRINT_TRUNCATE = 0x4,
   65         WAPBL_PRINT_TRANSACTION = 0x8,
   66         WAPBL_PRINT_BUFFER = 0x10,
   67         WAPBL_PRINT_BUFFER2 = 0x20,
   68         WAPBL_PRINT_ALLOC = 0x40,
   69         WAPBL_PRINT_INODE = 0x80,
   70         WAPBL_PRINT_WRITE = 0x100,
   71         WAPBL_PRINT_IO = 0x200,
   72         WAPBL_PRINT_REPLAY = 0x400,
   73         WAPBL_PRINT_ERROR = 0x800,
   74         WAPBL_PRINT_DISCARD = 0x1000,
   75         WAPBL_PRINT_BIODONE = 0x2000,
   76 };
   77 
   78 #define WAPBL_PRINTF(mask, a) if (wapbl_debug_print & (mask)) printf  a
   79 extern int wapbl_debug_print;
   80 #else
   81 #define WAPBL_PRINTF(mask, a)
   82 #endif
   83 
   84 /****************************************************************/
   85 
   86 /* The WAPBL journal layout.
   87  * 
   88  * The journal consists of a header followed by a circular buffer
   89  * region.  The circular data area is described by the header
   90  * wc_circ_off, wc_circ_size, wc_head and wc_tail fields as bytes
   91  * from the start of the journal header.  New records are inserted
   92  * at wc_head and the oldest valid record can be found at wc_tail.
   93  * When ((wc_head == wc_tail) && (wc_head == 0)), the journal is empty.
   94  * The condition of ((wc_head == wc_tail) && (wc_head != 0))
   95  * indicates a full journal, although this condition is rare.
   96  *
   97  * The journal header as well as its records are marked by a 32bit
   98  * type tag and length for ease of parsing.  Journal records are
   99  * padded so as to fall on journal device block boundaries.
  100  * (XXX i think there is currently a bug wrt WC_BLOCKS not ending
  101  * correctly on a journal device block boundary. this would need
  102  * to be fixed if the journal blocksize does not match filesystem.)
  103  */
  104 
  105 /*
  106  * The following are the 4 record types used by the journal.
  107  * Each tag indicates journal data organized by one of the
  108  * structures defined below.
       *
       * Only WAPBL_WC_HEADER is given an explicit value (the ASCII bytes
       * "WABL"); the remaining tags take the next consecutive values.
  109  */
  110 enum {
  111         WAPBL_WC_HEADER = 0x5741424c,   /* "WABL", struct wapbl_wc_header */
  112         WAPBL_WC_INODES,                /* struct wapbl_wc_inodelist */
  113         WAPBL_WC_REVOCATIONS,           /* struct wapbl_wc_blocklist */
  114         WAPBL_WC_BLOCKS,                /* struct wapbl_wc_blocklist */
  115 };
  116 
  117 /* null entry (on disk) */
  118 /* This structure isn't used directly, but shares its header
  119  * layout (wc_type followed by wc_len) with all the other log
  120  * structures for the purpose of reading a log structure and
  121  * determining its type.
       */
  122 struct wapbl_wc_null {
  123         uint32_t        wc_type;        /* WAPBL_WC_* */
  124         int32_t         wc_len;         /* record length, cf. wapbl_wc_header */
  125         uint8_t         wc_spare[0];    /* actually longer */
  126 };
  127 
  128 /* journal header (on-disk)
  129  * This record is found at the start of the
  130  * journal, but not within the circular buffer region.  As well as
  131  * describing the journal parameters and matching filesystem, it
  132  * additionally serves as the atomic update record for journal
  133  * updates.
       *
       * wc_head, wc_tail, wc_circ_off and wc_circ_size are byte counts
       * measured from the start of the journal header.
       *
       * NOTE: struct wapbl_replay embeds this structure by value
       * (wr_wc_header), so the zero-length wc_spare[] cannot simply be
       * turned into a C99 flexible array member.
  134  */
  135 struct wapbl_wc_header {
  136         uint32_t        wc_type;        /* WAPBL_WC_HEADER log magic number */
  137         int32_t         wc_len;         /* length of this journal entry */
  138         uint32_t        wc_checksum;
  139         uint32_t        wc_generation;
  140         int32_t         wc_fsid[2];
  141         uint64_t        wc_time;
  142         uint32_t        wc_timensec;
  143         uint32_t        wc_version;
  144         uint32_t        wc_log_dev_bshift;
  145         uint32_t        wc_fs_dev_bshift;
  146         int64_t         wc_head;        /* where new records are inserted */
  147         int64_t         wc_tail;        /* oldest valid record */
  148         int64_t         wc_circ_off;    /* offset of circ buffer region */
  149         int64_t         wc_circ_size;   /* size of circular buffer region */
  150         uint8_t         wc_spare[0];    /* actually longer */
  151 };
  152 
  153 /* list of blocks (on disk)
  154  * This record is used to describe a set of filesystem blocks,
  155  * and is used with two type tags, WAPBL_WC_BLOCKS and
  156  * WAPBL_WC_REVOCATIONS.
  157  * 
  158  * For WAPBL_WC_BLOCKS, a copy of each listed block can be found
  159  * starting at the next log device blocksize boundary, that is,
  160  * one log device block after the start of the record.  This contains
  161  * the bulk of the filesystem journal data which is written using
  162  * these records before being written into the filesystem.
  163  *
  164  * The WAPBL_WC_REVOCATIONS record is used to indicate that any
  165  * previously listed blocks should not be written into the filesystem.
  166  * This is important so that deallocated and reallocated data blocks
  167  * do not get overwritten with stale data from the journal.  The
  168  * revocation records do not contain a copy of any actual block data.
  169  */
  170 struct wapbl_wc_blocklist {
  171         uint32_t        wc_type; /* WAPBL_WC_{REVOCATIONS,BLOCKS} */
  172         int32_t         wc_len;
  173         int32_t         wc_blkcount;
  174         int32_t         wc_unused;
  175         struct {
  176                 int64_t wc_daddr;
  177                 int32_t wc_unused;
  178                 int32_t wc_dlen;
  179         } wc_blocks[0];                 /* actually longer */
  180 };
  181 
  182 /* list of inodes (on disk)
  183  * This record is used to describe the set of inodes which
  184  * may be allocated but are unlinked.  Inodes end up listed here
  185  * while they are in the process of being initialized and
  186  * deinitialized.  Inodes unlinked while in use by a process
  187  * will be listed here and the actual deletion must be completed
  188  * on journal replay.
  189  */
  190 struct wapbl_wc_inodelist {
  191         uint32_t        wc_type; /* WAPBL_WC_INODES */
  192         int32_t         wc_len;
  193         int32_t         wc_inocnt;
  194         int32_t         wc_clear;       /* set if previously listed inodes 
  195                                            should be ignored */
  196         struct {
  197                 uint32_t wc_inumber;
  198                 uint32_t wc_imode;
  199         } wc_inodes[0];         /* actually longer */
  200 };
  201 
  202 /****************************************************************/
  203 
  204 #include <sys/queue.h>
  205 #include <sys/vnode.h>
  206 #include <sys/buf.h>
  207 
  208 typedef void (*wapbl_flush_fn_t)(struct mount *, daddr_t *, int *, int); /* callback type; wapbl_start() takes two of these */
  209 
  210 #ifdef _KERNEL
  211 
  212 struct wapbl_entry;             /* defined below */
  213 struct wapbl_wc_header;         /* already defined above */
  214 struct wapbl_replay;            /* defined later in this header */
  215 struct wapbl;                   /* not defined in this header */
  216 
  217 /*
  218  * This structure holds per-transaction log information.
  219  */
  220 struct wapbl_entry {
  221         struct wapbl *we_wapbl;         /* presumably the owning log; verify */
  222         SIMPLEQ_ENTRY(wapbl_entry) we_entries;  /* SIMPLEQ linkage */
  223         size_t we_bufcount;             /* Count of unsynced buffers */
  224         size_t we_reclaimable_bytes;    /* Number of on-disk bytes for this
  225                                            transaction */
  226         int     we_error;
  227 #ifdef WAPBL_DEBUG_BUFBYTES
  228         size_t we_unsynced_bufbytes;    /* Byte count of unsynced buffers */
  229 #endif
  230 };
  231 
  232 void    wapbl_init(void);       /* presumably one-time setup; see implementation */
  233 
  234 /*
       * Start using a log.
       * NOTE(review): the argument roles are not documented in this header;
       * confirm them against the implementation before relying on them.
       */
  235 int     wapbl_start(struct wapbl **, struct mount *, struct vnode *, daddr_t,
  236                     size_t, size_t, struct wapbl_replay *,
  237                     wapbl_flush_fn_t, wapbl_flush_fn_t);
  238 
  239 /* Discard the current transaction, potentially dangerous */
  240 void    wapbl_discard(struct wapbl *);
  241 
  242 /* Stop using a log */
  243 int     wapbl_stop(struct wapbl *, int);
  244 
  245 /*
  246  * Begin a new transaction or increment transaction recursion
  247  * level if called while a transaction is already in progress
  248  * by the current process.
  249  */
  250 int     wapbl_begin(struct wapbl *, const char *, int);
  251 
  252 
  253 /* End a transaction or decrement the transaction recursion level */
  254 void    wapbl_end(struct wapbl *);
  255 
  256 /*
  257  * Add a new buffer to the current transaction.  The buffer's
  258  * data will be copied to the current transaction log and the
  259  * buffer will be marked B_LOCKED so that it will not be
  260  * flushed to disk by the syncer or reallocated.
  261  */
  262 void    wapbl_add_buf(struct wapbl *, struct buf *);
  263 
  264 /* Remove a buffer from the current transaction. */
  265 void    wapbl_remove_buf(struct wapbl *, struct buf *);
  266 
      /*
       * NOTE(review): undocumented here; the two long arguments are
       * presumably the old and new buffer sizes -- confirm.
       */
  267 void    wapbl_resize_buf(struct wapbl *, struct buf *, long, long);
  268 
  269 /*
  270  * This will flush all completed transactions to disk and
  271  * start asynchronous writes on the associated buffers.
  272  */
  273 int     wapbl_flush(struct wapbl *, int);
  274 
  275 /*
  276  * Inodes that are allocated but have zero link count
  277  * must be registered with the current transaction
  278  * so they may be recorded in the log and cleaned up later.
  279  * Registration/unregistration of ino numbers already registered is ok.
  280  */
  281 void    wapbl_register_inode(struct wapbl *, ino_t, mode_t);
  282 void    wapbl_unregister_inode(struct wapbl *, ino_t, mode_t);
  283 
  284 /*
  285  * Metadata block deallocations must be registered so
  286  * that revocation records can be written and to prevent
  287  * the corresponding blocks from being reused as data
  288  * blocks until the log is on disk.
  289  */
  290 void    wapbl_register_deallocation(struct wapbl *, daddr_t, int);
  291 
  292 void    wapbl_jlock_assert(struct wapbl *wl);   /* presumably asserts the journal lock is held; verify */
  293 void    wapbl_junlock_assert(struct wapbl *wl); /* presumably asserts it is not held; verify */
  294 
      /* `pr' is an output callback with a variadic, printf-like signature. */
  295 void    wapbl_print(struct wapbl *wl, int full, void (*pr)(const char *, ...));
  296 
      /* wapbl_dump() is declared only when WAPBL_DEBUG or DDB is defined. */
  297 #if defined(WAPBL_DEBUG) || defined(DDB)
  298 void    wapbl_dump(struct wapbl *);
  299 #endif
  300 
  301 void    wapbl_biodone(struct buf *);
  302 
      /* struct wapbl_ops is not declared in this header. */
  303 extern struct wapbl_ops wapbl_ops;
  304 
  305 static __inline struct mount *
  306 wapbl_vptomp(struct vnode *vp)
  307 {
  308         /*
  309          * Map a vnode to a mount.  A block device vnode (VBLK) is
  310          * resolved through v_specmountpoint; any other vnode uses
  311          * v_mount.  A NULL vnode maps to a NULL mount.
  312          */
  313         if (vp == NULL)
  314                 return NULL;
  315 
  316         if (vp->v_type != VBLK)
  317                 return vp->v_mount;
  318         return vp->v_specmountpoint;
  319 }
  320 
  321 static __inline bool
  322 wapbl_vphaswapbl(struct vnode *vp)
  323 {
  324         /*
  325          * Report whether the mount that wapbl_vptomp() resolves for
  326          * vp has a non-zero mnt_wapbl.  A NULL vnode, or a vnode
  327          * with no mount, yields false.
  328          */
  329         struct mount *mnt;
  330 
  331         mnt = (vp != NULL) ? wapbl_vptomp(vp) : NULL;
  332 
  333         return (mnt != NULL && mnt->mnt_wapbl) ? true : false;
  334 }
  335 
  336 #endif /* _KERNEL */
  337 
  338 /****************************************************************/
  339 /* Replay support */
  340 
      /* State kept for one journal replay. */
  341 struct wapbl_replay {
  342         struct vnode *wr_logvp;
  343         struct vnode *wr_devvp;
  344         daddr_t wr_logpbn;
  345 
  346         struct wapbl_wc_header wr_wc_header;    /* journal header, held by value */
  347         void *wr_scratch;       /* non-null while open, see wapbl_replay_isopen() */
  348 
  349         LIST_HEAD(wapbl_blk_head, wapbl_blk) *wr_blkhash; /* presumably a hash table of block lists; verify */
  350         u_long wr_blkhashmask;
  351         int wr_blkhashcnt;
  352 
  353         off_t wr_inodeshead;
  354         off_t wr_inodestail;
  355         int wr_inodescnt;
  356         struct {                /* same two fields as a wc_inodes[] entry */
  357                 uint32_t wr_inumber;
  358                 uint32_t wr_imode;
  359         } *wr_inodes;
  360 };
  361 
  362 #define wapbl_replay_isopen(wr) ((wr)->wr_scratch != 0) /* true when wr_scratch is non-null */
  363 
  364 int     wapbl_replay_isopen1(struct wapbl_replay *);    /* presumably the function form of the macro above; verify */
  365 int     wapbl_replay_start(struct wapbl_replay **, struct vnode *,
  366         daddr_t, size_t, size_t);
  367 void    wapbl_replay_stop(struct wapbl_replay *);
  368 void    wapbl_replay_free(struct wapbl_replay *);
  369 int     wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
  370 int     wapbl_replay_write(struct wapbl_replay *, struct vnode *);
  371 int     wapbl_replay_read(struct wapbl_replay *, void *, daddr_t, long);
  372 
  373 /****************************************************************/
  374 
  375 /*
       * Supply these two functions to provide i/o support.
       * NOTE(review): argument roles are not documented here -- presumably
       * (buffer, length, device vnode, block address); confirm.
       */
  376 int wapbl_write(void *, size_t, struct vnode *, daddr_t);
  377 int wapbl_read(void *, size_t, struct vnode *, daddr_t);
  378 
  379 /****************************************************************/
  380 
  381 #endif /* !_SYS_WAPBL_H */

Cache object: b7e00182d2c4bd1223ef7522e68ee7af


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.