FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c


/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * See abd.c for a general overview of the ARC buffered data (ABD).
 *
 * Using a large proportion of scattered ABDs decreases ARC fragmentation:
 * when we are at the limit of allocatable space, using equal-size chunks
 * allows us to quickly reclaim enough space for a new large allocation
 * (assuming it is also scattered).
 *
 * ABDs are allocated scattered by default unless the caller uses
 * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
 */

#include <sys/abd_impl.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#include <sys/zfs_znode.h>

typedef struct abd_stats {
        kstat_named_t abdstat_struct_size;
        kstat_named_t abdstat_scatter_cnt;
        kstat_named_t abdstat_scatter_data_size;
        kstat_named_t abdstat_scatter_chunk_waste;
        kstat_named_t abdstat_linear_cnt;
        kstat_named_t abdstat_linear_data_size;
} abd_stats_t;

static abd_stats_t abd_stats = {
        /* Amount of memory occupied by all of the abd_t struct allocations */
        { "struct_size",                        KSTAT_DATA_UINT64 },
        /*
         * The number of scatter ABDs which are currently allocated, excluding
         * ABDs which don't own their data (for instance the ones which were
         * allocated through abd_get_offset()).
         */
        { "scatter_cnt",                        KSTAT_DATA_UINT64 },
        /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
        { "scatter_data_size",                  KSTAT_DATA_UINT64 },
        /*
         * The amount of space wasted at the end of the last chunk across all
         * scatter ABDs tracked by scatter_cnt.
         */
        { "scatter_chunk_waste",                KSTAT_DATA_UINT64 },
        /*
         * The number of linear ABDs which are currently allocated, excluding
         * ABDs which don't own their data (for instance the ones which were
         * allocated through abd_get_offset() and abd_get_from_buf()). If an
         * ABD takes ownership of its buf then it will become tracked.
         */
        { "linear_cnt",                         KSTAT_DATA_UINT64 },
        /* Amount of data stored in all linear ABDs tracked by linear_cnt */
        { "linear_data_size",                   KSTAT_DATA_UINT64 },
};

struct {
        wmsum_t abdstat_struct_size;
        wmsum_t abdstat_scatter_cnt;
        wmsum_t abdstat_scatter_data_size;
        wmsum_t abdstat_scatter_chunk_waste;
        wmsum_t abdstat_linear_cnt;
        wmsum_t abdstat_linear_data_size;
} abd_sums;

/*
 * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
 * ABDs for.  Smaller allocations will use linear ABDs, which use
 * zio_[data_]buf_alloc().
 *
 * Scatter ABDs use at least one page each, so sub-page allocations waste
 * some space when allocated as scatter (e.g. a 2KB scatter allocation wastes
 * half of each page).  Using linear ABDs for small allocations means that
 * they will be put on slabs which contain many allocations.
 *
 * Linear ABDs for multi-page allocations are easier to use, and in some cases
 * they allow buffer copying to be avoided.  But allocating and especially
 * freeing multi-page linear ABDs are expensive operations due to KVA mapping
 * and unmapping, and over time they cause KVA fragmentation.
 */
static size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;

#if defined(_KERNEL)
SYSCTL_DECL(_vfs_zfs);

SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
        &zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
        &zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
#endif
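
/*
 * Illustrative note (not part of the original source): since both knobs
 * above are declared CTLFLAG_RWTUN, they can be set as loader tunables or
 * changed at runtime, e.g.:
 *
 *	# sysctl vfs.zfs.abd_scatter_enabled=0
 *	# sysctl vfs.zfs.abd_scatter_min_size=8193
 *
 * The second example would route all allocations of up to two pages
 * (assuming 4 KB pages) to linear ABDs.
 */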

kmem_cache_t *abd_chunk_cache;
static kstat_t *abd_ksp;

/*
 * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks all point to a
 * single zero'd page-sized buffer. This allows us to conserve memory by
 * using a single zero buffer for all of the scatter chunks.
 */
abd_t *abd_zero_scatter = NULL;

static uint_t
abd_chunkcnt_for_bytes(size_t size)
{
        return ((size + PAGE_MASK) >> PAGE_SHIFT);
}
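
/*
 * Worked example (illustrative, not in the original source), assuming
 * 4 KB pages (PAGE_SIZE == 4096, PAGE_MASK == 4095, PAGE_SHIFT == 12):
 *
 *	abd_chunkcnt_for_bytes(1)    == 1	((1 + 4095) >> 12)
 *	abd_chunkcnt_for_bytes(4096) == 1	(exactly one page)
 *	abd_chunkcnt_for_bytes(4097) == 2	(rounds up to the next page)
 *
 * i.e. the function computes howmany(size, PAGE_SIZE).
 */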

static inline uint_t
abd_scatter_chunkcnt(abd_t *abd)
{
        ASSERT(!abd_is_linear(abd));
        return (abd_chunkcnt_for_bytes(
            ABD_SCATTER(abd).abd_offset + abd->abd_size));
}

boolean_t
abd_size_alloc_linear(size_t size)
{
        return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
}
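
/*
 * Resulting policy (illustrative example, not in the original source),
 * with the defaults zfs_abd_scatter_enabled != 0 and
 * zfs_abd_scatter_min_size == PAGE_SIZE + 1:
 *
 *	abd_size_alloc_linear(512)		== B_TRUE  (sub-page: linear)
 *	abd_size_alloc_linear(PAGE_SIZE)	== B_TRUE  (one page: linear)
 *	abd_size_alloc_linear(PAGE_SIZE + 1)	== B_FALSE (scatter)
 */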

void
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
{
        uint_t n = abd_scatter_chunkcnt(abd);
        ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
        int waste = (n << PAGE_SHIFT) - abd->abd_size;
        if (op == ABDSTAT_INCR) {
                ABDSTAT_BUMP(abdstat_scatter_cnt);
                ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
                ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
                arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
        } else {
                ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
                ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
                ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
                arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
        }
}
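
/*
 * Worked example of the waste computation (illustrative, not in the
 * original source), assuming 4 KB pages and abd_offset == 0: a 6000-byte
 * scatter ABD occupies n == 2 chunks (8192 bytes of pages), so
 *
 *	waste = (2 << 12) - 6000 = 8192 - 6000 = 2192 bytes
 *
 * which is charged to ARC_SPACE_ABD_CHUNK_WASTE while the ABD is live.
 */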

void
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
{
        ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
        if (op == ABDSTAT_INCR) {
                ABDSTAT_BUMP(abdstat_linear_cnt);
                ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
        } else {
                ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
                ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
        }
}

void
abd_verify_scatter(abd_t *abd)
{
        uint_t i, n;

        /*
         * There are no scatter linear pages in FreeBSD, so it is an error
         * if the ABD has been marked as a linear page.
         */
        ASSERT(!abd_is_linear_page(abd));
        ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
        n = abd_scatter_chunkcnt(abd);
        for (i = 0; i < n; i++) {
                ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
        }
}

void
abd_alloc_chunks(abd_t *abd, size_t size)
{
        uint_t i, n;

        n = abd_chunkcnt_for_bytes(size);
        for (i = 0; i < n; i++) {
                ABD_SCATTER(abd).abd_chunks[i] =
                    kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
        }
}

void
abd_free_chunks(abd_t *abd)
{
        uint_t i, n;

        n = abd_scatter_chunkcnt(abd);
        for (i = 0; i < n; i++) {
                kmem_cache_free(abd_chunk_cache,
                    ABD_SCATTER(abd).abd_chunks[i]);
        }
}

abd_t *
abd_alloc_struct_impl(size_t size)
{
        uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
        /*
         * In the event we are allocating a gang ABD, the size passed in
         * will be 0. We must make sure to set abd_size to the size of an
         * ABD struct as opposed to an ABD scatter with 0 chunks. The gang
         * ABD struct allocation accounts for an additional 24 bytes over
         * a scatter ABD with 0 chunks.
         */
        size_t abd_size = MAX(sizeof (abd_t),
            offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
        abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
        ASSERT3P(abd, !=, NULL);
        ABDSTAT_INCR(abdstat_struct_size, abd_size);

        return (abd);
}
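
/*
 * Sizing sketch (illustrative, not in the original source): abd_chunks[]
 * is a flexible array at the tail of the scatter variant of abd_t, so a
 * scatter ABD needs offsetof(..., abd_chunks[chunkcnt]) bytes, and the
 * MAX() keeps the gang case (chunkcnt == 0) at a full sizeof (abd_t).
 * Per the comment above, the gang variant is 24 bytes (three 64-bit
 * pointers) larger than a zero-chunk scatter ABD, so on LP64 a scatter
 * ABD with up to three chunks still fits inside sizeof (abd_t); only
 * larger ABDs grow the allocation.
 */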

void
abd_free_struct_impl(abd_t *abd)
{
        uint_t chunkcnt = abd_is_linear(abd) || abd_is_gang(abd) ? 0 :
            abd_scatter_chunkcnt(abd);
        ssize_t size = MAX(sizeof (abd_t),
            offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
        kmem_free(abd, size);
        ABDSTAT_INCR(abdstat_struct_size, -size);
}

/*
 * Allocate a scatter ABD of size SPA_MAXBLOCKSIZE, where each chunk in
 * the scatterlist points to the same zero'd region.
 */
_Static_assert(ZERO_REGION_SIZE >= PAGE_SIZE, "zero_region too small");
static void
abd_alloc_zero_scatter(void)
{
        uint_t i, n;

        n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
        abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
        abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
        abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;

        ABD_SCATTER(abd_zero_scatter).abd_offset = 0;

        for (i = 0; i < n; i++) {
                ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
                    __DECONST(void *, zero_region);
        }

        ABDSTAT_BUMP(abdstat_scatter_cnt);
        ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
}

static void
abd_free_zero_scatter(void)
{
        ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
        ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);

        abd_free_struct(abd_zero_scatter);
        abd_zero_scatter = NULL;
}

static int
abd_kstats_update(kstat_t *ksp, int rw)
{
        abd_stats_t *as = ksp->ks_data;

        if (rw == KSTAT_WRITE)
                return (EACCES);
        as->abdstat_struct_size.value.ui64 =
            wmsum_value(&abd_sums.abdstat_struct_size);
        as->abdstat_scatter_cnt.value.ui64 =
            wmsum_value(&abd_sums.abdstat_scatter_cnt);
        as->abdstat_scatter_data_size.value.ui64 =
            wmsum_value(&abd_sums.abdstat_scatter_data_size);
        as->abdstat_scatter_chunk_waste.value.ui64 =
            wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
        as->abdstat_linear_cnt.value.ui64 =
            wmsum_value(&abd_sums.abdstat_linear_cnt);
        as->abdstat_linear_data_size.value.ui64 =
            wmsum_value(&abd_sums.abdstat_linear_data_size);
        return (0);
}

void
abd_init(void)
{
        abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
            NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);

        wmsum_init(&abd_sums.abdstat_struct_size, 0);
        wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
        wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
        wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
        wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
        wmsum_init(&abd_sums.abdstat_linear_data_size, 0);

        abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
            sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
        if (abd_ksp != NULL) {
                abd_ksp->ks_data = &abd_stats;
                abd_ksp->ks_update = abd_kstats_update;
                kstat_install(abd_ksp);
        }

        abd_alloc_zero_scatter();
}

void
abd_fini(void)
{
        abd_free_zero_scatter();

        if (abd_ksp != NULL) {
                kstat_delete(abd_ksp);
                abd_ksp = NULL;
        }

        wmsum_fini(&abd_sums.abdstat_struct_size);
        wmsum_fini(&abd_sums.abdstat_scatter_cnt);
        wmsum_fini(&abd_sums.abdstat_scatter_data_size);
        wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
        wmsum_fini(&abd_sums.abdstat_linear_cnt);
        wmsum_fini(&abd_sums.abdstat_linear_data_size);

        kmem_cache_destroy(abd_chunk_cache);
        abd_chunk_cache = NULL;
}

void
abd_free_linear_page(abd_t *abd)
{
        /*
         * FreeBSD does not have scatter linear pages, so reaching this
         * function is always an error.
         */
        VERIFY(0);
}

/*
 * If we're going to use this ABD for doing I/O using the block layer, the
 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
 * plan to store this ABD in memory for a long period of time, we should
 * allocate the ABD type that requires the least data copying to do the I/O.
 *
 * Currently this is linear ABDs; however, if ldi_strategy() can ever issue
 * I/Os using a scatter/gather list, we should switch to that and replace this
 * call with vanilla abd_alloc().
 */
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
        return (abd_alloc_linear(size, is_metadata));
}
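
/*
 * Usage sketch (illustrative, not in the original source): a caller
 * building a short-lived buffer for a device I/O would write, e.g.:
 *
 *	abd_t *abd = abd_alloc_for_io(size, B_FALSE);
 *	... fill abd, issue the I/O, wait for completion ...
 *	abd_free(abd);
 *
 * The caller does not need to know that the FreeBSD implementation
 * currently always returns a linear ABD.
 */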

abd_t *
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
    size_t size)
{
        abd_verify(sabd);
        ASSERT3U(off, <=, sabd->abd_size);

        size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
        size_t chunkcnt = abd_chunkcnt_for_bytes(
            (new_offset & PAGE_MASK) + size);

        ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));

        /*
         * If an abd struct is provided, it is only the minimum size.  If we
         * need additional chunks, we need to allocate a new struct.
         */
        if (abd != NULL &&
            offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
            sizeof (abd_t)) {
                abd = NULL;
        }

        if (abd == NULL)
                abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);

        /*
         * Even if this buf is filesystem metadata, we only track that
         * if we own the underlying data buffer, which is not true in
         * this case. Therefore, we don't ever use ABD_FLAG_META here.
         */

        ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;

        /* Copy the scatterlist starting at the correct offset */
        (void) memcpy(&ABD_SCATTER(abd).abd_chunks,
            &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
            chunkcnt * sizeof (void *));

        return (abd);
}
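
/*
 * Worked example (illustrative, not in the original source), assuming
 * 4 KB pages, a source ABD with abd_offset == 0, off == 6000 and
 * size == 3000:
 *
 *	new_offset           = 0 + 6000 = 6000
 *	first chunk copied   = new_offset >> 12 = 1
 *	abd_offset of result = new_offset & 4095 = 1904
 *	chunkcnt             = abd_chunkcnt_for_bytes(1904 + 3000) = 2
 *
 * so the new ABD shares chunk pointers 1 and 2 of the source and starts
 * 1904 bytes into the first of them; no data is copied.
 */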

/*
 * Initialize the abd_iter.
 */
void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
        ASSERT(!abd_is_gang(abd));
        abd_verify(abd);
        aiter->iter_abd = abd;
        aiter->iter_pos = 0;
        aiter->iter_mapaddr = NULL;
        aiter->iter_mapsize = 0;
}

/*
 * This is just a helper function to see if we have exhausted the
 * abd_iter and reached the end.
 */
boolean_t
abd_iter_at_end(struct abd_iter *aiter)
{
        return (aiter->iter_pos == aiter->iter_abd->abd_size);
}

/*
 * Advance the iterator by a certain amount. Cannot be called when a chunk is
 * in use. This can be safely called when the iterator has already been
 * exhausted, in which case it does nothing.
 */
void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
        ASSERT3P(aiter->iter_mapaddr, ==, NULL);
        ASSERT0(aiter->iter_mapsize);

        /* There's nothing left to advance to, so do nothing */
        if (abd_iter_at_end(aiter))
                return;

        aiter->iter_pos += amount;
}

/*
 * Map the current chunk into aiter. This can be safely called when the
 * iterator has already been exhausted, in which case it does nothing.
 */
void
abd_iter_map(struct abd_iter *aiter)
{
        void *paddr;

        ASSERT3P(aiter->iter_mapaddr, ==, NULL);
        ASSERT0(aiter->iter_mapsize);

        /* There's nothing left to iterate over, so do nothing */
        if (abd_iter_at_end(aiter))
                return;

        abd_t *abd = aiter->iter_abd;
        size_t offset = aiter->iter_pos;
        if (abd_is_linear(abd)) {
                aiter->iter_mapsize = abd->abd_size - offset;
                paddr = ABD_LINEAR_BUF(abd);
        } else {
                offset += ABD_SCATTER(abd).abd_offset;
                paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
                offset &= PAGE_MASK;
                aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
                    abd->abd_size - aiter->iter_pos);
        }
        aiter->iter_mapaddr = (char *)paddr + offset;
}
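
/*
 * Worked example (illustrative, not in the original source): for a
 * 10000-byte scatter ABD with abd_offset == 0 and iter_pos == 5000,
 * assuming 4 KB pages:
 *
 *	offset       = 5000 -> chunk index 1, in-chunk offset 904
 *	iter_mapsize = MIN(4096 - 904, 10000 - 5000) = 3192
 *
 * i.e. each mapping covers at most the remainder of one chunk, so callers
 * must loop (map, use, unmap, advance) until the iterator is exhausted.
 */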

/*
 * Unmap the current chunk from aiter. This can be safely called when the
 * iterator has already been exhausted, in which case it does nothing.
 */
void
abd_iter_unmap(struct abd_iter *aiter)
{
        if (!abd_iter_at_end(aiter)) {
                ASSERT3P(aiter->iter_mapaddr, !=, NULL);
                ASSERT3U(aiter->iter_mapsize, >, 0);
        }

        aiter->iter_mapaddr = NULL;
        aiter->iter_mapsize = 0;
}
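
/*
 * Typical iteration pattern (illustrative sketch, not in the original
 * source; the generic code in abd.c drives these helpers in much this
 * way):
 *
 *	struct abd_iter aiter;
 *
 *	abd_iter_init(&aiter, abd);
 *	while (!abd_iter_at_end(&aiter)) {
 *		abd_iter_map(&aiter);
 *		// operate on aiter.iter_mapaddr / aiter.iter_mapsize
 *		size_t len = aiter.iter_mapsize;
 *		abd_iter_unmap(&aiter);
 *		abd_iter_advance(&aiter, len);
 *	}
 *
 * Note that the chunk must be unmapped before advancing, since
 * abd_iter_advance() asserts that no mapping is live.
 */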

void
abd_cache_reap_now(void)
{
        kmem_cache_reap_soon(abd_chunk_cache);
}
