The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/zfs_chksum.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
   24  */
   25 
   26 #include <sys/types.h>
   27 #include <sys/spa.h>
   28 #include <sys/zio_checksum.h>
   29 #include <sys/zfs_context.h>
   30 #include <sys/zfs_chksum.h>
   31 
   32 #include <sys/blake3.h>
   33 
   34 /* limit benchmarking to max 256KiB, when EdonR is slower than this (MiB/s): */
   35 #define LIMIT_PERF_MBS  300
   36 
   37 typedef struct {
   38         const char *name;
   39         const char *impl;
   40         uint64_t bs1k;
   41         uint64_t bs4k;
   42         uint64_t bs16k;
   43         uint64_t bs64k;
   44         uint64_t bs256k;
   45         uint64_t bs1m;
   46         uint64_t bs4m;
   47         uint64_t bs16m;
   48         zio_cksum_salt_t salt;
   49         zio_checksum_t *(func);
   50         zio_checksum_tmpl_init_t *(init);
   51         zio_checksum_tmpl_free_t *(free);
   52 } chksum_stat_t;
   53 
   54 static chksum_stat_t *chksum_stat_data = 0;
   55 static int chksum_stat_cnt = 0;
   56 static kstat_t *chksum_kstat = NULL;
   57 
   58 /*
   59  * i3-1005G1 test output:
   60  *
   61  * implementation     1k      4k     16k     64k    256k      1m      4m
   62  * fletcher-4       5421   15001   26468   32555   34720   32801   18847
   63  * edonr-generic    1196    1602    1761    1749    1762    1759    1751
   64  * skein-generic     546     591     608     615     619     612     616
   65  * sha256-generic    246     270     274     274     277     275     276
   66  * sha256-avx        262     296     304     307     307     307     306
   67  * sha256-sha-ni     769    1072    1172    1220    1219    1232    1228
   68  * sha256-openssl    240     300     316     314     304     285     276
   69  * sha512-generic    333     374     385     392     391     393     392
   70  * sha512-openssl    353     441     467     476     472     467     426
   71  * sha512-avx        362     444     473     475     479     476     478
   72  * sha512-avx2       394     500     530     538     543     545     542
   73  * blake3-generic    308     313     313     313     312     313     312
   74  * blake3-sse2       402    1289    1423    1446    1432    1458    1413
   75  * blake3-sse41      427    1470    1625    1704    1679    1607    1629
   76  * blake3-avx2       428    1920    3095    3343    3356    3318    3204
   77  * blake3-avx512     473    2687    4905    5836    5844    5643    5374
   78  */
   79 static int
   80 chksum_kstat_headers(char *buf, size_t size)
   81 {
   82         ssize_t off = 0;
   83 
   84         off += kmem_scnprintf(buf + off, size, "%-23s", "implementation");
   85         off += kmem_scnprintf(buf + off, size - off, "%8s", "1k");
   86         off += kmem_scnprintf(buf + off, size - off, "%8s", "4k");
   87         off += kmem_scnprintf(buf + off, size - off, "%8s", "16k");
   88         off += kmem_scnprintf(buf + off, size - off, "%8s", "64k");
   89         off += kmem_scnprintf(buf + off, size - off, "%8s", "256k");
   90         off += kmem_scnprintf(buf + off, size - off, "%8s", "1m");
   91         off += kmem_scnprintf(buf + off, size - off, "%8s", "4m");
   92         (void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m");
   93 
   94         return (0);
   95 }
   96 
   97 static int
   98 chksum_kstat_data(char *buf, size_t size, void *data)
   99 {
  100         chksum_stat_t *cs;
  101         ssize_t off = 0;
  102         char b[24];
  103 
  104         cs = (chksum_stat_t *)data;
  105         kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl);
  106         off += kmem_scnprintf(buf + off, size - off, "%-23s", b);
  107         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  108             (u_longlong_t)cs->bs1k);
  109         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  110             (u_longlong_t)cs->bs4k);
  111         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  112             (u_longlong_t)cs->bs16k);
  113         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  114             (u_longlong_t)cs->bs64k);
  115         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  116             (u_longlong_t)cs->bs256k);
  117         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  118             (u_longlong_t)cs->bs1m);
  119         off += kmem_scnprintf(buf + off, size - off, "%8llu",
  120             (u_longlong_t)cs->bs4m);
  121         (void) kmem_scnprintf(buf + off, size - off, "%8llu\n",
  122             (u_longlong_t)cs->bs16m);
  123 
  124         return (0);
  125 }
  126 
  127 static void *
  128 chksum_kstat_addr(kstat_t *ksp, loff_t n)
  129 {
  130         if (n < chksum_stat_cnt)
  131                 ksp->ks_private = (void *)(chksum_stat_data + n);
  132         else
  133                 ksp->ks_private = NULL;
  134 
  135         return (ksp->ks_private);
  136 }
  137 
  138 static void
  139 chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
  140     uint64_t *result)
  141 {
  142         hrtime_t start;
  143         uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
  144         uint32_t l, loops = 0;
  145         zio_cksum_t zcp;
  146 
  147         switch (round) {
  148         case 1: /* 1k */
  149                 size = 1<<10; loops = 128; break;
  150         case 2: /* 2k */
  151                 size = 1<<12; loops = 64; break;
  152         case 3: /* 4k */
  153                 size = 1<<14; loops = 32; break;
  154         case 4: /* 16k */
  155                 size = 1<<16; loops = 16; break;
  156         case 5: /* 256k */
  157                 size = 1<<18; loops = 8; break;
  158         case 6: /* 1m */
  159                 size = 1<<20; loops = 4; break;
  160         case 7: /* 4m */
  161                 size = 1<<22; loops = 1; break;
  162         case 8: /* 16m */
  163                 size = 1<<24; loops = 1; break;
  164         }
  165 
  166         kpreempt_disable();
  167         start = gethrtime();
  168         do {
  169                 for (l = 0; l < loops; l++, run_count++)
  170                         cs->func(abd, size, ctx, &zcp);
  171 
  172                 run_time_ns = gethrtime() - start;
  173         } while (run_time_ns < MSEC2NSEC(1));
  174         kpreempt_enable();
  175 
  176         run_bw = size * run_count * NANOSEC;
  177         run_bw /= run_time_ns;  /* B/s */
  178         *result = run_bw/1024/1024; /* MiB/s */
  179 }
  180 
  181 #define LIMIT_INIT      0
  182 #define LIMIT_NEEDED    1
  183 #define LIMIT_NOLIMIT   2
  184 
  185 static void
  186 chksum_benchit(chksum_stat_t *cs)
  187 {
  188         abd_t *abd;
  189         void *ctx = 0;
  190         void *salt = &cs->salt.zcs_bytes;
  191         static int chksum_stat_limit = LIMIT_INIT;
  192 
  193         memset(salt, 0, sizeof (cs->salt.zcs_bytes));
  194         if (cs->init)
  195                 ctx = cs->init(&cs->salt);
  196 
  197         /* allocate test memory via abd linear interface */
  198         abd = abd_alloc_linear(1<<20, B_FALSE);
  199         chksum_run(cs, abd, ctx, 1, &cs->bs1k);
  200         chksum_run(cs, abd, ctx, 2, &cs->bs4k);
  201         chksum_run(cs, abd, ctx, 3, &cs->bs16k);
  202         chksum_run(cs, abd, ctx, 4, &cs->bs64k);
  203         chksum_run(cs, abd, ctx, 5, &cs->bs256k);
  204 
  205         /* check if we ran on a slow cpu */
  206         if (chksum_stat_limit == LIMIT_INIT) {
  207                 if (cs->bs1k < LIMIT_PERF_MBS) {
  208                         chksum_stat_limit = LIMIT_NEEDED;
  209                 } else {
  210                         chksum_stat_limit = LIMIT_NOLIMIT;
  211                 }
  212         }
  213 
  214         /* skip benchmarks >= 1MiB when the CPU is to slow */
  215         if (chksum_stat_limit == LIMIT_NEEDED)
  216                 goto abort;
  217 
  218         chksum_run(cs, abd, ctx, 6, &cs->bs1m);
  219         abd_free(abd);
  220 
  221         /* allocate test memory via abd non linear interface */
  222         abd = abd_alloc(1<<24, B_FALSE);
  223         chksum_run(cs, abd, ctx, 7, &cs->bs4m);
  224         chksum_run(cs, abd, ctx, 8, &cs->bs16m);
  225 
  226 abort:
  227         abd_free(abd);
  228 
  229         /* free up temp memory */
  230         if (cs->free)
  231                 cs->free(ctx);
  232 }
  233 
  234 /*
  235  * Initialize and benchmark all supported implementations.
  236  */
  237 static void
  238 chksum_benchmark(void)
  239 {
  240 
  241 #ifndef _KERNEL
  242         /* we need the benchmark only for the kernel module */
  243         return;
  244 #endif
  245 
  246         chksum_stat_t *cs;
  247         int cbid = 0;
  248         uint64_t max = 0;
  249         uint32_t id, id_save;
  250 
  251         /* space for the benchmark times */
  252         chksum_stat_cnt = 4;
  253         chksum_stat_cnt += blake3_impl_getcnt();
  254         chksum_stat_data = kmem_zalloc(
  255             sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
  256 
  257         /* edonr - needs to be the first one here (slow CPU check) */
  258         cs = &chksum_stat_data[cbid++];
  259         cs->init = abd_checksum_edonr_tmpl_init;
  260         cs->func = abd_checksum_edonr_native;
  261         cs->free = abd_checksum_edonr_tmpl_free;
  262         cs->name = "edonr";
  263         cs->impl = "generic";
  264         chksum_benchit(cs);
  265 
  266         /* skein */
  267         cs = &chksum_stat_data[cbid++];
  268         cs->init = abd_checksum_skein_tmpl_init;
  269         cs->func = abd_checksum_skein_native;
  270         cs->free = abd_checksum_skein_tmpl_free;
  271         cs->name = "skein";
  272         cs->impl = "generic";
  273         chksum_benchit(cs);
  274 
  275         /* sha256 */
  276         cs = &chksum_stat_data[cbid++];
  277         cs->init = 0;
  278         cs->func = abd_checksum_SHA256;
  279         cs->free = 0;
  280         cs->name = "sha256";
  281         cs->impl = "generic";
  282         chksum_benchit(cs);
  283 
  284         /* sha512 */
  285         cs = &chksum_stat_data[cbid++];
  286         cs->init = 0;
  287         cs->func = abd_checksum_SHA512_native;
  288         cs->free = 0;
  289         cs->name = "sha512";
  290         cs->impl = "generic";
  291         chksum_benchit(cs);
  292 
  293         /* blake3 */
  294         id_save = blake3_impl_getid();
  295         for (id = 0; id < blake3_impl_getcnt(); id++) {
  296                 blake3_impl_setid(id);
  297                 cs = &chksum_stat_data[cbid++];
  298                 cs->init = abd_checksum_blake3_tmpl_init;
  299                 cs->func = abd_checksum_blake3_native;
  300                 cs->free = abd_checksum_blake3_tmpl_free;
  301                 cs->name = "blake3";
  302                 cs->impl = blake3_impl_getname();
  303                 chksum_benchit(cs);
  304                 if (cs->bs256k > max) {
  305                         max = cs->bs256k;
  306                         blake3_impl_set_fastest(id);
  307                 }
  308         }
  309 
  310         /* restore initial value */
  311         blake3_impl_setid(id_save);
  312 }
  313 
  314 void
  315 chksum_init(void)
  316 {
  317 #ifdef _KERNEL
  318         blake3_per_cpu_ctx_init();
  319 #endif
  320 
  321         /* Benchmark supported implementations */
  322         chksum_benchmark();
  323 
  324         /* Install kstats for all implementations */
  325         chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
  326             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
  327 
  328         if (chksum_kstat != NULL) {
  329                 chksum_kstat->ks_data = NULL;
  330                 chksum_kstat->ks_ndata = UINT32_MAX;
  331                 kstat_set_raw_ops(chksum_kstat,
  332                     chksum_kstat_headers,
  333                     chksum_kstat_data,
  334                     chksum_kstat_addr);
  335                 kstat_install(chksum_kstat);
  336         }
  337 }
  338 
  339 void
  340 chksum_fini(void)
  341 {
  342         if (chksum_kstat != NULL) {
  343                 kstat_delete(chksum_kstat);
  344                 chksum_kstat = NULL;
  345         }
  346 
  347         if (chksum_stat_cnt) {
  348                 kmem_free(chksum_stat_data,
  349                     sizeof (chksum_stat_t) * chksum_stat_cnt);
  350                 chksum_stat_cnt = 0;
  351                 chksum_stat_data = 0;
  352         }
  353 
  354 #ifdef _KERNEL
  355         blake3_per_cpu_ctx_fini();
  356 #endif
  357 }

Cache object: 9a882c47909637bdec67ab90f364ef6e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.