The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zcommon/zfs_fletcher_intel.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Implement fast Fletcher4 with AVX2 instructions. (x86_64)
    3  *
    4  * Use the 256-bit AVX2 SIMD instructions and registers to compute
    5  * Fletcher4 in four incremental 64-bit parallel accumulator streams,
    6  * and then combine the streams to form the final four checksum words.
    7  *
    8  * Copyright (C) 2015 Intel Corporation.
    9  *
   10  * Authors:
   11  *      James Guilford <james.guilford@intel.com>
   12  *      Jinshan Xiong <jinshan.xiong@intel.com>
   13  *
   14  * This software is available to you under a choice of one of two
   15  * licenses.  You may choose to be licensed under the terms of the GNU
   16  * General Public License (GPL) Version 2, available from the file
   17  * COPYING in the main directory of this source tree, or the
   18  * OpenIB.org BSD license below:
   19  *
   20  *     Redistribution and use in source and binary forms, with or
   21  *     without modification, are permitted provided that the following
   22  *     conditions are met:
   23  *
   24  *      - Redistributions of source code must retain the above
   25  *        copyright notice, this list of conditions and the following
   26  *        disclaimer.
   27  *
   28  *      - Redistributions in binary form must reproduce the above
   29  *        copyright notice, this list of conditions and the following
   30  *        disclaimer in the documentation and/or other materials
   31  *        provided with the distribution.
   32  *
   33  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   34  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   35  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   36  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   37  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   38  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   39  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   40  * SOFTWARE.
   41  */
   42 
   43 #if defined(HAVE_AVX) && defined(HAVE_AVX2)
   44 
   45 #include <sys/spa_checksum.h>
   46 #include <sys/string.h>
   47 #include <sys/simd.h>
   48 #include <zfs_fletcher.h>
   49 
   50 ZFS_NO_SANITIZE_UNDEFINED
   51 static void
   52 fletcher_4_avx2_init(fletcher_4_ctx_t *ctx)
   53 {
   54         kfpu_begin();
   55         memset(ctx->avx, 0, 4 * sizeof (zfs_fletcher_avx_t));
   56 }
   57 
   58 ZFS_NO_SANITIZE_UNDEFINED
   59 static void
   60 fletcher_4_avx2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
   61 {
   62         uint64_t A, B, C, D;
   63 
   64         A = ctx->avx[0].v[0] + ctx->avx[0].v[1] +
   65             ctx->avx[0].v[2] + ctx->avx[0].v[3];
   66         B = 0 - ctx->avx[0].v[1] - 2 * ctx->avx[0].v[2] - 3 * ctx->avx[0].v[3] +
   67             4 * ctx->avx[1].v[0] + 4 * ctx->avx[1].v[1] + 4 * ctx->avx[1].v[2] +
   68             4 * ctx->avx[1].v[3];
   69 
   70         C = ctx->avx[0].v[2] + 3 * ctx->avx[0].v[3] - 6 * ctx->avx[1].v[0] -
   71             10 * ctx->avx[1].v[1] - 14 * ctx->avx[1].v[2] -
   72             18 * ctx->avx[1].v[3] + 16 * ctx->avx[2].v[0] +
   73             16 * ctx->avx[2].v[1] + 16 * ctx->avx[2].v[2] +
   74             16 * ctx->avx[2].v[3];
   75 
   76         D = 0 - ctx->avx[0].v[3] + 4 * ctx->avx[1].v[0] +
   77             10 * ctx->avx[1].v[1] + 20 * ctx->avx[1].v[2] +
   78             34 * ctx->avx[1].v[3] - 48 * ctx->avx[2].v[0] -
   79             64 * ctx->avx[2].v[1] - 80 * ctx->avx[2].v[2] -
   80             96 * ctx->avx[2].v[3] + 64 * ctx->avx[3].v[0] +
   81             64 * ctx->avx[3].v[1] + 64 * ctx->avx[3].v[2] +
   82             64 * ctx->avx[3].v[3];
   83 
   84         ZIO_SET_CHECKSUM(zcp, A, B, C, D);
   85         kfpu_end();
   86 }
   87 
   88 #define FLETCHER_4_AVX2_RESTORE_CTX(ctx)                                \
   89 {                                                                       \
   90         asm volatile("vmovdqu %0, %%ymm0" :: "m" ((ctx)->avx[0]));      \
   91         asm volatile("vmovdqu %0, %%ymm1" :: "m" ((ctx)->avx[1]));      \
   92         asm volatile("vmovdqu %0, %%ymm2" :: "m" ((ctx)->avx[2]));      \
   93         asm volatile("vmovdqu %0, %%ymm3" :: "m" ((ctx)->avx[3]));      \
   94 }
   95 
   96 #define FLETCHER_4_AVX2_SAVE_CTX(ctx)                                   \
   97 {                                                                       \
   98         asm volatile("vmovdqu %%ymm0, %0" : "=m" ((ctx)->avx[0]));      \
   99         asm volatile("vmovdqu %%ymm1, %0" : "=m" ((ctx)->avx[1]));      \
  100         asm volatile("vmovdqu %%ymm2, %0" : "=m" ((ctx)->avx[2]));      \
  101         asm volatile("vmovdqu %%ymm3, %0" : "=m" ((ctx)->avx[3]));      \
  102 }
  103 
  104 
  105 static void
  106 fletcher_4_avx2_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
  107 {
  108         const uint64_t *ip = buf;
  109         const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
  110 
  111         FLETCHER_4_AVX2_RESTORE_CTX(ctx);
  112 
  113         do {
  114                 asm volatile("vpmovzxdq %0, %%ymm4"::"m" (*ip));
  115                 asm volatile("vpaddq %ymm4, %ymm0, %ymm0");
  116                 asm volatile("vpaddq %ymm0, %ymm1, %ymm1");
  117                 asm volatile("vpaddq %ymm1, %ymm2, %ymm2");
  118                 asm volatile("vpaddq %ymm2, %ymm3, %ymm3");
  119         } while ((ip += 2) < ipend);
  120 
  121         FLETCHER_4_AVX2_SAVE_CTX(ctx);
  122         asm volatile("vzeroupper");
  123 }
  124 
  125 static void
  126 fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
  127 {
  128         static const zfs_fletcher_avx_t mask = {
  129                 .v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
  130                     0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
  131         };
  132         const uint64_t *ip = buf;
  133         const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
  134 
  135         FLETCHER_4_AVX2_RESTORE_CTX(ctx);
  136 
  137         asm volatile("vmovdqu %0, %%ymm5" :: "m" (mask));
  138 
  139         do {
  140                 asm volatile("vpmovzxdq %0, %%ymm4"::"m" (*ip));
  141                 asm volatile("vpshufb %ymm5, %ymm4, %ymm4");
  142 
  143                 asm volatile("vpaddq %ymm4, %ymm0, %ymm0");
  144                 asm volatile("vpaddq %ymm0, %ymm1, %ymm1");
  145                 asm volatile("vpaddq %ymm1, %ymm2, %ymm2");
  146                 asm volatile("vpaddq %ymm2, %ymm3, %ymm3");
  147         } while ((ip += 2) < ipend);
  148 
  149         FLETCHER_4_AVX2_SAVE_CTX(ctx);
  150         asm volatile("vzeroupper");
  151 }
  152 
  153 static boolean_t fletcher_4_avx2_valid(void)
  154 {
  155         return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
  156 }
  157 
  158 const fletcher_4_ops_t fletcher_4_avx2_ops = {
  159         .init_native = fletcher_4_avx2_init,
  160         .fini_native = fletcher_4_avx2_fini,
  161         .compute_native = fletcher_4_avx2_native,
  162         .init_byteswap = fletcher_4_avx2_init,
  163         .fini_byteswap = fletcher_4_avx2_fini,
  164         .compute_byteswap = fletcher_4_avx2_byteswap,
  165         .valid = fletcher_4_avx2_valid,
  166         .name = "avx2"
  167 };
  168 
  169 #endif /* defined(HAVE_AVX) && defined(HAVE_AVX2) */

Cache object: bdf879eb15b7419cf3c472daf5744651


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.