The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
   23  */
   24 
   25 #if defined(__x86_64) && defined(HAVE_AVX512F)
   26 
   27 #include <sys/byteorder.h>
   28 #include <sys/frame.h>
   29 #include <sys/spa_checksum.h>
   30 #include <sys/string.h>
   31 #include <sys/simd.h>
   32 #include <zfs_fletcher.h>
   33 
   34 #ifdef __linux__
   35 #define __asm __asm__ __volatile__
   36 #endif
   37 
   38 ZFS_NO_SANITIZE_UNDEFINED
   39 static void
   40 fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
   41 {
   42         kfpu_begin();
   43         memset(ctx->avx512, 0, 4 * sizeof (zfs_fletcher_avx512_t));
   44 }
   45 
   46 ZFS_NO_SANITIZE_UNDEFINED
   47 static void
   48 fletcher_4_avx512f_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
   49 {
   50         static const uint64_t
   51         CcA[] = {   0,   0,   1,   3,   6,  10,  15,  21 },
   52         CcB[] = {  28,  36,  44,  52,  60,  68,  76,  84 },
   53         DcA[] = {   0,   0,   0,   1,   4,  10,  20,  35 },
   54         DcB[] = {  56,  84, 120, 164, 216, 276, 344, 420 },
   55         DcC[] = { 448, 512, 576, 640, 704, 768, 832, 896 };
   56 
   57         uint64_t A, B, C, D;
   58         uint64_t i;
   59 
   60         A = ctx->avx512[0].v[0];
   61         B = 8 * ctx->avx512[1].v[0];
   62         C = 64 * ctx->avx512[2].v[0] - CcB[0] * ctx->avx512[1].v[0];
   63         D = 512 * ctx->avx512[3].v[0] - DcC[0] * ctx->avx512[2].v[0] +
   64             DcB[0] * ctx->avx512[1].v[0];
   65 
   66         for (i = 1; i < 8; i++) {
   67                 A += ctx->avx512[0].v[i];
   68                 B += 8 * ctx->avx512[1].v[i] - i * ctx->avx512[0].v[i];
   69                 C += 64 * ctx->avx512[2].v[i] - CcB[i] * ctx->avx512[1].v[i] +
   70                     CcA[i] * ctx->avx512[0].v[i];
   71                 D += 512 * ctx->avx512[3].v[i] - DcC[i] * ctx->avx512[2].v[i] +
   72                     DcB[i] * ctx->avx512[1].v[i] - DcA[i] * ctx->avx512[0].v[i];
   73         }
   74 
   75         ZIO_SET_CHECKSUM(zcp, A, B, C, D);
   76         kfpu_end();
   77 }
   78 
   79 #define FLETCHER_4_AVX512_RESTORE_CTX(ctx)                              \
   80 {                                                                       \
   81         __asm("vmovdqu64 %0, %%zmm0" :: "m" ((ctx)->avx512[0]));        \
   82         __asm("vmovdqu64 %0, %%zmm1" :: "m" ((ctx)->avx512[1]));        \
   83         __asm("vmovdqu64 %0, %%zmm2" :: "m" ((ctx)->avx512[2]));        \
   84         __asm("vmovdqu64 %0, %%zmm3" :: "m" ((ctx)->avx512[3]));        \
   85 }
   86 
   87 #define FLETCHER_4_AVX512_SAVE_CTX(ctx)                                 \
   88 {                                                                       \
   89         __asm("vmovdqu64 %%zmm0, %0" : "=m" ((ctx)->avx512[0]));        \
   90         __asm("vmovdqu64 %%zmm1, %0" : "=m" ((ctx)->avx512[1]));        \
   91         __asm("vmovdqu64 %%zmm2, %0" : "=m" ((ctx)->avx512[2]));        \
   92         __asm("vmovdqu64 %%zmm3, %0" : "=m" ((ctx)->avx512[3]));        \
   93 }
   94 
   95 static void
   96 fletcher_4_avx512f_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
   97 {
   98         const uint32_t *ip = buf;
   99         const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
  100 
  101         FLETCHER_4_AVX512_RESTORE_CTX(ctx);
  102 
  103         do {
  104                 __asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
  105                 __asm("vpaddq %zmm4, %zmm0, %zmm0");
  106                 __asm("vpaddq %zmm0, %zmm1, %zmm1");
  107                 __asm("vpaddq %zmm1, %zmm2, %zmm2");
  108                 __asm("vpaddq %zmm2, %zmm3, %zmm3");
  109         } while ((ip += 8) < ipend);
  110 
  111         FLETCHER_4_AVX512_SAVE_CTX(ctx);
  112 }
  113 STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_native);
  114 
  115 static void
  116 fletcher_4_avx512f_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
  117     uint64_t size)
  118 {
  119         static const uint64_t byteswap_mask = 0xFFULL;
  120         const uint32_t *ip = buf;
  121         const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
  122 
  123         FLETCHER_4_AVX512_RESTORE_CTX(ctx);
  124 
  125         __asm("vpbroadcastq %0, %%zmm8" :: "r" (byteswap_mask));
  126         __asm("vpsllq $8, %zmm8, %zmm9");
  127         __asm("vpsllq $16, %zmm8, %zmm10");
  128         __asm("vpsllq $24, %zmm8, %zmm11");
  129 
  130         do {
  131                 __asm("vpmovzxdq %0, %%zmm5"::"m" (*ip));
  132 
  133                 __asm("vpsrlq $24, %zmm5, %zmm6");
  134                 __asm("vpandd %zmm8, %zmm6, %zmm6");
  135                 __asm("vpsrlq $8, %zmm5, %zmm7");
  136                 __asm("vpandd %zmm9, %zmm7, %zmm7");
  137                 __asm("vpord %zmm6, %zmm7, %zmm4");
  138                 __asm("vpsllq $8, %zmm5, %zmm6");
  139                 __asm("vpandd %zmm10, %zmm6, %zmm6");
  140                 __asm("vpord %zmm6, %zmm4, %zmm4");
  141                 __asm("vpsllq $24, %zmm5, %zmm5");
  142                 __asm("vpandd %zmm11, %zmm5, %zmm5");
  143                 __asm("vpord %zmm5, %zmm4, %zmm4");
  144 
  145                 __asm("vpaddq %zmm4, %zmm0, %zmm0");
  146                 __asm("vpaddq %zmm0, %zmm1, %zmm1");
  147                 __asm("vpaddq %zmm1, %zmm2, %zmm2");
  148                 __asm("vpaddq %zmm2, %zmm3, %zmm3");
  149         } while ((ip += 8) < ipend);
  150 
  151         FLETCHER_4_AVX512_SAVE_CTX(ctx)
  152 }
  153 STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap);
  154 
  155 static boolean_t
  156 fletcher_4_avx512f_valid(void)
  157 {
  158         return (kfpu_allowed() && zfs_avx512f_available());
  159 }
  160 
  161 const fletcher_4_ops_t fletcher_4_avx512f_ops = {
  162         .init_native = fletcher_4_avx512f_init,
  163         .fini_native = fletcher_4_avx512f_fini,
  164         .compute_native = fletcher_4_avx512f_native,
  165         .init_byteswap = fletcher_4_avx512f_init,
  166         .fini_byteswap = fletcher_4_avx512f_fini,
  167         .compute_byteswap = fletcher_4_avx512f_byteswap,
  168         .valid = fletcher_4_avx512f_valid,
  169         .name = "avx512f"
  170 };
  171 
  172 #if defined(HAVE_AVX512BW)
  173 static void
  174 fletcher_4_avx512bw_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
  175     uint64_t size)
  176 {
  177         static const zfs_fletcher_avx512_t mask = {
  178                 .v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
  179                 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
  180                 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
  181                 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
  182         };
  183         const uint32_t *ip = buf;
  184         const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
  185 
  186         FLETCHER_4_AVX512_RESTORE_CTX(ctx);
  187 
  188         __asm("vmovdqu64 %0, %%zmm5" :: "m" (mask));
  189 
  190         do {
  191                 __asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
  192 
  193                 __asm("vpshufb %zmm5, %zmm4, %zmm4");
  194 
  195                 __asm("vpaddq %zmm4, %zmm0, %zmm0");
  196                 __asm("vpaddq %zmm0, %zmm1, %zmm1");
  197                 __asm("vpaddq %zmm1, %zmm2, %zmm2");
  198                 __asm("vpaddq %zmm2, %zmm3, %zmm3");
  199         } while ((ip += 8) < ipend);
  200 
  201         FLETCHER_4_AVX512_SAVE_CTX(ctx)
  202 }
  203 STACK_FRAME_NON_STANDARD(fletcher_4_avx512bw_byteswap);
  204 
  205 static boolean_t
  206 fletcher_4_avx512bw_valid(void)
  207 {
  208         return (fletcher_4_avx512f_valid() && zfs_avx512bw_available());
  209 }
  210 
  211 const fletcher_4_ops_t fletcher_4_avx512bw_ops = {
  212         .init_native = fletcher_4_avx512f_init,
  213         .fini_native = fletcher_4_avx512f_fini,
  214         .compute_native = fletcher_4_avx512f_native,
  215         .init_byteswap = fletcher_4_avx512f_init,
  216         .fini_byteswap = fletcher_4_avx512f_fini,
  217         .compute_byteswap = fletcher_4_avx512bw_byteswap,
  218         .valid = fletcher_4_avx512bw_valid,
  219         .name = "avx512bw"
  220 };
  221 #endif
  222 
  223 #endif /* defined(__x86_64) && defined(HAVE_AVX512F) */

Cache object: 719c3b4c5099486bf55ef62a93568c92


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.