The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
   24  */
   25 
   26 #include "blake3_impl.h"
   27 
   28 #if defined(__aarch64__) || \
   29         (defined(__x86_64) && defined(HAVE_SSE2)) || \
   30         (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
   31 
   32 extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
   33     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
   34     uint64_t counter, uint8_t flags);
   35 
   36 extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
   37     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
   38     uint64_t counter, uint8_t flags, uint8_t out[64]);
   39 
   40 extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
   41     size_t num_inputs, size_t blocks, const uint32_t key[8],
   42     uint64_t counter, boolean_t increment_counter, uint8_t flags,
   43     uint8_t flags_start, uint8_t flags_end, uint8_t *out);
   44 
   45 static void blake3_compress_in_place_sse2(uint32_t cv[8],
   46     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
   47     uint64_t counter, uint8_t flags) {
   48         kfpu_begin();
   49         zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
   50             flags);
   51         kfpu_end();
   52 }
   53 
   54 static void blake3_compress_xof_sse2(const uint32_t cv[8],
   55     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
   56     uint64_t counter, uint8_t flags, uint8_t out[64]) {
   57         kfpu_begin();
   58         zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
   59             out);
   60         kfpu_end();
   61 }
   62 
   63 static void blake3_hash_many_sse2(const uint8_t * const *inputs,
   64     size_t num_inputs, size_t blocks, const uint32_t key[8],
   65     uint64_t counter, boolean_t increment_counter, uint8_t flags,
   66     uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
   67         kfpu_begin();
   68         zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
   69             increment_counter, flags, flags_start, flags_end, out);
   70         kfpu_end();
   71 }
   72 
   73 static boolean_t blake3_is_sse2_supported(void)
   74 {
   75 #if defined(__x86_64)
   76         return (kfpu_allowed() && zfs_sse2_available());
   77 #elif defined(__PPC64__) && defined(__linux__)
   78         return (kfpu_allowed() && zfs_vsx_available());
   79 #else
   80         return (kfpu_allowed());
   81 #endif
   82 }
   83 
   84 const blake3_ops_t blake3_sse2_impl = {
   85         .compress_in_place = blake3_compress_in_place_sse2,
   86         .compress_xof = blake3_compress_xof_sse2,
   87         .hash_many = blake3_hash_many_sse2,
   88         .is_supported = blake3_is_sse2_supported,
   89         .degree = 4,
   90         .name = "sse2"
   91 };
   92 #endif
   93 
   94 #if defined(__aarch64__) || \
   95         (defined(__x86_64) && defined(HAVE_SSE2)) || \
   96         (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
   97 
   98 extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
   99     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  100     uint64_t counter, uint8_t flags);
  101 
  102 extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
  103     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  104     uint64_t counter, uint8_t flags, uint8_t out[64]);
  105 
  106 extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
  107     size_t num_inputs, size_t blocks, const uint32_t key[8],
  108     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  109     uint8_t flags_start, uint8_t flags_end, uint8_t *out);
  110 
  111 static void blake3_compress_in_place_sse41(uint32_t cv[8],
  112     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  113     uint64_t counter, uint8_t flags) {
  114         kfpu_begin();
  115         zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
  116             flags);
  117         kfpu_end();
  118 }
  119 
  120 static void blake3_compress_xof_sse41(const uint32_t cv[8],
  121     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  122     uint64_t counter, uint8_t flags, uint8_t out[64]) {
  123         kfpu_begin();
  124         zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
  125             out);
  126         kfpu_end();
  127 }
  128 
  129 static void blake3_hash_many_sse41(const uint8_t * const *inputs,
  130     size_t num_inputs, size_t blocks, const uint32_t key[8],
  131     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  132     uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
  133         kfpu_begin();
  134         zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
  135             increment_counter, flags, flags_start, flags_end, out);
  136         kfpu_end();
  137 }
  138 
  139 static boolean_t blake3_is_sse41_supported(void)
  140 {
  141 #if defined(__x86_64)
  142         return (kfpu_allowed() && zfs_sse4_1_available());
  143 #elif defined(__PPC64__) && defined(__linux__)
  144 <<<<<<< HEAD
  145         /* TODO: implement vsx handler or FreeBSD */
  146 =======
  147 >>>>>>> c629f0bf62e351355716f9870d6c2e377584b016
  148         return (kfpu_allowed() && zfs_vsx_available());
  149 #else
  150         return (kfpu_allowed());
  151 #endif
  152 }
  153 
  154 const blake3_ops_t blake3_sse41_impl = {
  155         .compress_in_place = blake3_compress_in_place_sse41,
  156         .compress_xof = blake3_compress_xof_sse41,
  157         .hash_many = blake3_hash_many_sse41,
  158         .is_supported = blake3_is_sse41_supported,
  159         .degree = 4,
  160         .name = "sse41"
  161 };
  162 #endif
  163 
  164 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
  165 extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
  166     size_t num_inputs, size_t blocks, const uint32_t key[8],
  167     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  168     uint8_t flags_start, uint8_t flags_end, uint8_t *out);
  169 
  170 static void blake3_hash_many_avx2(const uint8_t * const *inputs,
  171     size_t num_inputs, size_t blocks, const uint32_t key[8],
  172     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  173     uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
  174         kfpu_begin();
  175         zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
  176             increment_counter, flags, flags_start, flags_end, out);
  177         kfpu_end();
  178 }
  179 
  180 static boolean_t blake3_is_avx2_supported(void)
  181 {
  182         return (kfpu_allowed() && zfs_sse4_1_available() &&
  183             zfs_avx2_available());
  184 }
  185 
  186 const blake3_ops_t blake3_avx2_impl = {
  187         .compress_in_place = blake3_compress_in_place_sse41,
  188         .compress_xof = blake3_compress_xof_sse41,
  189         .hash_many = blake3_hash_many_avx2,
  190         .is_supported = blake3_is_avx2_supported,
  191         .degree = 8,
  192         .name = "avx2"
  193 };
  194 #endif
  195 
  196 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
  197 extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
  198     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  199     uint64_t counter, uint8_t flags);
  200 
  201 extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
  202     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  203     uint64_t counter, uint8_t flags, uint8_t out[64]);
  204 
  205 extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
  206     size_t num_inputs, size_t blocks, const uint32_t key[8],
  207     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  208     uint8_t flags_start, uint8_t flags_end, uint8_t *out);
  209 
  210 static void blake3_compress_in_place_avx512(uint32_t cv[8],
  211     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  212     uint64_t counter, uint8_t flags) {
  213         kfpu_begin();
  214         zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
  215             flags);
  216         kfpu_end();
  217 }
  218 
  219 static void blake3_compress_xof_avx512(const uint32_t cv[8],
  220     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
  221     uint64_t counter, uint8_t flags, uint8_t out[64]) {
  222         kfpu_begin();
  223         zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
  224             out);
  225         kfpu_end();
  226 }
  227 
  228 static void blake3_hash_many_avx512(const uint8_t * const *inputs,
  229     size_t num_inputs, size_t blocks, const uint32_t key[8],
  230     uint64_t counter, boolean_t increment_counter, uint8_t flags,
  231     uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
  232         kfpu_begin();
  233         zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
  234             increment_counter, flags, flags_start, flags_end, out);
  235         kfpu_end();
  236 }
  237 
  238 static boolean_t blake3_is_avx512_supported(void)
  239 {
  240         return (kfpu_allowed() && zfs_avx512f_available() &&
  241             zfs_avx512vl_available());
  242 }
  243 
  244 const blake3_ops_t blake3_avx512_impl = {
  245         .compress_in_place = blake3_compress_in_place_avx512,
  246         .compress_xof = blake3_compress_xof_avx512,
  247         .hash_many = blake3_hash_many_avx512,
  248         .is_supported = blake3_is_avx512_supported,
  249         .degree = 16,
  250         .name = "avx512"
  251 };
  252 #endif

Cache object: 6e5a4983f77906a324d3fd1ba8cd8b06


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.