1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
24 */
25
26 #include "blake3_impl.h"
27
28 #if defined(__aarch64__) || \
29 (defined(__x86_64) && defined(HAVE_SSE2)) || \
30 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
31
32 extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
33 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
34 uint64_t counter, uint8_t flags);
35
36 extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
37 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
38 uint64_t counter, uint8_t flags, uint8_t out[64]);
39
40 extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
41 size_t num_inputs, size_t blocks, const uint32_t key[8],
42 uint64_t counter, boolean_t increment_counter, uint8_t flags,
43 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
44
45 static void blake3_compress_in_place_sse2(uint32_t cv[8],
46 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
47 uint64_t counter, uint8_t flags) {
48 kfpu_begin();
49 zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
50 flags);
51 kfpu_end();
52 }
53
54 static void blake3_compress_xof_sse2(const uint32_t cv[8],
55 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
56 uint64_t counter, uint8_t flags, uint8_t out[64]) {
57 kfpu_begin();
58 zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
59 out);
60 kfpu_end();
61 }
62
63 static void blake3_hash_many_sse2(const uint8_t * const *inputs,
64 size_t num_inputs, size_t blocks, const uint32_t key[8],
65 uint64_t counter, boolean_t increment_counter, uint8_t flags,
66 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
67 kfpu_begin();
68 zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
69 increment_counter, flags, flags_start, flags_end, out);
70 kfpu_end();
71 }
72
73 static boolean_t blake3_is_sse2_supported(void)
74 {
75 #if defined(__x86_64)
76 return (kfpu_allowed() && zfs_sse2_available());
77 #elif defined(__PPC64__) && defined(__linux__)
78 return (kfpu_allowed() && zfs_vsx_available());
79 #else
80 return (kfpu_allowed());
81 #endif
82 }
83
84 const blake3_ops_t blake3_sse2_impl = {
85 .compress_in_place = blake3_compress_in_place_sse2,
86 .compress_xof = blake3_compress_xof_sse2,
87 .hash_many = blake3_hash_many_sse2,
88 .is_supported = blake3_is_sse2_supported,
89 .degree = 4,
90 .name = "sse2"
91 };
92 #endif
93
94 #if defined(__aarch64__) || \
95 (defined(__x86_64) && defined(HAVE_SSE2)) || \
96 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
97
98 extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
99 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
100 uint64_t counter, uint8_t flags);
101
102 extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
103 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
104 uint64_t counter, uint8_t flags, uint8_t out[64]);
105
106 extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
107 size_t num_inputs, size_t blocks, const uint32_t key[8],
108 uint64_t counter, boolean_t increment_counter, uint8_t flags,
109 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
110
111 static void blake3_compress_in_place_sse41(uint32_t cv[8],
112 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
113 uint64_t counter, uint8_t flags) {
114 kfpu_begin();
115 zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
116 flags);
117 kfpu_end();
118 }
119
120 static void blake3_compress_xof_sse41(const uint32_t cv[8],
121 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
122 uint64_t counter, uint8_t flags, uint8_t out[64]) {
123 kfpu_begin();
124 zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
125 out);
126 kfpu_end();
127 }
128
129 static void blake3_hash_many_sse41(const uint8_t * const *inputs,
130 size_t num_inputs, size_t blocks, const uint32_t key[8],
131 uint64_t counter, boolean_t increment_counter, uint8_t flags,
132 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
133 kfpu_begin();
134 zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
135 increment_counter, flags, flags_start, flags_end, out);
136 kfpu_end();
137 }
138
139 static boolean_t blake3_is_sse41_supported(void)
140 {
141 #if defined(__x86_64)
142 return (kfpu_allowed() && zfs_sse4_1_available());
143 #elif defined(__PPC64__) && defined(__linux__)
144 <<<<<<< HEAD
145 /* TODO: implement vsx handler or FreeBSD */
146 =======
147 >>>>>>> c629f0bf62e351355716f9870d6c2e377584b016
148 return (kfpu_allowed() && zfs_vsx_available());
149 #else
150 return (kfpu_allowed());
151 #endif
152 }
153
154 const blake3_ops_t blake3_sse41_impl = {
155 .compress_in_place = blake3_compress_in_place_sse41,
156 .compress_xof = blake3_compress_xof_sse41,
157 .hash_many = blake3_hash_many_sse41,
158 .is_supported = blake3_is_sse41_supported,
159 .degree = 4,
160 .name = "sse41"
161 };
162 #endif
163
164 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
165 extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
166 size_t num_inputs, size_t blocks, const uint32_t key[8],
167 uint64_t counter, boolean_t increment_counter, uint8_t flags,
168 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
169
170 static void blake3_hash_many_avx2(const uint8_t * const *inputs,
171 size_t num_inputs, size_t blocks, const uint32_t key[8],
172 uint64_t counter, boolean_t increment_counter, uint8_t flags,
173 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
174 kfpu_begin();
175 zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
176 increment_counter, flags, flags_start, flags_end, out);
177 kfpu_end();
178 }
179
180 static boolean_t blake3_is_avx2_supported(void)
181 {
182 return (kfpu_allowed() && zfs_sse4_1_available() &&
183 zfs_avx2_available());
184 }
185
186 const blake3_ops_t blake3_avx2_impl = {
187 .compress_in_place = blake3_compress_in_place_sse41,
188 .compress_xof = blake3_compress_xof_sse41,
189 .hash_many = blake3_hash_many_avx2,
190 .is_supported = blake3_is_avx2_supported,
191 .degree = 8,
192 .name = "avx2"
193 };
194 #endif
195
196 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
197 extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
198 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
199 uint64_t counter, uint8_t flags);
200
201 extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
202 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
203 uint64_t counter, uint8_t flags, uint8_t out[64]);
204
205 extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
206 size_t num_inputs, size_t blocks, const uint32_t key[8],
207 uint64_t counter, boolean_t increment_counter, uint8_t flags,
208 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
209
210 static void blake3_compress_in_place_avx512(uint32_t cv[8],
211 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
212 uint64_t counter, uint8_t flags) {
213 kfpu_begin();
214 zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
215 flags);
216 kfpu_end();
217 }
218
219 static void blake3_compress_xof_avx512(const uint32_t cv[8],
220 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
221 uint64_t counter, uint8_t flags, uint8_t out[64]) {
222 kfpu_begin();
223 zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
224 out);
225 kfpu_end();
226 }
227
228 static void blake3_hash_many_avx512(const uint8_t * const *inputs,
229 size_t num_inputs, size_t blocks, const uint32_t key[8],
230 uint64_t counter, boolean_t increment_counter, uint8_t flags,
231 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
232 kfpu_begin();
233 zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
234 increment_counter, flags, flags_start, flags_end, out);
235 kfpu_end();
236 }
237
238 static boolean_t blake3_is_avx512_supported(void)
239 {
240 return (kfpu_allowed() && zfs_avx512f_available() &&
241 zfs_avx512vl_available());
242 }
243
244 const blake3_ops_t blake3_avx512_impl = {
245 .compress_in_place = blake3_compress_in_place_avx512,
246 .compress_xof = blake3_compress_xof_avx512,
247 .hash_many = blake3_hash_many_avx512,
248 .is_supported = blake3_is_avx512_supported,
249 .degree = 16,
250 .name = "avx512"
251 };
252 #endif
/* Cache object: 6e5a4983f77906a324d3fd1ba8cd8b06 */