1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2020 Samuel Neves
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 */
27
28 #if defined(HAVE_SSE4_1)
29
30 #define _ASM
31 #include <sys/asm_linkage.h>
32
33 .intel_syntax noprefix
34
35 .text
36
37 ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64)
38 ENDBR
39 push r15
40 push r14
41 push r13
42 push r12
43 push rbx
44 push rbp
45 mov rbp, rsp
46 sub rsp, 360
47 and rsp, 0xFFFFFFFFFFFFFFC0
48 neg r9d
49 movd xmm0, r9d
50 pshufd xmm0, xmm0, 0x00
51 movdqa xmmword ptr [rsp+0x130], xmm0
52 movdqa xmm1, xmm0
53 pand xmm1, xmmword ptr [ADD0+rip]
54 pand xmm0, xmmword ptr [ADD1+rip]
55 movdqa xmmword ptr [rsp+0x150], xmm0
56 movd xmm0, r8d
57 pshufd xmm0, xmm0, 0x00
58 paddd xmm0, xmm1
59 movdqa xmmword ptr [rsp+0x110], xmm0
60 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
61 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
62 pcmpgtd xmm1, xmm0
63 shr r8, 32
64 movd xmm2, r8d
65 pshufd xmm2, xmm2, 0x00
66 psubd xmm2, xmm1
67 movdqa xmmword ptr [rsp+0x120], xmm2
68 mov rbx, qword ptr [rbp+0x50]
69 mov r15, rdx
70 shl r15, 6
71 movzx r13d, byte ptr [rbp+0x38]
72 movzx r12d, byte ptr [rbp+0x48]
73 cmp rsi, 4
74 jc 3f
75 2:
76 movdqu xmm3, xmmword ptr [rcx]
77 pshufd xmm0, xmm3, 0x00
78 pshufd xmm1, xmm3, 0x55
79 pshufd xmm2, xmm3, 0xAA
80 pshufd xmm3, xmm3, 0xFF
81 movdqu xmm7, xmmword ptr [rcx+0x10]
82 pshufd xmm4, xmm7, 0x00
83 pshufd xmm5, xmm7, 0x55
84 pshufd xmm6, xmm7, 0xAA
85 pshufd xmm7, xmm7, 0xFF
86 mov r8, qword ptr [rdi]
87 mov r9, qword ptr [rdi+0x8]
88 mov r10, qword ptr [rdi+0x10]
89 mov r11, qword ptr [rdi+0x18]
90 movzx eax, byte ptr [rbp+0x40]
91 or eax, r13d
92 xor edx, edx
93 9:
94 mov r14d, eax
95 or eax, r12d
96 add rdx, 64
97 cmp rdx, r15
98 cmovne eax, r14d
99 movdqu xmm8, xmmword ptr [r8+rdx-0x40]
100 movdqu xmm9, xmmword ptr [r9+rdx-0x40]
101 movdqu xmm10, xmmword ptr [r10+rdx-0x40]
102 movdqu xmm11, xmmword ptr [r11+rdx-0x40]
103 movdqa xmm12, xmm8
104 punpckldq xmm8, xmm9
105 punpckhdq xmm12, xmm9
106 movdqa xmm14, xmm10
107 punpckldq xmm10, xmm11
108 punpckhdq xmm14, xmm11
109 movdqa xmm9, xmm8
110 punpcklqdq xmm8, xmm10
111 punpckhqdq xmm9, xmm10
112 movdqa xmm13, xmm12
113 punpcklqdq xmm12, xmm14
114 punpckhqdq xmm13, xmm14
115 movdqa xmmword ptr [rsp], xmm8
116 movdqa xmmword ptr [rsp+0x10], xmm9
117 movdqa xmmword ptr [rsp+0x20], xmm12
118 movdqa xmmword ptr [rsp+0x30], xmm13
119 movdqu xmm8, xmmword ptr [r8+rdx-0x30]
120 movdqu xmm9, xmmword ptr [r9+rdx-0x30]
121 movdqu xmm10, xmmword ptr [r10+rdx-0x30]
122 movdqu xmm11, xmmword ptr [r11+rdx-0x30]
123 movdqa xmm12, xmm8
124 punpckldq xmm8, xmm9
125 punpckhdq xmm12, xmm9
126 movdqa xmm14, xmm10
127 punpckldq xmm10, xmm11
128 punpckhdq xmm14, xmm11
129 movdqa xmm9, xmm8
130 punpcklqdq xmm8, xmm10
131 punpckhqdq xmm9, xmm10
132 movdqa xmm13, xmm12
133 punpcklqdq xmm12, xmm14
134 punpckhqdq xmm13, xmm14
135 movdqa xmmword ptr [rsp+0x40], xmm8
136 movdqa xmmword ptr [rsp+0x50], xmm9
137 movdqa xmmword ptr [rsp+0x60], xmm12
138 movdqa xmmword ptr [rsp+0x70], xmm13
139 movdqu xmm8, xmmword ptr [r8+rdx-0x20]
140 movdqu xmm9, xmmword ptr [r9+rdx-0x20]
141 movdqu xmm10, xmmword ptr [r10+rdx-0x20]
142 movdqu xmm11, xmmword ptr [r11+rdx-0x20]
143 movdqa xmm12, xmm8
144 punpckldq xmm8, xmm9
145 punpckhdq xmm12, xmm9
146 movdqa xmm14, xmm10
147 punpckldq xmm10, xmm11
148 punpckhdq xmm14, xmm11
149 movdqa xmm9, xmm8
150 punpcklqdq xmm8, xmm10
151 punpckhqdq xmm9, xmm10
152 movdqa xmm13, xmm12
153 punpcklqdq xmm12, xmm14
154 punpckhqdq xmm13, xmm14
155 movdqa xmmword ptr [rsp+0x80], xmm8
156 movdqa xmmword ptr [rsp+0x90], xmm9
157 movdqa xmmword ptr [rsp+0xA0], xmm12
158 movdqa xmmword ptr [rsp+0xB0], xmm13
159 movdqu xmm8, xmmword ptr [r8+rdx-0x10]
160 movdqu xmm9, xmmword ptr [r9+rdx-0x10]
161 movdqu xmm10, xmmword ptr [r10+rdx-0x10]
162 movdqu xmm11, xmmword ptr [r11+rdx-0x10]
163 movdqa xmm12, xmm8
164 punpckldq xmm8, xmm9
165 punpckhdq xmm12, xmm9
166 movdqa xmm14, xmm10
167 punpckldq xmm10, xmm11
168 punpckhdq xmm14, xmm11
169 movdqa xmm9, xmm8
170 punpcklqdq xmm8, xmm10
171 punpckhqdq xmm9, xmm10
172 movdqa xmm13, xmm12
173 punpcklqdq xmm12, xmm14
174 punpckhqdq xmm13, xmm14
175 movdqa xmmword ptr [rsp+0xC0], xmm8
176 movdqa xmmword ptr [rsp+0xD0], xmm9
177 movdqa xmmword ptr [rsp+0xE0], xmm12
178 movdqa xmmword ptr [rsp+0xF0], xmm13
179 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
180 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
181 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
182 movdqa xmm12, xmmword ptr [rsp+0x110]
183 movdqa xmm13, xmmword ptr [rsp+0x120]
184 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
185 movd xmm15, eax
186 pshufd xmm15, xmm15, 0x00
187 prefetcht0 [r8+rdx+0x80]
188 prefetcht0 [r9+rdx+0x80]
189 prefetcht0 [r10+rdx+0x80]
190 prefetcht0 [r11+rdx+0x80]
191 paddd xmm0, xmmword ptr [rsp]
192 paddd xmm1, xmmword ptr [rsp+0x20]
193 paddd xmm2, xmmword ptr [rsp+0x40]
194 paddd xmm3, xmmword ptr [rsp+0x60]
195 paddd xmm0, xmm4
196 paddd xmm1, xmm5
197 paddd xmm2, xmm6
198 paddd xmm3, xmm7
199 pxor xmm12, xmm0
200 pxor xmm13, xmm1
201 pxor xmm14, xmm2
202 pxor xmm15, xmm3
203 movdqa xmm8, xmmword ptr [ROT16+rip]
204 pshufb xmm12, xmm8
205 pshufb xmm13, xmm8
206 pshufb xmm14, xmm8
207 pshufb xmm15, xmm8
208 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
209 paddd xmm8, xmm12
210 paddd xmm9, xmm13
211 paddd xmm10, xmm14
212 paddd xmm11, xmm15
213 pxor xmm4, xmm8
214 pxor xmm5, xmm9
215 pxor xmm6, xmm10
216 pxor xmm7, xmm11
217 movdqa xmmword ptr [rsp+0x100], xmm8
218 movdqa xmm8, xmm4
219 psrld xmm8, 12
220 pslld xmm4, 20
221 por xmm4, xmm8
222 movdqa xmm8, xmm5
223 psrld xmm8, 12
224 pslld xmm5, 20
225 por xmm5, xmm8
226 movdqa xmm8, xmm6
227 psrld xmm8, 12
228 pslld xmm6, 20
229 por xmm6, xmm8
230 movdqa xmm8, xmm7
231 psrld xmm8, 12
232 pslld xmm7, 20
233 por xmm7, xmm8
234 paddd xmm0, xmmword ptr [rsp+0x10]
235 paddd xmm1, xmmword ptr [rsp+0x30]
236 paddd xmm2, xmmword ptr [rsp+0x50]
237 paddd xmm3, xmmword ptr [rsp+0x70]
238 paddd xmm0, xmm4
239 paddd xmm1, xmm5
240 paddd xmm2, xmm6
241 paddd xmm3, xmm7
242 pxor xmm12, xmm0
243 pxor xmm13, xmm1
244 pxor xmm14, xmm2
245 pxor xmm15, xmm3
246 movdqa xmm8, xmmword ptr [ROT8+rip]
247 pshufb xmm12, xmm8
248 pshufb xmm13, xmm8
249 pshufb xmm14, xmm8
250 pshufb xmm15, xmm8
251 movdqa xmm8, xmmword ptr [rsp+0x100]
252 paddd xmm8, xmm12
253 paddd xmm9, xmm13
254 paddd xmm10, xmm14
255 paddd xmm11, xmm15
256 pxor xmm4, xmm8
257 pxor xmm5, xmm9
258 pxor xmm6, xmm10
259 pxor xmm7, xmm11
260 movdqa xmmword ptr [rsp+0x100], xmm8
261 movdqa xmm8, xmm4
262 psrld xmm8, 7
263 pslld xmm4, 25
264 por xmm4, xmm8
265 movdqa xmm8, xmm5
266 psrld xmm8, 7
267 pslld xmm5, 25
268 por xmm5, xmm8
269 movdqa xmm8, xmm6
270 psrld xmm8, 7
271 pslld xmm6, 25
272 por xmm6, xmm8
273 movdqa xmm8, xmm7
274 psrld xmm8, 7
275 pslld xmm7, 25
276 por xmm7, xmm8
277 paddd xmm0, xmmword ptr [rsp+0x80]
278 paddd xmm1, xmmword ptr [rsp+0xA0]
279 paddd xmm2, xmmword ptr [rsp+0xC0]
280 paddd xmm3, xmmword ptr [rsp+0xE0]
281 paddd xmm0, xmm5
282 paddd xmm1, xmm6
283 paddd xmm2, xmm7
284 paddd xmm3, xmm4
285 pxor xmm15, xmm0
286 pxor xmm12, xmm1
287 pxor xmm13, xmm2
288 pxor xmm14, xmm3
289 movdqa xmm8, xmmword ptr [ROT16+rip]
290 pshufb xmm15, xmm8
291 pshufb xmm12, xmm8
292 pshufb xmm13, xmm8
293 pshufb xmm14, xmm8
294 paddd xmm10, xmm15
295 paddd xmm11, xmm12
296 movdqa xmm8, xmmword ptr [rsp+0x100]
297 paddd xmm8, xmm13
298 paddd xmm9, xmm14
299 pxor xmm5, xmm10
300 pxor xmm6, xmm11
301 pxor xmm7, xmm8
302 pxor xmm4, xmm9
303 movdqa xmmword ptr [rsp+0x100], xmm8
304 movdqa xmm8, xmm5
305 psrld xmm8, 12
306 pslld xmm5, 20
307 por xmm5, xmm8
308 movdqa xmm8, xmm6
309 psrld xmm8, 12
310 pslld xmm6, 20
311 por xmm6, xmm8
312 movdqa xmm8, xmm7
313 psrld xmm8, 12
314 pslld xmm7, 20
315 por xmm7, xmm8
316 movdqa xmm8, xmm4
317 psrld xmm8, 12
318 pslld xmm4, 20
319 por xmm4, xmm8
320 paddd xmm0, xmmword ptr [rsp+0x90]
321 paddd xmm1, xmmword ptr [rsp+0xB0]
322 paddd xmm2, xmmword ptr [rsp+0xD0]
323 paddd xmm3, xmmword ptr [rsp+0xF0]
324 paddd xmm0, xmm5
325 paddd xmm1, xmm6
326 paddd xmm2, xmm7
327 paddd xmm3, xmm4
328 pxor xmm15, xmm0
329 pxor xmm12, xmm1
330 pxor xmm13, xmm2
331 pxor xmm14, xmm3
332 movdqa xmm8, xmmword ptr [ROT8+rip]
333 pshufb xmm15, xmm8
334 pshufb xmm12, xmm8
335 pshufb xmm13, xmm8
336 pshufb xmm14, xmm8
337 paddd xmm10, xmm15
338 paddd xmm11, xmm12
339 movdqa xmm8, xmmword ptr [rsp+0x100]
340 paddd xmm8, xmm13
341 paddd xmm9, xmm14
342 pxor xmm5, xmm10
343 pxor xmm6, xmm11
344 pxor xmm7, xmm8
345 pxor xmm4, xmm9
346 movdqa xmmword ptr [rsp+0x100], xmm8
347 movdqa xmm8, xmm5
348 psrld xmm8, 7
349 pslld xmm5, 25
350 por xmm5, xmm8
351 movdqa xmm8, xmm6
352 psrld xmm8, 7
353 pslld xmm6, 25
354 por xmm6, xmm8
355 movdqa xmm8, xmm7
356 psrld xmm8, 7
357 pslld xmm7, 25
358 por xmm7, xmm8
359 movdqa xmm8, xmm4
360 psrld xmm8, 7
361 pslld xmm4, 25
362 por xmm4, xmm8
363 paddd xmm0, xmmword ptr [rsp+0x20]
364 paddd xmm1, xmmword ptr [rsp+0x30]
365 paddd xmm2, xmmword ptr [rsp+0x70]
366 paddd xmm3, xmmword ptr [rsp+0x40]
367 paddd xmm0, xmm4
368 paddd xmm1, xmm5
369 paddd xmm2, xmm6
370 paddd xmm3, xmm7
371 pxor xmm12, xmm0
372 pxor xmm13, xmm1
373 pxor xmm14, xmm2
374 pxor xmm15, xmm3
375 movdqa xmm8, xmmword ptr [ROT16+rip]
376 pshufb xmm12, xmm8
377 pshufb xmm13, xmm8
378 pshufb xmm14, xmm8
379 pshufb xmm15, xmm8
380 movdqa xmm8, xmmword ptr [rsp+0x100]
381 paddd xmm8, xmm12
382 paddd xmm9, xmm13
383 paddd xmm10, xmm14
384 paddd xmm11, xmm15
385 pxor xmm4, xmm8
386 pxor xmm5, xmm9
387 pxor xmm6, xmm10
388 pxor xmm7, xmm11
389 movdqa xmmword ptr [rsp+0x100], xmm8
390 movdqa xmm8, xmm4
391 psrld xmm8, 12
392 pslld xmm4, 20
393 por xmm4, xmm8
394 movdqa xmm8, xmm5
395 psrld xmm8, 12
396 pslld xmm5, 20
397 por xmm5, xmm8
398 movdqa xmm8, xmm6
399 psrld xmm8, 12
400 pslld xmm6, 20
401 por xmm6, xmm8
402 movdqa xmm8, xmm7
403 psrld xmm8, 12
404 pslld xmm7, 20
405 por xmm7, xmm8
406 paddd xmm0, xmmword ptr [rsp+0x60]
407 paddd xmm1, xmmword ptr [rsp+0xA0]
408 paddd xmm2, xmmword ptr [rsp]
409 paddd xmm3, xmmword ptr [rsp+0xD0]
410 paddd xmm0, xmm4
411 paddd xmm1, xmm5
412 paddd xmm2, xmm6
413 paddd xmm3, xmm7
414 pxor xmm12, xmm0
415 pxor xmm13, xmm1
416 pxor xmm14, xmm2
417 pxor xmm15, xmm3
418 movdqa xmm8, xmmword ptr [ROT8+rip]
419 pshufb xmm12, xmm8
420 pshufb xmm13, xmm8
421 pshufb xmm14, xmm8
422 pshufb xmm15, xmm8
423 movdqa xmm8, xmmword ptr [rsp+0x100]
424 paddd xmm8, xmm12
425 paddd xmm9, xmm13
426 paddd xmm10, xmm14
427 paddd xmm11, xmm15
428 pxor xmm4, xmm8
429 pxor xmm5, xmm9
430 pxor xmm6, xmm10
431 pxor xmm7, xmm11
432 movdqa xmmword ptr [rsp+0x100], xmm8
433 movdqa xmm8, xmm4
434 psrld xmm8, 7
435 pslld xmm4, 25
436 por xmm4, xmm8
437 movdqa xmm8, xmm5
438 psrld xmm8, 7
439 pslld xmm5, 25
440 por xmm5, xmm8
441 movdqa xmm8, xmm6
442 psrld xmm8, 7
443 pslld xmm6, 25
444 por xmm6, xmm8
445 movdqa xmm8, xmm7
446 psrld xmm8, 7
447 pslld xmm7, 25
448 por xmm7, xmm8
449 paddd xmm0, xmmword ptr [rsp+0x10]
450 paddd xmm1, xmmword ptr [rsp+0xC0]
451 paddd xmm2, xmmword ptr [rsp+0x90]
452 paddd xmm3, xmmword ptr [rsp+0xF0]
453 paddd xmm0, xmm5
454 paddd xmm1, xmm6
455 paddd xmm2, xmm7
456 paddd xmm3, xmm4
457 pxor xmm15, xmm0
458 pxor xmm12, xmm1
459 pxor xmm13, xmm2
460 pxor xmm14, xmm3
461 movdqa xmm8, xmmword ptr [ROT16+rip]
462 pshufb xmm15, xmm8
463 pshufb xmm12, xmm8
464 pshufb xmm13, xmm8
465 pshufb xmm14, xmm8
466 paddd xmm10, xmm15
467 paddd xmm11, xmm12
468 movdqa xmm8, xmmword ptr [rsp+0x100]
469 paddd xmm8, xmm13
470 paddd xmm9, xmm14
471 pxor xmm5, xmm10
472 pxor xmm6, xmm11
473 pxor xmm7, xmm8
474 pxor xmm4, xmm9
475 movdqa xmmword ptr [rsp+0x100], xmm8
476 movdqa xmm8, xmm5
477 psrld xmm8, 12
478 pslld xmm5, 20
479 por xmm5, xmm8
480 movdqa xmm8, xmm6
481 psrld xmm8, 12
482 pslld xmm6, 20
483 por xmm6, xmm8
484 movdqa xmm8, xmm7
485 psrld xmm8, 12
486 pslld xmm7, 20
487 por xmm7, xmm8
488 movdqa xmm8, xmm4
489 psrld xmm8, 12
490 pslld xmm4, 20
491 por xmm4, xmm8
492 paddd xmm0, xmmword ptr [rsp+0xB0]
493 paddd xmm1, xmmword ptr [rsp+0x50]
494 paddd xmm2, xmmword ptr [rsp+0xE0]
495 paddd xmm3, xmmword ptr [rsp+0x80]
496 paddd xmm0, xmm5
497 paddd xmm1, xmm6
498 paddd xmm2, xmm7
499 paddd xmm3, xmm4
500 pxor xmm15, xmm0
501 pxor xmm12, xmm1
502 pxor xmm13, xmm2
503 pxor xmm14, xmm3
504 movdqa xmm8, xmmword ptr [ROT8+rip]
505 pshufb xmm15, xmm8
506 pshufb xmm12, xmm8
507 pshufb xmm13, xmm8
508 pshufb xmm14, xmm8
509 paddd xmm10, xmm15
510 paddd xmm11, xmm12
511 movdqa xmm8, xmmword ptr [rsp+0x100]
512 paddd xmm8, xmm13
513 paddd xmm9, xmm14
514 pxor xmm5, xmm10
515 pxor xmm6, xmm11
516 pxor xmm7, xmm8
517 pxor xmm4, xmm9
518 movdqa xmmword ptr [rsp+0x100], xmm8
519 movdqa xmm8, xmm5
520 psrld xmm8, 7
521 pslld xmm5, 25
522 por xmm5, xmm8
523 movdqa xmm8, xmm6
524 psrld xmm8, 7
525 pslld xmm6, 25
526 por xmm6, xmm8
527 movdqa xmm8, xmm7
528 psrld xmm8, 7
529 pslld xmm7, 25
530 por xmm7, xmm8
531 movdqa xmm8, xmm4
532 psrld xmm8, 7
533 pslld xmm4, 25
534 por xmm4, xmm8
535 paddd xmm0, xmmword ptr [rsp+0x30]
536 paddd xmm1, xmmword ptr [rsp+0xA0]
537 paddd xmm2, xmmword ptr [rsp+0xD0]
538 paddd xmm3, xmmword ptr [rsp+0x70]
539 paddd xmm0, xmm4
540 paddd xmm1, xmm5
541 paddd xmm2, xmm6
542 paddd xmm3, xmm7
543 pxor xmm12, xmm0
544 pxor xmm13, xmm1
545 pxor xmm14, xmm2
546 pxor xmm15, xmm3
547 movdqa xmm8, xmmword ptr [ROT16+rip]
548 pshufb xmm12, xmm8
549 pshufb xmm13, xmm8
550 pshufb xmm14, xmm8
551 pshufb xmm15, xmm8
552 movdqa xmm8, xmmword ptr [rsp+0x100]
553 paddd xmm8, xmm12
554 paddd xmm9, xmm13
555 paddd xmm10, xmm14
556 paddd xmm11, xmm15
557 pxor xmm4, xmm8
558 pxor xmm5, xmm9
559 pxor xmm6, xmm10
560 pxor xmm7, xmm11
561 movdqa xmmword ptr [rsp+0x100], xmm8
562 movdqa xmm8, xmm4
563 psrld xmm8, 12
564 pslld xmm4, 20
565 por xmm4, xmm8
566 movdqa xmm8, xmm5
567 psrld xmm8, 12
568 pslld xmm5, 20
569 por xmm5, xmm8
570 movdqa xmm8, xmm6
571 psrld xmm8, 12
572 pslld xmm6, 20
573 por xmm6, xmm8
574 movdqa xmm8, xmm7
575 psrld xmm8, 12
576 pslld xmm7, 20
577 por xmm7, xmm8
578 paddd xmm0, xmmword ptr [rsp+0x40]
579 paddd xmm1, xmmword ptr [rsp+0xC0]
580 paddd xmm2, xmmword ptr [rsp+0x20]
581 paddd xmm3, xmmword ptr [rsp+0xE0]
582 paddd xmm0, xmm4
583 paddd xmm1, xmm5
584 paddd xmm2, xmm6
585 paddd xmm3, xmm7
586 pxor xmm12, xmm0
587 pxor xmm13, xmm1
588 pxor xmm14, xmm2
589 pxor xmm15, xmm3
590 movdqa xmm8, xmmword ptr [ROT8+rip]
591 pshufb xmm12, xmm8
592 pshufb xmm13, xmm8
593 pshufb xmm14, xmm8
594 pshufb xmm15, xmm8
595 movdqa xmm8, xmmword ptr [rsp+0x100]
596 paddd xmm8, xmm12
597 paddd xmm9, xmm13
598 paddd xmm10, xmm14
599 paddd xmm11, xmm15
600 pxor xmm4, xmm8
601 pxor xmm5, xmm9
602 pxor xmm6, xmm10
603 pxor xmm7, xmm11
604 movdqa xmmword ptr [rsp+0x100], xmm8
605 movdqa xmm8, xmm4
606 psrld xmm8, 7
607 pslld xmm4, 25
608 por xmm4, xmm8
609 movdqa xmm8, xmm5
610 psrld xmm8, 7
611 pslld xmm5, 25
612 por xmm5, xmm8
613 movdqa xmm8, xmm6
614 psrld xmm8, 7
615 pslld xmm6, 25
616 por xmm6, xmm8
617 movdqa xmm8, xmm7
618 psrld xmm8, 7
619 pslld xmm7, 25
620 por xmm7, xmm8
621 paddd xmm0, xmmword ptr [rsp+0x60]
622 paddd xmm1, xmmword ptr [rsp+0x90]
623 paddd xmm2, xmmword ptr [rsp+0xB0]
624 paddd xmm3, xmmword ptr [rsp+0x80]
625 paddd xmm0, xmm5
626 paddd xmm1, xmm6
627 paddd xmm2, xmm7
628 paddd xmm3, xmm4
629 pxor xmm15, xmm0
630 pxor xmm12, xmm1
631 pxor xmm13, xmm2
632 pxor xmm14, xmm3
633 movdqa xmm8, xmmword ptr [ROT16+rip]
634 pshufb xmm15, xmm8
635 pshufb xmm12, xmm8
636 pshufb xmm13, xmm8
637 pshufb xmm14, xmm8
638 paddd xmm10, xmm15
639 paddd xmm11, xmm12
640 movdqa xmm8, xmmword ptr [rsp+0x100]
641 paddd xmm8, xmm13
642 paddd xmm9, xmm14
643 pxor xmm5, xmm10
644 pxor xmm6, xmm11
645 pxor xmm7, xmm8
646 pxor xmm4, xmm9
647 movdqa xmmword ptr [rsp+0x100], xmm8
648 movdqa xmm8, xmm5
649 psrld xmm8, 12
650 pslld xmm5, 20
651 por xmm5, xmm8
652 movdqa xmm8, xmm6
653 psrld xmm8, 12
654 pslld xmm6, 20
655 por xmm6, xmm8
656 movdqa xmm8, xmm7
657 psrld xmm8, 12
658 pslld xmm7, 20
659 por xmm7, xmm8
660 movdqa xmm8, xmm4
661 psrld xmm8, 12
662 pslld xmm4, 20
663 por xmm4, xmm8
664 paddd xmm0, xmmword ptr [rsp+0x50]
665 paddd xmm1, xmmword ptr [rsp]
666 paddd xmm2, xmmword ptr [rsp+0xF0]
667 paddd xmm3, xmmword ptr [rsp+0x10]
668 paddd xmm0, xmm5
669 paddd xmm1, xmm6
670 paddd xmm2, xmm7
671 paddd xmm3, xmm4
672 pxor xmm15, xmm0
673 pxor xmm12, xmm1
674 pxor xmm13, xmm2
675 pxor xmm14, xmm3
676 movdqa xmm8, xmmword ptr [ROT8+rip]
677 pshufb xmm15, xmm8
678 pshufb xmm12, xmm8
679 pshufb xmm13, xmm8
680 pshufb xmm14, xmm8
681 paddd xmm10, xmm15
682 paddd xmm11, xmm12
683 movdqa xmm8, xmmword ptr [rsp+0x100]
684 paddd xmm8, xmm13
685 paddd xmm9, xmm14
686 pxor xmm5, xmm10
687 pxor xmm6, xmm11
688 pxor xmm7, xmm8
689 pxor xmm4, xmm9
690 movdqa xmmword ptr [rsp+0x100], xmm8
691 movdqa xmm8, xmm5
692 psrld xmm8, 7
693 pslld xmm5, 25
694 por xmm5, xmm8
695 movdqa xmm8, xmm6
696 psrld xmm8, 7
697 pslld xmm6, 25
698 por xmm6, xmm8
699 movdqa xmm8, xmm7
700 psrld xmm8, 7
701 pslld xmm7, 25
702 por xmm7, xmm8
703 movdqa xmm8, xmm4
704 psrld xmm8, 7
705 pslld xmm4, 25
706 por xmm4, xmm8
707 paddd xmm0, xmmword ptr [rsp+0xA0]
708 paddd xmm1, xmmword ptr [rsp+0xC0]
709 paddd xmm2, xmmword ptr [rsp+0xE0]
710 paddd xmm3, xmmword ptr [rsp+0xD0]
711 paddd xmm0, xmm4
712 paddd xmm1, xmm5
713 paddd xmm2, xmm6
714 paddd xmm3, xmm7
715 pxor xmm12, xmm0
716 pxor xmm13, xmm1
717 pxor xmm14, xmm2
718 pxor xmm15, xmm3
719 movdqa xmm8, xmmword ptr [ROT16+rip]
720 pshufb xmm12, xmm8
721 pshufb xmm13, xmm8
722 pshufb xmm14, xmm8
723 pshufb xmm15, xmm8
724 movdqa xmm8, xmmword ptr [rsp+0x100]
725 paddd xmm8, xmm12
726 paddd xmm9, xmm13
727 paddd xmm10, xmm14
728 paddd xmm11, xmm15
729 pxor xmm4, xmm8
730 pxor xmm5, xmm9
731 pxor xmm6, xmm10
732 pxor xmm7, xmm11
733 movdqa xmmword ptr [rsp+0x100], xmm8
734 movdqa xmm8, xmm4
735 psrld xmm8, 12
736 pslld xmm4, 20
737 por xmm4, xmm8
738 movdqa xmm8, xmm5
739 psrld xmm8, 12
740 pslld xmm5, 20
741 por xmm5, xmm8
742 movdqa xmm8, xmm6
743 psrld xmm8, 12
744 pslld xmm6, 20
745 por xmm6, xmm8
746 movdqa xmm8, xmm7
747 psrld xmm8, 12
748 pslld xmm7, 20
749 por xmm7, xmm8
750 paddd xmm0, xmmword ptr [rsp+0x70]
751 paddd xmm1, xmmword ptr [rsp+0x90]
752 paddd xmm2, xmmword ptr [rsp+0x30]
753 paddd xmm3, xmmword ptr [rsp+0xF0]
754 paddd xmm0, xmm4
755 paddd xmm1, xmm5
756 paddd xmm2, xmm6
757 paddd xmm3, xmm7
758 pxor xmm12, xmm0
759 pxor xmm13, xmm1
760 pxor xmm14, xmm2
761 pxor xmm15, xmm3
762 movdqa xmm8, xmmword ptr [ROT8+rip]
763 pshufb xmm12, xmm8
764 pshufb xmm13, xmm8
765 pshufb xmm14, xmm8
766 pshufb xmm15, xmm8
767 movdqa xmm8, xmmword ptr [rsp+0x100]
768 paddd xmm8, xmm12
769 paddd xmm9, xmm13
770 paddd xmm10, xmm14
771 paddd xmm11, xmm15
772 pxor xmm4, xmm8
773 pxor xmm5, xmm9
774 pxor xmm6, xmm10
775 pxor xmm7, xmm11
776 movdqa xmmword ptr [rsp+0x100], xmm8
777 movdqa xmm8, xmm4
778 psrld xmm8, 7
779 pslld xmm4, 25
780 por xmm4, xmm8
781 movdqa xmm8, xmm5
782 psrld xmm8, 7
783 pslld xmm5, 25
784 por xmm5, xmm8
785 movdqa xmm8, xmm6
786 psrld xmm8, 7
787 pslld xmm6, 25
788 por xmm6, xmm8
789 movdqa xmm8, xmm7
790 psrld xmm8, 7
791 pslld xmm7, 25
792 por xmm7, xmm8
793 paddd xmm0, xmmword ptr [rsp+0x40]
794 paddd xmm1, xmmword ptr [rsp+0xB0]
795 paddd xmm2, xmmword ptr [rsp+0x50]
796 paddd xmm3, xmmword ptr [rsp+0x10]
797 paddd xmm0, xmm5
798 paddd xmm1, xmm6
799 paddd xmm2, xmm7
800 paddd xmm3, xmm4
801 pxor xmm15, xmm0
802 pxor xmm12, xmm1
803 pxor xmm13, xmm2
804 pxor xmm14, xmm3
805 movdqa xmm8, xmmword ptr [ROT16+rip]
806 pshufb xmm15, xmm8
807 pshufb xmm12, xmm8
808 pshufb xmm13, xmm8
809 pshufb xmm14, xmm8
810 paddd xmm10, xmm15
811 paddd xmm11, xmm12
812 movdqa xmm8, xmmword ptr [rsp+0x100]
813 paddd xmm8, xmm13
814 paddd xmm9, xmm14
815 pxor xmm5, xmm10
816 pxor xmm6, xmm11
817 pxor xmm7, xmm8
818 pxor xmm4, xmm9
819 movdqa xmmword ptr [rsp+0x100], xmm8
820 movdqa xmm8, xmm5
821 psrld xmm8, 12
822 pslld xmm5, 20
823 por xmm5, xmm8
824 movdqa xmm8, xmm6
825 psrld xmm8, 12
826 pslld xmm6, 20
827 por xmm6, xmm8
828 movdqa xmm8, xmm7
829 psrld xmm8, 12
830 pslld xmm7, 20
831 por xmm7, xmm8
832 movdqa xmm8, xmm4
833 psrld xmm8, 12
834 pslld xmm4, 20
835 por xmm4, xmm8
836 paddd xmm0, xmmword ptr [rsp]
837 paddd xmm1, xmmword ptr [rsp+0x20]
838 paddd xmm2, xmmword ptr [rsp+0x80]
839 paddd xmm3, xmmword ptr [rsp+0x60]
840 paddd xmm0, xmm5
841 paddd xmm1, xmm6
842 paddd xmm2, xmm7
843 paddd xmm3, xmm4
844 pxor xmm15, xmm0
845 pxor xmm12, xmm1
846 pxor xmm13, xmm2
847 pxor xmm14, xmm3
848 movdqa xmm8, xmmword ptr [ROT8+rip]
849 pshufb xmm15, xmm8
850 pshufb xmm12, xmm8
851 pshufb xmm13, xmm8
852 pshufb xmm14, xmm8
853 paddd xmm10, xmm15
854 paddd xmm11, xmm12
855 movdqa xmm8, xmmword ptr [rsp+0x100]
856 paddd xmm8, xmm13
857 paddd xmm9, xmm14
858 pxor xmm5, xmm10
859 pxor xmm6, xmm11
860 pxor xmm7, xmm8
861 pxor xmm4, xmm9
862 movdqa xmmword ptr [rsp+0x100], xmm8
863 movdqa xmm8, xmm5
864 psrld xmm8, 7
865 pslld xmm5, 25
866 por xmm5, xmm8
867 movdqa xmm8, xmm6
868 psrld xmm8, 7
869 pslld xmm6, 25
870 por xmm6, xmm8
871 movdqa xmm8, xmm7
872 psrld xmm8, 7
873 pslld xmm7, 25
874 por xmm7, xmm8
875 movdqa xmm8, xmm4
876 psrld xmm8, 7
877 pslld xmm4, 25
878 por xmm4, xmm8
879 paddd xmm0, xmmword ptr [rsp+0xC0]
880 paddd xmm1, xmmword ptr [rsp+0x90]
881 paddd xmm2, xmmword ptr [rsp+0xF0]
882 paddd xmm3, xmmword ptr [rsp+0xE0]
883 paddd xmm0, xmm4
884 paddd xmm1, xmm5
885 paddd xmm2, xmm6
886 paddd xmm3, xmm7
887 pxor xmm12, xmm0
888 pxor xmm13, xmm1
889 pxor xmm14, xmm2
890 pxor xmm15, xmm3
891 movdqa xmm8, xmmword ptr [ROT16+rip]
892 pshufb xmm12, xmm8
893 pshufb xmm13, xmm8
894 pshufb xmm14, xmm8
895 pshufb xmm15, xmm8
896 movdqa xmm8, xmmword ptr [rsp+0x100]
897 paddd xmm8, xmm12
898 paddd xmm9, xmm13
899 paddd xmm10, xmm14
900 paddd xmm11, xmm15
901 pxor xmm4, xmm8
902 pxor xmm5, xmm9
903 pxor xmm6, xmm10
904 pxor xmm7, xmm11
905 movdqa xmmword ptr [rsp+0x100], xmm8
906 movdqa xmm8, xmm4
907 psrld xmm8, 12
908 pslld xmm4, 20
909 por xmm4, xmm8
910 movdqa xmm8, xmm5
911 psrld xmm8, 12
912 pslld xmm5, 20
913 por xmm5, xmm8
914 movdqa xmm8, xmm6
915 psrld xmm8, 12
916 pslld xmm6, 20
917 por xmm6, xmm8
918 movdqa xmm8, xmm7
919 psrld xmm8, 12
920 pslld xmm7, 20
921 por xmm7, xmm8
922 paddd xmm0, xmmword ptr [rsp+0xD0]
923 paddd xmm1, xmmword ptr [rsp+0xB0]
924 paddd xmm2, xmmword ptr [rsp+0xA0]
925 paddd xmm3, xmmword ptr [rsp+0x80]
926 paddd xmm0, xmm4
927 paddd xmm1, xmm5
928 paddd xmm2, xmm6
929 paddd xmm3, xmm7
930 pxor xmm12, xmm0
931 pxor xmm13, xmm1
932 pxor xmm14, xmm2
933 pxor xmm15, xmm3
934 movdqa xmm8, xmmword ptr [ROT8+rip]
935 pshufb xmm12, xmm8
936 pshufb xmm13, xmm8
937 pshufb xmm14, xmm8
938 pshufb xmm15, xmm8
939 movdqa xmm8, xmmword ptr [rsp+0x100]
940 paddd xmm8, xmm12
941 paddd xmm9, xmm13
942 paddd xmm10, xmm14
943 paddd xmm11, xmm15
944 pxor xmm4, xmm8
945 pxor xmm5, xmm9
946 pxor xmm6, xmm10
947 pxor xmm7, xmm11
948 movdqa xmmword ptr [rsp+0x100], xmm8
949 movdqa xmm8, xmm4
950 psrld xmm8, 7
951 pslld xmm4, 25
952 por xmm4, xmm8
953 movdqa xmm8, xmm5
954 psrld xmm8, 7
955 pslld xmm5, 25
956 por xmm5, xmm8
957 movdqa xmm8, xmm6
958 psrld xmm8, 7
959 pslld xmm6, 25
960 por xmm6, xmm8
961 movdqa xmm8, xmm7
962 psrld xmm8, 7
963 pslld xmm7, 25
964 por xmm7, xmm8
965 paddd xmm0, xmmword ptr [rsp+0x70]
966 paddd xmm1, xmmword ptr [rsp+0x50]
967 paddd xmm2, xmmword ptr [rsp]
968 paddd xmm3, xmmword ptr [rsp+0x60]
969 paddd xmm0, xmm5
970 paddd xmm1, xmm6
971 paddd xmm2, xmm7
972 paddd xmm3, xmm4
973 pxor xmm15, xmm0
974 pxor xmm12, xmm1
975 pxor xmm13, xmm2
976 pxor xmm14, xmm3
977 movdqa xmm8, xmmword ptr [ROT16+rip]
978 pshufb xmm15, xmm8
979 pshufb xmm12, xmm8
980 pshufb xmm13, xmm8
981 pshufb xmm14, xmm8
982 paddd xmm10, xmm15
983 paddd xmm11, xmm12
984 movdqa xmm8, xmmword ptr [rsp+0x100]
985 paddd xmm8, xmm13
986 paddd xmm9, xmm14
987 pxor xmm5, xmm10
988 pxor xmm6, xmm11
989 pxor xmm7, xmm8
990 pxor xmm4, xmm9
991 movdqa xmmword ptr [rsp+0x100], xmm8
992 movdqa xmm8, xmm5
993 psrld xmm8, 12
994 pslld xmm5, 20
995 por xmm5, xmm8
996 movdqa xmm8, xmm6
997 psrld xmm8, 12
998 pslld xmm6, 20
999 por xmm6, xmm8
1000 movdqa xmm8, xmm7
1001 psrld xmm8, 12
1002 pslld xmm7, 20
1003 por xmm7, xmm8
1004 movdqa xmm8, xmm4
1005 psrld xmm8, 12
1006 pslld xmm4, 20
1007 por xmm4, xmm8
1008 paddd xmm0, xmmword ptr [rsp+0x20]
1009 paddd xmm1, xmmword ptr [rsp+0x30]
1010 paddd xmm2, xmmword ptr [rsp+0x10]
1011 paddd xmm3, xmmword ptr [rsp+0x40]
1012 paddd xmm0, xmm5
1013 paddd xmm1, xmm6
1014 paddd xmm2, xmm7
1015 paddd xmm3, xmm4
1016 pxor xmm15, xmm0
1017 pxor xmm12, xmm1
1018 pxor xmm13, xmm2
1019 pxor xmm14, xmm3
1020 movdqa xmm8, xmmword ptr [ROT8+rip]
1021 pshufb xmm15, xmm8
1022 pshufb xmm12, xmm8
1023 pshufb xmm13, xmm8
1024 pshufb xmm14, xmm8
1025 paddd xmm10, xmm15
1026 paddd xmm11, xmm12
1027 movdqa xmm8, xmmword ptr [rsp+0x100]
1028 paddd xmm8, xmm13
1029 paddd xmm9, xmm14
1030 pxor xmm5, xmm10
1031 pxor xmm6, xmm11
1032 pxor xmm7, xmm8
1033 pxor xmm4, xmm9
1034 movdqa xmmword ptr [rsp+0x100], xmm8
1035 movdqa xmm8, xmm5
1036 psrld xmm8, 7
1037 pslld xmm5, 25
1038 por xmm5, xmm8
1039 movdqa xmm8, xmm6
1040 psrld xmm8, 7
1041 pslld xmm6, 25
1042 por xmm6, xmm8
1043 movdqa xmm8, xmm7
1044 psrld xmm8, 7
1045 pslld xmm7, 25
1046 por xmm7, xmm8
1047 movdqa xmm8, xmm4
1048 psrld xmm8, 7
1049 pslld xmm4, 25
1050 por xmm4, xmm8
1051 paddd xmm0, xmmword ptr [rsp+0x90]
1052 paddd xmm1, xmmword ptr [rsp+0xB0]
1053 paddd xmm2, xmmword ptr [rsp+0x80]
1054 paddd xmm3, xmmword ptr [rsp+0xF0]
1055 paddd xmm0, xmm4
1056 paddd xmm1, xmm5
1057 paddd xmm2, xmm6
1058 paddd xmm3, xmm7
1059 pxor xmm12, xmm0
1060 pxor xmm13, xmm1
1061 pxor xmm14, xmm2
1062 pxor xmm15, xmm3
1063 movdqa xmm8, xmmword ptr [ROT16+rip]
1064 pshufb xmm12, xmm8
1065 pshufb xmm13, xmm8
1066 pshufb xmm14, xmm8
1067 pshufb xmm15, xmm8
1068 movdqa xmm8, xmmword ptr [rsp+0x100]
1069 paddd xmm8, xmm12
1070 paddd xmm9, xmm13
1071 paddd xmm10, xmm14
1072 paddd xmm11, xmm15
1073 pxor xmm4, xmm8
1074 pxor xmm5, xmm9
1075 pxor xmm6, xmm10
1076 pxor xmm7, xmm11
1077 movdqa xmmword ptr [rsp+0x100], xmm8
1078 movdqa xmm8, xmm4
1079 psrld xmm8, 12
1080 pslld xmm4, 20
1081 por xmm4, xmm8
1082 movdqa xmm8, xmm5
1083 psrld xmm8, 12
1084 pslld xmm5, 20
1085 por xmm5, xmm8
1086 movdqa xmm8, xmm6
1087 psrld xmm8, 12
1088 pslld xmm6, 20
1089 por xmm6, xmm8
1090 movdqa xmm8, xmm7
1091 psrld xmm8, 12
1092 pslld xmm7, 20
1093 por xmm7, xmm8
1094 paddd xmm0, xmmword ptr [rsp+0xE0]
1095 paddd xmm1, xmmword ptr [rsp+0x50]
1096 paddd xmm2, xmmword ptr [rsp+0xC0]
1097 paddd xmm3, xmmword ptr [rsp+0x10]
1098 paddd xmm0, xmm4
1099 paddd xmm1, xmm5
1100 paddd xmm2, xmm6
1101 paddd xmm3, xmm7
1102 pxor xmm12, xmm0
1103 pxor xmm13, xmm1
1104 pxor xmm14, xmm2
1105 pxor xmm15, xmm3
1106 movdqa xmm8, xmmword ptr [ROT8+rip]
1107 pshufb xmm12, xmm8
1108 pshufb xmm13, xmm8
1109 pshufb xmm14, xmm8
1110 pshufb xmm15, xmm8
1111 movdqa xmm8, xmmword ptr [rsp+0x100]
1112 paddd xmm8, xmm12
1113 paddd xmm9, xmm13
1114 paddd xmm10, xmm14
1115 paddd xmm11, xmm15
1116 pxor xmm4, xmm8
1117 pxor xmm5, xmm9
1118 pxor xmm6, xmm10
1119 pxor xmm7, xmm11
1120 movdqa xmmword ptr [rsp+0x100], xmm8
1121 movdqa xmm8, xmm4
1122 psrld xmm8, 7
1123 pslld xmm4, 25
1124 por xmm4, xmm8
1125 movdqa xmm8, xmm5
1126 psrld xmm8, 7
1127 pslld xmm5, 25
1128 por xmm5, xmm8
1129 movdqa xmm8, xmm6
1130 psrld xmm8, 7
1131 pslld xmm6, 25
1132 por xmm6, xmm8
1133 movdqa xmm8, xmm7
1134 psrld xmm8, 7
1135 pslld xmm7, 25
1136 por xmm7, xmm8
1137 paddd xmm0, xmmword ptr [rsp+0xD0]
1138 paddd xmm1, xmmword ptr [rsp]
1139 paddd xmm2, xmmword ptr [rsp+0x20]
1140 paddd xmm3, xmmword ptr [rsp+0x40]
1141 paddd xmm0, xmm5
1142 paddd xmm1, xmm6
1143 paddd xmm2, xmm7
1144 paddd xmm3, xmm4
1145 pxor xmm15, xmm0
1146 pxor xmm12, xmm1
1147 pxor xmm13, xmm2
1148 pxor xmm14, xmm3
1149 movdqa xmm8, xmmword ptr [ROT16+rip]
1150 pshufb xmm15, xmm8
1151 pshufb xmm12, xmm8
1152 pshufb xmm13, xmm8
1153 pshufb xmm14, xmm8
1154 paddd xmm10, xmm15
1155 paddd xmm11, xmm12
1156 movdqa xmm8, xmmword ptr [rsp+0x100]
1157 paddd xmm8, xmm13
1158 paddd xmm9, xmm14
1159 pxor xmm5, xmm10
1160 pxor xmm6, xmm11
1161 pxor xmm7, xmm8
1162 pxor xmm4, xmm9
1163 movdqa xmmword ptr [rsp+0x100], xmm8
1164 movdqa xmm8, xmm5
1165 psrld xmm8, 12
1166 pslld xmm5, 20
1167 por xmm5, xmm8
1168 movdqa xmm8, xmm6
1169 psrld xmm8, 12
1170 pslld xmm6, 20
1171 por xmm6, xmm8
1172 movdqa xmm8, xmm7
1173 psrld xmm8, 12
1174 pslld xmm7, 20
1175 por xmm7, xmm8
1176 movdqa xmm8, xmm4
1177 psrld xmm8, 12
1178 pslld xmm4, 20
1179 por xmm4, xmm8
1180 paddd xmm0, xmmword ptr [rsp+0x30]
1181 paddd xmm1, xmmword ptr [rsp+0xA0]
1182 paddd xmm2, xmmword ptr [rsp+0x60]
1183 paddd xmm3, xmmword ptr [rsp+0x70]
1184 paddd xmm0, xmm5
1185 paddd xmm1, xmm6
1186 paddd xmm2, xmm7
1187 paddd xmm3, xmm4
1188 pxor xmm15, xmm0
1189 pxor xmm12, xmm1
1190 pxor xmm13, xmm2
1191 pxor xmm14, xmm3
1192 movdqa xmm8, xmmword ptr [ROT8+rip]
1193 pshufb xmm15, xmm8
1194 pshufb xmm12, xmm8
1195 pshufb xmm13, xmm8
1196 pshufb xmm14, xmm8
1197 paddd xmm10, xmm15
1198 paddd xmm11, xmm12
1199 movdqa xmm8, xmmword ptr [rsp+0x100]
1200 paddd xmm8, xmm13
1201 paddd xmm9, xmm14
1202 pxor xmm5, xmm10
1203 pxor xmm6, xmm11
1204 pxor xmm7, xmm8
1205 pxor xmm4, xmm9
1206 movdqa xmmword ptr [rsp+0x100], xmm8
1207 movdqa xmm8, xmm5
1208 psrld xmm8, 7
1209 pslld xmm5, 25
1210 por xmm5, xmm8
1211 movdqa xmm8, xmm6
1212 psrld xmm8, 7
1213 pslld xmm6, 25
1214 por xmm6, xmm8
1215 movdqa xmm8, xmm7
1216 psrld xmm8, 7
1217 pslld xmm7, 25
1218 por xmm7, xmm8
1219 movdqa xmm8, xmm4
1220 psrld xmm8, 7
1221 pslld xmm4, 25
1222 por xmm4, xmm8
1223 paddd xmm0, xmmword ptr [rsp+0xB0]
1224 paddd xmm1, xmmword ptr [rsp+0x50]
1225 paddd xmm2, xmmword ptr [rsp+0x10]
1226 paddd xmm3, xmmword ptr [rsp+0x80]
1227 paddd xmm0, xmm4
1228 paddd xmm1, xmm5
1229 paddd xmm2, xmm6
1230 paddd xmm3, xmm7
1231 pxor xmm12, xmm0
1232 pxor xmm13, xmm1
1233 pxor xmm14, xmm2
1234 pxor xmm15, xmm3
1235 movdqa xmm8, xmmword ptr [ROT16+rip]
1236 pshufb xmm12, xmm8
1237 pshufb xmm13, xmm8
1238 pshufb xmm14, xmm8
1239 pshufb xmm15, xmm8
1240 movdqa xmm8, xmmword ptr [rsp+0x100]
1241 paddd xmm8, xmm12
1242 paddd xmm9, xmm13
1243 paddd xmm10, xmm14
1244 paddd xmm11, xmm15
1245 pxor xmm4, xmm8
1246 pxor xmm5, xmm9
1247 pxor xmm6, xmm10
1248 pxor xmm7, xmm11
1249 movdqa xmmword ptr [rsp+0x100], xmm8
1250 movdqa xmm8, xmm4
1251 psrld xmm8, 12
1252 pslld xmm4, 20
1253 por xmm4, xmm8
1254 movdqa xmm8, xmm5
1255 psrld xmm8, 12
1256 pslld xmm5, 20
1257 por xmm5, xmm8
1258 movdqa xmm8, xmm6
1259 psrld xmm8, 12
1260 pslld xmm6, 20
1261 por xmm6, xmm8
1262 movdqa xmm8, xmm7
1263 psrld xmm8, 12
1264 pslld xmm7, 20
1265 por xmm7, xmm8
1266 paddd xmm0, xmmword ptr [rsp+0xF0]
1267 paddd xmm1, xmmword ptr [rsp]
1268 paddd xmm2, xmmword ptr [rsp+0x90]
1269 paddd xmm3, xmmword ptr [rsp+0x60]
1270 paddd xmm0, xmm4
1271 paddd xmm1, xmm5
1272 paddd xmm2, xmm6
1273 paddd xmm3, xmm7
1274 pxor xmm12, xmm0
1275 pxor xmm13, xmm1
1276 pxor xmm14, xmm2
1277 pxor xmm15, xmm3
1278 movdqa xmm8, xmmword ptr [ROT8+rip]
1279 pshufb xmm12, xmm8
1280 pshufb xmm13, xmm8
1281 pshufb xmm14, xmm8
1282 pshufb xmm15, xmm8
1283 movdqa xmm8, xmmword ptr [rsp+0x100]
1284 paddd xmm8, xmm12
1285 paddd xmm9, xmm13
1286 paddd xmm10, xmm14
1287 paddd xmm11, xmm15
1288 pxor xmm4, xmm8
1289 pxor xmm5, xmm9
1290 pxor xmm6, xmm10
1291 pxor xmm7, xmm11
1292 movdqa xmmword ptr [rsp+0x100], xmm8
1293 movdqa xmm8, xmm4
1294 psrld xmm8, 7
1295 pslld xmm4, 25
1296 por xmm4, xmm8
1297 movdqa xmm8, xmm5
1298 psrld xmm8, 7
1299 pslld xmm5, 25
1300 por xmm5, xmm8
1301 movdqa xmm8, xmm6
1302 psrld xmm8, 7
1303 pslld xmm6, 25
1304 por xmm6, xmm8
1305 movdqa xmm8, xmm7
1306 psrld xmm8, 7
1307 pslld xmm7, 25
1308 por xmm7, xmm8
1309 paddd xmm0, xmmword ptr [rsp+0xE0]
1310 paddd xmm1, xmmword ptr [rsp+0x20]
1311 paddd xmm2, xmmword ptr [rsp+0x30]
1312 paddd xmm3, xmmword ptr [rsp+0x70]
1313 paddd xmm0, xmm5
1314 paddd xmm1, xmm6
1315 paddd xmm2, xmm7
1316 paddd xmm3, xmm4
1317 pxor xmm15, xmm0
1318 pxor xmm12, xmm1
1319 pxor xmm13, xmm2
1320 pxor xmm14, xmm3
1321 movdqa xmm8, xmmword ptr [ROT16+rip]
1322 pshufb xmm15, xmm8
1323 pshufb xmm12, xmm8
1324 pshufb xmm13, xmm8
1325 pshufb xmm14, xmm8
1326 paddd xmm10, xmm15
1327 paddd xmm11, xmm12
1328 movdqa xmm8, xmmword ptr [rsp+0x100]
1329 paddd xmm8, xmm13
1330 paddd xmm9, xmm14
1331 pxor xmm5, xmm10
1332 pxor xmm6, xmm11
1333 pxor xmm7, xmm8
1334 pxor xmm4, xmm9
1335 movdqa xmmword ptr [rsp+0x100], xmm8
1336 movdqa xmm8, xmm5
1337 psrld xmm8, 12
1338 pslld xmm5, 20
1339 por xmm5, xmm8
1340 movdqa xmm8, xmm6
1341 psrld xmm8, 12
1342 pslld xmm6, 20
1343 por xmm6, xmm8
1344 movdqa xmm8, xmm7
1345 psrld xmm8, 12
1346 pslld xmm7, 20
1347 por xmm7, xmm8
1348 movdqa xmm8, xmm4
1349 psrld xmm8, 12
1350 pslld xmm4, 20
1351 por xmm4, xmm8
1352 paddd xmm0, xmmword ptr [rsp+0xA0]
1353 paddd xmm1, xmmword ptr [rsp+0xC0]
1354 paddd xmm2, xmmword ptr [rsp+0x40]
1355 paddd xmm3, xmmword ptr [rsp+0xD0]
1356 paddd xmm0, xmm5
1357 paddd xmm1, xmm6
1358 paddd xmm2, xmm7
1359 paddd xmm3, xmm4
1360 pxor xmm15, xmm0
1361 pxor xmm12, xmm1
1362 pxor xmm13, xmm2
1363 pxor xmm14, xmm3
1364 movdqa xmm8, xmmword ptr [ROT8+rip]
1365 pshufb xmm15, xmm8
1366 pshufb xmm12, xmm8
1367 pshufb xmm13, xmm8
1368 pshufb xmm14, xmm8
1369 paddd xmm10, xmm15
1370 paddd xmm11, xmm12
1371 movdqa xmm8, xmmword ptr [rsp+0x100]
1372 paddd xmm8, xmm13
1373 paddd xmm9, xmm14
1374 pxor xmm5, xmm10
1375 pxor xmm6, xmm11
1376 pxor xmm7, xmm8
1377 pxor xmm4, xmm9
1378 pxor xmm0, xmm8
1379 pxor xmm1, xmm9
1380 pxor xmm2, xmm10
1381 pxor xmm3, xmm11
1382 movdqa xmm8, xmm5
1383 psrld xmm8, 7
1384 pslld xmm5, 25
1385 por xmm5, xmm8
1386 movdqa xmm8, xmm6
1387 psrld xmm8, 7
1388 pslld xmm6, 25
1389 por xmm6, xmm8
1390 movdqa xmm8, xmm7
1391 psrld xmm8, 7
1392 pslld xmm7, 25
1393 por xmm7, xmm8
1394 movdqa xmm8, xmm4
1395 psrld xmm8, 7
1396 pslld xmm4, 25
1397 por xmm4, xmm8
1398 pxor xmm4, xmm12
1399 pxor xmm5, xmm13
1400 pxor xmm6, xmm14
1401 pxor xmm7, xmm15
1402 mov eax, r13d
1403 jne 9b
1404 movdqa xmm9, xmm0
1405 punpckldq xmm0, xmm1
1406 punpckhdq xmm9, xmm1
1407 movdqa xmm11, xmm2
1408 punpckldq xmm2, xmm3
1409 punpckhdq xmm11, xmm3
1410 movdqa xmm1, xmm0
1411 punpcklqdq xmm0, xmm2
1412 punpckhqdq xmm1, xmm2
1413 movdqa xmm3, xmm9
1414 punpcklqdq xmm9, xmm11
1415 punpckhqdq xmm3, xmm11
1416 movdqu xmmword ptr [rbx], xmm0
1417 movdqu xmmword ptr [rbx+0x20], xmm1
1418 movdqu xmmword ptr [rbx+0x40], xmm9
1419 movdqu xmmword ptr [rbx+0x60], xmm3
1420 movdqa xmm9, xmm4
1421 punpckldq xmm4, xmm5
1422 punpckhdq xmm9, xmm5
1423 movdqa xmm11, xmm6
1424 punpckldq xmm6, xmm7
1425 punpckhdq xmm11, xmm7
1426 movdqa xmm5, xmm4
1427 punpcklqdq xmm4, xmm6
1428 punpckhqdq xmm5, xmm6
1429 movdqa xmm7, xmm9
1430 punpcklqdq xmm9, xmm11
1431 punpckhqdq xmm7, xmm11
1432 movdqu xmmword ptr [rbx+0x10], xmm4
1433 movdqu xmmword ptr [rbx+0x30], xmm5
1434 movdqu xmmword ptr [rbx+0x50], xmm9
1435 movdqu xmmword ptr [rbx+0x70], xmm7
1436 movdqa xmm1, xmmword ptr [rsp+0x110]
1437 movdqa xmm0, xmm1
1438 paddd xmm1, xmmword ptr [rsp+0x150]
1439 movdqa xmmword ptr [rsp+0x110], xmm1
1440 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
1441 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
1442 pcmpgtd xmm0, xmm1
1443 movdqa xmm1, xmmword ptr [rsp+0x120]
1444 psubd xmm1, xmm0
1445 movdqa xmmword ptr [rsp+0x120], xmm1
1446 add rbx, 128
1447 add rdi, 32
1448 sub rsi, 4
1449 cmp rsi, 4
1450 jnc 2b
1451 test rsi, rsi
1452 jnz 3f
1453 4:
1454 mov rsp, rbp
1455 pop rbp
1456 pop rbx
1457 pop r12
1458 pop r13
1459 pop r14
1460 pop r15
1461 RET
1462 .p2align 5
1463 3:
1464 test esi, 0x2
1465 je 3f
1466 movups xmm0, xmmword ptr [rcx]
1467 movups xmm1, xmmword ptr [rcx+0x10]
1468 movaps xmm8, xmm0
1469 movaps xmm9, xmm1
1470 movd xmm13, dword ptr [rsp+0x110]
1471 pinsrd xmm13, dword ptr [rsp+0x120], 1
1472 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1473 movaps xmmword ptr [rsp], xmm13
1474 movd xmm14, dword ptr [rsp+0x114]
1475 pinsrd xmm14, dword ptr [rsp+0x124], 1
1476 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1477 movaps xmmword ptr [rsp+0x10], xmm14
1478 mov r8, qword ptr [rdi]
1479 mov r9, qword ptr [rdi+0x8]
1480 movzx eax, byte ptr [rbp+0x40]
1481 or eax, r13d
1482 xor edx, edx
1483 2:
1484 mov r14d, eax
1485 or eax, r12d
1486 add rdx, 64
1487 cmp rdx, r15
1488 cmovne eax, r14d
1489 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1490 movaps xmm10, xmm2
1491 movups xmm4, xmmword ptr [r8+rdx-0x40]
1492 movups xmm5, xmmword ptr [r8+rdx-0x30]
1493 movaps xmm3, xmm4
1494 shufps xmm4, xmm5, 136
1495 shufps xmm3, xmm5, 221
1496 movaps xmm5, xmm3
1497 movups xmm6, xmmword ptr [r8+rdx-0x20]
1498 movups xmm7, xmmword ptr [r8+rdx-0x10]
1499 movaps xmm3, xmm6
1500 shufps xmm6, xmm7, 136
1501 pshufd xmm6, xmm6, 0x93
1502 shufps xmm3, xmm7, 221
1503 pshufd xmm7, xmm3, 0x93
1504 movups xmm12, xmmword ptr [r9+rdx-0x40]
1505 movups xmm13, xmmword ptr [r9+rdx-0x30]
1506 movaps xmm11, xmm12
1507 shufps xmm12, xmm13, 136
1508 shufps xmm11, xmm13, 221
1509 movaps xmm13, xmm11
1510 movups xmm14, xmmword ptr [r9+rdx-0x20]
1511 movups xmm15, xmmword ptr [r9+rdx-0x10]
1512 movaps xmm11, xmm14
1513 shufps xmm14, xmm15, 136
1514 pshufd xmm14, xmm14, 0x93
1515 shufps xmm11, xmm15, 221
1516 pshufd xmm15, xmm11, 0x93
1517 movaps xmm3, xmmword ptr [rsp]
1518 movaps xmm11, xmmword ptr [rsp+0x10]
1519 pinsrd xmm3, eax, 3
1520 pinsrd xmm11, eax, 3
1521 mov al, 7
1522 9:
1523 paddd xmm0, xmm4
1524 paddd xmm8, xmm12
1525 movaps xmmword ptr [rsp+0x20], xmm4
1526 movaps xmmword ptr [rsp+0x30], xmm12
1527 paddd xmm0, xmm1
1528 paddd xmm8, xmm9
1529 pxor xmm3, xmm0
1530 pxor xmm11, xmm8
1531 movaps xmm12, xmmword ptr [ROT16+rip]
1532 pshufb xmm3, xmm12
1533 pshufb xmm11, xmm12
1534 paddd xmm2, xmm3
1535 paddd xmm10, xmm11
1536 pxor xmm1, xmm2
1537 pxor xmm9, xmm10
1538 movdqa xmm4, xmm1
1539 pslld xmm1, 20
1540 psrld xmm4, 12
1541 por xmm1, xmm4
1542 movdqa xmm4, xmm9
1543 pslld xmm9, 20
1544 psrld xmm4, 12
1545 por xmm9, xmm4
1546 paddd xmm0, xmm5
1547 paddd xmm8, xmm13
1548 movaps xmmword ptr [rsp+0x40], xmm5
1549 movaps xmmword ptr [rsp+0x50], xmm13
1550 paddd xmm0, xmm1
1551 paddd xmm8, xmm9
1552 pxor xmm3, xmm0
1553 pxor xmm11, xmm8
1554 movaps xmm13, xmmword ptr [ROT8+rip]
1555 pshufb xmm3, xmm13
1556 pshufb xmm11, xmm13
1557 paddd xmm2, xmm3
1558 paddd xmm10, xmm11
1559 pxor xmm1, xmm2
1560 pxor xmm9, xmm10
1561 movdqa xmm4, xmm1
1562 pslld xmm1, 25
1563 psrld xmm4, 7
1564 por xmm1, xmm4
1565 movdqa xmm4, xmm9
1566 pslld xmm9, 25
1567 psrld xmm4, 7
1568 por xmm9, xmm4
1569 pshufd xmm0, xmm0, 0x93
1570 pshufd xmm8, xmm8, 0x93
1571 pshufd xmm3, xmm3, 0x4E
1572 pshufd xmm11, xmm11, 0x4E
1573 pshufd xmm2, xmm2, 0x39
1574 pshufd xmm10, xmm10, 0x39
1575 paddd xmm0, xmm6
1576 paddd xmm8, xmm14
1577 paddd xmm0, xmm1
1578 paddd xmm8, xmm9
1579 pxor xmm3, xmm0
1580 pxor xmm11, xmm8
1581 pshufb xmm3, xmm12
1582 pshufb xmm11, xmm12
1583 paddd xmm2, xmm3
1584 paddd xmm10, xmm11
1585 pxor xmm1, xmm2
1586 pxor xmm9, xmm10
1587 movdqa xmm4, xmm1
1588 pslld xmm1, 20
1589 psrld xmm4, 12
1590 por xmm1, xmm4
1591 movdqa xmm4, xmm9
1592 pslld xmm9, 20
1593 psrld xmm4, 12
1594 por xmm9, xmm4
1595 paddd xmm0, xmm7
1596 paddd xmm8, xmm15
1597 paddd xmm0, xmm1
1598 paddd xmm8, xmm9
1599 pxor xmm3, xmm0
1600 pxor xmm11, xmm8
1601 pshufb xmm3, xmm13
1602 pshufb xmm11, xmm13
1603 paddd xmm2, xmm3
1604 paddd xmm10, xmm11
1605 pxor xmm1, xmm2
1606 pxor xmm9, xmm10
1607 movdqa xmm4, xmm1
1608 pslld xmm1, 25
1609 psrld xmm4, 7
1610 por xmm1, xmm4
1611 movdqa xmm4, xmm9
1612 pslld xmm9, 25
1613 psrld xmm4, 7
1614 por xmm9, xmm4
1615 pshufd xmm0, xmm0, 0x39
1616 pshufd xmm8, xmm8, 0x39
1617 pshufd xmm3, xmm3, 0x4E
1618 pshufd xmm11, xmm11, 0x4E
1619 pshufd xmm2, xmm2, 0x93
1620 pshufd xmm10, xmm10, 0x93
1621 dec al
1622 je 9f
1623 movdqa xmm12, xmmword ptr [rsp+0x20]
1624 movdqa xmm5, xmmword ptr [rsp+0x40]
1625 pshufd xmm13, xmm12, 0x0F
1626 shufps xmm12, xmm5, 214
1627 pshufd xmm4, xmm12, 0x39
1628 movdqa xmm12, xmm6
1629 shufps xmm12, xmm7, 250
1630 pblendw xmm13, xmm12, 0xCC
1631 movdqa xmm12, xmm7
1632 punpcklqdq xmm12, xmm5
1633 pblendw xmm12, xmm6, 0xC0
1634 pshufd xmm12, xmm12, 0x78
1635 punpckhdq xmm5, xmm7
1636 punpckldq xmm6, xmm5
1637 pshufd xmm7, xmm6, 0x1E
1638 movdqa xmmword ptr [rsp+0x20], xmm13
1639 movdqa xmmword ptr [rsp+0x40], xmm12
1640 movdqa xmm5, xmmword ptr [rsp+0x30]
1641 movdqa xmm13, xmmword ptr [rsp+0x50]
1642 pshufd xmm6, xmm5, 0x0F
1643 shufps xmm5, xmm13, 214
1644 pshufd xmm12, xmm5, 0x39
1645 movdqa xmm5, xmm14
1646 shufps xmm5, xmm15, 250
1647 pblendw xmm6, xmm5, 0xCC
1648 movdqa xmm5, xmm15
1649 punpcklqdq xmm5, xmm13
1650 pblendw xmm5, xmm14, 0xC0
1651 pshufd xmm5, xmm5, 0x78
1652 punpckhdq xmm13, xmm15
1653 punpckldq xmm14, xmm13
1654 pshufd xmm15, xmm14, 0x1E
1655 movdqa xmm13, xmm6
1656 movdqa xmm14, xmm5
1657 movdqa xmm5, xmmword ptr [rsp+0x20]
1658 movdqa xmm6, xmmword ptr [rsp+0x40]
1659 jmp 9b
1660 9:
1661 pxor xmm0, xmm2
1662 pxor xmm1, xmm3
1663 pxor xmm8, xmm10
1664 pxor xmm9, xmm11
1665 mov eax, r13d
1666 cmp rdx, r15
1667 jne 2b
1668 movups xmmword ptr [rbx], xmm0
1669 movups xmmword ptr [rbx+0x10], xmm1
1670 movups xmmword ptr [rbx+0x20], xmm8
1671 movups xmmword ptr [rbx+0x30], xmm9
1672 movdqa xmm0, xmmword ptr [rsp+0x130]
1673 movdqa xmm1, xmmword ptr [rsp+0x110]
1674 movdqa xmm2, xmmword ptr [rsp+0x120]
1675 movdqu xmm3, xmmword ptr [rsp+0x118]
1676 movdqu xmm4, xmmword ptr [rsp+0x128]
1677 blendvps xmm1, xmm3, xmm0
1678 blendvps xmm2, xmm4, xmm0
1679 movdqa xmmword ptr [rsp+0x110], xmm1
1680 movdqa xmmword ptr [rsp+0x120], xmm2
1681 add rdi, 16
1682 add rbx, 64
1683 sub rsi, 2
1684 3:
1685 test esi, 0x1
1686 je 4b
1687 movups xmm0, xmmword ptr [rcx]
1688 movups xmm1, xmmword ptr [rcx+0x10]
1689 movd xmm13, dword ptr [rsp+0x110]
1690 pinsrd xmm13, dword ptr [rsp+0x120], 1
1691 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1692 movaps xmm14, xmmword ptr [ROT8+rip]
1693 movaps xmm15, xmmword ptr [ROT16+rip]
1694 mov r8, qword ptr [rdi]
1695 movzx eax, byte ptr [rbp+0x40]
1696 or eax, r13d
1697 xor edx, edx
1698 2:
1699 mov r14d, eax
1700 or eax, r12d
1701 add rdx, 64
1702 cmp rdx, r15
1703 cmovne eax, r14d
1704 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1705 movaps xmm3, xmm13
1706 pinsrd xmm3, eax, 3
1707 movups xmm4, xmmword ptr [r8+rdx-0x40]
1708 movups xmm5, xmmword ptr [r8+rdx-0x30]
1709 movaps xmm8, xmm4
1710 shufps xmm4, xmm5, 136
1711 shufps xmm8, xmm5, 221
1712 movaps xmm5, xmm8
1713 movups xmm6, xmmword ptr [r8+rdx-0x20]
1714 movups xmm7, xmmword ptr [r8+rdx-0x10]
1715 movaps xmm8, xmm6
1716 shufps xmm6, xmm7, 136
1717 pshufd xmm6, xmm6, 0x93
1718 shufps xmm8, xmm7, 221
1719 pshufd xmm7, xmm8, 0x93
1720 mov al, 7
1721 9:
1722 paddd xmm0, xmm4
1723 paddd xmm0, xmm1
1724 pxor xmm3, xmm0
1725 pshufb xmm3, xmm15
1726 paddd xmm2, xmm3
1727 pxor xmm1, xmm2
1728 movdqa xmm11, xmm1
1729 pslld xmm1, 20
1730 psrld xmm11, 12
1731 por xmm1, xmm11
1732 paddd xmm0, xmm5
1733 paddd xmm0, xmm1
1734 pxor xmm3, xmm0
1735 pshufb xmm3, xmm14
1736 paddd xmm2, xmm3
1737 pxor xmm1, xmm2
1738 movdqa xmm11, xmm1
1739 pslld xmm1, 25
1740 psrld xmm11, 7
1741 por xmm1, xmm11
1742 pshufd xmm0, xmm0, 0x93
1743 pshufd xmm3, xmm3, 0x4E
1744 pshufd xmm2, xmm2, 0x39
1745 paddd xmm0, xmm6
1746 paddd xmm0, xmm1
1747 pxor xmm3, xmm0
1748 pshufb xmm3, xmm15
1749 paddd xmm2, xmm3
1750 pxor xmm1, xmm2
1751 movdqa xmm11, xmm1
1752 pslld xmm1, 20
1753 psrld xmm11, 12
1754 por xmm1, xmm11
1755 paddd xmm0, xmm7
1756 paddd xmm0, xmm1
1757 pxor xmm3, xmm0
1758 pshufb xmm3, xmm14
1759 paddd xmm2, xmm3
1760 pxor xmm1, xmm2
1761 movdqa xmm11, xmm1
1762 pslld xmm1, 25
1763 psrld xmm11, 7
1764 por xmm1, xmm11
1765 pshufd xmm0, xmm0, 0x39
1766 pshufd xmm3, xmm3, 0x4E
1767 pshufd xmm2, xmm2, 0x93
1768 dec al
1769 jz 9f
1770 movdqa xmm8, xmm4
1771 shufps xmm8, xmm5, 214
1772 pshufd xmm9, xmm4, 0x0F
1773 pshufd xmm4, xmm8, 0x39
1774 movdqa xmm8, xmm6
1775 shufps xmm8, xmm7, 250
1776 pblendw xmm9, xmm8, 0xCC
1777 movdqa xmm8, xmm7
1778 punpcklqdq xmm8, xmm5
1779 pblendw xmm8, xmm6, 0xC0
1780 pshufd xmm8, xmm8, 0x78
1781 punpckhdq xmm5, xmm7
1782 punpckldq xmm6, xmm5
1783 pshufd xmm7, xmm6, 0x1E
1784 movdqa xmm5, xmm9
1785 movdqa xmm6, xmm8
1786 jmp 9b
1787 9:
1788 pxor xmm0, xmm2
1789 pxor xmm1, xmm3
1790 mov eax, r13d
1791 cmp rdx, r15
1792 jne 2b
1793 movups xmmword ptr [rbx], xmm0
1794 movups xmmword ptr [rbx+0x10], xmm1
1795 jmp 4b
1796 SET_SIZE(zfs_blake3_hash_many_sse41)
1797
/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Compress one 64-byte block with seven BLAKE3 rounds and overwrite the
 * 32-byte chaining value with the result.
 *
 * Arguments, as consumed below:
 *   rdi  chaining value: 32 bytes, loaded up front and stored at the end
 *   rsi  message block: 64 bytes, read only
 *   dl   block length (low byte only)
 *   rcx  64-bit counter
 *   r8b  flags (low byte only)
 * Both pointers are accessed with unaligned moves.
 *
 * Clobbers rax, rdx, flags, xmm0-xmm9, xmm11, xmm14 and xmm15.
 *
 * The 4x4 state of 32-bit words is held one row per register:
 *   xmm0 = row a, xmm1 = row b, xmm2 = row c, xmm3 = row d.
 * In the comments ">>>" means "rotate each 32-bit lane right".
 *
 * NOTE(review): block length and flags are taken as bytes, exactly as
 * zfs_blake3_compress_xof_sse41 does; this presumes the C prototype
 * declares both as uint8_t - confirm against the header.
 */
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
        ENDBR
        /* Rows a and b come from the chaining value, row c from the IV. */
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /*
         * Row d = { counter low, counter high, block length, flags }.
         * Zero-extend the two byte-wide arguments before packing them, so
         * that whatever the caller left in the upper bits of rdx and r8
         * cannot leak into the block-length and flags words.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /*
         * Load the 16 message words and split them into even/odd vectors:
         *   xmm4 = m0,  m2,  m4,  m6     xmm5 = m1,  m3,  m5,  m7
         *   xmm6 = m14, m8,  m10, m12    xmm7 = m15, m9,  m11, m13
         * xmm8 is scratch here.
         */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /* pshufb masks that rotate every lane right by 8 and by 16 bits. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        /* al counts the remaining rounds. */
        mov     al, 7
9:
        /* Columns, first half: a += m + b; d = (d ^ a) >>> 16. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        /* c += d; b = (b ^ c) >>> 12. */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        /* Columns, second half: a += m + b; d = (d ^ a) >>> 8. */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        /* c += d; b = (b ^ c) >>> 7. */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows a, d and c so diagonals become columns. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Diagonals, first half: a += m + b; d = (d ^ a) >>> 16. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        /* c += d; b = (b ^ c) >>> 12. */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        /* Diagonals, second half: a += m + b; d = (d ^ a) >>> 8. */
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        /* c += d; b = (b ^ c) >>> 7. */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation of rows a, d and c. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /*
         * Shuffle the four message vectors (xmm4-xmm7) into the word order
         * consumed by the next round; xmm8 and xmm9 are scratch.
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* New chaining value = (row a ^ row c, row b ^ row d). */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        RET
SET_SIZE(zfs_blake3_compress_in_place_sse41)
1897
/*
 * zfs_blake3_compress_xof_sse41
 *
 * Seven-round BLAKE3 compression of a single 64-byte block that emits the
 * full 64-byte state instead of updating the chaining value.
 *
 * Register inputs, as read by the code:
 *   rdi  chaining value, 32 bytes (read twice, never written)
 *   rsi  message block, 64 bytes
 *   dl   block length
 *   rcx  64-bit counter
 *   r8b  flags
 *   r9   output buffer, 64 bytes
 * All three buffers are touched with unaligned loads/stores only.
 *
 * Scratch: rax, rdx, flags, xmm0-xmm9, xmm11, xmm14, xmm15.
 *
 * State layout: xmm0/xmm1/xmm2/xmm3 hold rows a/b/c/d of the 4x4 word
 * matrix.  "ror N" below is a per-lane 32-bit rotate right.
 */
ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
        ENDBR
        /* Lane-rotation masks for pshufb, kept live across every round. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]

        /* a:b <- chaining value, c <- IV. */
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]

        /* d <- { counter (64 bit), block length, flags }. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4

        /* Words 0-7: even-indexed ones to xmm4, odd-indexed ones to xmm5. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 0x88
        shufps  xmm8, xmm5, 0xDD
        movaps  xmm5, xmm8

        /* Words 8-15: same split, then each vector rotated by one lane. */
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 0x88
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 0xDD
        pshufd  xmm7, xmm8, 0x93

        mov     al, 7                   /* rounds still to run */
.Lxof_sse41_round:
        /* --- column step, message vector xmm4 --- */
        paddd   xmm0, xmm4              /* a += m */
        paddd   xmm0, xmm1              /* a += b */
        pxor    xmm3, xmm0              /* d ^= a */
        pshufb  xmm3, xmm15             /* d = d ror 16 */
        paddd   xmm2, xmm3              /* c += d */
        pxor    xmm1, xmm2              /* b ^= c */
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* b = b ror 12 */
        /* --- column step, message vector xmm5 --- */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* d = d ror 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* b = b ror 7 */
        /* Shift rows a, d, c sideways: diagonals now sit in columns. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* --- diagonal step, message vector xmm6 --- */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* d = d ror 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* b = b ror 12 */
        /* --- diagonal step, message vector xmm7 --- */
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* d = d ror 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* b = b ror 7 */
        /* Shift rows a, d, c back to their column positions. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93

        dec     al
        je      .Lxof_sse41_done

        /*
         * Re-order the message words held in xmm4-xmm7 for the round that
         * follows (xmm8/xmm9 are temporaries).
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 0xD6
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 0xFA
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     .Lxof_sse41_round

.Lxof_sse41_done:
        /* Fetch the input chaining value again before anything is stored. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        /* out[0..31] = (a ^ c, b ^ d); out[32..63] = (c ^ cv_lo, d ^ cv_hi). */
        pxor    xmm0, xmm2
        pxor    xmm2, xmm4
        pxor    xmm1, xmm3
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        RET
SET_SIZE(zfs_blake3_compress_xof_sse41)
2005
SECTION_STATIC

/*
 * Constant vectors shared by the SSE4.1 routines in this file.  The table
 * starts on a 64-byte boundary and every entry is exactly 16 bytes, so
 * each label can be used as an aligned xmmword operand.
 *
 * ".fill 4, 4, v" emits the 32-bit value v four times, i.e. v in every
 * lane of the vector.
 */
.p2align 6
/* The four initialisation words, one per lane. */
BLAKE3_IV:
        .long   0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotate each 32-bit lane right by 16 bits. */
ROT16:
        .byte   2, 3, 0, 1
        .byte   6, 7, 4, 5
        .byte   10, 11, 8, 9
        .byte   14, 15, 12, 13
/* pshufb mask: rotate each 32-bit lane right by 8 bits. */
ROT8:
        .byte   1, 2, 3, 0
        .byte   5, 6, 7, 4
        .byte   9, 10, 11, 8
        .byte   13, 14, 15, 12
/* Per-lane counter offsets for the four parallel inputs of hash_many. */
ADD0:
        .long   0, 1, 2, 3
/* Counter step applied after each group of four inputs in hash_many. */
ADD1:
        .fill   4, 4, 4
/* Each initialisation word replicated across all four lanes. */
BLAKE3_IV_0:
        .fill   4, 4, 0x6A09E667
BLAKE3_IV_1:
        .fill   4, 4, 0xBB67AE85
BLAKE3_IV_2:
        .fill   4, 4, 0x3C6EF372
BLAKE3_IV_3:
        .fill   4, 4, 0xA54FF53A
/* Block size in bytes, replicated across all four lanes. */
BLAKE3_BLOCK_LEN:
        .fill   4, 4, 64
/*
 * Sign-bit mask: hash_many XORs it into both operands before pcmpgtd so
 * that the signed compare orders the counter words as unsigned values.
 */
CMP_MSB_MASK:
        .fill   4, 4, 0x80000000
2032
2033 #endif /* HAVE_SSE4_1 */
2034
2035 #ifdef __ELF__
2036 .section .note.GNU-stack,"",%progbits
2037 #endif