1 /*
2 * ====================================================================
3 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4 * project. Rights for redistribution and usage in source and binary
5 * forms are granted according to the OpenSSL license.
6 * ====================================================================
7 *
8 * sha256/512_block procedure for x86_64.
9 *
10 * 40% improvement over compiler-generated code on Opteron. On EM64T
11 * sha256 was observed to run >80% faster and sha512 - >40%. No magical
12 * tricks, just straight implementation... I really wonder why gcc
13 * [being armed with inline assembler] fails to generate as fast code.
 * The only thing which is cool about this module is that the very
 * same instruction sequence is used for both SHA-256 and SHA-512. In
16 * former case the instructions operate on 32-bit operands, while in
17 * latter - on 64-bit ones. All I had to do is to get one flavor right,
18 * the other one passed the test right away:-)
19 *
20 * sha256_block runs in ~1005 cycles on Opteron, which gives you
21 * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
22 * frequency in GHz. sha512_block runs in ~1275 cycles, which results
23 * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
24 * Well, if you compare it to IA-64 implementation, which maintains
25 * X[16] in register bank[!], tends to 4 instructions per CPU clock
26 * cycle and runs in 1003 cycles, 1275 is very good result for 3-way
27 * issue Opteron pipeline and X[16] maintained in memory. So that *if*
28 * there is a way to improve it, *then* the only way would be to try to
29 * offload X[16] updates to SSE unit, but that would require "deeper"
30 * loop unroll, which in turn would naturally cause size blow-up, not
31 * to mention increased complexity! And once again, only *if* it's
32 * actually possible to noticeably improve overall ILP, instruction
33 * level parallelism, on a given CPU implementation in this case.
34 *
35 * Special note on Intel EM64T. While Opteron CPU exhibits perfect
36 * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
37 * [currently available] EM64T CPUs apparently are far from it. On the
38 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
39 * sha256_block:-( This is presumably because 64-bit shifts/rotates
40 * apparently are not atomic instructions, but implemented in microcode.
41 */
42
43 /*
44 * OpenSolaris OS modifications
45 *
46 * Sun elects to use this software under the BSD license.
47 *
48 * This source originates from OpenSSL file sha512-x86_64.pl at
49 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
50 * (presumably for future OpenSSL release 0.9.8h), with these changes:
51 *
52 * 1. Added perl "use strict" and declared variables.
53 *
54 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
55 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
56 *
57 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
58 * assemblers). Replaced the .picmeup macro with assembler code.
59 *
60 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
61 * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
62 */
63
64 /*
 * This file was generated by a perl script (sha512-x86_64.pl) that was
 * used to generate sha256 and sha512 variants from the same code base.
67 * The comments from the original file have been pasted above.
68 */
69
70 #if defined(lint) || defined(__lint)
71 #include <sys/stdint.h>
72 #include <sha2/sha2.h>
73
74 void
75 SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
76 {
77 (void) ctx, (void) in, (void) num;
78 }
79
80
81 #else
82 #define _ASM
83 #include <sys/asm_linkage.h>
84
85 ENTRY_NP(SHA256TransformBlocks)
86 .cfi_startproc
87 ENDBR
88 movq %rsp, %rax
89 .cfi_def_cfa_register %rax
90 push %rbx
91 .cfi_offset %rbx,-16
92 push %rbp
93 .cfi_offset %rbp,-24
94 push %r12
95 .cfi_offset %r12,-32
96 push %r13
97 .cfi_offset %r13,-40
98 push %r14
99 .cfi_offset %r14,-48
100 push %r15
101 .cfi_offset %r15,-56
102 mov %rsp,%rbp # copy %rsp
103 shl $4,%rdx # num*16
104 sub $16*4+4*8,%rsp
105 lea (%rsi,%rdx,4),%rdx # inp+num*16*4
106 and $-64,%rsp # align stack frame
107 add $8,%rdi # Skip OpenSolaris field, "algotype"
108 mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg
109 mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg
110 mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg
111 mov %rbp,16*4+3*8(%rsp) # save copy of %rsp
112 # echo ".cfi_cfa_expression %rsp+88,deref,+56" |
113 # openssl/crypto/perlasm/x86_64-xlate.pl
114 .cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38
115
116 #.picmeup %rbp
117 # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
118 # the address of the "next" instruction into the target register
119 # (%rbp). This generates these 2 instructions:
120 lea .Llea(%rip),%rbp
121 #nop # .picmeup generates a nop for mod 8 alignment--not needed here
122
123 .Llea:
124 lea K256-.(%rbp),%rbp
125
126 mov 4*0(%rdi),%eax
127 mov 4*1(%rdi),%ebx
128 mov 4*2(%rdi),%ecx
129 mov 4*3(%rdi),%edx
130 mov 4*4(%rdi),%r8d
131 mov 4*5(%rdi),%r9d
132 mov 4*6(%rdi),%r10d
133 mov 4*7(%rdi),%r11d
134 jmp .Lloop
135
136 .balign 16
137 .Lloop:
138 xor %rdi,%rdi
139 mov 4*0(%rsi),%r12d
140 bswap %r12d
141 mov %r8d,%r13d
142 mov %r8d,%r14d
143 mov %r9d,%r15d
144
145 ror $6,%r13d
146 ror $11,%r14d
147 xor %r10d,%r15d # f^g
148
149 xor %r14d,%r13d
150 ror $14,%r14d
151 and %r8d,%r15d # (f^g)&e
152 mov %r12d,0(%rsp)
153
154 xor %r14d,%r13d # Sigma1(e)
155 xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
156 add %r11d,%r12d # T1+=h
157
158 mov %eax,%r11d
159 add %r13d,%r12d # T1+=Sigma1(e)
160
161 add %r15d,%r12d # T1+=Ch(e,f,g)
162 mov %eax,%r13d
163 mov %eax,%r14d
164
165 ror $2,%r11d
166 ror $13,%r13d
167 mov %eax,%r15d
168 add (%rbp,%rdi,4),%r12d # T1+=K[round]
169
170 xor %r13d,%r11d
171 ror $9,%r13d
172 or %ecx,%r14d # a|c
173
174 xor %r13d,%r11d # h=Sigma0(a)
175 and %ecx,%r15d # a&c
176 add %r12d,%edx # d+=T1
177
178 and %ebx,%r14d # (a|c)&b
179 add %r12d,%r11d # h+=T1
180
181 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
182 lea 1(%rdi),%rdi # round++
183
184 add %r14d,%r11d # h+=Maj(a,b,c)
185 mov 4*1(%rsi),%r12d
186 bswap %r12d
187 mov %edx,%r13d
188 mov %edx,%r14d
189 mov %r8d,%r15d
190
191 ror $6,%r13d
192 ror $11,%r14d
193 xor %r9d,%r15d # f^g
194
195 xor %r14d,%r13d
196 ror $14,%r14d
197 and %edx,%r15d # (f^g)&e
198 mov %r12d,4(%rsp)
199
200 xor %r14d,%r13d # Sigma1(e)
201 xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
202 add %r10d,%r12d # T1+=h
203
204 mov %r11d,%r10d
205 add %r13d,%r12d # T1+=Sigma1(e)
206
207 add %r15d,%r12d # T1+=Ch(e,f,g)
208 mov %r11d,%r13d
209 mov %r11d,%r14d
210
211 ror $2,%r10d
212 ror $13,%r13d
213 mov %r11d,%r15d
214 add (%rbp,%rdi,4),%r12d # T1+=K[round]
215
216 xor %r13d,%r10d
217 ror $9,%r13d
218 or %ebx,%r14d # a|c
219
220 xor %r13d,%r10d # h=Sigma0(a)
221 and %ebx,%r15d # a&c
222 add %r12d,%ecx # d+=T1
223
224 and %eax,%r14d # (a|c)&b
225 add %r12d,%r10d # h+=T1
226
227 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
228 lea 1(%rdi),%rdi # round++
229
230 add %r14d,%r10d # h+=Maj(a,b,c)
231 mov 4*2(%rsi),%r12d
232 bswap %r12d
233 mov %ecx,%r13d
234 mov %ecx,%r14d
235 mov %edx,%r15d
236
237 ror $6,%r13d
238 ror $11,%r14d
239 xor %r8d,%r15d # f^g
240
241 xor %r14d,%r13d
242 ror $14,%r14d
243 and %ecx,%r15d # (f^g)&e
244 mov %r12d,8(%rsp)
245
246 xor %r14d,%r13d # Sigma1(e)
247 xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
248 add %r9d,%r12d # T1+=h
249
250 mov %r10d,%r9d
251 add %r13d,%r12d # T1+=Sigma1(e)
252
253 add %r15d,%r12d # T1+=Ch(e,f,g)
254 mov %r10d,%r13d
255 mov %r10d,%r14d
256
257 ror $2,%r9d
258 ror $13,%r13d
259 mov %r10d,%r15d
260 add (%rbp,%rdi,4),%r12d # T1+=K[round]
261
262 xor %r13d,%r9d
263 ror $9,%r13d
264 or %eax,%r14d # a|c
265
266 xor %r13d,%r9d # h=Sigma0(a)
267 and %eax,%r15d # a&c
268 add %r12d,%ebx # d+=T1
269
270 and %r11d,%r14d # (a|c)&b
271 add %r12d,%r9d # h+=T1
272
273 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
274 lea 1(%rdi),%rdi # round++
275
276 add %r14d,%r9d # h+=Maj(a,b,c)
277 mov 4*3(%rsi),%r12d
278 bswap %r12d
279 mov %ebx,%r13d
280 mov %ebx,%r14d
281 mov %ecx,%r15d
282
283 ror $6,%r13d
284 ror $11,%r14d
285 xor %edx,%r15d # f^g
286
287 xor %r14d,%r13d
288 ror $14,%r14d
289 and %ebx,%r15d # (f^g)&e
290 mov %r12d,12(%rsp)
291
292 xor %r14d,%r13d # Sigma1(e)
293 xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
294 add %r8d,%r12d # T1+=h
295
296 mov %r9d,%r8d
297 add %r13d,%r12d # T1+=Sigma1(e)
298
299 add %r15d,%r12d # T1+=Ch(e,f,g)
300 mov %r9d,%r13d
301 mov %r9d,%r14d
302
303 ror $2,%r8d
304 ror $13,%r13d
305 mov %r9d,%r15d
306 add (%rbp,%rdi,4),%r12d # T1+=K[round]
307
308 xor %r13d,%r8d
309 ror $9,%r13d
310 or %r11d,%r14d # a|c
311
312 xor %r13d,%r8d # h=Sigma0(a)
313 and %r11d,%r15d # a&c
314 add %r12d,%eax # d+=T1
315
316 and %r10d,%r14d # (a|c)&b
317 add %r12d,%r8d # h+=T1
318
319 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
320 lea 1(%rdi),%rdi # round++
321
322 add %r14d,%r8d # h+=Maj(a,b,c)
323 mov 4*4(%rsi),%r12d
324 bswap %r12d
325 mov %eax,%r13d
326 mov %eax,%r14d
327 mov %ebx,%r15d
328
329 ror $6,%r13d
330 ror $11,%r14d
331 xor %ecx,%r15d # f^g
332
333 xor %r14d,%r13d
334 ror $14,%r14d
335 and %eax,%r15d # (f^g)&e
336 mov %r12d,16(%rsp)
337
338 xor %r14d,%r13d # Sigma1(e)
339 xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
340 add %edx,%r12d # T1+=h
341
342 mov %r8d,%edx
343 add %r13d,%r12d # T1+=Sigma1(e)
344
345 add %r15d,%r12d # T1+=Ch(e,f,g)
346 mov %r8d,%r13d
347 mov %r8d,%r14d
348
349 ror $2,%edx
350 ror $13,%r13d
351 mov %r8d,%r15d
352 add (%rbp,%rdi,4),%r12d # T1+=K[round]
353
354 xor %r13d,%edx
355 ror $9,%r13d
356 or %r10d,%r14d # a|c
357
358 xor %r13d,%edx # h=Sigma0(a)
359 and %r10d,%r15d # a&c
360 add %r12d,%r11d # d+=T1
361
362 and %r9d,%r14d # (a|c)&b
363 add %r12d,%edx # h+=T1
364
365 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
366 lea 1(%rdi),%rdi # round++
367
368 add %r14d,%edx # h+=Maj(a,b,c)
369 mov 4*5(%rsi),%r12d
370 bswap %r12d
371 mov %r11d,%r13d
372 mov %r11d,%r14d
373 mov %eax,%r15d
374
375 ror $6,%r13d
376 ror $11,%r14d
377 xor %ebx,%r15d # f^g
378
379 xor %r14d,%r13d
380 ror $14,%r14d
381 and %r11d,%r15d # (f^g)&e
382 mov %r12d,20(%rsp)
383
384 xor %r14d,%r13d # Sigma1(e)
385 xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
386 add %ecx,%r12d # T1+=h
387
388 mov %edx,%ecx
389 add %r13d,%r12d # T1+=Sigma1(e)
390
391 add %r15d,%r12d # T1+=Ch(e,f,g)
392 mov %edx,%r13d
393 mov %edx,%r14d
394
395 ror $2,%ecx
396 ror $13,%r13d
397 mov %edx,%r15d
398 add (%rbp,%rdi,4),%r12d # T1+=K[round]
399
400 xor %r13d,%ecx
401 ror $9,%r13d
402 or %r9d,%r14d # a|c
403
404 xor %r13d,%ecx # h=Sigma0(a)
405 and %r9d,%r15d # a&c
406 add %r12d,%r10d # d+=T1
407
408 and %r8d,%r14d # (a|c)&b
409 add %r12d,%ecx # h+=T1
410
411 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
412 lea 1(%rdi),%rdi # round++
413
414 add %r14d,%ecx # h+=Maj(a,b,c)
415 mov 4*6(%rsi),%r12d
416 bswap %r12d
417 mov %r10d,%r13d
418 mov %r10d,%r14d
419 mov %r11d,%r15d
420
421 ror $6,%r13d
422 ror $11,%r14d
423 xor %eax,%r15d # f^g
424
425 xor %r14d,%r13d
426 ror $14,%r14d
427 and %r10d,%r15d # (f^g)&e
428 mov %r12d,24(%rsp)
429
430 xor %r14d,%r13d # Sigma1(e)
431 xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
432 add %ebx,%r12d # T1+=h
433
434 mov %ecx,%ebx
435 add %r13d,%r12d # T1+=Sigma1(e)
436
437 add %r15d,%r12d # T1+=Ch(e,f,g)
438 mov %ecx,%r13d
439 mov %ecx,%r14d
440
441 ror $2,%ebx
442 ror $13,%r13d
443 mov %ecx,%r15d
444 add (%rbp,%rdi,4),%r12d # T1+=K[round]
445
446 xor %r13d,%ebx
447 ror $9,%r13d
448 or %r8d,%r14d # a|c
449
450 xor %r13d,%ebx # h=Sigma0(a)
451 and %r8d,%r15d # a&c
452 add %r12d,%r9d # d+=T1
453
454 and %edx,%r14d # (a|c)&b
455 add %r12d,%ebx # h+=T1
456
457 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
458 lea 1(%rdi),%rdi # round++
459
460 add %r14d,%ebx # h+=Maj(a,b,c)
461 mov 4*7(%rsi),%r12d
462 bswap %r12d
463 mov %r9d,%r13d
464 mov %r9d,%r14d
465 mov %r10d,%r15d
466
467 ror $6,%r13d
468 ror $11,%r14d
469 xor %r11d,%r15d # f^g
470
471 xor %r14d,%r13d
472 ror $14,%r14d
473 and %r9d,%r15d # (f^g)&e
474 mov %r12d,28(%rsp)
475
476 xor %r14d,%r13d # Sigma1(e)
477 xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
478 add %eax,%r12d # T1+=h
479
480 mov %ebx,%eax
481 add %r13d,%r12d # T1+=Sigma1(e)
482
483 add %r15d,%r12d # T1+=Ch(e,f,g)
484 mov %ebx,%r13d
485 mov %ebx,%r14d
486
487 ror $2,%eax
488 ror $13,%r13d
489 mov %ebx,%r15d
490 add (%rbp,%rdi,4),%r12d # T1+=K[round]
491
492 xor %r13d,%eax
493 ror $9,%r13d
494 or %edx,%r14d # a|c
495
496 xor %r13d,%eax # h=Sigma0(a)
497 and %edx,%r15d # a&c
498 add %r12d,%r8d # d+=T1
499
500 and %ecx,%r14d # (a|c)&b
501 add %r12d,%eax # h+=T1
502
503 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
504 lea 1(%rdi),%rdi # round++
505
506 add %r14d,%eax # h+=Maj(a,b,c)
507 mov 4*8(%rsi),%r12d
508 bswap %r12d
509 mov %r8d,%r13d
510 mov %r8d,%r14d
511 mov %r9d,%r15d
512
513 ror $6,%r13d
514 ror $11,%r14d
515 xor %r10d,%r15d # f^g
516
517 xor %r14d,%r13d
518 ror $14,%r14d
519 and %r8d,%r15d # (f^g)&e
520 mov %r12d,32(%rsp)
521
522 xor %r14d,%r13d # Sigma1(e)
523 xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
524 add %r11d,%r12d # T1+=h
525
526 mov %eax,%r11d
527 add %r13d,%r12d # T1+=Sigma1(e)
528
529 add %r15d,%r12d # T1+=Ch(e,f,g)
530 mov %eax,%r13d
531 mov %eax,%r14d
532
533 ror $2,%r11d
534 ror $13,%r13d
535 mov %eax,%r15d
536 add (%rbp,%rdi,4),%r12d # T1+=K[round]
537
538 xor %r13d,%r11d
539 ror $9,%r13d
540 or %ecx,%r14d # a|c
541
542 xor %r13d,%r11d # h=Sigma0(a)
543 and %ecx,%r15d # a&c
544 add %r12d,%edx # d+=T1
545
546 and %ebx,%r14d # (a|c)&b
547 add %r12d,%r11d # h+=T1
548
549 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
550 lea 1(%rdi),%rdi # round++
551
552 add %r14d,%r11d # h+=Maj(a,b,c)
553 mov 4*9(%rsi),%r12d
554 bswap %r12d
555 mov %edx,%r13d
556 mov %edx,%r14d
557 mov %r8d,%r15d
558
559 ror $6,%r13d
560 ror $11,%r14d
561 xor %r9d,%r15d # f^g
562
563 xor %r14d,%r13d
564 ror $14,%r14d
565 and %edx,%r15d # (f^g)&e
566 mov %r12d,36(%rsp)
567
568 xor %r14d,%r13d # Sigma1(e)
569 xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
570 add %r10d,%r12d # T1+=h
571
572 mov %r11d,%r10d
573 add %r13d,%r12d # T1+=Sigma1(e)
574
575 add %r15d,%r12d # T1+=Ch(e,f,g)
576 mov %r11d,%r13d
577 mov %r11d,%r14d
578
579 ror $2,%r10d
580 ror $13,%r13d
581 mov %r11d,%r15d
582 add (%rbp,%rdi,4),%r12d # T1+=K[round]
583
584 xor %r13d,%r10d
585 ror $9,%r13d
586 or %ebx,%r14d # a|c
587
588 xor %r13d,%r10d # h=Sigma0(a)
589 and %ebx,%r15d # a&c
590 add %r12d,%ecx # d+=T1
591
592 and %eax,%r14d # (a|c)&b
593 add %r12d,%r10d # h+=T1
594
595 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
596 lea 1(%rdi),%rdi # round++
597
598 add %r14d,%r10d # h+=Maj(a,b,c)
599 mov 4*10(%rsi),%r12d
600 bswap %r12d
601 mov %ecx,%r13d
602 mov %ecx,%r14d
603 mov %edx,%r15d
604
605 ror $6,%r13d
606 ror $11,%r14d
607 xor %r8d,%r15d # f^g
608
609 xor %r14d,%r13d
610 ror $14,%r14d
611 and %ecx,%r15d # (f^g)&e
612 mov %r12d,40(%rsp)
613
614 xor %r14d,%r13d # Sigma1(e)
615 xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
616 add %r9d,%r12d # T1+=h
617
618 mov %r10d,%r9d
619 add %r13d,%r12d # T1+=Sigma1(e)
620
621 add %r15d,%r12d # T1+=Ch(e,f,g)
622 mov %r10d,%r13d
623 mov %r10d,%r14d
624
625 ror $2,%r9d
626 ror $13,%r13d
627 mov %r10d,%r15d
628 add (%rbp,%rdi,4),%r12d # T1+=K[round]
629
630 xor %r13d,%r9d
631 ror $9,%r13d
632 or %eax,%r14d # a|c
633
634 xor %r13d,%r9d # h=Sigma0(a)
635 and %eax,%r15d # a&c
636 add %r12d,%ebx # d+=T1
637
638 and %r11d,%r14d # (a|c)&b
639 add %r12d,%r9d # h+=T1
640
641 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
642 lea 1(%rdi),%rdi # round++
643
644 add %r14d,%r9d # h+=Maj(a,b,c)
645 mov 4*11(%rsi),%r12d
646 bswap %r12d
647 mov %ebx,%r13d
648 mov %ebx,%r14d
649 mov %ecx,%r15d
650
651 ror $6,%r13d
652 ror $11,%r14d
653 xor %edx,%r15d # f^g
654
655 xor %r14d,%r13d
656 ror $14,%r14d
657 and %ebx,%r15d # (f^g)&e
658 mov %r12d,44(%rsp)
659
660 xor %r14d,%r13d # Sigma1(e)
661 xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
662 add %r8d,%r12d # T1+=h
663
664 mov %r9d,%r8d
665 add %r13d,%r12d # T1+=Sigma1(e)
666
667 add %r15d,%r12d # T1+=Ch(e,f,g)
668 mov %r9d,%r13d
669 mov %r9d,%r14d
670
671 ror $2,%r8d
672 ror $13,%r13d
673 mov %r9d,%r15d
674 add (%rbp,%rdi,4),%r12d # T1+=K[round]
675
676 xor %r13d,%r8d
677 ror $9,%r13d
678 or %r11d,%r14d # a|c
679
680 xor %r13d,%r8d # h=Sigma0(a)
681 and %r11d,%r15d # a&c
682 add %r12d,%eax # d+=T1
683
684 and %r10d,%r14d # (a|c)&b
685 add %r12d,%r8d # h+=T1
686
687 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
688 lea 1(%rdi),%rdi # round++
689
690 add %r14d,%r8d # h+=Maj(a,b,c)
691 mov 4*12(%rsi),%r12d
692 bswap %r12d
693 mov %eax,%r13d
694 mov %eax,%r14d
695 mov %ebx,%r15d
696
697 ror $6,%r13d
698 ror $11,%r14d
699 xor %ecx,%r15d # f^g
700
701 xor %r14d,%r13d
702 ror $14,%r14d
703 and %eax,%r15d # (f^g)&e
704 mov %r12d,48(%rsp)
705
706 xor %r14d,%r13d # Sigma1(e)
707 xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
708 add %edx,%r12d # T1+=h
709
710 mov %r8d,%edx
711 add %r13d,%r12d # T1+=Sigma1(e)
712
713 add %r15d,%r12d # T1+=Ch(e,f,g)
714 mov %r8d,%r13d
715 mov %r8d,%r14d
716
717 ror $2,%edx
718 ror $13,%r13d
719 mov %r8d,%r15d
720 add (%rbp,%rdi,4),%r12d # T1+=K[round]
721
722 xor %r13d,%edx
723 ror $9,%r13d
724 or %r10d,%r14d # a|c
725
726 xor %r13d,%edx # h=Sigma0(a)
727 and %r10d,%r15d # a&c
728 add %r12d,%r11d # d+=T1
729
730 and %r9d,%r14d # (a|c)&b
731 add %r12d,%edx # h+=T1
732
733 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
734 lea 1(%rdi),%rdi # round++
735
736 add %r14d,%edx # h+=Maj(a,b,c)
737 mov 4*13(%rsi),%r12d
738 bswap %r12d
739 mov %r11d,%r13d
740 mov %r11d,%r14d
741 mov %eax,%r15d
742
743 ror $6,%r13d
744 ror $11,%r14d
745 xor %ebx,%r15d # f^g
746
747 xor %r14d,%r13d
748 ror $14,%r14d
749 and %r11d,%r15d # (f^g)&e
750 mov %r12d,52(%rsp)
751
752 xor %r14d,%r13d # Sigma1(e)
753 xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
754 add %ecx,%r12d # T1+=h
755
756 mov %edx,%ecx
757 add %r13d,%r12d # T1+=Sigma1(e)
758
759 add %r15d,%r12d # T1+=Ch(e,f,g)
760 mov %edx,%r13d
761 mov %edx,%r14d
762
763 ror $2,%ecx
764 ror $13,%r13d
765 mov %edx,%r15d
766 add (%rbp,%rdi,4),%r12d # T1+=K[round]
767
768 xor %r13d,%ecx
769 ror $9,%r13d
770 or %r9d,%r14d # a|c
771
772 xor %r13d,%ecx # h=Sigma0(a)
773 and %r9d,%r15d # a&c
774 add %r12d,%r10d # d+=T1
775
776 and %r8d,%r14d # (a|c)&b
777 add %r12d,%ecx # h+=T1
778
779 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
780 lea 1(%rdi),%rdi # round++
781
782 add %r14d,%ecx # h+=Maj(a,b,c)
783 mov 4*14(%rsi),%r12d
784 bswap %r12d
785 mov %r10d,%r13d
786 mov %r10d,%r14d
787 mov %r11d,%r15d
788
789 ror $6,%r13d
790 ror $11,%r14d
791 xor %eax,%r15d # f^g
792
793 xor %r14d,%r13d
794 ror $14,%r14d
795 and %r10d,%r15d # (f^g)&e
796 mov %r12d,56(%rsp)
797
798 xor %r14d,%r13d # Sigma1(e)
799 xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
800 add %ebx,%r12d # T1+=h
801
802 mov %ecx,%ebx
803 add %r13d,%r12d # T1+=Sigma1(e)
804
805 add %r15d,%r12d # T1+=Ch(e,f,g)
806 mov %ecx,%r13d
807 mov %ecx,%r14d
808
809 ror $2,%ebx
810 ror $13,%r13d
811 mov %ecx,%r15d
812 add (%rbp,%rdi,4),%r12d # T1+=K[round]
813
814 xor %r13d,%ebx
815 ror $9,%r13d
816 or %r8d,%r14d # a|c
817
818 xor %r13d,%ebx # h=Sigma0(a)
819 and %r8d,%r15d # a&c
820 add %r12d,%r9d # d+=T1
821
822 and %edx,%r14d # (a|c)&b
823 add %r12d,%ebx # h+=T1
824
825 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
826 lea 1(%rdi),%rdi # round++
827
828 add %r14d,%ebx # h+=Maj(a,b,c)
829 mov 4*15(%rsi),%r12d
830 bswap %r12d
831 mov %r9d,%r13d
832 mov %r9d,%r14d
833 mov %r10d,%r15d
834
835 ror $6,%r13d
836 ror $11,%r14d
837 xor %r11d,%r15d # f^g
838
839 xor %r14d,%r13d
840 ror $14,%r14d
841 and %r9d,%r15d # (f^g)&e
842 mov %r12d,60(%rsp)
843
844 xor %r14d,%r13d # Sigma1(e)
845 xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
846 add %eax,%r12d # T1+=h
847
848 mov %ebx,%eax
849 add %r13d,%r12d # T1+=Sigma1(e)
850
851 add %r15d,%r12d # T1+=Ch(e,f,g)
852 mov %ebx,%r13d
853 mov %ebx,%r14d
854
855 ror $2,%eax
856 ror $13,%r13d
857 mov %ebx,%r15d
858 add (%rbp,%rdi,4),%r12d # T1+=K[round]
859
860 xor %r13d,%eax
861 ror $9,%r13d
862 or %edx,%r14d # a|c
863
864 xor %r13d,%eax # h=Sigma0(a)
865 and %edx,%r15d # a&c
866 add %r12d,%r8d # d+=T1
867
868 and %ecx,%r14d # (a|c)&b
869 add %r12d,%eax # h+=T1
870
871 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
872 lea 1(%rdi),%rdi # round++
873
874 add %r14d,%eax # h+=Maj(a,b,c)
875 jmp .Lrounds_16_xx
876 .balign 16
877 .Lrounds_16_xx:
878 mov 4(%rsp),%r13d
879 mov 56(%rsp),%r12d
880
881 mov %r13d,%r15d
882
883 shr $3,%r13d
884 ror $7,%r15d
885
886 xor %r15d,%r13d
887 ror $11,%r15d
888
889 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
890 mov %r12d,%r14d
891
892 shr $10,%r12d
893 ror $17,%r14d
894
895 xor %r14d,%r12d
896 ror $2,%r14d
897
898 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
899
900 add %r13d,%r12d
901
902 add 36(%rsp),%r12d
903
904 add 0(%rsp),%r12d
905 mov %r8d,%r13d
906 mov %r8d,%r14d
907 mov %r9d,%r15d
908
909 ror $6,%r13d
910 ror $11,%r14d
911 xor %r10d,%r15d # f^g
912
913 xor %r14d,%r13d
914 ror $14,%r14d
915 and %r8d,%r15d # (f^g)&e
916 mov %r12d,0(%rsp)
917
918 xor %r14d,%r13d # Sigma1(e)
919 xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
920 add %r11d,%r12d # T1+=h
921
922 mov %eax,%r11d
923 add %r13d,%r12d # T1+=Sigma1(e)
924
925 add %r15d,%r12d # T1+=Ch(e,f,g)
926 mov %eax,%r13d
927 mov %eax,%r14d
928
929 ror $2,%r11d
930 ror $13,%r13d
931 mov %eax,%r15d
932 add (%rbp,%rdi,4),%r12d # T1+=K[round]
933
934 xor %r13d,%r11d
935 ror $9,%r13d
936 or %ecx,%r14d # a|c
937
938 xor %r13d,%r11d # h=Sigma0(a)
939 and %ecx,%r15d # a&c
940 add %r12d,%edx # d+=T1
941
942 and %ebx,%r14d # (a|c)&b
943 add %r12d,%r11d # h+=T1
944
945 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
946 lea 1(%rdi),%rdi # round++
947
948 add %r14d,%r11d # h+=Maj(a,b,c)
949 mov 8(%rsp),%r13d
950 mov 60(%rsp),%r12d
951
952 mov %r13d,%r15d
953
954 shr $3,%r13d
955 ror $7,%r15d
956
957 xor %r15d,%r13d
958 ror $11,%r15d
959
960 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
961 mov %r12d,%r14d
962
963 shr $10,%r12d
964 ror $17,%r14d
965
966 xor %r14d,%r12d
967 ror $2,%r14d
968
969 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
970
971 add %r13d,%r12d
972
973 add 40(%rsp),%r12d
974
975 add 4(%rsp),%r12d
976 mov %edx,%r13d
977 mov %edx,%r14d
978 mov %r8d,%r15d
979
980 ror $6,%r13d
981 ror $11,%r14d
982 xor %r9d,%r15d # f^g
983
984 xor %r14d,%r13d
985 ror $14,%r14d
986 and %edx,%r15d # (f^g)&e
987 mov %r12d,4(%rsp)
988
989 xor %r14d,%r13d # Sigma1(e)
990 xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
991 add %r10d,%r12d # T1+=h
992
993 mov %r11d,%r10d
994 add %r13d,%r12d # T1+=Sigma1(e)
995
996 add %r15d,%r12d # T1+=Ch(e,f,g)
997 mov %r11d,%r13d
998 mov %r11d,%r14d
999
1000 ror $2,%r10d
1001 ror $13,%r13d
1002 mov %r11d,%r15d
1003 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1004
1005 xor %r13d,%r10d
1006 ror $9,%r13d
1007 or %ebx,%r14d # a|c
1008
1009 xor %r13d,%r10d # h=Sigma0(a)
1010 and %ebx,%r15d # a&c
1011 add %r12d,%ecx # d+=T1
1012
1013 and %eax,%r14d # (a|c)&b
1014 add %r12d,%r10d # h+=T1
1015
1016 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1017 lea 1(%rdi),%rdi # round++
1018
1019 add %r14d,%r10d # h+=Maj(a,b,c)
1020 mov 12(%rsp),%r13d
1021 mov 0(%rsp),%r12d
1022
1023 mov %r13d,%r15d
1024
1025 shr $3,%r13d
1026 ror $7,%r15d
1027
1028 xor %r15d,%r13d
1029 ror $11,%r15d
1030
1031 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1032 mov %r12d,%r14d
1033
1034 shr $10,%r12d
1035 ror $17,%r14d
1036
1037 xor %r14d,%r12d
1038 ror $2,%r14d
1039
1040 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1041
1042 add %r13d,%r12d
1043
1044 add 44(%rsp),%r12d
1045
1046 add 8(%rsp),%r12d
1047 mov %ecx,%r13d
1048 mov %ecx,%r14d
1049 mov %edx,%r15d
1050
1051 ror $6,%r13d
1052 ror $11,%r14d
1053 xor %r8d,%r15d # f^g
1054
1055 xor %r14d,%r13d
1056 ror $14,%r14d
1057 and %ecx,%r15d # (f^g)&e
1058 mov %r12d,8(%rsp)
1059
1060 xor %r14d,%r13d # Sigma1(e)
1061 xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1062 add %r9d,%r12d # T1+=h
1063
1064 mov %r10d,%r9d
1065 add %r13d,%r12d # T1+=Sigma1(e)
1066
1067 add %r15d,%r12d # T1+=Ch(e,f,g)
1068 mov %r10d,%r13d
1069 mov %r10d,%r14d
1070
1071 ror $2,%r9d
1072 ror $13,%r13d
1073 mov %r10d,%r15d
1074 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1075
1076 xor %r13d,%r9d
1077 ror $9,%r13d
1078 or %eax,%r14d # a|c
1079
1080 xor %r13d,%r9d # h=Sigma0(a)
1081 and %eax,%r15d # a&c
1082 add %r12d,%ebx # d+=T1
1083
1084 and %r11d,%r14d # (a|c)&b
1085 add %r12d,%r9d # h+=T1
1086
1087 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1088 lea 1(%rdi),%rdi # round++
1089
1090 add %r14d,%r9d # h+=Maj(a,b,c)
1091 mov 16(%rsp),%r13d
1092 mov 4(%rsp),%r12d
1093
1094 mov %r13d,%r15d
1095
1096 shr $3,%r13d
1097 ror $7,%r15d
1098
1099 xor %r15d,%r13d
1100 ror $11,%r15d
1101
1102 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1103 mov %r12d,%r14d
1104
1105 shr $10,%r12d
1106 ror $17,%r14d
1107
1108 xor %r14d,%r12d
1109 ror $2,%r14d
1110
1111 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1112
1113 add %r13d,%r12d
1114
1115 add 48(%rsp),%r12d
1116
1117 add 12(%rsp),%r12d
1118 mov %ebx,%r13d
1119 mov %ebx,%r14d
1120 mov %ecx,%r15d
1121
1122 ror $6,%r13d
1123 ror $11,%r14d
1124 xor %edx,%r15d # f^g
1125
1126 xor %r14d,%r13d
1127 ror $14,%r14d
1128 and %ebx,%r15d # (f^g)&e
1129 mov %r12d,12(%rsp)
1130
1131 xor %r14d,%r13d # Sigma1(e)
1132 xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1133 add %r8d,%r12d # T1+=h
1134
1135 mov %r9d,%r8d
1136 add %r13d,%r12d # T1+=Sigma1(e)
1137
1138 add %r15d,%r12d # T1+=Ch(e,f,g)
1139 mov %r9d,%r13d
1140 mov %r9d,%r14d
1141
1142 ror $2,%r8d
1143 ror $13,%r13d
1144 mov %r9d,%r15d
1145 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1146
1147 xor %r13d,%r8d
1148 ror $9,%r13d
1149 or %r11d,%r14d # a|c
1150
1151 xor %r13d,%r8d # h=Sigma0(a)
1152 and %r11d,%r15d # a&c
1153 add %r12d,%eax # d+=T1
1154
1155 and %r10d,%r14d # (a|c)&b
1156 add %r12d,%r8d # h+=T1
1157
1158 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1159 lea 1(%rdi),%rdi # round++
1160
1161 add %r14d,%r8d # h+=Maj(a,b,c)
1162 mov 20(%rsp),%r13d
1163 mov 8(%rsp),%r12d
1164
1165 mov %r13d,%r15d
1166
1167 shr $3,%r13d
1168 ror $7,%r15d
1169
1170 xor %r15d,%r13d
1171 ror $11,%r15d
1172
1173 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1174 mov %r12d,%r14d
1175
1176 shr $10,%r12d
1177 ror $17,%r14d
1178
1179 xor %r14d,%r12d
1180 ror $2,%r14d
1181
1182 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1183
1184 add %r13d,%r12d
1185
1186 add 52(%rsp),%r12d
1187
1188 add 16(%rsp),%r12d
1189 mov %eax,%r13d
1190 mov %eax,%r14d
1191 mov %ebx,%r15d
1192
1193 ror $6,%r13d
1194 ror $11,%r14d
1195 xor %ecx,%r15d # f^g
1196
1197 xor %r14d,%r13d
1198 ror $14,%r14d
1199 and %eax,%r15d # (f^g)&e
1200 mov %r12d,16(%rsp)
1201
1202 xor %r14d,%r13d # Sigma1(e)
1203 xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1204 add %edx,%r12d # T1+=h
1205
1206 mov %r8d,%edx
1207 add %r13d,%r12d # T1+=Sigma1(e)
1208
1209 add %r15d,%r12d # T1+=Ch(e,f,g)
1210 mov %r8d,%r13d
1211 mov %r8d,%r14d
1212
1213 ror $2,%edx
1214 ror $13,%r13d
1215 mov %r8d,%r15d
1216 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1217
1218 xor %r13d,%edx
1219 ror $9,%r13d
1220 or %r10d,%r14d # a|c
1221
1222 xor %r13d,%edx # h=Sigma0(a)
1223 and %r10d,%r15d # a&c
1224 add %r12d,%r11d # d+=T1
1225
1226 and %r9d,%r14d # (a|c)&b
1227 add %r12d,%edx # h+=T1
1228
1229 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1230 lea 1(%rdi),%rdi # round++
1231
1232 add %r14d,%edx # h+=Maj(a,b,c)
1233 mov 24(%rsp),%r13d
1234 mov 12(%rsp),%r12d
1235
1236 mov %r13d,%r15d
1237
1238 shr $3,%r13d
1239 ror $7,%r15d
1240
1241 xor %r15d,%r13d
1242 ror $11,%r15d
1243
1244 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1245 mov %r12d,%r14d
1246
1247 shr $10,%r12d
1248 ror $17,%r14d
1249
1250 xor %r14d,%r12d
1251 ror $2,%r14d
1252
1253 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1254
1255 add %r13d,%r12d
1256
1257 add 56(%rsp),%r12d
1258
1259 add 20(%rsp),%r12d
1260 mov %r11d,%r13d
1261 mov %r11d,%r14d
1262 mov %eax,%r15d
1263
1264 ror $6,%r13d
1265 ror $11,%r14d
1266 xor %ebx,%r15d # f^g
1267
1268 xor %r14d,%r13d
1269 ror $14,%r14d
1270 and %r11d,%r15d # (f^g)&e
1271 mov %r12d,20(%rsp)
1272
1273 xor %r14d,%r13d # Sigma1(e)
1274 xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1275 add %ecx,%r12d # T1+=h
1276
1277 mov %edx,%ecx
1278 add %r13d,%r12d # T1+=Sigma1(e)
1279
1280 add %r15d,%r12d # T1+=Ch(e,f,g)
1281 mov %edx,%r13d
1282 mov %edx,%r14d
1283
1284 ror $2,%ecx
1285 ror $13,%r13d
1286 mov %edx,%r15d
1287 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1288
1289 xor %r13d,%ecx
1290 ror $9,%r13d
1291 or %r9d,%r14d # a|c
1292
1293 xor %r13d,%ecx # h=Sigma0(a)
1294 and %r9d,%r15d # a&c
1295 add %r12d,%r10d # d+=T1
1296
1297 and %r8d,%r14d # (a|c)&b
1298 add %r12d,%ecx # h+=T1
1299
1300 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1301 lea 1(%rdi),%rdi # round++
1302
1303 add %r14d,%ecx # h+=Maj(a,b,c)
1304 mov 28(%rsp),%r13d
1305 mov 16(%rsp),%r12d
1306
1307 mov %r13d,%r15d
1308
1309 shr $3,%r13d
1310 ror $7,%r15d
1311
1312 xor %r15d,%r13d
1313 ror $11,%r15d
1314
1315 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1316 mov %r12d,%r14d
1317
1318 shr $10,%r12d
1319 ror $17,%r14d
1320
1321 xor %r14d,%r12d
1322 ror $2,%r14d
1323
1324 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1325
1326 add %r13d,%r12d
1327
1328 add 60(%rsp),%r12d
1329
1330 add 24(%rsp),%r12d
1331 mov %r10d,%r13d
1332 mov %r10d,%r14d
1333 mov %r11d,%r15d
1334
1335 ror $6,%r13d
1336 ror $11,%r14d
1337 xor %eax,%r15d # f^g
1338
1339 xor %r14d,%r13d
1340 ror $14,%r14d
1341 and %r10d,%r15d # (f^g)&e
1342 mov %r12d,24(%rsp)
1343
1344 xor %r14d,%r13d # Sigma1(e)
1345 xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
1346 add %ebx,%r12d # T1+=h
1347
1348 mov %ecx,%ebx
1349 add %r13d,%r12d # T1+=Sigma1(e)
1350
1351 add %r15d,%r12d # T1+=Ch(e,f,g)
1352 mov %ecx,%r13d
1353 mov %ecx,%r14d
1354
1355 ror $2,%ebx
1356 ror $13,%r13d
1357 mov %ecx,%r15d
1358 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1359
1360 xor %r13d,%ebx
1361 ror $9,%r13d
1362 or %r8d,%r14d # a|c
1363
1364 xor %r13d,%ebx # h=Sigma0(a)
1365 and %r8d,%r15d # a&c
1366 add %r12d,%r9d # d+=T1
1367
1368 and %edx,%r14d # (a|c)&b
1369 add %r12d,%ebx # h+=T1
1370
1371 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1372 lea 1(%rdi),%rdi # round++
1373
1374 add %r14d,%ebx # h+=Maj(a,b,c)
1375 mov 32(%rsp),%r13d
1376 mov 20(%rsp),%r12d
1377
1378 mov %r13d,%r15d
1379
1380 shr $3,%r13d
1381 ror $7,%r15d
1382
1383 xor %r15d,%r13d
1384 ror $11,%r15d
1385
1386 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1387 mov %r12d,%r14d
1388
1389 shr $10,%r12d
1390 ror $17,%r14d
1391
1392 xor %r14d,%r12d
1393 ror $2,%r14d
1394
1395 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1396
1397 add %r13d,%r12d
1398
1399 add 0(%rsp),%r12d
1400
1401 add 28(%rsp),%r12d
1402 mov %r9d,%r13d
1403 mov %r9d,%r14d
1404 mov %r10d,%r15d
1405
1406 ror $6,%r13d
1407 ror $11,%r14d
1408 xor %r11d,%r15d # f^g
1409
1410 xor %r14d,%r13d
1411 ror $14,%r14d
1412 and %r9d,%r15d # (f^g)&e
1413 mov %r12d,28(%rsp)
1414
1415 xor %r14d,%r13d # Sigma1(e)
1416 xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1417 add %eax,%r12d # T1+=h
1418
1419 mov %ebx,%eax
1420 add %r13d,%r12d # T1+=Sigma1(e)
1421
1422 add %r15d,%r12d # T1+=Ch(e,f,g)
1423 mov %ebx,%r13d
1424 mov %ebx,%r14d
1425
1426 ror $2,%eax
1427 ror $13,%r13d
1428 mov %ebx,%r15d
1429 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1430
1431 xor %r13d,%eax
1432 ror $9,%r13d
1433 or %edx,%r14d # a|c
1434
1435 xor %r13d,%eax # h=Sigma0(a)
1436 and %edx,%r15d # a&c
1437 add %r12d,%r8d # d+=T1
1438
1439 and %ecx,%r14d # (a|c)&b
1440 add %r12d,%eax # h+=T1
1441
1442 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1443 lea 1(%rdi),%rdi # round++
1444
1445 add %r14d,%eax # h+=Maj(a,b,c)
1446 mov 36(%rsp),%r13d
1447 mov 24(%rsp),%r12d
1448
1449 mov %r13d,%r15d
1450
1451 shr $3,%r13d
1452 ror $7,%r15d
1453
1454 xor %r15d,%r13d
1455 ror $11,%r15d
1456
1457 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1458 mov %r12d,%r14d
1459
1460 shr $10,%r12d
1461 ror $17,%r14d
1462
1463 xor %r14d,%r12d
1464 ror $2,%r14d
1465
1466 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1467
1468 add %r13d,%r12d
1469
1470 add 4(%rsp),%r12d
1471
1472 add 32(%rsp),%r12d
1473 mov %r8d,%r13d
1474 mov %r8d,%r14d
1475 mov %r9d,%r15d
1476
1477 ror $6,%r13d
1478 ror $11,%r14d
1479 xor %r10d,%r15d # f^g
1480
1481 xor %r14d,%r13d
1482 ror $14,%r14d
1483 and %r8d,%r15d # (f^g)&e
1484 mov %r12d,32(%rsp)
1485
1486 xor %r14d,%r13d # Sigma1(e)
1487 xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1488 add %r11d,%r12d # T1+=h
1489
1490 mov %eax,%r11d
1491 add %r13d,%r12d # T1+=Sigma1(e)
1492
1493 add %r15d,%r12d # T1+=Ch(e,f,g)
1494 mov %eax,%r13d
1495 mov %eax,%r14d
1496
1497 ror $2,%r11d
1498 ror $13,%r13d
1499 mov %eax,%r15d
1500 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1501
1502 xor %r13d,%r11d
1503 ror $9,%r13d
1504 or %ecx,%r14d # a|c
1505
1506 xor %r13d,%r11d # h=Sigma0(a)
1507 and %ecx,%r15d # a&c
1508 add %r12d,%edx # d+=T1
1509
1510 and %ebx,%r14d # (a|c)&b
1511 add %r12d,%r11d # h+=T1
1512
1513 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1514 lea 1(%rdi),%rdi # round++
1515
1516 add %r14d,%r11d # h+=Maj(a,b,c)
1517 mov 40(%rsp),%r13d
1518 mov 28(%rsp),%r12d
1519
1520 mov %r13d,%r15d
1521
1522 shr $3,%r13d
1523 ror $7,%r15d
1524
1525 xor %r15d,%r13d
1526 ror $11,%r15d
1527
1528 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1529 mov %r12d,%r14d
1530
1531 shr $10,%r12d
1532 ror $17,%r14d
1533
1534 xor %r14d,%r12d
1535 ror $2,%r14d
1536
1537 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1538
1539 add %r13d,%r12d
1540
1541 add 8(%rsp),%r12d
1542
1543 add 36(%rsp),%r12d
1544 mov %edx,%r13d
1545 mov %edx,%r14d
1546 mov %r8d,%r15d
1547
1548 ror $6,%r13d
1549 ror $11,%r14d
1550 xor %r9d,%r15d # f^g
1551
1552 xor %r14d,%r13d
1553 ror $14,%r14d
1554 and %edx,%r15d # (f^g)&e
1555 mov %r12d,36(%rsp)
1556
1557 xor %r14d,%r13d # Sigma1(e)
1558 xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1559 add %r10d,%r12d # T1+=h
1560
1561 mov %r11d,%r10d
1562 add %r13d,%r12d # T1+=Sigma1(e)
1563
1564 add %r15d,%r12d # T1+=Ch(e,f,g)
1565 mov %r11d,%r13d
1566 mov %r11d,%r14d
1567
1568 ror $2,%r10d
1569 ror $13,%r13d
1570 mov %r11d,%r15d
1571 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1572
1573 xor %r13d,%r10d
1574 ror $9,%r13d
1575 or %ebx,%r14d # a|c
1576
1577 xor %r13d,%r10d # h=Sigma0(a)
1578 and %ebx,%r15d # a&c
1579 add %r12d,%ecx # d+=T1
1580
1581 and %eax,%r14d # (a|c)&b
1582 add %r12d,%r10d # h+=T1
1583
1584 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1585 lea 1(%rdi),%rdi # round++
1586
1587 add %r14d,%r10d # h+=Maj(a,b,c)
1588 mov 44(%rsp),%r13d
1589 mov 32(%rsp),%r12d
1590
1591 mov %r13d,%r15d
1592
1593 shr $3,%r13d
1594 ror $7,%r15d
1595
1596 xor %r15d,%r13d
1597 ror $11,%r15d
1598
1599 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1600 mov %r12d,%r14d
1601
1602 shr $10,%r12d
1603 ror $17,%r14d
1604
1605 xor %r14d,%r12d
1606 ror $2,%r14d
1607
1608 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1609
1610 add %r13d,%r12d
1611
1612 add 12(%rsp),%r12d
1613
1614 add 40(%rsp),%r12d
1615 mov %ecx,%r13d
1616 mov %ecx,%r14d
1617 mov %edx,%r15d
1618
1619 ror $6,%r13d
1620 ror $11,%r14d
1621 xor %r8d,%r15d # f^g
1622
1623 xor %r14d,%r13d
1624 ror $14,%r14d
1625 and %ecx,%r15d # (f^g)&e
1626 mov %r12d,40(%rsp)
1627
1628 xor %r14d,%r13d # Sigma1(e)
1629 xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1630 add %r9d,%r12d # T1+=h
1631
1632 mov %r10d,%r9d
1633 add %r13d,%r12d # T1+=Sigma1(e)
1634
1635 add %r15d,%r12d # T1+=Ch(e,f,g)
1636 mov %r10d,%r13d
1637 mov %r10d,%r14d
1638
1639 ror $2,%r9d
1640 ror $13,%r13d
1641 mov %r10d,%r15d
1642 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1643
1644 xor %r13d,%r9d
1645 ror $9,%r13d
1646 or %eax,%r14d # a|c
1647
1648 xor %r13d,%r9d # h=Sigma0(a)
1649 and %eax,%r15d # a&c
1650 add %r12d,%ebx # d+=T1
1651
1652 and %r11d,%r14d # (a|c)&b
1653 add %r12d,%r9d # h+=T1
1654
1655 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1656 lea 1(%rdi),%rdi # round++
1657
1658 add %r14d,%r9d # h+=Maj(a,b,c)
1659 mov 48(%rsp),%r13d
1660 mov 36(%rsp),%r12d
1661
1662 mov %r13d,%r15d
1663
1664 shr $3,%r13d
1665 ror $7,%r15d
1666
1667 xor %r15d,%r13d
1668 ror $11,%r15d
1669
1670 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1671 mov %r12d,%r14d
1672
1673 shr $10,%r12d
1674 ror $17,%r14d
1675
1676 xor %r14d,%r12d
1677 ror $2,%r14d
1678
1679 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1680
1681 add %r13d,%r12d
1682
1683 add 16(%rsp),%r12d
1684
1685 add 44(%rsp),%r12d
1686 mov %ebx,%r13d
1687 mov %ebx,%r14d
1688 mov %ecx,%r15d
1689
1690 ror $6,%r13d
1691 ror $11,%r14d
1692 xor %edx,%r15d # f^g
1693
1694 xor %r14d,%r13d
1695 ror $14,%r14d
1696 and %ebx,%r15d # (f^g)&e
1697 mov %r12d,44(%rsp)
1698
1699 xor %r14d,%r13d # Sigma1(e)
1700 xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1701 add %r8d,%r12d # T1+=h
1702
1703 mov %r9d,%r8d
1704 add %r13d,%r12d # T1+=Sigma1(e)
1705
1706 add %r15d,%r12d # T1+=Ch(e,f,g)
1707 mov %r9d,%r13d
1708 mov %r9d,%r14d
1709
1710 ror $2,%r8d
1711 ror $13,%r13d
1712 mov %r9d,%r15d
1713 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1714
1715 xor %r13d,%r8d
1716 ror $9,%r13d
1717 or %r11d,%r14d # a|c
1718
1719 xor %r13d,%r8d # h=Sigma0(a)
1720 and %r11d,%r15d # a&c
1721 add %r12d,%eax # d+=T1
1722
1723 and %r10d,%r14d # (a|c)&b
1724 add %r12d,%r8d # h+=T1
1725
1726 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1727 lea 1(%rdi),%rdi # round++
1728
1729 add %r14d,%r8d # h+=Maj(a,b,c)
1730 mov 52(%rsp),%r13d
1731 mov 40(%rsp),%r12d
1732
1733 mov %r13d,%r15d
1734
1735 shr $3,%r13d
1736 ror $7,%r15d
1737
1738 xor %r15d,%r13d
1739 ror $11,%r15d
1740
1741 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1742 mov %r12d,%r14d
1743
1744 shr $10,%r12d
1745 ror $17,%r14d
1746
1747 xor %r14d,%r12d
1748 ror $2,%r14d
1749
1750 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1751
1752 add %r13d,%r12d
1753
1754 add 20(%rsp),%r12d
1755
1756 add 48(%rsp),%r12d
1757 mov %eax,%r13d
1758 mov %eax,%r14d
1759 mov %ebx,%r15d
1760
1761 ror $6,%r13d
1762 ror $11,%r14d
1763 xor %ecx,%r15d # f^g
1764
1765 xor %r14d,%r13d
1766 ror $14,%r14d
1767 and %eax,%r15d # (f^g)&e
1768 mov %r12d,48(%rsp)
1769
1770 xor %r14d,%r13d # Sigma1(e)
1771 xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1772 add %edx,%r12d # T1+=h
1773
1774 mov %r8d,%edx
1775 add %r13d,%r12d # T1+=Sigma1(e)
1776
1777 add %r15d,%r12d # T1+=Ch(e,f,g)
1778 mov %r8d,%r13d
1779 mov %r8d,%r14d
1780
1781 ror $2,%edx
1782 ror $13,%r13d
1783 mov %r8d,%r15d
1784 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1785
1786 xor %r13d,%edx
1787 ror $9,%r13d
1788 or %r10d,%r14d # a|c
1789
1790 xor %r13d,%edx # h=Sigma0(a)
1791 and %r10d,%r15d # a&c
1792 add %r12d,%r11d # d+=T1
1793
1794 and %r9d,%r14d # (a|c)&b
1795 add %r12d,%edx # h+=T1
1796
1797 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1798 lea 1(%rdi),%rdi # round++
1799
1800 add %r14d,%edx # h+=Maj(a,b,c)
1801 mov 56(%rsp),%r13d
1802 mov 44(%rsp),%r12d
1803
1804 mov %r13d,%r15d
1805
1806 shr $3,%r13d
1807 ror $7,%r15d
1808
1809 xor %r15d,%r13d
1810 ror $11,%r15d
1811
1812 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1813 mov %r12d,%r14d
1814
1815 shr $10,%r12d
1816 ror $17,%r14d
1817
1818 xor %r14d,%r12d
1819 ror $2,%r14d
1820
1821 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1822
1823 add %r13d,%r12d
1824
1825 add 24(%rsp),%r12d
1826
1827 add 52(%rsp),%r12d
1828 mov %r11d,%r13d
1829 mov %r11d,%r14d
1830 mov %eax,%r15d
1831
1832 ror $6,%r13d
1833 ror $11,%r14d
1834 xor %ebx,%r15d # f^g
1835
1836 xor %r14d,%r13d
1837 ror $14,%r14d
1838 and %r11d,%r15d # (f^g)&e
1839 mov %r12d,52(%rsp)
1840
1841 xor %r14d,%r13d # Sigma1(e)
1842 xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
1843 add %ecx,%r12d # T1+=h
1844
1845 mov %edx,%ecx
1846 add %r13d,%r12d # T1+=Sigma1(e)
1847
1848 add %r15d,%r12d # T1+=Ch(e,f,g)
1849 mov %edx,%r13d
1850 mov %edx,%r14d
1851
1852 ror $2,%ecx
1853 ror $13,%r13d
1854 mov %edx,%r15d
1855 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1856
1857 xor %r13d,%ecx
1858 ror $9,%r13d
1859 or %r9d,%r14d # a|c
1860
1861 xor %r13d,%ecx # h=Sigma0(a)
1862 and %r9d,%r15d # a&c
1863 add %r12d,%r10d # d+=T1
1864
1865 and %r8d,%r14d # (a|c)&b
1866 add %r12d,%ecx # h+=T1
1867
1868 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1869 lea 1(%rdi),%rdi # round++
1870
1871 add %r14d,%ecx # h+=Maj(a,b,c)
1872 mov 60(%rsp),%r13d
1873 mov 48(%rsp),%r12d
1874
1875 mov %r13d,%r15d
1876
1877 shr $3,%r13d
1878 ror $7,%r15d
1879
1880 xor %r15d,%r13d
1881 ror $11,%r15d
1882
1883 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1884 mov %r12d,%r14d
1885
1886 shr $10,%r12d
1887 ror $17,%r14d
1888
1889 xor %r14d,%r12d
1890 ror $2,%r14d
1891
1892 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1893
1894 add %r13d,%r12d
1895
1896 add 28(%rsp),%r12d
1897
1898 add 56(%rsp),%r12d
1899 mov %r10d,%r13d
1900 mov %r10d,%r14d
1901 mov %r11d,%r15d
1902
1903 ror $6,%r13d
1904 ror $11,%r14d
1905 xor %eax,%r15d # f^g
1906
1907 xor %r14d,%r13d
1908 ror $14,%r14d
1909 and %r10d,%r15d # (f^g)&e
1910 mov %r12d,56(%rsp)
1911
1912 xor %r14d,%r13d # Sigma1(e)
1913 xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
1914 add %ebx,%r12d # T1+=h
1915
1916 mov %ecx,%ebx
1917 add %r13d,%r12d # T1+=Sigma1(e)
1918
1919 add %r15d,%r12d # T1+=Ch(e,f,g)
1920 mov %ecx,%r13d
1921 mov %ecx,%r14d
1922
1923 ror $2,%ebx
1924 ror $13,%r13d
1925 mov %ecx,%r15d
1926 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1927
1928 xor %r13d,%ebx
1929 ror $9,%r13d
1930 or %r8d,%r14d # a|c
1931
1932 xor %r13d,%ebx # h=Sigma0(a)
1933 and %r8d,%r15d # a&c
1934 add %r12d,%r9d # d+=T1
1935
1936 and %edx,%r14d # (a|c)&b
1937 add %r12d,%ebx # h+=T1
1938
1939 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
1940 lea 1(%rdi),%rdi # round++
1941
1942 add %r14d,%ebx # h+=Maj(a,b,c)
1943 mov 0(%rsp),%r13d
1944 mov 52(%rsp),%r12d
1945
1946 mov %r13d,%r15d
1947
1948 shr $3,%r13d
1949 ror $7,%r15d
1950
1951 xor %r15d,%r13d
1952 ror $11,%r15d
1953
1954 xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
1955 mov %r12d,%r14d
1956
1957 shr $10,%r12d
1958 ror $17,%r14d
1959
1960 xor %r14d,%r12d
1961 ror $2,%r14d
1962
1963 xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
1964
1965 add %r13d,%r12d
1966
1967 add 32(%rsp),%r12d
1968
1969 add 60(%rsp),%r12d
1970 mov %r9d,%r13d
1971 mov %r9d,%r14d
1972 mov %r10d,%r15d
1973
1974 ror $6,%r13d
1975 ror $11,%r14d
1976 xor %r11d,%r15d # f^g
1977
1978 xor %r14d,%r13d
1979 ror $14,%r14d
1980 and %r9d,%r15d # (f^g)&e
1981 mov %r12d,60(%rsp)
1982
1983 xor %r14d,%r13d # Sigma1(e)
1984 xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
1985 add %eax,%r12d # T1+=h
1986
1987 mov %ebx,%eax
1988 add %r13d,%r12d # T1+=Sigma1(e)
1989
1990 add %r15d,%r12d # T1+=Ch(e,f,g)
1991 mov %ebx,%r13d
1992 mov %ebx,%r14d
1993
1994 ror $2,%eax
1995 ror $13,%r13d
1996 mov %ebx,%r15d
1997 add (%rbp,%rdi,4),%r12d # T1+=K[round]
1998
1999 xor %r13d,%eax
2000 ror $9,%r13d
2001 or %edx,%r14d # a|c
2002
2003 xor %r13d,%eax # h=Sigma0(a)
2004 and %edx,%r15d # a&c
2005 add %r12d,%r8d # d+=T1
2006
2007 and %ecx,%r14d # (a|c)&b
2008 add %r12d,%eax # h+=T1
2009
2010 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
2011 lea 1(%rdi),%rdi # round++
2012
2013 add %r14d,%eax # h+=Maj(a,b,c)
2014 cmp $64,%rdi
2015 jb .Lrounds_16_xx
2016
2017 mov 16*4+0*8(%rsp),%rdi
2018 lea 16*4(%rsi),%rsi
2019
2020 add 4*0(%rdi),%eax
2021 add 4*1(%rdi),%ebx
2022 add 4*2(%rdi),%ecx
2023 add 4*3(%rdi),%edx
2024 add 4*4(%rdi),%r8d
2025 add 4*5(%rdi),%r9d
2026 add 4*6(%rdi),%r10d
2027 add 4*7(%rdi),%r11d
2028
2029 cmp 16*4+2*8(%rsp),%rsi
2030
2031 mov %eax,4*0(%rdi)
2032 mov %ebx,4*1(%rdi)
2033 mov %ecx,4*2(%rdi)
2034 mov %edx,4*3(%rdi)
2035 mov %r8d,4*4(%rdi)
2036 mov %r9d,4*5(%rdi)
2037 mov %r10d,4*6(%rdi)
2038 mov %r11d,4*7(%rdi)
2039 jb .Lloop
2040
2041 mov 16*4+3*8(%rsp),%rsp
2042 .cfi_def_cfa %rsp,56
2043 pop %r15
2044 .cfi_adjust_cfa_offset -8
2045 .cfi_restore %r15
2046 pop %r14
2047 .cfi_adjust_cfa_offset -8
2048 .cfi_restore %r14
2049 pop %r13
2050 .cfi_adjust_cfa_offset -8
2051 .cfi_restore %r13
2052 pop %r12
2053 .cfi_adjust_cfa_offset -8
2054 .cfi_restore %r12
2055 pop %rbp
2056 .cfi_adjust_cfa_offset -8
2057 .cfi_restore %rbp
2058 pop %rbx
2059 .cfi_adjust_cfa_offset -8
2060 .cfi_restore %rbx
2061
2062 RET
2063 .cfi_endproc
2064 SET_SIZE(SHA256TransformBlocks)
2065
/*
 * K256: table of the 64 32-bit SHA-256 round constants (16 lines of four
 * .long words each), placed in .rodata and aligned to a 64-byte boundary.
 * The values agree with the K constants listed in FIPS 180-4, section 4.2.2.
 *
 * The round code in this file adds one entry per round through the operand
 * (%rbp,%rdi,4) ("T1+=K[round]") and leaves the round loop once %rdi
 * reaches 64, i.e. exactly the number of entries below.
 * NOTE(review): %rbp is presumably loaded with the address of K256 at
 * function entry - confirm against the prologue.
 */
2066 .section .rodata
2067 .balign 64
2068 SET_OBJ(K256) /* SET_OBJ is defined elsewhere; presumably tags K256 as a data object symbol - verify */
2069 K256:
2070 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 /* K[0..3] */
2071 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
2072 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
2073 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
2074 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc /* K[16..19] */
2075 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
2076 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
2077 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
2078 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 /* K[32..35] */
2079 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
2080 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
2081 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
2082 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 /* K[48..51] */
2083 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
2084 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
2085 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 /* K[60..63] */
2086 #endif /* !lint && !__lint */
2087
/*
 * When building for an ELF target, emit an empty .note.GNU-stack section
 * with no flags. By GNU toolchain convention the absence of the "x" flag
 * marks this object as not requiring an executable stack.
 */
2088 #ifdef __ELF__
2089 .section .note.GNU-stack,"",%progbits
2090 #endif
Cache object: 0ba99fab07b71d25f5db444c85528bda
|