1 /*
2 * ====================================================================
3 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4 * project. Rights for redistribution and usage in source and binary
5 * forms are granted according to the OpenSSL license.
6 * ====================================================================
7 *
8 * sha256/512_block procedure for x86_64.
9 *
10 * 40% improvement over compiler-generated code on Opteron. On EM64T
11 * sha256 was observed to run >80% faster and sha512 - >40%. No magical
12 * tricks, just straight implementation... I really wonder why gcc
13 * [being armed with inline assembler] fails to generate as fast code.
14 * The only thing which is cool about this module is that it's very
15 * same instruction sequence used for both SHA-256 and SHA-512. In
16 * former case the instructions operate on 32-bit operands, while in
17 * latter - on 64-bit ones. All I had to do is to get one flavor right,
18 * the other one passed the test right away:-)
19 *
20 * sha256_block runs in ~1005 cycles on Opteron, which gives you
21 * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
22 * frequency in GHz. sha512_block runs in ~1275 cycles, which results
23 * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
24 * Well, if you compare it to IA-64 implementation, which maintains
25 * X[16] in register bank[!], tends to 4 instructions per CPU clock
26 * cycle and runs in 1003 cycles, 1275 is very good result for 3-way
27 * issue Opteron pipeline and X[16] maintained in memory. So that *if*
28 * there is a way to improve it, *then* the only way would be to try to
29 * offload X[16] updates to SSE unit, but that would require "deeper"
30 * loop unroll, which in turn would naturally cause size blow-up, not
31 * to mention increased complexity! And once again, only *if* it's
32 * actually possible to noticeably improve overall ILP, instruction
33 * level parallelism, on a given CPU implementation in this case.
34 *
35 * Special note on Intel EM64T. While Opteron CPU exhibits perfect
36 * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
37 * [currently available] EM64T CPUs apparently are far from it. On the
38 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
39 * sha256_block:-( This is presumably because 64-bit shifts/rotates
40 * apparently are not atomic instructions, but implemented in microcode.
41 */
42
43 /*
44 * OpenSolaris OS modifications
45 *
46 * Sun elects to use this software under the BSD license.
47 *
48 * This source originates from OpenSSL file sha512-x86_64.pl at
49 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
50 * (presumably for future OpenSSL release 0.9.8h), with these changes:
51 *
52 * 1. Added perl "use strict" and declared variables.
53 *
54 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
55 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
56 *
57 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
58 * assemblers). Replaced the .picmeup macro with assembler code.
59 *
 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
 *    at the beginning of SHA2_CTX (the next field is 8-byte aligned); see the
 *    layout sketch below.
 */
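
/*
 * For reference, a simplified sketch (not the authoritative definition, which
 * lives in the sha2.h header) of the context layout that change 4 above
 * assumes:
 *
 *	typedef struct {
 *		uint32_t algotype;	OpenSolaris-only algorithm tag,
 *					followed by 4 bytes of padding so
 *					that the next field is 8-byte aligned
 *		uint64_t state[8];	SHA-512 chaining value H0..H7,
 *					starting at byte offset 8
 *		...			bit count, input buffer, etc.
 *	} SHA2_CTX;
 *
 * The "add $8, %rdi" in the prologue below skips algotype (plus its padding)
 * so that the first argument points directly at state[0].
 */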
63
/*
 * This file was generated by a perl script (sha512-x86_64.pl) that is used
 * to generate the sha256 and sha512 variants from the same code base.  The
 * comments from the original file have been pasted above.
 */
69
70
71 #if defined(lint) || defined(__lint)
72 #include <sys/stdint.h>
73 #include <sha2/sha2.h>
74
75 void
76 SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
77 {
78 (void) ctx, (void) in, (void) num;
79 }
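
/*
 * SHA512TransformBlocks(ctx, in, num) folds "num" consecutive 128-byte
 * (1024-bit) message blocks starting at "in" into the chaining value kept in
 * ctx; it performs no buffering, padding, or length handling itself.  A
 * minimal usage sketch, assuming the usual OpenSolaris SHA2Init() interface
 * and an input length that is an exact multiple of 128 bytes:
 *
 *	SHA2_CTX ctx;
 *
 *	SHA2Init(SHA512, &ctx);
 *	SHA512TransformBlocks(&ctx, data, datalen / 128);
 *
 * In practice the generic C SHA-2 code (SHA2Update()/SHA2Final()) is expected
 * to be the caller and takes care of buffering and padding.
 */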
80
81
82 #else
83 #define _ASM
84 #include <sys/asm_linkage.h>
85
86 ENTRY_NP(SHA512TransformBlocks)
87 .cfi_startproc
88 ENDBR
89 movq %rsp, %rax
90 .cfi_def_cfa_register %rax
91 push %rbx
92 .cfi_offset %rbx,-16
93 push %rbp
94 .cfi_offset %rbp,-24
95 push %r12
96 .cfi_offset %r12,-32
97 push %r13
98 .cfi_offset %r13,-40
99 push %r14
100 .cfi_offset %r14,-48
101 push %r15
102 .cfi_offset %r15,-56
103 mov %rsp,%rbp # copy %rsp
104 shl $4,%rdx # num*16
105 sub $16*8+4*8,%rsp
106 lea (%rsi,%rdx,8),%rdx # inp+num*16*8
107 and $-64,%rsp # align stack frame
108 add $8,%rdi # Skip OpenSolaris field, "algotype"
109 mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg
110 mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg
111 mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg
112 mov %rbp,16*8+3*8(%rsp) # save copy of %rsp
113 # echo ".cfi_cfa_expression %rsp+152,deref,+56" |
114 # openssl/crypto/perlasm/x86_64-xlate.pl
115 .cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
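
# Stack frame layout from here on (byte offsets from the 64-byte-aligned
# %rsp):
#
#	  0 - 127	X[0..15], the 16-entry message schedule window
#	128 (16*8+0*8)	saved ctx pointer (1st arg, already advanced past
#			the algotype field)
#	136 (16*8+1*8)	saved inp pointer (2nd arg)
#	144 (16*8+2*8)	end-of-input pointer, inp + num*128
#	152 (16*8+3*8)	%rsp value as of the last push above, restored
#			before the epilogue pops the callee-saved registers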
116
#.picmeup %rbp
# The .picmeup pseudo-directive, from perlasm/x86_64-xlate.pl, puts the
# address of the "next" instruction into the target register (%rbp).
# It would normally expand to the two instructions below; the nop exists
# only for mod-8 alignment and is not needed here, so it stays commented out.
	lea	.Llea(%rip),%rbp
#	nop
123
.Llea:
	lea	K512-.(%rbp),%rbp	# %rbp = address of the K512 table
126
127 mov 8*0(%rdi),%rax
128 mov 8*1(%rdi),%rbx
129 mov 8*2(%rdi),%rcx
130 mov 8*3(%rdi),%rdx
131 mov 8*4(%rdi),%r8
132 mov 8*5(%rdi),%r9
133 mov 8*6(%rdi),%r10
134 mov 8*7(%rdi),%r11
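
# Working-variable register assignment.  The unrolled rounds rotate the
# roles a..h through the registers instead of moving data, so the mapping
#
#	a=%rax  b=%rbx  c=%rcx  d=%rdx  e=%r8  f=%r9  g=%r10  h=%r11
#
# holds whenever the round number is a multiple of 8 (in particular at
# .Lloop, at .Lrounds_16_xx, and at the final state update).  Within the
# loop %rdi is reused as the round counter; the ctx pointer is reloaded
# from the stack once all 80 rounds are done.
#
# Each unrolled round below is, in C terms, the standard SHA-512 round
# (a sketch, using the same Ch/Maj formulations as the inline comments):
#
#	T1 = h + Sigma1(e) + Ch(e,f,g) + K512[i] + W[i];
#	T2 = Sigma0(a) + Maj(a,b,c);
#	h = g; g = f; f = e; e = d + T1;
#	d = c; c = b; b = a; a = T1 + T2;
#
#	Sigma1(x) = ROTR64(x,14) ^ ROTR64(x,18) ^ ROTR64(x,41)
#	Sigma0(x) = ROTR64(x,28) ^ ROTR64(x,34) ^ ROTR64(x,39)
#	Ch(x,y,z) = ((y ^ z) & x) ^ z
#	Maj(x,y,z) = ((x | z) & y) | (x & z)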
135 jmp .Lloop
136
137 .balign 16
138 .Lloop:
139 xor %rdi,%rdi
140 mov 8*0(%rsi),%r12
141 bswap %r12
142 mov %r8,%r13
143 mov %r8,%r14
144 mov %r9,%r15
145
146 ror $14,%r13
147 ror $18,%r14
148 xor %r10,%r15 # f^g
149
150 xor %r14,%r13
151 ror $23,%r14
152 and %r8,%r15 # (f^g)&e
153 mov %r12,0(%rsp)
154
155 xor %r14,%r13 # Sigma1(e)
156 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
157 add %r11,%r12 # T1+=h
158
159 mov %rax,%r11
160 add %r13,%r12 # T1+=Sigma1(e)
161
162 add %r15,%r12 # T1+=Ch(e,f,g)
163 mov %rax,%r13
164 mov %rax,%r14
165
166 ror $28,%r11
167 ror $34,%r13
168 mov %rax,%r15
169 add (%rbp,%rdi,8),%r12 # T1+=K[round]
170
171 xor %r13,%r11
172 ror $5,%r13
173 or %rcx,%r14 # a|c
174
175 xor %r13,%r11 # h=Sigma0(a)
176 and %rcx,%r15 # a&c
177 add %r12,%rdx # d+=T1
178
179 and %rbx,%r14 # (a|c)&b
180 add %r12,%r11 # h+=T1
181
182 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
183 lea 1(%rdi),%rdi # round++
184
185 add %r14,%r11 # h+=Maj(a,b,c)
186 mov 8*1(%rsi),%r12
187 bswap %r12
188 mov %rdx,%r13
189 mov %rdx,%r14
190 mov %r8,%r15
191
192 ror $14,%r13
193 ror $18,%r14
194 xor %r9,%r15 # f^g
195
196 xor %r14,%r13
197 ror $23,%r14
198 and %rdx,%r15 # (f^g)&e
199 mov %r12,8(%rsp)
200
201 xor %r14,%r13 # Sigma1(e)
202 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
203 add %r10,%r12 # T1+=h
204
205 mov %r11,%r10
206 add %r13,%r12 # T1+=Sigma1(e)
207
208 add %r15,%r12 # T1+=Ch(e,f,g)
209 mov %r11,%r13
210 mov %r11,%r14
211
212 ror $28,%r10
213 ror $34,%r13
214 mov %r11,%r15
215 add (%rbp,%rdi,8),%r12 # T1+=K[round]
216
217 xor %r13,%r10
218 ror $5,%r13
219 or %rbx,%r14 # a|c
220
221 xor %r13,%r10 # h=Sigma0(a)
222 and %rbx,%r15 # a&c
223 add %r12,%rcx # d+=T1
224
225 and %rax,%r14 # (a|c)&b
226 add %r12,%r10 # h+=T1
227
228 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
229 lea 1(%rdi),%rdi # round++
230
231 add %r14,%r10 # h+=Maj(a,b,c)
232 mov 8*2(%rsi),%r12
233 bswap %r12
234 mov %rcx,%r13
235 mov %rcx,%r14
236 mov %rdx,%r15
237
238 ror $14,%r13
239 ror $18,%r14
240 xor %r8,%r15 # f^g
241
242 xor %r14,%r13
243 ror $23,%r14
244 and %rcx,%r15 # (f^g)&e
245 mov %r12,16(%rsp)
246
247 xor %r14,%r13 # Sigma1(e)
248 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
249 add %r9,%r12 # T1+=h
250
251 mov %r10,%r9
252 add %r13,%r12 # T1+=Sigma1(e)
253
254 add %r15,%r12 # T1+=Ch(e,f,g)
255 mov %r10,%r13
256 mov %r10,%r14
257
258 ror $28,%r9
259 ror $34,%r13
260 mov %r10,%r15
261 add (%rbp,%rdi,8),%r12 # T1+=K[round]
262
263 xor %r13,%r9
264 ror $5,%r13
265 or %rax,%r14 # a|c
266
267 xor %r13,%r9 # h=Sigma0(a)
268 and %rax,%r15 # a&c
269 add %r12,%rbx # d+=T1
270
271 and %r11,%r14 # (a|c)&b
272 add %r12,%r9 # h+=T1
273
274 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
275 lea 1(%rdi),%rdi # round++
276
277 add %r14,%r9 # h+=Maj(a,b,c)
278 mov 8*3(%rsi),%r12
279 bswap %r12
280 mov %rbx,%r13
281 mov %rbx,%r14
282 mov %rcx,%r15
283
284 ror $14,%r13
285 ror $18,%r14
286 xor %rdx,%r15 # f^g
287
288 xor %r14,%r13
289 ror $23,%r14
290 and %rbx,%r15 # (f^g)&e
291 mov %r12,24(%rsp)
292
293 xor %r14,%r13 # Sigma1(e)
294 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
295 add %r8,%r12 # T1+=h
296
297 mov %r9,%r8
298 add %r13,%r12 # T1+=Sigma1(e)
299
300 add %r15,%r12 # T1+=Ch(e,f,g)
301 mov %r9,%r13
302 mov %r9,%r14
303
304 ror $28,%r8
305 ror $34,%r13
306 mov %r9,%r15
307 add (%rbp,%rdi,8),%r12 # T1+=K[round]
308
309 xor %r13,%r8
310 ror $5,%r13
311 or %r11,%r14 # a|c
312
313 xor %r13,%r8 # h=Sigma0(a)
314 and %r11,%r15 # a&c
315 add %r12,%rax # d+=T1
316
317 and %r10,%r14 # (a|c)&b
318 add %r12,%r8 # h+=T1
319
320 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
321 lea 1(%rdi),%rdi # round++
322
323 add %r14,%r8 # h+=Maj(a,b,c)
324 mov 8*4(%rsi),%r12
325 bswap %r12
326 mov %rax,%r13
327 mov %rax,%r14
328 mov %rbx,%r15
329
330 ror $14,%r13
331 ror $18,%r14
332 xor %rcx,%r15 # f^g
333
334 xor %r14,%r13
335 ror $23,%r14
336 and %rax,%r15 # (f^g)&e
337 mov %r12,32(%rsp)
338
339 xor %r14,%r13 # Sigma1(e)
340 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
341 add %rdx,%r12 # T1+=h
342
343 mov %r8,%rdx
344 add %r13,%r12 # T1+=Sigma1(e)
345
346 add %r15,%r12 # T1+=Ch(e,f,g)
347 mov %r8,%r13
348 mov %r8,%r14
349
350 ror $28,%rdx
351 ror $34,%r13
352 mov %r8,%r15
353 add (%rbp,%rdi,8),%r12 # T1+=K[round]
354
355 xor %r13,%rdx
356 ror $5,%r13
357 or %r10,%r14 # a|c
358
359 xor %r13,%rdx # h=Sigma0(a)
360 and %r10,%r15 # a&c
361 add %r12,%r11 # d+=T1
362
363 and %r9,%r14 # (a|c)&b
364 add %r12,%rdx # h+=T1
365
366 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
367 lea 1(%rdi),%rdi # round++
368
369 add %r14,%rdx # h+=Maj(a,b,c)
370 mov 8*5(%rsi),%r12
371 bswap %r12
372 mov %r11,%r13
373 mov %r11,%r14
374 mov %rax,%r15
375
376 ror $14,%r13
377 ror $18,%r14
378 xor %rbx,%r15 # f^g
379
380 xor %r14,%r13
381 ror $23,%r14
382 and %r11,%r15 # (f^g)&e
383 mov %r12,40(%rsp)
384
385 xor %r14,%r13 # Sigma1(e)
386 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
387 add %rcx,%r12 # T1+=h
388
389 mov %rdx,%rcx
390 add %r13,%r12 # T1+=Sigma1(e)
391
392 add %r15,%r12 # T1+=Ch(e,f,g)
393 mov %rdx,%r13
394 mov %rdx,%r14
395
396 ror $28,%rcx
397 ror $34,%r13
398 mov %rdx,%r15
399 add (%rbp,%rdi,8),%r12 # T1+=K[round]
400
401 xor %r13,%rcx
402 ror $5,%r13
403 or %r9,%r14 # a|c
404
405 xor %r13,%rcx # h=Sigma0(a)
406 and %r9,%r15 # a&c
407 add %r12,%r10 # d+=T1
408
409 and %r8,%r14 # (a|c)&b
410 add %r12,%rcx # h+=T1
411
412 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
413 lea 1(%rdi),%rdi # round++
414
415 add %r14,%rcx # h+=Maj(a,b,c)
416 mov 8*6(%rsi),%r12
417 bswap %r12
418 mov %r10,%r13
419 mov %r10,%r14
420 mov %r11,%r15
421
422 ror $14,%r13
423 ror $18,%r14
424 xor %rax,%r15 # f^g
425
426 xor %r14,%r13
427 ror $23,%r14
428 and %r10,%r15 # (f^g)&e
429 mov %r12,48(%rsp)
430
431 xor %r14,%r13 # Sigma1(e)
432 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
433 add %rbx,%r12 # T1+=h
434
435 mov %rcx,%rbx
436 add %r13,%r12 # T1+=Sigma1(e)
437
438 add %r15,%r12 # T1+=Ch(e,f,g)
439 mov %rcx,%r13
440 mov %rcx,%r14
441
442 ror $28,%rbx
443 ror $34,%r13
444 mov %rcx,%r15
445 add (%rbp,%rdi,8),%r12 # T1+=K[round]
446
447 xor %r13,%rbx
448 ror $5,%r13
449 or %r8,%r14 # a|c
450
451 xor %r13,%rbx # h=Sigma0(a)
452 and %r8,%r15 # a&c
453 add %r12,%r9 # d+=T1
454
455 and %rdx,%r14 # (a|c)&b
456 add %r12,%rbx # h+=T1
457
458 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
459 lea 1(%rdi),%rdi # round++
460
461 add %r14,%rbx # h+=Maj(a,b,c)
462 mov 8*7(%rsi),%r12
463 bswap %r12
464 mov %r9,%r13
465 mov %r9,%r14
466 mov %r10,%r15
467
468 ror $14,%r13
469 ror $18,%r14
470 xor %r11,%r15 # f^g
471
472 xor %r14,%r13
473 ror $23,%r14
474 and %r9,%r15 # (f^g)&e
475 mov %r12,56(%rsp)
476
477 xor %r14,%r13 # Sigma1(e)
478 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
479 add %rax,%r12 # T1+=h
480
481 mov %rbx,%rax
482 add %r13,%r12 # T1+=Sigma1(e)
483
484 add %r15,%r12 # T1+=Ch(e,f,g)
485 mov %rbx,%r13
486 mov %rbx,%r14
487
488 ror $28,%rax
489 ror $34,%r13
490 mov %rbx,%r15
491 add (%rbp,%rdi,8),%r12 # T1+=K[round]
492
493 xor %r13,%rax
494 ror $5,%r13
495 or %rdx,%r14 # a|c
496
497 xor %r13,%rax # h=Sigma0(a)
498 and %rdx,%r15 # a&c
499 add %r12,%r8 # d+=T1
500
501 and %rcx,%r14 # (a|c)&b
502 add %r12,%rax # h+=T1
503
504 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
505 lea 1(%rdi),%rdi # round++
506
507 add %r14,%rax # h+=Maj(a,b,c)
508 mov 8*8(%rsi),%r12
509 bswap %r12
510 mov %r8,%r13
511 mov %r8,%r14
512 mov %r9,%r15
513
514 ror $14,%r13
515 ror $18,%r14
516 xor %r10,%r15 # f^g
517
518 xor %r14,%r13
519 ror $23,%r14
520 and %r8,%r15 # (f^g)&e
521 mov %r12,64(%rsp)
522
523 xor %r14,%r13 # Sigma1(e)
524 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
525 add %r11,%r12 # T1+=h
526
527 mov %rax,%r11
528 add %r13,%r12 # T1+=Sigma1(e)
529
530 add %r15,%r12 # T1+=Ch(e,f,g)
531 mov %rax,%r13
532 mov %rax,%r14
533
534 ror $28,%r11
535 ror $34,%r13
536 mov %rax,%r15
537 add (%rbp,%rdi,8),%r12 # T1+=K[round]
538
539 xor %r13,%r11
540 ror $5,%r13
541 or %rcx,%r14 # a|c
542
543 xor %r13,%r11 # h=Sigma0(a)
544 and %rcx,%r15 # a&c
545 add %r12,%rdx # d+=T1
546
547 and %rbx,%r14 # (a|c)&b
548 add %r12,%r11 # h+=T1
549
550 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
551 lea 1(%rdi),%rdi # round++
552
553 add %r14,%r11 # h+=Maj(a,b,c)
554 mov 8*9(%rsi),%r12
555 bswap %r12
556 mov %rdx,%r13
557 mov %rdx,%r14
558 mov %r8,%r15
559
560 ror $14,%r13
561 ror $18,%r14
562 xor %r9,%r15 # f^g
563
564 xor %r14,%r13
565 ror $23,%r14
566 and %rdx,%r15 # (f^g)&e
567 mov %r12,72(%rsp)
568
569 xor %r14,%r13 # Sigma1(e)
570 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
571 add %r10,%r12 # T1+=h
572
573 mov %r11,%r10
574 add %r13,%r12 # T1+=Sigma1(e)
575
576 add %r15,%r12 # T1+=Ch(e,f,g)
577 mov %r11,%r13
578 mov %r11,%r14
579
580 ror $28,%r10
581 ror $34,%r13
582 mov %r11,%r15
583 add (%rbp,%rdi,8),%r12 # T1+=K[round]
584
585 xor %r13,%r10
586 ror $5,%r13
587 or %rbx,%r14 # a|c
588
589 xor %r13,%r10 # h=Sigma0(a)
590 and %rbx,%r15 # a&c
591 add %r12,%rcx # d+=T1
592
593 and %rax,%r14 # (a|c)&b
594 add %r12,%r10 # h+=T1
595
596 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
597 lea 1(%rdi),%rdi # round++
598
599 add %r14,%r10 # h+=Maj(a,b,c)
600 mov 8*10(%rsi),%r12
601 bswap %r12
602 mov %rcx,%r13
603 mov %rcx,%r14
604 mov %rdx,%r15
605
606 ror $14,%r13
607 ror $18,%r14
608 xor %r8,%r15 # f^g
609
610 xor %r14,%r13
611 ror $23,%r14
612 and %rcx,%r15 # (f^g)&e
613 mov %r12,80(%rsp)
614
615 xor %r14,%r13 # Sigma1(e)
616 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
617 add %r9,%r12 # T1+=h
618
619 mov %r10,%r9
620 add %r13,%r12 # T1+=Sigma1(e)
621
622 add %r15,%r12 # T1+=Ch(e,f,g)
623 mov %r10,%r13
624 mov %r10,%r14
625
626 ror $28,%r9
627 ror $34,%r13
628 mov %r10,%r15
629 add (%rbp,%rdi,8),%r12 # T1+=K[round]
630
631 xor %r13,%r9
632 ror $5,%r13
633 or %rax,%r14 # a|c
634
635 xor %r13,%r9 # h=Sigma0(a)
636 and %rax,%r15 # a&c
637 add %r12,%rbx # d+=T1
638
639 and %r11,%r14 # (a|c)&b
640 add %r12,%r9 # h+=T1
641
642 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
643 lea 1(%rdi),%rdi # round++
644
645 add %r14,%r9 # h+=Maj(a,b,c)
646 mov 8*11(%rsi),%r12
647 bswap %r12
648 mov %rbx,%r13
649 mov %rbx,%r14
650 mov %rcx,%r15
651
652 ror $14,%r13
653 ror $18,%r14
654 xor %rdx,%r15 # f^g
655
656 xor %r14,%r13
657 ror $23,%r14
658 and %rbx,%r15 # (f^g)&e
659 mov %r12,88(%rsp)
660
661 xor %r14,%r13 # Sigma1(e)
662 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
663 add %r8,%r12 # T1+=h
664
665 mov %r9,%r8
666 add %r13,%r12 # T1+=Sigma1(e)
667
668 add %r15,%r12 # T1+=Ch(e,f,g)
669 mov %r9,%r13
670 mov %r9,%r14
671
672 ror $28,%r8
673 ror $34,%r13
674 mov %r9,%r15
675 add (%rbp,%rdi,8),%r12 # T1+=K[round]
676
677 xor %r13,%r8
678 ror $5,%r13
679 or %r11,%r14 # a|c
680
681 xor %r13,%r8 # h=Sigma0(a)
682 and %r11,%r15 # a&c
683 add %r12,%rax # d+=T1
684
685 and %r10,%r14 # (a|c)&b
686 add %r12,%r8 # h+=T1
687
688 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
689 lea 1(%rdi),%rdi # round++
690
691 add %r14,%r8 # h+=Maj(a,b,c)
692 mov 8*12(%rsi),%r12
693 bswap %r12
694 mov %rax,%r13
695 mov %rax,%r14
696 mov %rbx,%r15
697
698 ror $14,%r13
699 ror $18,%r14
700 xor %rcx,%r15 # f^g
701
702 xor %r14,%r13
703 ror $23,%r14
704 and %rax,%r15 # (f^g)&e
705 mov %r12,96(%rsp)
706
707 xor %r14,%r13 # Sigma1(e)
708 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
709 add %rdx,%r12 # T1+=h
710
711 mov %r8,%rdx
712 add %r13,%r12 # T1+=Sigma1(e)
713
714 add %r15,%r12 # T1+=Ch(e,f,g)
715 mov %r8,%r13
716 mov %r8,%r14
717
718 ror $28,%rdx
719 ror $34,%r13
720 mov %r8,%r15
721 add (%rbp,%rdi,8),%r12 # T1+=K[round]
722
723 xor %r13,%rdx
724 ror $5,%r13
725 or %r10,%r14 # a|c
726
727 xor %r13,%rdx # h=Sigma0(a)
728 and %r10,%r15 # a&c
729 add %r12,%r11 # d+=T1
730
731 and %r9,%r14 # (a|c)&b
732 add %r12,%rdx # h+=T1
733
734 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
735 lea 1(%rdi),%rdi # round++
736
737 add %r14,%rdx # h+=Maj(a,b,c)
738 mov 8*13(%rsi),%r12
739 bswap %r12
740 mov %r11,%r13
741 mov %r11,%r14
742 mov %rax,%r15
743
744 ror $14,%r13
745 ror $18,%r14
746 xor %rbx,%r15 # f^g
747
748 xor %r14,%r13
749 ror $23,%r14
750 and %r11,%r15 # (f^g)&e
751 mov %r12,104(%rsp)
752
753 xor %r14,%r13 # Sigma1(e)
754 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
755 add %rcx,%r12 # T1+=h
756
757 mov %rdx,%rcx
758 add %r13,%r12 # T1+=Sigma1(e)
759
760 add %r15,%r12 # T1+=Ch(e,f,g)
761 mov %rdx,%r13
762 mov %rdx,%r14
763
764 ror $28,%rcx
765 ror $34,%r13
766 mov %rdx,%r15
767 add (%rbp,%rdi,8),%r12 # T1+=K[round]
768
769 xor %r13,%rcx
770 ror $5,%r13
771 or %r9,%r14 # a|c
772
773 xor %r13,%rcx # h=Sigma0(a)
774 and %r9,%r15 # a&c
775 add %r12,%r10 # d+=T1
776
777 and %r8,%r14 # (a|c)&b
778 add %r12,%rcx # h+=T1
779
780 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
781 lea 1(%rdi),%rdi # round++
782
783 add %r14,%rcx # h+=Maj(a,b,c)
784 mov 8*14(%rsi),%r12
785 bswap %r12
786 mov %r10,%r13
787 mov %r10,%r14
788 mov %r11,%r15
789
790 ror $14,%r13
791 ror $18,%r14
792 xor %rax,%r15 # f^g
793
794 xor %r14,%r13
795 ror $23,%r14
796 and %r10,%r15 # (f^g)&e
797 mov %r12,112(%rsp)
798
799 xor %r14,%r13 # Sigma1(e)
800 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
801 add %rbx,%r12 # T1+=h
802
803 mov %rcx,%rbx
804 add %r13,%r12 # T1+=Sigma1(e)
805
806 add %r15,%r12 # T1+=Ch(e,f,g)
807 mov %rcx,%r13
808 mov %rcx,%r14
809
810 ror $28,%rbx
811 ror $34,%r13
812 mov %rcx,%r15
813 add (%rbp,%rdi,8),%r12 # T1+=K[round]
814
815 xor %r13,%rbx
816 ror $5,%r13
817 or %r8,%r14 # a|c
818
819 xor %r13,%rbx # h=Sigma0(a)
820 and %r8,%r15 # a&c
821 add %r12,%r9 # d+=T1
822
823 and %rdx,%r14 # (a|c)&b
824 add %r12,%rbx # h+=T1
825
826 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
827 lea 1(%rdi),%rdi # round++
828
829 add %r14,%rbx # h+=Maj(a,b,c)
830 mov 8*15(%rsi),%r12
831 bswap %r12
832 mov %r9,%r13
833 mov %r9,%r14
834 mov %r10,%r15
835
836 ror $14,%r13
837 ror $18,%r14
838 xor %r11,%r15 # f^g
839
840 xor %r14,%r13
841 ror $23,%r14
842 and %r9,%r15 # (f^g)&e
843 mov %r12,120(%rsp)
844
845 xor %r14,%r13 # Sigma1(e)
846 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
847 add %rax,%r12 # T1+=h
848
849 mov %rbx,%rax
850 add %r13,%r12 # T1+=Sigma1(e)
851
852 add %r15,%r12 # T1+=Ch(e,f,g)
853 mov %rbx,%r13
854 mov %rbx,%r14
855
856 ror $28,%rax
857 ror $34,%r13
858 mov %rbx,%r15
859 add (%rbp,%rdi,8),%r12 # T1+=K[round]
860
861 xor %r13,%rax
862 ror $5,%r13
863 or %rdx,%r14 # a|c
864
865 xor %r13,%rax # h=Sigma0(a)
866 and %rdx,%r15 # a&c
867 add %r12,%r8 # d+=T1
868
869 and %rcx,%r14 # (a|c)&b
870 add %r12,%rax # h+=T1
871
872 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
873 lea 1(%rdi),%rdi # round++
874
875 add %r14,%rax # h+=Maj(a,b,c)
876 jmp .Lrounds_16_xx
877 .balign 16
878 .Lrounds_16_xx:
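# Rounds 16..79.  Before each of these rounds, one entry of the 16-word
# schedule window X[] on the stack is updated in place; in C terms (a sketch):
#
#	X[i & 15] += sigma0(X[(i + 1) & 15]) + X[(i + 9) & 15] +
#	    sigma1(X[(i + 14) & 15]);
#	W[i] = X[i & 15];
#
#	sigma0(x) = ROTR64(x,1) ^ ROTR64(x,8) ^ (x >> 7)
#	sigma1(x) = ROTR64(x,19) ^ ROTR64(x,61) ^ (x >> 6)
#
# The round proper then proceeds exactly as in rounds 0..15 above.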
879 mov 8(%rsp),%r13
880 mov 112(%rsp),%r12
881
882 mov %r13,%r15
883
884 shr $7,%r13
885 ror $1,%r15
886
887 xor %r15,%r13
888 ror $7,%r15
889
890 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
891 mov %r12,%r14
892
893 shr $6,%r12
894 ror $19,%r14
895
896 xor %r14,%r12
897 ror $42,%r14
898
899 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
900
901 add %r13,%r12
902
903 add 72(%rsp),%r12
904
905 add 0(%rsp),%r12
906 mov %r8,%r13
907 mov %r8,%r14
908 mov %r9,%r15
909
910 ror $14,%r13
911 ror $18,%r14
912 xor %r10,%r15 # f^g
913
914 xor %r14,%r13
915 ror $23,%r14
916 and %r8,%r15 # (f^g)&e
917 mov %r12,0(%rsp)
918
919 xor %r14,%r13 # Sigma1(e)
920 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
921 add %r11,%r12 # T1+=h
922
923 mov %rax,%r11
924 add %r13,%r12 # T1+=Sigma1(e)
925
926 add %r15,%r12 # T1+=Ch(e,f,g)
927 mov %rax,%r13
928 mov %rax,%r14
929
930 ror $28,%r11
931 ror $34,%r13
932 mov %rax,%r15
933 add (%rbp,%rdi,8),%r12 # T1+=K[round]
934
935 xor %r13,%r11
936 ror $5,%r13
937 or %rcx,%r14 # a|c
938
939 xor %r13,%r11 # h=Sigma0(a)
940 and %rcx,%r15 # a&c
941 add %r12,%rdx # d+=T1
942
943 and %rbx,%r14 # (a|c)&b
944 add %r12,%r11 # h+=T1
945
946 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
947 lea 1(%rdi),%rdi # round++
948
949 add %r14,%r11 # h+=Maj(a,b,c)
950 mov 16(%rsp),%r13
951 mov 120(%rsp),%r12
952
953 mov %r13,%r15
954
955 shr $7,%r13
956 ror $1,%r15
957
958 xor %r15,%r13
959 ror $7,%r15
960
961 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
962 mov %r12,%r14
963
964 shr $6,%r12
965 ror $19,%r14
966
967 xor %r14,%r12
968 ror $42,%r14
969
970 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
971
972 add %r13,%r12
973
974 add 80(%rsp),%r12
975
976 add 8(%rsp),%r12
977 mov %rdx,%r13
978 mov %rdx,%r14
979 mov %r8,%r15
980
981 ror $14,%r13
982 ror $18,%r14
983 xor %r9,%r15 # f^g
984
985 xor %r14,%r13
986 ror $23,%r14
987 and %rdx,%r15 # (f^g)&e
988 mov %r12,8(%rsp)
989
990 xor %r14,%r13 # Sigma1(e)
991 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
992 add %r10,%r12 # T1+=h
993
994 mov %r11,%r10
995 add %r13,%r12 # T1+=Sigma1(e)
996
997 add %r15,%r12 # T1+=Ch(e,f,g)
998 mov %r11,%r13
999 mov %r11,%r14
1000
1001 ror $28,%r10
1002 ror $34,%r13
1003 mov %r11,%r15
1004 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1005
1006 xor %r13,%r10
1007 ror $5,%r13
1008 or %rbx,%r14 # a|c
1009
1010 xor %r13,%r10 # h=Sigma0(a)
1011 and %rbx,%r15 # a&c
1012 add %r12,%rcx # d+=T1
1013
1014 and %rax,%r14 # (a|c)&b
1015 add %r12,%r10 # h+=T1
1016
1017 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1018 lea 1(%rdi),%rdi # round++
1019
1020 add %r14,%r10 # h+=Maj(a,b,c)
1021 mov 24(%rsp),%r13
1022 mov 0(%rsp),%r12
1023
1024 mov %r13,%r15
1025
1026 shr $7,%r13
1027 ror $1,%r15
1028
1029 xor %r15,%r13
1030 ror $7,%r15
1031
1032 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1033 mov %r12,%r14
1034
1035 shr $6,%r12
1036 ror $19,%r14
1037
1038 xor %r14,%r12
1039 ror $42,%r14
1040
1041 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1042
1043 add %r13,%r12
1044
1045 add 88(%rsp),%r12
1046
1047 add 16(%rsp),%r12
1048 mov %rcx,%r13
1049 mov %rcx,%r14
1050 mov %rdx,%r15
1051
1052 ror $14,%r13
1053 ror $18,%r14
1054 xor %r8,%r15 # f^g
1055
1056 xor %r14,%r13
1057 ror $23,%r14
1058 and %rcx,%r15 # (f^g)&e
1059 mov %r12,16(%rsp)
1060
1061 xor %r14,%r13 # Sigma1(e)
1062 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
1063 add %r9,%r12 # T1+=h
1064
1065 mov %r10,%r9
1066 add %r13,%r12 # T1+=Sigma1(e)
1067
1068 add %r15,%r12 # T1+=Ch(e,f,g)
1069 mov %r10,%r13
1070 mov %r10,%r14
1071
1072 ror $28,%r9
1073 ror $34,%r13
1074 mov %r10,%r15
1075 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1076
1077 xor %r13,%r9
1078 ror $5,%r13
1079 or %rax,%r14 # a|c
1080
1081 xor %r13,%r9 # h=Sigma0(a)
1082 and %rax,%r15 # a&c
1083 add %r12,%rbx # d+=T1
1084
1085 and %r11,%r14 # (a|c)&b
1086 add %r12,%r9 # h+=T1
1087
1088 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1089 lea 1(%rdi),%rdi # round++
1090
1091 add %r14,%r9 # h+=Maj(a,b,c)
1092 mov 32(%rsp),%r13
1093 mov 8(%rsp),%r12
1094
1095 mov %r13,%r15
1096
1097 shr $7,%r13
1098 ror $1,%r15
1099
1100 xor %r15,%r13
1101 ror $7,%r15
1102
1103 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1104 mov %r12,%r14
1105
1106 shr $6,%r12
1107 ror $19,%r14
1108
1109 xor %r14,%r12
1110 ror $42,%r14
1111
1112 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1113
1114 add %r13,%r12
1115
1116 add 96(%rsp),%r12
1117
1118 add 24(%rsp),%r12
1119 mov %rbx,%r13
1120 mov %rbx,%r14
1121 mov %rcx,%r15
1122
1123 ror $14,%r13
1124 ror $18,%r14
1125 xor %rdx,%r15 # f^g
1126
1127 xor %r14,%r13
1128 ror $23,%r14
1129 and %rbx,%r15 # (f^g)&e
1130 mov %r12,24(%rsp)
1131
1132 xor %r14,%r13 # Sigma1(e)
1133 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1134 add %r8,%r12 # T1+=h
1135
1136 mov %r9,%r8
1137 add %r13,%r12 # T1+=Sigma1(e)
1138
1139 add %r15,%r12 # T1+=Ch(e,f,g)
1140 mov %r9,%r13
1141 mov %r9,%r14
1142
1143 ror $28,%r8
1144 ror $34,%r13
1145 mov %r9,%r15
1146 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1147
1148 xor %r13,%r8
1149 ror $5,%r13
1150 or %r11,%r14 # a|c
1151
1152 xor %r13,%r8 # h=Sigma0(a)
1153 and %r11,%r15 # a&c
1154 add %r12,%rax # d+=T1
1155
1156 and %r10,%r14 # (a|c)&b
1157 add %r12,%r8 # h+=T1
1158
1159 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1160 lea 1(%rdi),%rdi # round++
1161
1162 add %r14,%r8 # h+=Maj(a,b,c)
1163 mov 40(%rsp),%r13
1164 mov 16(%rsp),%r12
1165
1166 mov %r13,%r15
1167
1168 shr $7,%r13
1169 ror $1,%r15
1170
1171 xor %r15,%r13
1172 ror $7,%r15
1173
1174 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1175 mov %r12,%r14
1176
1177 shr $6,%r12
1178 ror $19,%r14
1179
1180 xor %r14,%r12
1181 ror $42,%r14
1182
1183 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1184
1185 add %r13,%r12
1186
1187 add 104(%rsp),%r12
1188
1189 add 32(%rsp),%r12
1190 mov %rax,%r13
1191 mov %rax,%r14
1192 mov %rbx,%r15
1193
1194 ror $14,%r13
1195 ror $18,%r14
1196 xor %rcx,%r15 # f^g
1197
1198 xor %r14,%r13
1199 ror $23,%r14
1200 and %rax,%r15 # (f^g)&e
1201 mov %r12,32(%rsp)
1202
1203 xor %r14,%r13 # Sigma1(e)
1204 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1205 add %rdx,%r12 # T1+=h
1206
1207 mov %r8,%rdx
1208 add %r13,%r12 # T1+=Sigma1(e)
1209
1210 add %r15,%r12 # T1+=Ch(e,f,g)
1211 mov %r8,%r13
1212 mov %r8,%r14
1213
1214 ror $28,%rdx
1215 ror $34,%r13
1216 mov %r8,%r15
1217 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1218
1219 xor %r13,%rdx
1220 ror $5,%r13
1221 or %r10,%r14 # a|c
1222
1223 xor %r13,%rdx # h=Sigma0(a)
1224 and %r10,%r15 # a&c
1225 add %r12,%r11 # d+=T1
1226
1227 and %r9,%r14 # (a|c)&b
1228 add %r12,%rdx # h+=T1
1229
1230 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1231 lea 1(%rdi),%rdi # round++
1232
1233 add %r14,%rdx # h+=Maj(a,b,c)
1234 mov 48(%rsp),%r13
1235 mov 24(%rsp),%r12
1236
1237 mov %r13,%r15
1238
1239 shr $7,%r13
1240 ror $1,%r15
1241
1242 xor %r15,%r13
1243 ror $7,%r15
1244
1245 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1246 mov %r12,%r14
1247
1248 shr $6,%r12
1249 ror $19,%r14
1250
1251 xor %r14,%r12
1252 ror $42,%r14
1253
1254 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1255
1256 add %r13,%r12
1257
1258 add 112(%rsp),%r12
1259
1260 add 40(%rsp),%r12
1261 mov %r11,%r13
1262 mov %r11,%r14
1263 mov %rax,%r15
1264
1265 ror $14,%r13
1266 ror $18,%r14
1267 xor %rbx,%r15 # f^g
1268
1269 xor %r14,%r13
1270 ror $23,%r14
1271 and %r11,%r15 # (f^g)&e
1272 mov %r12,40(%rsp)
1273
1274 xor %r14,%r13 # Sigma1(e)
1275 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1276 add %rcx,%r12 # T1+=h
1277
1278 mov %rdx,%rcx
1279 add %r13,%r12 # T1+=Sigma1(e)
1280
1281 add %r15,%r12 # T1+=Ch(e,f,g)
1282 mov %rdx,%r13
1283 mov %rdx,%r14
1284
1285 ror $28,%rcx
1286 ror $34,%r13
1287 mov %rdx,%r15
1288 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1289
1290 xor %r13,%rcx
1291 ror $5,%r13
1292 or %r9,%r14 # a|c
1293
1294 xor %r13,%rcx # h=Sigma0(a)
1295 and %r9,%r15 # a&c
1296 add %r12,%r10 # d+=T1
1297
1298 and %r8,%r14 # (a|c)&b
1299 add %r12,%rcx # h+=T1
1300
1301 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1302 lea 1(%rdi),%rdi # round++
1303
1304 add %r14,%rcx # h+=Maj(a,b,c)
1305 mov 56(%rsp),%r13
1306 mov 32(%rsp),%r12
1307
1308 mov %r13,%r15
1309
1310 shr $7,%r13
1311 ror $1,%r15
1312
1313 xor %r15,%r13
1314 ror $7,%r15
1315
1316 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1317 mov %r12,%r14
1318
1319 shr $6,%r12
1320 ror $19,%r14
1321
1322 xor %r14,%r12
1323 ror $42,%r14
1324
1325 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1326
1327 add %r13,%r12
1328
1329 add 120(%rsp),%r12
1330
1331 add 48(%rsp),%r12
1332 mov %r10,%r13
1333 mov %r10,%r14
1334 mov %r11,%r15
1335
1336 ror $14,%r13
1337 ror $18,%r14
1338 xor %rax,%r15 # f^g
1339
1340 xor %r14,%r13
1341 ror $23,%r14
1342 and %r10,%r15 # (f^g)&e
1343 mov %r12,48(%rsp)
1344
1345 xor %r14,%r13 # Sigma1(e)
1346 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
1347 add %rbx,%r12 # T1+=h
1348
1349 mov %rcx,%rbx
1350 add %r13,%r12 # T1+=Sigma1(e)
1351
1352 add %r15,%r12 # T1+=Ch(e,f,g)
1353 mov %rcx,%r13
1354 mov %rcx,%r14
1355
1356 ror $28,%rbx
1357 ror $34,%r13
1358 mov %rcx,%r15
1359 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1360
1361 xor %r13,%rbx
1362 ror $5,%r13
1363 or %r8,%r14 # a|c
1364
1365 xor %r13,%rbx # h=Sigma0(a)
1366 and %r8,%r15 # a&c
1367 add %r12,%r9 # d+=T1
1368
1369 and %rdx,%r14 # (a|c)&b
1370 add %r12,%rbx # h+=T1
1371
1372 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1373 lea 1(%rdi),%rdi # round++
1374
1375 add %r14,%rbx # h+=Maj(a,b,c)
1376 mov 64(%rsp),%r13
1377 mov 40(%rsp),%r12
1378
1379 mov %r13,%r15
1380
1381 shr $7,%r13
1382 ror $1,%r15
1383
1384 xor %r15,%r13
1385 ror $7,%r15
1386
1387 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1388 mov %r12,%r14
1389
1390 shr $6,%r12
1391 ror $19,%r14
1392
1393 xor %r14,%r12
1394 ror $42,%r14
1395
1396 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1397
1398 add %r13,%r12
1399
1400 add 0(%rsp),%r12
1401
1402 add 56(%rsp),%r12
1403 mov %r9,%r13
1404 mov %r9,%r14
1405 mov %r10,%r15
1406
1407 ror $14,%r13
1408 ror $18,%r14
1409 xor %r11,%r15 # f^g
1410
1411 xor %r14,%r13
1412 ror $23,%r14
1413 and %r9,%r15 # (f^g)&e
1414 mov %r12,56(%rsp)
1415
1416 xor %r14,%r13 # Sigma1(e)
1417 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
1418 add %rax,%r12 # T1+=h
1419
1420 mov %rbx,%rax
1421 add %r13,%r12 # T1+=Sigma1(e)
1422
1423 add %r15,%r12 # T1+=Ch(e,f,g)
1424 mov %rbx,%r13
1425 mov %rbx,%r14
1426
1427 ror $28,%rax
1428 ror $34,%r13
1429 mov %rbx,%r15
1430 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1431
1432 xor %r13,%rax
1433 ror $5,%r13
1434 or %rdx,%r14 # a|c
1435
1436 xor %r13,%rax # h=Sigma0(a)
1437 and %rdx,%r15 # a&c
1438 add %r12,%r8 # d+=T1
1439
1440 and %rcx,%r14 # (a|c)&b
1441 add %r12,%rax # h+=T1
1442
1443 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1444 lea 1(%rdi),%rdi # round++
1445
1446 add %r14,%rax # h+=Maj(a,b,c)
1447 mov 72(%rsp),%r13
1448 mov 48(%rsp),%r12
1449
1450 mov %r13,%r15
1451
1452 shr $7,%r13
1453 ror $1,%r15
1454
1455 xor %r15,%r13
1456 ror $7,%r15
1457
1458 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1459 mov %r12,%r14
1460
1461 shr $6,%r12
1462 ror $19,%r14
1463
1464 xor %r14,%r12
1465 ror $42,%r14
1466
1467 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1468
1469 add %r13,%r12
1470
1471 add 8(%rsp),%r12
1472
1473 add 64(%rsp),%r12
1474 mov %r8,%r13
1475 mov %r8,%r14
1476 mov %r9,%r15
1477
1478 ror $14,%r13
1479 ror $18,%r14
1480 xor %r10,%r15 # f^g
1481
1482 xor %r14,%r13
1483 ror $23,%r14
1484 and %r8,%r15 # (f^g)&e
1485 mov %r12,64(%rsp)
1486
1487 xor %r14,%r13 # Sigma1(e)
1488 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
1489 add %r11,%r12 # T1+=h
1490
1491 mov %rax,%r11
1492 add %r13,%r12 # T1+=Sigma1(e)
1493
1494 add %r15,%r12 # T1+=Ch(e,f,g)
1495 mov %rax,%r13
1496 mov %rax,%r14
1497
1498 ror $28,%r11
1499 ror $34,%r13
1500 mov %rax,%r15
1501 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1502
1503 xor %r13,%r11
1504 ror $5,%r13
1505 or %rcx,%r14 # a|c
1506
1507 xor %r13,%r11 # h=Sigma0(a)
1508 and %rcx,%r15 # a&c
1509 add %r12,%rdx # d+=T1
1510
1511 and %rbx,%r14 # (a|c)&b
1512 add %r12,%r11 # h+=T1
1513
1514 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1515 lea 1(%rdi),%rdi # round++
1516
1517 add %r14,%r11 # h+=Maj(a,b,c)
1518 mov 80(%rsp),%r13
1519 mov 56(%rsp),%r12
1520
1521 mov %r13,%r15
1522
1523 shr $7,%r13
1524 ror $1,%r15
1525
1526 xor %r15,%r13
1527 ror $7,%r15
1528
1529 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1530 mov %r12,%r14
1531
1532 shr $6,%r12
1533 ror $19,%r14
1534
1535 xor %r14,%r12
1536 ror $42,%r14
1537
1538 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1539
1540 add %r13,%r12
1541
1542 add 16(%rsp),%r12
1543
1544 add 72(%rsp),%r12
1545 mov %rdx,%r13
1546 mov %rdx,%r14
1547 mov %r8,%r15
1548
1549 ror $14,%r13
1550 ror $18,%r14
1551 xor %r9,%r15 # f^g
1552
1553 xor %r14,%r13
1554 ror $23,%r14
1555 and %rdx,%r15 # (f^g)&e
1556 mov %r12,72(%rsp)
1557
1558 xor %r14,%r13 # Sigma1(e)
1559 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
1560 add %r10,%r12 # T1+=h
1561
1562 mov %r11,%r10
1563 add %r13,%r12 # T1+=Sigma1(e)
1564
1565 add %r15,%r12 # T1+=Ch(e,f,g)
1566 mov %r11,%r13
1567 mov %r11,%r14
1568
1569 ror $28,%r10
1570 ror $34,%r13
1571 mov %r11,%r15
1572 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1573
1574 xor %r13,%r10
1575 ror $5,%r13
1576 or %rbx,%r14 # a|c
1577
1578 xor %r13,%r10 # h=Sigma0(a)
1579 and %rbx,%r15 # a&c
1580 add %r12,%rcx # d+=T1
1581
1582 and %rax,%r14 # (a|c)&b
1583 add %r12,%r10 # h+=T1
1584
1585 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1586 lea 1(%rdi),%rdi # round++
1587
1588 add %r14,%r10 # h+=Maj(a,b,c)
1589 mov 88(%rsp),%r13
1590 mov 64(%rsp),%r12
1591
1592 mov %r13,%r15
1593
1594 shr $7,%r13
1595 ror $1,%r15
1596
1597 xor %r15,%r13
1598 ror $7,%r15
1599
1600 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1601 mov %r12,%r14
1602
1603 shr $6,%r12
1604 ror $19,%r14
1605
1606 xor %r14,%r12
1607 ror $42,%r14
1608
1609 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1610
1611 add %r13,%r12
1612
1613 add 24(%rsp),%r12
1614
1615 add 80(%rsp),%r12
1616 mov %rcx,%r13
1617 mov %rcx,%r14
1618 mov %rdx,%r15
1619
1620 ror $14,%r13
1621 ror $18,%r14
1622 xor %r8,%r15 # f^g
1623
1624 xor %r14,%r13
1625 ror $23,%r14
1626 and %rcx,%r15 # (f^g)&e
1627 mov %r12,80(%rsp)
1628
1629 xor %r14,%r13 # Sigma1(e)
1630 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
1631 add %r9,%r12 # T1+=h
1632
1633 mov %r10,%r9
1634 add %r13,%r12 # T1+=Sigma1(e)
1635
1636 add %r15,%r12 # T1+=Ch(e,f,g)
1637 mov %r10,%r13
1638 mov %r10,%r14
1639
1640 ror $28,%r9
1641 ror $34,%r13
1642 mov %r10,%r15
1643 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1644
1645 xor %r13,%r9
1646 ror $5,%r13
1647 or %rax,%r14 # a|c
1648
1649 xor %r13,%r9 # h=Sigma0(a)
1650 and %rax,%r15 # a&c
1651 add %r12,%rbx # d+=T1
1652
1653 and %r11,%r14 # (a|c)&b
1654 add %r12,%r9 # h+=T1
1655
1656 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1657 lea 1(%rdi),%rdi # round++
1658
1659 add %r14,%r9 # h+=Maj(a,b,c)
1660 mov 96(%rsp),%r13
1661 mov 72(%rsp),%r12
1662
1663 mov %r13,%r15
1664
1665 shr $7,%r13
1666 ror $1,%r15
1667
1668 xor %r15,%r13
1669 ror $7,%r15
1670
1671 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1672 mov %r12,%r14
1673
1674 shr $6,%r12
1675 ror $19,%r14
1676
1677 xor %r14,%r12
1678 ror $42,%r14
1679
1680 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1681
1682 add %r13,%r12
1683
1684 add 32(%rsp),%r12
1685
1686 add 88(%rsp),%r12
1687 mov %rbx,%r13
1688 mov %rbx,%r14
1689 mov %rcx,%r15
1690
1691 ror $14,%r13
1692 ror $18,%r14
1693 xor %rdx,%r15 # f^g
1694
1695 xor %r14,%r13
1696 ror $23,%r14
1697 and %rbx,%r15 # (f^g)&e
1698 mov %r12,88(%rsp)
1699
1700 xor %r14,%r13 # Sigma1(e)
1701 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1702 add %r8,%r12 # T1+=h
1703
1704 mov %r9,%r8
1705 add %r13,%r12 # T1+=Sigma1(e)
1706
1707 add %r15,%r12 # T1+=Ch(e,f,g)
1708 mov %r9,%r13
1709 mov %r9,%r14
1710
1711 ror $28,%r8
1712 ror $34,%r13
1713 mov %r9,%r15
1714 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1715
1716 xor %r13,%r8
1717 ror $5,%r13
1718 or %r11,%r14 # a|c
1719
1720 xor %r13,%r8 # h=Sigma0(a)
1721 and %r11,%r15 # a&c
1722 add %r12,%rax # d+=T1
1723
1724 and %r10,%r14 # (a|c)&b
1725 add %r12,%r8 # h+=T1
1726
1727 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1728 lea 1(%rdi),%rdi # round++
1729
1730 add %r14,%r8 # h+=Maj(a,b,c)
1731 mov 104(%rsp),%r13
1732 mov 80(%rsp),%r12
1733
1734 mov %r13,%r15
1735
1736 shr $7,%r13
1737 ror $1,%r15
1738
1739 xor %r15,%r13
1740 ror $7,%r15
1741
1742 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1743 mov %r12,%r14
1744
1745 shr $6,%r12
1746 ror $19,%r14
1747
1748 xor %r14,%r12
1749 ror $42,%r14
1750
1751 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1752
1753 add %r13,%r12
1754
1755 add 40(%rsp),%r12
1756
1757 add 96(%rsp),%r12
1758 mov %rax,%r13
1759 mov %rax,%r14
1760 mov %rbx,%r15
1761
1762 ror $14,%r13
1763 ror $18,%r14
1764 xor %rcx,%r15 # f^g
1765
1766 xor %r14,%r13
1767 ror $23,%r14
1768 and %rax,%r15 # (f^g)&e
1769 mov %r12,96(%rsp)
1770
1771 xor %r14,%r13 # Sigma1(e)
1772 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1773 add %rdx,%r12 # T1+=h
1774
1775 mov %r8,%rdx
1776 add %r13,%r12 # T1+=Sigma1(e)
1777
1778 add %r15,%r12 # T1+=Ch(e,f,g)
1779 mov %r8,%r13
1780 mov %r8,%r14
1781
1782 ror $28,%rdx
1783 ror $34,%r13
1784 mov %r8,%r15
1785 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1786
1787 xor %r13,%rdx
1788 ror $5,%r13
1789 or %r10,%r14 # a|c
1790
1791 xor %r13,%rdx # h=Sigma0(a)
1792 and %r10,%r15 # a&c
1793 add %r12,%r11 # d+=T1
1794
1795 and %r9,%r14 # (a|c)&b
1796 add %r12,%rdx # h+=T1
1797
1798 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1799 lea 1(%rdi),%rdi # round++
1800
1801 add %r14,%rdx # h+=Maj(a,b,c)
1802 mov 112(%rsp),%r13
1803 mov 88(%rsp),%r12
1804
1805 mov %r13,%r15
1806
1807 shr $7,%r13
1808 ror $1,%r15
1809
1810 xor %r15,%r13
1811 ror $7,%r15
1812
1813 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1814 mov %r12,%r14
1815
1816 shr $6,%r12
1817 ror $19,%r14
1818
1819 xor %r14,%r12
1820 ror $42,%r14
1821
1822 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1823
1824 add %r13,%r12
1825
1826 add 48(%rsp),%r12
1827
1828 add 104(%rsp),%r12
1829 mov %r11,%r13
1830 mov %r11,%r14
1831 mov %rax,%r15
1832
1833 ror $14,%r13
1834 ror $18,%r14
1835 xor %rbx,%r15 # f^g
1836
1837 xor %r14,%r13
1838 ror $23,%r14
1839 and %r11,%r15 # (f^g)&e
1840 mov %r12,104(%rsp)
1841
1842 xor %r14,%r13 # Sigma1(e)
1843 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1844 add %rcx,%r12 # T1+=h
1845
1846 mov %rdx,%rcx
1847 add %r13,%r12 # T1+=Sigma1(e)
1848
1849 add %r15,%r12 # T1+=Ch(e,f,g)
1850 mov %rdx,%r13
1851 mov %rdx,%r14
1852
1853 ror $28,%rcx
1854 ror $34,%r13
1855 mov %rdx,%r15
1856 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1857
1858 xor %r13,%rcx
1859 ror $5,%r13
1860 or %r9,%r14 # a|c
1861
1862 xor %r13,%rcx # h=Sigma0(a)
1863 and %r9,%r15 # a&c
1864 add %r12,%r10 # d+=T1
1865
1866 and %r8,%r14 # (a|c)&b
1867 add %r12,%rcx # h+=T1
1868
1869 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1870 lea 1(%rdi),%rdi # round++
1871
1872 add %r14,%rcx # h+=Maj(a,b,c)
1873 mov 120(%rsp),%r13
1874 mov 96(%rsp),%r12
1875
1876 mov %r13,%r15
1877
1878 shr $7,%r13
1879 ror $1,%r15
1880
1881 xor %r15,%r13
1882 ror $7,%r15
1883
1884 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1885 mov %r12,%r14
1886
1887 shr $6,%r12
1888 ror $19,%r14
1889
1890 xor %r14,%r12
1891 ror $42,%r14
1892
1893 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1894
1895 add %r13,%r12
1896
1897 add 56(%rsp),%r12
1898
1899 add 112(%rsp),%r12
1900 mov %r10,%r13
1901 mov %r10,%r14
1902 mov %r11,%r15
1903
1904 ror $14,%r13
1905 ror $18,%r14
1906 xor %rax,%r15 # f^g
1907
1908 xor %r14,%r13
1909 ror $23,%r14
1910 and %r10,%r15 # (f^g)&e
1911 mov %r12,112(%rsp)
1912
1913 xor %r14,%r13 # Sigma1(e)
1914 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
1915 add %rbx,%r12 # T1+=h
1916
1917 mov %rcx,%rbx
1918 add %r13,%r12 # T1+=Sigma1(e)
1919
1920 add %r15,%r12 # T1+=Ch(e,f,g)
1921 mov %rcx,%r13
1922 mov %rcx,%r14
1923
1924 ror $28,%rbx
1925 ror $34,%r13
1926 mov %rcx,%r15
1927 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1928
1929 xor %r13,%rbx
1930 ror $5,%r13
1931 or %r8,%r14 # a|c
1932
1933 xor %r13,%rbx # h=Sigma0(a)
1934 and %r8,%r15 # a&c
1935 add %r12,%r9 # d+=T1
1936
1937 and %rdx,%r14 # (a|c)&b
1938 add %r12,%rbx # h+=T1
1939
1940 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1941 lea 1(%rdi),%rdi # round++
1942
1943 add %r14,%rbx # h+=Maj(a,b,c)
1944 mov 0(%rsp),%r13
1945 mov 104(%rsp),%r12
1946
1947 mov %r13,%r15
1948
1949 shr $7,%r13
1950 ror $1,%r15
1951
1952 xor %r15,%r13
1953 ror $7,%r15
1954
1955 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1956 mov %r12,%r14
1957
1958 shr $6,%r12
1959 ror $19,%r14
1960
1961 xor %r14,%r12
1962 ror $42,%r14
1963
1964 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1965
1966 add %r13,%r12
1967
1968 add 64(%rsp),%r12
1969
1970 add 120(%rsp),%r12
1971 mov %r9,%r13
1972 mov %r9,%r14
1973 mov %r10,%r15
1974
1975 ror $14,%r13
1976 ror $18,%r14
1977 xor %r11,%r15 # f^g
1978
1979 xor %r14,%r13
1980 ror $23,%r14
1981 and %r9,%r15 # (f^g)&e
1982 mov %r12,120(%rsp)
1983
1984 xor %r14,%r13 # Sigma1(e)
1985 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
1986 add %rax,%r12 # T1+=h
1987
1988 mov %rbx,%rax
1989 add %r13,%r12 # T1+=Sigma1(e)
1990
1991 add %r15,%r12 # T1+=Ch(e,f,g)
1992 mov %rbx,%r13
1993 mov %rbx,%r14
1994
1995 ror $28,%rax
1996 ror $34,%r13
1997 mov %rbx,%r15
1998 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1999
2000 xor %r13,%rax
2001 ror $5,%r13
2002 or %rdx,%r14 # a|c
2003
2004 xor %r13,%rax # h=Sigma0(a)
2005 and %rdx,%r15 # a&c
2006 add %r12,%r8 # d+=T1
2007
2008 and %rcx,%r14 # (a|c)&b
2009 add %r12,%rax # h+=T1
2010
2011 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
2012 lea 1(%rdi),%rdi # round++
2013
2014 add %r14,%rax # h+=Maj(a,b,c)
2015 cmp $80,%rdi
2016 jb .Lrounds_16_xx
2017
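# All 80 rounds are done (%rdi is the round counter).  Davies-Meyer
# feed-forward: reload the saved ctx pointer, advance the input pointer by
# one 128-byte block, add the working variables into the chaining value, and
# loop while the input pointer is still below the saved end pointer.  In C
# terms (a sketch):
#
#	H[0] += a; H[1] += b; H[2] += c; H[3] += d;
#	H[4] += e; H[5] += f; H[6] += g; H[7] += h;
#	inp += 128;
#	if (inp < end)
#		continue with the next block (.Lloop);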
2018 mov 16*8+0*8(%rsp),%rdi
2019 lea 16*8(%rsi),%rsi
2020
2021 add 8*0(%rdi),%rax
2022 add 8*1(%rdi),%rbx
2023 add 8*2(%rdi),%rcx
2024 add 8*3(%rdi),%rdx
2025 add 8*4(%rdi),%r8
2026 add 8*5(%rdi),%r9
2027 add 8*6(%rdi),%r10
2028 add 8*7(%rdi),%r11
2029
2030 cmp 16*8+2*8(%rsp),%rsi
2031
2032 mov %rax,8*0(%rdi)
2033 mov %rbx,8*1(%rdi)
2034 mov %rcx,8*2(%rdi)
2035 mov %rdx,8*3(%rdi)
2036 mov %r8,8*4(%rdi)
2037 mov %r9,8*5(%rdi)
2038 mov %r10,8*6(%rdi)
2039 mov %r11,8*7(%rdi)
2040 jb .Lloop
2041
2042 mov 16*8+3*8(%rsp),%rsp
2043 .cfi_def_cfa %rsp,56
2044 pop %r15
2045 .cfi_adjust_cfa_offset -8
2046 .cfi_restore %r15
2047 pop %r14
2048 .cfi_adjust_cfa_offset -8
2049 .cfi_restore %r14
2050 pop %r13
2051 .cfi_adjust_cfa_offset -8
2052 .cfi_restore %r13
2053 pop %r12
2054 .cfi_adjust_cfa_offset -8
2055 .cfi_restore %r12
2056 pop %rbp
2057 .cfi_adjust_cfa_offset -8
2058 .cfi_restore %rbp
2059 pop %rbx
2060 .cfi_adjust_cfa_offset -8
2061 .cfi_restore %rbx
2062
2063 RET
2064 .cfi_endproc
2065 SET_SIZE(SHA512TransformBlocks)
2066
2067 .section .rodata
2068 .balign 64
2069 SET_OBJ(K512)
2070 K512:
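/*
 * The 80 SHA-512 round constants: the first 64 bits of the fractional parts
 * of the cube roots of the first eighty prime numbers (see FIPS 180-4).
 */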
2071 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
2072 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
2073 .quad 0x3956c25bf348b538,0x59f111f1b605d019
2074 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
2075 .quad 0xd807aa98a3030242,0x12835b0145706fbe
2076 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
2077 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
2078 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
2079 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
2080 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
2081 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
2082 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
2083 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
2084 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
2085 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
2086 .quad 0x06ca6351e003826f,0x142929670a0e6e70
2087 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
2088 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
2089 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
2090 .quad 0x81c2c92e47edaee6,0x92722c851482353b
2091 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
2092 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
2093 .quad 0xd192e819d6ef5218,0xd69906245565a910
2094 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
2095 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
2096 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
2097 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
2098 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
2099 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
2100 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
2101 .quad 0x90befffa23631e28,0xa4506cebde82bde9
2102 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
2103 .quad 0xca273eceea26619c,0xd186b8c721c0c207
2104 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
2105 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
2106 .quad 0x113f9804bef90dae,0x1b710b35131c471b
2107 .quad 0x28db77f523047d84,0x32caab7b40c72493
2108 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
2109 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
2110 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
2111 #endif /* !lint && !__lint */
2112
2113 #if defined(__ELF__)
2114 .section .note.GNU-stack,"",%progbits
2115 #endif
2116