1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
3 #include "arm_arch.h"
4
5 .text
6
7 #if defined(__thumb2__)
8 .syntax unified
9 .thumb
10 #else
11 .code 32
12 #endif
13
@ iotas32: the 24 Keccak-f[1600] iota round constants, one row of two
@ 32-bit words per round (24 rows * 8 bytes = 192 bytes in total).
@ Each 64-bit constant is stored bit-interleaved: the first word of a row
@ collects the even-numbered bits of the constant and the second word
@ collects the odd-numbered bits. The trailing comment on each row gives
@ the round index and the conventional 64-bit constant that row encodes.
@ KeccakF1600_int locates the table with adr and adds a byte offset that
@ it keeps on the stack at sp+444; that offset advances by 16 (two rows)
@ per pass and is compared against 192, so row order and row size must
@ not change.
14 .type iotas32, %object
15 .align 5 @ 2^5 = 32-byte alignment
16 iotas32:
17 .long 0x00000001, 0x00000000 @ round 0: 0x0000000000000001
18 .long 0x00000000, 0x00000089 @ round 1: 0x0000000000008082
19 .long 0x00000000, 0x8000008b @ round 2: 0x800000000000808a
20 .long 0x00000000, 0x80008080 @ round 3: 0x8000000080008000
21 .long 0x00000001, 0x0000008b @ round 4: 0x000000000000808b
22 .long 0x00000001, 0x00008000 @ round 5: 0x0000000080000001
23 .long 0x00000001, 0x80008088 @ round 6: 0x8000000080008081
24 .long 0x00000001, 0x80000082 @ round 7: 0x8000000000008009
25 .long 0x00000000, 0x0000000b @ round 8: 0x000000000000008a
26 .long 0x00000000, 0x0000000a @ round 9: 0x0000000000000088
27 .long 0x00000001, 0x00008082 @ round 10: 0x0000000080008009
28 .long 0x00000000, 0x00008003 @ round 11: 0x000000008000000a
29 .long 0x00000001, 0x0000808b @ round 12: 0x000000008000808b
30 .long 0x00000001, 0x8000000b @ round 13: 0x800000000000008b
31 .long 0x00000001, 0x8000008a @ round 14: 0x8000000000008089
32 .long 0x00000001, 0x80000081 @ round 15: 0x8000000000008003
33 .long 0x00000000, 0x80000081 @ round 16: 0x8000000000008002
34 .long 0x00000000, 0x80000008 @ round 17: 0x8000000000000080
35 .long 0x00000000, 0x00000083 @ round 18: 0x000000000000800a
36 .long 0x00000000, 0x80008003 @ round 19: 0x800000008000000a
37 .long 0x00000001, 0x80008088 @ round 20: 0x8000000080008081
38 .long 0x00000000, 0x80000088 @ round 21: 0x8000000000008080
39 .long 0x00000001, 0x00008000 @ round 22: 0x0000000080000001
40 .long 0x00000000, 0x80008082 @ round 23: 0x8000000080008008
41 .size iotas32,.-iotas32
42
43 .type KeccakF1600_int, %function
44 .align 5
45 KeccakF1600_int:
46 add r9,sp,#176
47 add r12,sp,#0
48 add r10,sp,#40
49 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4]
50 KeccakF1600_enter:
51 str lr,[sp,#440]
52 eor r11,r11,r11
53 str r11,[sp,#444]
54 b .Lround2x
55
56 .align 4
57 .Lround2x:
58 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
59 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
60 #ifdef __thumb2__
61 eor r0,r0,r10
62 eor r1,r1,r11
63 eor r2,r2,r12
64 ldrd r10,r11,[sp,#56]
65 eor r3,r3,r14
66 ldrd r12,r14,[sp,#64]
67 eor r4,r4,r10
68 eor r5,r5,r11
69 eor r6,r6,r12
70 ldrd r10,r11,[sp,#72]
71 eor r7,r7,r14
72 ldrd r12,r14,[sp,#80]
73 eor r8,r8,r10
74 eor r9,r9,r11
75 eor r0,r0,r12
76 ldrd r10,r11,[sp,#88]
77 eor r1,r1,r14
78 ldrd r12,r14,[sp,#96]
79 eor r2,r2,r10
80 eor r3,r3,r11
81 eor r4,r4,r12
82 ldrd r10,r11,[sp,#104]
83 eor r5,r5,r14
84 ldrd r12,r14,[sp,#112]
85 eor r6,r6,r10
86 eor r7,r7,r11
87 eor r8,r8,r12
88 ldrd r10,r11,[sp,#120]
89 eor r9,r9,r14
90 ldrd r12,r14,[sp,#128]
91 eor r0,r0,r10
92 eor r1,r1,r11
93 eor r2,r2,r12
94 ldrd r10,r11,[sp,#136]
95 eor r3,r3,r14
96 ldrd r12,r14,[sp,#144]
97 eor r4,r4,r10
98 eor r5,r5,r11
99 eor r6,r6,r12
100 ldrd r10,r11,[sp,#152]
101 eor r7,r7,r14
102 ldrd r12,r14,[sp,#160]
103 eor r8,r8,r10
104 eor r9,r9,r11
105 eor r0,r0,r12
106 ldrd r10,r11,[sp,#168]
107 eor r1,r1,r14
108 ldrd r12,r14,[sp,#16]
109 eor r2,r2,r10
110 eor r3,r3,r11
111 eor r4,r4,r12
112 ldrd r10,r11,[sp,#24]
113 eor r5,r5,r14
114 ldrd r12,r14,[sp,#32]
115 #else
116 eor r0,r0,r10
117 add r10,sp,#56
118 eor r1,r1,r11
119 eor r2,r2,r12
120 eor r3,r3,r14
121 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
122 eor r4,r4,r10
123 add r10,sp,#72
124 eor r5,r5,r11
125 eor r6,r6,r12
126 eor r7,r7,r14
127 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
128 eor r8,r8,r10
129 add r10,sp,#88
130 eor r9,r9,r11
131 eor r0,r0,r12
132 eor r1,r1,r14
133 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
134 eor r2,r2,r10
135 add r10,sp,#104
136 eor r3,r3,r11
137 eor r4,r4,r12
138 eor r5,r5,r14
139 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
140 eor r6,r6,r10
141 add r10,sp,#120
142 eor r7,r7,r11
143 eor r8,r8,r12
144 eor r9,r9,r14
145 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
146 eor r0,r0,r10
147 add r10,sp,#136
148 eor r1,r1,r11
149 eor r2,r2,r12
150 eor r3,r3,r14
151 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
152 eor r4,r4,r10
153 add r10,sp,#152
154 eor r5,r5,r11
155 eor r6,r6,r12
156 eor r7,r7,r14
157 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
158 eor r8,r8,r10
159 ldr r10,[sp,#168] @ A[4][1]
160 eor r9,r9,r11
161 ldr r11,[sp,#168+4]
162 eor r0,r0,r12
163 ldr r12,[sp,#16] @ A[0][2]
164 eor r1,r1,r14
165 ldr r14,[sp,#16+4]
166 eor r2,r2,r10
167 add r10,sp,#24
168 eor r3,r3,r11
169 eor r4,r4,r12
170 eor r5,r5,r14
171 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
172 #endif
173 eor r6,r6,r10
174 eor r7,r7,r11
175 eor r8,r8,r12
176 eor r9,r9,r14
177
178 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
179 #ifndef __thumb2__
180 str r10,[sp,#208] @ D[1] = E[0]
181 #endif
182 eor r11,r1,r4
183 #ifndef __thumb2__
184 str r11,[sp,#208+4]
185 #else
186 strd r10,r11,[sp,#208] @ D[1] = E[0]
187 #endif
188 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
189 eor r14,r7,r0
190 #ifndef __thumb2__
191 str r12,[sp,#232] @ D[4] = E[1]
192 #endif
193 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
194 #ifndef __thumb2__
195 str r14,[sp,#232+4]
196 #else
197 strd r12,r14,[sp,#232] @ D[4] = E[1]
198 #endif
199 eor r1,r9,r2
200 #ifndef __thumb2__
201 str r0,[sp,#200] @ D[0] = C[0]
202 #endif
203 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
204 #ifndef __thumb2__
205 ldr r7,[sp,#144]
206 #endif
207 eor r3,r3,r6
208 #ifndef __thumb2__
209 str r1,[sp,#200+4]
210 #else
211 strd r0,r1,[sp,#200] @ D[0] = C[0]
212 #endif
213 #ifndef __thumb2__
214 ldr r6,[sp,#144+4]
215 #else
216 ldrd r7,r6,[sp,#144]
217 #endif
218 #ifndef __thumb2__
219 str r2,[sp,#216] @ D[2] = C[1]
220 #endif
221 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
222 #ifndef __thumb2__
223 str r3,[sp,#216+4]
224 #else
225 strd r2,r3,[sp,#216] @ D[2] = C[1]
226 #endif
227 eor r5,r5,r8
228
229 #ifndef __thumb2__
230 ldr r8,[sp,#192]
231 #endif
232 #ifndef __thumb2__
233 ldr r9,[sp,#192+4]
234 #else
235 ldrd r8,r9,[sp,#192]
236 #endif
237 #ifndef __thumb2__
238 str r4,[sp,#224] @ D[3] = C[2]
239 #endif
240 eor r7,r7,r4
241 #ifndef __thumb2__
242 str r5,[sp,#224+4]
243 #else
244 strd r4,r5,[sp,#224] @ D[3] = C[2]
245 #endif
246 eor r6,r6,r5
247 #ifndef __thumb2__
248 ldr r4,[sp,#0]
249 #endif
250 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
251 @ mov r6,r6,ror#32-11
252 #ifndef __thumb2__
253 ldr r5,[sp,#0+4]
254 #else
255 ldrd r4,r5,[sp,#0]
256 #endif
257 eor r8,r8,r12
258 eor r9,r9,r14
259 #ifndef __thumb2__
260 ldr r12,[sp,#96]
261 #endif
262 eor r0,r0,r4
263 #ifndef __thumb2__
264 ldr r14,[sp,#96+4]
265 #else
266 ldrd r12,r14,[sp,#96]
267 #endif
268 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
269 @ mov r9,r9,ror#32-7
270 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
271 eor r12,r12,r2
272 #ifndef __thumb2__
273 ldr r2,[sp,#48]
274 #endif
275 eor r14,r14,r3
276 #ifndef __thumb2__
277 ldr r3,[sp,#48+4]
278 #else
279 ldrd r2,r3,[sp,#48]
280 #endif
281 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
282 ldr r12,[sp,#444] @ load counter
283 eor r2,r2,r10
284 adr r10,iotas32
285 mov r4,r14,ror#32-22
286 add r14,r10,r12
287 eor r3,r3,r11
288 ldmia r14,{r10,r11} @ iotas[i]
289 bic r12,r4,r2,ror#32-22
290 bic r14,r5,r3,ror#32-22
291 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
292 mov r3,r3,ror#32-22
293 eor r12,r12,r0
294 eor r14,r14,r1
295 eor r10,r10,r12
296 eor r11,r11,r14
297 #ifndef __thumb2__
298 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
299 #endif
300 bic r12,r6,r4,ror#11
301 #ifndef __thumb2__
302 str r11,[sp,#240+4]
303 #else
304 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
305 #endif
306 bic r14,r7,r5,ror#10
307 bic r10,r8,r6,ror#32-(11-7)
308 bic r11,r9,r7,ror#32-(10-7)
309 eor r12,r2,r12,ror#32-11
310 #ifndef __thumb2__
311 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
312 #endif
313 eor r14,r3,r14,ror#32-10
314 #ifndef __thumb2__
315 str r14,[sp,#248+4]
316 #else
317 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
318 #endif
319 eor r10,r4,r10,ror#32-7
320 eor r11,r5,r11,ror#32-7
321 #ifndef __thumb2__
322 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
323 #endif
324 bic r12,r0,r8,ror#32-7
325 #ifndef __thumb2__
326 str r11,[sp,#256+4]
327 #else
328 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
329 #endif
330 bic r14,r1,r9,ror#32-7
331 eor r12,r12,r6,ror#32-11
332 #ifndef __thumb2__
333 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
334 #endif
335 eor r14,r14,r7,ror#32-10
336 #ifndef __thumb2__
337 str r14,[sp,#264+4]
338 #else
339 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
340 #endif
341 bic r10,r2,r0
342 add r14,sp,#224
343 #ifndef __thumb2__
344 ldr r0,[sp,#24] @ A[0][3]
345 #endif
346 bic r11,r3,r1
347 #ifndef __thumb2__
348 ldr r1,[sp,#24+4]
349 #else
350 ldrd r0,r1,[sp,#24] @ A[0][3]
351 #endif
352 eor r10,r10,r8,ror#32-7
353 eor r11,r11,r9,ror#32-7
354 #ifndef __thumb2__
355 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
356 #endif
357 add r9,sp,#200
358 #ifndef __thumb2__
359 str r11,[sp,#272+4]
360 #else
361 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
362 #endif
363
364 ldmia r14,{r10,r11,r12,r14} @ D[3..4]
365 ldmia r9,{r6,r7,r8,r9} @ D[0..1]
366
367 #ifndef __thumb2__
368 ldr r2,[sp,#72] @ A[1][4]
369 #endif
370 eor r0,r0,r10
371 #ifndef __thumb2__
372 ldr r3,[sp,#72+4]
373 #else
374 ldrd r2,r3,[sp,#72] @ A[1][4]
375 #endif
376 eor r1,r1,r11
377 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
378 #ifndef __thumb2__
379 ldr r10,[sp,#128] @ A[3][1]
380 #endif
381 @ mov r1,r1,ror#32-14
382 #ifndef __thumb2__
383 ldr r11,[sp,#128+4]
384 #else
385 ldrd r10,r11,[sp,#128] @ A[3][1]
386 #endif
387
388 eor r2,r2,r12
389 #ifndef __thumb2__
390 ldr r4,[sp,#80] @ A[2][0]
391 #endif
392 eor r3,r3,r14
393 #ifndef __thumb2__
394 ldr r5,[sp,#80+4]
395 #else
396 ldrd r4,r5,[sp,#80] @ A[2][0]
397 #endif
398 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
399 @ mov r3,r3,ror#32-10
400
401 eor r6,r6,r4
402 #ifndef __thumb2__
403 ldr r12,[sp,#216] @ D[2]
404 #endif
405 eor r7,r7,r5
406 #ifndef __thumb2__
407 ldr r14,[sp,#216+4]
408 #else
409 ldrd r12,r14,[sp,#216] @ D[2]
410 #endif
411 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
412 mov r4,r7,ror#32-2
413
414 eor r10,r10,r8
415 #ifndef __thumb2__
416 ldr r8,[sp,#176] @ A[4][2]
417 #endif
418 eor r11,r11,r9
419 #ifndef __thumb2__
420 ldr r9,[sp,#176+4]
421 #else
422 ldrd r8,r9,[sp,#176] @ A[4][2]
423 #endif
424 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
425 mov r6,r11,ror#32-23
426
427 bic r10,r4,r2,ror#32-10
428 bic r11,r5,r3,ror#32-10
429 eor r12,r12,r8
430 eor r14,r14,r9
431 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
432 mov r8,r14,ror#32-31
433 eor r10,r10,r0,ror#32-14
434 eor r11,r11,r1,ror#32-14
435 #ifndef __thumb2__
436 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
437 #endif
438 bic r12,r6,r4
439 #ifndef __thumb2__
440 str r11,[sp,#280+4]
441 #else
442 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
443 #endif
444 bic r14,r7,r5
445 eor r12,r12,r2,ror#32-10
446 #ifndef __thumb2__
447 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
448 #endif
449 eor r14,r14,r3,ror#32-10
450 #ifndef __thumb2__
451 str r14,[sp,#288+4]
452 #else
453 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
454 #endif
455 bic r10,r8,r6
456 bic r11,r9,r7
457 bic r12,r0,r8,ror#14
458 bic r14,r1,r9,ror#14
459 eor r10,r10,r4
460 eor r11,r11,r5
461 #ifndef __thumb2__
462 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
463 #endif
464 bic r2,r2,r0,ror#32-(14-10)
465 #ifndef __thumb2__
466 str r11,[sp,#296+4]
467 #else
468 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
469 #endif
470 eor r12,r6,r12,ror#32-14
471 bic r11,r3,r1,ror#32-(14-10)
472 #ifndef __thumb2__
473 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
474 #endif
475 eor r14,r7,r14,ror#32-14
476 #ifndef __thumb2__
477 str r14,[sp,#304+4]
478 #else
479 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
480 #endif
481 add r12,sp,#208
482 #ifndef __thumb2__
483 ldr r1,[sp,#8] @ A[0][1]
484 #endif
485 eor r10,r8,r2,ror#32-10
486 #ifndef __thumb2__
487 ldr r0,[sp,#8+4]
488 #else
489 ldrd r1,r0,[sp,#8] @ A[0][1]
490 #endif
491 eor r11,r9,r11,ror#32-10
492 #ifndef __thumb2__
493 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
494 #endif
495 #ifndef __thumb2__
496 str r11,[sp,#312+4]
497 #else
498 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
499 #endif
500
501 add r9,sp,#224
502 ldmia r12,{r10,r11,r12,r14} @ D[1..2]
503 #ifndef __thumb2__
504 ldr r2,[sp,#56] @ A[1][2]
505 #endif
506 #ifndef __thumb2__
507 ldr r3,[sp,#56+4]
508 #else
509 ldrd r2,r3,[sp,#56] @ A[1][2]
510 #endif
511 ldmia r9,{r6,r7,r8,r9} @ D[3..4]
512
513 eor r1,r1,r10
514 #ifndef __thumb2__
515 ldr r4,[sp,#104] @ A[2][3]
516 #endif
517 eor r0,r0,r11
518 #ifndef __thumb2__
519 ldr r5,[sp,#104+4]
520 #else
521 ldrd r4,r5,[sp,#104] @ A[2][3]
522 #endif
523 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
524
525 eor r2,r2,r12
526 #ifndef __thumb2__
527 ldr r10,[sp,#152] @ A[3][4]
528 #endif
529 eor r3,r3,r14
530 #ifndef __thumb2__
531 ldr r11,[sp,#152+4]
532 #else
533 ldrd r10,r11,[sp,#152] @ A[3][4]
534 #endif
535 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
536 #ifndef __thumb2__
537 ldr r12,[sp,#200] @ D[0]
538 #endif
539 @ mov r3,r3,ror#32-3
540 #ifndef __thumb2__
541 ldr r14,[sp,#200+4]
542 #else
543 ldrd r12,r14,[sp,#200] @ D[0]
544 #endif
545
546 eor r4,r4,r6
547 eor r5,r5,r7
548 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
549 @ mov r4,r7,ror#32-13 @ [track reverse order below]
550
551 eor r10,r10,r8
552 #ifndef __thumb2__
553 ldr r8,[sp,#160] @ A[4][0]
554 #endif
555 eor r11,r11,r9
556 #ifndef __thumb2__
557 ldr r9,[sp,#160+4]
558 #else
559 ldrd r8,r9,[sp,#160] @ A[4][0]
560 #endif
561 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
562 mov r7,r11,ror#32-4
563
564 eor r12,r12,r8
565 eor r14,r14,r9
566 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
567 mov r9,r14,ror#32-9
568
569 bic r10,r5,r2,ror#13-3
570 bic r11,r4,r3,ror#12-3
571 bic r12,r6,r5,ror#32-13
572 bic r14,r7,r4,ror#32-12
573 eor r10,r0,r10,ror#32-13
574 eor r11,r1,r11,ror#32-12
575 #ifndef __thumb2__
576 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
577 #endif
578 eor r12,r12,r2,ror#32-3
579 #ifndef __thumb2__
580 str r11,[sp,#320+4]
581 #else
582 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
583 #endif
584 eor r14,r14,r3,ror#32-3
585 #ifndef __thumb2__
586 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
587 #endif
588 bic r10,r8,r6
589 bic r11,r9,r7
590 #ifndef __thumb2__
591 str r14,[sp,#328+4]
592 #else
593 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
594 #endif
595 eor r10,r10,r5,ror#32-13
596 eor r11,r11,r4,ror#32-12
597 #ifndef __thumb2__
598 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
599 #endif
600 bic r12,r0,r8
601 #ifndef __thumb2__
602 str r11,[sp,#336+4]
603 #else
604 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
605 #endif
606 bic r14,r1,r9
607 eor r12,r12,r6
608 eor r14,r14,r7
609 #ifndef __thumb2__
610 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
611 #endif
612 bic r10,r2,r0,ror#3
613 #ifndef __thumb2__
614 str r14,[sp,#344+4]
615 #else
616 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
617 #endif
618 bic r11,r3,r1,ror#3
619 #ifndef __thumb2__
620 ldr r1,[sp,#32] @ A[0][4] [in reverse order]
621 #endif
622 eor r10,r8,r10,ror#32-3
623 #ifndef __thumb2__
624 ldr r0,[sp,#32+4]
625 #else
626 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order]
627 #endif
628 eor r11,r9,r11,ror#32-3
629 #ifndef __thumb2__
630 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
631 #endif
632 add r9,sp,#208
633 #ifndef __thumb2__
634 str r11,[sp,#352+4]
635 #else
636 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
637 #endif
638
639 #ifndef __thumb2__
640 ldr r10,[sp,#232] @ D[4]
641 #endif
642 #ifndef __thumb2__
643 ldr r11,[sp,#232+4]
644 #else
645 ldrd r10,r11,[sp,#232] @ D[4]
646 #endif
647 #ifndef __thumb2__
648 ldr r12,[sp,#200] @ D[0]
649 #endif
650 #ifndef __thumb2__
651 ldr r14,[sp,#200+4]
652 #else
653 ldrd r12,r14,[sp,#200] @ D[0]
654 #endif
655
656 ldmia r9,{r6,r7,r8,r9} @ D[1..2]
657
658 eor r1,r1,r10
659 #ifndef __thumb2__
660 ldr r2,[sp,#40] @ A[1][0]
661 #endif
662 eor r0,r0,r11
663 #ifndef __thumb2__
664 ldr r3,[sp,#40+4]
665 #else
666 ldrd r2,r3,[sp,#40] @ A[1][0]
667 #endif
668 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
669 #ifndef __thumb2__
670 ldr r4,[sp,#88] @ A[2][1]
671 #endif
672 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
673 #ifndef __thumb2__
674 ldr r5,[sp,#88+4]
675 #else
676 ldrd r4,r5,[sp,#88] @ A[2][1]
677 #endif
678
679 eor r2,r2,r12
680 #ifndef __thumb2__
681 ldr r10,[sp,#136] @ A[3][2]
682 #endif
683 eor r3,r3,r14
684 #ifndef __thumb2__
685 ldr r11,[sp,#136+4]
686 #else
687 ldrd r10,r11,[sp,#136] @ A[3][2]
688 #endif
689 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
690 #ifndef __thumb2__
691 ldr r12,[sp,#224] @ D[3]
692 #endif
693 @ mov r3,r3,ror#32-18
694 #ifndef __thumb2__
695 ldr r14,[sp,#224+4]
696 #else
697 ldrd r12,r14,[sp,#224] @ D[3]
698 #endif
699
700 eor r6,r6,r4
701 eor r7,r7,r5
702 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
703 mov r5,r7,ror#32-5
704
705 eor r10,r10,r8
706 #ifndef __thumb2__
707 ldr r8,[sp,#184] @ A[4][3]
708 #endif
709 eor r11,r11,r9
710 #ifndef __thumb2__
711 ldr r9,[sp,#184+4]
712 #else
713 ldrd r8,r9,[sp,#184] @ A[4][3]
714 #endif
715 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
716 mov r6,r11,ror#32-8
717
718 eor r12,r12,r8
719 eor r14,r14,r9
720 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
721 mov r9,r14,ror#32-28
722
723 bic r10,r4,r2,ror#32-18
724 bic r11,r5,r3,ror#32-18
725 eor r10,r10,r0,ror#32-14
726 eor r11,r11,r1,ror#32-13
727 #ifndef __thumb2__
728 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
729 #endif
730 bic r12,r6,r4
731 #ifndef __thumb2__
732 str r11,[sp,#360+4]
733 #else
734 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
735 #endif
736 bic r14,r7,r5
737 eor r12,r12,r2,ror#32-18
738 #ifndef __thumb2__
739 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
740 #endif
741 eor r14,r14,r3,ror#32-18
742 #ifndef __thumb2__
743 str r14,[sp,#368+4]
744 #else
745 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
746 #endif
747 bic r10,r8,r6
748 bic r11,r9,r7
749 bic r12,r0,r8,ror#14
750 bic r14,r1,r9,ror#13
751 eor r10,r10,r4
752 eor r11,r11,r5
753 #ifndef __thumb2__
754 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
755 #endif
756 bic r2,r2,r0,ror#18-14
757 #ifndef __thumb2__
758 str r11,[sp,#376+4]
759 #else
760 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
761 #endif
762 eor r12,r6,r12,ror#32-14
763 bic r11,r3,r1,ror#18-13
764 eor r14,r7,r14,ror#32-13
765 #ifndef __thumb2__
766 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
767 #endif
768 #ifndef __thumb2__
769 str r14,[sp,#384+4]
770 #else
771 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
772 #endif
773 add r14,sp,#216
774 #ifndef __thumb2__
775 ldr r0,[sp,#16] @ A[0][2]
776 #endif
777 eor r10,r8,r2,ror#32-18
778 #ifndef __thumb2__
779 ldr r1,[sp,#16+4]
780 #else
781 ldrd r0,r1,[sp,#16] @ A[0][2]
782 #endif
783 eor r11,r9,r11,ror#32-18
784 #ifndef __thumb2__
785 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
786 #endif
787 #ifndef __thumb2__
788 str r11,[sp,#392+4]
789 #else
790 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
791 #endif
792
793 ldmia r14,{r10,r11,r12,r14} @ D[2..3]
794 #ifndef __thumb2__
795 ldr r2,[sp,#64] @ A[1][3]
796 #endif
797 #ifndef __thumb2__
798 ldr r3,[sp,#64+4]
799 #else
800 ldrd r2,r3,[sp,#64] @ A[1][3]
801 #endif
802 #ifndef __thumb2__
803 ldr r6,[sp,#232] @ D[4]
804 #endif
805 #ifndef __thumb2__
806 ldr r7,[sp,#232+4]
807 #else
808 ldrd r6,r7,[sp,#232] @ D[4]
809 #endif
810
811 eor r0,r0,r10
812 #ifndef __thumb2__
813 ldr r4,[sp,#112] @ A[2][4]
814 #endif
815 eor r1,r1,r11
816 #ifndef __thumb2__
817 ldr r5,[sp,#112+4]
818 #else
819 ldrd r4,r5,[sp,#112] @ A[2][4]
820 #endif
821 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
822 #ifndef __thumb2__
823 ldr r8,[sp,#200] @ D[0]
824 #endif
825 @ mov r1,r1,ror#32-31
826 #ifndef __thumb2__
827 ldr r9,[sp,#200+4]
828 #else
829 ldrd r8,r9,[sp,#200] @ D[0]
830 #endif
831
832 eor r12,r12,r2
833 #ifndef __thumb2__
834 ldr r10,[sp,#120] @ A[3][0]
835 #endif
836 eor r14,r14,r3
837 #ifndef __thumb2__
838 ldr r11,[sp,#120+4]
839 #else
840 ldrd r10,r11,[sp,#120] @ A[3][0]
841 #endif
842 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
843 #ifndef __thumb2__
844 ldr r12,[sp,#208] @ D[1]
845 #endif
846 mov r2,r14,ror#32-28
847 #ifndef __thumb2__
848 ldr r14,[sp,#208+4]
849 #else
850 ldrd r12,r14,[sp,#208] @ D[1]
851 #endif
852
853 eor r6,r6,r4
854 eor r7,r7,r5
855 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
856 mov r4,r7,ror#32-20
857
858 eor r10,r10,r8
859 #ifndef __thumb2__
860 ldr r8,[sp,#168] @ A[4][1]
861 #endif
862 eor r11,r11,r9
863 #ifndef __thumb2__
864 ldr r9,[sp,#168+4]
865 #else
866 ldrd r8,r9,[sp,#168] @ A[4][1]
867 #endif
868 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
869 mov r6,r11,ror#32-21
870
871 eor r8,r8,r12
872 eor r9,r9,r14
873 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
874 @ mov r9,r3,ror#32-1
875
876 bic r10,r4,r2
877 bic r11,r5,r3
878 eor r10,r10,r0,ror#32-31
879 #ifndef __thumb2__
880 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
881 #endif
882 eor r11,r11,r1,ror#32-31
883 #ifndef __thumb2__
884 str r11,[sp,#400+4]
885 #else
886 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
887 #endif
888 bic r12,r6,r4
889 bic r14,r7,r5
890 eor r12,r12,r2
891 eor r14,r14,r3
892 #ifndef __thumb2__
893 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
894 #endif
895 bic r10,r8,r6,ror#1
896 #ifndef __thumb2__
897 str r14,[sp,#408+4]
898 #else
899 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
900 #endif
901 bic r11,r9,r7,ror#1
902 bic r12,r0,r8,ror#31-1
903 bic r14,r1,r9,ror#31-1
904 eor r4,r4,r10,ror#32-1
905 #ifndef __thumb2__
906 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
907 #endif
908 eor r5,r5,r11,ror#32-1
909 #ifndef __thumb2__
910 str r5,[sp,#416+4]
911 #else
912 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
913 #endif
914 eor r6,r6,r12,ror#32-31
915 eor r7,r7,r14,ror#32-31
916 #ifndef __thumb2__
917 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
918 #endif
919 bic r10,r2,r0,ror#32-31
920 #ifndef __thumb2__
921 str r7,[sp,#424+4]
922 #else
923 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
924 #endif
925 bic r11,r3,r1,ror#32-31
926 add r12,sp,#240
927 eor r8,r10,r8,ror#32-1
928 add r10,sp,#280
929 eor r9,r11,r9,ror#32-1
930 #ifndef __thumb2__
931 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
932 #endif
933 #ifndef __thumb2__
934 str r9,[sp,#432+4]
935 #else
936 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
937 #endif
938 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
939 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
940 #ifdef __thumb2__
941 eor r0,r0,r10
942 eor r1,r1,r11
943 eor r2,r2,r12
944 ldrd r10,r11,[sp,#296]
945 eor r3,r3,r14
946 ldrd r12,r14,[sp,#304]
947 eor r4,r4,r10
948 eor r5,r5,r11
949 eor r6,r6,r12
950 ldrd r10,r11,[sp,#312]
951 eor r7,r7,r14
952 ldrd r12,r14,[sp,#320]
953 eor r8,r8,r10
954 eor r9,r9,r11
955 eor r0,r0,r12
956 ldrd r10,r11,[sp,#328]
957 eor r1,r1,r14
958 ldrd r12,r14,[sp,#336]
959 eor r2,r2,r10
960 eor r3,r3,r11
961 eor r4,r4,r12
962 ldrd r10,r11,[sp,#344]
963 eor r5,r5,r14
964 ldrd r12,r14,[sp,#352]
965 eor r6,r6,r10
966 eor r7,r7,r11
967 eor r8,r8,r12
968 ldrd r10,r11,[sp,#360]
969 eor r9,r9,r14
970 ldrd r12,r14,[sp,#368]
971 eor r0,r0,r10
972 eor r1,r1,r11
973 eor r2,r2,r12
974 ldrd r10,r11,[sp,#376]
975 eor r3,r3,r14
976 ldrd r12,r14,[sp,#384]
977 eor r4,r4,r10
978 eor r5,r5,r11
979 eor r6,r6,r12
980 ldrd r10,r11,[sp,#392]
981 eor r7,r7,r14
982 ldrd r12,r14,[sp,#400]
983 eor r8,r8,r10
984 eor r9,r9,r11
985 eor r0,r0,r12
986 ldrd r10,r11,[sp,#408]
987 eor r1,r1,r14
988 ldrd r12,r14,[sp,#256]
989 eor r2,r2,r10
990 eor r3,r3,r11
991 eor r4,r4,r12
992 ldrd r10,r11,[sp,#264]
993 eor r5,r5,r14
994 ldrd r12,r14,[sp,#272]
995 #else
996 eor r0,r0,r10
997 add r10,sp,#296
998 eor r1,r1,r11
999 eor r2,r2,r12
1000 eor r3,r3,r14
1001 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
1002 eor r4,r4,r10
1003 add r10,sp,#312
1004 eor r5,r5,r11
1005 eor r6,r6,r12
1006 eor r7,r7,r14
1007 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
1008 eor r8,r8,r10
1009 add r10,sp,#328
1010 eor r9,r9,r11
1011 eor r0,r0,r12
1012 eor r1,r1,r14
1013 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
1014 eor r2,r2,r10
1015 add r10,sp,#344
1016 eor r3,r3,r11
1017 eor r4,r4,r12
1018 eor r5,r5,r14
1019 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
1020 eor r6,r6,r10
1021 add r10,sp,#360
1022 eor r7,r7,r11
1023 eor r8,r8,r12
1024 eor r9,r9,r14
1025 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
1026 eor r0,r0,r10
1027 add r10,sp,#376
1028 eor r1,r1,r11
1029 eor r2,r2,r12
1030 eor r3,r3,r14
1031 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
1032 eor r4,r4,r10
1033 add r10,sp,#392
1034 eor r5,r5,r11
1035 eor r6,r6,r12
1036 eor r7,r7,r14
1037 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
1038 eor r8,r8,r10
1039 ldr r10,[sp,#408] @ A[4][1]
1040 eor r9,r9,r11
1041 ldr r11,[sp,#408+4]
1042 eor r0,r0,r12
1043 ldr r12,[sp,#256] @ A[0][2]
1044 eor r1,r1,r14
1045 ldr r14,[sp,#256+4]
1046 eor r2,r2,r10
1047 add r10,sp,#264
1048 eor r3,r3,r11
1049 eor r4,r4,r12
1050 eor r5,r5,r14
1051 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
1052 #endif
1053 eor r6,r6,r10
1054 eor r7,r7,r11
1055 eor r8,r8,r12
1056 eor r9,r9,r14
1057
1058 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
1059 #ifndef __thumb2__
1060 str r10,[sp,#208] @ D[1] = E[0]
1061 #endif
1062 eor r11,r1,r4
1063 #ifndef __thumb2__
1064 str r11,[sp,#208+4]
1065 #else
1066 strd r10,r11,[sp,#208] @ D[1] = E[0]
1067 #endif
1068 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
1069 eor r14,r7,r0
1070 #ifndef __thumb2__
1071 str r12,[sp,#232] @ D[4] = E[1]
1072 #endif
1073 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
1074 #ifndef __thumb2__
1075 str r14,[sp,#232+4]
1076 #else
1077 strd r12,r14,[sp,#232] @ D[4] = E[1]
1078 #endif
1079 eor r1,r9,r2
1080 #ifndef __thumb2__
1081 str r0,[sp,#200] @ D[0] = C[0]
1082 #endif
1083 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
1084 #ifndef __thumb2__
1085 ldr r7,[sp,#384]
1086 #endif
1087 eor r3,r3,r6
1088 #ifndef __thumb2__
1089 str r1,[sp,#200+4]
1090 #else
1091 strd r0,r1,[sp,#200] @ D[0] = C[0]
1092 #endif
1093 #ifndef __thumb2__
1094 ldr r6,[sp,#384+4]
1095 #else
1096 ldrd r7,r6,[sp,#384]
1097 #endif
1098 #ifndef __thumb2__
1099 str r2,[sp,#216] @ D[2] = C[1]
1100 #endif
1101 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
1102 #ifndef __thumb2__
1103 str r3,[sp,#216+4]
1104 #else
1105 strd r2,r3,[sp,#216] @ D[2] = C[1]
1106 #endif
1107 eor r5,r5,r8
1108
1109 #ifndef __thumb2__
1110 ldr r8,[sp,#432]
1111 #endif
1112 #ifndef __thumb2__
1113 ldr r9,[sp,#432+4]
1114 #else
1115 ldrd r8,r9,[sp,#432]
1116 #endif
1117 #ifndef __thumb2__
1118 str r4,[sp,#224] @ D[3] = C[2]
1119 #endif
1120 eor r7,r7,r4
1121 #ifndef __thumb2__
1122 str r5,[sp,#224+4]
1123 #else
1124 strd r4,r5,[sp,#224] @ D[3] = C[2]
1125 #endif
1126 eor r6,r6,r5
1127 #ifndef __thumb2__
1128 ldr r4,[sp,#240]
1129 #endif
1130 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
1131 @ mov r6,r6,ror#32-11
1132 #ifndef __thumb2__
1133 ldr r5,[sp,#240+4]
1134 #else
1135 ldrd r4,r5,[sp,#240]
1136 #endif
1137 eor r8,r8,r12
1138 eor r9,r9,r14
1139 #ifndef __thumb2__
1140 ldr r12,[sp,#336]
1141 #endif
1142 eor r0,r0,r4
1143 #ifndef __thumb2__
1144 ldr r14,[sp,#336+4]
1145 #else
1146 ldrd r12,r14,[sp,#336]
1147 #endif
1148 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
1149 @ mov r9,r9,ror#32-7
1150 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
1151 eor r12,r12,r2
1152 #ifndef __thumb2__
1153 ldr r2,[sp,#288]
1154 #endif
1155 eor r14,r14,r3
1156 #ifndef __thumb2__
1157 ldr r3,[sp,#288+4]
1158 #else
1159 ldrd r2,r3,[sp,#288]
1160 #endif
1161 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1162 ldr r12,[sp,#444] @ load counter
1163 eor r2,r2,r10
1164 adr r10,iotas32
1165 mov r4,r14,ror#32-22
1166 add r14,r10,r12
1167 eor r3,r3,r11
1168 #ifndef __thumb2__
1169 ldr r10,[r14,#8] @ iotas[i].lo
1170 #endif
1171 add r12,r12,#16
1172 #ifndef __thumb2__
1173 ldr r11,[r14,#12] @ iotas[i].hi
1174 #else
1175 ldrd r10,r11,[r14,#8] @ iotas[i].lo
1176 #endif
1177 cmp r12,#192
1178 str r12,[sp,#444] @ store counter
1179 bic r12,r4,r2,ror#32-22
1180 bic r14,r5,r3,ror#32-22
1181 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1182 mov r3,r3,ror#32-22
1183 eor r12,r12,r0
1184 eor r14,r14,r1
1185 eor r10,r10,r12
1186 eor r11,r11,r14
1187 #ifndef __thumb2__
1188 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1189 #endif
1190 bic r12,r6,r4,ror#11
1191 #ifndef __thumb2__
1192 str r11,[sp,#0+4]
1193 #else
1194 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1195 #endif
1196 bic r14,r7,r5,ror#10
1197 bic r10,r8,r6,ror#32-(11-7)
1198 bic r11,r9,r7,ror#32-(10-7)
1199 eor r12,r2,r12,ror#32-11
1200 #ifndef __thumb2__
1201 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1202 #endif
1203 eor r14,r3,r14,ror#32-10
1204 #ifndef __thumb2__
1205 str r14,[sp,#8+4]
1206 #else
1207 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1208 #endif
1209 eor r10,r4,r10,ror#32-7
1210 eor r11,r5,r11,ror#32-7
1211 #ifndef __thumb2__
1212 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1213 #endif
1214 bic r12,r0,r8,ror#32-7
1215 #ifndef __thumb2__
1216 str r11,[sp,#16+4]
1217 #else
1218 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1219 #endif
1220 bic r14,r1,r9,ror#32-7
1221 eor r12,r12,r6,ror#32-11
1222 #ifndef __thumb2__
1223 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1224 #endif
1225 eor r14,r14,r7,ror#32-10
1226 #ifndef __thumb2__
1227 str r14,[sp,#24+4]
1228 #else
1229 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1230 #endif
1231 bic r10,r2,r0
1232 add r14,sp,#224
1233 #ifndef __thumb2__
1234 ldr r0,[sp,#264] @ A[0][3]
1235 #endif
1236 bic r11,r3,r1
1237 #ifndef __thumb2__
1238 ldr r1,[sp,#264+4]
1239 #else
1240 ldrd r0,r1,[sp,#264] @ A[0][3]
1241 #endif
1242 eor r10,r10,r8,ror#32-7
1243 eor r11,r11,r9,ror#32-7
1244 #ifndef __thumb2__
1245 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1246 #endif
1247 add r9,sp,#200
1248 #ifndef __thumb2__
1249 str r11,[sp,#32+4]
1250 #else
1251 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1252 #endif
1253
1254 ldmia r14,{r10,r11,r12,r14} @ D[3..4]
1255 ldmia r9,{r6,r7,r8,r9} @ D[0..1]
1256
1257 #ifndef __thumb2__
1258 ldr r2,[sp,#312] @ A[1][4]
1259 #endif
1260 eor r0,r0,r10
1261 #ifndef __thumb2__
1262 ldr r3,[sp,#312+4]
1263 #else
1264 ldrd r2,r3,[sp,#312] @ A[1][4]
1265 #endif
1266 eor r1,r1,r11
1267 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1268 #ifndef __thumb2__
1269 ldr r10,[sp,#368] @ A[3][1]
1270 #endif
1271 @ mov r1,r1,ror#32-14
1272 #ifndef __thumb2__
1273 ldr r11,[sp,#368+4]
1274 #else
1275 ldrd r10,r11,[sp,#368] @ A[3][1]
1276 #endif
1277
1278 eor r2,r2,r12
1279 #ifndef __thumb2__
1280 ldr r4,[sp,#320] @ A[2][0]
1281 #endif
1282 eor r3,r3,r14
1283 #ifndef __thumb2__
1284 ldr r5,[sp,#320+4]
1285 #else
1286 ldrd r4,r5,[sp,#320] @ A[2][0]
1287 #endif
1288 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1289 @ mov r3,r3,ror#32-10
1290
1291 eor r6,r6,r4
1292 #ifndef __thumb2__
1293 ldr r12,[sp,#216] @ D[2]
1294 #endif
1295 eor r7,r7,r5
1296 #ifndef __thumb2__
1297 ldr r14,[sp,#216+4]
1298 #else
1299 ldrd r12,r14,[sp,#216] @ D[2]
1300 #endif
1301 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1302 mov r4,r7,ror#32-2
1303
1304 eor r10,r10,r8
1305 #ifndef __thumb2__
1306 ldr r8,[sp,#416] @ A[4][2]
1307 #endif
1308 eor r11,r11,r9
1309 #ifndef __thumb2__
1310 ldr r9,[sp,#416+4]
1311 #else
1312 ldrd r8,r9,[sp,#416] @ A[4][2]
1313 #endif
1314 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1315 mov r6,r11,ror#32-23
1316
1317 bic r10,r4,r2,ror#32-10
1318 bic r11,r5,r3,ror#32-10
1319 eor r12,r12,r8
1320 eor r14,r14,r9
1321 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1322 mov r8,r14,ror#32-31
1323 eor r10,r10,r0,ror#32-14
1324 eor r11,r11,r1,ror#32-14
1325 #ifndef __thumb2__
1326 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1327 #endif
1328 bic r12,r6,r4
1329 #ifndef __thumb2__
1330 str r11,[sp,#40+4]
1331 #else
1332 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1333 #endif
1334 bic r14,r7,r5
1335 eor r12,r12,r2,ror#32-10
1336 #ifndef __thumb2__
1337 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1338 #endif
1339 eor r14,r14,r3,ror#32-10
1340 #ifndef __thumb2__
1341 str r14,[sp,#48+4]
1342 #else
1343 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1344 #endif
1345 bic r10,r8,r6
1346 bic r11,r9,r7
1347 bic r12,r0,r8,ror#14
1348 bic r14,r1,r9,ror#14
1349 eor r10,r10,r4
1350 eor r11,r11,r5
1351 #ifndef __thumb2__
1352 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1353 #endif
1354 bic r2,r2,r0,ror#32-(14-10)
1355 #ifndef __thumb2__
1356 str r11,[sp,#56+4]
1357 #else
1358 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1359 #endif
1360 eor r12,r6,r12,ror#32-14
1361 bic r11,r3,r1,ror#32-(14-10)
1362 #ifndef __thumb2__
1363 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1364 #endif
1365 eor r14,r7,r14,ror#32-14
1366 #ifndef __thumb2__
1367 str r14,[sp,#64+4]
1368 #else
1369 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1370 #endif
1371 add r12,sp,#208
1372 #ifndef __thumb2__
1373 ldr r1,[sp,#248] @ A[0][1]
1374 #endif
1375 eor r10,r8,r2,ror#32-10
1376 #ifndef __thumb2__
1377 ldr r0,[sp,#248+4]
1378 #else
1379 ldrd r1,r0,[sp,#248] @ A[0][1]
1380 #endif
1381 eor r11,r9,r11,ror#32-10
1382 #ifndef __thumb2__
1383 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1384 #endif
1385 #ifndef __thumb2__
1386 str r11,[sp,#72+4]
1387 #else
1388 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1389 #endif
1390
1391 add r9,sp,#224
1392 ldmia r12,{r10,r11,r12,r14} @ D[1..2]
1393 #ifndef __thumb2__
1394 ldr r2,[sp,#296] @ A[1][2]
1395 #endif
1396 #ifndef __thumb2__
1397 ldr r3,[sp,#296+4]
1398 #else
1399 ldrd r2,r3,[sp,#296] @ A[1][2]
1400 #endif
1401 ldmia r9,{r6,r7,r8,r9} @ D[3..4]
1402
1403 eor r1,r1,r10
1404 #ifndef __thumb2__
1405 ldr r4,[sp,#344] @ A[2][3]
1406 #endif
1407 eor r0,r0,r11
1408 #ifndef __thumb2__
1409 ldr r5,[sp,#344+4]
1410 #else
1411 ldrd r4,r5,[sp,#344] @ A[2][3]
1412 #endif
1413 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1414
1415 eor r2,r2,r12
1416 #ifndef __thumb2__
1417 ldr r10,[sp,#392] @ A[3][4]
1418 #endif
1419 eor r3,r3,r14
1420 #ifndef __thumb2__
1421 ldr r11,[sp,#392+4]
1422 #else
1423 ldrd r10,r11,[sp,#392] @ A[3][4]
1424 #endif
1425 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1426 #ifndef __thumb2__
1427 ldr r12,[sp,#200] @ D[0]
1428 #endif
1429 @ mov r3,r3,ror#32-3
1430 #ifndef __thumb2__
1431 ldr r14,[sp,#200+4]
1432 #else
1433 ldrd r12,r14,[sp,#200] @ D[0]
1434 #endif
1435
1436 eor r4,r4,r6
1437 eor r5,r5,r7
1438 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1439 @ mov r4,r7,ror#32-13 @ [track reverse order below]
1440
1441 eor r10,r10,r8
1442 #ifndef __thumb2__
1443 ldr r8,[sp,#400] @ A[4][0]
1444 #endif
1445 eor r11,r11,r9
1446 #ifndef __thumb2__
1447 ldr r9,[sp,#400+4]
1448 #else
1449 ldrd r8,r9,[sp,#400] @ A[4][0]
1450 #endif
1451 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1452 mov r7,r11,ror#32-4
1453
1454 eor r12,r12,r8
1455 eor r14,r14,r9
1456 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1457 mov r9,r14,ror#32-9
1458
1459 bic r10,r5,r2,ror#13-3
1460 bic r11,r4,r3,ror#12-3
1461 bic r12,r6,r5,ror#32-13
1462 bic r14,r7,r4,ror#32-12
1463 eor r10,r0,r10,ror#32-13
1464 eor r11,r1,r11,ror#32-12
1465 #ifndef __thumb2__
1466 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1467 #endif
1468 eor r12,r12,r2,ror#32-3
1469 #ifndef __thumb2__
1470 str r11,[sp,#80+4]
1471 #else
1472 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1473 #endif
1474 eor r14,r14,r3,ror#32-3
1475 #ifndef __thumb2__
1476 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1477 #endif
1478 bic r10,r8,r6
1479 bic r11,r9,r7
1480 #ifndef __thumb2__
1481 str r14,[sp,#88+4]
1482 #else
1483 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1484 #endif
1485 eor r10,r10,r5,ror#32-13
1486 eor r11,r11,r4,ror#32-12
1487 #ifndef __thumb2__
1488 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1489 #endif
1490 bic r12,r0,r8
1491 #ifndef __thumb2__
1492 str r11,[sp,#96+4]
1493 #else
1494 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1495 #endif
1496 bic r14,r1,r9
1497 eor r12,r12,r6
1498 eor r14,r14,r7
1499 #ifndef __thumb2__
1500 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1501 #endif
1502 bic r10,r2,r0,ror#3
1503 #ifndef __thumb2__
1504 str r14,[sp,#104+4]
1505 #else
1506 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1507 #endif
1508 bic r11,r3,r1,ror#3
1509 #ifndef __thumb2__
1510 ldr r1,[sp,#272] @ A[0][4] [in reverse order]
1511 #endif
1512 eor r10,r8,r10,ror#32-3
1513 #ifndef __thumb2__
1514 ldr r0,[sp,#272+4]
1515 #else
1516 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order]
1517 #endif
1518 eor r11,r9,r11,ror#32-3
1519 #ifndef __thumb2__
1520 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1521 #endif
1522 add r9,sp,#208
1523 #ifndef __thumb2__
1524 str r11,[sp,#112+4]
1525 #else
1526 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1527 #endif
1528
1529 #ifndef __thumb2__
1530 ldr r10,[sp,#232] @ D[4]
1531 #endif
1532 #ifndef __thumb2__
1533 ldr r11,[sp,#232+4]
1534 #else
1535 ldrd r10,r11,[sp,#232] @ D[4]
1536 #endif
1537 #ifndef __thumb2__
1538 ldr r12,[sp,#200] @ D[0]
1539 #endif
1540 #ifndef __thumb2__
1541 ldr r14,[sp,#200+4]
1542 #else
1543 ldrd r12,r14,[sp,#200] @ D[0]
1544 #endif
1545
1546 ldmia r9,{r6,r7,r8,r9} @ D[1..2]
1547
1548 eor r1,r1,r10
1549 #ifndef __thumb2__
1550 ldr r2,[sp,#280] @ A[1][0]
1551 #endif
1552 eor r0,r0,r11
1553 #ifndef __thumb2__
1554 ldr r3,[sp,#280+4]
1555 #else
1556 ldrd r2,r3,[sp,#280] @ A[1][0]
1557 #endif
1558 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1559 #ifndef __thumb2__
1560 ldr r4,[sp,#328] @ A[2][1]
1561 #endif
1562 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
1563 #ifndef __thumb2__
1564 ldr r5,[sp,#328+4]
1565 #else
1566 ldrd r4,r5,[sp,#328] @ A[2][1]
1567 #endif
1568
1569 eor r2,r2,r12
1570 #ifndef __thumb2__
1571 ldr r10,[sp,#376] @ A[3][2]
1572 #endif
1573 eor r3,r3,r14
1574 #ifndef __thumb2__
1575 ldr r11,[sp,#376+4]
1576 #else
1577 ldrd r10,r11,[sp,#376] @ A[3][2]
1578 #endif
1579 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1580 #ifndef __thumb2__
1581 ldr r12,[sp,#224] @ D[3]
1582 #endif
1583 @ mov r3,r3,ror#32-18
1584 #ifndef __thumb2__
1585 ldr r14,[sp,#224+4]
1586 #else
1587 ldrd r12,r14,[sp,#224] @ D[3]
1588 #endif
1589
1590 eor r6,r6,r4
1591 eor r7,r7,r5
1592 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1593 mov r5,r7,ror#32-5
1594
1595 eor r10,r10,r8
1596 #ifndef __thumb2__
1597 ldr r8,[sp,#424] @ A[4][3]
1598 #endif
1599 eor r11,r11,r9
1600 #ifndef __thumb2__
1601 ldr r9,[sp,#424+4]
1602 #else
1603 ldrd r8,r9,[sp,#424] @ A[4][3]
1604 #endif
1605 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1606 mov r6,r11,ror#32-8
1607
1608 eor r12,r12,r8
1609 eor r14,r14,r9
1610 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1611 mov r9,r14,ror#32-28
1612
1613 bic r10,r4,r2,ror#32-18
1614 bic r11,r5,r3,ror#32-18
1615 eor r10,r10,r0,ror#32-14
1616 eor r11,r11,r1,ror#32-13
1617 #ifndef __thumb2__
1618 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1619 #endif
1620 bic r12,r6,r4
1621 #ifndef __thumb2__
1622 str r11,[sp,#120+4]
1623 #else
1624 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1625 #endif
1626 bic r14,r7,r5
1627 eor r12,r12,r2,ror#32-18
1628 #ifndef __thumb2__
1629 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1630 #endif
1631 eor r14,r14,r3,ror#32-18
1632 #ifndef __thumb2__
1633 str r14,[sp,#128+4]
1634 #else
1635 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1636 #endif
1637 bic r10,r8,r6
1638 bic r11,r9,r7
1639 bic r12,r0,r8,ror#14
1640 bic r14,r1,r9,ror#13
1641 eor r10,r10,r4
1642 eor r11,r11,r5
1643 #ifndef __thumb2__
1644 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1645 #endif
1646 bic r2,r2,r0,ror#18-14
1647 #ifndef __thumb2__
1648 str r11,[sp,#136+4]
1649 #else
1650 strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1651 #endif
1652 eor r12,r6,r12,ror#32-14
1653 bic r11,r3,r1,ror#18-13
1654 eor r14,r7,r14,ror#32-13
1655 #ifndef __thumb2__
1656 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1657 #endif
1658 #ifndef __thumb2__
1659 str r14,[sp,#144+4]
1660 #else
1661 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1662 #endif
1663 add r14,sp,#216
1664 #ifndef __thumb2__
1665 ldr r0,[sp,#256] @ A[0][2]
1666 #endif
1667 eor r10,r8,r2,ror#32-18
1668 #ifndef __thumb2__
1669 ldr r1,[sp,#256+4]
1670 #else
1671 ldrd r0,r1,[sp,#256] @ A[0][2]
1672 #endif
1673 eor r11,r9,r11,ror#32-18
1674 #ifndef __thumb2__
1675 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1676 #endif
1677 #ifndef __thumb2__
1678 str r11,[sp,#152+4]
1679 #else
1680 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1681 #endif
1682
1683 ldmia r14,{r10,r11,r12,r14} @ D[2..3]
1684 #ifndef __thumb2__
1685 ldr r2,[sp,#304] @ A[1][3]
1686 #endif
1687 #ifndef __thumb2__
1688 ldr r3,[sp,#304+4]
1689 #else
1690 ldrd r2,r3,[sp,#304] @ A[1][3]
1691 #endif
1692 #ifndef __thumb2__
1693 ldr r6,[sp,#232] @ D[4]
1694 #endif
1695 #ifndef __thumb2__
1696 ldr r7,[sp,#232+4]
1697 #else
1698 ldrd r6,r7,[sp,#232] @ D[4]
1699 #endif
1700
1701 eor r0,r0,r10
1702 #ifndef __thumb2__
1703 ldr r4,[sp,#352] @ A[2][4]
1704 #endif
1705 eor r1,r1,r11
1706 #ifndef __thumb2__
1707 ldr r5,[sp,#352+4]
1708 #else
1709 ldrd r4,r5,[sp,#352] @ A[2][4]
1710 #endif
1711 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1712 #ifndef __thumb2__
1713 ldr r8,[sp,#200] @ D[0]
1714 #endif
1715 @ mov r1,r1,ror#32-31
1716 #ifndef __thumb2__
1717 ldr r9,[sp,#200+4]
1718 #else
1719 ldrd r8,r9,[sp,#200] @ D[0]
1720 #endif
1721
1722 eor r12,r12,r2
1723 #ifndef __thumb2__
1724 ldr r10,[sp,#360] @ A[3][0]
1725 #endif
1726 eor r14,r14,r3
1727 #ifndef __thumb2__
1728 ldr r11,[sp,#360+4]
1729 #else
1730 ldrd r10,r11,[sp,#360] @ A[3][0]
1731 #endif
1732 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1733 #ifndef __thumb2__
1734 ldr r12,[sp,#208] @ D[1]
1735 #endif
1736 mov r2,r14,ror#32-28
1737 #ifndef __thumb2__
1738 ldr r14,[sp,#208+4]
1739 #else
1740 ldrd r12,r14,[sp,#208] @ D[1]
1741 #endif
1742
1743 eor r6,r6,r4
1744 eor r7,r7,r5
1745 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1746 mov r4,r7,ror#32-20
1747
1748 eor r10,r10,r8
1749 #ifndef __thumb2__
1750 ldr r8,[sp,#408] @ A[4][1]
1751 #endif
1752 eor r11,r11,r9
1753 #ifndef __thumb2__
1754 ldr r9,[sp,#408+4]
1755 #else
1756 ldrd r8,r9,[sp,#408] @ A[4][1]
1757 #endif
1758 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1759 mov r6,r11,ror#32-21
1760
1761 eor r8,r8,r12
1762 eor r9,r9,r14
1763 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1764 @ mov r9,r3,ror#32-1
1765
1766 bic r10,r4,r2
1767 bic r11,r5,r3
1768 eor r10,r10,r0,ror#32-31
1769 #ifndef __thumb2__
1770 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1771 #endif
1772 eor r11,r11,r1,ror#32-31
1773 #ifndef __thumb2__
1774 str r11,[sp,#160+4]
1775 #else
1776 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1777 #endif
1778 bic r12,r6,r4
1779 bic r14,r7,r5
1780 eor r12,r12,r2
1781 eor r14,r14,r3
1782 #ifndef __thumb2__
1783 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1784 #endif
1785 bic r10,r8,r6,ror#1
1786 #ifndef __thumb2__
1787 str r14,[sp,#168+4]
1788 #else
1789 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1790 #endif
1791 bic r11,r9,r7,ror#1
1792 bic r12,r0,r8,ror#31-1
1793 bic r14,r1,r9,ror#31-1
1794 eor r4,r4,r10,ror#32-1
1795 #ifndef __thumb2__
1796 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1797 #endif
1798 eor r5,r5,r11,ror#32-1
1799 #ifndef __thumb2__
1800 str r5,[sp,#176+4]
1801 #else
1802 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1803 #endif
1804 eor r6,r6,r12,ror#32-31
1805 eor r7,r7,r14,ror#32-31
1806 #ifndef __thumb2__
1807 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1808 #endif
1809 bic r10,r2,r0,ror#32-31
1810 #ifndef __thumb2__
1811 str r7,[sp,#184+4]
1812 #else
1813 strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1814 #endif
1815 bic r11,r3,r1,ror#32-31
1816 add r12,sp,#0
1817 eor r8,r10,r8,ror#32-1
1818 add r10,sp,#40
1819 eor r9,r11,r9,ror#32-1
1820 #ifndef __thumb2__
1821 str r8,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1822 #endif
1823 #ifndef __thumb2__
1824 str r9,[sp,#192+4]
1825 #else
1826 strd r8,r9,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1827 #endif
1828 blo .Lround2x
1829
1830 ldr pc,[sp,#440]
1831 .size KeccakF1600_int,.-KeccakF1600_int
1832
@ KeccakF1600: apply the Keccak-f[1600] permutation to a state held in memory.
@
@ In:    r0 = pointer to the 200-byte state A[5][5]; it is copied to the
@             stack, permuted there and written back to the same address.
@ Out:   no result register; r0-r3 and r12 are left holding scratch values.
@ Saved: r4-r11 are restored on exit; returns by popping the saved lr into pc.
@ Stack: 440+16 bytes of scratch. Bytes 0..199 hold the working copy of A;
@        KeccakF1600_enter stores its own return address at [sp,#440] and
@        zeroes [sp,#444].
1833 .type KeccakF1600, %function
1834 .align 5
1835 KeccakF1600:
1836 stmdb sp!,{r0,r4-r11,lr} @ r0 (pointer to A) takes the lowest slot, i.e. [sp,#440+16] once the scratch area is allocated
1837 sub sp,sp,#440+16 @ space for A[5][5],D[5],T[5][5],...
1838
@ Copy the state to the stack one 40-byte row (ten words) at a time.
@ r10 walks the caller's rows 1..4, r11 walks the stack copy.
1839 add r10,r0,#40
1840 add r11,sp,#40
1841 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1842 stmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1843 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1844 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1845 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1846 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1847 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1848 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
@ Last row, A[4][0..4]. After this load r4-r9 hold A[4][2..4], and r12/r10
@ are set to sp and sp+40: the same register state KeccakF1600_int builds
@ before it falls through into KeccakF1600_enter.
1849 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1850 add r12,sp,#0
1851 add r10,sp,#40
1852 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1853
1854 bl KeccakF1600_enter
1855
@ Copy the permuted state back. The copy-out reads rows 1..4 through r10,
@ which the round code leaves at sp+40 (its last "add r10,sp,#40").
1856 ldr r11, [sp,#440+16] @ restore pointer to A
1857 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1858 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
1859 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1860 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1861 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1862 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1863 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1865 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1867
@ Release the scratch area plus the 4-byte slot that held the saved r0,
@ then restore the callee-saved registers and return.
1868 add sp,sp,#440+20
1869 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1870 .size KeccakF1600,.-KeccakF1600
@ SHA3_absorb: XOR whole input blocks into the Keccak state, running the
@ permutation (KeccakF1600_int) after each block.
@
@ In:  r0 = pointer to the 200-byte state A[5][5]
@      r1 = input byte pointer
@      r2 = len, number of input bytes available
@      r3 = bsz, block size in bytes (consumed 8 bytes at a time, so
@           presumably a multiple of 8 -- confirm against the callers)
@ Out: r0 = number of input bytes left unprocessed: len itself when
@           len < bsz, otherwise len reduced by bsz for every block absorbed.
@ NOTE(review): presumably declared in C as
@   size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp,
@                      size_t len, size_t bsz) -- confirm against the header.
@
@ Each 64-bit input lane is bit-interleaved before it is XORed in: the
@ first 32-bit word of a state lane receives the even-numbered bits, the
@ second word the odd-numbered bits.
@
@ Stack layout after the prologue (offsets from sp):
@     0..199   working copy of A
@   440, 444   used by KeccakF1600_enter (return address, zeroed word)
@   456..468   masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@        472   saved r0 (pointer to A)
@        476   saved r1 (input pointer, rewritten after every block)
@        480   saved r2 slot, reused to hold len - bsz
@        484   saved r3 (bsz)
@      488..   saved r4-r12 and lr
1871 .globl SHA3_absorb
1872 .type SHA3_absorb,%function
1873 .align 5
1874 SHA3_absorb:
1875 stmdb sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1876 sub sp,sp,#456+16
1877
1878 add r10,r0,#40
1879 @ mov r11,r1
1880 mov r12,r2 @ r12 = len
1881 mov r14,r3 @ r14 = bsz
1882 cmp r2,r3
1883 blo .Labsorb_abort @ len < bsz: nothing to absorb, return len unchanged
1884
@ Copy the state to the stack, ten words (one 40-byte row) per transfer.
1885 add r11,sp,#0
1886 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1887 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1888 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1889 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1890 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1891 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1892 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1893 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1894 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1895 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1896
@ r11 was used as the copy cursor above; reload it with the input pointer
@ from the slot where the prologue saved r1.
1897 ldr r11,[sp,#476] @ restore r11
@ Build the four interleave masks. Thumb-2 loads them as immediates; the
@ ARM path composes them from 8-bit constants with shifted ORs.
1898 #ifdef __thumb2__
1899 mov r9,#0x00ff00ff
1900 mov r8,#0x0f0f0f0f
1901 mov r7,#0x33333333
1902 mov r6,#0x55555555
1903 #else
1904 mov r6,#0x11 @ compose constants
1905 mov r8,#0x0f
1906 mov r9,#0xff
1907 orr r6,r6,r6,lsl#8
1908 orr r8,r8,r8,lsl#8
1909 orr r6,r6,r6,lsl#16 @ 0x11111111
1910 orr r9,r9,r9,lsl#16 @ 0x00ff00ff
1911 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
1912 orr r7,r6,r6,lsl#1 @ 0x33333333
1913 orr r6,r6,r6,lsl#2 @ 0x55555555
1914 #endif
@ Keep a copy of the masks on the stack; they are reloaded after every
@ call to KeccakF1600_int.
1915 str r9,[sp,#468]
1916 str r8,[sp,#464]
1917 str r7,[sp,#460]
1918 str r6,[sp,#456]
1919 b .Loop_absorb
1920
@ Outer loop, one iteration per block.
@ Here r12 = bytes remaining, r14 = bsz, r11 = input pointer.
1921 .align 4
1922 .Loop_absorb:
1923 subs r0,r12,r14
1924 blo .Labsorbed @ fewer than bsz bytes remain: finish
1925 add r10,sp,#0 @ r10 = cursor into the stack copy of A
1926 str r0,[sp,#480] @ save len - bsz
1927
@ Inner loop, one 64-bit lane (8 input bytes) per iteration.
@ r14 counts down the bytes left in the current block.
1928 .align 4
1929 .Loop_block:
@ Assemble two little-endian 32-bit words from eight single-byte loads:
@ r0 = low word, r1 = high word.
1930 ldrb r0,[r11],#1
1931 ldrb r1,[r11],#1
1932 ldrb r2,[r11],#1
1933 ldrb r3,[r11],#1
1934 ldrb r4,[r11],#1
1935 orr r0,r0,r1,lsl#8
1936 ldrb r1,[r11],#1
1937 orr r0,r0,r2,lsl#16
1938 ldrb r2,[r11],#1
1939 orr r0,r0,r3,lsl#24 @ lo
1940 ldrb r3,[r11],#1
1941 orr r1,r4,r1,lsl#8
1942 orr r1,r1,r2,lsl#16
1943 orr r1,r1,r3,lsl#24 @ hi
1944
@ Bit interleave. r2/r3 take the even-numbered bits of lo/hi and squeeze
@ them towards the low half; r0/r1 keep the odd-numbered bits and squeeze
@ them towards the high half. Each mask/shift/OR step doubles the width of
@ the packed groups (1 -> 2 -> 4 -> 8 -> 16 bits).
1945 and r2,r0,r6 @ &=0x55555555
1946 and r0,r0,r6,lsl#1 @ &=0xaaaaaaaa
1947 and r3,r1,r6 @ &=0x55555555
1948 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
1949 orr r2,r2,r2,lsr#1
1950 orr r0,r0,r0,lsl#1
1951 orr r3,r3,r3,lsr#1
1952 orr r1,r1,r1,lsl#1
1953 and r2,r2,r7 @ &=0x33333333
1954 and r0,r0,r7,lsl#2 @ &=0xcccccccc
1955 and r3,r3,r7 @ &=0x33333333
1956 and r1,r1,r7,lsl#2 @ &=0xcccccccc
1957 orr r2,r2,r2,lsr#2
1958 orr r0,r0,r0,lsl#2
1959 orr r3,r3,r3,lsr#2
1960 orr r1,r1,r1,lsl#2
1961 and r2,r2,r8 @ &=0x0f0f0f0f
1962 and r0,r0,r8,lsl#4 @ &=0xf0f0f0f0
1963 and r3,r3,r8 @ &=0x0f0f0f0f
1964 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
1965 ldmia r10,{r4,r5} @ A_flat[i]
1966 orr r2,r2,r2,lsr#4
1967 orr r0,r0,r0,lsl#4
1968 orr r3,r3,r3,lsr#4
1969 orr r1,r1,r1,lsl#4
1970 and r2,r2,r9 @ &=0x00ff00ff
1971 and r0,r0,r9,lsl#8 @ &=0xff00ff00
1972 and r3,r3,r9 @ &=0x00ff00ff
1973 and r1,r1,r9,lsl#8 @ &=0xff00ff00
1974 orr r2,r2,r2,lsr#8
1975 orr r0,r0,r0,lsl#8
1976 orr r3,r3,r3,lsr#8
1977 orr r1,r1,r1,lsl#8
1978
@ Combine the halves and XOR into the lane:
@   r4 ^= (even bits of hi) << 16 | (even bits of lo)
@   r5 ^= (odd bits of hi, upper half) | (odd bits of lo) >> 16
@ The lsl#16/lsr#16 pairs discard the unmasked leftovers in the other half.
1979 mov r2,r2,lsl#16
1980 mov r1,r1,lsr#16
1981 eor r4,r4,r3,lsl#16
1982 eor r5,r5,r0,lsr#16
1983 eor r4,r4,r2,lsr#16
1984 eor r5,r5,r1,lsl#16
1985 stmia r10!,{r4,r5} @ A_flat[i++] ^= BitInterleave(inp[0..7])
1986
1987 subs r14,r14,#8
1988 bhi .Loop_block @ more bytes left in this block
1989
1990 str r11,[sp,#476] @ save the advanced input pointer across the call
1991
1992 bl KeccakF1600_int
1993
@ One 8-register load brings back everything that was parked on the stack:
@ r6-r9 = masks, r10 = pointer to A, r11 = input pointer,
@ r12 = len - bsz (the new remaining length), r14 = bsz.
1994 add r14,sp,#456
1995 ldmia r14,{r6,r7,r8,r9,r10,r11,r12,r14} @ restore constants and variables
1996 b .Loop_absorb
1997
@ All whole blocks are absorbed: copy the working state back to the caller.
@ r10 = pointer to A (from the reload above), r12 = bytes left over.
1998 .align 4
1999 .Labsorbed:
2000 add r11,sp,#40
2001 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2002 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
2003 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2004 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2005 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2006 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2007 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2008 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2009 ldmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2010 stmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2011
@ Common exit. Drop the scratch area (456+16) and the four saved argument
@ slots r0-r3 (16 bytes). r0 must be set from r12 before the final ldmia,
@ because that ldmia also restores the caller's r12.
2012 .Labsorb_abort:
2013 add sp,sp,#456+32
2014 mov r0,r12 @ return value
2015 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2016 .size SHA3_absorb,.-SHA3_absorb
@ SHA3_squeeze: write output bytes taken from the Keccak state, calling
@ KeccakF1600 on the state each time a full block has been emitted and
@ more output is still wanted.
@
@ In:  r0 = pointer to the 200-byte state A[5][5] (bit-interleaved words)
@      r1 = output byte pointer
@      r2 = len, number of bytes to produce
@      r3 = bsz, block size in bytes (counted down 8 at a time)
@ Out: no return value is set up.
@ NOTE(review): len == 0 is not special-cased. The first pass then takes the
@ tail path, which stores a byte before decrementing the count, so 7 bytes
@ are written. Callers presumably never pass 0 -- confirm.
@
@ Register roles inside the loop:
@   r4 = output pointer, r5 = bytes still to produce, r6-r9 = masks,
@   r10 = cursor into the state, r12 = bytes left in the current block,
@   r14 = pointer to the start of the state.
@ Stack after the prologue (offsets from sp):
@   0..12 masks (r6-r9), 16 saved r0 (pointer to A), 20 saved r3 (bsz),
@   24.. saved r4-r10 and lr.
2017 .globl SHA3_squeeze
2018 .type SHA3_squeeze,%function
2019 .align 5
2020 SHA3_squeeze:
2021 stmdb sp!,{r0,r3-r10,lr}
2022
2023 mov r10,r0
2024 mov r4,r1
2025 mov r5,r2
2026 mov r12,r3
2027
@ Build the four de-interleave masks (immediates on Thumb-2, composed from
@ 8-bit constants on ARM).
2028 #ifdef __thumb2__
2029 mov r9,#0x00ff00ff
2030 mov r8,#0x0f0f0f0f
2031 mov r7,#0x33333333
2032 mov r6,#0x55555555
2033 #else
2034 mov r6,#0x11 @ compose constants
2035 mov r8,#0x0f
2036 mov r9,#0xff
2037 orr r6,r6,r6,lsl#8
2038 orr r8,r8,r8,lsl#8
2039 orr r6,r6,r6,lsl#16 @ 0x11111111
2040 orr r9,r9,r9,lsl#16 @ 0x00ff00ff
2041 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
2042 orr r7,r6,r6,lsl#1 @ 0x33333333
2043 orr r6,r6,r6,lsl#2 @ 0x55555555
2044 #endif
2045 stmdb sp!,{r6,r7,r8,r9} @ park the masks just below the saved r0/r3 for the reload after KeccakF1600
2046
2047 mov r14,r10
2048 b .Loop_squeeze
2049
@ One 64-bit lane per iteration.
2050 .align 4
2051 .Loop_squeeze:
2052 ldmia r10!,{r0,r1} @ A_flat[i++]
2053
@ Split the two stored words into 16-bit halves. The low halves of both
@ words rebuild the low 32 output bits (in r2/r3), the high halves rebuild
@ the high 32 output bits (in r0/r1).
2054 mov r2,r0,lsl#16
2055 mov r3,r1,lsl#16 @ r3 = r1 << 16
2056 mov r2,r2,lsr#16 @ r2 = r0 & 0x0000ffff
2057 mov r1,r1,lsr#16
2058 mov r0,r0,lsr#16 @ r0 = r0 >> 16
2059 mov r1,r1,lsl#16 @ r1 = r1 & 0xffff0000
2060
@ Spread the packed bits back out, halving the group width at each step
@ (8 -> 4 -> 2 -> 1). Values from the first word end up on the 0x55555555
@ positions, values from the second word on the 0xaaaaaaaa positions.
2061 orr r2,r2,r2,lsl#8
2062 orr r3,r3,r3,lsr#8
2063 orr r0,r0,r0,lsl#8
2064 orr r1,r1,r1,lsr#8
2065 and r2,r2,r9 @ &=0x00ff00ff
2066 and r3,r3,r9,lsl#8 @ &=0xff00ff00
2067 and r0,r0,r9 @ &=0x00ff00ff
2068 and r1,r1,r9,lsl#8 @ &=0xff00ff00
2069 orr r2,r2,r2,lsl#4
2070 orr r3,r3,r3,lsr#4
2071 orr r0,r0,r0,lsl#4
2072 orr r1,r1,r1,lsr#4
2073 and r2,r2,r8 @ &=0x0f0f0f0f
2074 and r3,r3,r8,lsl#4 @ &=0xf0f0f0f0
2075 and r0,r0,r8 @ &=0x0f0f0f0f
2076 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
2077 orr r2,r2,r2,lsl#2
2078 orr r3,r3,r3,lsr#2
2079 orr r0,r0,r0,lsl#2
2080 orr r1,r1,r1,lsr#2
2081 and r2,r2,r7 @ &=0x33333333
2082 and r3,r3,r7,lsl#2 @ &=0xcccccccc
2083 and r0,r0,r7 @ &=0x33333333
2084 and r1,r1,r7,lsl#2 @ &=0xcccccccc
2085 orr r2,r2,r2,lsl#1
2086 orr r3,r3,r3,lsr#1
2087 orr r0,r0,r0,lsl#1
2088 orr r1,r1,r1,lsr#1
2089 and r2,r2,r6 @ &=0x55555555
2090 and r3,r3,r6,lsl#1 @ &=0xaaaaaaaa
2091 and r0,r0,r6 @ &=0x55555555
2092 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
2093
2094 orr r2,r2,r3 @ r2 = low 32 bits of the lane
2095 orr r0,r0,r1 @ r0 = high 32 bits of the lane
2096
2097 cmp r5,#8
2098 blo .Lsqueeze_tail @ fewer than 8 bytes wanted: emit them one by one
@ Store the lane as 8 bytes, least significant byte first.
2099 mov r1,r2,lsr#8
2100 strb r2,[r4],#1
2101 mov r3,r2,lsr#16
2102 strb r1,[r4],#1
2103 mov r2,r2,lsr#24
2104 strb r3,[r4],#1
2105 strb r2,[r4],#1
2106
2107 mov r1,r0,lsr#8
2108 strb r0,[r4],#1
2109 mov r3,r0,lsr#16
2110 strb r1,[r4],#1
2111 mov r0,r0,lsr#24
2112 strb r3,[r4],#1
2113 strb r0,[r4],#1
2114 subs r5,r5,#8
2115 beq .Lsqueeze_done
2116
2117 subs r12,r12,#8 @ bsz -= 8
2118 bhi .Loop_squeeze
2119
@ A whole block has been emitted and more output is needed: permute the
@ state in place, then start again from its first lane.
2120 mov r0,r14 @ original r10
2121
2122 bl KeccakF1600
2123
@ Reload what the call may have clobbered, straight from the stack:
@ r6-r9 = masks, r10 = pointer to A (saved r0), r12 = bsz (saved r3).
2124 ldmia sp,{r6,r7,r8,r9,r10,r12} @ restore constants and variables
2125 mov r14,r10
2126 b .Loop_squeeze
2127
@ Tail: emit the remaining bytes (fewer than 8). Up to four come from the
@ low word in r2, then up to three from the high word in r0; the count is
@ checked after every store.
2128 .align 4
2129 .Lsqueeze_tail:
2130 strb r2,[r4],#1
2131 mov r2,r2,lsr#8
2132 subs r5,r5,#1
2133 beq .Lsqueeze_done
2134 strb r2,[r4],#1
2135 mov r2,r2,lsr#8
2136 subs r5,r5,#1
2137 beq .Lsqueeze_done
2138 strb r2,[r4],#1
2139 mov r2,r2,lsr#8
2140 subs r5,r5,#1
2141 beq .Lsqueeze_done
2142 strb r2,[r4],#1
2143 subs r5,r5,#1
2144 beq .Lsqueeze_done
2145
2146 strb r0,[r4],#1
2147 mov r0,r0,lsr#8
2148 subs r5,r5,#1
2149 beq .Lsqueeze_done
2150 strb r0,[r4],#1
2151 mov r0,r0,lsr#8
2152 subs r5,r5,#1
2153 beq .Lsqueeze_done
2154 strb r0,[r4]
2155 b .Lsqueeze_done
2156
@ Exit: skip the four mask words plus the saved r0 and r3 slots (24 bytes),
@ then restore r4-r10 and return through the saved lr.
2157 .align 4
2158 .Lsqueeze_done:
2159 add sp,sp,#24
2160 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2161 .size SHA3_squeeze,.-SHA3_squeeze
2162 #if __ARM_MAX_ARCH__>=7
2163 .fpu neon
2164
@ iotas64: the 24 Keccak-f[1600] round constants as plain 64-bit values,
@ one per round and in round order. KeccakF1600_neon points r2 at this
@ table and reads one entry per round with a post-incrementing 64-bit load,
@ XORing it into A[0][0]; the order of the entries must therefore not change.
@ (The iotas32 table near the top of the file instead stores each constant
@ as a pair of 32-bit words.)
2165 .type iotas64, %object
2166 .align 5
2167 iotas64:
2168 .quad 0x0000000000000001
2169 .quad 0x0000000000008082
2170 .quad 0x800000000000808a
2171 .quad 0x8000000080008000
2172 .quad 0x000000000000808b
2173 .quad 0x0000000080000001
2174 .quad 0x8000000080008081
2175 .quad 0x8000000000008009
2176 .quad 0x000000000000008a
2177 .quad 0x0000000000000088
2178 .quad 0x0000000080008009
2179 .quad 0x000000008000000a
2180 .quad 0x000000008000808b
2181 .quad 0x800000000000008b
2182 .quad 0x8000000000008089
2183 .quad 0x8000000000008003
2184 .quad 0x8000000000008002
2185 .quad 0x8000000000000080
2186 .quad 0x000000000000800a
2187 .quad 0x800000008000000a
2188 .quad 0x8000000080008081
2189 .quad 0x8000000000008080
2190 .quad 0x0000000080000001
2191 .quad 0x8000000080008008
2192 .size iotas64,.-iotas64
2193
@ KeccakF1600_neon: 24 rounds of Keccak-f[1600] on a state that lives
@ entirely in NEON registers.
@
@ In:  r0 = pointer to at least 24 bytes of scratch memory, used with a
@           64-bit alignment qualifier (16 bytes at [r0], 8 bytes at
@           [r0,#16]). SHA3_absorb_neon passes its state pointer here.
@      State lanes, as loaded by SHA3_absorb_neon:
@        A[0][0..4] = d0, d2, d4, d6, d8
@        A[1][0..4] = d1, d3, d5, d7, d9
@        A[2][0..4] = d10,d12,d14,d16,d18
@        A[3][0..4] = d11,d13,d15,d17,d19
@        A[4][0..4] = d20,d21,d22,d23,d24
@      so q0..q4 hold rows 0 and 1 column by column, q5..q9 rows 2 and 3.
@ Out: the same registers hold the permuted state.
@ Clobbers: r1, r2, r3, flags, d25-d31 and the scratch bytes at [r0].
@ Nothing is saved here; d8-d15 are part of the state, so preserving the
@ caller's copies is left to whoever calls this routine.
2194 .type KeccakF1600_neon, %function
2195 .align 5
2196 KeccakF1600_neon:
2197 add r1, r0, #16 @ r1 = second scratch slot (8 bytes) after the 16 bytes at r0
2198 adr r2, iotas64 @ r2 = cursor into the round-constant table
2199 mov r3, #24 @ loop counter
2200 b .Loop_neon
2201
2202 .align 4
2203 .Loop_neon:
2204 @ Theta
@ q4 and d18 are spilled to scratch because the column-parity computation
@ below reuses them as temporaries; they are reloaded before being updated.
2205 vst1.64 {q4}, [r0,:64] @ offload A[0..1][4]
2206 veor q13, q0, q5 @ A[0..1][0]^A[2..3][0]
2207 vst1.64 {d18}, [r1,:64] @ offload A[2][4]
2208 veor q14, q1, q6 @ A[0..1][1]^A[2..3][1]
2209 veor q15, q2, q7 @ A[0..1][2]^A[2..3][2]
2210 veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2211 veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2212 veor q14, q3, q8 @ A[0..1][3]^A[2..3][3]
2213 veor q4, q4, q9 @ A[0..1][4]^A[2..3][4]
2214 veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2215 veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2216 veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2217 veor q13, q13, q10 @ C[0..1]^=A[4][0..1]
2218 veor q14, q15, q11 @ C[2..3]^=A[4][2..3]
2219 veor d25, d25, d24 @ C[4]^=A[4][4]
2220
@ Rotate-left by 1: x+x is x<<1, then vsri #63 inserts the old top bit at
@ bit 0.
2221 vadd.u64 q4, q13, q13 @ C[0..1]<<1
2222 vadd.u64 q15, q14, q14 @ C[2..3]<<1
2223 vadd.u64 d18, d25, d25 @ C[4]<<1
2224 vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1)
2225 vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1)
2226 vsri.u64 d18, d25, #63 @ ROL64(C[4],1)
@ From here on d25 = D[0], d26 = D[1], d27 = D[2], d28 = D[3], d29 = D[4].
@ The comments further down keep naming a register by the C[] value it
@ held before this step.
2227 veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1)
2228 veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2229 veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1)
2230 veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1)
2231
2232 veor d0, d0, d25 @ A[0][0] ^= C[4]
2233 veor d1, d1, d25 @ A[1][0] ^= C[4]
2234 veor d10, d10, d25 @ A[2][0] ^= C[4]
2235 veor d11, d11, d25 @ A[3][0] ^= C[4]
2236 veor d20, d20, d25 @ A[4][0] ^= C[4]
2237
2238 veor d2, d2, d26 @ A[0][1] ^= D[1]
2239 veor d3, d3, d26 @ A[1][1] ^= D[1]
2240 veor d12, d12, d26 @ A[2][1] ^= D[1]
2241 veor d13, d13, d26 @ A[3][1] ^= D[1]
2242 veor d21, d21, d26 @ A[4][1] ^= D[1]
2243 vmov d26, d27 @ D[1] is done: duplicate D[2] so q13 applies it to two rows at once
2244
2245 veor d6, d6, d28 @ A[0][3] ^= C[2]
2246 veor d7, d7, d28 @ A[1][3] ^= C[2]
2247 veor d16, d16, d28 @ A[2][3] ^= C[2]
2248 veor d17, d17, d28 @ A[3][3] ^= C[2]
2249 veor d23, d23, d28 @ A[4][3] ^= C[2]
2250 vld1.64 {q4}, [r0,:64] @ restore A[0..1][4]
2251 vmov d28, d29 @ D[3] is done: duplicate D[4] so q14 applies it to two rows at once
2252
2253 vld1.64 {d18}, [r1,:64] @ restore A[2][4]
2254 veor q2, q2, q13 @ A[0..1][2] ^= D[2]
2255 veor q7, q7, q13 @ A[2..3][2] ^= D[2]
2256 veor d22, d22, d27 @ A[4][2] ^= D[2]
2257
2258 veor q4, q4, q14 @ A[0..1][4] ^= C[3]
2259 veor q9, q9, q14 @ A[2..3][4] ^= C[3]
2260 veor d24, d24, d29 @ A[4][4] ^= C[3]
2261
2262 @ Rho + Pi
@ Each vshl/vsri pair is a 64-bit rotate-left: vshl writes x<<n to the
@ destination and vsri inserts x>>(64-n) into the vacated low bits. The
@ lanes are moved in dependency order, so every source is consumed before
@ its register is overwritten. Row 0 lanes 1..4 are copied to d26-d29
@ first because they are only consumed at the very end.
2263 vmov d26, d2 @ C[1] = A[0][1]
2264 vshl.u64 d2, d3, #44
2265 vmov d27, d4 @ C[2] = A[0][2]
2266 vshl.u64 d4, d14, #43
2267 vmov d28, d6 @ C[3] = A[0][3]
2268 vshl.u64 d6, d17, #21
2269 vmov d29, d8 @ C[4] = A[0][4]
2270 vshl.u64 d8, d24, #14
2271 vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2272 vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2273 vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2274 vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2275
2276 vshl.u64 d3, d9, #20
2277 vshl.u64 d14, d16, #25
2278 vshl.u64 d17, d15, #15
2279 vshl.u64 d24, d21, #2
2280 vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2281 vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2282 vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2283 vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2284
@ Rotations by a whole number of bytes (8 and 56 bits) use a single
@ byte-wise vext in place of the commented-out vshl and its vsri partner.
2285 vshl.u64 d9, d22, #61
2286 @ vshl.u64 d16, d19, #8
2287 vshl.u64 d15, d12, #10
2288 vshl.u64 d21, d7, #55
2289 vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2290 vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2291 vsri.u64 d15, d12, #64-10 @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2292 vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2293
2294 vshl.u64 d22, d18, #39
2295 @ vshl.u64 d19, d23, #56
2296 vshl.u64 d12, d5, #6
2297 vshl.u64 d7, d13, #45
2298 vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2299 vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2300 vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2301 vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2302
2303 vshl.u64 d18, d20, #18
2304 vshl.u64 d23, d11, #41
2305 vshl.u64 d5, d10, #3
2306 vshl.u64 d13, d1, #36
2307 vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2308 vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2309 vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2310 vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2311
@ Finally place the row 0 copies saved in d26-d29 at the start.
2312 vshl.u64 d1, d28, #28
2313 vshl.u64 d10, d26, #1
2314 vshl.u64 d11, d29, #27
2315 vshl.u64 d20, d27, #62
2316 vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3])
2317 vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1])
2318 vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4])
2319 vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2])
2320
2321 @ Chi + Iota
@ vbic d, n, m computes n & ~m. Rows 0 and 1 are processed together in the
@ q registers. The new column 0 is parked in scratch because the old q0 is
@ still an input to the column 3 and column 4 updates.
2322 vbic q13, q2, q1
2323 vbic q14, q3, q2
2324 vbic q15, q4, q3
2325 veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2326 veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2327 veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2328 vst1.64 {q13}, [r0,:64] @ offload A[0..1][0]
2329 vbic q13, q0, q4
2330 vbic q15, q1, q0
2331 vmov q1, q14 @ A[0..1][1]
2332 veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2333 veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2334
@ Rows 2 and 3. q0 is free now (its new value sits in scratch), so it and
@ q15 keep the original columns 0 and 1 for the wrap-around terms.
2335 vbic q13, q7, q6
2336 vmov q0, q5 @ A[2..3][0]
2337 vbic q14, q8, q7
2338 vmov q15, q6 @ A[2..3][1]
2339 veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2340 vbic q13, q9, q8
2341 veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2342 vbic q14, q0, q9
2343 veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2344 vbic q13, q15, q0
2345 veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
2346 vmov q14, q10 @ A[4][0..1]
2347 veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2348
@ Row 4, one lane at a time; d28/d29 keep the original A[4][0] and A[4][1].
@ The round constant is fetched with post-increment, so r2 advances one
@ table entry per round.
2349 vld1.64 d25, [r2,:64]! @ Iota[i++]
2350 vbic d26, d22, d21
2351 vbic d27, d23, d22
2352 vld1.64 {q0}, [r0,:64] @ restore A[0..1][0]
2353 veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2])
2354 vbic d26, d24, d23
2355 veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3])
2356 vbic d27, d28, d24
2357 veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4])
2358 vbic d26, d29, d28
2359 veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0])
2360 veor d0, d0, d25 @ A[0][0] ^= Iota[i]
2361 veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1])
2362
2363 subs r3, r3, #1
2364 bne .Loop_neon @ repeat until all 24 rounds are done
2365
@ Return to the caller: the literal below is the ARM-mode encoding of
@ "bx lr", emitted as data.
@ NOTE(review): this is an ARM (not Thumb) encoding -- confirm that this
@ routine is only ever assembled as ARM code.
2366 .word 0xe12fff1e
2367 .size KeccakF1600_neon,.-KeccakF1600_neon
2368
2369 .globl SHA3_absorb_neon
2370 .type SHA3_absorb_neon, %function
2371 .align 5
2372 SHA3_absorb_neon:
2373 stmdb sp!, {r4,r5,r6,lr}
2374 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2375
2376 mov r4, r1 @ inp
2377 mov r5, r2 @ len
2378 mov r6, r3 @ bsz
2379
2380 vld1.32 {d0}, [r0,:64]! @ A[0][0]
2381 vld1.32 {d2}, [r0,:64]! @ A[0][1]
2382 vld1.32 {d4}, [r0,:64]! @ A[0][2]
2383 vld1.32 {d6}, [r0,:64]! @ A[0][3]
2384 vld1.32 {d8}, [r0,:64]! @ A[0][4]
2385
2386 vld1.32 {d1}, [r0,:64]! @ A[1][0]
2387 vld1.32 {d3}, [r0,:64]! @ A[1][1]
2388 vld1.32 {d5}, [r0,:64]! @ A[1][2]
2389 vld1.32 {d7}, [r0,:64]! @ A[1][3]
2390 vld1.32 {d9}, [r0,:64]! @ A[1][4]
2391
2392 vld1.32 {d10}, [r0,:64]! @ A[2][0]
2393 vld1.32 {d12}, [r0,:64]! @ A[2][1]
2394 vld1.32 {d14}, [r0,:64]! @ A[2][2]
2395 vld1.32 {d16}, [r0,:64]! @ A[2][3]
2396 vld1.32 {d18}, [r0,:64]! @ A[2][4]
2397
2398 vld1.32 {d11}, [r0,:64]! @ A[3][0]
2399 vld1.32 {d13}, [r0,:64]! @ A[3][1]
2400 vld1.32 {d15}, [r0,:64]! @ A[3][2]
2401 vld1.32 {d17}, [r0,:64]! @ A[3][3]
2402 vld1.32 {d19}, [r0,:64]! @ A[3][4]
2403
2404 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..3]
2405 vld1.32 {d24}, [r0,:64] @ A[4][4]
2406 sub r0, r0, #24*8 @ rewind
2407 b .Loop_absorb_neon
2408
2409 .align 4
2410 .Loop_absorb_neon:
2411 subs r12, r5, r6 @ len - bsz
2412 blo .Labsorbed_neon
2413 mov r5, r12
2414
2415 vld1.8 {d31}, [r4]! @ endian-neutral loads...
2416 cmp r6, #8*2
2417 veor d0, d0, d31 @ A[0][0] ^= *inp++
2418 blo .Lprocess_neon
2419 vld1.8 {d31}, [r4]!
2420 veor d2, d2, d31 @ A[0][1] ^= *inp++
2421 beq .Lprocess_neon
2422 vld1.8 {d31}, [r4]!
2423 cmp r6, #8*4
2424 veor d4, d4, d31 @ A[0][2] ^= *inp++
2425 blo .Lprocess_neon
2426 vld1.8 {d31}, [r4]!
2427 veor d6, d6, d31 @ A[0][3] ^= *inp++
2428 beq .Lprocess_neon
2429 vld1.8 {d31},[r4]!
2430 cmp r6, #8*6
2431 veor d8, d8, d31 @ A[0][4] ^= *inp++
2432 blo .Lprocess_neon
2433
2434 vld1.8 {d31}, [r4]!
2435 veor d1, d1, d31 @ A[1][0] ^= *inp++
2436 beq .Lprocess_neon
2437 vld1.8 {d31}, [r4]!
2438 cmp r6, #8*8
2439 veor d3, d3, d31 @ A[1][1] ^= *inp++
2440 blo .Lprocess_neon
2441 vld1.8 {d31}, [r4]!
2442 veor d5, d5, d31 @ A[1][2] ^= *inp++
2443 beq .Lprocess_neon
2444 vld1.8 {d31}, [r4]!
2445 cmp r6, #8*10
2446 veor d7, d7, d31 @ A[1][3] ^= *inp++
2447 blo .Lprocess_neon
2448 vld1.8 {d31}, [r4]!
2449 veor d9, d9, d31 @ A[1][4] ^= *inp++
2450 beq .Lprocess_neon
2451
2452 vld1.8 {d31}, [r4]!
2453 cmp r6, #8*12
2454 veor d10, d10, d31 @ A[2][0] ^= *inp++
2455 blo .Lprocess_neon
2456 vld1.8 {d31}, [r4]!
2457 veor d12, d12, d31 @ A[2][1] ^= *inp++
2458 beq .Lprocess_neon
2459 vld1.8 {d31}, [r4]!
2460 cmp r6, #8*14
2461 veor d14, d14, d31 @ A[2][2] ^= *inp++
2462 blo .Lprocess_neon
2463 vld1.8 {d31}, [r4]!
2464 veor d16, d16, d31 @ A[2][3] ^= *inp++
2465 beq .Lprocess_neon
2466 vld1.8 {d31}, [r4]!
2467 cmp r6, #8*16
2468 veor d18, d18, d31 @ A[2][4] ^= *inp++
2469 blo .Lprocess_neon
2470
2471 vld1.8 {d31}, [r4]!
2472 veor d11, d11, d31 @ A[3][0] ^= *inp++
2473 beq .Lprocess_neon
2474 vld1.8 {d31}, [r4]!
2475 cmp r6, #8*18
2476 veor d13, d13, d31 @ A[3][1] ^= *inp++
2477 blo .Lprocess_neon
2478 vld1.8 {d31}, [r4]!
2479 veor d15, d15, d31 @ A[3][2] ^= *inp++
2480 beq .Lprocess_neon
2481 vld1.8 {d31}, [r4]!
2482 cmp r6, #8*20
2483 veor d17, d17, d31 @ A[3][3] ^= *inp++
2484 blo .Lprocess_neon
2485 vld1.8 {d31}, [r4]!
2486 veor d19, d19, d31 @ A[3][4] ^= *inp++
2487 beq .Lprocess_neon
2488
2489 vld1.8 {d31}, [r4]!
2490 cmp r6, #8*22
2491 veor d20, d20, d31 @ A[4][0] ^= *inp++
2492 blo .Lprocess_neon
2493 vld1.8 {d31}, [r4]!
2494 veor d21, d21, d31 @ A[4][1] ^= *inp++
2495 beq .Lprocess_neon
2496 vld1.8 {d31}, [r4]!
2497 cmp r6, #8*24
2498 veor d22, d22, d31 @ A[4][2] ^= *inp++
2499 blo .Lprocess_neon
2500 vld1.8 {d31}, [r4]!
2501 veor d23, d23, d31 @ A[4][3] ^= *inp++
2502 beq .Lprocess_neon
2503 vld1.8 {d31}, [r4]!
2504 veor d24, d24, d31 @ A[4][4] ^= *inp++
2505
2506 .Lprocess_neon:
2507 bl KeccakF1600_neon
2508 b .Loop_absorb_neon
2509
2510 .align 4
2511 .Labsorbed_neon:
2512 vst1.32 {d0}, [r0,:64]! @ A[0][0..4]
2513 vst1.32 {d2}, [r0,:64]!
2514 vst1.32 {d4}, [r0,:64]!
2515 vst1.32 {d6}, [r0,:64]!
2516 vst1.32 {d8}, [r0,:64]!
2517
2518 vst1.32 {d1}, [r0,:64]! @ A[1][0..4]
2519 vst1.32 {d3}, [r0,:64]!
2520 vst1.32 {d5}, [r0,:64]!
2521 vst1.32 {d7}, [r0,:64]!
2522 vst1.32 {d9}, [r0,:64]!
2523
2524 vst1.32 {d10}, [r0,:64]! @ A[2][0..4]
2525 vst1.32 {d12}, [r0,:64]!
2526 vst1.32 {d14}, [r0,:64]!
2527 vst1.32 {d16}, [r0,:64]!
2528 vst1.32 {d18}, [r0,:64]!
2529
2530 vst1.32 {d11}, [r0,:64]! @ A[3][0..4]
2531 vst1.32 {d13}, [r0,:64]!
2532 vst1.32 {d15}, [r0,:64]!
2533 vst1.32 {d17}, [r0,:64]!
2534 vst1.32 {d19}, [r0,:64]!
2535
2536 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2537 vst1.32 {d24}, [r0,:64]
2538
2539 mov r0, r5 @ return value
2540 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2541 ldmia sp!, {r4,r5,r6,pc}
2542 .size SHA3_absorb_neon,.-SHA3_absorb_neon
2543
@ SHA3_squeeze_neon
@
@ Copies len bytes out of the Keccak state to out, 8 bytes at a time, and
@ runs KeccakF1600_neon on the state every time bsz bytes of the current
@ block have been emitted while more output is still wanted.
@
@ In:   r0 = A_flat (state base; accessed with a :64 alignment qualifier)
@       r1 = out, r2 = len, r3 = bsz
@ Regs: r4  = output cursor          r5  = bytes still to emit
@       r6  = bsz (kept for reload)  r12 = read cursor into the state
@       r14 = bytes left in the current block
@ Stack: r4-r6 and lr are saved on entry; d8-d15 are saved only around the
@       permutation, because the state load overwrites them.
@
@ NOTE(review): with len == 0 on entry the first compare branches to the
@ tail, whose first strb executes before any length test, so one byte is
@ written to out. Presumably callers never pass len == 0 - confirm.
2544 .globl SHA3_squeeze_neon
2545 .type SHA3_squeeze_neon, %function
2546 .align 5
2547 SHA3_squeeze_neon:
2548 stmdb sp!, {r4,r5,r6,lr} @ lr is popped straight into pc on exit
2549
2550 mov r4, r1 @ r4 = out (write cursor)
2551 mov r5, r2 @ r5 = len (bytes still to emit)
2552 mov r6, r3 @ r6 = bsz, kept to re-seed r14 after each permutation
2553 mov r12, r0 @ r12 = A_flat (read cursor, starts at the state base)
2554 mov r14, r3 @ r14 = bsz (bytes left in the current block)
2555 b .Loop_squeeze_neon
2556
2557 .align 4
2558 .Loop_squeeze_neon:
2559 cmp r5, #8
2560 blo .Lsqueeze_neon_tail @ fewer than 8 bytes wanted: finish byte by byte
2561 vld1.32 {d0}, [r12]! @ fetch next 8 bytes of state, advance read cursor
2562 vst1.8 {d0}, [r4]! @ endian-neutral store
2563
2564 subs r5, r5, #8 @ len -= 8
2565 beq .Lsqueeze_neon_done @ output complete: return without permuting again
2566
2567 subs r14, r14, #8 @ bsz -= 8
2568 bhi .Loop_squeeze_neon @ block not exhausted yet (result > 0, no borrow)
2569
@ Block exhausted and more output is wanted: load the whole state into
@ d0-d24, permute it, and write it back. d8-d15 are overwritten by the
@ load, hence the save/restore bracket around this section.
2570 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2571
2572 vld1.32 {d0}, [r0,:64]! @ A[0][0..4]
2573 vld1.32 {d2}, [r0,:64]!
2574 vld1.32 {d4}, [r0,:64]!
2575 vld1.32 {d6}, [r0,:64]!
2576 vld1.32 {d8}, [r0,:64]!
2577
2578 vld1.32 {d1}, [r0,:64]! @ A[1][0..4]
2579 vld1.32 {d3}, [r0,:64]!
2580 vld1.32 {d5}, [r0,:64]!
2581 vld1.32 {d7}, [r0,:64]!
2582 vld1.32 {d9}, [r0,:64]!
2583
2584 vld1.32 {d10}, [r0,:64]! @ A[2][0..4]
2585 vld1.32 {d12}, [r0,:64]!
2586 vld1.32 {d14}, [r0,:64]!
2587 vld1.32 {d16}, [r0,:64]!
2588 vld1.32 {d18}, [r0,:64]!
2589
2590 vld1.32 {d11}, [r0,:64]! @ A[3][0..4]
2591 vld1.32 {d13}, [r0,:64]!
2592 vld1.32 {d15}, [r0,:64]!
2593 vld1.32 {d17}, [r0,:64]!
2594 vld1.32 {d19}, [r0,:64]!
2595
2596 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2597 vld1.32 {d24}, [r0,:64] @ last lane, no writeback
2598 sub r0, r0, #24*8 @ rewind: 24 lanes were loaded with post-increment
2599
@ KeccakF1600_neon is defined outside this block. The code below relies on
@ it leaving r0 and r4-r6 intact - TODO confirm against its definition.
@ The bl overwrites r14 (lr), so the block counter is re-seeded from r6.
2600 bl KeccakF1600_neon
2601
2602 mov r12, r0 @ A_flat: remember the base; also resets the read cursor
2603 vst1.32 {d0}, [r0,:64]! @ A[0][0..4]
2604 vst1.32 {d2}, [r0,:64]!
2605 vst1.32 {d4}, [r0,:64]!
2606 vst1.32 {d6}, [r0,:64]!
2607 vst1.32 {d8}, [r0,:64]!
2608
2609 vst1.32 {d1}, [r0,:64]! @ A[1][0..4]
2610 vst1.32 {d3}, [r0,:64]!
2611 vst1.32 {d5}, [r0,:64]!
2612 vst1.32 {d7}, [r0,:64]!
2613 vst1.32 {d9}, [r0,:64]!
2614
2615 vst1.32 {d10}, [r0,:64]! @ A[2][0..4]
2616 vst1.32 {d12}, [r0,:64]!
2617 vst1.32 {d14}, [r0,:64]!
2618 vst1.32 {d16}, [r0,:64]!
2619 vst1.32 {d18}, [r0,:64]!
2620
2621 vst1.32 {d11}, [r0,:64]! @ A[3][0..4]
2622 vst1.32 {d13}, [r0,:64]!
2623 vst1.32 {d15}, [r0,:64]!
2624 vst1.32 {d17}, [r0,:64]!
2625 vst1.32 {d19}, [r0,:64]!
2626
2627 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2628 mov r14, r6 @ bsz: start counting a fresh block
2629 vst1.32 {d24}, [r0,:64]
2630 mov r0, r12 @ rewind r0 to the state base saved before the stores
2631
2632 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2633 b .Loop_squeeze_neon
2634
@ Tail: fewer than 8 bytes remain. The next 8 state bytes are read as two
@ 32-bit words and emitted one byte at a time, lowest byte first. Each cmp
@ feeds both a blo and a beq; the strb and mov in between do not update
@ the flags, so their placement between compare and branch is deliberate.
2635 .align 4
2636 .Lsqueeze_neon_tail:
2637 ldmia r12, {r2,r3} @ r2 = low word, r3 = high word of the next lane
2638 cmp r5, #2
2639 strb r2, [r4],#1 @ endian-neutral store
2640 mov r2, r2, lsr#8
2641 blo .Lsqueeze_neon_done @ len < 2
2642 strb r2, [r4], #1
2643 mov r2, r2, lsr#8
2644 beq .Lsqueeze_neon_done @ len == 2
2645 strb r2, [r4], #1
2646 mov r2, r2, lsr#8
2647 cmp r5, #4
2648 blo .Lsqueeze_neon_done @ len == 3
2649 strb r2, [r4], #1
2650 beq .Lsqueeze_neon_done @ len == 4
2651
2652 strb r3, [r4], #1 @ continue with the high word
2653 mov r3, r3, lsr#8
2654 cmp r5, #6
2655 blo .Lsqueeze_neon_done @ len == 5
2656 strb r3, [r4], #1
2657 mov r3, r3, lsr#8
2658 beq .Lsqueeze_neon_done @ len == 6
2659 strb r3, [r4], #1 @ len == 7: seventh and last byte
2660
2661 .Lsqueeze_neon_done:
2662 ldmia sp!, {r4,r5,r6,pc} @ restore r4-r6 and return via the saved lr
2663 .size SHA3_squeeze_neon,.-SHA3_squeeze_neon
2664 #endif
@ NUL-terminated ASCII identification string; the bytes decode to
@ "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
2665 .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2666 .align 2 @ pad to a 4-byte boundary after the odd-length string
2667 .align 2 @ repeated directive; already aligned, so it has no further effect
Cache object: ee8edafdda44b02677184324bfe3fade
|