The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/arm/keccak1600-armv4.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
    3 #include "arm_arch.h"
    4 
    5 .text
    6 
    7 #if defined(__thumb2__)
    8 .syntax unified                 @ Thumb-2 build: switch the assembler to unified syntax
    9 .thumb                          @ and emit Thumb encodings for the code that follows
   10 #else
   11 .code   32                      @ otherwise assemble the code that follows as 32-bit ARM instructions
   12 #endif
   13 
   14 .type   iotas32, %object        @ mark the symbol as data; it is emitted into .text (no other section directive precedes it)
      @ iotas32: table of 24 entries, each a pair of 32-bit words (8 bytes per entry).
      @ KeccakF1600_int takes the table address with adr, adds the counter it keeps
      @ at [sp,#444] as a byte offset, loads one pair with ldmia {r10,r11} and XORs
      @ the pair into the two words stored as R[0][0] (the iotas[i] term in the
      @ comments of that routine).
      @ The first word of a pair is combined with the first word of the lane and the
      @ second word with the second word of the lane.
      @ NOTE(review): the pairs look like the Keccak-f[1600] round constants in
      @ bit-interleaved form (first word = even-numbered bits, second word =
      @ odd-numbered bits); entries 0 and 1 decode to 0x1 and 0x8082 under that
      @ reading. Confirm against the generator script before relying on it.
      @ This file is generated, so a lasting change to the table belongs in the
      @ generator script named in the header of this file, not here.
   15 .align  5                       @ ARM gas takes a power of two here, so the table starts on a 32-byte boundary
   16 iotas32:
   17 .long   0x00000001, 0x00000000  @ entry 0, at byte offset 0
   18 .long   0x00000000, 0x00000089  @ entry 1, at byte offset 8
   19 .long   0x00000000, 0x8000008b
   20 .long   0x00000000, 0x80008080
   21 .long   0x00000001, 0x0000008b
   22 .long   0x00000001, 0x00008000
   23 .long   0x00000001, 0x80008088
   24 .long   0x00000001, 0x80000082
   25 .long   0x00000000, 0x0000000b
   26 .long   0x00000000, 0x0000000a
   27 .long   0x00000001, 0x00008082
   28 .long   0x00000000, 0x00008003
   29 .long   0x00000001, 0x0000808b
   30 .long   0x00000001, 0x8000000b
   31 .long   0x00000001, 0x8000008a
   32 .long   0x00000001, 0x80000081
   33 .long   0x00000000, 0x80000081
   34 .long   0x00000000, 0x80000008
   35 .long   0x00000000, 0x00000083
   36 .long   0x00000000, 0x80008003
   37 .long   0x00000001, 0x80008088
   38 .long   0x00000000, 0x80000088
   39 .long   0x00000001, 0x00008000
   40 .long   0x00000000, 0x80008082  @ entry 23, the last one, at byte offset 184
   41 .size   iotas32,.-iotas32       @ symbol size = bytes from the label to here (24 entries * 8 = 192)
   42 
   43 .type   KeccakF1600_int, %function
   44 .align  5
   45 KeccakF1600_int:
   46         add     r9,sp,#176
   47         add     r12,sp,#0
   48         add     r10,sp,#40
   49         ldmia   r9,{r4,r5,r6,r7,r8,r9}          @ A[4][2..4]
   50 KeccakF1600_enter:
   51         str     lr,[sp,#440]
   52         eor     r11,r11,r11
   53         str     r11,[sp,#444]
   54         b       .Lround2x
   55 
   56 .align  4
   57 .Lround2x:
   58         ldmia   r12,{r0,r1,r2,r3}               @ A[0][0..1]
   59         ldmia   r10,{r10,r11,r12,r14}   @ A[1][0..1]
   60 #ifdef  __thumb2__
   61         eor     r0,r0,r10
   62         eor     r1,r1,r11
   63         eor     r2,r2,r12
   64         ldrd    r10,r11,[sp,#56]
   65         eor     r3,r3,r14
   66         ldrd    r12,r14,[sp,#64]
   67         eor     r4,r4,r10
   68         eor     r5,r5,r11
   69         eor     r6,r6,r12
   70         ldrd    r10,r11,[sp,#72]
   71         eor     r7,r7,r14
   72         ldrd    r12,r14,[sp,#80]
   73         eor     r8,r8,r10
   74         eor     r9,r9,r11
   75         eor     r0,r0,r12
   76         ldrd    r10,r11,[sp,#88]
   77         eor     r1,r1,r14
   78         ldrd    r12,r14,[sp,#96]
   79         eor     r2,r2,r10
   80         eor     r3,r3,r11
   81         eor     r4,r4,r12
   82         ldrd    r10,r11,[sp,#104]
   83         eor     r5,r5,r14
   84         ldrd    r12,r14,[sp,#112]
   85         eor     r6,r6,r10
   86         eor     r7,r7,r11
   87         eor     r8,r8,r12
   88         ldrd    r10,r11,[sp,#120]
   89         eor     r9,r9,r14
   90         ldrd    r12,r14,[sp,#128]
   91         eor     r0,r0,r10
   92         eor     r1,r1,r11
   93         eor     r2,r2,r12
   94         ldrd    r10,r11,[sp,#136]
   95         eor     r3,r3,r14
   96         ldrd    r12,r14,[sp,#144]
   97         eor     r4,r4,r10
   98         eor     r5,r5,r11
   99         eor     r6,r6,r12
  100         ldrd    r10,r11,[sp,#152]
  101         eor     r7,r7,r14
  102         ldrd    r12,r14,[sp,#160]
  103         eor     r8,r8,r10
  104         eor     r9,r9,r11
  105         eor     r0,r0,r12
  106         ldrd    r10,r11,[sp,#168]
  107         eor     r1,r1,r14
  108         ldrd    r12,r14,[sp,#16]
  109         eor     r2,r2,r10
  110         eor     r3,r3,r11
  111         eor     r4,r4,r12
  112         ldrd    r10,r11,[sp,#24]
  113         eor     r5,r5,r14
  114         ldrd    r12,r14,[sp,#32]
  115 #else
  116         eor     r0,r0,r10
  117         add     r10,sp,#56
  118         eor     r1,r1,r11
  119         eor     r2,r2,r12
  120         eor     r3,r3,r14
  121         ldmia   r10,{r10,r11,r12,r14}   @ A[1][2..3]
  122         eor     r4,r4,r10
  123         add     r10,sp,#72
  124         eor     r5,r5,r11
  125         eor     r6,r6,r12
  126         eor     r7,r7,r14
  127         ldmia   r10,{r10,r11,r12,r14}   @ A[1][4]..A[2][0]
  128         eor     r8,r8,r10
  129         add     r10,sp,#88
  130         eor     r9,r9,r11
  131         eor     r0,r0,r12
  132         eor     r1,r1,r14
  133         ldmia   r10,{r10,r11,r12,r14}   @ A[2][1..2]
  134         eor     r2,r2,r10
  135         add     r10,sp,#104
  136         eor     r3,r3,r11
  137         eor     r4,r4,r12
  138         eor     r5,r5,r14
  139         ldmia   r10,{r10,r11,r12,r14}   @ A[2][3..4]
  140         eor     r6,r6,r10
  141         add     r10,sp,#120
  142         eor     r7,r7,r11
  143         eor     r8,r8,r12
  144         eor     r9,r9,r14
  145         ldmia   r10,{r10,r11,r12,r14}   @ A[3][0..1]
  146         eor     r0,r0,r10
  147         add     r10,sp,#136
  148         eor     r1,r1,r11
  149         eor     r2,r2,r12
  150         eor     r3,r3,r14
  151         ldmia   r10,{r10,r11,r12,r14}   @ A[3][2..3]
  152         eor     r4,r4,r10
  153         add     r10,sp,#152
  154         eor     r5,r5,r11
  155         eor     r6,r6,r12
  156         eor     r7,r7,r14
  157         ldmia   r10,{r10,r11,r12,r14}   @ A[3][4]..A[4][0]
  158         eor     r8,r8,r10
  159         ldr     r10,[sp,#168]           @ A[4][1]
  160         eor     r9,r9,r11
  161         ldr     r11,[sp,#168+4]
  162         eor     r0,r0,r12
  163         ldr     r12,[sp,#16]            @ A[0][2]
  164         eor     r1,r1,r14
  165         ldr     r14,[sp,#16+4]
  166         eor     r2,r2,r10
  167         add     r10,sp,#24
  168         eor     r3,r3,r11
  169         eor     r4,r4,r12
  170         eor     r5,r5,r14
  171         ldmia   r10,{r10,r11,r12,r14}   @ A[0][3..4]
  172 #endif
  173         eor     r6,r6,r10
  174         eor     r7,r7,r11
  175         eor     r8,r8,r12
  176         eor     r9,r9,r14
  177 
  178         eor     r10,r0,r5,ror#32-1      @ E[0] = ROL64(C[2], 1) ^ C[0];
  179 #ifndef __thumb2__
  180         str     r10,[sp,#208]           @ D[1] = E[0]
  181 #endif
  182         eor     r11,r1,r4
  183 #ifndef __thumb2__
  184         str     r11,[sp,#208+4]
  185 #else
  186         strd    r10,r11,[sp,#208]               @ D[1] = E[0]
  187 #endif
  188         eor     r12,r6,r1,ror#32-1      @ E[1] = ROL64(C[0], 1) ^ C[3];
  189         eor     r14,r7,r0
  190 #ifndef __thumb2__
  191         str     r12,[sp,#232]           @ D[4] = E[1]
  192 #endif
  193         eor     r0,r8,r3,ror#32-1       @ C[0] = ROL64(C[1], 1) ^ C[4];
  194 #ifndef __thumb2__
  195         str     r14,[sp,#232+4]
  196 #else
  197         strd    r12,r14,[sp,#232]               @ D[4] = E[1]
  198 #endif
  199         eor     r1,r9,r2
  200 #ifndef __thumb2__
  201         str     r0,[sp,#200]            @ D[0] = C[0]
  202 #endif
  203         eor     r2,r2,r7,ror#32-1       @ C[1] = ROL64(C[3], 1) ^ C[1];
  204 #ifndef __thumb2__
  205         ldr     r7,[sp,#144]
  206 #endif
  207         eor     r3,r3,r6
  208 #ifndef __thumb2__
  209         str     r1,[sp,#200+4]
  210 #else
  211         strd    r0,r1,[sp,#200]         @ D[0] = C[0]
  212 #endif
  213 #ifndef __thumb2__
  214         ldr     r6,[sp,#144+4]
  215 #else
  216         ldrd    r7,r6,[sp,#144]
  217 #endif
  218 #ifndef __thumb2__
  219         str     r2,[sp,#216]            @ D[2] = C[1]
  220 #endif
  221         eor     r4,r4,r9,ror#32-1       @ C[2] = ROL64(C[4], 1) ^ C[2];
  222 #ifndef __thumb2__
  223         str     r3,[sp,#216+4]
  224 #else
  225         strd    r2,r3,[sp,#216]         @ D[2] = C[1]
  226 #endif
  227         eor     r5,r5,r8
  228 
  229 #ifndef __thumb2__
  230         ldr     r8,[sp,#192]
  231 #endif
  232 #ifndef __thumb2__
  233         ldr     r9,[sp,#192+4]
  234 #else
  235         ldrd    r8,r9,[sp,#192]
  236 #endif
  237 #ifndef __thumb2__
  238         str     r4,[sp,#224]            @ D[3] = C[2]
  239 #endif
  240         eor     r7,r7,r4
  241 #ifndef __thumb2__
  242         str     r5,[sp,#224+4]
  243 #else
  244         strd    r4,r5,[sp,#224]         @ D[3] = C[2]
  245 #endif
  246         eor     r6,r6,r5
  247 #ifndef __thumb2__
  248         ldr     r4,[sp,#0]
  249 #endif
  250         @ mov   r7,r7,ror#32-10         @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
  251         @ mov   r6,r6,ror#32-11
  252 #ifndef __thumb2__
  253         ldr     r5,[sp,#0+4]
  254 #else
  255         ldrd    r4,r5,[sp,#0]
  256 #endif
  257         eor     r8,r8,r12
  258         eor     r9,r9,r14
  259 #ifndef __thumb2__
  260         ldr     r12,[sp,#96]
  261 #endif
  262         eor     r0,r0,r4
  263 #ifndef __thumb2__
  264         ldr     r14,[sp,#96+4]
  265 #else
  266         ldrd    r12,r14,[sp,#96]
  267 #endif
  268         @ mov   r8,r8,ror#32-7          @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
  269         @ mov   r9,r9,ror#32-7
  270         eor     r1,r1,r5                @ C[0] =       A[0][0] ^ C[0];
  271         eor     r12,r12,r2
  272 #ifndef __thumb2__
  273         ldr     r2,[sp,#48]
  274 #endif
  275         eor     r14,r14,r3
  276 #ifndef __thumb2__
  277         ldr     r3,[sp,#48+4]
  278 #else
  279         ldrd    r2,r3,[sp,#48]
  280 #endif
  281         mov     r5,r12,ror#32-21                @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
  282         ldr     r12,[sp,#444]                   @ load counter
  283         eor     r2,r2,r10
  284         adr     r10,iotas32
  285         mov     r4,r14,ror#32-22
  286         add     r14,r10,r12
  287         eor     r3,r3,r11
  288         ldmia   r14,{r10,r11}           @ iotas[i]
  289         bic     r12,r4,r2,ror#32-22
  290         bic     r14,r5,r3,ror#32-22
  291         mov     r2,r2,ror#32-22         @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
  292         mov     r3,r3,ror#32-22
  293         eor     r12,r12,r0
  294         eor     r14,r14,r1
  295         eor     r10,r10,r12
  296         eor     r11,r11,r14
  297 #ifndef __thumb2__
  298         str     r10,[sp,#240]           @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  299 #endif
  300         bic     r12,r6,r4,ror#11
  301 #ifndef __thumb2__
  302         str     r11,[sp,#240+4]
  303 #else
  304         strd    r10,r11,[sp,#240]               @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  305 #endif
  306         bic     r14,r7,r5,ror#10
  307         bic     r10,r8,r6,ror#32-(11-7)
  308         bic     r11,r9,r7,ror#32-(10-7)
  309         eor     r12,r2,r12,ror#32-11
  310 #ifndef __thumb2__
  311         str     r12,[sp,#248]           @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  312 #endif
  313         eor     r14,r3,r14,ror#32-10
  314 #ifndef __thumb2__
  315         str     r14,[sp,#248+4]
  316 #else
  317         strd    r12,r14,[sp,#248]               @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  318 #endif
  319         eor     r10,r4,r10,ror#32-7
  320         eor     r11,r5,r11,ror#32-7
  321 #ifndef __thumb2__
  322         str     r10,[sp,#256]           @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  323 #endif
  324         bic     r12,r0,r8,ror#32-7
  325 #ifndef __thumb2__
  326         str     r11,[sp,#256+4]
  327 #else
  328         strd    r10,r11,[sp,#256]               @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  329 #endif
  330         bic     r14,r1,r9,ror#32-7
  331         eor     r12,r12,r6,ror#32-11
  332 #ifndef __thumb2__
  333         str     r12,[sp,#264]           @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  334 #endif
  335         eor     r14,r14,r7,ror#32-10
  336 #ifndef __thumb2__
  337         str     r14,[sp,#264+4]
  338 #else
  339         strd    r12,r14,[sp,#264]               @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  340 #endif
  341         bic     r10,r2,r0
  342         add     r14,sp,#224
  343 #ifndef __thumb2__
  344         ldr     r0,[sp,#24]             @ A[0][3]
  345 #endif
  346         bic     r11,r3,r1
  347 #ifndef __thumb2__
  348         ldr     r1,[sp,#24+4]
  349 #else
  350         ldrd    r0,r1,[sp,#24]          @ A[0][3]
  351 #endif
  352         eor     r10,r10,r8,ror#32-7
  353         eor     r11,r11,r9,ror#32-7
  354 #ifndef __thumb2__
  355         str     r10,[sp,#272]           @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  356 #endif
  357         add     r9,sp,#200
  358 #ifndef __thumb2__
  359         str     r11,[sp,#272+4]
  360 #else
  361         strd    r10,r11,[sp,#272]               @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  362 #endif
  363 
  364         ldmia   r14,{r10,r11,r12,r14}   @ D[3..4]
  365         ldmia   r9,{r6,r7,r8,r9}                @ D[0..1]
  366 
  367 #ifndef __thumb2__
  368         ldr     r2,[sp,#72]             @ A[1][4]
  369 #endif
  370         eor     r0,r0,r10
  371 #ifndef __thumb2__
  372         ldr     r3,[sp,#72+4]
  373 #else
  374         ldrd    r2,r3,[sp,#72]          @ A[1][4]
  375 #endif
  376         eor     r1,r1,r11
  377         @ mov   r0,r0,ror#32-14         @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
  378 #ifndef __thumb2__
  379         ldr     r10,[sp,#128]           @ A[3][1]
  380 #endif
  381         @ mov   r1,r1,ror#32-14
  382 #ifndef __thumb2__
  383         ldr     r11,[sp,#128+4]
  384 #else
  385         ldrd    r10,r11,[sp,#128]               @ A[3][1]
  386 #endif
  387 
  388         eor     r2,r2,r12
  389 #ifndef __thumb2__
  390         ldr     r4,[sp,#80]             @ A[2][0]
  391 #endif
  392         eor     r3,r3,r14
  393 #ifndef __thumb2__
  394         ldr     r5,[sp,#80+4]
  395 #else
  396         ldrd    r4,r5,[sp,#80]          @ A[2][0]
  397 #endif
  398         @ mov   r2,r2,ror#32-10         @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
  399         @ mov   r3,r3,ror#32-10
  400 
  401         eor     r6,r6,r4
  402 #ifndef __thumb2__
  403         ldr     r12,[sp,#216]           @ D[2]
  404 #endif
  405         eor     r7,r7,r5
  406 #ifndef __thumb2__
  407         ldr     r14,[sp,#216+4]
  408 #else
  409         ldrd    r12,r14,[sp,#216]               @ D[2]
  410 #endif
  411         mov     r5,r6,ror#32-1          @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
  412         mov     r4,r7,ror#32-2
  413 
  414         eor     r10,r10,r8
  415 #ifndef __thumb2__
  416         ldr     r8,[sp,#176]            @ A[4][2]
  417 #endif
  418         eor     r11,r11,r9
  419 #ifndef __thumb2__
  420         ldr     r9,[sp,#176+4]
  421 #else
  422         ldrd    r8,r9,[sp,#176]         @ A[4][2]
  423 #endif
  424         mov     r7,r10,ror#32-22                @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
  425         mov     r6,r11,ror#32-23
  426 
  427         bic     r10,r4,r2,ror#32-10
  428         bic     r11,r5,r3,ror#32-10
  429         eor     r12,r12,r8
  430         eor     r14,r14,r9
  431         mov     r9,r12,ror#32-30                @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
  432         mov     r8,r14,ror#32-31
  433         eor     r10,r10,r0,ror#32-14
  434         eor     r11,r11,r1,ror#32-14
  435 #ifndef __thumb2__
  436         str     r10,[sp,#280]           @ R[1][0] = C[0] ^ (~C[1] & C[2])
  437 #endif
  438         bic     r12,r6,r4
  439 #ifndef __thumb2__
  440         str     r11,[sp,#280+4]
  441 #else
  442         strd    r10,r11,[sp,#280]               @ R[1][0] = C[0] ^ (~C[1] & C[2])
  443 #endif
  444         bic     r14,r7,r5
  445         eor     r12,r12,r2,ror#32-10
  446 #ifndef __thumb2__
  447         str     r12,[sp,#288]           @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  448 #endif
  449         eor     r14,r14,r3,ror#32-10
  450 #ifndef __thumb2__
  451         str     r14,[sp,#288+4]
  452 #else
  453         strd    r12,r14,[sp,#288]               @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  454 #endif
  455         bic     r10,r8,r6
  456         bic     r11,r9,r7
  457         bic     r12,r0,r8,ror#14
  458         bic     r14,r1,r9,ror#14
  459         eor     r10,r10,r4
  460         eor     r11,r11,r5
  461 #ifndef __thumb2__
  462         str     r10,[sp,#296]           @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  463 #endif
  464         bic     r2,r2,r0,ror#32-(14-10)
  465 #ifndef __thumb2__
  466         str     r11,[sp,#296+4]
  467 #else
  468         strd    r10,r11,[sp,#296]               @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  469 #endif
  470         eor     r12,r6,r12,ror#32-14
  471         bic     r11,r3,r1,ror#32-(14-10)
  472 #ifndef __thumb2__
  473         str     r12,[sp,#304]           @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  474 #endif
  475         eor     r14,r7,r14,ror#32-14
  476 #ifndef __thumb2__
  477         str     r14,[sp,#304+4]
  478 #else
  479         strd    r12,r14,[sp,#304]               @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  480 #endif
  481         add     r12,sp,#208
  482 #ifndef __thumb2__
  483         ldr     r1,[sp,#8]              @ A[0][1]
  484 #endif
  485         eor     r10,r8,r2,ror#32-10
  486 #ifndef __thumb2__
  487         ldr     r0,[sp,#8+4]
  488 #else
  489         ldrd    r1,r0,[sp,#8]           @ A[0][1]
  490 #endif
  491         eor     r11,r9,r11,ror#32-10
  492 #ifndef __thumb2__
  493         str     r10,[sp,#312]           @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  494 #endif
  495 #ifndef __thumb2__
  496         str     r11,[sp,#312+4]
  497 #else
  498         strd    r10,r11,[sp,#312]               @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  499 #endif
  500 
  501         add     r9,sp,#224
  502         ldmia   r12,{r10,r11,r12,r14}   @ D[1..2]
  503 #ifndef __thumb2__
  504         ldr     r2,[sp,#56]             @ A[1][2]
  505 #endif
  506 #ifndef __thumb2__
  507         ldr     r3,[sp,#56+4]
  508 #else
  509         ldrd    r2,r3,[sp,#56]          @ A[1][2]
  510 #endif
  511         ldmia   r9,{r6,r7,r8,r9}                @ D[3..4]
  512 
  513         eor     r1,r1,r10
  514 #ifndef __thumb2__
  515         ldr     r4,[sp,#104]            @ A[2][3]
  516 #endif
  517         eor     r0,r0,r11
  518 #ifndef __thumb2__
  519         ldr     r5,[sp,#104+4]
  520 #else
  521         ldrd    r4,r5,[sp,#104]         @ A[2][3]
  522 #endif
  523         mov     r0,r0,ror#32-1          @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
  524 
  525         eor     r2,r2,r12
  526 #ifndef __thumb2__
  527         ldr     r10,[sp,#152]           @ A[3][4]
  528 #endif
  529         eor     r3,r3,r14
  530 #ifndef __thumb2__
  531         ldr     r11,[sp,#152+4]
  532 #else
  533         ldrd    r10,r11,[sp,#152]               @ A[3][4]
  534 #endif
  535         @ mov   r2,r2,ror#32-3          @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
  536 #ifndef __thumb2__
  537         ldr     r12,[sp,#200]           @ D[0]
  538 #endif
  539         @ mov   r3,r3,ror#32-3
  540 #ifndef __thumb2__
  541         ldr     r14,[sp,#200+4]
  542 #else
  543         ldrd    r12,r14,[sp,#200]               @ D[0]
  544 #endif
  545 
  546         eor     r4,r4,r6
  547         eor     r5,r5,r7
  548         @ mov   r5,r6,ror#32-12         @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
  549         @ mov   r4,r7,ror#32-13         @ [track reverse order below]
  550 
  551         eor     r10,r10,r8
  552 #ifndef __thumb2__
  553         ldr     r8,[sp,#160]            @ A[4][0]
  554 #endif
  555         eor     r11,r11,r9
  556 #ifndef __thumb2__
  557         ldr     r9,[sp,#160+4]
  558 #else
  559         ldrd    r8,r9,[sp,#160]         @ A[4][0]
  560 #endif
  561         mov     r6,r10,ror#32-4         @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
  562         mov     r7,r11,ror#32-4
  563 
  564         eor     r12,r12,r8
  565         eor     r14,r14,r9
  566         mov     r8,r12,ror#32-9         @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
  567         mov     r9,r14,ror#32-9
  568 
  569         bic     r10,r5,r2,ror#13-3
  570         bic     r11,r4,r3,ror#12-3
  571         bic     r12,r6,r5,ror#32-13
  572         bic     r14,r7,r4,ror#32-12
  573         eor     r10,r0,r10,ror#32-13
  574         eor     r11,r1,r11,ror#32-12
  575 #ifndef __thumb2__
  576         str     r10,[sp,#320]           @ R[2][0] = C[0] ^ (~C[1] & C[2])
  577 #endif
  578         eor     r12,r12,r2,ror#32-3
  579 #ifndef __thumb2__
  580         str     r11,[sp,#320+4]
  581 #else
  582         strd    r10,r11,[sp,#320]               @ R[2][0] = C[0] ^ (~C[1] & C[2])
  583 #endif
  584         eor     r14,r14,r3,ror#32-3
  585 #ifndef __thumb2__
  586         str     r12,[sp,#328]           @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  587 #endif
  588         bic     r10,r8,r6
  589         bic     r11,r9,r7
  590 #ifndef __thumb2__
  591         str     r14,[sp,#328+4]
  592 #else
  593         strd    r12,r14,[sp,#328]               @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  594 #endif
  595         eor     r10,r10,r5,ror#32-13
  596         eor     r11,r11,r4,ror#32-12
  597 #ifndef __thumb2__
  598         str     r10,[sp,#336]           @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  599 #endif
  600         bic     r12,r0,r8
  601 #ifndef __thumb2__
  602         str     r11,[sp,#336+4]
  603 #else
  604         strd    r10,r11,[sp,#336]               @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  605 #endif
  606         bic     r14,r1,r9
  607         eor     r12,r12,r6
  608         eor     r14,r14,r7
  609 #ifndef __thumb2__
  610         str     r12,[sp,#344]           @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  611 #endif
  612         bic     r10,r2,r0,ror#3
  613 #ifndef __thumb2__
  614         str     r14,[sp,#344+4]
  615 #else
  616         strd    r12,r14,[sp,#344]               @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  617 #endif
  618         bic     r11,r3,r1,ror#3
  619 #ifndef __thumb2__
  620         ldr     r1,[sp,#32]             @ A[0][4] [in reverse order]
  621 #endif
  622         eor     r10,r8,r10,ror#32-3
  623 #ifndef __thumb2__
  624         ldr     r0,[sp,#32+4]
  625 #else
  626         ldrd    r1,r0,[sp,#32]          @ A[0][4] [in reverse order]
  627 #endif
  628         eor     r11,r9,r11,ror#32-3
  629 #ifndef __thumb2__
  630         str     r10,[sp,#352]           @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  631 #endif
  632         add     r9,sp,#208
  633 #ifndef __thumb2__
  634         str     r11,[sp,#352+4]
  635 #else
  636         strd    r10,r11,[sp,#352]               @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  637 #endif
  638 
  639 #ifndef __thumb2__
  640         ldr     r10,[sp,#232]           @ D[4]
  641 #endif
  642 #ifndef __thumb2__
  643         ldr     r11,[sp,#232+4]
  644 #else
  645         ldrd    r10,r11,[sp,#232]               @ D[4]
  646 #endif
  647 #ifndef __thumb2__
  648         ldr     r12,[sp,#200]           @ D[0]
  649 #endif
  650 #ifndef __thumb2__
  651         ldr     r14,[sp,#200+4]
  652 #else
  653         ldrd    r12,r14,[sp,#200]               @ D[0]
  654 #endif
  655 
  656         ldmia   r9,{r6,r7,r8,r9}                @ D[1..2]
  657 
  658         eor     r1,r1,r10
  659 #ifndef __thumb2__
  660         ldr     r2,[sp,#40]             @ A[1][0]
  661 #endif
  662         eor     r0,r0,r11
  663 #ifndef __thumb2__
  664         ldr     r3,[sp,#40+4]
  665 #else
  666         ldrd    r2,r3,[sp,#40]          @ A[1][0]
  667 #endif
  668         @ mov   r1,r10,ror#32-13                @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
  669 #ifndef __thumb2__
  670         ldr     r4,[sp,#88]             @ A[2][1]
  671 #endif
  672         @ mov   r0,r11,ror#32-14                @ [was loaded in reverse order]
  673 #ifndef __thumb2__
  674         ldr     r5,[sp,#88+4]
  675 #else
  676         ldrd    r4,r5,[sp,#88]          @ A[2][1]
  677 #endif
  678 
  679         eor     r2,r2,r12
  680 #ifndef __thumb2__
  681         ldr     r10,[sp,#136]           @ A[3][2]
  682 #endif
  683         eor     r3,r3,r14
  684 #ifndef __thumb2__
  685         ldr     r11,[sp,#136+4]
  686 #else
  687         ldrd    r10,r11,[sp,#136]               @ A[3][2]
  688 #endif
  689         @ mov   r2,r2,ror#32-18         @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
  690 #ifndef __thumb2__
  691         ldr     r12,[sp,#224]           @ D[3]
  692 #endif
  693         @ mov   r3,r3,ror#32-18
  694 #ifndef __thumb2__
  695         ldr     r14,[sp,#224+4]
  696 #else
  697         ldrd    r12,r14,[sp,#224]               @ D[3]
  698 #endif
  699 
  700         eor     r6,r6,r4
  701         eor     r7,r7,r5
  702         mov     r4,r6,ror#32-5          @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
  703         mov     r5,r7,ror#32-5
  704 
  705         eor     r10,r10,r8
  706 #ifndef __thumb2__
  707         ldr     r8,[sp,#184]            @ A[4][3]
  708 #endif
  709         eor     r11,r11,r9
  710 #ifndef __thumb2__
  711         ldr     r9,[sp,#184+4]
  712 #else
  713         ldrd    r8,r9,[sp,#184]         @ A[4][3]
  714 #endif
  715         mov     r7,r10,ror#32-7         @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
  716         mov     r6,r11,ror#32-8
  717 
  718         eor     r12,r12,r8
  719         eor     r14,r14,r9
  720         mov     r8,r12,ror#32-28                @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
  721         mov     r9,r14,ror#32-28
  722 
  723         bic     r10,r4,r2,ror#32-18
  724         bic     r11,r5,r3,ror#32-18
  725         eor     r10,r10,r0,ror#32-14
  726         eor     r11,r11,r1,ror#32-13
  727 #ifndef __thumb2__
  728         str     r10,[sp,#360]           @ R[3][0] = C[0] ^ (~C[1] & C[2])
  729 #endif
  730         bic     r12,r6,r4
  731 #ifndef __thumb2__
  732         str     r11,[sp,#360+4]
  733 #else
  734         strd    r10,r11,[sp,#360]               @ R[3][0] = C[0] ^ (~C[1] & C[2])
  735 #endif
  736         bic     r14,r7,r5
  737         eor     r12,r12,r2,ror#32-18
  738 #ifndef __thumb2__
  739         str     r12,[sp,#368]           @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  740 #endif
  741         eor     r14,r14,r3,ror#32-18
  742 #ifndef __thumb2__
  743         str     r14,[sp,#368+4]
  744 #else
  745         strd    r12,r14,[sp,#368]               @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  746 #endif
  747         bic     r10,r8,r6
  748         bic     r11,r9,r7
  749         bic     r12,r0,r8,ror#14
  750         bic     r14,r1,r9,ror#13
  751         eor     r10,r10,r4
  752         eor     r11,r11,r5
  753 #ifndef __thumb2__
  754         str     r10,[sp,#376]           @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  755 #endif
  756         bic     r2,r2,r0,ror#18-14
  757 #ifndef __thumb2__
  758         str     r11,[sp,#376+4]
  759 #else
  760         strd    r10,r11,[sp,#376]               @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  761 #endif
  762         eor     r12,r6,r12,ror#32-14
  763         bic     r11,r3,r1,ror#18-13
  764         eor     r14,r7,r14,ror#32-13
  765 #ifndef __thumb2__
  766         str     r12,[sp,#384]           @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  767 #endif
  768 #ifndef __thumb2__
  769         str     r14,[sp,#384+4]
  770 #else
  771         strd    r12,r14,[sp,#384]               @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  772 #endif
  773         add     r14,sp,#216
  774 #ifndef __thumb2__
  775         ldr     r0,[sp,#16]             @ A[0][2]
  776 #endif
  777         eor     r10,r8,r2,ror#32-18
  778 #ifndef __thumb2__
  779         ldr     r1,[sp,#16+4]
  780 #else
  781         ldrd    r0,r1,[sp,#16]          @ A[0][2]
  782 #endif
  783         eor     r11,r9,r11,ror#32-18
  784 #ifndef __thumb2__
  785         str     r10,[sp,#392]           @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  786 #endif
  787 #ifndef __thumb2__
  788         str     r11,[sp,#392+4]
  789 #else
  790         strd    r10,r11,[sp,#392]               @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  791 #endif
  792 
  793         ldmia   r14,{r10,r11,r12,r14}   @ D[2..3]
  794 #ifndef __thumb2__
  795         ldr     r2,[sp,#64]             @ A[1][3]
  796 #endif
  797 #ifndef __thumb2__
  798         ldr     r3,[sp,#64+4]
  799 #else
  800         ldrd    r2,r3,[sp,#64]          @ A[1][3]
  801 #endif
  802 #ifndef __thumb2__
  803         ldr     r6,[sp,#232]            @ D[4]
  804 #endif
  805 #ifndef __thumb2__
  806         ldr     r7,[sp,#232+4]
  807 #else
  808         ldrd    r6,r7,[sp,#232]         @ D[4]
  809 #endif
  810 
  811         eor     r0,r0,r10
  812 #ifndef __thumb2__
  813         ldr     r4,[sp,#112]            @ A[2][4]
  814 #endif
  815         eor     r1,r1,r11
  816 #ifndef __thumb2__
  817         ldr     r5,[sp,#112+4]
  818 #else
  819         ldrd    r4,r5,[sp,#112]         @ A[2][4]
  820 #endif
  821         @ mov   r0,r0,ror#32-31         @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
  822 #ifndef __thumb2__
  823         ldr     r8,[sp,#200]            @ D[0]
  824 #endif
  825         @ mov   r1,r1,ror#32-31
  826 #ifndef __thumb2__
  827         ldr     r9,[sp,#200+4]
  828 #else
  829         ldrd    r8,r9,[sp,#200]         @ D[0]
  830 #endif
  831 
  832         eor     r12,r12,r2
  833 #ifndef __thumb2__
  834         ldr     r10,[sp,#120]           @ A[3][0]
  835 #endif
  836         eor     r14,r14,r3
  837 #ifndef __thumb2__
  838         ldr     r11,[sp,#120+4]
  839 #else
  840         ldrd    r10,r11,[sp,#120]               @ A[3][0]
  841 #endif
  842         mov     r3,r12,ror#32-27                @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
  843 #ifndef __thumb2__
  844         ldr     r12,[sp,#208]           @ D[1]
  845 #endif
  846         mov     r2,r14,ror#32-28
  847 #ifndef __thumb2__
  848         ldr     r14,[sp,#208+4]
  849 #else
  850         ldrd    r12,r14,[sp,#208]               @ D[1]
  851 #endif
  852 
  853         eor     r6,r6,r4
  854         eor     r7,r7,r5
  855         mov     r5,r6,ror#32-19         @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
  856         mov     r4,r7,ror#32-20
  857 
  858         eor     r10,r10,r8
  859 #ifndef __thumb2__
  860         ldr     r8,[sp,#168]            @ A[4][1]
  861 #endif
  862         eor     r11,r11,r9
  863 #ifndef __thumb2__
  864         ldr     r9,[sp,#168+4]
  865 #else
  866         ldrd    r8,r9,[sp,#168]         @ A[4][1]
  867 #endif
  868         mov     r7,r10,ror#32-20                @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
  869         mov     r6,r11,ror#32-21
  870 
  871         eor     r8,r8,r12
  872         eor     r9,r9,r14
  873         @ mov   r8,r2,ror#32-1          @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
  874         @ mov   r9,r3,ror#32-1
  875 
  876         bic     r10,r4,r2
  877         bic     r11,r5,r3
  878         eor     r10,r10,r0,ror#32-31
  879 #ifndef __thumb2__
  880         str     r10,[sp,#400]           @ R[4][0] = C[0] ^ (~C[1] & C[2])
  881 #endif
  882         eor     r11,r11,r1,ror#32-31
  883 #ifndef __thumb2__
  884         str     r11,[sp,#400+4]
  885 #else
  886         strd    r10,r11,[sp,#400]               @ R[4][0] = C[0] ^ (~C[1] & C[2])
  887 #endif
  888         bic     r12,r6,r4
  889         bic     r14,r7,r5
  890         eor     r12,r12,r2
  891         eor     r14,r14,r3
  892 #ifndef __thumb2__
  893         str     r12,[sp,#408]           @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  894 #endif
  895         bic     r10,r8,r6,ror#1
  896 #ifndef __thumb2__
  897         str     r14,[sp,#408+4]
  898 #else
  899         strd    r12,r14,[sp,#408]               @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  900 #endif
  901         bic     r11,r9,r7,ror#1
  902         bic     r12,r0,r8,ror#31-1
  903         bic     r14,r1,r9,ror#31-1
  904         eor     r4,r4,r10,ror#32-1
  905 #ifndef __thumb2__
  906         str     r4,[sp,#416]            @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  907 #endif
  908         eor     r5,r5,r11,ror#32-1
  909 #ifndef __thumb2__
  910         str     r5,[sp,#416+4]
  911 #else
  912         strd    r4,r5,[sp,#416]         @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  913 #endif
  914         eor     r6,r6,r12,ror#32-31
  915         eor     r7,r7,r14,ror#32-31
  916 #ifndef __thumb2__
  917         str     r6,[sp,#424]            @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  918 #endif
  919         bic     r10,r2,r0,ror#32-31
  920 #ifndef __thumb2__
  921         str     r7,[sp,#424+4]
  922 #else
  923         strd    r6,r7,[sp,#424]         @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  924 #endif
  925         bic     r11,r3,r1,ror#32-31
  926         add     r12,sp,#240
  927         eor     r8,r10,r8,ror#32-1
  928         add     r10,sp,#280
  929         eor     r9,r11,r9,ror#32-1
  930 #ifndef __thumb2__
  931         str     r8,[sp,#432]            @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  932 #endif
  933 #ifndef __thumb2__
  934         str     r9,[sp,#432+4]
  935 #else
  936         strd    r8,r9,[sp,#432]         @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  937 #endif
  938         ldmia   r12,{r0,r1,r2,r3}               @ A[0][0..1]
  939         ldmia   r10,{r10,r11,r12,r14}   @ A[1][0..1]
  940 #ifdef  __thumb2__
  941         eor     r0,r0,r10
  942         eor     r1,r1,r11
  943         eor     r2,r2,r12
  944         ldrd    r10,r11,[sp,#296]
  945         eor     r3,r3,r14
  946         ldrd    r12,r14,[sp,#304]
  947         eor     r4,r4,r10
  948         eor     r5,r5,r11
  949         eor     r6,r6,r12
  950         ldrd    r10,r11,[sp,#312]
  951         eor     r7,r7,r14
  952         ldrd    r12,r14,[sp,#320]
  953         eor     r8,r8,r10
  954         eor     r9,r9,r11
  955         eor     r0,r0,r12
  956         ldrd    r10,r11,[sp,#328]
  957         eor     r1,r1,r14
  958         ldrd    r12,r14,[sp,#336]
  959         eor     r2,r2,r10
  960         eor     r3,r3,r11
  961         eor     r4,r4,r12
  962         ldrd    r10,r11,[sp,#344]
  963         eor     r5,r5,r14
  964         ldrd    r12,r14,[sp,#352]
  965         eor     r6,r6,r10
  966         eor     r7,r7,r11
  967         eor     r8,r8,r12
  968         ldrd    r10,r11,[sp,#360]
  969         eor     r9,r9,r14
  970         ldrd    r12,r14,[sp,#368]
  971         eor     r0,r0,r10
  972         eor     r1,r1,r11
  973         eor     r2,r2,r12
  974         ldrd    r10,r11,[sp,#376]
  975         eor     r3,r3,r14
  976         ldrd    r12,r14,[sp,#384]
  977         eor     r4,r4,r10
  978         eor     r5,r5,r11
  979         eor     r6,r6,r12
  980         ldrd    r10,r11,[sp,#392]
  981         eor     r7,r7,r14
  982         ldrd    r12,r14,[sp,#400]
  983         eor     r8,r8,r10
  984         eor     r9,r9,r11
  985         eor     r0,r0,r12
  986         ldrd    r10,r11,[sp,#408]
  987         eor     r1,r1,r14
  988         ldrd    r12,r14,[sp,#256]
  989         eor     r2,r2,r10
  990         eor     r3,r3,r11
  991         eor     r4,r4,r12
  992         ldrd    r10,r11,[sp,#264]
  993         eor     r5,r5,r14
  994         ldrd    r12,r14,[sp,#272]
  995 #else
  996         eor     r0,r0,r10
  997         add     r10,sp,#296
  998         eor     r1,r1,r11
  999         eor     r2,r2,r12
 1000         eor     r3,r3,r14
 1001         ldmia   r10,{r10,r11,r12,r14}   @ A[1][2..3]
 1002         eor     r4,r4,r10
 1003         add     r10,sp,#312
 1004         eor     r5,r5,r11
 1005         eor     r6,r6,r12
 1006         eor     r7,r7,r14
 1007         ldmia   r10,{r10,r11,r12,r14}   @ A[1][4]..A[2][0]
 1008         eor     r8,r8,r10
 1009         add     r10,sp,#328
 1010         eor     r9,r9,r11
 1011         eor     r0,r0,r12
 1012         eor     r1,r1,r14
 1013         ldmia   r10,{r10,r11,r12,r14}   @ A[2][1..2]
 1014         eor     r2,r2,r10
 1015         add     r10,sp,#344
 1016         eor     r3,r3,r11
 1017         eor     r4,r4,r12
 1018         eor     r5,r5,r14
 1019         ldmia   r10,{r10,r11,r12,r14}   @ A[2][3..4]
 1020         eor     r6,r6,r10
 1021         add     r10,sp,#360
 1022         eor     r7,r7,r11
 1023         eor     r8,r8,r12
 1024         eor     r9,r9,r14
 1025         ldmia   r10,{r10,r11,r12,r14}   @ A[3][0..1]
 1026         eor     r0,r0,r10
 1027         add     r10,sp,#376
 1028         eor     r1,r1,r11
 1029         eor     r2,r2,r12
 1030         eor     r3,r3,r14
 1031         ldmia   r10,{r10,r11,r12,r14}   @ A[3][2..3]
 1032         eor     r4,r4,r10
 1033         add     r10,sp,#392
 1034         eor     r5,r5,r11
 1035         eor     r6,r6,r12
 1036         eor     r7,r7,r14
 1037         ldmia   r10,{r10,r11,r12,r14}   @ A[3][4]..A[4][0]
 1038         eor     r8,r8,r10
 1039         ldr     r10,[sp,#408]           @ A[4][1]
 1040         eor     r9,r9,r11
 1041         ldr     r11,[sp,#408+4]
 1042         eor     r0,r0,r12
 1043         ldr     r12,[sp,#256]           @ A[0][2]
 1044         eor     r1,r1,r14
 1045         ldr     r14,[sp,#256+4]
 1046         eor     r2,r2,r10
 1047         add     r10,sp,#264
 1048         eor     r3,r3,r11
 1049         eor     r4,r4,r12
 1050         eor     r5,r5,r14
 1051         ldmia   r10,{r10,r11,r12,r14}   @ A[0][3..4]
 1052 #endif
 1053         eor     r6,r6,r10
 1054         eor     r7,r7,r11
 1055         eor     r8,r8,r12
 1056         eor     r9,r9,r14
 1057 
 1058         eor     r10,r0,r5,ror#32-1      @ E[0] = ROL64(C[2], 1) ^ C[0];
 1059 #ifndef __thumb2__
 1060         str     r10,[sp,#208]           @ D[1] = E[0]
 1061 #endif
 1062         eor     r11,r1,r4
 1063 #ifndef __thumb2__
 1064         str     r11,[sp,#208+4]
 1065 #else
 1066         strd    r10,r11,[sp,#208]               @ D[1] = E[0]
 1067 #endif
 1068         eor     r12,r6,r1,ror#32-1      @ E[1] = ROL64(C[0], 1) ^ C[3];
 1069         eor     r14,r7,r0
 1070 #ifndef __thumb2__
 1071         str     r12,[sp,#232]           @ D[4] = E[1]
 1072 #endif
 1073         eor     r0,r8,r3,ror#32-1       @ C[0] = ROL64(C[1], 1) ^ C[4];
 1074 #ifndef __thumb2__
 1075         str     r14,[sp,#232+4]
 1076 #else
 1077         strd    r12,r14,[sp,#232]               @ D[4] = E[1]
 1078 #endif
 1079         eor     r1,r9,r2
 1080 #ifndef __thumb2__
 1081         str     r0,[sp,#200]            @ D[0] = C[0]
 1082 #endif
 1083         eor     r2,r2,r7,ror#32-1       @ C[1] = ROL64(C[3], 1) ^ C[1];
 1084 #ifndef __thumb2__
 1085         ldr     r7,[sp,#384]
 1086 #endif
 1087         eor     r3,r3,r6
 1088 #ifndef __thumb2__
 1089         str     r1,[sp,#200+4]
 1090 #else
 1091         strd    r0,r1,[sp,#200]         @ D[0] = C[0]
 1092 #endif
 1093 #ifndef __thumb2__
 1094         ldr     r6,[sp,#384+4]
 1095 #else
 1096         ldrd    r7,r6,[sp,#384]
 1097 #endif
 1098 #ifndef __thumb2__
 1099         str     r2,[sp,#216]            @ D[2] = C[1]
 1100 #endif
 1101         eor     r4,r4,r9,ror#32-1       @ C[2] = ROL64(C[4], 1) ^ C[2];
 1102 #ifndef __thumb2__
 1103         str     r3,[sp,#216+4]
 1104 #else
 1105         strd    r2,r3,[sp,#216]         @ D[2] = C[1]
 1106 #endif
 1107         eor     r5,r5,r8
 1108 
 1109 #ifndef __thumb2__
 1110         ldr     r8,[sp,#432]
 1111 #endif
 1112 #ifndef __thumb2__
 1113         ldr     r9,[sp,#432+4]
 1114 #else
 1115         ldrd    r8,r9,[sp,#432]
 1116 #endif
 1117 #ifndef __thumb2__
 1118         str     r4,[sp,#224]            @ D[3] = C[2]
 1119 #endif
 1120         eor     r7,r7,r4
 1121 #ifndef __thumb2__
 1122         str     r5,[sp,#224+4]
 1123 #else
 1124         strd    r4,r5,[sp,#224]         @ D[3] = C[2]
 1125 #endif
 1126         eor     r6,r6,r5
 1127 #ifndef __thumb2__
 1128         ldr     r4,[sp,#240]
 1129 #endif
 1130         @ mov   r7,r7,ror#32-10         @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
 1131         @ mov   r6,r6,ror#32-11
 1132 #ifndef __thumb2__
 1133         ldr     r5,[sp,#240+4]
 1134 #else
 1135         ldrd    r4,r5,[sp,#240]
 1136 #endif
 1137         eor     r8,r8,r12
 1138         eor     r9,r9,r14
 1139 #ifndef __thumb2__
 1140         ldr     r12,[sp,#336]
 1141 #endif
 1142         eor     r0,r0,r4
 1143 #ifndef __thumb2__
 1144         ldr     r14,[sp,#336+4]
 1145 #else
 1146         ldrd    r12,r14,[sp,#336]
 1147 #endif
 1148         @ mov   r8,r8,ror#32-7          @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
 1149         @ mov   r9,r9,ror#32-7
 1150         eor     r1,r1,r5                @ C[0] =       A[0][0] ^ C[0];
 1151         eor     r12,r12,r2
 1152 #ifndef __thumb2__
 1153         ldr     r2,[sp,#288]
 1154 #endif
 1155         eor     r14,r14,r3
 1156 #ifndef __thumb2__
 1157         ldr     r3,[sp,#288+4]
 1158 #else
 1159         ldrd    r2,r3,[sp,#288]
 1160 #endif
 1161         mov     r5,r12,ror#32-21                @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
 1162         ldr     r12,[sp,#444]                   @ load counter
 1163         eor     r2,r2,r10
 1164         adr     r10,iotas32
 1165         mov     r4,r14,ror#32-22
 1166         add     r14,r10,r12
 1167         eor     r3,r3,r11
 1168 #ifndef __thumb2__
 1169         ldr     r10,[r14,#8]            @ iotas[i].lo
 1170 #endif
 1171         add     r12,r12,#16
 1172 #ifndef __thumb2__
 1173         ldr     r11,[r14,#12]           @ iotas[i].hi
 1174 #else
 1175         ldrd    r10,r11,[r14,#8]                @ iotas[i].lo
 1176 #endif
 1177         cmp     r12,#192
 1178         str     r12,[sp,#444]                   @ store counter
 1179         bic     r12,r4,r2,ror#32-22
 1180         bic     r14,r5,r3,ror#32-22
 1181         mov     r2,r2,ror#32-22         @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
 1182         mov     r3,r3,ror#32-22
 1183         eor     r12,r12,r0
 1184         eor     r14,r14,r1
 1185         eor     r10,r10,r12
 1186         eor     r11,r11,r14
 1187 #ifndef __thumb2__
 1188         str     r10,[sp,#0]             @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
 1189 #endif
 1190         bic     r12,r6,r4,ror#11
 1191 #ifndef __thumb2__
 1192         str     r11,[sp,#0+4]
 1193 #else
 1194         strd    r10,r11,[sp,#0]         @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
 1195 #endif
 1196         bic     r14,r7,r5,ror#10
 1197         bic     r10,r8,r6,ror#32-(11-7)
 1198         bic     r11,r9,r7,ror#32-(10-7)
 1199         eor     r12,r2,r12,ror#32-11
 1200 #ifndef __thumb2__
 1201         str     r12,[sp,#8]             @ R[0][1] = C[1] ^ (~C[2] & C[3]);
 1202 #endif
 1203         eor     r14,r3,r14,ror#32-10
 1204 #ifndef __thumb2__
 1205         str     r14,[sp,#8+4]
 1206 #else
 1207         strd    r12,r14,[sp,#8]         @ R[0][1] = C[1] ^ (~C[2] & C[3]);
 1208 #endif
 1209         eor     r10,r4,r10,ror#32-7
 1210         eor     r11,r5,r11,ror#32-7
 1211 #ifndef __thumb2__
 1212         str     r10,[sp,#16]            @ R[0][2] = C[2] ^ (~C[3] & C[4]);
 1213 #endif
 1214         bic     r12,r0,r8,ror#32-7
 1215 #ifndef __thumb2__
 1216         str     r11,[sp,#16+4]
 1217 #else
 1218         strd    r10,r11,[sp,#16]                @ R[0][2] = C[2] ^ (~C[3] & C[4]);
 1219 #endif
 1220         bic     r14,r1,r9,ror#32-7
 1221         eor     r12,r12,r6,ror#32-11
 1222 #ifndef __thumb2__
 1223         str     r12,[sp,#24]            @ R[0][3] = C[3] ^ (~C[4] & C[0]);
 1224 #endif
 1225         eor     r14,r14,r7,ror#32-10
 1226 #ifndef __thumb2__
 1227         str     r14,[sp,#24+4]
 1228 #else
 1229         strd    r12,r14,[sp,#24]                @ R[0][3] = C[3] ^ (~C[4] & C[0]);
 1230 #endif
 1231         bic     r10,r2,r0
 1232         add     r14,sp,#224
 1233 #ifndef __thumb2__
 1234         ldr     r0,[sp,#264]            @ A[0][3]
 1235 #endif
 1236         bic     r11,r3,r1
 1237 #ifndef __thumb2__
 1238         ldr     r1,[sp,#264+4]
 1239 #else
 1240         ldrd    r0,r1,[sp,#264]         @ A[0][3]
 1241 #endif
 1242         eor     r10,r10,r8,ror#32-7
 1243         eor     r11,r11,r9,ror#32-7
 1244 #ifndef __thumb2__
 1245         str     r10,[sp,#32]            @ R[0][4] = C[4] ^ (~C[0] & C[1]);
 1246 #endif
 1247         add     r9,sp,#200
 1248 #ifndef __thumb2__
 1249         str     r11,[sp,#32+4]
 1250 #else
 1251         strd    r10,r11,[sp,#32]                @ R[0][4] = C[4] ^ (~C[0] & C[1]);
 1252 #endif
 1253 
 1254         ldmia   r14,{r10,r11,r12,r14}   @ D[3..4]
 1255         ldmia   r9,{r6,r7,r8,r9}                @ D[0..1]
 1256 
 1257 #ifndef __thumb2__
 1258         ldr     r2,[sp,#312]            @ A[1][4]
 1259 #endif
 1260         eor     r0,r0,r10
 1261 #ifndef __thumb2__
 1262         ldr     r3,[sp,#312+4]
 1263 #else
 1264         ldrd    r2,r3,[sp,#312]         @ A[1][4]
 1265 #endif
 1266         eor     r1,r1,r11
 1267         @ mov   r0,r0,ror#32-14         @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
 1268 #ifndef __thumb2__
 1269         ldr     r10,[sp,#368]           @ A[3][1]
 1270 #endif
 1271         @ mov   r1,r1,ror#32-14
 1272 #ifndef __thumb2__
 1273         ldr     r11,[sp,#368+4]
 1274 #else
 1275         ldrd    r10,r11,[sp,#368]               @ A[3][1]
 1276 #endif
 1277 
 1278         eor     r2,r2,r12
 1279 #ifndef __thumb2__
 1280         ldr     r4,[sp,#320]            @ A[2][0]
 1281 #endif
 1282         eor     r3,r3,r14
 1283 #ifndef __thumb2__
 1284         ldr     r5,[sp,#320+4]
 1285 #else
 1286         ldrd    r4,r5,[sp,#320]         @ A[2][0]
 1287 #endif
 1288         @ mov   r2,r2,ror#32-10         @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
 1289         @ mov   r3,r3,ror#32-10
 1290 
 1291         eor     r6,r6,r4
 1292 #ifndef __thumb2__
 1293         ldr     r12,[sp,#216]           @ D[2]
 1294 #endif
 1295         eor     r7,r7,r5
 1296 #ifndef __thumb2__
 1297         ldr     r14,[sp,#216+4]
 1298 #else
 1299         ldrd    r12,r14,[sp,#216]               @ D[2]
 1300 #endif
 1301         mov     r5,r6,ror#32-1          @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
 1302         mov     r4,r7,ror#32-2
 1303 
 1304         eor     r10,r10,r8
 1305 #ifndef __thumb2__
 1306         ldr     r8,[sp,#416]            @ A[4][2]
 1307 #endif
 1308         eor     r11,r11,r9
 1309 #ifndef __thumb2__
 1310         ldr     r9,[sp,#416+4]
 1311 #else
 1312         ldrd    r8,r9,[sp,#416]         @ A[4][2]
 1313 #endif
 1314         mov     r7,r10,ror#32-22                @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
 1315         mov     r6,r11,ror#32-23
 1316 
 1317         bic     r10,r4,r2,ror#32-10
 1318         bic     r11,r5,r3,ror#32-10
 1319         eor     r12,r12,r8
 1320         eor     r14,r14,r9
 1321         mov     r9,r12,ror#32-30                @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
 1322         mov     r8,r14,ror#32-31
 1323         eor     r10,r10,r0,ror#32-14
 1324         eor     r11,r11,r1,ror#32-14
 1325 #ifndef __thumb2__
 1326         str     r10,[sp,#40]            @ R[1][0] = C[0] ^ (~C[1] & C[2])
 1327 #endif
 1328         bic     r12,r6,r4
 1329 #ifndef __thumb2__
 1330         str     r11,[sp,#40+4]
 1331 #else
 1332         strd    r10,r11,[sp,#40]                @ R[1][0] = C[0] ^ (~C[1] & C[2])
 1333 #endif
 1334         bic     r14,r7,r5
 1335         eor     r12,r12,r2,ror#32-10
 1336 #ifndef __thumb2__
 1337         str     r12,[sp,#48]            @ R[1][1] = C[1] ^ (~C[2] & C[3]);
 1338 #endif
 1339         eor     r14,r14,r3,ror#32-10
 1340 #ifndef __thumb2__
 1341         str     r14,[sp,#48+4]
 1342 #else
 1343         strd    r12,r14,[sp,#48]                @ R[1][1] = C[1] ^ (~C[2] & C[3]);
 1344 #endif
 1345         bic     r10,r8,r6
 1346         bic     r11,r9,r7
 1347         bic     r12,r0,r8,ror#14
 1348         bic     r14,r1,r9,ror#14
 1349         eor     r10,r10,r4
 1350         eor     r11,r11,r5
 1351 #ifndef __thumb2__
 1352         str     r10,[sp,#56]            @ R[1][2] = C[2] ^ (~C[3] & C[4]);
 1353 #endif
 1354         bic     r2,r2,r0,ror#32-(14-10)
 1355 #ifndef __thumb2__
 1356         str     r11,[sp,#56+4]
 1357 #else
 1358         strd    r10,r11,[sp,#56]                @ R[1][2] = C[2] ^ (~C[3] & C[4]);
 1359 #endif
 1360         eor     r12,r6,r12,ror#32-14
 1361         bic     r11,r3,r1,ror#32-(14-10)
 1362 #ifndef __thumb2__
 1363         str     r12,[sp,#64]            @ R[1][3] = C[3] ^ (~C[4] & C[0]);
 1364 #endif
 1365         eor     r14,r7,r14,ror#32-14
 1366 #ifndef __thumb2__
 1367         str     r14,[sp,#64+4]
 1368 #else
 1369         strd    r12,r14,[sp,#64]                @ R[1][3] = C[3] ^ (~C[4] & C[0]);
 1370 #endif
 1371         add     r12,sp,#208
 1372 #ifndef __thumb2__
 1373         ldr     r1,[sp,#248]            @ A[0][1]
 1374 #endif
 1375         eor     r10,r8,r2,ror#32-10
 1376 #ifndef __thumb2__
 1377         ldr     r0,[sp,#248+4]
 1378 #else
 1379         ldrd    r1,r0,[sp,#248]         @ A[0][1]
 1380 #endif
 1381         eor     r11,r9,r11,ror#32-10
 1382 #ifndef __thumb2__
 1383         str     r10,[sp,#72]            @ R[1][4] = C[4] ^ (~C[0] & C[1]);
 1384 #endif
 1385 #ifndef __thumb2__
 1386         str     r11,[sp,#72+4]
 1387 #else
 1388         strd    r10,r11,[sp,#72]                @ R[1][4] = C[4] ^ (~C[0] & C[1]);
 1389 #endif
 1390 
 1391         add     r9,sp,#224
 1392         ldmia   r12,{r10,r11,r12,r14}   @ D[1..2]
 1393 #ifndef __thumb2__
 1394         ldr     r2,[sp,#296]            @ A[1][2]
 1395 #endif
 1396 #ifndef __thumb2__
 1397         ldr     r3,[sp,#296+4]
 1398 #else
 1399         ldrd    r2,r3,[sp,#296]         @ A[1][2]
 1400 #endif
 1401         ldmia   r9,{r6,r7,r8,r9}                @ D[3..4]
 1402 
 1403         eor     r1,r1,r10
 1404 #ifndef __thumb2__
 1405         ldr     r4,[sp,#344]            @ A[2][3]
 1406 #endif
 1407         eor     r0,r0,r11
 1408 #ifndef __thumb2__
 1409         ldr     r5,[sp,#344+4]
 1410 #else
 1411         ldrd    r4,r5,[sp,#344]         @ A[2][3]
 1412 #endif
 1413         mov     r0,r0,ror#32-1          @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
 1414 
 1415         eor     r2,r2,r12
 1416 #ifndef __thumb2__
 1417         ldr     r10,[sp,#392]           @ A[3][4]
 1418 #endif
 1419         eor     r3,r3,r14
 1420 #ifndef __thumb2__
 1421         ldr     r11,[sp,#392+4]
 1422 #else
 1423         ldrd    r10,r11,[sp,#392]               @ A[3][4]
 1424 #endif
 1425         @ mov   r2,r2,ror#32-3          @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
 1426 #ifndef __thumb2__
 1427         ldr     r12,[sp,#200]           @ D[0]
 1428 #endif
 1429         @ mov   r3,r3,ror#32-3
 1430 #ifndef __thumb2__
 1431         ldr     r14,[sp,#200+4]
 1432 #else
 1433         ldrd    r12,r14,[sp,#200]               @ D[0]
 1434 #endif
 1435 
 1436         eor     r4,r4,r6
 1437         eor     r5,r5,r7
 1438         @ mov   r5,r6,ror#32-12         @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
 1439         @ mov   r4,r7,ror#32-13         @ [track reverse order below]
 1440 
 1441         eor     r10,r10,r8
 1442 #ifndef __thumb2__
 1443         ldr     r8,[sp,#400]            @ A[4][0]
 1444 #endif
 1445         eor     r11,r11,r9
 1446 #ifndef __thumb2__
 1447         ldr     r9,[sp,#400+4]
 1448 #else
 1449         ldrd    r8,r9,[sp,#400]         @ A[4][0]
 1450 #endif
 1451         mov     r6,r10,ror#32-4         @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
 1452         mov     r7,r11,ror#32-4
 1453 
 1454         eor     r12,r12,r8
 1455         eor     r14,r14,r9
 1456         mov     r8,r12,ror#32-9         @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
 1457         mov     r9,r14,ror#32-9
 1458 
 1459         bic     r10,r5,r2,ror#13-3
 1460         bic     r11,r4,r3,ror#12-3
 1461         bic     r12,r6,r5,ror#32-13
 1462         bic     r14,r7,r4,ror#32-12
 1463         eor     r10,r0,r10,ror#32-13
 1464         eor     r11,r1,r11,ror#32-12
 1465 #ifndef __thumb2__
 1466         str     r10,[sp,#80]            @ R[2][0] = C[0] ^ (~C[1] & C[2])
 1467 #endif
 1468         eor     r12,r12,r2,ror#32-3
 1469 #ifndef __thumb2__
 1470         str     r11,[sp,#80+4]
 1471 #else
 1472         strd    r10,r11,[sp,#80]                @ R[2][0] = C[0] ^ (~C[1] & C[2])
 1473 #endif
 1474         eor     r14,r14,r3,ror#32-3
 1475 #ifndef __thumb2__
 1476         str     r12,[sp,#88]            @ R[2][1] = C[1] ^ (~C[2] & C[3]);
 1477 #endif
 1478         bic     r10,r8,r6
 1479         bic     r11,r9,r7
 1480 #ifndef __thumb2__
 1481         str     r14,[sp,#88+4]
 1482 #else
 1483         strd    r12,r14,[sp,#88]                @ R[2][1] = C[1] ^ (~C[2] & C[3]);
 1484 #endif
 1485         eor     r10,r10,r5,ror#32-13
 1486         eor     r11,r11,r4,ror#32-12
 1487 #ifndef __thumb2__
 1488         str     r10,[sp,#96]            @ R[2][2] = C[2] ^ (~C[3] & C[4]);
 1489 #endif
 1490         bic     r12,r0,r8
 1491 #ifndef __thumb2__
 1492         str     r11,[sp,#96+4]
 1493 #else
 1494         strd    r10,r11,[sp,#96]                @ R[2][2] = C[2] ^ (~C[3] & C[4]);
 1495 #endif
 1496         bic     r14,r1,r9
 1497         eor     r12,r12,r6
 1498         eor     r14,r14,r7
 1499 #ifndef __thumb2__
 1500         str     r12,[sp,#104]           @ R[2][3] = C[3] ^ (~C[4] & C[0]);
 1501 #endif
 1502         bic     r10,r2,r0,ror#3
 1503 #ifndef __thumb2__
 1504         str     r14,[sp,#104+4]
 1505 #else
 1506         strd    r12,r14,[sp,#104]               @ R[2][3] = C[3] ^ (~C[4] & C[0]);
 1507 #endif
 1508         bic     r11,r3,r1,ror#3
 1509 #ifndef __thumb2__
 1510         ldr     r1,[sp,#272]            @ A[0][4] [in reverse order]
 1511 #endif
 1512         eor     r10,r8,r10,ror#32-3
 1513 #ifndef __thumb2__
 1514         ldr     r0,[sp,#272+4]
 1515 #else
 1516         ldrd    r1,r0,[sp,#272]         @ A[0][4] [in reverse order]
 1517 #endif
 1518         eor     r11,r9,r11,ror#32-3
 1519 #ifndef __thumb2__
 1520         str     r10,[sp,#112]           @ R[2][4] = C[4] ^ (~C[0] & C[1]);
 1521 #endif
 1522         add     r9,sp,#208
 1523 #ifndef __thumb2__
 1524         str     r11,[sp,#112+4]
 1525 #else
 1526         strd    r10,r11,[sp,#112]               @ R[2][4] = C[4] ^ (~C[0] & C[1]);
 1527 #endif
 1528 
 1529 #ifndef __thumb2__
 1530         ldr     r10,[sp,#232]           @ D[4]
 1531 #endif
 1532 #ifndef __thumb2__
 1533         ldr     r11,[sp,#232+4]
 1534 #else
 1535         ldrd    r10,r11,[sp,#232]               @ D[4]
 1536 #endif
 1537 #ifndef __thumb2__
 1538         ldr     r12,[sp,#200]           @ D[0]
 1539 #endif
 1540 #ifndef __thumb2__
 1541         ldr     r14,[sp,#200+4]
 1542 #else
 1543         ldrd    r12,r14,[sp,#200]               @ D[0]
 1544 #endif
 1545 
 1546         ldmia   r9,{r6,r7,r8,r9}                @ D[1..2]
 1547 
 1548         eor     r1,r1,r10
 1549 #ifndef __thumb2__
 1550         ldr     r2,[sp,#280]            @ A[1][0]
 1551 #endif
 1552         eor     r0,r0,r11
 1553 #ifndef __thumb2__
 1554         ldr     r3,[sp,#280+4]
 1555 #else
 1556         ldrd    r2,r3,[sp,#280]         @ A[1][0]
 1557 #endif
 1558         @ mov   r1,r10,ror#32-13                @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
 1559 #ifndef __thumb2__
 1560         ldr     r4,[sp,#328]            @ A[2][1]
 1561 #endif
 1562         @ mov   r0,r11,ror#32-14                @ [was loaded in reverse order]
 1563 #ifndef __thumb2__
 1564         ldr     r5,[sp,#328+4]
 1565 #else
 1566         ldrd    r4,r5,[sp,#328]         @ A[2][1]
 1567 #endif
 1568 
 1569         eor     r2,r2,r12
 1570 #ifndef __thumb2__
 1571         ldr     r10,[sp,#376]           @ A[3][2]
 1572 #endif
 1573         eor     r3,r3,r14
 1574 #ifndef __thumb2__
 1575         ldr     r11,[sp,#376+4]
 1576 #else
 1577         ldrd    r10,r11,[sp,#376]               @ A[3][2]
 1578 #endif
 1579         @ mov   r2,r2,ror#32-18         @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
 1580 #ifndef __thumb2__
 1581         ldr     r12,[sp,#224]           @ D[3]
 1582 #endif
 1583         @ mov   r3,r3,ror#32-18
 1584 #ifndef __thumb2__
 1585         ldr     r14,[sp,#224+4]
 1586 #else
 1587         ldrd    r12,r14,[sp,#224]               @ D[3]
 1588 #endif
 1589 
 1590         eor     r6,r6,r4
 1591         eor     r7,r7,r5
 1592         mov     r4,r6,ror#32-5          @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
 1593         mov     r5,r7,ror#32-5
 1594 
 1595         eor     r10,r10,r8
 1596 #ifndef __thumb2__
 1597         ldr     r8,[sp,#424]            @ A[4][3]
 1598 #endif
 1599         eor     r11,r11,r9
 1600 #ifndef __thumb2__
 1601         ldr     r9,[sp,#424+4]
 1602 #else
 1603         ldrd    r8,r9,[sp,#424]         @ A[4][3]
 1604 #endif
 1605         mov     r7,r10,ror#32-7         @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
 1606         mov     r6,r11,ror#32-8
 1607 
 1608         eor     r12,r12,r8
 1609         eor     r14,r14,r9
 1610         mov     r8,r12,ror#32-28                @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
 1611         mov     r9,r14,ror#32-28
 1612 
 1613         bic     r10,r4,r2,ror#32-18
 1614         bic     r11,r5,r3,ror#32-18
 1615         eor     r10,r10,r0,ror#32-14
 1616         eor     r11,r11,r1,ror#32-13
 1617 #ifndef __thumb2__
 1618         str     r10,[sp,#120]           @ R[3][0] = C[0] ^ (~C[1] & C[2])
 1619 #endif
 1620         bic     r12,r6,r4
 1621 #ifndef __thumb2__
 1622         str     r11,[sp,#120+4]
 1623 #else
 1624         strd    r10,r11,[sp,#120]               @ R[3][0] = C[0] ^ (~C[1] & C[2])
 1625 #endif
 1626         bic     r14,r7,r5
 1627         eor     r12,r12,r2,ror#32-18
 1628 #ifndef __thumb2__
 1629         str     r12,[sp,#128]           @ R[3][1] = C[1] ^ (~C[2] & C[3]);
 1630 #endif
 1631         eor     r14,r14,r3,ror#32-18
 1632 #ifndef __thumb2__
 1633         str     r14,[sp,#128+4]
 1634 #else
 1635         strd    r12,r14,[sp,#128]               @ R[3][1] = C[1] ^ (~C[2] & C[3]);
 1636 #endif
 1637         bic     r10,r8,r6
 1638         bic     r11,r9,r7
 1639         bic     r12,r0,r8,ror#14
 1640         bic     r14,r1,r9,ror#13
 1641         eor     r10,r10,r4
 1642         eor     r11,r11,r5
 1643 #ifndef __thumb2__
 1644         str     r10,[sp,#136]           @ R[3][2] = C[2] ^ (~C[3] & C[4]);
 1645 #endif
 1646         bic     r2,r2,r0,ror#18-14
 1647 #ifndef __thumb2__
 1648         str     r11,[sp,#136+4]
 1649 #else
 1650         strd    r10,r11,[sp,#136]               @ R[3][2] = C[2] ^ (~C[3] & C[4]);
 1651 #endif
 1652         eor     r12,r6,r12,ror#32-14
 1653         bic     r11,r3,r1,ror#18-13
 1654         eor     r14,r7,r14,ror#32-13
 1655 #ifndef __thumb2__
 1656         str     r12,[sp,#144]           @ R[3][3] = C[3] ^ (~C[4] & C[0]);
 1657 #endif
 1658 #ifndef __thumb2__
 1659         str     r14,[sp,#144+4]
 1660 #else
 1661         strd    r12,r14,[sp,#144]               @ R[3][3] = C[3] ^ (~C[4] & C[0]);
 1662 #endif
 1663         add     r14,sp,#216
 1664 #ifndef __thumb2__
 1665         ldr     r0,[sp,#256]            @ A[0][2]
 1666 #endif
 1667         eor     r10,r8,r2,ror#32-18
 1668 #ifndef __thumb2__
 1669         ldr     r1,[sp,#256+4]
 1670 #else
 1671         ldrd    r0,r1,[sp,#256]         @ A[0][2]
 1672 #endif
 1673         eor     r11,r9,r11,ror#32-18
 1674 #ifndef __thumb2__
 1675         str     r10,[sp,#152]           @ R[3][4] = C[4] ^ (~C[0] & C[1]);
 1676 #endif
 1677 #ifndef __thumb2__
 1678         str     r11,[sp,#152+4]
 1679 #else
 1680         strd    r10,r11,[sp,#152]               @ R[3][4] = C[4] ^ (~C[0] & C[1]);
 1681 #endif
 1682 
 1683         ldmia   r14,{r10,r11,r12,r14}   @ D[2..3]
 1684 #ifndef __thumb2__
 1685         ldr     r2,[sp,#304]            @ A[1][3]
 1686 #endif
 1687 #ifndef __thumb2__
 1688         ldr     r3,[sp,#304+4]
 1689 #else
 1690         ldrd    r2,r3,[sp,#304]         @ A[1][3]
 1691 #endif
 1692 #ifndef __thumb2__
 1693         ldr     r6,[sp,#232]            @ D[4]
 1694 #endif
 1695 #ifndef __thumb2__
 1696         ldr     r7,[sp,#232+4]
 1697 #else
 1698         ldrd    r6,r7,[sp,#232]         @ D[4]
 1699 #endif
 1700 
 1701         eor     r0,r0,r10
 1702 #ifndef __thumb2__
 1703         ldr     r4,[sp,#352]            @ A[2][4]
 1704 #endif
 1705         eor     r1,r1,r11
 1706 #ifndef __thumb2__
 1707         ldr     r5,[sp,#352+4]
 1708 #else
 1709         ldrd    r4,r5,[sp,#352]         @ A[2][4]
 1710 #endif
 1711         @ mov   r0,r0,ror#32-31         @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
 1712 #ifndef __thumb2__
 1713         ldr     r8,[sp,#200]            @ D[0]
 1714 #endif
 1715         @ mov   r1,r1,ror#32-31
 1716 #ifndef __thumb2__
 1717         ldr     r9,[sp,#200+4]
 1718 #else
 1719         ldrd    r8,r9,[sp,#200]         @ D[0]
 1720 #endif
 1721 
 1722         eor     r12,r12,r2
 1723 #ifndef __thumb2__
 1724         ldr     r10,[sp,#360]           @ A[3][0]
 1725 #endif
 1726         eor     r14,r14,r3
 1727 #ifndef __thumb2__
 1728         ldr     r11,[sp,#360+4]
 1729 #else
 1730         ldrd    r10,r11,[sp,#360]               @ A[3][0]
 1731 #endif
 1732         mov     r3,r12,ror#32-27                @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
 1733 #ifndef __thumb2__
 1734         ldr     r12,[sp,#208]           @ D[1]
 1735 #endif
 1736         mov     r2,r14,ror#32-28
 1737 #ifndef __thumb2__
 1738         ldr     r14,[sp,#208+4]
 1739 #else
 1740         ldrd    r12,r14,[sp,#208]               @ D[1]
 1741 #endif
 1742 
 1743         eor     r6,r6,r4
 1744         eor     r7,r7,r5
 1745         mov     r5,r6,ror#32-19         @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
 1746         mov     r4,r7,ror#32-20
 1747 
 1748         eor     r10,r10,r8
 1749 #ifndef __thumb2__
 1750         ldr     r8,[sp,#408]            @ A[4][1]
 1751 #endif
 1752         eor     r11,r11,r9
 1753 #ifndef __thumb2__
 1754         ldr     r9,[sp,#408+4]
 1755 #else
 1756         ldrd    r8,r9,[sp,#408]         @ A[4][1]
 1757 #endif
 1758         mov     r7,r10,ror#32-20                @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
 1759         mov     r6,r11,ror#32-21
 1760 
 1761         eor     r8,r8,r12
 1762         eor     r9,r9,r14
 1763         @ mov   r8,r2,ror#32-1          @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
 1764         @ mov   r9,r3,ror#32-1
 1765 
 1766         bic     r10,r4,r2
 1767         bic     r11,r5,r3
 1768         eor     r10,r10,r0,ror#32-31
 1769 #ifndef __thumb2__
 1770         str     r10,[sp,#160]           @ R[4][0] = C[0] ^ (~C[1] & C[2])
 1771 #endif
 1772         eor     r11,r11,r1,ror#32-31
 1773 #ifndef __thumb2__
 1774         str     r11,[sp,#160+4]
 1775 #else
 1776         strd    r10,r11,[sp,#160]               @ R[4][0] = C[0] ^ (~C[1] & C[2])
 1777 #endif
 1778         bic     r12,r6,r4
 1779         bic     r14,r7,r5
 1780         eor     r12,r12,r2
 1781         eor     r14,r14,r3
 1782 #ifndef __thumb2__
 1783         str     r12,[sp,#168]           @ R[4][1] = C[1] ^ (~C[2] & C[3]);
 1784 #endif
 1785         bic     r10,r8,r6,ror#1
 1786 #ifndef __thumb2__
 1787         str     r14,[sp,#168+4]
 1788 #else
 1789         strd    r12,r14,[sp,#168]               @ R[4][1] = C[1] ^ (~C[2] & C[3]);
 1790 #endif
 1791         bic     r11,r9,r7,ror#1
 1792         bic     r12,r0,r8,ror#31-1
 1793         bic     r14,r1,r9,ror#31-1
 1794         eor     r4,r4,r10,ror#32-1
 1795 #ifndef __thumb2__
 1796         str     r4,[sp,#176]            @ R[4][2] = C[2] ^= (~C[3] & C[4]);
 1797 #endif
 1798         eor     r5,r5,r11,ror#32-1
 1799 #ifndef __thumb2__
 1800         str     r5,[sp,#176+4]
 1801 #else
 1802         strd    r4,r5,[sp,#176]         @ R[4][2] = C[2] ^= (~C[3] & C[4]);
 1803 #endif
 1804         eor     r6,r6,r12,ror#32-31
 1805         eor     r7,r7,r14,ror#32-31
 1806 #ifndef __thumb2__
 1807         str     r6,[sp,#184]            @ R[4][3] = C[3] ^= (~C[4] & C[0]);
 1808 #endif
 1809         bic     r10,r2,r0,ror#32-31
 1810 #ifndef __thumb2__
 1811         str     r7,[sp,#184+4]
 1812 #else
 1813         strd    r6,r7,[sp,#184]         @ R[4][3] = C[3] ^= (~C[4] & C[0]);
 1814 #endif
 1815         bic     r11,r3,r1,ror#32-31
 1816         add     r12,sp,#0
 1817         eor     r8,r10,r8,ror#32-1
 1818         add     r10,sp,#40
 1819         eor     r9,r11,r9,ror#32-1
 1820 #ifndef __thumb2__
 1821         str     r8,[sp,#192]            @ R[4][4] = C[4] ^= (~C[0] & C[1]);
 1822 #endif
 1823 #ifndef __thumb2__
 1824         str     r9,[sp,#192+4]
 1825 #else
 1826         strd    r8,r9,[sp,#192]         @ R[4][4] = C[4] ^= (~C[0] & C[1]);
 1827 #endif
 1828         blo     .Lround2x
 1829 
 1830         ldr     pc,[sp,#440]
 1831 .size   KeccakF1600_int,.-KeccakF1600_int
 1832 
 1833 .type   KeccakF1600, %function
 1834 .align  5
 1835 KeccakF1600:
              @ KeccakF1600: copy the 200-byte state at [r0] into a stack frame,
              @ call KeccakF1600_enter on that copy, then copy the result back
              @ to the caller's buffer.
              @ In:    r0 = pointer to the state (50 32-bit words, read and rewritten)
              @ Out:   state at the original r0 updated in place
              @ Saves: r4-r11 (pushed here, popped on exit); r0-r3, r12 are clobbered
              @ Stack: 40 bytes of saved registers + 456 bytes of frame
              @ KeccakF1600_enter is defined outside this excerpt; presumably it is
              @ the entry into KeccakF1600_int that expects r12 = sp and r10 = sp+40.
 1836         stmdb   sp!,{r0,r4-r11,lr}
 1837         sub     sp,sp,#440+16                   @ space for A[5][5],D[5],T[5][5],...
 1838 
 1839         add     r10,r0,#40
 1840         add     r11,sp,#40
 1841         ldmia   r0,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}          @ copy A[5][5] to stack
 1842         stmia   sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1843         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1844         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1845         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1846         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1847         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1848         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1849         ldmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
              @ r12/r10 are set up between the last load and the last store, so
              @ the final 40 bytes are still in r0-r9 when r10 is repurposed.
 1850         add     r12,sp,#0
 1851         add     r10,sp,#40
 1852         stmia   r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1853 
 1854         bl      KeccakF1600_enter
 1855 
              @ [sp,#440+16] is the slot where r0 was pushed by the prologue
              @ (lowest register of the stmdb list, directly above the 456-byte frame).
 1856         ldr     r11, [sp,#440+16]               @ restore pointer to A
 1857         ldmia   sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1858         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}            @ return A[5][5]
              @ r10 is not reloaded: the copy-back relies on the callee returning
              @ with r10 = sp+40 (the visible tail of KeccakF1600_int sets r10 to
              @ sp+40 just before it returns; presumably that is the path taken).
 1859         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1860         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1861         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1862         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1863         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1864         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1865         ldmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1866         stmia   r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1867 
              @ Drop the 456-byte frame plus the 4-byte saved-r0 slot, then
              @ restore r4-r11 and return by popping the saved lr into pc.
 1868         add     sp,sp,#440+20
 1869         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
 1870 .size   KeccakF1600,.-KeccakF1600
 1871 .globl  SHA3_absorb
 1872 .type   SHA3_absorb,%function
 1873 .align  5
 1874 SHA3_absorb:
              @ SHA3_absorb: XOR whole input blocks into the state (in bit-interleaved
              @ form) and call KeccakF1600_int after each block.
              @ In:    r0 = pointer to the state (200 bytes), r1 = inp,
              @        r2 = len in bytes, r3 = bsz (block size in bytes)
              @ Out:   r0 = number of input bytes left unprocessed (always < bsz)
              @ If len < bsz on entry nothing is read or written and len is returned.
              @ NOTE(review): the block loop consumes 8 bytes per pass and stops when
              @ the running count reaches 0 or wraps, so bsz is presumably a non-zero
              @ multiple of 8 -- confirm against callers.
              @
              @ Frame, as offsets from sp after the prologue:
              @   [sp,#0..199]    working copy of the state
              @   [sp,#456..468]  masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
              @   [sp,#472]       saved r0 (caller's state pointer)
              @   [sp,#476]       saved r1, reused to hold the current input pointer
              @   [sp,#480]       saved r2, reused to hold the remaining length
              @   [sp,#484]       saved r3 (bsz)
              @   [sp,#488..527]  saved r4-r12, lr
 1875         stmdb   sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 1876         sub     sp,sp,#456+16
 1877 
 1878         add     r10,r0,#40
 1879         @ mov   r11,r1
 1880         mov     r12,r2
 1881         mov     r14,r3
              @ Early out when there is not even one full block. This check also
              @ guarantees at least one pass through the permutation below, which is
              @ what reloads r10 with the caller's state pointer for .Labsorbed.
 1882         cmp     r2,r3
 1883         blo     .Labsorb_abort
 1884 
 1885         add     r11,sp,#0
 1886         ldmia   r0,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}        @ copy A[5][5] to stack
 1887         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1888         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1889         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1890         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1891         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1892         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1893         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1894         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1895         stmia   r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 1896 
              @ r1 was overwritten by the copy above; the input pointer is taken
              @ back from its saved slot (r11 served as the copy destination).
 1897         ldr     r11,[sp,#476]           @ restore r11
 1898 #ifdef  __thumb2__
 1899         mov     r9,#0x00ff00ff
 1900         mov     r8,#0x0f0f0f0f
 1901         mov     r7,#0x33333333
 1902         mov     r6,#0x55555555
 1903 #else
 1904         mov     r6,#0x11                @ compose constants
 1905         mov     r8,#0x0f
 1906         mov     r9,#0xff
 1907         orr     r6,r6,r6,lsl#8
 1908         orr     r8,r8,r8,lsl#8
 1909         orr     r6,r6,r6,lsl#16         @ 0x11111111
 1910         orr     r9,r9,r9,lsl#16         @ 0x00ff00ff
 1911         orr     r8,r8,r8,lsl#16         @ 0x0f0f0f0f
 1912         orr     r7,r6,r6,lsl#1          @ 0x33333333
 1913         orr     r6,r6,r6,lsl#2          @ 0x55555555
 1914 #endif
              @ Park the masks in the frame so they can be reloaded after the
              @ permutation call together with the saved argument slots.
 1915         str     r9,[sp,#468]
 1916         str     r8,[sp,#464]
 1917         str     r7,[sp,#460]
 1918         str     r6,[sp,#456]
 1919         b       .Loop_absorb
 1920 
 1921 .align  4
 1922 .Loop_absorb:
              @ Per-block loop: r12 = bytes remaining, r14 = bsz, r11 = input pointer.
 1923         subs    r0,r12,r14
 1924         blo     .Labsorbed
 1925         add     r10,sp,#0
 1926         str     r0,[sp,#480]            @ save len - bsz
 1927 
 1928 .align  4
 1929 .Loop_block:
              @ One 64-bit lane per pass. The 8 input bytes are fetched one at a time
              @ and assembled little-endian into r0 (lo) and r1 (hi), so no alignment
              @ of inp is required.
 1930         ldrb    r0,[r11],#1
 1931         ldrb    r1,[r11],#1
 1932         ldrb    r2,[r11],#1
 1933         ldrb    r3,[r11],#1
 1934         ldrb    r4,[r11],#1
 1935         orr     r0,r0,r1,lsl#8
 1936         ldrb    r1,[r11],#1
 1937         orr     r0,r0,r2,lsl#16
 1938         ldrb    r2,[r11],#1
 1939         orr     r0,r0,r3,lsl#24         @ lo
 1940         ldrb    r3,[r11],#1
 1941         orr     r1,r4,r1,lsl#8
 1942         orr     r1,r1,r2,lsl#16
 1943         orr     r1,r1,r3,lsl#24         @ hi
 1944 
              @ Bit interleave: the even-numbered bits of each word are compacted
              @ towards the low half (r2 from lo, r3 from hi) and the odd-numbered
              @ bits towards the high half (r0 from lo, r1 from hi).
 1945         and     r2,r0,r6                @ &=0x55555555
 1946         and     r0,r0,r6,lsl#1          @ &=0xaaaaaaaa
 1947         and     r3,r1,r6                @ &=0x55555555
 1948         and     r1,r1,r6,lsl#1          @ &=0xaaaaaaaa
 1949         orr     r2,r2,r2,lsr#1
 1950         orr     r0,r0,r0,lsl#1
 1951         orr     r3,r3,r3,lsr#1
 1952         orr     r1,r1,r1,lsl#1
 1953         and     r2,r2,r7                @ &=0x33333333
 1954         and     r0,r0,r7,lsl#2          @ &=0xcccccccc
 1955         and     r3,r3,r7                @ &=0x33333333
 1956         and     r1,r1,r7,lsl#2          @ &=0xcccccccc
 1957         orr     r2,r2,r2,lsr#2
 1958         orr     r0,r0,r0,lsl#2
 1959         orr     r3,r3,r3,lsr#2
 1960         orr     r1,r1,r1,lsl#2
 1961         and     r2,r2,r8                @ &=0x0f0f0f0f
 1962         and     r0,r0,r8,lsl#4          @ &=0xf0f0f0f0
 1963         and     r3,r3,r8                @ &=0x0f0f0f0f
 1964         and     r1,r1,r8,lsl#4          @ &=0xf0f0f0f0
 1965         ldmia   r10,{r4,r5}             @ A_flat[i]
 1966         orr     r2,r2,r2,lsr#4
 1967         orr     r0,r0,r0,lsl#4
 1968         orr     r3,r3,r3,lsr#4
 1969         orr     r1,r1,r1,lsl#4
 1970         and     r2,r2,r9                @ &=0x00ff00ff
 1971         and     r0,r0,r9,lsl#8          @ &=0xff00ff00
 1972         and     r3,r3,r9                @ &=0x00ff00ff
 1973         and     r1,r1,r9,lsl#8          @ &=0xff00ff00
 1974         orr     r2,r2,r2,lsr#8
 1975         orr     r0,r0,r0,lsl#8
 1976         orr     r3,r3,r3,lsr#8
 1977         orr     r1,r1,r1,lsl#8
 1978 
              @ The lsl#16/lsr#16 pairs discard the leftover half of each compacted
              @ word. Result: r4 (first word of the lane) receives the even bits,
              @ lo's in its low half and hi's in its high half; r5 (second word)
              @ receives the odd bits in the same arrangement.
 1979         mov     r2,r2,lsl#16
 1980         mov     r1,r1,lsr#16
 1981         eor     r4,r4,r3,lsl#16
 1982         eor     r5,r5,r0,lsr#16
 1983         eor     r4,r4,r2,lsr#16
 1984         eor     r5,r5,r1,lsl#16
 1985         stmia   r10!,{r4,r5}    @ A_flat[i++] ^= BitInterleave(inp[0..7])
 1986 
              @ r14 counts the block down in 8-byte steps; its original value is
              @ reloaded from [sp,#484] after the permutation.
 1987         subs    r14,r14,#8
 1988         bhi     .Loop_block
 1989 
 1990         str     r11,[sp,#476]
 1991 
 1992         bl      KeccakF1600_int
 1993 
              @ One load-multiple from sp+456 brings back r6-r9 = masks,
              @ r10 = caller's state pointer, r11 = input pointer,
              @ r12 = remaining length and r14 = bsz.
 1994         add     r14,sp,#456
 1995         ldmia   r14,{r6,r7,r8,r9,r10,r11,r12,r14}       @ restore constants and variables
 1996         b       .Loop_absorb
 1997 
 1998 .align  4
 1999 .Labsorbed:
              @ Fewer than bsz bytes remain. r10 holds the caller's state pointer
              @ (reloaded after the last permutation); copy the working state back.
 2000         add     r11,sp,#40
 2001         ldmia   sp,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2002         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}    @ return A[5][5]
 2003         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2004         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2005         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2006         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2007         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2008         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2009         ldmia   r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2010         stmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
 2011 
 2012 .Labsorb_abort:
              @ Skip the 456-byte work area, the four mask words and the saved
              @ r0-r3 slots (488 bytes in total) so that sp points at the saved r4.
              @ r12 is read for the return value before the pop overwrites it.
 2013         add     sp,sp,#456+32
 2014         mov     r0,r12                  @ return value
 2015         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
 2016 .size   SHA3_absorb,.-SHA3_absorb
 2017 .globl  SHA3_squeeze
 2018 .type   SHA3_squeeze,%function
 2019 .align  5
 2020 SHA3_squeeze:
              @ SHA3_squeeze: convert state lanes from bit-interleaved form back to
              @ plain little-endian bytes and write them to the output buffer,
              @ calling KeccakF1600 each time bsz bytes have been produced.
              @ In:    r0 = pointer to the state, r1 = out, r2 = len in bytes,
              @        r3 = bsz (block size in bytes)
              @ Out:   len bytes stored at out; no return value is set up
              @ NOTE(review): len is first tested only after a lane has been loaded,
              @ and .Lsqueeze_tail stores a byte before its first length check, so a
              @ call with len == 0 would still store bytes to out -- confirm that
              @ callers never pass 0.
              @
              @ Stack, as offsets from sp once the masks have been pushed:
              @   [sp,#0..12]  masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
              @   [sp,#16]     saved r0 (state pointer)
              @   [sp,#20]     saved r3 (bsz)
              @   [sp,#24..]   saved r4-r10, lr
 2021         stmdb   sp!,{r0,r3-r10,lr}
 2022 
 2023         mov     r10,r0
 2024         mov     r4,r1
 2025         mov     r5,r2
 2026         mov     r12,r3
 2027 
 2028 #ifdef  __thumb2__
 2029         mov     r9,#0x00ff00ff
 2030         mov     r8,#0x0f0f0f0f
 2031         mov     r7,#0x33333333
 2032         mov     r6,#0x55555555
 2033 #else
 2034         mov     r6,#0x11                @ compose constants
 2035         mov     r8,#0x0f
 2036         mov     r9,#0xff
 2037         orr     r6,r6,r6,lsl#8
 2038         orr     r8,r8,r8,lsl#8
 2039         orr     r6,r6,r6,lsl#16         @ 0x11111111
 2040         orr     r9,r9,r9,lsl#16         @ 0x00ff00ff
 2041         orr     r8,r8,r8,lsl#16         @ 0x0f0f0f0f
 2042         orr     r7,r6,r6,lsl#1          @ 0x33333333
 2043         orr     r6,r6,r6,lsl#2          @ 0x55555555
 2044 #endif
 2045         stmdb   sp!,{r6,r7,r8,r9}
 2046 
              @ r14 keeps the unadvanced state pointer; r10 walks through the lanes.
 2047         mov     r14,r10
 2048         b       .Loop_squeeze
 2049 
 2050 .align  4
 2051 .Loop_squeeze:
              @ Per lane: r0 = first word of the pair, r1 = second word. The low
              @ halves of both words are expanded into r2|r3, the high halves into
              @ r0|r1, undoing the interleave applied when input was absorbed.
 2052         ldmia   r10!,{r0,r1}    @ A_flat[i++]
 2053 
 2054         mov     r2,r0,lsl#16
 2055         mov     r3,r1,lsl#16            @ r3 = r1 << 16
 2056         mov     r2,r2,lsr#16            @ r2 = r0 & 0x0000ffff
 2057         mov     r1,r1,lsr#16
 2058         mov     r0,r0,lsr#16            @ r0 = r0 >> 16
 2059         mov     r1,r1,lsl#16            @ r1 = r1 & 0xffff0000
 2060 
 2061         orr     r2,r2,r2,lsl#8
 2062         orr     r3,r3,r3,lsr#8
 2063         orr     r0,r0,r0,lsl#8
 2064         orr     r1,r1,r1,lsr#8
 2065         and     r2,r2,r9                @ &=0x00ff00ff
 2066         and     r3,r3,r9,lsl#8          @ &=0xff00ff00
 2067         and     r0,r0,r9                @ &=0x00ff00ff
 2068         and     r1,r1,r9,lsl#8          @ &=0xff00ff00
 2069         orr     r2,r2,r2,lsl#4
 2070         orr     r3,r3,r3,lsr#4
 2071         orr     r0,r0,r0,lsl#4
 2072         orr     r1,r1,r1,lsr#4
 2073         and     r2,r2,r8                @ &=0x0f0f0f0f
 2074         and     r3,r3,r8,lsl#4          @ &=0xf0f0f0f0
 2075         and     r0,r0,r8                @ &=0x0f0f0f0f
 2076         and     r1,r1,r8,lsl#4          @ &=0xf0f0f0f0
 2077         orr     r2,r2,r2,lsl#2
 2078         orr     r3,r3,r3,lsr#2
 2079         orr     r0,r0,r0,lsl#2
 2080         orr     r1,r1,r1,lsr#2
 2081         and     r2,r2,r7                @ &=0x33333333
 2082         and     r3,r3,r7,lsl#2          @ &=0xcccccccc
 2083         and     r0,r0,r7                @ &=0x33333333
 2084         and     r1,r1,r7,lsl#2          @ &=0xcccccccc
 2085         orr     r2,r2,r2,lsl#1
 2086         orr     r3,r3,r3,lsr#1
 2087         orr     r0,r0,r0,lsl#1
 2088         orr     r1,r1,r1,lsr#1
 2089         and     r2,r2,r6                @ &=0x55555555
 2090         and     r3,r3,r6,lsl#1          @ &=0xaaaaaaaa
 2091         and     r0,r0,r6                @ &=0x55555555
 2092         and     r1,r1,r6,lsl#1          @ &=0xaaaaaaaa
 2093 
              @ r2 = low 32 bits of the lane, r0 = high 32 bits.
 2094         orr     r2,r2,r3
 2095         orr     r0,r0,r1
 2096 
 2097         cmp     r5,#8
 2098         blo     .Lsqueeze_tail
              @ Full lane: emit r2 then r0, least significant byte first, with
              @ byte stores so that out needs no particular alignment.
 2099         mov     r1,r2,lsr#8
 2100         strb    r2,[r4],#1
 2101         mov     r3,r2,lsr#16
 2102         strb    r1,[r4],#1
 2103         mov     r2,r2,lsr#24
 2104         strb    r3,[r4],#1
 2105         strb    r2,[r4],#1
 2106 
 2107         mov     r1,r0,lsr#8
 2108         strb    r0,[r4],#1
 2109         mov     r3,r0,lsr#16
 2110         strb    r1,[r4],#1
 2111         mov     r0,r0,lsr#24
 2112         strb    r3,[r4],#1
 2113         strb    r0,[r4],#1
 2114         subs    r5,r5,#8
 2115         beq     .Lsqueeze_done
 2116 
 2117         subs    r12,r12,#8              @ bsz -= 8
 2118         bhi     .Loop_squeeze
 2119 
              @ A whole block has been emitted and more output is wanted:
              @ permute the state in place. r4 and r5 survive the call because
              @ KeccakF1600 saves and restores r4-r11.
 2120         mov     r0,r14                  @ original r10
 2121 
 2122         bl      KeccakF1600
 2123 
              @ Reload r6-r9 = masks, r10 = state pointer (from the saved r0 slot)
              @ and r12 = bsz (from the saved r3 slot). r14 was overwritten by the
              @ bl above and is re-derived from r10.
 2124         ldmia   sp,{r6,r7,r8,r9,r10,r12}                @ restore constants and variables
 2125         mov     r14,r10
 2126         b       .Loop_squeeze
 2127 
 2128 .align  4
 2129 .Lsqueeze_tail:
              @ Reached with r5 < 8: emit the remaining bytes one at a time, first
              @ from r2 (up to 4 bytes), then from r0 (up to 3 bytes). Each store
              @ comes before the decrement-and-test of r5.
 2130         strb    r2,[r4],#1
 2131         mov     r2,r2,lsr#8
 2132         subs    r5,r5,#1
 2133         beq     .Lsqueeze_done
 2134         strb    r2,[r4],#1
 2135         mov     r2,r2,lsr#8
 2136         subs    r5,r5,#1
 2137         beq     .Lsqueeze_done
 2138         strb    r2,[r4],#1
 2139         mov     r2,r2,lsr#8
 2140         subs    r5,r5,#1
 2141         beq     .Lsqueeze_done
 2142         strb    r2,[r4],#1
 2143         subs    r5,r5,#1
 2144         beq     .Lsqueeze_done
 2145 
 2146         strb    r0,[r4],#1
 2147         mov     r0,r0,lsr#8
 2148         subs    r5,r5,#1
 2149         beq     .Lsqueeze_done
 2150         strb    r0,[r4],#1
 2151         mov     r0,r0,lsr#8
 2152         subs    r5,r5,#1
 2153         beq     .Lsqueeze_done
              @ Seventh byte: stored without a further count check.
 2154         strb    r0,[r4]
 2155         b       .Lsqueeze_done
 2156 
 2157 .align  4
 2158 .Lsqueeze_done:
              @ Discard the four mask words and the saved r0/r3 slots (24 bytes),
              @ then restore r4-r10 and return by popping the saved lr into pc.
 2159         add     sp,sp,#24
 2160         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
 2161 .size   SHA3_squeeze,.-SHA3_squeeze
 2162 #if __ARM_MAX_ARCH__>=7
 2163 .fpu    neon
 2164 
 2165 .type   iotas64, %object
 2166 .align  5
 2167 iotas64:
      @ 24 64-bit constants, one per round. KeccakF1600_neon takes the address
      @ of this table with adr and reads one entry per loop iteration with a
      @ post-incremented 64-bit load, XORing it into A[0][0] (the Iota step).
 2168 .quad   0x0000000000000001
 2169 .quad   0x0000000000008082
 2170 .quad   0x800000000000808a
 2171 .quad   0x8000000080008000
 2172 .quad   0x000000000000808b
 2173 .quad   0x0000000080000001
 2174 .quad   0x8000000080008081
 2175 .quad   0x8000000000008009
 2176 .quad   0x000000000000008a
 2177 .quad   0x0000000000000088
 2178 .quad   0x0000000080008009
 2179 .quad   0x000000008000000a
 2180 .quad   0x000000008000808b
 2181 .quad   0x800000000000008b
 2182 .quad   0x8000000000008089
 2183 .quad   0x8000000000008003
 2184 .quad   0x8000000000008002
 2185 .quad   0x8000000000000080
 2186 .quad   0x000000000000800a
 2187 .quad   0x800000008000000a
 2188 .quad   0x8000000080008081
 2189 .quad   0x8000000000008080
 2190 .quad   0x0000000080000001
 2191 .quad   0x8000000080008008
 2192 .size   iotas64,.-iotas64
 2193 
 2194 .type   KeccakF1600_neon, %function
 2195 .align  5
 2196 KeccakF1600_neon:
              @ KeccakF1600_neon: 24 rounds of the permutation on a state that is
              @ held entirely in NEON registers.
              @ In:    d0-d24 = the 25 state lanes, laid out as
              @          A[0][j] = d(2j)      A[1][j] = d(2j+1)     (j = 0..4)
              @          A[2][j] = d(10+2j)   A[3][j] = d(11+2j)
              @          A[4][j] = d(20+j)
              @        so that qN pairs row 0 with row 1 (q0-q4) or row 2 with
              @        row 3 (q5-q9) of one column.
              @        r0 = scratch memory, 8-byte aligned (":64" hints); the
              @             24 bytes at [r0] are overwritten
              @ Out:   d0-d24 = permuted state
              @ Clobb: r1, r2, r3, d25-d31, flags
              @ Returns with "bx lr"; the caller must have preserved lr.
 2197         add     r1, r0, #16
 2198         adr     r2, iotas64
 2199         mov     r3, #24                 @ loop counter
 2200         b       .Loop_neon
 2201 
 2202 .align  4
 2203 .Loop_neon:
 2204         @ Theta
              @ q4 and d18 are spilled to the scratch area so that their registers
              @ can hold the rotated column parities; they are reloaded below.
 2205         vst1.64 {q4},  [r0,:64]         @ offload A[0..1][4]
 2206         veor    q13, q0,  q5            @ A[0..1][0]^A[2..3][0]
 2207         vst1.64 {d18}, [r1,:64]         @ offload A[2][4]
 2208         veor    q14, q1,  q6            @ A[0..1][1]^A[2..3][1]
 2209         veor    q15, q2,  q7            @ A[0..1][2]^A[2..3][2]
 2210         veor    d26, d26, d27           @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
 2211         veor    d27, d28, d29           @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
 2212         veor    q14, q3,  q8            @ A[0..1][3]^A[2..3][3]
 2213         veor    q4,  q4,  q9            @ A[0..1][4]^A[2..3][4]
 2214         veor    d30, d30, d31           @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
 2215         veor    d31, d28, d29           @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
 2216         veor    d25, d8,  d9            @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
 2217         veor    q13, q13, q10           @ C[0..1]^=A[4][0..1]
 2218         veor    q14, q15, q11           @ C[2..3]^=A[4][2..3]
 2219         veor    d25, d25, d24           @ C[4]^=A[4][4]
 2220 
              @ Rotate left by 1: x+x gives x<<1, vsri then inserts x>>63 into bit 0.
 2221         vadd.u64        q4,  q13, q13           @ C[0..1]<<1
 2222         vadd.u64        q15, q14, q14           @ C[2..3]<<1
 2223         vadd.u64        d18, d25, d25           @ C[4]<<1
 2224         vsri.u64        q4,  q13, #63           @ ROL64(C[0..1],1)
 2225         vsri.u64        q15, q14, #63           @ ROL64(C[2..3],1)
 2226         vsri.u64        d18, d25, #63           @ ROL64(C[4],1)
 2227         veor    d25, d25, d9            @ D[0] = C[4] ^= ROL64(C[1],1)
 2228         veor    q13, q13, q15           @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
 2229         veor    d28, d28, d18           @ D[3] = C[2] ^= ROL64(C[4],1)
 2230         veor    d29, d29, d8            @ D[4] = C[3] ^= ROL64(C[0],1)
 2231 
 2232         veor    d0,  d0,  d25           @ A[0][0] ^= C[4]
 2233         veor    d1,  d1,  d25           @ A[1][0] ^= C[4]
 2234         veor    d10, d10, d25           @ A[2][0] ^= C[4]
 2235         veor    d11, d11, d25           @ A[3][0] ^= C[4]
 2236         veor    d20, d20, d25           @ A[4][0] ^= C[4]
 2237 
 2238         veor    d2,  d2,  d26           @ A[0][1] ^= D[1]
 2239         veor    d3,  d3,  d26           @ A[1][1] ^= D[1]
 2240         veor    d12, d12, d26           @ A[2][1] ^= D[1]
 2241         veor    d13, d13, d26           @ A[3][1] ^= D[1]
 2242         veor    d21, d21, d26           @ A[4][1] ^= D[1]
              @ Duplicate D[2] into both halves of q13 so that one q-wide XOR can
              @ update two rows at once (the same is done for q14 further down).
 2243         vmov    d26, d27
 2244 
 2245         veor    d6,  d6,  d28           @ A[0][3] ^= C[2]
 2246         veor    d7,  d7,  d28           @ A[1][3] ^= C[2]
 2247         veor    d16, d16, d28           @ A[2][3] ^= C[2]
 2248         veor    d17, d17, d28           @ A[3][3] ^= C[2]
 2249         veor    d23, d23, d28           @ A[4][3] ^= C[2]
 2250         vld1.64 {q4},  [r0,:64]         @ restore A[0..1][4]
 2251         vmov    d28, d29
 2252 
 2253         vld1.64 {d18}, [r1,:64]         @ restore A[2][4]
 2254         veor    q2,  q2,  q13           @ A[0..1][2] ^= D[2]
 2255         veor    q7,  q7,  q13           @ A[2..3][2] ^= D[2]
 2256         veor    d22, d22, d27           @ A[4][2]    ^= D[2]
 2257 
 2258         veor    q4,  q4,  q14           @ A[0..1][4] ^= C[3]
 2259         veor    q9,  q9,  q14           @ A[2..3][4] ^= C[3]
 2260         veor    d24, d24, d29           @ A[4][4]    ^= C[3]
 2261 
 2262         @ Rho + Pi
              @ Each rotation is a vshl/vsri pair (shift left by n, then insert the
              @ top n bits shifted right by 64-n). Lanes are moved in chains, so
              @ every source is read before its register is overwritten; row 0 is
              @ parked in d26-d29 first because it is overwritten straight away.
 2263         vmov    d26, d2                 @ C[1] = A[0][1]
 2264         vshl.u64        d2,  d3,  #44
 2265         vmov    d27, d4                 @ C[2] = A[0][2]
 2266         vshl.u64        d4,  d14, #43
 2267         vmov    d28, d6                 @ C[3] = A[0][3]
 2268         vshl.u64        d6,  d17, #21
 2269         vmov    d29, d8                 @ C[4] = A[0][4]
 2270         vshl.u64        d8,  d24, #14
 2271         vsri.u64        d2,  d3,  #64-44        @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
 2272         vsri.u64        d4,  d14, #64-43        @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
 2273         vsri.u64        d6,  d17, #64-21        @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
 2274         vsri.u64        d8,  d24, #64-14        @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
 2275 
 2276         vshl.u64        d3,  d9,  #20
 2277         vshl.u64        d14, d16, #25
 2278         vshl.u64        d17, d15, #15
 2279         vshl.u64        d24, d21, #2
 2280         vsri.u64        d3,  d9,  #64-20        @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
 2281         vsri.u64        d14, d16, #64-25        @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
 2282         vsri.u64        d17, d15, #64-15        @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
 2283         vsri.u64        d24, d21, #64-2         @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
 2284 
              @ Rotations by a whole number of bytes (8 and 56 bits) are done with
              @ a single vext.8 instead of a shift pair.
 2285         vshl.u64        d9,  d22, #61
 2286         @ vshl.u64      d16, d19, #8
 2287         vshl.u64        d15, d12, #10
 2288         vshl.u64        d21, d7,  #55
 2289         vsri.u64        d9,  d22, #64-61        @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
 2290         vext.8  d16, d19, d19, #8-1     @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
 2291         vsri.u64        d15, d12, #64-10        @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
 2292         vsri.u64        d21, d7,  #64-55        @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
 2293 
 2294         vshl.u64        d22, d18, #39
 2295         @ vshl.u64      d19, d23, #56
 2296         vshl.u64        d12, d5,  #6
 2297         vshl.u64        d7,  d13, #45
 2298         vsri.u64        d22, d18, #64-39        @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
 2299         vext.8  d19, d23, d23, #8-7     @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
 2300         vsri.u64        d12, d5,  #64-6         @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
 2301         vsri.u64        d7,  d13, #64-45        @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
 2302 
 2303         vshl.u64        d18, d20, #18
 2304         vshl.u64        d23, d11, #41
 2305         vshl.u64        d5,  d10, #3
 2306         vshl.u64        d13, d1,  #36
 2307         vsri.u64        d18, d20, #64-18        @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
 2308         vsri.u64        d23, d11, #64-41        @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
 2309         vsri.u64        d5,  d10, #64-3         @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
 2310         vsri.u64        d13, d1,  #64-36        @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
 2311 
 2312         vshl.u64        d1,  d28, #28
 2313         vshl.u64        d10, d26, #1
 2314         vshl.u64        d11, d29, #27
 2315         vshl.u64        d20, d27, #62
 2316         vsri.u64        d1,  d28, #64-28        @ A[1][0] = ROL64(C[3],    rhotates[0][3])
 2317         vsri.u64        d10, d26, #64-1         @ A[2][0] = ROL64(C[1],    rhotates[0][1])
 2318         vsri.u64        d11, d29, #64-27        @ A[3][0] = ROL64(C[4],    rhotates[0][4])
 2319         vsri.u64        d20, d27, #64-62        @ A[4][0] = ROL64(C[2],    rhotates[0][2])
 2320 
 2321         @ Chi + Iota
              @ vbic x, a, b computes a & ~b. Rows 0/1 and rows 2/3 are processed
              @ two at a time in q registers, row 4 in d registers. Columns 0 and 1
              @ of a row are needed again for columns 3 and 4, so their old values
              @ are kept in temporaries (or spilled to [r0]) until then.
 2322         vbic    q13, q2,  q1
 2323         vbic    q14, q3,  q2
 2324         vbic    q15, q4,  q3
 2325         veor    q13, q13, q0            @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
 2326         veor    q14, q14, q1            @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
 2327         veor    q2,  q2,  q15           @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
 2328         vst1.64 {q13}, [r0,:64]         @ offload A[0..1][0]
 2329         vbic    q13, q0,  q4
 2330         vbic    q15, q1,  q0
 2331         vmov    q1,  q14                @ A[0..1][1]
 2332         veor    q3,  q3,  q13           @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
 2333         veor    q4,  q4,  q15           @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
 2334 
 2335         vbic    q13, q7,  q6
 2336         vmov    q0,  q5                 @ A[2..3][0]
 2337         vbic    q14, q8,  q7
 2338         vmov    q15, q6                 @ A[2..3][1]
 2339         veor    q5,  q5,  q13           @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
 2340         vbic    q13, q9,  q8
 2341         veor    q6,  q6,  q14           @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
 2342         vbic    q14, q0,  q9
 2343         veor    q7,  q7,  q13           @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
 2344         vbic    q13, q15, q0
 2345         veor    q8,  q8,  q14           @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
 2346         vmov    q14, q10                @ A[4][0..1]
 2347         veor    q9,  q9,  q13           @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
 2348 
 2349         vld1.64 d25, [r2,:64]!          @ Iota[i++]
 2350         vbic    d26, d22, d21
 2351         vbic    d27, d23, d22
 2352         vld1.64 {q0}, [r0,:64]          @ restore A[0..1][0]
 2353         veor    d20, d20, d26           @ A[4][0] ^= (~A[4][1] & A[4][2])
 2354         vbic    d26, d24, d23
 2355         veor    d21, d21, d27           @ A[4][1] ^= (~A[4][2] & A[4][3])
 2356         vbic    d27, d28, d24
 2357         veor    d22, d22, d26           @ A[4][2] ^= (~A[4][3] & A[4][4])
 2358         vbic    d26, d29, d28
 2359         veor    d23, d23, d27           @ A[4][3] ^= (~A[4][4] & A[4][0])
 2360         veor    d0,  d0,  d25           @ A[0][0] ^= Iota[i]
 2361         veor    d24, d24, d26           @ A[4][4] ^= (~A[4][0] & A[4][1])
 2362 
 2363         subs    r3, r3, #1
 2364         bne     .Loop_neon
 2365 
              @ Return. 0xe12fff1e is the ARM (A32) encoding of "bx lr"; it is
              @ presumably emitted as data so the file still assembles for targets
              @ whose assembler rejects the bx mnemonic. That word is only a valid
              @ return while assembling ARM code: when the file is built with
              @ __thumb2__ (the top of the file then selects .thumb) it would be
              @ decoded as a Thumb instruction and execution would run on past the
              @ end of the function, so a real bx is emitted in that case.
      #ifdef  __thumb2__
              bx      lr
      #else
 2366 .word   0xe12fff1e
      #endif
 2367 .size   KeccakF1600_neon,.-KeccakF1600_neon
 2368 
 2369 .globl  SHA3_absorb_neon
 2370 .type   SHA3_absorb_neon, %function
 2371 .align  5
 2372 SHA3_absorb_neon:
              @ SHA3_absorb_neon: load the state into NEON registers, XOR whole
              @ input blocks into it, call KeccakF1600_neon after each block and
              @ store the state back.
              @ In:    r0 = pointer to the state (25 64-bit lanes, 8-byte aligned
              @             because of the ":64" hints), r1 = inp,
              @        r2 = len in bytes, r3 = bsz (block size in bytes)
              @ Out:   r0 = number of input bytes left unprocessed (< bsz)
              @ Saves: r4-r6 and d8-d15, which the state occupies during the loop
              @ NOTE(review): the first lane is absorbed before bsz is examined and
              @ bsz is only compared against multiples of 16, so bsz is presumably
              @ a multiple of 8 in the range 8..200 -- confirm against callers.
 2373         stmdb   sp!, {r4,r5,r6,lr}
 2374         vstmdb  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
 2375 
              @ The arguments are moved out of r1-r3 because KeccakF1600_neon
              @ overwrites those three registers.
 2376         mov     r4, r1                  @ inp
 2377         mov     r5, r2                  @ len
 2378         mov     r6, r3                  @ bsz
 2379 
              @ Rows 0/1 and rows 2/3 are interleaved across even/odd d registers
              @ so that each q register holds the same column of two rows.
 2380         vld1.32 {d0}, [r0,:64]!         @ A[0][0]
 2381         vld1.32 {d2}, [r0,:64]!         @ A[0][1]
 2382         vld1.32 {d4}, [r0,:64]!         @ A[0][2]
 2383         vld1.32 {d6}, [r0,:64]!         @ A[0][3]
 2384         vld1.32 {d8}, [r0,:64]!         @ A[0][4]
 2385 
 2386         vld1.32 {d1}, [r0,:64]!         @ A[1][0]
 2387         vld1.32 {d3}, [r0,:64]!         @ A[1][1]
 2388         vld1.32 {d5}, [r0,:64]!         @ A[1][2]
 2389         vld1.32 {d7}, [r0,:64]!         @ A[1][3]
 2390         vld1.32 {d9}, [r0,:64]!         @ A[1][4]
 2391 
 2392         vld1.32 {d10}, [r0,:64]!                @ A[2][0]
 2393         vld1.32 {d12}, [r0,:64]!                @ A[2][1]
 2394         vld1.32 {d14}, [r0,:64]!                @ A[2][2]
 2395         vld1.32 {d16}, [r0,:64]!                @ A[2][3]
 2396         vld1.32 {d18}, [r0,:64]!                @ A[2][4]
 2397 
 2398         vld1.32 {d11}, [r0,:64]!                @ A[3][0]
 2399         vld1.32 {d13}, [r0,:64]!                @ A[3][1]
 2400         vld1.32 {d15}, [r0,:64]!                @ A[3][2]
 2401         vld1.32 {d17}, [r0,:64]!                @ A[3][3]
 2402         vld1.32 {d19}, [r0,:64]!                @ A[3][4]
 2403 
 2404         vld1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..3]
 2405         vld1.32 {d24}, [r0,:64]         @ A[4][4]
              @ The loads above advanced r0 by 24 lanes (the last one has no
              @ writeback). r0 must point at the start of the state again: it is
              @ the scratch pointer handed to KeccakF1600_neon and the base for
              @ the final store.
 2406         sub     r0, r0, #24*8           @ rewind
 2407         b       .Loop_absorb_neon
 2408 
 2409 .align  4
 2410 .Loop_absorb_neon:
 2411         subs    r12, r5, r6             @ len - bsz
 2412         blo     .Labsorbed_neon
 2413         mov     r5, r12
 2414 
              @ Absorb ladder: one 8-byte lane per step, in state order. Each cmp
              @ serves two exits: blo leaves after an odd number of lanes, and beq,
              @ which still sees the same flags because the vld1/veor in between do
              @ not write them, leaves after the following even number of lanes.
 2415         vld1.8  {d31}, [r4]!            @ endian-neutral loads...
 2416         cmp     r6, #8*2
 2417         veor    d0, d0, d31             @ A[0][0] ^= *inp++
 2418         blo     .Lprocess_neon
 2419         vld1.8  {d31}, [r4]!
 2420         veor    d2, d2, d31             @ A[0][1] ^= *inp++
 2421         beq     .Lprocess_neon
 2422         vld1.8  {d31}, [r4]!
 2423         cmp     r6, #8*4
 2424         veor    d4, d4, d31             @ A[0][2] ^= *inp++
 2425         blo     .Lprocess_neon
 2426         vld1.8  {d31}, [r4]!
 2427         veor    d6, d6, d31             @ A[0][3] ^= *inp++
 2428         beq     .Lprocess_neon
 2429         vld1.8  {d31},[r4]!
 2430         cmp     r6, #8*6
 2431         veor    d8, d8, d31             @ A[0][4] ^= *inp++
 2432         blo     .Lprocess_neon
 2433 
 2434         vld1.8  {d31}, [r4]!
 2435         veor    d1, d1, d31             @ A[1][0] ^= *inp++
 2436         beq     .Lprocess_neon
 2437         vld1.8  {d31}, [r4]!
 2438         cmp     r6, #8*8
 2439         veor    d3, d3, d31             @ A[1][1] ^= *inp++
 2440         blo     .Lprocess_neon
 2441         vld1.8  {d31}, [r4]!
 2442         veor    d5, d5, d31             @ A[1][2] ^= *inp++
 2443         beq     .Lprocess_neon
 2444         vld1.8  {d31}, [r4]!
 2445         cmp     r6, #8*10
 2446         veor    d7, d7, d31             @ A[1][3] ^= *inp++
 2447         blo     .Lprocess_neon
 2448         vld1.8  {d31}, [r4]!
 2449         veor    d9, d9, d31             @ A[1][4] ^= *inp++
 2450         beq     .Lprocess_neon
 2451 
 2452         vld1.8  {d31}, [r4]!
 2453         cmp     r6, #8*12
 2454         veor    d10, d10, d31           @ A[2][0] ^= *inp++
 2455         blo     .Lprocess_neon
 2456         vld1.8  {d31}, [r4]!
 2457         veor    d12, d12, d31           @ A[2][1] ^= *inp++
 2458         beq     .Lprocess_neon
 2459         vld1.8  {d31}, [r4]!
 2460         cmp     r6, #8*14
 2461         veor    d14, d14, d31           @ A[2][2] ^= *inp++
 2462         blo     .Lprocess_neon
 2463         vld1.8  {d31}, [r4]!
 2464         veor    d16, d16, d31           @ A[2][3] ^= *inp++
 2465         beq     .Lprocess_neon
 2466         vld1.8  {d31}, [r4]!
 2467         cmp     r6, #8*16
 2468         veor    d18, d18, d31           @ A[2][4] ^= *inp++
 2469         blo     .Lprocess_neon
 2470 
 2471         vld1.8  {d31}, [r4]!
 2472         veor    d11, d11, d31           @ A[3][0] ^= *inp++
 2473         beq     .Lprocess_neon
 2474         vld1.8  {d31}, [r4]!
 2475         cmp     r6, #8*18
 2476         veor    d13, d13, d31           @ A[3][1] ^= *inp++
 2477         blo     .Lprocess_neon
 2478         vld1.8  {d31}, [r4]!
 2479         veor    d15, d15, d31           @ A[3][2] ^= *inp++
 2480         beq     .Lprocess_neon
 2481         vld1.8  {d31}, [r4]!
 2482         cmp     r6, #8*20
 2483         veor    d17, d17, d31           @ A[3][3] ^= *inp++
 2484         blo     .Lprocess_neon
 2485         vld1.8  {d31}, [r4]!
 2486         veor    d19, d19, d31           @ A[3][4] ^= *inp++
 2487         beq     .Lprocess_neon
 2488 
 2489         vld1.8  {d31}, [r4]!
 2490         cmp     r6, #8*22
 2491         veor    d20, d20, d31           @ A[4][0] ^= *inp++
 2492         blo     .Lprocess_neon
 2493         vld1.8  {d31}, [r4]!
 2494         veor    d21, d21, d31           @ A[4][1] ^= *inp++
 2495         beq     .Lprocess_neon
 2496         vld1.8  {d31}, [r4]!
 2497         cmp     r6, #8*24
 2498         veor    d22, d22, d31           @ A[4][2] ^= *inp++
 2499         blo     .Lprocess_neon
 2500         vld1.8  {d31}, [r4]!
 2501         veor    d23, d23, d31           @ A[4][3] ^= *inp++
 2502         beq     .Lprocess_neon
 2503         vld1.8  {d31}, [r4]!
 2504         veor    d24, d24, d31           @ A[4][4] ^= *inp++
 2505 
 2506 .Lprocess_neon:
              @ r0 points at the state buffer, which the permutation uses as
              @ scratch while the live state stays in d0-d24.
 2507         bl      KeccakF1600_neon
 2508         b       .Loop_absorb_neon
 2509 
 2510 .align  4
 2511 .Labsorbed_neon:
              @ Fewer than bsz bytes remain (r5): write the state back in the same
              @ order in which it was loaded.
 2512         vst1.32 {d0}, [r0,:64]!         @ A[0][0..4]
 2513         vst1.32 {d2}, [r0,:64]!
 2514         vst1.32 {d4}, [r0,:64]!
 2515         vst1.32 {d6}, [r0,:64]!
 2516         vst1.32 {d8}, [r0,:64]!
 2517 
 2518         vst1.32 {d1}, [r0,:64]!         @ A[1][0..4]
 2519         vst1.32 {d3}, [r0,:64]!
 2520         vst1.32 {d5}, [r0,:64]!
 2521         vst1.32 {d7}, [r0,:64]!
 2522         vst1.32 {d9}, [r0,:64]!
 2523 
 2524         vst1.32 {d10}, [r0,:64]!                @ A[2][0..4]
 2525         vst1.32 {d12}, [r0,:64]!
 2526         vst1.32 {d14}, [r0,:64]!
 2527         vst1.32 {d16}, [r0,:64]!
 2528         vst1.32 {d18}, [r0,:64]!
 2529 
 2530         vst1.32 {d11}, [r0,:64]!                @ A[3][0..4]
 2531         vst1.32 {d13}, [r0,:64]!
 2532         vst1.32 {d15}, [r0,:64]!
 2533         vst1.32 {d17}, [r0,:64]!
 2534         vst1.32 {d19}, [r0,:64]!
 2535 
 2536         vst1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..4]
 2537         vst1.32 {d24}, [r0,:64]
 2538 
 2539         mov     r0, r5                  @ return value
 2540         vldmia  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
 2541         ldmia   sp!, {r4,r5,r6,pc}
 2542 .size   SHA3_absorb_neon,.-SHA3_absorb_neon
 2543 
 2544 .globl  SHA3_squeeze_neon
 2545 .type   SHA3_squeeze_neon, %function
 2546 .align  5
      @ SHA3_squeeze_neon: copy len bytes of sponge output to out, calling
      @ KeccakF1600_neon on the state each time bsz bytes have been consumed
      @ and more output is still owed.
      @ In:   r0 = A_flat (state, 25 lanes of 8 bytes), r1 = out, r2 = len,
      @       r3 = bsz (bytes that may be read between two permutations;
      @       presumably a non-zero multiple of 8 - TODO confirm with callers)
      @ Uses: r4 = out cursor, r5 = bytes still owed, r6 = bsz kept for reload,
      @       r12 = read cursor into the state, r14 = bytes left in this block
      @ r4-r6 are restored on return; d8-d15 are saved and restored around
      @ each permutation.  r2, r3, r12 and the NEON registers loaded below are
      @ overwritten.
      @ len == 0 returns at once and leaves out untouched.  Without that guard
      @ the tail path stored one byte before it ever looked at len.
      @ NOTE(review): the file header says this file is generated from
      @ keccak1600-armv4.pl, so the same guard has to be added to the
      @ generator to survive regeneration.
 2547 SHA3_squeeze_neon:
 2548         stmdb   sp!, {r4,r5,r6,lr}
 2549 
 2550         mov     r4, r1                  @ out
 2551         mov     r5, r2                  @ len
 2552         mov     r6, r3                  @ bsz
 2553         mov     r12, r0                 @ A_flat
 2554         mov     r14, r3                 @ bsz
              cmp     r5, #0                  @ len == 0: nothing to emit,
              beq     .Lsqueeze_neon_done     @ return without storing to out
 2555         b       .Loop_squeeze_neon
 2556 
 2557 .align  4
 2558 .Loop_squeeze_neon:
 2559         cmp     r5, #8
 2560         blo     .Lsqueeze_neon_tail     @ fewer than 8 bytes owed
 2561         vld1.32 {d0}, [r12]!
 2562         vst1.8  {d0}, [r4]!             @ endian-neutral store
 2563 
 2564         subs    r5, r5, #8              @ len -= 8
 2565         beq     .Lsqueeze_neon_done
 2566 
 2567         subs    r14, r14, #8            @ bsz -= 8
 2568         bhi     .Loop_squeeze_neon
 2569 
      @ Block exhausted with output still owed: load the whole state into
      @ NEON registers, permute it, and write it back.
 2570         vstmdb  sp!,  {d8,d9,d10,d11,d12,d13,d14,d15}
 2571 
 2572         vld1.32 {d0}, [r0,:64]!         @ A[0][0..4]
 2573         vld1.32 {d2}, [r0,:64]!
 2574         vld1.32 {d4}, [r0,:64]!
 2575         vld1.32 {d6}, [r0,:64]!
 2576         vld1.32 {d8}, [r0,:64]!
 2577 
 2578         vld1.32 {d1}, [r0,:64]!         @ A[1][0..4]
 2579         vld1.32 {d3}, [r0,:64]!
 2580         vld1.32 {d5}, [r0,:64]!
 2581         vld1.32 {d7}, [r0,:64]!
 2582         vld1.32 {d9}, [r0,:64]!
 2583 
 2584         vld1.32 {d10}, [r0,:64]!                @ A[2][0..4]
 2585         vld1.32 {d12}, [r0,:64]!
 2586         vld1.32 {d14}, [r0,:64]!
 2587         vld1.32 {d16}, [r0,:64]!
 2588         vld1.32 {d18}, [r0,:64]!
 2589 
 2590         vld1.32 {d11}, [r0,:64]!                @ A[3][0..4]
 2591         vld1.32 {d13}, [r0,:64]!
 2592         vld1.32 {d15}, [r0,:64]!
 2593         vld1.32 {d17}, [r0,:64]!
 2594         vld1.32 {d19}, [r0,:64]!
 2595 
 2596         vld1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..4]
 2597         vld1.32 {d24}, [r0,:64]
 2598         sub     r0, r0, #24*8           @ rewind
 2599 
 2600         bl      KeccakF1600_neon
 2601 
 2602         mov     r12, r0                 @ A_flat
 2603         vst1.32 {d0}, [r0,:64]!         @ A[0][0..4]
 2604         vst1.32 {d2}, [r0,:64]!
 2605         vst1.32 {d4}, [r0,:64]!
 2606         vst1.32 {d6}, [r0,:64]!
 2607         vst1.32 {d8}, [r0,:64]!
 2608 
 2609         vst1.32 {d1}, [r0,:64]!         @ A[1][0..4]
 2610         vst1.32 {d3}, [r0,:64]!
 2611         vst1.32 {d5}, [r0,:64]!
 2612         vst1.32 {d7}, [r0,:64]!
 2613         vst1.32 {d9}, [r0,:64]!
 2614 
 2615         vst1.32 {d10}, [r0,:64]!                @ A[2][0..4]
 2616         vst1.32 {d12}, [r0,:64]!
 2617         vst1.32 {d14}, [r0,:64]!
 2618         vst1.32 {d16}, [r0,:64]!
 2619         vst1.32 {d18}, [r0,:64]!
 2620 
 2621         vst1.32 {d11}, [r0,:64]!                @ A[3][0..4]
 2622         vst1.32 {d13}, [r0,:64]!
 2623         vst1.32 {d15}, [r0,:64]!
 2624         vst1.32 {d17}, [r0,:64]!
 2625         vst1.32 {d19}, [r0,:64]!
 2626 
 2627         vst1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..4]
 2628         mov     r14, r6                 @ bsz
 2629         vst1.32 {d24}, [r0,:64]
 2630         mov     r0,  r12                @ rewind
 2631 
 2632         vldmia  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
 2633         b       .Loop_squeeze_neon
 2634 
 2635 .align  4
 2636 .Lsqueeze_neon_tail:
      @ 1..7 bytes are owed here: len == 0 is filtered on entry and the loop
      @ leaves through .Lsqueeze_neon_done as soon as len reaches 0.  Bytes
      @ are peeled off r2 and then r3, lowest byte first.  strb and mov do not
      @ alter the flags, so each cmp feeds both the blo and the later beq.
 2637         ldmia   r12, {r2,r3}            @ r2 = [r12], r3 = [r12+4]
 2638         cmp     r5, #2
 2639         strb    r2, [r4],#1             @ endian-neutral store
 2640         mov     r2, r2, lsr#8
 2641         blo     .Lsqueeze_neon_done     @ len == 1
 2642         strb    r2, [r4], #1
 2643         mov     r2, r2, lsr#8
 2644         beq     .Lsqueeze_neon_done     @ len == 2
 2645         strb    r2, [r4], #1
 2646         mov     r2, r2, lsr#8
 2647         cmp     r5, #4
 2648         blo     .Lsqueeze_neon_done     @ len == 3
 2649         strb    r2, [r4], #1
 2650         beq     .Lsqueeze_neon_done     @ len == 4
 2651 
 2652         strb    r3, [r4], #1
 2653         mov     r3, r3, lsr#8
 2654         cmp     r5, #6
 2655         blo     .Lsqueeze_neon_done     @ len == 5
 2656         strb    r3, [r4], #1
 2657         mov     r3, r3, lsr#8
 2658         beq     .Lsqueeze_neon_done     @ len == 6
 2659         strb    r3, [r4], #1            @ len == 7
 2660 
 2661 .Lsqueeze_neon_done:
 2662         ldmia   sp!, {r4,r5,r6,pc}
 2663 .size   SHA3_squeeze_neon,.-SHA3_squeeze_neon
 2664 #endif
 2665 .byte   75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
      @ The bytes above are the NUL-terminated ASCII string
      @ "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>".
      @ It is not referenced by any label visible in this part of the file.
 2666 .align  2
 2667 .align  2
      @ The second .align 2 changes nothing: the location counter is already
      @ aligned by the first one.

Cache object: ee8edafdda44b02677184324bfe3fade


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.