FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/i386/chacha-x86.S

/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
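/*
 * ChaCha20 for i386, generated from OpenSSL's CRYPTOGAMS chacha-x86.pl.
 * The public entry point follows OpenSSL's usual prototype:
 *
 *     void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
 *                         size_t len, const unsigned int key[8],
 *                         const unsigned int counter[4]);
 *
 * ChaCha20_ssse3 and ChaCha20_xop appear to be reached only through the
 * run-time dispatch below (they rely on registers preset by
 * ChaCha20_ctr32) rather than being called directly.  Two copies of the
 * code follow: one for PIC builds and one for non-PIC builds.
 */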
#ifdef PIC
.text
.globl  ChaCha20_ctr32
.type   ChaCha20_ctr32,@function
.align  16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %eax,%eax
        cmpl    28(%esp),%eax
        je      .L000no_data
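        /*
         * PIC idiom: call/pop puts the address of .Lpic_point in %eax so
         * OPENSSL_ia32cap_P can be addressed relative to it.  The two
         * tests below check the capability vector; they appear to test
         * FXSR (bit 24 of word 0) and SSSE3 (bit 9 of word 1), falling
         * back to the integer-only code when either is missing.  An XOP
         * test is made later, at .Lssse3_shortcut.
         */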
        call    .Lpic_point
.Lpic_point:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
        testl   $16777216,(%ebp)
        jz      .L001x86
        testl   $512,4(%ebp)
        jz      .L001x86
        jmp     .Lssse3_shortcut
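/*
 * Integer-only path.  After the stack is extended by 132 bytes the
 * layout appears to be: the 16-word working state at 0..60(%esp), the
 * cached key at 80..108(%esp), the counter and nonce at 112..124(%esp),
 * and the out pointer, in pointer and remaining length at
 * 152/156/160(%esp) (the original call arguments, now 132 bytes further
 * away).  The counter word is pre-decremented here because each pass
 * through .L002entry increments it before use.
 */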
.L001x86:
        movl    32(%esp),%esi
        movl    36(%esp),%edi
        subl    $132,%esp
        movl    (%esi),%eax
        movl    4(%esi),%ebx
        movl    8(%esi),%ecx
        movl    12(%esi),%edx
        movl    %eax,80(%esp)
        movl    %ebx,84(%esp)
        movl    %ecx,88(%esp)
        movl    %edx,92(%esp)
        movl    16(%esi),%eax
        movl    20(%esi),%ebx
        movl    24(%esi),%ecx
        movl    28(%esi),%edx
        movl    %eax,96(%esp)
        movl    %ebx,100(%esp)
        movl    %ecx,104(%esp)
        movl    %edx,108(%esp)
        movl    (%edi),%eax
        movl    4(%edi),%ebx
        movl    8(%edi),%ecx
        movl    12(%edi),%edx
        subl    $1,%eax
        movl    %eax,112(%esp)
        movl    %ebx,116(%esp)
        movl    %ecx,120(%esp)
        movl    %edx,124(%esp)
        jmp     .L002entry
.align  16
.L003outer_loop:
        movl    %ebx,156(%esp)
        movl    %eax,152(%esp)
        movl    %ecx,160(%esp)
.L002entry:
        movl    $1634760805,%eax
        movl    $857760878,4(%esp)
        movl    $2036477234,8(%esp)
        movl    $1797285236,12(%esp)
        movl    84(%esp),%ebx
        movl    88(%esp),%ebp
        movl    104(%esp),%ecx
        movl    108(%esp),%esi
        movl    116(%esp),%edx
        movl    120(%esp),%edi
        movl    %ebx,20(%esp)
        movl    %ebp,24(%esp)
        movl    %ecx,40(%esp)
        movl    %esi,44(%esp)
        movl    %edx,52(%esp)
        movl    %edi,56(%esp)
        movl    92(%esp),%ebx
        movl    124(%esp),%edi
        movl    112(%esp),%edx
        movl    80(%esp),%ebp
        movl    96(%esp),%ecx
        movl    100(%esp),%esi
        addl    $1,%edx
        movl    %ebx,28(%esp)
        movl    %edi,60(%esp)
        movl    %edx,112(%esp)
        movl    $10,%ebx
        jmp     .L004loop
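/*
 * Main scalar loop: one iteration is a ChaCha double round (four column
 * quarter-rounds then four diagonal quarter-rounds), so %ebx = 10
 * iterations gives ChaCha20's 20 rounds.  A quarter-round is the usual
 * add/xor/rotate ladder with rotates of 16, 12, 8 and 7; with only
 * eight registers, part of the state is shuttled through the
 * 0..60(%esp) slots.  The $1634760805 etc. constants written above are
 * the little-endian "expand 32-byte k" sigma words.
 */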
.align  16
.L004loop:
        addl    %ebp,%eax
        movl    %ebx,128(%esp)
        movl    %ebp,%ebx
        xorl    %eax,%edx
        roll    $16,%edx
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    52(%esp),%edi
        roll    $12,%ebx
        movl    20(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,48(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,32(%esp)
        roll    $16,%edi
        movl    %ebx,16(%esp)
        addl    %edi,%esi
        movl    40(%esp),%ecx
        xorl    %esi,%ebp
        movl    56(%esp),%edx
        roll    $12,%ebp
        movl    24(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,52(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,36(%esp)
        roll    $16,%edx
        movl    %ebp,20(%esp)
        addl    %edx,%ecx
        movl    44(%esp),%esi
        xorl    %ecx,%ebx
        movl    60(%esp),%edi
        roll    $12,%ebx
        movl    28(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,56(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,24(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        roll    $12,%ebp
        movl    20(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,%edx
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        roll    $16,%edx
        movl    %ebp,28(%esp)
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    48(%esp),%edi
        roll    $12,%ebx
        movl    24(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,60(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,40(%esp)
        roll    $16,%edi
        movl    %ebx,20(%esp)
        addl    %edi,%esi
        movl    32(%esp),%ecx
        xorl    %esi,%ebp
        movl    52(%esp),%edx
        roll    $12,%ebp
        movl    28(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,48(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,44(%esp)
        roll    $16,%edx
        movl    %ebp,24(%esp)
        addl    %edx,%ecx
        movl    36(%esp),%esi
        xorl    %ecx,%ebx
        movl    56(%esp),%edi
        roll    $12,%ebx
        movl    16(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,52(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,28(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        movl    48(%esp),%edx
        roll    $12,%ebp
        movl    128(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,56(%esp)
        xorl    %esi,%ebp
        roll    $7,%ebp
        decl    %ebx
        jnz     .L004loop
        movl    160(%esp),%ebx
        addl    $1634760805,%eax
        addl    80(%esp),%ebp
        addl    96(%esp),%ecx
        addl    100(%esp),%esi
        cmpl    $64,%ebx
        jb      .L005tail
        movl    156(%esp),%ebx
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        xorl    (%ebx),%eax
        xorl    16(%ebx),%ebp
        movl    %eax,(%esp)
        movl    152(%esp),%eax
        xorl    32(%ebx),%ecx
        xorl    36(%ebx),%esi
        xorl    48(%ebx),%edx
        xorl    56(%ebx),%edi
        movl    %ebp,16(%eax)
        movl    %ecx,32(%eax)
        movl    %esi,36(%eax)
        movl    %edx,48(%eax)
        movl    %edi,56(%eax)
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        xorl    4(%ebx),%ebp
        xorl    8(%ebx),%ecx
        xorl    12(%ebx),%esi
        xorl    20(%ebx),%edx
        xorl    24(%ebx),%edi
        movl    %ebp,4(%eax)
        movl    %ecx,8(%eax)
        movl    %esi,12(%eax)
        movl    %edx,20(%eax)
        movl    %edi,24(%eax)
        movl    28(%esp),%ebp
        movl    40(%esp),%ecx
        movl    44(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    92(%esp),%ebp
        addl    104(%esp),%ecx
        addl    108(%esp),%esi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        xorl    28(%ebx),%ebp
        xorl    40(%ebx),%ecx
        xorl    44(%ebx),%esi
        xorl    52(%ebx),%edx
        xorl    60(%ebx),%edi
        leal    64(%ebx),%ebx
        movl    %ebp,28(%eax)
        movl    (%esp),%ebp
        movl    %ecx,40(%eax)
        movl    160(%esp),%ecx
        movl    %esi,44(%eax)
        movl    %edx,52(%eax)
        movl    %edi,60(%eax)
        movl    %ebp,(%eax)
        leal    64(%eax),%eax
        subl    $64,%ecx
        jnz     .L003outer_loop
        jmp     .L006done
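/*
 * Tail: fewer than 64 bytes remain.  The finished keystream block is
 * spilled to 0..63(%esp) and the leftover input bytes are XORed with it
 * one byte at a time (%ebp = input pointer, %ecx = output pointer,
 * %ebx = remaining count).
 */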
.L005tail:
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        movl    %eax,(%esp)
        movl    %ebp,16(%esp)
        movl    %ecx,32(%esp)
        movl    %esi,36(%esp)
        movl    %edx,48(%esp)
        movl    %edi,56(%esp)
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        movl    %ebp,4(%esp)
        movl    %ecx,8(%esp)
        movl    %esi,12(%esp)
        movl    %edx,20(%esp)
        movl    %edi,24(%esp)
        movl    28(%esp),%ebp
        movl    40(%esp),%ecx
        movl    44(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    92(%esp),%ebp
        addl    104(%esp),%ecx
        addl    108(%esp),%esi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        movl    %ebp,28(%esp)
        movl    156(%esp),%ebp
        movl    %ecx,40(%esp)
        movl    152(%esp),%ecx
        movl    %esi,44(%esp)
        xorl    %esi,%esi
        movl    %edx,52(%esp)
        movl    %edi,60(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
.L007tail_loop:
        movb    (%esi,%ebp,1),%al
        movb    (%esp,%esi,1),%dl
        leal    1(%esi),%esi
        xorb    %dl,%al
        movb    %al,-1(%ecx,%esi,1)
        decl    %ebx
        jnz     .L007tail_loop
.L006done:
        addl    $132,%esp
.L000no_data:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
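/*
 * SSSE3 path, one 64-byte block at a time.  %ebp (ia32cap pointer) and
 * %eax (address of .Lpic_point) are expected to be preset by
 * ChaCha20_ctr32, which is why this code is normally entered through
 * .Lssse3_shortcut rather than through the public symbol.  The word-1
 * bit tested here ($2048, bit 11) appears to be the XOP feature bit and
 * diverts XOP-capable parts to ChaCha20_xop.
 */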
.globl  ChaCha20_ssse3
.type   ChaCha20_ssse3,@function
.align  16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
.Lssse3_shortcut:
        testl   $2048,4(%ebp)
        jnz     .Lxop_shortcut
        movl    20(%esp),%edi
        movl    24(%esp),%esi
        movl    28(%esp),%ecx
        movl    32(%esp),%edx
        movl    36(%esp),%ebx
        movl    %esp,%ebp
        subl    $524,%esp
        andl    $-64,%esp
        movl    %ebp,512(%esp)
        leal    .Lssse3_data-.Lpic_point(%eax),%eax
        movdqu  (%ebx),%xmm3
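/*
 * One-block layout: %xmm0 = sigma row, %xmm1/%xmm2 = key rows,
 * %xmm3 = counter|nonce row, with %xmm6/%xmm7 holding the byte-shuffle
 * masks for rotate-by-16 and rotate-by-8 from .Lssse3_data.  The saved
 * %esp lives at 512(%esp) after the 64-byte stack alignment above.
 */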
.L0081x:
        movdqa  32(%eax),%xmm0
        movdqu  (%edx),%xmm1
        movdqu  16(%edx),%xmm2
        movdqa  (%eax),%xmm6
        movdqa  16(%eax),%xmm7
        movl    %ebp,48(%esp)
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        movl    $10,%edx
        jmp     .L009loop1x
.align  16
.L010outer1x:
        movdqa  80(%eax),%xmm3
        movdqa  (%esp),%xmm0
        movdqa  16(%esp),%xmm1
        movdqa  32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        movl    $10,%edx
        movdqa  %xmm3,48(%esp)
        jmp     .L009loop1x
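/*
 * Vectorized double round.  The raw ".byte 102,15,56,0,222/223" lines
 * encode pshufb %xmm6,%xmm3 and pshufb %xmm7,%xmm3, i.e. the 16- and
 * 8-bit rotates done as byte shuffles; the 12- and 7-bit rotates use
 * the psrld/pslld/por sequence.  The pshufd $78/$57/$147 triples rotate
 * the rows by 2/1/3 lanes to move between column and diagonal rounds.
 */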
.align  16
.L009loop1x:
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        pshufd  $78,%xmm2,%xmm2
        pshufd  $57,%xmm1,%xmm1
        pshufd  $147,%xmm3,%xmm3
        nop
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        pshufd  $78,%xmm2,%xmm2
        pshufd  $147,%xmm1,%xmm1
        pshufd  $57,%xmm3,%xmm3
        decl    %edx
        jnz     .L009loop1x
        paddd   (%esp),%xmm0
        paddd   16(%esp),%xmm1
        paddd   32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        cmpl    $64,%ecx
        jb      .L011tail
        movdqu  (%esi),%xmm4
        movdqu  16(%esi),%xmm5
        pxor    %xmm4,%xmm0
        movdqu  32(%esi),%xmm4
        pxor    %xmm5,%xmm1
        movdqu  48(%esi),%xmm5
        pxor    %xmm4,%xmm2
        pxor    %xmm5,%xmm3
        leal    64(%esi),%esi
        movdqu  %xmm0,(%edi)
        movdqu  %xmm1,16(%edi)
        movdqu  %xmm2,32(%edi)
        movdqu  %xmm3,48(%edi)
        leal    64(%edi),%edi
        subl    $64,%ecx
        jnz     .L010outer1x
        jmp     .L012done
.L011tail:
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
        xorl    %ebp,%ebp
.L013tail_loop:
        movb    (%esp,%ebp,1),%al
        movb    (%esi,%ebp,1),%dl
        leal    1(%ebp),%ebp
        xorb    %dl,%al
        movb    %al,-1(%edi,%ebp,1)
        decl    %ecx
        jnz     .L013tail_loop
.L012done:
        movl    512(%esp),%esp
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
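/*
 * .Lssse3_data: rotate-by-16 byte mask, rotate-by-8 byte mask, the four
 * sigma words, the {0,1,2,3} and {4,4,4,4} counter-lane vectors used by
 * the 4-way XOP code, {1,0,0,0} and {4,0,0,0} counter increments, and a
 * {0,-1,-1,-1} mask.  The trailing .byte string is the CRYPTOGAMS
 * banner "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>".
 */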
.align  64
.Lssse3_data:
.byte   2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte   3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long   1634760805,857760878,2036477234,1797285236
.long   0,1,2,3
.long   4,4,4,4
.long   1,0,0,0
.long   4,0,0,0
.long   0,-1,-1,-1
.align  64
.byte   67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte   44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte   60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte   114,103,62,0
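/*
 * XOP path.  When at least 256 bytes remain it processes four 64-byte
 * blocks in parallel with the state held transposed: sixteen "one state
 * word across four lanes" vectors, spilled around %ebp (saved copies)
 * and %ebx (working copies) on the aligned stack.  The splatted counter
 * lanes get {0,1,2,3} added and are pre-biased by -{4,4,4,4} so the
 * outer loop can add 4 per iteration.
 */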
.globl  ChaCha20_xop
.type   ChaCha20_xop,@function
.align  16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
.Lxop_shortcut:
        movl    20(%esp),%edi
        movl    24(%esp),%esi
        movl    28(%esp),%ecx
        movl    32(%esp),%edx
        movl    36(%esp),%ebx
        vzeroupper
        movl    %esp,%ebp
        subl    $524,%esp
        andl    $-64,%esp
        movl    %ebp,512(%esp)
        leal    .Lssse3_data-.Lpic_point(%eax),%eax
        vmovdqu (%ebx),%xmm3
        cmpl    $256,%ecx
        jb      .L0141x
        movl    %edx,516(%esp)
        movl    %ebx,520(%esp)
        subl    $256,%ecx
        leal    384(%esp),%ebp
        vmovdqu (%edx),%xmm7
        vpshufd $0,%xmm3,%xmm0
        vpshufd $85,%xmm3,%xmm1
        vpshufd $170,%xmm3,%xmm2
        vpshufd $255,%xmm3,%xmm3
        vpaddd  48(%eax),%xmm0,%xmm0
        vpshufd $0,%xmm7,%xmm4
        vpshufd $85,%xmm7,%xmm5
        vpsubd  64(%eax),%xmm0,%xmm0
        vpshufd $170,%xmm7,%xmm6
        vpshufd $255,%xmm7,%xmm7
        vmovdqa %xmm0,64(%ebp)
        vmovdqa %xmm1,80(%ebp)
        vmovdqa %xmm2,96(%ebp)
        vmovdqa %xmm3,112(%ebp)
        vmovdqu 16(%edx),%xmm3
        vmovdqa %xmm4,-64(%ebp)
        vmovdqa %xmm5,-48(%ebp)
        vmovdqa %xmm6,-32(%ebp)
        vmovdqa %xmm7,-16(%ebp)
        vmovdqa 32(%eax),%xmm7
        leal    128(%esp),%ebx
        vpshufd $0,%xmm3,%xmm0
        vpshufd $85,%xmm3,%xmm1
        vpshufd $170,%xmm3,%xmm2
        vpshufd $255,%xmm3,%xmm3
        vpshufd $0,%xmm7,%xmm4
        vpshufd $85,%xmm7,%xmm5
        vpshufd $170,%xmm7,%xmm6
        vpshufd $255,%xmm7,%xmm7
        vmovdqa %xmm0,(%ebp)
        vmovdqa %xmm1,16(%ebp)
        vmovdqa %xmm2,32(%ebp)
        vmovdqa %xmm3,48(%ebp)
        vmovdqa %xmm4,-128(%ebp)
        vmovdqa %xmm5,-112(%ebp)
        vmovdqa %xmm6,-96(%ebp)
        vmovdqa %xmm7,-80(%ebp)
        leal    128(%esi),%esi
        leal    128(%edi),%edi
        jmp     .L015outer_loop
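/*
 * 4-way outer loop: refresh the working copy of the transposed state at
 * %ebx from the saved copy at %ebp, step the counter lanes by 4, and
 * run 10 double rounds.
 */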
.align  32
.L015outer_loop:
        vmovdqa -112(%ebp),%xmm1
        vmovdqa -96(%ebp),%xmm2
        vmovdqa -80(%ebp),%xmm3
        vmovdqa -48(%ebp),%xmm5
        vmovdqa -32(%ebp),%xmm6
        vmovdqa -16(%ebp),%xmm7
        vmovdqa %xmm1,-112(%ebx)
        vmovdqa %xmm2,-96(%ebx)
        vmovdqa %xmm3,-80(%ebx)
        vmovdqa %xmm5,-48(%ebx)
        vmovdqa %xmm6,-32(%ebx)
        vmovdqa %xmm7,-16(%ebx)
        vmovdqa 32(%ebp),%xmm2
        vmovdqa 48(%ebp),%xmm3
        vmovdqa 64(%ebp),%xmm4
        vmovdqa 80(%ebp),%xmm5
        vmovdqa 96(%ebp),%xmm6
        vmovdqa 112(%ebp),%xmm7
        vpaddd  64(%eax),%xmm4,%xmm4
        vmovdqa %xmm2,32(%ebx)
        vmovdqa %xmm3,48(%ebx)
        vmovdqa %xmm4,64(%ebx)
        vmovdqa %xmm5,80(%ebx)
        vmovdqa %xmm6,96(%ebx)
        vmovdqa %xmm7,112(%ebx)
        vmovdqa %xmm4,64(%ebp)
        vmovdqa -128(%ebp),%xmm0
        vmovdqa %xmm4,%xmm6
        vmovdqa -64(%ebp),%xmm3
        vmovdqa (%ebp),%xmm4
        vmovdqa 16(%ebp),%xmm5
        movl    $10,%edx
        nop
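/*
 * The ".byte 143,232,120,194,<modrm>,<imm>" lines are hand-encoded XOP
 * vprotd instructions (rotate doublewords left by 16/12/8/7), which is
 * what makes this path cheaper than the shift/shift/or sequence used in
 * the SSSE3 code.
 */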
.align  32
.L016loop:
        vpaddd  %xmm3,%xmm0,%xmm0
        vpxor   %xmm0,%xmm6,%xmm6
.byte   143,232,120,194,246,16
        vpaddd  %xmm6,%xmm4,%xmm4
        vpxor   %xmm4,%xmm3,%xmm2
        vmovdqa -112(%ebx),%xmm1
.byte   143,232,120,194,210,12
        vmovdqa -48(%ebx),%xmm3
        vpaddd  %xmm2,%xmm0,%xmm0
        vmovdqa 80(%ebx),%xmm7
        vpxor   %xmm0,%xmm6,%xmm6
        vpaddd  %xmm3,%xmm1,%xmm1
.byte   143,232,120,194,246,8
        vmovdqa %xmm0,-128(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa %xmm6,64(%ebx)
        vpxor   %xmm4,%xmm2,%xmm2
        vpxor   %xmm1,%xmm7,%xmm7
.byte   143,232,120,194,210,7
        vmovdqa %xmm4,(%ebx)
.byte   143,232,120,194,255,16
        vmovdqa %xmm2,-64(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vmovdqa 32(%ebx),%xmm4
        vpxor   %xmm5,%xmm3,%xmm3
        vmovdqa -96(%ebx),%xmm0
.byte   143,232,120,194,219,12
        vmovdqa -32(%ebx),%xmm2
        vpaddd  %xmm3,%xmm1,%xmm1
        vmovdqa 96(%ebx),%xmm6
        vpxor   %xmm1,%xmm7,%xmm7
        vpaddd  %xmm2,%xmm0,%xmm0
.byte   143,232,120,194,255,8
        vmovdqa %xmm1,-112(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vmovdqa %xmm7,80(%ebx)
        vpxor   %xmm5,%xmm3,%xmm3
        vpxor   %xmm0,%xmm6,%xmm6
.byte   143,232,120,194,219,7
        vmovdqa %xmm5,16(%ebx)
.byte   143,232,120,194,246,16
        vmovdqa %xmm3,-48(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa 48(%ebx),%xmm5
        vpxor   %xmm4,%xmm2,%xmm2
        vmovdqa -80(%ebx),%xmm1
.byte   143,232,120,194,210,12
        vmovdqa -16(%ebx),%xmm3
        vpaddd  %xmm2,%xmm0,%xmm0
        vmovdqa 112(%ebx),%xmm7
        vpxor   %xmm0,%xmm6,%xmm6
        vpaddd  %xmm3,%xmm1,%xmm1
.byte   143,232,120,194,246,8
        vmovdqa %xmm0,-96(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa %xmm6,96(%ebx)
        vpxor   %xmm4,%xmm2,%xmm2
        vpxor   %xmm1,%xmm7,%xmm7
.byte   143,232,120,194,210,7
.byte   143,232,120,194,255,16
        vmovdqa %xmm2,-32(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vpxor   %xmm5,%xmm3,%xmm3
        vmovdqa -128(%ebx),%xmm0
.byte   143,232,120,194,219,12
        vmovdqa -48(%ebx),%xmm2
        vpaddd  %xmm3,%xmm1,%xmm1
        vpxor   %xmm1,%xmm7,%xmm7
        vpaddd  %xmm2,%xmm0,%xmm0
.byte   143,232,120,194,255,8
        vmovdqa %xmm1,-80(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vpxor   %xmm5,%xmm3,%xmm3
        vpxor   %xmm0,%xmm7,%xmm6
.byte   143,232,120,194,219,7
.byte   143,232,120,194,246,16
        vmovdqa %xmm3,-16(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vpxor   %xmm4,%xmm2,%xmm2
        vmovdqa -112(%ebx),%xmm1
.byte   143,232,120,194,210,12
        vmovdqa -32(%ebx),%xmm3
        vpaddd  %xmm2,%xmm0,%xmm0
        vmovdqa 64(%ebx),%xmm7
        vpxor   %xmm0,%xmm6,%xmm6
        vpaddd  %xmm3,%xmm1,%xmm1
.byte   143,232,120,194,246,8
        vmovdqa %xmm0,-128(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa %xmm6,112(%ebx)
        vpxor   %xmm4,%xmm2,%xmm2
        vpxor   %xmm1,%xmm7,%xmm7
.byte   143,232,120,194,210,7
        vmovdqa %xmm4,32(%ebx)
.byte   143,232,120,194,255,16
        vmovdqa %xmm2,-48(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vmovdqa (%ebx),%xmm4
        vpxor   %xmm5,%xmm3,%xmm3
        vmovdqa -96(%ebx),%xmm0
.byte   143,232,120,194,219,12
        vmovdqa -16(%ebx),%xmm2
        vpaddd  %xmm3,%xmm1,%xmm1
        vmovdqa 80(%ebx),%xmm6
        vpxor   %xmm1,%xmm7,%xmm7
        vpaddd  %xmm2,%xmm0,%xmm0
.byte   143,232,120,194,255,8
        vmovdqa %xmm1,-112(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vmovdqa %xmm7,64(%ebx)
        vpxor   %xmm5,%xmm3,%xmm3
        vpxor   %xmm0,%xmm6,%xmm6
.byte   143,232,120,194,219,7
        vmovdqa %xmm5,48(%ebx)
.byte   143,232,120,194,246,16
        vmovdqa %xmm3,-32(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa 16(%ebx),%xmm5
        vpxor   %xmm4,%xmm2,%xmm2
        vmovdqa -80(%ebx),%xmm1
.byte   143,232,120,194,210,12
        vmovdqa -64(%ebx),%xmm3
        vpaddd  %xmm2,%xmm0,%xmm0
        vmovdqa 96(%ebx),%xmm7
        vpxor   %xmm0,%xmm6,%xmm6
        vpaddd  %xmm3,%xmm1,%xmm1
.byte   143,232,120,194,246,8
        vmovdqa %xmm0,-96(%ebx)
        vpaddd  %xmm6,%xmm4,%xmm4
        vmovdqa %xmm6,80(%ebx)
        vpxor   %xmm4,%xmm2,%xmm2
        vpxor   %xmm1,%xmm7,%xmm7
.byte   143,232,120,194,210,7
.byte   143,232,120,194,255,16
        vmovdqa %xmm2,-16(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vpxor   %xmm5,%xmm3,%xmm3
        vmovdqa -128(%ebx),%xmm0
.byte   143,232,120,194,219,12
        vpaddd  %xmm3,%xmm1,%xmm1
        vmovdqa 64(%ebx),%xmm6
        vpxor   %xmm1,%xmm7,%xmm7
.byte   143,232,120,194,255,8
        vmovdqa %xmm1,-80(%ebx)
        vpaddd  %xmm7,%xmm5,%xmm5
        vmovdqa %xmm7,96(%ebx)
        vpxor   %xmm5,%xmm3,%xmm3
.byte   143,232,120,194,219,7
        decl    %edx
        jnz     .L016loop
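/*
 * Writeback: add the saved input state back lane by lane, then use the
 * vpunpck{l,h}{dq,qdq} ladders to transpose each group of four lanes
 * back into consecutive 16-byte rows.  Because the four blocks are
 * interleaved, the input appears to be XORed at -128/-64/0/64(%esi)
 * strides, with %esi/%edi advanced by 16 after each group and by 208
 * after the last one.
 */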
        vmovdqa %xmm3,-64(%ebx)
        vmovdqa %xmm4,(%ebx)
        vmovdqa %xmm5,16(%ebx)
        vmovdqa %xmm6,64(%ebx)
        vmovdqa %xmm7,96(%ebx)
        vmovdqa -112(%ebx),%xmm1
        vmovdqa -96(%ebx),%xmm2
        vmovdqa -80(%ebx),%xmm3
        vpaddd  -128(%ebp),%xmm0,%xmm0
        vpaddd  -112(%ebp),%xmm1,%xmm1
        vpaddd  -96(%ebp),%xmm2,%xmm2
        vpaddd  -80(%ebp),%xmm3,%xmm3
        vpunpckldq      %xmm1,%xmm0,%xmm6
        vpunpckldq      %xmm3,%xmm2,%xmm7
        vpunpckhdq      %xmm1,%xmm0,%xmm0
        vpunpckhdq      %xmm3,%xmm2,%xmm2
        vpunpcklqdq     %xmm7,%xmm6,%xmm1
        vpunpckhqdq     %xmm7,%xmm6,%xmm6
        vpunpcklqdq     %xmm2,%xmm0,%xmm7
        vpunpckhqdq     %xmm2,%xmm0,%xmm3
        vpxor   -128(%esi),%xmm1,%xmm4
        vpxor   -64(%esi),%xmm6,%xmm5
        vpxor   (%esi),%xmm7,%xmm6
        vpxor   64(%esi),%xmm3,%xmm7
        leal    16(%esi),%esi
        vmovdqa -64(%ebx),%xmm0
        vmovdqa -48(%ebx),%xmm1
        vmovdqa -32(%ebx),%xmm2
        vmovdqa -16(%ebx),%xmm3
        vmovdqu %xmm4,-128(%edi)
        vmovdqu %xmm5,-64(%edi)
        vmovdqu %xmm6,(%edi)
        vmovdqu %xmm7,64(%edi)
        leal    16(%edi),%edi
        vpaddd  -64(%ebp),%xmm0,%xmm0
        vpaddd  -48(%ebp),%xmm1,%xmm1
        vpaddd  -32(%ebp),%xmm2,%xmm2
        vpaddd  -16(%ebp),%xmm3,%xmm3
        vpunpckldq      %xmm1,%xmm0,%xmm6
        vpunpckldq      %xmm3,%xmm2,%xmm7
        vpunpckhdq      %xmm1,%xmm0,%xmm0
        vpunpckhdq      %xmm3,%xmm2,%xmm2
        vpunpcklqdq     %xmm7,%xmm6,%xmm1
        vpunpckhqdq     %xmm7,%xmm6,%xmm6
        vpunpcklqdq     %xmm2,%xmm0,%xmm7
        vpunpckhqdq     %xmm2,%xmm0,%xmm3
        vpxor   -128(%esi),%xmm1,%xmm4
        vpxor   -64(%esi),%xmm6,%xmm5
        vpxor   (%esi),%xmm7,%xmm6
        vpxor   64(%esi),%xmm3,%xmm7
        leal    16(%esi),%esi
        vmovdqa (%ebx),%xmm0
        vmovdqa 16(%ebx),%xmm1
        vmovdqa 32(%ebx),%xmm2
        vmovdqa 48(%ebx),%xmm3
        vmovdqu %xmm4,-128(%edi)
        vmovdqu %xmm5,-64(%edi)
        vmovdqu %xmm6,(%edi)
        vmovdqu %xmm7,64(%edi)
        leal    16(%edi),%edi
        vpaddd  (%ebp),%xmm0,%xmm0
        vpaddd  16(%ebp),%xmm1,%xmm1
        vpaddd  32(%ebp),%xmm2,%xmm2
        vpaddd  48(%ebp),%xmm3,%xmm3
        vpunpckldq      %xmm1,%xmm0,%xmm6
        vpunpckldq      %xmm3,%xmm2,%xmm7
        vpunpckhdq      %xmm1,%xmm0,%xmm0
        vpunpckhdq      %xmm3,%xmm2,%xmm2
        vpunpcklqdq     %xmm7,%xmm6,%xmm1
        vpunpckhqdq     %xmm7,%xmm6,%xmm6
        vpunpcklqdq     %xmm2,%xmm0,%xmm7
        vpunpckhqdq     %xmm2,%xmm0,%xmm3
        vpxor   -128(%esi),%xmm1,%xmm4
        vpxor   -64(%esi),%xmm6,%xmm5
        vpxor   (%esi),%xmm7,%xmm6
        vpxor   64(%esi),%xmm3,%xmm7
        leal    16(%esi),%esi
        vmovdqa 64(%ebx),%xmm0
        vmovdqa 80(%ebx),%xmm1
        vmovdqa 96(%ebx),%xmm2
        vmovdqa 112(%ebx),%xmm3
        vmovdqu %xmm4,-128(%edi)
        vmovdqu %xmm5,-64(%edi)
        vmovdqu %xmm6,(%edi)
        vmovdqu %xmm7,64(%edi)
        leal    16(%edi),%edi
        vpaddd  64(%ebp),%xmm0,%xmm0
        vpaddd  80(%ebp),%xmm1,%xmm1
        vpaddd  96(%ebp),%xmm2,%xmm2
        vpaddd  112(%ebp),%xmm3,%xmm3
        vpunpckldq      %xmm1,%xmm0,%xmm6
        vpunpckldq      %xmm3,%xmm2,%xmm7
        vpunpckhdq      %xmm1,%xmm0,%xmm0
        vpunpckhdq      %xmm3,%xmm2,%xmm2
        vpunpcklqdq     %xmm7,%xmm6,%xmm1
        vpunpckhqdq     %xmm7,%xmm6,%xmm6
        vpunpcklqdq     %xmm2,%xmm0,%xmm7
        vpunpckhqdq     %xmm2,%xmm0,%xmm3
        vpxor   -128(%esi),%xmm1,%xmm4
        vpxor   -64(%esi),%xmm6,%xmm5
        vpxor   (%esi),%xmm7,%xmm6
        vpxor   64(%esi),%xmm3,%xmm7
        leal    208(%esi),%esi
        vmovdqu %xmm4,-128(%edi)
        vmovdqu %xmm5,-64(%edi)
        vmovdqu %xmm6,(%edi)
        vmovdqu %xmm7,64(%edi)
        leal    208(%edi),%edi
        subl    $256,%ecx
        jnc     .L015outer_loop
        addl    $256,%ecx
        jz      .L017done
        movl    520(%esp),%ebx
        leal    -128(%esi),%esi
        movl    516(%esp),%edx
        leal    -128(%edi),%edi
        vmovd   64(%ebp),%xmm2
        vmovdqu (%ebx),%xmm3
        vpaddd  96(%eax),%xmm2,%xmm2
        vpand   112(%eax),%xmm3,%xmm3
        vpor    %xmm2,%xmm3,%xmm3
.L0141x:
        vmovdqa 32(%eax),%xmm0
        vmovdqu (%edx),%xmm1
        vmovdqu 16(%edx),%xmm2
        vmovdqa (%eax),%xmm6
        vmovdqa 16(%eax),%xmm7
        movl    %ebp,48(%esp)
        vmovdqa %xmm0,(%esp)
        vmovdqa %xmm1,16(%esp)
        vmovdqa %xmm2,32(%esp)
        vmovdqa %xmm3,48(%esp)
        movl    $10,%edx
        jmp     .L018loop1x
.align  16
.L019outer1x:
        vmovdqa 80(%eax),%xmm3
        vmovdqa (%esp),%xmm0
        vmovdqa 16(%esp),%xmm1
        vmovdqa 32(%esp),%xmm2
        vpaddd  48(%esp),%xmm3,%xmm3
        movl    $10,%edx
        vmovdqa %xmm3,48(%esp)
        jmp     .L018loop1x
.align  16
.L018loop1x:
        vpaddd  %xmm1,%xmm0,%xmm0
        vpxor   %xmm0,%xmm3,%xmm3
.byte   143,232,120,194,219,16
        vpaddd  %xmm3,%xmm2,%xmm2
        vpxor   %xmm2,%xmm1,%xmm1
.byte   143,232,120,194,201,12
        vpaddd  %xmm1,%xmm0,%xmm0
        vpxor   %xmm0,%xmm3,%xmm3
.byte   143,232,120,194,219,8
        vpaddd  %xmm3,%xmm2,%xmm2
        vpxor   %xmm2,%xmm1,%xmm1
.byte   143,232,120,194,201,7
        vpshufd $78,%xmm2,%xmm2
        vpshufd $57,%xmm1,%xmm1
        vpshufd $147,%xmm3,%xmm3
        vpaddd  %xmm1,%xmm0,%xmm0
        vpxor   %xmm0,%xmm3,%xmm3
.byte   143,232,120,194,219,16
        vpaddd  %xmm3,%xmm2,%xmm2
        vpxor   %xmm2,%xmm1,%xmm1
.byte   143,232,120,194,201,12
        vpaddd  %xmm1,%xmm0,%xmm0
        vpxor   %xmm0,%xmm3,%xmm3
.byte   143,232,120,194,219,8
        vpaddd  %xmm3,%xmm2,%xmm2
        vpxor   %xmm2,%xmm1,%xmm1
.byte   143,232,120,194,201,7
        vpshufd $78,%xmm2,%xmm2
        vpshufd $147,%xmm1,%xmm1
        vpshufd $57,%xmm3,%xmm3
        decl    %edx
        jnz     .L018loop1x
        vpaddd  (%esp),%xmm0,%xmm0
        vpaddd  16(%esp),%xmm1,%xmm1
        vpaddd  32(%esp),%xmm2,%xmm2
        vpaddd  48(%esp),%xmm3,%xmm3
        cmpl    $64,%ecx
        jb      .L020tail
        vpxor   (%esi),%xmm0,%xmm0
        vpxor   16(%esi),%xmm1,%xmm1
        vpxor   32(%esi),%xmm2,%xmm2
        vpxor   48(%esi),%xmm3,%xmm3
        leal    64(%esi),%esi
        vmovdqu %xmm0,(%edi)
        vmovdqu %xmm1,16(%edi)
        vmovdqu %xmm2,32(%edi)
        vmovdqu %xmm3,48(%edi)
        leal    64(%edi),%edi
        subl    $64,%ecx
        jnz     .L019outer1x
        jmp     .L017done
.L020tail:
        vmovdqa %xmm0,(%esp)
        vmovdqa %xmm1,16(%esp)
        vmovdqa %xmm2,32(%esp)
        vmovdqa %xmm3,48(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
        xorl    %ebp,%ebp
.L021tail_loop:
        movb    (%esp,%ebp,1),%al
        movb    (%esi,%ebp,1),%dl
        leal    1(%ebp),%ebp
        xorb    %dl,%al
        movb    %al,-1(%edi,%ebp,1)
        decl    %ecx
        jnz     .L021tail_loop
.L017done:
        vzeroupper
        movl    512(%esp),%esp
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm   OPENSSL_ia32cap_P,16,4
#else
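/*
 * Non-PIC variant.  The code below is the same logic as the PIC half
 * above; the only difference is that OPENSSL_ia32cap_P is addressed
 * absolutely instead of relative to .Lpic_point.  See the comments in
 * the PIC half for the walkthrough.
 */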
 1013 .text
 1014 .globl  ChaCha20_ctr32
 1015 .type   ChaCha20_ctr32,@function
 1016 .align  16
 1017 ChaCha20_ctr32:
 1018 .L_ChaCha20_ctr32_begin:
 1019         pushl   %ebp
 1020         pushl   %ebx
 1021         pushl   %esi
 1022         pushl   %edi
 1023         xorl    %eax,%eax
 1024         cmpl    28(%esp),%eax
 1025         je      .L000no_data
 1026         call    .Lpic_point
 1027 .Lpic_point:
 1028         popl    %eax
 1029         leal    OPENSSL_ia32cap_P,%ebp
 1030         testl   $16777216,(%ebp)
 1031         jz      .L001x86
 1032         testl   $512,4(%ebp)
 1033         jz      .L001x86
 1034         jmp     .Lssse3_shortcut
 1035 .L001x86:
 1036         movl    32(%esp),%esi
 1037         movl    36(%esp),%edi
 1038         subl    $132,%esp
 1039         movl    (%esi),%eax
 1040         movl    4(%esi),%ebx
 1041         movl    8(%esi),%ecx
 1042         movl    12(%esi),%edx
 1043         movl    %eax,80(%esp)
 1044         movl    %ebx,84(%esp)
 1045         movl    %ecx,88(%esp)
 1046         movl    %edx,92(%esp)
 1047         movl    16(%esi),%eax
 1048         movl    20(%esi),%ebx
 1049         movl    24(%esi),%ecx
 1050         movl    28(%esi),%edx
 1051         movl    %eax,96(%esp)
 1052         movl    %ebx,100(%esp)
 1053         movl    %ecx,104(%esp)
 1054         movl    %edx,108(%esp)
 1055         movl    (%edi),%eax
 1056         movl    4(%edi),%ebx
 1057         movl    8(%edi),%ecx
 1058         movl    12(%edi),%edx
 1059         subl    $1,%eax
 1060         movl    %eax,112(%esp)
 1061         movl    %ebx,116(%esp)
 1062         movl    %ecx,120(%esp)
 1063         movl    %edx,124(%esp)
 1064         jmp     .L002entry
 1065 .align  16
 1066 .L003outer_loop:
 1067         movl    %ebx,156(%esp)
 1068         movl    %eax,152(%esp)
 1069         movl    %ecx,160(%esp)
 1070 .L002entry:
 1071         movl    $1634760805,%eax
 1072         movl    $857760878,4(%esp)
 1073         movl    $2036477234,8(%esp)
 1074         movl    $1797285236,12(%esp)
 1075         movl    84(%esp),%ebx
 1076         movl    88(%esp),%ebp
 1077         movl    104(%esp),%ecx
 1078         movl    108(%esp),%esi
 1079         movl    116(%esp),%edx
 1080         movl    120(%esp),%edi
 1081         movl    %ebx,20(%esp)
 1082         movl    %ebp,24(%esp)
 1083         movl    %ecx,40(%esp)
 1084         movl    %esi,44(%esp)
 1085         movl    %edx,52(%esp)
 1086         movl    %edi,56(%esp)
 1087         movl    92(%esp),%ebx
 1088         movl    124(%esp),%edi
 1089         movl    112(%esp),%edx
 1090         movl    80(%esp),%ebp
 1091         movl    96(%esp),%ecx
 1092         movl    100(%esp),%esi
 1093         addl    $1,%edx
 1094         movl    %ebx,28(%esp)
 1095         movl    %edi,60(%esp)
 1096         movl    %edx,112(%esp)
 1097         movl    $10,%ebx
 1098         jmp     .L004loop
 1099 .align  16
 1100 .L004loop:
 1101         addl    %ebp,%eax
 1102         movl    %ebx,128(%esp)
 1103         movl    %ebp,%ebx
 1104         xorl    %eax,%edx
 1105         roll    $16,%edx
 1106         addl    %edx,%ecx
 1107         xorl    %ecx,%ebx
 1108         movl    52(%esp),%edi
 1109         roll    $12,%ebx
 1110         movl    20(%esp),%ebp
 1111         addl    %ebx,%eax
 1112         xorl    %eax,%edx
 1113         movl    %eax,(%esp)
 1114         roll    $8,%edx
 1115         movl    4(%esp),%eax
 1116         addl    %edx,%ecx
 1117         movl    %edx,48(%esp)
 1118         xorl    %ecx,%ebx
 1119         addl    %ebp,%eax
 1120         roll    $7,%ebx
 1121         xorl    %eax,%edi
 1122         movl    %ecx,32(%esp)
 1123         roll    $16,%edi
 1124         movl    %ebx,16(%esp)
 1125         addl    %edi,%esi
 1126         movl    40(%esp),%ecx
 1127         xorl    %esi,%ebp
 1128         movl    56(%esp),%edx
 1129         roll    $12,%ebp
 1130         movl    24(%esp),%ebx
 1131         addl    %ebp,%eax
 1132         xorl    %eax,%edi
 1133         movl    %eax,4(%esp)
 1134         roll    $8,%edi
 1135         movl    8(%esp),%eax
 1136         addl    %edi,%esi
 1137         movl    %edi,52(%esp)
 1138         xorl    %esi,%ebp
 1139         addl    %ebx,%eax
 1140         roll    $7,%ebp
 1141         xorl    %eax,%edx
 1142         movl    %esi,36(%esp)
 1143         roll    $16,%edx
 1144         movl    %ebp,20(%esp)
 1145         addl    %edx,%ecx
 1146         movl    44(%esp),%esi
 1147         xorl    %ecx,%ebx
 1148         movl    60(%esp),%edi
 1149         roll    $12,%ebx
 1150         movl    28(%esp),%ebp
 1151         addl    %ebx,%eax
 1152         xorl    %eax,%edx
 1153         movl    %eax,8(%esp)
 1154         roll    $8,%edx
 1155         movl    12(%esp),%eax
 1156         addl    %edx,%ecx
 1157         movl    %edx,56(%esp)
 1158         xorl    %ecx,%ebx
 1159         addl    %ebp,%eax
 1160         roll    $7,%ebx
 1161         xorl    %eax,%edi
 1162         roll    $16,%edi
 1163         movl    %ebx,24(%esp)
 1164         addl    %edi,%esi
 1165         xorl    %esi,%ebp
 1166         roll    $12,%ebp
 1167         movl    20(%esp),%ebx
 1168         addl    %ebp,%eax
 1169         xorl    %eax,%edi
 1170         movl    %eax,12(%esp)
 1171         roll    $8,%edi
 1172         movl    (%esp),%eax
 1173         addl    %edi,%esi
 1174         movl    %edi,%edx
 1175         xorl    %esi,%ebp
 1176         addl    %ebx,%eax
 1177         roll    $7,%ebp
 1178         xorl    %eax,%edx
 1179         roll    $16,%edx
 1180         movl    %ebp,28(%esp)
 1181         addl    %edx,%ecx
 1182         xorl    %ecx,%ebx
 1183         movl    48(%esp),%edi
 1184         roll    $12,%ebx
 1185         movl    24(%esp),%ebp
 1186         addl    %ebx,%eax
 1187         xorl    %eax,%edx
 1188         movl    %eax,(%esp)
 1189         roll    $8,%edx
 1190         movl    4(%esp),%eax
 1191         addl    %edx,%ecx
 1192         movl    %edx,60(%esp)
 1193         xorl    %ecx,%ebx
 1194         addl    %ebp,%eax
 1195         roll    $7,%ebx
 1196         xorl    %eax,%edi
 1197         movl    %ecx,40(%esp)
 1198         roll    $16,%edi
 1199         movl    %ebx,20(%esp)
 1200         addl    %edi,%esi
 1201         movl    32(%esp),%ecx
 1202         xorl    %esi,%ebp
 1203         movl    52(%esp),%edx
 1204         roll    $12,%ebp
 1205         movl    28(%esp),%ebx
 1206         addl    %ebp,%eax
 1207         xorl    %eax,%edi
 1208         movl    %eax,4(%esp)
 1209         roll    $8,%edi
 1210         movl    8(%esp),%eax
 1211         addl    %edi,%esi
 1212         movl    %edi,48(%esp)
 1213         xorl    %esi,%ebp
 1214         addl    %ebx,%eax
 1215         roll    $7,%ebp
 1216         xorl    %eax,%edx
 1217         movl    %esi,44(%esp)
 1218         roll    $16,%edx
 1219         movl    %ebp,24(%esp)
 1220         addl    %edx,%ecx
 1221         movl    36(%esp),%esi
 1222         xorl    %ecx,%ebx
 1223         movl    56(%esp),%edi
 1224         roll    $12,%ebx
 1225         movl    16(%esp),%ebp
 1226         addl    %ebx,%eax
 1227         xorl    %eax,%edx
 1228         movl    %eax,8(%esp)
 1229         roll    $8,%edx
 1230         movl    12(%esp),%eax
 1231         addl    %edx,%ecx
 1232         movl    %edx,52(%esp)
 1233         xorl    %ecx,%ebx
 1234         addl    %ebp,%eax
 1235         roll    $7,%ebx
 1236         xorl    %eax,%edi
 1237         roll    $16,%edi
 1238         movl    %ebx,28(%esp)
 1239         addl    %edi,%esi
 1240         xorl    %esi,%ebp
 1241         movl    48(%esp),%edx
 1242         roll    $12,%ebp
 1243         movl    128(%esp),%ebx
 1244         addl    %ebp,%eax
 1245         xorl    %eax,%edi
 1246         movl    %eax,12(%esp)
 1247         roll    $8,%edi
 1248         movl    (%esp),%eax
 1249         addl    %edi,%esi
 1250         movl    %edi,56(%esp)
 1251         xorl    %esi,%ebp
 1252         roll    $7,%ebp
 1253         decl    %ebx
 1254         jnz     .L004loop
 1255         movl    160(%esp),%ebx
 1256         addl    $1634760805,%eax
 1257         addl    80(%esp),%ebp
 1258         addl    96(%esp),%ecx
 1259         addl    100(%esp),%esi
 1260         cmpl    $64,%ebx
 1261         jb      .L005tail
 1262         movl    156(%esp),%ebx
 1263         addl    112(%esp),%edx
 1264         addl    120(%esp),%edi
 1265         xorl    (%ebx),%eax
 1266         xorl    16(%ebx),%ebp
 1267         movl    %eax,(%esp)
 1268         movl    152(%esp),%eax
 1269         xorl    32(%ebx),%ecx
 1270         xorl    36(%ebx),%esi
 1271         xorl    48(%ebx),%edx
 1272         xorl    56(%ebx),%edi
 1273         movl    %ebp,16(%eax)
 1274         movl    %ecx,32(%eax)
 1275         movl    %esi,36(%eax)
 1276         movl    %edx,48(%eax)
 1277         movl    %edi,56(%eax)
 1278         movl    4(%esp),%ebp
 1279         movl    8(%esp),%ecx
 1280         movl    12(%esp),%esi
 1281         movl    20(%esp),%edx
 1282         movl    24(%esp),%edi
 1283         addl    $857760878,%ebp
 1284         addl    $2036477234,%ecx
 1285         addl    $1797285236,%esi
 1286         addl    84(%esp),%edx
 1287         addl    88(%esp),%edi
 1288         xorl    4(%ebx),%ebp
 1289         xorl    8(%ebx),%ecx
 1290         xorl    12(%ebx),%esi
 1291         xorl    20(%ebx),%edx
 1292         xorl    24(%ebx),%edi
 1293         movl    %ebp,4(%eax)
 1294         movl    %ecx,8(%eax)
 1295         movl    %esi,12(%eax)
 1296         movl    %edx,20(%eax)
 1297         movl    %edi,24(%eax)
 1298         movl    28(%esp),%ebp
 1299         movl    40(%esp),%ecx
 1300         movl    44(%esp),%esi
 1301         movl    52(%esp),%edx
 1302         movl    60(%esp),%edi
 1303         addl    92(%esp),%ebp
 1304         addl    104(%esp),%ecx
 1305         addl    108(%esp),%esi
 1306         addl    116(%esp),%edx
 1307         addl    124(%esp),%edi
 1308         xorl    28(%ebx),%ebp
 1309         xorl    40(%ebx),%ecx
 1310         xorl    44(%ebx),%esi
 1311         xorl    52(%ebx),%edx
 1312         xorl    60(%ebx),%edi
 1313         leal    64(%ebx),%ebx
 1314         movl    %ebp,28(%eax)
 1315         movl    (%esp),%ebp
 1316         movl    %ecx,40(%eax)
 1317         movl    160(%esp),%ecx
 1318         movl    %esi,44(%eax)
 1319         movl    %edx,52(%eax)
 1320         movl    %edi,60(%eax)
 1321         movl    %ebp,(%eax)
 1322         leal    64(%eax),%eax
 1323         subl    $64,%ecx
 1324         jnz     .L003outer_loop
 1325         jmp     .L006done
 1326 .L005tail:
 1327         addl    112(%esp),%edx
 1328         addl    120(%esp),%edi
 1329         movl    %eax,(%esp)
 1330         movl    %ebp,16(%esp)
 1331         movl    %ecx,32(%esp)
 1332         movl    %esi,36(%esp)
 1333         movl    %edx,48(%esp)
 1334         movl    %edi,56(%esp)
 1335         movl    4(%esp),%ebp
 1336         movl    8(%esp),%ecx
 1337         movl    12(%esp),%esi
 1338         movl    20(%esp),%edx
 1339         movl    24(%esp),%edi
 1340         addl    $857760878,%ebp
 1341         addl    $2036477234,%ecx
 1342         addl    $1797285236,%esi
 1343         addl    84(%esp),%edx
 1344         addl    88(%esp),%edi
 1345         movl    %ebp,4(%esp)
 1346         movl    %ecx,8(%esp)
 1347         movl    %esi,12(%esp)
 1348         movl    %edx,20(%esp)
 1349         movl    %edi,24(%esp)
 1350         movl    28(%esp),%ebp
 1351         movl    40(%esp),%ecx
 1352         movl    44(%esp),%esi
 1353         movl    52(%esp),%edx
 1354         movl    60(%esp),%edi
 1355         addl    92(%esp),%ebp
 1356         addl    104(%esp),%ecx
 1357         addl    108(%esp),%esi
 1358         addl    116(%esp),%edx
 1359         addl    124(%esp),%edi
 1360         movl    %ebp,28(%esp)
 1361         movl    156(%esp),%ebp
 1362         movl    %ecx,40(%esp)
 1363         movl    152(%esp),%ecx
 1364         movl    %esi,44(%esp)
 1365         xorl    %esi,%esi
 1366         movl    %edx,52(%esp)
 1367         movl    %edi,60(%esp)
 1368         xorl    %eax,%eax
 1369         xorl    %edx,%edx
 1370 .L007tail_loop:
 1371         movb    (%esi,%ebp,1),%al
 1372         movb    (%esp,%esi,1),%dl
 1373         leal    1(%esi),%esi
 1374         xorb    %dl,%al
 1375         movb    %al,-1(%ecx,%esi,1)
 1376         decl    %ebx
 1377         jnz     .L007tail_loop
 1378 .L006done:
 1379         addl    $132,%esp
 1380 .L000no_data:
 1381         popl    %edi
 1382         popl    %esi
 1383         popl    %ebx
 1384         popl    %ebp
 1385         ret
 1386 .size   ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
 1387 .globl  ChaCha20_ssse3
 1388 .type   ChaCha20_ssse3,@function
 1389 .align  16
 1390 ChaCha20_ssse3:
 1391 .L_ChaCha20_ssse3_begin:
 1392         pushl   %ebp
 1393         pushl   %ebx
 1394         pushl   %esi
 1395         pushl   %edi
 1396 .Lssse3_shortcut:
 1397         testl   $2048,4(%ebp)
 1398         jnz     .Lxop_shortcut
 1399         movl    20(%esp),%edi
 1400         movl    24(%esp),%esi
 1401         movl    28(%esp),%ecx
 1402         movl    32(%esp),%edx
 1403         movl    36(%esp),%ebx
 1404         movl    %esp,%ebp
 1405         subl    $524,%esp
 1406         andl    $-64,%esp
 1407         movl    %ebp,512(%esp)
 1408         leal    .Lssse3_data-.Lpic_point(%eax),%eax
 1409         movdqu  (%ebx),%xmm3
 1410 .L0081x:
 1411         movdqa  32(%eax),%xmm0
 1412         movdqu  (%edx),%xmm1
 1413         movdqu  16(%edx),%xmm2
 1414         movdqa  (%eax),%xmm6
 1415         movdqa  16(%eax),%xmm7
 1416         movl    %ebp,48(%esp)
 1417         movdqa  %xmm0,(%esp)
 1418         movdqa  %xmm1,16(%esp)
 1419         movdqa  %xmm2,32(%esp)
 1420         movdqa  %xmm3,48(%esp)
 1421         movl    $10,%edx
 1422         jmp     .L009loop1x
 1423 .align  16
 1424 .L010outer1x:
 1425         movdqa  80(%eax),%xmm3
 1426         movdqa  (%esp),%xmm0
 1427         movdqa  16(%esp),%xmm1
 1428         movdqa  32(%esp),%xmm2
 1429         paddd   48(%esp),%xmm3
 1430         movl    $10,%edx
 1431         movdqa  %xmm3,48(%esp)
 1432         jmp     .L009loop1x
 1433 .align  16
 1434 .L009loop1x:
 1435         paddd   %xmm1,%xmm0
 1436         pxor    %xmm0,%xmm3
 1437 .byte   102,15,56,0,222
 1438         paddd   %xmm3,%xmm2
 1439         pxor    %xmm2,%xmm1
 1440         movdqa  %xmm1,%xmm4
 1441         psrld   $20,%xmm1
 1442         pslld   $12,%xmm4
 1443         por     %xmm4,%xmm1
 1444         paddd   %xmm1,%xmm0
 1445         pxor    %xmm0,%xmm3
 1446 .byte   102,15,56,0,223
 1447         paddd   %xmm3,%xmm2
 1448         pxor    %xmm2,%xmm1
 1449         movdqa  %xmm1,%xmm4
 1450         psrld   $25,%xmm1
 1451         pslld   $7,%xmm4
 1452         por     %xmm4,%xmm1
 1453         pshufd  $78,%xmm2,%xmm2
 1454         pshufd  $57,%xmm1,%xmm1
 1455         pshufd  $147,%xmm3,%xmm3
 1456         nop
 1457         paddd   %xmm1,%xmm0
 1458         pxor    %xmm0,%xmm3
 1459 .byte   102,15,56,0,222
 1460         paddd   %xmm3,%xmm2
 1461         pxor    %xmm2,%xmm1
 1462         movdqa  %xmm1,%xmm4
 1463         psrld   $20,%xmm1
 1464         pslld   $12,%xmm4
 1465         por     %xmm4,%xmm1
 1466         paddd   %xmm1,%xmm0
 1467         pxor    %xmm0,%xmm3
 1468 .byte   102,15,56,0,223
 1469         paddd   %xmm3,%xmm2
 1470         pxor    %xmm2,%xmm1
 1471         movdqa  %xmm1,%xmm4
 1472         psrld   $25,%xmm1
 1473         pslld   $7,%xmm4
 1474         por     %xmm4,%xmm1
 1475         pshufd  $78,%xmm2,%xmm2
 1476         pshufd  $147,%xmm1,%xmm1
 1477         pshufd  $57,%xmm3,%xmm3
 1478         decl    %edx
 1479         jnz     .L009loop1x
 1480         paddd   (%esp),%xmm0
 1481         paddd   16(%esp),%xmm1
 1482         paddd   32(%esp),%xmm2
 1483         paddd   48(%esp),%xmm3
 1484         cmpl    $64,%ecx
 1485         jb      .L011tail
 1486         movdqu  (%esi),%xmm4
 1487         movdqu  16(%esi),%xmm5
 1488         pxor    %xmm4,%xmm0
 1489         movdqu  32(%esi),%xmm4
 1490         pxor    %xmm5,%xmm1
 1491         movdqu  48(%esi),%xmm5
 1492         pxor    %xmm4,%xmm2
 1493         pxor    %xmm5,%xmm3
 1494         leal    64(%esi),%esi
 1495         movdqu  %xmm0,(%edi)
 1496         movdqu  %xmm1,16(%edi)
 1497         movdqu  %xmm2,32(%edi)
 1498         movdqu  %xmm3,48(%edi)
 1499         leal    64(%edi),%edi
 1500         subl    $64,%ecx
 1501         jnz     .L010outer1x
 1502         jmp     .L012done
 1503 .L011tail:
 1504         movdqa  %xmm0,(%esp)
 1505         movdqa  %xmm1,16(%esp)
 1506         movdqa  %xmm2,32(%esp)
 1507         movdqa  %xmm3,48(%esp)
 1508         xorl    %eax,%eax
 1509         xorl    %edx,%edx
 1510         xorl    %ebp,%ebp
 1511 .L013tail_loop:
 1512         movb    (%esp,%ebp,1),%al
 1513         movb    (%esi,%ebp,1),%dl
 1514         leal    1(%ebp),%ebp
 1515         xorb    %dl,%al
 1516         movb    %al,-1(%edi,%ebp,1)
 1517         decl    %ecx
 1518         jnz     .L013tail_loop
 1519 .L012done:
 1520         movl    512(%esp),%esp
 1521         popl    %edi
 1522         popl    %esi
 1523         popl    %ebx
 1524         popl    %ebp
 1525         ret
 1526 .size   ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
 1527 .align  64
 1528 .Lssse3_data:
 1529 .byte   2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
 1530 .byte   3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
 1531 .long   1634760805,857760878,2036477234,1797285236
 1532 .long   0,1,2,3
 1533 .long   4,4,4,4
 1534 .long   1,0,0,0
 1535 .long   4,0,0,0
 1536 .long   0,-1,-1,-1
 1537 .align  64
 1538 .byte   67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
 1539 .byte   44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
 1540 .byte   60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
 1541 .byte   114,103,62,0
 1542 .globl  ChaCha20_xop
 1543 .type   ChaCha20_xop,@function
 1544 .align  16
 1545 ChaCha20_xop:
 1546 .L_ChaCha20_xop_begin:
 1547         pushl   %ebp
 1548         pushl   %ebx
 1549         pushl   %esi
 1550         pushl   %edi
 1551 .Lxop_shortcut:
 1552         movl    20(%esp),%edi
 1553         movl    24(%esp),%esi
 1554         movl    28(%esp),%ecx
 1555         movl    32(%esp),%edx
 1556         movl    36(%esp),%ebx
 1557         vzeroupper
 1558         movl    %esp,%ebp
 1559         subl    $524,%esp
 1560         andl    $-64,%esp
 1561         movl    %ebp,512(%esp)
 1562         leal    .Lssse3_data-.Lpic_point(%eax),%eax
 1563         vmovdqu (%ebx),%xmm3
 1564         cmpl    $256,%ecx
 1565         jb      .L0141x
 1566         movl    %edx,516(%esp)
 1567         movl    %ebx,520(%esp)
 1568         subl    $256,%ecx
 1569         leal    384(%esp),%ebp
 1570         vmovdqu (%edx),%xmm7
 1571         vpshufd $0,%xmm3,%xmm0
 1572         vpshufd $85,%xmm3,%xmm1
 1573         vpshufd $170,%xmm3,%xmm2
 1574         vpshufd $255,%xmm3,%xmm3
 1575         vpaddd  48(%eax),%xmm0,%xmm0
 1576         vpshufd $0,%xmm7,%xmm4
 1577         vpshufd $85,%xmm7,%xmm5
 1578         vpsubd  64(%eax),%xmm0,%xmm0
 1579         vpshufd $170,%xmm7,%xmm6
 1580         vpshufd $255,%xmm7,%xmm7
 1581         vmovdqa %xmm0,64(%ebp)
 1582         vmovdqa %xmm1,80(%ebp)
 1583         vmovdqa %xmm2,96(%ebp)
 1584         vmovdqa %xmm3,112(%ebp)
 1585         vmovdqu 16(%edx),%xmm3
 1586         vmovdqa %xmm4,-64(%ebp)
 1587         vmovdqa %xmm5,-48(%ebp)
 1588         vmovdqa %xmm6,-32(%ebp)
 1589         vmovdqa %xmm7,-16(%ebp)
 1590         vmovdqa 32(%eax),%xmm7
 1591         leal    128(%esp),%ebx
 1592         vpshufd $0,%xmm3,%xmm0
 1593         vpshufd $85,%xmm3,%xmm1
 1594         vpshufd $170,%xmm3,%xmm2
 1595         vpshufd $255,%xmm3,%xmm3
 1596         vpshufd $0,%xmm7,%xmm4
 1597         vpshufd $85,%xmm7,%xmm5
 1598         vpshufd $170,%xmm7,%xmm6
 1599         vpshufd $255,%xmm7,%xmm7
 1600         vmovdqa %xmm0,(%ebp)
 1601         vmovdqa %xmm1,16(%ebp)
 1602         vmovdqa %xmm2,32(%ebp)
 1603         vmovdqa %xmm3,48(%ebp)
 1604         vmovdqa %xmm4,-128(%ebp)
 1605         vmovdqa %xmm5,-112(%ebp)
 1606         vmovdqa %xmm6,-96(%ebp)
 1607         vmovdqa %xmm7,-80(%ebp)
 1608         leal    128(%esi),%esi
 1609         leal    128(%edi),%edi
 1610         jmp     .L015outer_loop
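      /*
       * Outer loop: refresh the working copies at %ebx (128(%esp))
       * from the master copies at %ebp, advancing the counter row at
       * 64(%ebp) by {4,4,4,4} on each pass.
       */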
 1611 .align  32
 1612 .L015outer_loop:
 1613         vmovdqa -112(%ebp),%xmm1
 1614         vmovdqa -96(%ebp),%xmm2
 1615         vmovdqa -80(%ebp),%xmm3
 1616         vmovdqa -48(%ebp),%xmm5
 1617         vmovdqa -32(%ebp),%xmm6
 1618         vmovdqa -16(%ebp),%xmm7
 1619         vmovdqa %xmm1,-112(%ebx)
 1620         vmovdqa %xmm2,-96(%ebx)
 1621         vmovdqa %xmm3,-80(%ebx)
 1622         vmovdqa %xmm5,-48(%ebx)
 1623         vmovdqa %xmm6,-32(%ebx)
 1624         vmovdqa %xmm7,-16(%ebx)
 1625         vmovdqa 32(%ebp),%xmm2
 1626         vmovdqa 48(%ebp),%xmm3
 1627         vmovdqa 64(%ebp),%xmm4
 1628         vmovdqa 80(%ebp),%xmm5
 1629         vmovdqa 96(%ebp),%xmm6
 1630         vmovdqa 112(%ebp),%xmm7
 1631         vpaddd  64(%eax),%xmm4,%xmm4
 1632         vmovdqa %xmm2,32(%ebx)
 1633         vmovdqa %xmm3,48(%ebx)
 1634         vmovdqa %xmm4,64(%ebx)
 1635         vmovdqa %xmm5,80(%ebx)
 1636         vmovdqa %xmm6,96(%ebx)
 1637         vmovdqa %xmm7,112(%ebx)
 1638         vmovdqa %xmm4,64(%ebp)
 1639         vmovdqa -128(%ebp),%xmm0
 1640         vmovdqa %xmm4,%xmm6
 1641         vmovdqa -64(%ebp),%xmm3
 1642         vmovdqa (%ebp),%xmm4
 1643         vmovdqa 16(%ebp),%xmm5
 1644         movl    $10,%edx
 1645         nop
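      /*
       * Main loop: %edx counts 10 iterations, each one column round
       * plus one diagonal round over the four interleaved states.  The
       * .byte sequences 143,232,120,194,... are hand-encoded XOP
       * vprotd instructions (rotate each dword left by the final
       * immediate: 16, 12, 8 or 7), presumably emitted as raw bytes so
       * that assemblers without XOP support can still build this file;
       * the ModRM byte before the immediate selects the xmm register.
       * The first quarter-round's four rotates are decoded below; the
       * pattern repeats for the rest.
       */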
 1646 .align  32
 1647 .L016loop:
 1648         vpaddd  %xmm3,%xmm0,%xmm0
 1649         vpxor   %xmm0,%xmm6,%xmm6
 1650 .byte   143,232,120,194,246,16 /* vprotd $16,%xmm6,%xmm6 */
 1651         vpaddd  %xmm6,%xmm4,%xmm4
 1652         vpxor   %xmm4,%xmm3,%xmm2
 1653         vmovdqa -112(%ebx),%xmm1
 1654 .byte   143,232,120,194,210,12 /* vprotd $12,%xmm2,%xmm2 */
 1655         vmovdqa -48(%ebx),%xmm3
 1656         vpaddd  %xmm2,%xmm0,%xmm0
 1657         vmovdqa 80(%ebx),%xmm7
 1658         vpxor   %xmm0,%xmm6,%xmm6
 1659         vpaddd  %xmm3,%xmm1,%xmm1
 1660 .byte   143,232,120,194,246,8 /* vprotd $8,%xmm6,%xmm6 */
 1661         vmovdqa %xmm0,-128(%ebx)
 1662         vpaddd  %xmm6,%xmm4,%xmm4
 1663         vmovdqa %xmm6,64(%ebx)
 1664         vpxor   %xmm4,%xmm2,%xmm2
 1665         vpxor   %xmm1,%xmm7,%xmm7
 1666 .byte   143,232,120,194,210,7 /* vprotd $7,%xmm2,%xmm2 */
 1667         vmovdqa %xmm4,(%ebx)
 1668 .byte   143,232,120,194,255,16
 1669         vmovdqa %xmm2,-64(%ebx)
 1670         vpaddd  %xmm7,%xmm5,%xmm5
 1671         vmovdqa 32(%ebx),%xmm4
 1672         vpxor   %xmm5,%xmm3,%xmm3
 1673         vmovdqa -96(%ebx),%xmm0
 1674 .byte   143,232,120,194,219,12
 1675         vmovdqa -32(%ebx),%xmm2
 1676         vpaddd  %xmm3,%xmm1,%xmm1
 1677         vmovdqa 96(%ebx),%xmm6
 1678         vpxor   %xmm1,%xmm7,%xmm7
 1679         vpaddd  %xmm2,%xmm0,%xmm0
 1680 .byte   143,232,120,194,255,8
 1681         vmovdqa %xmm1,-112(%ebx)
 1682         vpaddd  %xmm7,%xmm5,%xmm5
 1683         vmovdqa %xmm7,80(%ebx)
 1684         vpxor   %xmm5,%xmm3,%xmm3
 1685         vpxor   %xmm0,%xmm6,%xmm6
 1686 .byte   143,232,120,194,219,7
 1687         vmovdqa %xmm5,16(%ebx)
 1688 .byte   143,232,120,194,246,16
 1689         vmovdqa %xmm3,-48(%ebx)
 1690         vpaddd  %xmm6,%xmm4,%xmm4
 1691         vmovdqa 48(%ebx),%xmm5
 1692         vpxor   %xmm4,%xmm2,%xmm2
 1693         vmovdqa -80(%ebx),%xmm1
 1694 .byte   143,232,120,194,210,12
 1695         vmovdqa -16(%ebx),%xmm3
 1696         vpaddd  %xmm2,%xmm0,%xmm0
 1697         vmovdqa 112(%ebx),%xmm7
 1698         vpxor   %xmm0,%xmm6,%xmm6
 1699         vpaddd  %xmm3,%xmm1,%xmm1
 1700 .byte   143,232,120,194,246,8
 1701         vmovdqa %xmm0,-96(%ebx)
 1702         vpaddd  %xmm6,%xmm4,%xmm4
 1703         vmovdqa %xmm6,96(%ebx)
 1704         vpxor   %xmm4,%xmm2,%xmm2
 1705         vpxor   %xmm1,%xmm7,%xmm7
 1706 .byte   143,232,120,194,210,7
 1707 .byte   143,232,120,194,255,16
 1708         vmovdqa %xmm2,-32(%ebx)
 1709         vpaddd  %xmm7,%xmm5,%xmm5
 1710         vpxor   %xmm5,%xmm3,%xmm3
 1711         vmovdqa -128(%ebx),%xmm0
 1712 .byte   143,232,120,194,219,12
 1713         vmovdqa -48(%ebx),%xmm2
 1714         vpaddd  %xmm3,%xmm1,%xmm1
 1715         vpxor   %xmm1,%xmm7,%xmm7
 1716         vpaddd  %xmm2,%xmm0,%xmm0
 1717 .byte   143,232,120,194,255,8
 1718         vmovdqa %xmm1,-80(%ebx)
 1719         vpaddd  %xmm7,%xmm5,%xmm5
 1720         vpxor   %xmm5,%xmm3,%xmm3
 1721         vpxor   %xmm0,%xmm7,%xmm6
 1722 .byte   143,232,120,194,219,7
 1723 .byte   143,232,120,194,246,16
 1724         vmovdqa %xmm3,-16(%ebx)
 1725         vpaddd  %xmm6,%xmm4,%xmm4
 1726         vpxor   %xmm4,%xmm2,%xmm2
 1727         vmovdqa -112(%ebx),%xmm1
 1728 .byte   143,232,120,194,210,12
 1729         vmovdqa -32(%ebx),%xmm3
 1730         vpaddd  %xmm2,%xmm0,%xmm0
 1731         vmovdqa 64(%ebx),%xmm7
 1732         vpxor   %xmm0,%xmm6,%xmm6
 1733         vpaddd  %xmm3,%xmm1,%xmm1
 1734 .byte   143,232,120,194,246,8
 1735         vmovdqa %xmm0,-128(%ebx)
 1736         vpaddd  %xmm6,%xmm4,%xmm4
 1737         vmovdqa %xmm6,112(%ebx)
 1738         vpxor   %xmm4,%xmm2,%xmm2
 1739         vpxor   %xmm1,%xmm7,%xmm7
 1740 .byte   143,232,120,194,210,7
 1741         vmovdqa %xmm4,32(%ebx)
 1742 .byte   143,232,120,194,255,16
 1743         vmovdqa %xmm2,-48(%ebx)
 1744         vpaddd  %xmm7,%xmm5,%xmm5
 1745         vmovdqa (%ebx),%xmm4
 1746         vpxor   %xmm5,%xmm3,%xmm3
 1747         vmovdqa -96(%ebx),%xmm0
 1748 .byte   143,232,120,194,219,12
 1749         vmovdqa -16(%ebx),%xmm2
 1750         vpaddd  %xmm3,%xmm1,%xmm1
 1751         vmovdqa 80(%ebx),%xmm6
 1752         vpxor   %xmm1,%xmm7,%xmm7
 1753         vpaddd  %xmm2,%xmm0,%xmm0
 1754 .byte   143,232,120,194,255,8
 1755         vmovdqa %xmm1,-112(%ebx)
 1756         vpaddd  %xmm7,%xmm5,%xmm5
 1757         vmovdqa %xmm7,64(%ebx)
 1758         vpxor   %xmm5,%xmm3,%xmm3
 1759         vpxor   %xmm0,%xmm6,%xmm6
 1760 .byte   143,232,120,194,219,7
 1761         vmovdqa %xmm5,48(%ebx)
 1762 .byte   143,232,120,194,246,16
 1763         vmovdqa %xmm3,-32(%ebx)
 1764         vpaddd  %xmm6,%xmm4,%xmm4
 1765         vmovdqa 16(%ebx),%xmm5
 1766         vpxor   %xmm4,%xmm2,%xmm2
 1767         vmovdqa -80(%ebx),%xmm1
 1768 .byte   143,232,120,194,210,12
 1769         vmovdqa -64(%ebx),%xmm3
 1770         vpaddd  %xmm2,%xmm0,%xmm0
 1771         vmovdqa 96(%ebx),%xmm7
 1772         vpxor   %xmm0,%xmm6,%xmm6
 1773         vpaddd  %xmm3,%xmm1,%xmm1
 1774 .byte   143,232,120,194,246,8
 1775         vmovdqa %xmm0,-96(%ebx)
 1776         vpaddd  %xmm6,%xmm4,%xmm4
 1777         vmovdqa %xmm6,80(%ebx)
 1778         vpxor   %xmm4,%xmm2,%xmm2
 1779         vpxor   %xmm1,%xmm7,%xmm7
 1780 .byte   143,232,120,194,210,7
 1781 .byte   143,232,120,194,255,16
 1782         vmovdqa %xmm2,-16(%ebx)
 1783         vpaddd  %xmm7,%xmm5,%xmm5
 1784         vpxor   %xmm5,%xmm3,%xmm3
 1785         vmovdqa -128(%ebx),%xmm0
 1786 .byte   143,232,120,194,219,12
 1787         vpaddd  %xmm3,%xmm1,%xmm1
 1788         vmovdqa 64(%ebx),%xmm6
 1789         vpxor   %xmm1,%xmm7,%xmm7
 1790 .byte   143,232,120,194,255,8
 1791         vmovdqa %xmm1,-80(%ebx)
 1792         vpaddd  %xmm7,%xmm5,%xmm5
 1793         vmovdqa %xmm7,96(%ebx)
 1794         vpxor   %xmm5,%xmm3,%xmm3
 1795 .byte   143,232,120,194,219,7
 1796         decl    %edx
 1797         jnz     .L016loop
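      /*
       * Rounds done.  For each group of four state rows: add back the
       * saved input state, transpose the 4x4 dword matrix (vpunpck*)
       * so each block's words become contiguous, then XOR with the
       * plaintext and store.  The four 64-byte blocks are consumed in
       * an interleaved pattern at offsets -128, -64, 0 and 64 from
       * %esi/%edi, which advance 16 bytes per group and 208 bytes at
       * the end (3*16 + 208 = 256).
       */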
 1798         vmovdqa %xmm3,-64(%ebx)
 1799         vmovdqa %xmm4,(%ebx)
 1800         vmovdqa %xmm5,16(%ebx)
 1801         vmovdqa %xmm6,64(%ebx)
 1802         vmovdqa %xmm7,96(%ebx)
 1803         vmovdqa -112(%ebx),%xmm1
 1804         vmovdqa -96(%ebx),%xmm2
 1805         vmovdqa -80(%ebx),%xmm3
 1806         vpaddd  -128(%ebp),%xmm0,%xmm0
 1807         vpaddd  -112(%ebp),%xmm1,%xmm1
 1808         vpaddd  -96(%ebp),%xmm2,%xmm2
 1809         vpaddd  -80(%ebp),%xmm3,%xmm3
 1810         vpunpckldq      %xmm1,%xmm0,%xmm6
 1811         vpunpckldq      %xmm3,%xmm2,%xmm7
 1812         vpunpckhdq      %xmm1,%xmm0,%xmm0
 1813         vpunpckhdq      %xmm3,%xmm2,%xmm2
 1814         vpunpcklqdq     %xmm7,%xmm6,%xmm1
 1815         vpunpckhqdq     %xmm7,%xmm6,%xmm6
 1816         vpunpcklqdq     %xmm2,%xmm0,%xmm7
 1817         vpunpckhqdq     %xmm2,%xmm0,%xmm3
 1818         vpxor   -128(%esi),%xmm1,%xmm4
 1819         vpxor   -64(%esi),%xmm6,%xmm5
 1820         vpxor   (%esi),%xmm7,%xmm6
 1821         vpxor   64(%esi),%xmm3,%xmm7
 1822         leal    16(%esi),%esi
 1823         vmovdqa -64(%ebx),%xmm0
 1824         vmovdqa -48(%ebx),%xmm1
 1825         vmovdqa -32(%ebx),%xmm2
 1826         vmovdqa -16(%ebx),%xmm3
 1827         vmovdqu %xmm4,-128(%edi)
 1828         vmovdqu %xmm5,-64(%edi)
 1829         vmovdqu %xmm6,(%edi)
 1830         vmovdqu %xmm7,64(%edi)
 1831         leal    16(%edi),%edi
 1832         vpaddd  -64(%ebp),%xmm0,%xmm0
 1833         vpaddd  -48(%ebp),%xmm1,%xmm1
 1834         vpaddd  -32(%ebp),%xmm2,%xmm2
 1835         vpaddd  -16(%ebp),%xmm3,%xmm3
 1836         vpunpckldq      %xmm1,%xmm0,%xmm6
 1837         vpunpckldq      %xmm3,%xmm2,%xmm7
 1838         vpunpckhdq      %xmm1,%xmm0,%xmm0
 1839         vpunpckhdq      %xmm3,%xmm2,%xmm2
 1840         vpunpcklqdq     %xmm7,%xmm6,%xmm1
 1841         vpunpckhqdq     %xmm7,%xmm6,%xmm6
 1842         vpunpcklqdq     %xmm2,%xmm0,%xmm7
 1843         vpunpckhqdq     %xmm2,%xmm0,%xmm3
 1844         vpxor   -128(%esi),%xmm1,%xmm4
 1845         vpxor   -64(%esi),%xmm6,%xmm5
 1846         vpxor   (%esi),%xmm7,%xmm6
 1847         vpxor   64(%esi),%xmm3,%xmm7
 1848         leal    16(%esi),%esi
 1849         vmovdqa (%ebx),%xmm0
 1850         vmovdqa 16(%ebx),%xmm1
 1851         vmovdqa 32(%ebx),%xmm2
 1852         vmovdqa 48(%ebx),%xmm3
 1853         vmovdqu %xmm4,-128(%edi)
 1854         vmovdqu %xmm5,-64(%edi)
 1855         vmovdqu %xmm6,(%edi)
 1856         vmovdqu %xmm7,64(%edi)
 1857         leal    16(%edi),%edi
 1858         vpaddd  (%ebp),%xmm0,%xmm0
 1859         vpaddd  16(%ebp),%xmm1,%xmm1
 1860         vpaddd  32(%ebp),%xmm2,%xmm2
 1861         vpaddd  48(%ebp),%xmm3,%xmm3
 1862         vpunpckldq      %xmm1,%xmm0,%xmm6
 1863         vpunpckldq      %xmm3,%xmm2,%xmm7
 1864         vpunpckhdq      %xmm1,%xmm0,%xmm0
 1865         vpunpckhdq      %xmm3,%xmm2,%xmm2
 1866         vpunpcklqdq     %xmm7,%xmm6,%xmm1
 1867         vpunpckhqdq     %xmm7,%xmm6,%xmm6
 1868         vpunpcklqdq     %xmm2,%xmm0,%xmm7
 1869         vpunpckhqdq     %xmm2,%xmm0,%xmm3
 1870         vpxor   -128(%esi),%xmm1,%xmm4
 1871         vpxor   -64(%esi),%xmm6,%xmm5
 1872         vpxor   (%esi),%xmm7,%xmm6
 1873         vpxor   64(%esi),%xmm3,%xmm7
 1874         leal    16(%esi),%esi
 1875         vmovdqa 64(%ebx),%xmm0
 1876         vmovdqa 80(%ebx),%xmm1
 1877         vmovdqa 96(%ebx),%xmm2
 1878         vmovdqa 112(%ebx),%xmm3
 1879         vmovdqu %xmm4,-128(%edi)
 1880         vmovdqu %xmm5,-64(%edi)
 1881         vmovdqu %xmm6,(%edi)
 1882         vmovdqu %xmm7,64(%edi)
 1883         leal    16(%edi),%edi
 1884         vpaddd  64(%ebp),%xmm0,%xmm0
 1885         vpaddd  80(%ebp),%xmm1,%xmm1
 1886         vpaddd  96(%ebp),%xmm2,%xmm2
 1887         vpaddd  112(%ebp),%xmm3,%xmm3
 1888         vpunpckldq      %xmm1,%xmm0,%xmm6
 1889         vpunpckldq      %xmm3,%xmm2,%xmm7
 1890         vpunpckhdq      %xmm1,%xmm0,%xmm0
 1891         vpunpckhdq      %xmm3,%xmm2,%xmm2
 1892         vpunpcklqdq     %xmm7,%xmm6,%xmm1
 1893         vpunpckhqdq     %xmm7,%xmm6,%xmm6
 1894         vpunpcklqdq     %xmm2,%xmm0,%xmm7
 1895         vpunpckhqdq     %xmm2,%xmm0,%xmm3
 1896         vpxor   -128(%esi),%xmm1,%xmm4
 1897         vpxor   -64(%esi),%xmm6,%xmm5
 1898         vpxor   (%esi),%xmm7,%xmm6
 1899         vpxor   64(%esi),%xmm3,%xmm7
 1900         leal    208(%esi),%esi
 1901         vmovdqu %xmm4,-128(%edi)
 1902         vmovdqu %xmm5,-64(%edi)
 1903         vmovdqu %xmm6,(%edi)
 1904         vmovdqu %xmm7,64(%edi)
 1905         leal    208(%edi),%edi
 1906         subl    $256,%ecx
 1907         jnc     .L015outer_loop
 1908         addl    $256,%ecx
 1909         jz      .L017done
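      /*
       * 1..255 bytes left: restore the counter pointer (520(%esp)) and
       * key pointer (516(%esp)), rebuild the counter/nonce row by
       * adding {4,0,0,0} to the saved counter and masking the reloaded
       * IV with {0,-1,-1,-1}, then fall into the single-block path.
       */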
 1910         movl    520(%esp),%ebx
 1911         leal    -128(%esi),%esi
 1912         movl    516(%esp),%edx
 1913         leal    -128(%edi),%edi
 1914         vmovd   64(%ebp),%xmm2
 1915         vmovdqu (%ebx),%xmm3
 1916         vpaddd  96(%eax),%xmm2,%xmm2
 1917         vpand   112(%eax),%xmm3,%xmm3
 1918         vpor    %xmm2,%xmm3,%xmm3
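      /*
       * Single-block path: the whole state lives in xmm0..xmm3 (sigma
       * row, two key rows, counter/nonce row), with a copy kept at
       * (%esp)..48(%esp) to re-add after the rounds.
       */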
 1919 .L0141x:
 1920         vmovdqa 32(%eax),%xmm0
 1921         vmovdqu (%edx),%xmm1
 1922         vmovdqu 16(%edx),%xmm2
 1923         vmovdqa (%eax),%xmm6
 1924         vmovdqa 16(%eax),%xmm7
 1925         movl    %ebp,48(%esp)
 1926         vmovdqa %xmm0,(%esp)
 1927         vmovdqa %xmm1,16(%esp)
 1928         vmovdqa %xmm2,32(%esp)
 1929         vmovdqa %xmm3,48(%esp)
 1930         movl    $10,%edx
 1931         jmp     .L018loop1x
 1932 .align  16
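      /*
       * Next 64-byte block: reload the saved state and bump the
       * counter word by 1 via {1,0,0,0} at 80(%eax).
       */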
 1933 .L019outer1x:
 1934         vmovdqa 80(%eax),%xmm3
 1935         vmovdqa (%esp),%xmm0
 1936         vmovdqa 16(%esp),%xmm1
 1937         vmovdqa 32(%esp),%xmm2
 1938         vpaddd  48(%esp),%xmm3,%xmm3
 1939         movl    $10,%edx
 1940         vmovdqa %xmm3,48(%esp)
 1941         jmp     .L018loop1x
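      /*
       * One pass through .L018loop1x is a full doubleround: a column
       * quarter-round, a vpshufd lane rotation of rows b/c/d to line
       * up the diagonals, the diagonal quarter-round, and the inverse
       * lane rotation.  As above, the .byte sequences are XOP vprotd
       * rotates; the first four are decoded below.
       */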
 1942 .align  16
 1943 .L018loop1x:
 1944         vpaddd  %xmm1,%xmm0,%xmm0
 1945         vpxor   %xmm0,%xmm3,%xmm3
 1946 .byte   143,232,120,194,219,16 /* vprotd $16,%xmm3,%xmm3 */
 1947         vpaddd  %xmm3,%xmm2,%xmm2
 1948         vpxor   %xmm2,%xmm1,%xmm1
 1949 .byte   143,232,120,194,201,12 /* vprotd $12,%xmm1,%xmm1 */
 1950         vpaddd  %xmm1,%xmm0,%xmm0
 1951         vpxor   %xmm0,%xmm3,%xmm3
 1952 .byte   143,232,120,194,219,8 /* vprotd $8,%xmm3,%xmm3 */
 1953         vpaddd  %xmm3,%xmm2,%xmm2
 1954         vpxor   %xmm2,%xmm1,%xmm1
 1955 .byte   143,232,120,194,201,7 /* vprotd $7,%xmm1,%xmm1 */
 1956         vpshufd $78,%xmm2,%xmm2
 1957         vpshufd $57,%xmm1,%xmm1
 1958         vpshufd $147,%xmm3,%xmm3
 1959         vpaddd  %xmm1,%xmm0,%xmm0
 1960         vpxor   %xmm0,%xmm3,%xmm3
 1961 .byte   143,232,120,194,219,16
 1962         vpaddd  %xmm3,%xmm2,%xmm2
 1963         vpxor   %xmm2,%xmm1,%xmm1
 1964 .byte   143,232,120,194,201,12
 1965         vpaddd  %xmm1,%xmm0,%xmm0
 1966         vpxor   %xmm0,%xmm3,%xmm3
 1967 .byte   143,232,120,194,219,8
 1968         vpaddd  %xmm3,%xmm2,%xmm2
 1969         vpxor   %xmm2,%xmm1,%xmm1
 1970 .byte   143,232,120,194,201,7
 1971         vpshufd $78,%xmm2,%xmm2
 1972         vpshufd $147,%xmm1,%xmm1
 1973         vpshufd $57,%xmm3,%xmm3
 1974         decl    %edx
 1975         jnz     .L018loop1x
 1976         vpaddd  (%esp),%xmm0,%xmm0
 1977         vpaddd  16(%esp),%xmm1,%xmm1
 1978         vpaddd  32(%esp),%xmm2,%xmm2
 1979         vpaddd  48(%esp),%xmm3,%xmm3
 1980         cmpl    $64,%ecx
 1981         jb      .L020tail
 1982         vpxor   (%esi),%xmm0,%xmm0
 1983         vpxor   16(%esi),%xmm1,%xmm1
 1984         vpxor   32(%esi),%xmm2,%xmm2
 1985         vpxor   48(%esi),%xmm3,%xmm3
 1986         leal    64(%esi),%esi
 1987         vmovdqu %xmm0,(%edi)
 1988         vmovdqu %xmm1,16(%edi)
 1989         vmovdqu %xmm2,32(%edi)
 1990         vmovdqu %xmm3,48(%edi)
 1991         leal    64(%edi),%edi
 1992         subl    $64,%ecx
 1993         jnz     .L019outer1x
 1994         jmp     .L017done
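      /* Tail: same byte-by-byte XOR scheme as the SSSE3 tail loop. */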
 1995 .L020tail:
 1996         vmovdqa %xmm0,(%esp)
 1997         vmovdqa %xmm1,16(%esp)
 1998         vmovdqa %xmm2,32(%esp)
 1999         vmovdqa %xmm3,48(%esp)
 2000         xorl    %eax,%eax
 2001         xorl    %edx,%edx
 2002         xorl    %ebp,%ebp
 2003 .L021tail_loop:
 2004         movb    (%esp,%ebp,1),%al
 2005         movb    (%esi,%ebp,1),%dl
 2006         leal    1(%ebp),%ebp
 2007         xorb    %dl,%al
 2008         movb    %al,-1(%edi,%ebp,1)
 2009         decl    %ecx
 2010         jnz     .L021tail_loop
 2011 .L017done:
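      /* Clear the upper YMM halves, restore the caller's %esp saved
         at 512(%esp), and return. */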
 2012         vzeroupper
 2013         movl    512(%esp),%esp
 2014         popl    %edi
 2015         popl    %esi
 2016         popl    %ebx
 2017         popl    %ebp
 2018         ret
 2019 .size   ChaCha20_xop,.-.L_ChaCha20_xop_begin
 2020 .comm   OPENSSL_ia32cap_P,16,4 /* CPU capability vector, filled in by OpenSSL's cpuid probe */
 2021 #endif
