The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/amd64/rsaz-x86_64.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
    3 .text   
    4 
    5 
    6 
    7 .globl  rsaz_512_sqr
    8 .type   rsaz_512_sqr,@function
    9 .align  32
   10 rsaz_512_sqr:
   11 .cfi_startproc  
   12         pushq   %rbx
   13 .cfi_adjust_cfa_offset  8
   14 .cfi_offset     %rbx,-16
   15         pushq   %rbp
   16 .cfi_adjust_cfa_offset  8
   17 .cfi_offset     %rbp,-24
   18         pushq   %r12
   19 .cfi_adjust_cfa_offset  8
   20 .cfi_offset     %r12,-32
   21         pushq   %r13
   22 .cfi_adjust_cfa_offset  8
   23 .cfi_offset     %r13,-40
   24         pushq   %r14
   25 .cfi_adjust_cfa_offset  8
   26 .cfi_offset     %r14,-48
   27         pushq   %r15
   28 .cfi_adjust_cfa_offset  8
   29 .cfi_offset     %r15,-56
   30 
   31         subq    $128+24,%rsp
   32 .cfi_adjust_cfa_offset  128+24
   33 .Lsqr_body:
   34 .byte   102,72,15,110,202
   35         movq    (%rsi),%rdx
   36         movq    8(%rsi),%rax
   37         movq    %rcx,128(%rsp)
   38         movl    $0x80100,%r11d
   39         andl    OPENSSL_ia32cap_P+8(%rip),%r11d
   40         cmpl    $0x80100,%r11d
   41         je      .Loop_sqrx
   42         jmp     .Loop_sqr
   43 
   44 .align  32
   45 .Loop_sqr:
   46         movl    %r8d,128+8(%rsp)
   47 
   48         movq    %rdx,%rbx
   49         movq    %rax,%rbp
   50         mulq    %rdx
   51         movq    %rax,%r8
   52         movq    16(%rsi),%rax
   53         movq    %rdx,%r9
   54 
   55         mulq    %rbx
   56         addq    %rax,%r9
   57         movq    24(%rsi),%rax
   58         movq    %rdx,%r10
   59         adcq    $0,%r10
   60 
   61         mulq    %rbx
   62         addq    %rax,%r10
   63         movq    32(%rsi),%rax
   64         movq    %rdx,%r11
   65         adcq    $0,%r11
   66 
   67         mulq    %rbx
   68         addq    %rax,%r11
   69         movq    40(%rsi),%rax
   70         movq    %rdx,%r12
   71         adcq    $0,%r12
   72 
   73         mulq    %rbx
   74         addq    %rax,%r12
   75         movq    48(%rsi),%rax
   76         movq    %rdx,%r13
   77         adcq    $0,%r13
   78 
   79         mulq    %rbx
   80         addq    %rax,%r13
   81         movq    56(%rsi),%rax
   82         movq    %rdx,%r14
   83         adcq    $0,%r14
   84 
   85         mulq    %rbx
   86         addq    %rax,%r14
   87         movq    %rbx,%rax
   88         adcq    $0,%rdx
   89 
   90         xorq    %rcx,%rcx
   91         addq    %r8,%r8
   92         movq    %rdx,%r15
   93         adcq    $0,%rcx
   94 
   95         mulq    %rax
   96         addq    %r8,%rdx
   97         adcq    $0,%rcx
   98 
   99         movq    %rax,(%rsp)
  100         movq    %rdx,8(%rsp)
  101 
  102 
  103         movq    16(%rsi),%rax
  104         mulq    %rbp
  105         addq    %rax,%r10
  106         movq    24(%rsi),%rax
  107         movq    %rdx,%rbx
  108         adcq    $0,%rbx
  109 
  110         mulq    %rbp
  111         addq    %rax,%r11
  112         movq    32(%rsi),%rax
  113         adcq    $0,%rdx
  114         addq    %rbx,%r11
  115         movq    %rdx,%rbx
  116         adcq    $0,%rbx
  117 
  118         mulq    %rbp
  119         addq    %rax,%r12
  120         movq    40(%rsi),%rax
  121         adcq    $0,%rdx
  122         addq    %rbx,%r12
  123         movq    %rdx,%rbx
  124         adcq    $0,%rbx
  125 
  126         mulq    %rbp
  127         addq    %rax,%r13
  128         movq    48(%rsi),%rax
  129         adcq    $0,%rdx
  130         addq    %rbx,%r13
  131         movq    %rdx,%rbx
  132         adcq    $0,%rbx
  133 
  134         mulq    %rbp
  135         addq    %rax,%r14
  136         movq    56(%rsi),%rax
  137         adcq    $0,%rdx
  138         addq    %rbx,%r14
  139         movq    %rdx,%rbx
  140         adcq    $0,%rbx
  141 
  142         mulq    %rbp
  143         addq    %rax,%r15
  144         movq    %rbp,%rax
  145         adcq    $0,%rdx
  146         addq    %rbx,%r15
  147         adcq    $0,%rdx
  148 
  149         xorq    %rbx,%rbx
  150         addq    %r9,%r9
  151         movq    %rdx,%r8
  152         adcq    %r10,%r10
  153         adcq    $0,%rbx
  154 
  155         mulq    %rax
  156 
  157         addq    %rcx,%rax
  158         movq    16(%rsi),%rbp
  159         addq    %rax,%r9
  160         movq    24(%rsi),%rax
  161         adcq    %rdx,%r10
  162         adcq    $0,%rbx
  163 
  164         movq    %r9,16(%rsp)
  165         movq    %r10,24(%rsp)
  166 
  167 
  168         mulq    %rbp
  169         addq    %rax,%r12
  170         movq    32(%rsi),%rax
  171         movq    %rdx,%rcx
  172         adcq    $0,%rcx
  173 
  174         mulq    %rbp
  175         addq    %rax,%r13
  176         movq    40(%rsi),%rax
  177         adcq    $0,%rdx
  178         addq    %rcx,%r13
  179         movq    %rdx,%rcx
  180         adcq    $0,%rcx
  181 
  182         mulq    %rbp
  183         addq    %rax,%r14
  184         movq    48(%rsi),%rax
  185         adcq    $0,%rdx
  186         addq    %rcx,%r14
  187         movq    %rdx,%rcx
  188         adcq    $0,%rcx
  189 
  190         mulq    %rbp
  191         addq    %rax,%r15
  192         movq    56(%rsi),%rax
  193         adcq    $0,%rdx
  194         addq    %rcx,%r15
  195         movq    %rdx,%rcx
  196         adcq    $0,%rcx
  197 
  198         mulq    %rbp
  199         addq    %rax,%r8
  200         movq    %rbp,%rax
  201         adcq    $0,%rdx
  202         addq    %rcx,%r8
  203         adcq    $0,%rdx
  204 
  205         xorq    %rcx,%rcx
  206         addq    %r11,%r11
  207         movq    %rdx,%r9
  208         adcq    %r12,%r12
  209         adcq    $0,%rcx
  210 
  211         mulq    %rax
  212 
  213         addq    %rbx,%rax
  214         movq    24(%rsi),%r10
  215         addq    %rax,%r11
  216         movq    32(%rsi),%rax
  217         adcq    %rdx,%r12
  218         adcq    $0,%rcx
  219 
  220         movq    %r11,32(%rsp)
  221         movq    %r12,40(%rsp)
  222 
  223 
  224         movq    %rax,%r11
  225         mulq    %r10
  226         addq    %rax,%r14
  227         movq    40(%rsi),%rax
  228         movq    %rdx,%rbx
  229         adcq    $0,%rbx
  230 
  231         movq    %rax,%r12
  232         mulq    %r10
  233         addq    %rax,%r15
  234         movq    48(%rsi),%rax
  235         adcq    $0,%rdx
  236         addq    %rbx,%r15
  237         movq    %rdx,%rbx
  238         adcq    $0,%rbx
  239 
  240         movq    %rax,%rbp
  241         mulq    %r10
  242         addq    %rax,%r8
  243         movq    56(%rsi),%rax
  244         adcq    $0,%rdx
  245         addq    %rbx,%r8
  246         movq    %rdx,%rbx
  247         adcq    $0,%rbx
  248 
  249         mulq    %r10
  250         addq    %rax,%r9
  251         movq    %r10,%rax
  252         adcq    $0,%rdx
  253         addq    %rbx,%r9
  254         adcq    $0,%rdx
  255 
  256         xorq    %rbx,%rbx
  257         addq    %r13,%r13
  258         movq    %rdx,%r10
  259         adcq    %r14,%r14
  260         adcq    $0,%rbx
  261 
  262         mulq    %rax
  263 
  264         addq    %rcx,%rax
  265         addq    %rax,%r13
  266         movq    %r12,%rax
  267         adcq    %rdx,%r14
  268         adcq    $0,%rbx
  269 
  270         movq    %r13,48(%rsp)
  271         movq    %r14,56(%rsp)
  272 
  273 
  274         mulq    %r11
  275         addq    %rax,%r8
  276         movq    %rbp,%rax
  277         movq    %rdx,%rcx
  278         adcq    $0,%rcx
  279 
  280         mulq    %r11
  281         addq    %rax,%r9
  282         movq    56(%rsi),%rax
  283         adcq    $0,%rdx
  284         addq    %rcx,%r9
  285         movq    %rdx,%rcx
  286         adcq    $0,%rcx
  287 
  288         movq    %rax,%r14
  289         mulq    %r11
  290         addq    %rax,%r10
  291         movq    %r11,%rax
  292         adcq    $0,%rdx
  293         addq    %rcx,%r10
  294         adcq    $0,%rdx
  295 
  296         xorq    %rcx,%rcx
  297         addq    %r15,%r15
  298         movq    %rdx,%r11
  299         adcq    %r8,%r8
  300         adcq    $0,%rcx
  301 
  302         mulq    %rax
  303 
  304         addq    %rbx,%rax
  305         addq    %rax,%r15
  306         movq    %rbp,%rax
  307         adcq    %rdx,%r8
  308         adcq    $0,%rcx
  309 
  310         movq    %r15,64(%rsp)
  311         movq    %r8,72(%rsp)
  312 
  313 
  314         mulq    %r12
  315         addq    %rax,%r10
  316         movq    %r14,%rax
  317         movq    %rdx,%rbx
  318         adcq    $0,%rbx
  319 
  320         mulq    %r12
  321         addq    %rax,%r11
  322         movq    %r12,%rax
  323         adcq    $0,%rdx
  324         addq    %rbx,%r11
  325         adcq    $0,%rdx
  326 
  327         xorq    %rbx,%rbx
  328         addq    %r9,%r9
  329         movq    %rdx,%r12
  330         adcq    %r10,%r10
  331         adcq    $0,%rbx
  332 
  333         mulq    %rax
  334 
  335         addq    %rcx,%rax
  336         addq    %rax,%r9
  337         movq    %r14,%rax
  338         adcq    %rdx,%r10
  339         adcq    $0,%rbx
  340 
  341         movq    %r9,80(%rsp)
  342         movq    %r10,88(%rsp)
  343 
  344 
  345         mulq    %rbp
  346         addq    %rax,%r12
  347         movq    %rbp,%rax
  348         adcq    $0,%rdx
  349 
  350         xorq    %rcx,%rcx
  351         addq    %r11,%r11
  352         movq    %rdx,%r13
  353         adcq    %r12,%r12
  354         adcq    $0,%rcx
  355 
  356         mulq    %rax
  357 
  358         addq    %rbx,%rax
  359         addq    %rax,%r11
  360         movq    %r14,%rax
  361         adcq    %rdx,%r12
  362         adcq    $0,%rcx
  363 
  364         movq    %r11,96(%rsp)
  365         movq    %r12,104(%rsp)
  366 
  367 
  368         xorq    %rbx,%rbx
  369         addq    %r13,%r13
  370         adcq    $0,%rbx
  371 
  372         mulq    %rax
  373 
  374         addq    %rcx,%rax
  375         addq    %r13,%rax
  376         adcq    %rbx,%rdx
  377 
  378         movq    (%rsp),%r8
  379         movq    8(%rsp),%r9
  380         movq    16(%rsp),%r10
  381         movq    24(%rsp),%r11
  382         movq    32(%rsp),%r12
  383         movq    40(%rsp),%r13
  384         movq    48(%rsp),%r14
  385         movq    56(%rsp),%r15
  386 .byte   102,72,15,126,205
  387 
  388         movq    %rax,112(%rsp)
  389         movq    %rdx,120(%rsp)
  390 
  391         call    __rsaz_512_reduce
  392 
  393         addq    64(%rsp),%r8
  394         adcq    72(%rsp),%r9
  395         adcq    80(%rsp),%r10
  396         adcq    88(%rsp),%r11
  397         adcq    96(%rsp),%r12
  398         adcq    104(%rsp),%r13
  399         adcq    112(%rsp),%r14
  400         adcq    120(%rsp),%r15
  401         sbbq    %rcx,%rcx
  402 
  403         call    __rsaz_512_subtract
  404 
  405         movq    %r8,%rdx
  406         movq    %r9,%rax
  407         movl    128+8(%rsp),%r8d
  408         movq    %rdi,%rsi
  409 
  410         decl    %r8d
  411         jnz     .Loop_sqr
  412         jmp     .Lsqr_tail
  413 
  414 .align  32
  415 .Loop_sqrx:
  416         movl    %r8d,128+8(%rsp)
  417 .byte   102,72,15,110,199
  418 
  419         mulxq   %rax,%r8,%r9
  420         movq    %rax,%rbx
  421 
  422         mulxq   16(%rsi),%rcx,%r10
  423         xorq    %rbp,%rbp
  424 
  425         mulxq   24(%rsi),%rax,%r11
  426         adcxq   %rcx,%r9
  427 
  428 .byte   0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
  429         adcxq   %rax,%r10
  430 
  431 .byte   0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
  432         adcxq   %rcx,%r11
  433 
  434         mulxq   48(%rsi),%rcx,%r14
  435         adcxq   %rax,%r12
  436         adcxq   %rcx,%r13
  437 
  438         mulxq   56(%rsi),%rax,%r15
  439         adcxq   %rax,%r14
  440         adcxq   %rbp,%r15
  441 
  442         mulxq   %rdx,%rax,%rdi
  443         movq    %rbx,%rdx
  444         xorq    %rcx,%rcx
  445         adoxq   %r8,%r8
  446         adcxq   %rdi,%r8
  447         adoxq   %rbp,%rcx
  448         adcxq   %rbp,%rcx
  449 
  450         movq    %rax,(%rsp)
  451         movq    %r8,8(%rsp)
  452 
  453 
  454 .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
  455         adoxq   %rax,%r10
  456         adcxq   %rbx,%r11
  457 
  458         mulxq   24(%rsi),%rdi,%r8
  459         adoxq   %rdi,%r11
  460 .byte   0x66
  461         adcxq   %r8,%r12
  462 
  463         mulxq   32(%rsi),%rax,%rbx
  464         adoxq   %rax,%r12
  465         adcxq   %rbx,%r13
  466 
  467         mulxq   40(%rsi),%rdi,%r8
  468         adoxq   %rdi,%r13
  469         adcxq   %r8,%r14
  470 
  471 .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
  472         adoxq   %rax,%r14
  473         adcxq   %rbx,%r15
  474 
  475 .byte   0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
  476         adoxq   %rdi,%r15
  477         adcxq   %rbp,%r8
  478         mulxq   %rdx,%rax,%rdi
  479         adoxq   %rbp,%r8
  480 .byte   0x48,0x8b,0x96,0x10,0x00,0x00,0x00
  481 
  482         xorq    %rbx,%rbx
  483         adoxq   %r9,%r9
  484 
  485         adcxq   %rcx,%rax
  486         adoxq   %r10,%r10
  487         adcxq   %rax,%r9
  488         adoxq   %rbp,%rbx
  489         adcxq   %rdi,%r10
  490         adcxq   %rbp,%rbx
  491 
  492         movq    %r9,16(%rsp)
  493 .byte   0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
  494 
  495 
  496         mulxq   24(%rsi),%rdi,%r9
  497         adoxq   %rdi,%r12
  498         adcxq   %r9,%r13
  499 
  500         mulxq   32(%rsi),%rax,%rcx
  501         adoxq   %rax,%r13
  502         adcxq   %rcx,%r14
  503 
  504 .byte   0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
  505         adoxq   %rdi,%r14
  506         adcxq   %r9,%r15
  507 
  508 .byte   0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
  509         adoxq   %rax,%r15
  510         adcxq   %rcx,%r8
  511 
  512         mulxq   56(%rsi),%rdi,%r9
  513         adoxq   %rdi,%r8
  514         adcxq   %rbp,%r9
  515         mulxq   %rdx,%rax,%rdi
  516         adoxq   %rbp,%r9
  517         movq    24(%rsi),%rdx
  518 
  519         xorq    %rcx,%rcx
  520         adoxq   %r11,%r11
  521 
  522         adcxq   %rbx,%rax
  523         adoxq   %r12,%r12
  524         adcxq   %rax,%r11
  525         adoxq   %rbp,%rcx
  526         adcxq   %rdi,%r12
  527         adcxq   %rbp,%rcx
  528 
  529         movq    %r11,32(%rsp)
  530         movq    %r12,40(%rsp)
  531 
  532 
  533         mulxq   32(%rsi),%rax,%rbx
  534         adoxq   %rax,%r14
  535         adcxq   %rbx,%r15
  536 
  537         mulxq   40(%rsi),%rdi,%r10
  538         adoxq   %rdi,%r15
  539         adcxq   %r10,%r8
  540 
  541         mulxq   48(%rsi),%rax,%rbx
  542         adoxq   %rax,%r8
  543         adcxq   %rbx,%r9
  544 
  545         mulxq   56(%rsi),%rdi,%r10
  546         adoxq   %rdi,%r9
  547         adcxq   %rbp,%r10
  548         mulxq   %rdx,%rax,%rdi
  549         adoxq   %rbp,%r10
  550         movq    32(%rsi),%rdx
  551 
  552         xorq    %rbx,%rbx
  553         adoxq   %r13,%r13
  554 
  555         adcxq   %rcx,%rax
  556         adoxq   %r14,%r14
  557         adcxq   %rax,%r13
  558         adoxq   %rbp,%rbx
  559         adcxq   %rdi,%r14
  560         adcxq   %rbp,%rbx
  561 
  562         movq    %r13,48(%rsp)
  563         movq    %r14,56(%rsp)
  564 
  565 
  566         mulxq   40(%rsi),%rdi,%r11
  567         adoxq   %rdi,%r8
  568         adcxq   %r11,%r9
  569 
  570         mulxq   48(%rsi),%rax,%rcx
  571         adoxq   %rax,%r9
  572         adcxq   %rcx,%r10
  573 
  574         mulxq   56(%rsi),%rdi,%r11
  575         adoxq   %rdi,%r10
  576         adcxq   %rbp,%r11
  577         mulxq   %rdx,%rax,%rdi
  578         movq    40(%rsi),%rdx
  579         adoxq   %rbp,%r11
  580 
  581         xorq    %rcx,%rcx
  582         adoxq   %r15,%r15
  583 
  584         adcxq   %rbx,%rax
  585         adoxq   %r8,%r8
  586         adcxq   %rax,%r15
  587         adoxq   %rbp,%rcx
  588         adcxq   %rdi,%r8
  589         adcxq   %rbp,%rcx
  590 
  591         movq    %r15,64(%rsp)
  592         movq    %r8,72(%rsp)
  593 
  594 
  595 .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
  596         adoxq   %rax,%r10
  597         adcxq   %rbx,%r11
  598 
  599 .byte   0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
  600         adoxq   %rdi,%r11
  601         adcxq   %rbp,%r12
  602         mulxq   %rdx,%rax,%rdi
  603         adoxq   %rbp,%r12
  604         movq    48(%rsi),%rdx
  605 
  606         xorq    %rbx,%rbx
  607         adoxq   %r9,%r9
  608 
  609         adcxq   %rcx,%rax
  610         adoxq   %r10,%r10
  611         adcxq   %rax,%r9
  612         adcxq   %rdi,%r10
  613         adoxq   %rbp,%rbx
  614         adcxq   %rbp,%rbx
  615 
  616         movq    %r9,80(%rsp)
  617         movq    %r10,88(%rsp)
  618 
  619 
  620 .byte   0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
  621         adoxq   %rax,%r12
  622         adoxq   %rbp,%r13
  623 
  624         mulxq   %rdx,%rax,%rdi
  625         xorq    %rcx,%rcx
  626         movq    56(%rsi),%rdx
  627         adoxq   %r11,%r11
  628 
  629         adcxq   %rbx,%rax
  630         adoxq   %r12,%r12
  631         adcxq   %rax,%r11
  632         adoxq   %rbp,%rcx
  633         adcxq   %rdi,%r12
  634         adcxq   %rbp,%rcx
  635 
  636 .byte   0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
  637 .byte   0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
  638 
  639 
  640         mulxq   %rdx,%rax,%rdx
  641         xorq    %rbx,%rbx
  642         adoxq   %r13,%r13
  643 
  644         adcxq   %rcx,%rax
  645         adoxq   %rbp,%rbx
  646         adcxq   %r13,%rax
  647         adcxq   %rdx,%rbx
  648 
  649 .byte   102,72,15,126,199
  650 .byte   102,72,15,126,205
  651 
  652         movq    128(%rsp),%rdx
  653         movq    (%rsp),%r8
  654         movq    8(%rsp),%r9
  655         movq    16(%rsp),%r10
  656         movq    24(%rsp),%r11
  657         movq    32(%rsp),%r12
  658         movq    40(%rsp),%r13
  659         movq    48(%rsp),%r14
  660         movq    56(%rsp),%r15
  661 
  662         movq    %rax,112(%rsp)
  663         movq    %rbx,120(%rsp)
  664 
  665         call    __rsaz_512_reducex
  666 
  667         addq    64(%rsp),%r8
  668         adcq    72(%rsp),%r9
  669         adcq    80(%rsp),%r10
  670         adcq    88(%rsp),%r11
  671         adcq    96(%rsp),%r12
  672         adcq    104(%rsp),%r13
  673         adcq    112(%rsp),%r14
  674         adcq    120(%rsp),%r15
  675         sbbq    %rcx,%rcx
  676 
  677         call    __rsaz_512_subtract
  678 
  679         movq    %r8,%rdx
  680         movq    %r9,%rax
  681         movl    128+8(%rsp),%r8d
  682         movq    %rdi,%rsi
  683 
  684         decl    %r8d
  685         jnz     .Loop_sqrx
  686 
  687 .Lsqr_tail:
  688 
  689         leaq    128+24+48(%rsp),%rax
  690 .cfi_def_cfa    %rax,8
  691         movq    -48(%rax),%r15
  692 .cfi_restore    %r15
  693         movq    -40(%rax),%r14
  694 .cfi_restore    %r14
  695         movq    -32(%rax),%r13
  696 .cfi_restore    %r13
  697         movq    -24(%rax),%r12
  698 .cfi_restore    %r12
  699         movq    -16(%rax),%rbp
  700 .cfi_restore    %rbp
  701         movq    -8(%rax),%rbx
  702 .cfi_restore    %rbx
  703         leaq    (%rax),%rsp
  704 .cfi_def_cfa_register   %rsp
  705 .Lsqr_epilogue:
  706         .byte   0xf3,0xc3
  707 .cfi_endproc    
  708 .size   rsaz_512_sqr,.-rsaz_512_sqr
# -----------------------------------------------------------------------------
# rsaz_512_mul -- 512-bit Montgomery multiplication.
# Auto-generated from rsaz-x86_64.pl; SysV AMD64 ABI, AT&T syntax.
# In (apparent from usage -- confirm against rsaz-x86_64.pl):
#   %rdi = output, %rsi = multiplicand a, %rdx = multiplier b,
#   %rcx = modulus, %r8 = n0 (spilled to 128(%rsp)).
# Thin dispatcher: picks __rsaz_512_mul/__rsaz_512_reduce (legacy) or
# __rsaz_512_mulx/__rsaz_512_reducex (MULX+ADX CPUs), then does the common
# add-high-half / conditional-subtract tail and restores registers.
# -----------------------------------------------------------------------------
  709 .globl  rsaz_512_mul
  710 .type   rsaz_512_mul,@function
  711 .align  32
  712 rsaz_512_mul:
  713 .cfi_startproc  
# Save all callee-saved GPRs with CFI tracking.
  714         pushq   %rbx
  715 .cfi_adjust_cfa_offset  8
  716 .cfi_offset     %rbx,-16
  717         pushq   %rbp
  718 .cfi_adjust_cfa_offset  8
  719 .cfi_offset     %rbp,-24
  720         pushq   %r12
  721 .cfi_adjust_cfa_offset  8
  722 .cfi_offset     %r12,-32
  723         pushq   %r13
  724 .cfi_adjust_cfa_offset  8
  725 .cfi_offset     %r13,-40
  726         pushq   %r14
  727 .cfi_adjust_cfa_offset  8
  728 .cfi_offset     %r14,-48
  729         pushq   %r15
  730 .cfi_adjust_cfa_offset  8
  731 .cfi_offset     %r15,-56
  732 
# 128 bytes for the 16-limb product + 24 for spilled arguments.
  733         subq    $128+24,%rsp
  734 .cfi_adjust_cfa_offset  128+24
  735 .Lmul_body:
# .byte == movq %rdi,%xmm0 ; movq %rcx,%xmm1 : park output and modulus
# pointers in xmm registers across the multiply.
  736 .byte   102,72,15,110,199
  737 .byte   102,72,15,110,201
  738         movq    %r8,128(%rsp)
# Dispatch on BMI2(MULX)+ADX capability bits (0x80100) in OPENSSL_ia32cap_P.
  739         movl    $0x80100,%r11d
  740         andl    OPENSSL_ia32cap_P+8(%rip),%r11d
  741         cmpl    $0x80100,%r11d
  742         je      .Lmulx
# Legacy path: rbx = b[0], rbp = b; helper writes the product to (%rsp).
  743         movq    (%rdx),%rbx
  744         movq    %rdx,%rbp
  745         call    __rsaz_512_mul
  746 
# .byte == movq %xmm0,%rdi ; movq %xmm1,%rbp : restore output/modulus ptrs.
  747 .byte   102,72,15,126,199
  748 .byte   102,72,15,126,205
  749 
  750         movq    (%rsp),%r8
  751         movq    8(%rsp),%r9
  752         movq    16(%rsp),%r10
  753         movq    24(%rsp),%r11
  754         movq    32(%rsp),%r12
  755         movq    40(%rsp),%r13
  756         movq    48(%rsp),%r14
  757         movq    56(%rsp),%r15
  758 
  759         call    __rsaz_512_reduce
  760         jmp     .Lmul_tail
  761 
  762 .align  32
# MULX/ADX path: rdx carries the multiplier limb for mulx.
  763 .Lmulx:
  764         movq    %rdx,%rbp
  765         movq    (%rdx),%rdx
  766         call    __rsaz_512_mulx
  767 
  768 .byte   102,72,15,126,199
  769 .byte   102,72,15,126,205
  770 
# rdx = n0 for the reduction helper; r8..r15 = low product half.
  771         movq    128(%rsp),%rdx
  772         movq    (%rsp),%r8
  773         movq    8(%rsp),%r9
  774         movq    16(%rsp),%r10
  775         movq    24(%rsp),%r11
  776         movq    32(%rsp),%r12
  777         movq    40(%rsp),%r13
  778         movq    48(%rsp),%r14
  779         movq    56(%rsp),%r15
  780 
  781         call    __rsaz_512_reducex
# Common tail: add high half of the product, conditionally subtract the
# modulus (rcx = borrow mask), and write the result via the helper.
  782 .Lmul_tail:
  783         addq    64(%rsp),%r8
  784         adcq    72(%rsp),%r9
  785         adcq    80(%rsp),%r10
  786         adcq    88(%rsp),%r11
  787         adcq    96(%rsp),%r12
  788         adcq    104(%rsp),%r13
  789         adcq    112(%rsp),%r14
  790         adcq    120(%rsp),%r15
  791         sbbq    %rcx,%rcx
  792 
  793         call    __rsaz_512_subtract
  794 
# Epilogue: rax = CFA; restore callee-saved registers and return.
  795         leaq    128+24+48(%rsp),%rax
  796 .cfi_def_cfa    %rax,8
  797         movq    -48(%rax),%r15
  798 .cfi_restore    %r15
  799         movq    -40(%rax),%r14
  800 .cfi_restore    %r14
  801         movq    -32(%rax),%r13
  802 .cfi_restore    %r13
  803         movq    -24(%rax),%r12
  804 .cfi_restore    %r12
  805         movq    -16(%rax),%rbp
  806 .cfi_restore    %rbp
  807         movq    -8(%rax),%rbx
  808 .cfi_restore    %rbx
  809         leaq    (%rax),%rsp
  810 .cfi_def_cfa_register   %rsp
  811 .Lmul_epilogue:
# .byte 0xf3,0xc3 == rep ret.
  812         .byte   0xf3,0xc3
  813 .cfi_endproc    
  814 .size   rsaz_512_mul,.-rsaz_512_mul
  815 .globl  rsaz_512_mul_gather4
  816 .type   rsaz_512_mul_gather4,@function
  817 .align  32
  818 rsaz_512_mul_gather4:
  819 .cfi_startproc  
  820         pushq   %rbx
  821 .cfi_adjust_cfa_offset  8
  822 .cfi_offset     %rbx,-16
  823         pushq   %rbp
  824 .cfi_adjust_cfa_offset  8
  825 .cfi_offset     %rbp,-24
  826         pushq   %r12
  827 .cfi_adjust_cfa_offset  8
  828 .cfi_offset     %r12,-32
  829         pushq   %r13
  830 .cfi_adjust_cfa_offset  8
  831 .cfi_offset     %r13,-40
  832         pushq   %r14
  833 .cfi_adjust_cfa_offset  8
  834 .cfi_offset     %r14,-48
  835         pushq   %r15
  836 .cfi_adjust_cfa_offset  8
  837 .cfi_offset     %r15,-56
  838 
  839         subq    $152,%rsp
  840 .cfi_adjust_cfa_offset  152
  841 .Lmul_gather4_body:
  842         movd    %r9d,%xmm8
  843         movdqa  .Linc+16(%rip),%xmm1
  844         movdqa  .Linc(%rip),%xmm0
  845 
  846         pshufd  $0,%xmm8,%xmm8
  847         movdqa  %xmm1,%xmm7
  848         movdqa  %xmm1,%xmm2
  849         paddd   %xmm0,%xmm1
  850         pcmpeqd %xmm8,%xmm0
  851         movdqa  %xmm7,%xmm3
  852         paddd   %xmm1,%xmm2
  853         pcmpeqd %xmm8,%xmm1
  854         movdqa  %xmm7,%xmm4
  855         paddd   %xmm2,%xmm3
  856         pcmpeqd %xmm8,%xmm2
  857         movdqa  %xmm7,%xmm5
  858         paddd   %xmm3,%xmm4
  859         pcmpeqd %xmm8,%xmm3
  860         movdqa  %xmm7,%xmm6
  861         paddd   %xmm4,%xmm5
  862         pcmpeqd %xmm8,%xmm4
  863         paddd   %xmm5,%xmm6
  864         pcmpeqd %xmm8,%xmm5
  865         paddd   %xmm6,%xmm7
  866         pcmpeqd %xmm8,%xmm6
  867         pcmpeqd %xmm8,%xmm7
  868 
  869         movdqa  0(%rdx),%xmm8
  870         movdqa  16(%rdx),%xmm9
  871         movdqa  32(%rdx),%xmm10
  872         movdqa  48(%rdx),%xmm11
  873         pand    %xmm0,%xmm8
  874         movdqa  64(%rdx),%xmm12
  875         pand    %xmm1,%xmm9
  876         movdqa  80(%rdx),%xmm13
  877         pand    %xmm2,%xmm10
  878         movdqa  96(%rdx),%xmm14
  879         pand    %xmm3,%xmm11
  880         movdqa  112(%rdx),%xmm15
  881         leaq    128(%rdx),%rbp
  882         pand    %xmm4,%xmm12
  883         pand    %xmm5,%xmm13
  884         pand    %xmm6,%xmm14
  885         pand    %xmm7,%xmm15
  886         por     %xmm10,%xmm8
  887         por     %xmm11,%xmm9
  888         por     %xmm12,%xmm8
  889         por     %xmm13,%xmm9
  890         por     %xmm14,%xmm8
  891         por     %xmm15,%xmm9
  892 
  893         por     %xmm9,%xmm8
  894         pshufd  $0x4e,%xmm8,%xmm9
  895         por     %xmm9,%xmm8
  896         movl    $0x80100,%r11d
  897         andl    OPENSSL_ia32cap_P+8(%rip),%r11d
  898         cmpl    $0x80100,%r11d
  899         je      .Lmulx_gather
  900 .byte   102,76,15,126,195
  901 
  902         movq    %r8,128(%rsp)
  903         movq    %rdi,128+8(%rsp)
  904         movq    %rcx,128+16(%rsp)
  905 
  906         movq    (%rsi),%rax
  907         movq    8(%rsi),%rcx
  908         mulq    %rbx
  909         movq    %rax,(%rsp)
  910         movq    %rcx,%rax
  911         movq    %rdx,%r8
  912 
  913         mulq    %rbx
  914         addq    %rax,%r8
  915         movq    16(%rsi),%rax
  916         movq    %rdx,%r9
  917         adcq    $0,%r9
  918 
  919         mulq    %rbx
  920         addq    %rax,%r9
  921         movq    24(%rsi),%rax
  922         movq    %rdx,%r10
  923         adcq    $0,%r10
  924 
  925         mulq    %rbx
  926         addq    %rax,%r10
  927         movq    32(%rsi),%rax
  928         movq    %rdx,%r11
  929         adcq    $0,%r11
  930 
  931         mulq    %rbx
  932         addq    %rax,%r11
  933         movq    40(%rsi),%rax
  934         movq    %rdx,%r12
  935         adcq    $0,%r12
  936 
  937         mulq    %rbx
  938         addq    %rax,%r12
  939         movq    48(%rsi),%rax
  940         movq    %rdx,%r13
  941         adcq    $0,%r13
  942 
  943         mulq    %rbx
  944         addq    %rax,%r13
  945         movq    56(%rsi),%rax
  946         movq    %rdx,%r14
  947         adcq    $0,%r14
  948 
  949         mulq    %rbx
  950         addq    %rax,%r14
  951         movq    (%rsi),%rax
  952         movq    %rdx,%r15
  953         adcq    $0,%r15
  954 
  955         leaq    8(%rsp),%rdi
  956         movl    $7,%ecx
  957         jmp     .Loop_mul_gather
  958 
  959 .align  32
  960 .Loop_mul_gather:
  961         movdqa  0(%rbp),%xmm8
  962         movdqa  16(%rbp),%xmm9
  963         movdqa  32(%rbp),%xmm10
  964         movdqa  48(%rbp),%xmm11
  965         pand    %xmm0,%xmm8
  966         movdqa  64(%rbp),%xmm12
  967         pand    %xmm1,%xmm9
  968         movdqa  80(%rbp),%xmm13
  969         pand    %xmm2,%xmm10
  970         movdqa  96(%rbp),%xmm14
  971         pand    %xmm3,%xmm11
  972         movdqa  112(%rbp),%xmm15
  973         leaq    128(%rbp),%rbp
  974         pand    %xmm4,%xmm12
  975         pand    %xmm5,%xmm13
  976         pand    %xmm6,%xmm14
  977         pand    %xmm7,%xmm15
  978         por     %xmm10,%xmm8
  979         por     %xmm11,%xmm9
  980         por     %xmm12,%xmm8
  981         por     %xmm13,%xmm9
  982         por     %xmm14,%xmm8
  983         por     %xmm15,%xmm9
  984 
  985         por     %xmm9,%xmm8
  986         pshufd  $0x4e,%xmm8,%xmm9
  987         por     %xmm9,%xmm8
  988 .byte   102,76,15,126,195
  989 
  990         mulq    %rbx
  991         addq    %rax,%r8
  992         movq    8(%rsi),%rax
  993         movq    %r8,(%rdi)
  994         movq    %rdx,%r8
  995         adcq    $0,%r8
  996 
  997         mulq    %rbx
  998         addq    %rax,%r9
  999         movq    16(%rsi),%rax
 1000         adcq    $0,%rdx
 1001         addq    %r9,%r8
 1002         movq    %rdx,%r9
 1003         adcq    $0,%r9
 1004 
 1005         mulq    %rbx
 1006         addq    %rax,%r10
 1007         movq    24(%rsi),%rax
 1008         adcq    $0,%rdx
 1009         addq    %r10,%r9
 1010         movq    %rdx,%r10
 1011         adcq    $0,%r10
 1012 
 1013         mulq    %rbx
 1014         addq    %rax,%r11
 1015         movq    32(%rsi),%rax
 1016         adcq    $0,%rdx
 1017         addq    %r11,%r10
 1018         movq    %rdx,%r11
 1019         adcq    $0,%r11
 1020 
 1021         mulq    %rbx
 1022         addq    %rax,%r12
 1023         movq    40(%rsi),%rax
 1024         adcq    $0,%rdx
 1025         addq    %r12,%r11
 1026         movq    %rdx,%r12
 1027         adcq    $0,%r12
 1028 
 1029         mulq    %rbx
 1030         addq    %rax,%r13
 1031         movq    48(%rsi),%rax
 1032         adcq    $0,%rdx
 1033         addq    %r13,%r12
 1034         movq    %rdx,%r13
 1035         adcq    $0,%r13
 1036 
 1037         mulq    %rbx
 1038         addq    %rax,%r14
 1039         movq    56(%rsi),%rax
 1040         adcq    $0,%rdx
 1041         addq    %r14,%r13
 1042         movq    %rdx,%r14
 1043         adcq    $0,%r14
 1044 
 1045         mulq    %rbx
 1046         addq    %rax,%r15
 1047         movq    (%rsi),%rax
 1048         adcq    $0,%rdx
 1049         addq    %r15,%r14
 1050         movq    %rdx,%r15
 1051         adcq    $0,%r15
 1052 
 1053         leaq    8(%rdi),%rdi
 1054 
 1055         decl    %ecx
 1056         jnz     .Loop_mul_gather
 1057 
 1058         movq    %r8,(%rdi)
 1059         movq    %r9,8(%rdi)
 1060         movq    %r10,16(%rdi)
 1061         movq    %r11,24(%rdi)
 1062         movq    %r12,32(%rdi)
 1063         movq    %r13,40(%rdi)
 1064         movq    %r14,48(%rdi)
 1065         movq    %r15,56(%rdi)
 1066 
 1067         movq    128+8(%rsp),%rdi
 1068         movq    128+16(%rsp),%rbp
 1069 
 1070         movq    (%rsp),%r8
 1071         movq    8(%rsp),%r9
 1072         movq    16(%rsp),%r10
 1073         movq    24(%rsp),%r11
 1074         movq    32(%rsp),%r12
 1075         movq    40(%rsp),%r13
 1076         movq    48(%rsp),%r14
 1077         movq    56(%rsp),%r15
 1078 
 1079         call    __rsaz_512_reduce
 1080         jmp     .Lmul_gather_tail
 1081 
 1082 .align  32
 1083 .Lmulx_gather:
 1084 .byte   102,76,15,126,194
 1085 
 1086         movq    %r8,128(%rsp)
 1087         movq    %rdi,128+8(%rsp)
 1088         movq    %rcx,128+16(%rsp)
 1089 
 1090         mulxq   (%rsi),%rbx,%r8
 1091         movq    %rbx,(%rsp)
 1092         xorl    %edi,%edi
 1093 
 1094         mulxq   8(%rsi),%rax,%r9
 1095 
 1096         mulxq   16(%rsi),%rbx,%r10
 1097         adcxq   %rax,%r8
 1098 
 1099         mulxq   24(%rsi),%rax,%r11
 1100         adcxq   %rbx,%r9
 1101 
 1102         mulxq   32(%rsi),%rbx,%r12
 1103         adcxq   %rax,%r10
 1104 
 1105         mulxq   40(%rsi),%rax,%r13
 1106         adcxq   %rbx,%r11
 1107 
 1108         mulxq   48(%rsi),%rbx,%r14
 1109         adcxq   %rax,%r12
 1110 
 1111         mulxq   56(%rsi),%rax,%r15
 1112         adcxq   %rbx,%r13
 1113         adcxq   %rax,%r14
 1114 .byte   0x67
 1115         movq    %r8,%rbx
 1116         adcxq   %rdi,%r15
 1117 
 1118         movq    $-7,%rcx
 1119         jmp     .Loop_mulx_gather
 1120 
 1121 .align  32
 1122 .Loop_mulx_gather:
 1123         movdqa  0(%rbp),%xmm8
 1124         movdqa  16(%rbp),%xmm9
 1125         movdqa  32(%rbp),%xmm10
 1126         movdqa  48(%rbp),%xmm11
 1127         pand    %xmm0,%xmm8
 1128         movdqa  64(%rbp),%xmm12
 1129         pand    %xmm1,%xmm9
 1130         movdqa  80(%rbp),%xmm13
 1131         pand    %xmm2,%xmm10
 1132         movdqa  96(%rbp),%xmm14
 1133         pand    %xmm3,%xmm11
 1134         movdqa  112(%rbp),%xmm15
 1135         leaq    128(%rbp),%rbp
 1136         pand    %xmm4,%xmm12
 1137         pand    %xmm5,%xmm13
 1138         pand    %xmm6,%xmm14
 1139         pand    %xmm7,%xmm15
 1140         por     %xmm10,%xmm8
 1141         por     %xmm11,%xmm9
 1142         por     %xmm12,%xmm8
 1143         por     %xmm13,%xmm9
 1144         por     %xmm14,%xmm8
 1145         por     %xmm15,%xmm9
 1146 
 1147         por     %xmm9,%xmm8
 1148         pshufd  $0x4e,%xmm8,%xmm9
 1149         por     %xmm9,%xmm8
 1150 .byte   102,76,15,126,194
 1151 
 1152 .byte   0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
 1153         adcxq   %rax,%rbx
 1154         adoxq   %r9,%r8
 1155 
 1156         mulxq   8(%rsi),%rax,%r9
 1157         adcxq   %rax,%r8
 1158         adoxq   %r10,%r9
 1159 
 1160         mulxq   16(%rsi),%rax,%r10
 1161         adcxq   %rax,%r9
 1162         adoxq   %r11,%r10
 1163 
 1164 .byte   0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
 1165         adcxq   %rax,%r10
 1166         adoxq   %r12,%r11
 1167 
 1168         mulxq   32(%rsi),%rax,%r12
 1169         adcxq   %rax,%r11
 1170         adoxq   %r13,%r12
 1171 
 1172         mulxq   40(%rsi),%rax,%r13
 1173         adcxq   %rax,%r12
 1174         adoxq   %r14,%r13
 1175 
 1176 .byte   0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
 1177         adcxq   %rax,%r13
 1178 .byte   0x67
 1179         adoxq   %r15,%r14
 1180 
 1181         mulxq   56(%rsi),%rax,%r15
 1182         movq    %rbx,64(%rsp,%rcx,8)
 1183         adcxq   %rax,%r14
 1184         adoxq   %rdi,%r15
 1185         movq    %r8,%rbx
 1186         adcxq   %rdi,%r15
 1187 
 1188         incq    %rcx
 1189         jnz     .Loop_mulx_gather
 1190 
 1191         movq    %r8,64(%rsp)
 1192         movq    %r9,64+8(%rsp)
 1193         movq    %r10,64+16(%rsp)
 1194         movq    %r11,64+24(%rsp)
 1195         movq    %r12,64+32(%rsp)
 1196         movq    %r13,64+40(%rsp)
 1197         movq    %r14,64+48(%rsp)
 1198         movq    %r15,64+56(%rsp)
 1199 
 1200         movq    128(%rsp),%rdx
 1201         movq    128+8(%rsp),%rdi
 1202         movq    128+16(%rsp),%rbp
 1203 
 1204         movq    (%rsp),%r8
 1205         movq    8(%rsp),%r9
 1206         movq    16(%rsp),%r10
 1207         movq    24(%rsp),%r11
 1208         movq    32(%rsp),%r12
 1209         movq    40(%rsp),%r13
 1210         movq    48(%rsp),%r14
 1211         movq    56(%rsp),%r15
 1212 
 1213         call    __rsaz_512_reducex
 1214 
 1215 .Lmul_gather_tail:
 1216         addq    64(%rsp),%r8
 1217         adcq    72(%rsp),%r9
 1218         adcq    80(%rsp),%r10
 1219         adcq    88(%rsp),%r11
 1220         adcq    96(%rsp),%r12
 1221         adcq    104(%rsp),%r13
 1222         adcq    112(%rsp),%r14
 1223         adcq    120(%rsp),%r15
 1224         sbbq    %rcx,%rcx
 1225 
 1226         call    __rsaz_512_subtract
 1227 
 1228         leaq    128+24+48(%rsp),%rax
 1229 .cfi_def_cfa    %rax,8
 1230         movq    -48(%rax),%r15
 1231 .cfi_restore    %r15
 1232         movq    -40(%rax),%r14
 1233 .cfi_restore    %r14
 1234         movq    -32(%rax),%r13
 1235 .cfi_restore    %r13
 1236         movq    -24(%rax),%r12
 1237 .cfi_restore    %r12
 1238         movq    -16(%rax),%rbp
 1239 .cfi_restore    %rbp
 1240         movq    -8(%rax),%rbx
 1241 .cfi_restore    %rbx
 1242         leaq    (%rax),%rsp
 1243 .cfi_def_cfa_register   %rsp
 1244 .Lmul_gather4_epilogue:
 1245         .byte   0xf3,0xc3
 1246 .cfi_endproc    
 1247 .size   rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
 1248 .globl  rsaz_512_mul_scatter4
 1249 .type   rsaz_512_mul_scatter4,@function
 1250 .align  32
 1251 rsaz_512_mul_scatter4:
 1252 .cfi_startproc  
 1253         pushq   %rbx
 1254 .cfi_adjust_cfa_offset  8
 1255 .cfi_offset     %rbx,-16
 1256         pushq   %rbp
 1257 .cfi_adjust_cfa_offset  8
 1258 .cfi_offset     %rbp,-24
 1259         pushq   %r12
 1260 .cfi_adjust_cfa_offset  8
 1261 .cfi_offset     %r12,-32
 1262         pushq   %r13
 1263 .cfi_adjust_cfa_offset  8
 1264 .cfi_offset     %r13,-40
 1265         pushq   %r14
 1266 .cfi_adjust_cfa_offset  8
 1267 .cfi_offset     %r14,-48
 1268         pushq   %r15
 1269 .cfi_adjust_cfa_offset  8
 1270 .cfi_offset     %r15,-56
 1271 
 1272         movl    %r9d,%r9d
 1273         subq    $128+24,%rsp
 1274 .cfi_adjust_cfa_offset  128+24
 1275 .Lmul_scatter4_body:
 1276         leaq    (%r8,%r9,8),%r8
 1277 .byte   102,72,15,110,199
 1278 .byte   102,72,15,110,202
 1279 .byte   102,73,15,110,208
 1280         movq    %rcx,128(%rsp)
 1281 
 1282         movq    %rdi,%rbp
 1283         movl    $0x80100,%r11d
 1284         andl    OPENSSL_ia32cap_P+8(%rip),%r11d
 1285         cmpl    $0x80100,%r11d
 1286         je      .Lmulx_scatter
 1287         movq    (%rdi),%rbx
 1288         call    __rsaz_512_mul
 1289 
 1290 .byte   102,72,15,126,199
 1291 .byte   102,72,15,126,205
 1292 
 1293         movq    (%rsp),%r8
 1294         movq    8(%rsp),%r9
 1295         movq    16(%rsp),%r10
 1296         movq    24(%rsp),%r11
 1297         movq    32(%rsp),%r12
 1298         movq    40(%rsp),%r13
 1299         movq    48(%rsp),%r14
 1300         movq    56(%rsp),%r15
 1301 
 1302         call    __rsaz_512_reduce
 1303         jmp     .Lmul_scatter_tail
 1304 
 1305 .align  32
 1306 .Lmulx_scatter:
 1307         movq    (%rdi),%rdx
 1308         call    __rsaz_512_mulx
 1309 
 1310 .byte   102,72,15,126,199
 1311 .byte   102,72,15,126,205
 1312 
 1313         movq    128(%rsp),%rdx
 1314         movq    (%rsp),%r8
 1315         movq    8(%rsp),%r9
 1316         movq    16(%rsp),%r10
 1317         movq    24(%rsp),%r11
 1318         movq    32(%rsp),%r12
 1319         movq    40(%rsp),%r13
 1320         movq    48(%rsp),%r14
 1321         movq    56(%rsp),%r15
 1322 
 1323         call    __rsaz_512_reducex
 1324 
 1325 .Lmul_scatter_tail:
 1326         addq    64(%rsp),%r8
 1327         adcq    72(%rsp),%r9
 1328         adcq    80(%rsp),%r10
 1329         adcq    88(%rsp),%r11
 1330         adcq    96(%rsp),%r12
 1331         adcq    104(%rsp),%r13
 1332         adcq    112(%rsp),%r14
 1333         adcq    120(%rsp),%r15
 1334 .byte   102,72,15,126,214
 1335         sbbq    %rcx,%rcx
 1336 
 1337         call    __rsaz_512_subtract
 1338 
 1339         movq    %r8,0(%rsi)
 1340         movq    %r9,128(%rsi)
 1341         movq    %r10,256(%rsi)
 1342         movq    %r11,384(%rsi)
 1343         movq    %r12,512(%rsi)
 1344         movq    %r13,640(%rsi)
 1345         movq    %r14,768(%rsi)
 1346         movq    %r15,896(%rsi)
 1347 
 1348         leaq    128+24+48(%rsp),%rax
 1349 .cfi_def_cfa    %rax,8
 1350         movq    -48(%rax),%r15
 1351 .cfi_restore    %r15
 1352         movq    -40(%rax),%r14
 1353 .cfi_restore    %r14
 1354         movq    -32(%rax),%r13
 1355 .cfi_restore    %r13
 1356         movq    -24(%rax),%r12
 1357 .cfi_restore    %r12
 1358         movq    -16(%rax),%rbp
 1359 .cfi_restore    %rbp
 1360         movq    -8(%rax),%rbx
 1361 .cfi_restore    %rbx
 1362         leaq    (%rax),%rsp
 1363 .cfi_def_cfa_register   %rsp
 1364 .Lmul_scatter4_epilogue:
 1365         .byte   0xf3,0xc3
 1366 .cfi_endproc    
 1367 .size   rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
 1368 .globl  rsaz_512_mul_by_one
 1369 .type   rsaz_512_mul_by_one,@function
 1370 .align  32
 1371 rsaz_512_mul_by_one:
 1372 .cfi_startproc  
 1373         pushq   %rbx
 1374 .cfi_adjust_cfa_offset  8
 1375 .cfi_offset     %rbx,-16
 1376         pushq   %rbp
 1377 .cfi_adjust_cfa_offset  8
 1378 .cfi_offset     %rbp,-24
 1379         pushq   %r12
 1380 .cfi_adjust_cfa_offset  8
 1381 .cfi_offset     %r12,-32
 1382         pushq   %r13
 1383 .cfi_adjust_cfa_offset  8
 1384 .cfi_offset     %r13,-40
 1385         pushq   %r14
 1386 .cfi_adjust_cfa_offset  8
 1387 .cfi_offset     %r14,-48
 1388         pushq   %r15
 1389 .cfi_adjust_cfa_offset  8
 1390 .cfi_offset     %r15,-56
 1391 
 1392         subq    $128+24,%rsp
 1393 .cfi_adjust_cfa_offset  128+24
 1394 .Lmul_by_one_body:
 1395         movl    OPENSSL_ia32cap_P+8(%rip),%eax
 1396         movq    %rdx,%rbp
 1397         movq    %rcx,128(%rsp)
 1398 
 1399         movq    (%rsi),%r8
 1400         pxor    %xmm0,%xmm0
 1401         movq    8(%rsi),%r9
 1402         movq    16(%rsi),%r10
 1403         movq    24(%rsi),%r11
 1404         movq    32(%rsi),%r12
 1405         movq    40(%rsi),%r13
 1406         movq    48(%rsi),%r14
 1407         movq    56(%rsi),%r15
 1408 
 1409         movdqa  %xmm0,(%rsp)
 1410         movdqa  %xmm0,16(%rsp)
 1411         movdqa  %xmm0,32(%rsp)
 1412         movdqa  %xmm0,48(%rsp)
 1413         movdqa  %xmm0,64(%rsp)
 1414         movdqa  %xmm0,80(%rsp)
 1415         movdqa  %xmm0,96(%rsp)
 1416         andl    $0x80100,%eax
 1417         cmpl    $0x80100,%eax
 1418         je      .Lby_one_callx
 1419         call    __rsaz_512_reduce
 1420         jmp     .Lby_one_tail
 1421 .align  32
 1422 .Lby_one_callx:
 1423         movq    128(%rsp),%rdx
 1424         call    __rsaz_512_reducex
 1425 .Lby_one_tail:
 1426         movq    %r8,(%rdi)
 1427         movq    %r9,8(%rdi)
 1428         movq    %r10,16(%rdi)
 1429         movq    %r11,24(%rdi)
 1430         movq    %r12,32(%rdi)
 1431         movq    %r13,40(%rdi)
 1432         movq    %r14,48(%rdi)
 1433         movq    %r15,56(%rdi)
 1434 
 1435         leaq    128+24+48(%rsp),%rax
 1436 .cfi_def_cfa    %rax,8
 1437         movq    -48(%rax),%r15
 1438 .cfi_restore    %r15
 1439         movq    -40(%rax),%r14
 1440 .cfi_restore    %r14
 1441         movq    -32(%rax),%r13
 1442 .cfi_restore    %r13
 1443         movq    -24(%rax),%r12
 1444 .cfi_restore    %r12
 1445         movq    -16(%rax),%rbp
 1446 .cfi_restore    %rbp
 1447         movq    -8(%rax),%rbx
 1448 .cfi_restore    %rbx
 1449         leaq    (%rax),%rsp
 1450 .cfi_def_cfa_register   %rsp
 1451 .Lmul_by_one_epilogue:
 1452         .byte   0xf3,0xc3
 1453 .cfi_endproc    
 1454 .size   rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
 1455 .type   __rsaz_512_reduce,@function
 1456 .align  32
 1457 __rsaz_512_reduce:
 1458 .cfi_startproc  
 1459         movq    %r8,%rbx
 1460         imulq   128+8(%rsp),%rbx
 1461         movq    0(%rbp),%rax
 1462         movl    $8,%ecx
 1463         jmp     .Lreduction_loop
 1464 
 1465 .align  32
 1466 .Lreduction_loop:
 1467         mulq    %rbx
 1468         movq    8(%rbp),%rax
 1469         negq    %r8
 1470         movq    %rdx,%r8
 1471         adcq    $0,%r8
 1472 
 1473         mulq    %rbx
 1474         addq    %rax,%r9
 1475         movq    16(%rbp),%rax
 1476         adcq    $0,%rdx
 1477         addq    %r9,%r8
 1478         movq    %rdx,%r9
 1479         adcq    $0,%r9
 1480 
 1481         mulq    %rbx
 1482         addq    %rax,%r10
 1483         movq    24(%rbp),%rax
 1484         adcq    $0,%rdx
 1485         addq    %r10,%r9
 1486         movq    %rdx,%r10
 1487         adcq    $0,%r10
 1488 
 1489         mulq    %rbx
 1490         addq    %rax,%r11
 1491         movq    32(%rbp),%rax
 1492         adcq    $0,%rdx
 1493         addq    %r11,%r10
 1494         movq    128+8(%rsp),%rsi
 1495 
 1496 
 1497         adcq    $0,%rdx
 1498         movq    %rdx,%r11
 1499 
 1500         mulq    %rbx
 1501         addq    %rax,%r12
 1502         movq    40(%rbp),%rax
 1503         adcq    $0,%rdx
 1504         imulq   %r8,%rsi
 1505         addq    %r12,%r11
 1506         movq    %rdx,%r12
 1507         adcq    $0,%r12
 1508 
 1509         mulq    %rbx
 1510         addq    %rax,%r13
 1511         movq    48(%rbp),%rax
 1512         adcq    $0,%rdx
 1513         addq    %r13,%r12
 1514         movq    %rdx,%r13
 1515         adcq    $0,%r13
 1516 
 1517         mulq    %rbx
 1518         addq    %rax,%r14
 1519         movq    56(%rbp),%rax
 1520         adcq    $0,%rdx
 1521         addq    %r14,%r13
 1522         movq    %rdx,%r14
 1523         adcq    $0,%r14
 1524 
 1525         mulq    %rbx
 1526         movq    %rsi,%rbx
 1527         addq    %rax,%r15
 1528         movq    0(%rbp),%rax
 1529         adcq    $0,%rdx
 1530         addq    %r15,%r14
 1531         movq    %rdx,%r15
 1532         adcq    $0,%r15
 1533 
 1534         decl    %ecx
 1535         jne     .Lreduction_loop
 1536 
 1537         .byte   0xf3,0xc3
 1538 .cfi_endproc    
 1539 .size   __rsaz_512_reduce,.-__rsaz_512_reduce
 1540 .type   __rsaz_512_reducex,@function
 1541 .align  32
 1542 __rsaz_512_reducex:
 1543 .cfi_startproc  
 1544 
 1545         imulq   %r8,%rdx
 1546         xorq    %rsi,%rsi
 1547         movl    $8,%ecx
 1548         jmp     .Lreduction_loopx
 1549 
 1550 .align  32
 1551 .Lreduction_loopx:
 1552         movq    %r8,%rbx
 1553         mulxq   0(%rbp),%rax,%r8
 1554         adcxq   %rbx,%rax
 1555         adoxq   %r9,%r8
 1556 
 1557         mulxq   8(%rbp),%rax,%r9
 1558         adcxq   %rax,%r8
 1559         adoxq   %r10,%r9
 1560 
 1561         mulxq   16(%rbp),%rbx,%r10
 1562         adcxq   %rbx,%r9
 1563         adoxq   %r11,%r10
 1564 
 1565         mulxq   24(%rbp),%rbx,%r11
 1566         adcxq   %rbx,%r10
 1567         adoxq   %r12,%r11
 1568 
 1569 .byte   0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
 1570         movq    %rdx,%rax
 1571         movq    %r8,%rdx
 1572         adcxq   %rbx,%r11
 1573         adoxq   %r13,%r12
 1574 
 1575         mulxq   128+8(%rsp),%rbx,%rdx
 1576         movq    %rax,%rdx
 1577 
 1578         mulxq   40(%rbp),%rax,%r13
 1579         adcxq   %rax,%r12
 1580         adoxq   %r14,%r13
 1581 
 1582 .byte   0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
 1583         adcxq   %rax,%r13
 1584         adoxq   %r15,%r14
 1585 
 1586         mulxq   56(%rbp),%rax,%r15
 1587         movq    %rbx,%rdx
 1588         adcxq   %rax,%r14
 1589         adoxq   %rsi,%r15
 1590         adcxq   %rsi,%r15
 1591 
 1592         decl    %ecx
 1593         jne     .Lreduction_loopx
 1594 
 1595         .byte   0xf3,0xc3
 1596 .cfi_endproc    
 1597 .size   __rsaz_512_reducex,.-__rsaz_512_reducex
 1598 .type   __rsaz_512_subtract,@function
 1599 .align  32
 1600 __rsaz_512_subtract:
 1601 .cfi_startproc  
 1602         movq    %r8,(%rdi)
 1603         movq    %r9,8(%rdi)
 1604         movq    %r10,16(%rdi)
 1605         movq    %r11,24(%rdi)
 1606         movq    %r12,32(%rdi)
 1607         movq    %r13,40(%rdi)
 1608         movq    %r14,48(%rdi)
 1609         movq    %r15,56(%rdi)
 1610 
 1611         movq    0(%rbp),%r8
 1612         movq    8(%rbp),%r9
 1613         negq    %r8
 1614         notq    %r9
 1615         andq    %rcx,%r8
 1616         movq    16(%rbp),%r10
 1617         andq    %rcx,%r9
 1618         notq    %r10
 1619         movq    24(%rbp),%r11
 1620         andq    %rcx,%r10
 1621         notq    %r11
 1622         movq    32(%rbp),%r12
 1623         andq    %rcx,%r11
 1624         notq    %r12
 1625         movq    40(%rbp),%r13
 1626         andq    %rcx,%r12
 1627         notq    %r13
 1628         movq    48(%rbp),%r14
 1629         andq    %rcx,%r13
 1630         notq    %r14
 1631         movq    56(%rbp),%r15
 1632         andq    %rcx,%r14
 1633         notq    %r15
 1634         andq    %rcx,%r15
 1635 
 1636         addq    (%rdi),%r8
 1637         adcq    8(%rdi),%r9
 1638         adcq    16(%rdi),%r10
 1639         adcq    24(%rdi),%r11
 1640         adcq    32(%rdi),%r12
 1641         adcq    40(%rdi),%r13
 1642         adcq    48(%rdi),%r14
 1643         adcq    56(%rdi),%r15
 1644 
 1645         movq    %r8,(%rdi)
 1646         movq    %r9,8(%rdi)
 1647         movq    %r10,16(%rdi)
 1648         movq    %r11,24(%rdi)
 1649         movq    %r12,32(%rdi)
 1650         movq    %r13,40(%rdi)
 1651         movq    %r14,48(%rdi)
 1652         movq    %r15,56(%rdi)
 1653 
 1654         .byte   0xf3,0xc3
 1655 .cfi_endproc    
 1656 .size   __rsaz_512_subtract,.-__rsaz_512_subtract
 1657 .type   __rsaz_512_mul,@function
 1658 .align  32
 1659 __rsaz_512_mul:
 1660 .cfi_startproc  
 1661         leaq    8(%rsp),%rdi
 1662 
 1663         movq    (%rsi),%rax
 1664         mulq    %rbx
 1665         movq    %rax,(%rdi)
 1666         movq    8(%rsi),%rax
 1667         movq    %rdx,%r8
 1668 
 1669         mulq    %rbx
 1670         addq    %rax,%r8
 1671         movq    16(%rsi),%rax
 1672         movq    %rdx,%r9
 1673         adcq    $0,%r9
 1674 
 1675         mulq    %rbx
 1676         addq    %rax,%r9
 1677         movq    24(%rsi),%rax
 1678         movq    %rdx,%r10
 1679         adcq    $0,%r10
 1680 
 1681         mulq    %rbx
 1682         addq    %rax,%r10
 1683         movq    32(%rsi),%rax
 1684         movq    %rdx,%r11
 1685         adcq    $0,%r11
 1686 
 1687         mulq    %rbx
 1688         addq    %rax,%r11
 1689         movq    40(%rsi),%rax
 1690         movq    %rdx,%r12
 1691         adcq    $0,%r12
 1692 
 1693         mulq    %rbx
 1694         addq    %rax,%r12
 1695         movq    48(%rsi),%rax
 1696         movq    %rdx,%r13
 1697         adcq    $0,%r13
 1698 
 1699         mulq    %rbx
 1700         addq    %rax,%r13
 1701         movq    56(%rsi),%rax
 1702         movq    %rdx,%r14
 1703         adcq    $0,%r14
 1704 
 1705         mulq    %rbx
 1706         addq    %rax,%r14
 1707         movq    (%rsi),%rax
 1708         movq    %rdx,%r15
 1709         adcq    $0,%r15
 1710 
 1711         leaq    8(%rbp),%rbp
 1712         leaq    8(%rdi),%rdi
 1713 
 1714         movl    $7,%ecx
 1715         jmp     .Loop_mul
 1716 
 1717 .align  32
 1718 .Loop_mul:
 1719         movq    (%rbp),%rbx
 1720         mulq    %rbx
 1721         addq    %rax,%r8
 1722         movq    8(%rsi),%rax
 1723         movq    %r8,(%rdi)
 1724         movq    %rdx,%r8
 1725         adcq    $0,%r8
 1726 
 1727         mulq    %rbx
 1728         addq    %rax,%r9
 1729         movq    16(%rsi),%rax
 1730         adcq    $0,%rdx
 1731         addq    %r9,%r8
 1732         movq    %rdx,%r9
 1733         adcq    $0,%r9
 1734 
 1735         mulq    %rbx
 1736         addq    %rax,%r10
 1737         movq    24(%rsi),%rax
 1738         adcq    $0,%rdx
 1739         addq    %r10,%r9
 1740         movq    %rdx,%r10
 1741         adcq    $0,%r10
 1742 
 1743         mulq    %rbx
 1744         addq    %rax,%r11
 1745         movq    32(%rsi),%rax
 1746         adcq    $0,%rdx
 1747         addq    %r11,%r10
 1748         movq    %rdx,%r11
 1749         adcq    $0,%r11
 1750 
 1751         mulq    %rbx
 1752         addq    %rax,%r12
 1753         movq    40(%rsi),%rax
 1754         adcq    $0,%rdx
 1755         addq    %r12,%r11
 1756         movq    %rdx,%r12
 1757         adcq    $0,%r12
 1758 
 1759         mulq    %rbx
 1760         addq    %rax,%r13
 1761         movq    48(%rsi),%rax
 1762         adcq    $0,%rdx
 1763         addq    %r13,%r12
 1764         movq    %rdx,%r13
 1765         adcq    $0,%r13
 1766 
 1767         mulq    %rbx
 1768         addq    %rax,%r14
 1769         movq    56(%rsi),%rax
 1770         adcq    $0,%rdx
 1771         addq    %r14,%r13
 1772         movq    %rdx,%r14
 1773         leaq    8(%rbp),%rbp
 1774         adcq    $0,%r14
 1775 
 1776         mulq    %rbx
 1777         addq    %rax,%r15
 1778         movq    (%rsi),%rax
 1779         adcq    $0,%rdx
 1780         addq    %r15,%r14
 1781         movq    %rdx,%r15
 1782         adcq    $0,%r15
 1783 
 1784         leaq    8(%rdi),%rdi
 1785 
 1786         decl    %ecx
 1787         jnz     .Loop_mul
 1788 
 1789         movq    %r8,(%rdi)
 1790         movq    %r9,8(%rdi)
 1791         movq    %r10,16(%rdi)
 1792         movq    %r11,24(%rdi)
 1793         movq    %r12,32(%rdi)
 1794         movq    %r13,40(%rdi)
 1795         movq    %r14,48(%rdi)
 1796         movq    %r15,56(%rdi)
 1797 
 1798         .byte   0xf3,0xc3
 1799 .cfi_endproc    
 1800 .size   __rsaz_512_mul,.-__rsaz_512_mul
 1801 .type   __rsaz_512_mulx,@function
 1802 .align  32
 1803 __rsaz_512_mulx:
 1804 .cfi_startproc  
 1805         mulxq   (%rsi),%rbx,%r8
 1806         movq    $-6,%rcx
 1807 
 1808         mulxq   8(%rsi),%rax,%r9
 1809         movq    %rbx,8(%rsp)
 1810 
 1811         mulxq   16(%rsi),%rbx,%r10
 1812         adcq    %rax,%r8
 1813 
 1814         mulxq   24(%rsi),%rax,%r11
 1815         adcq    %rbx,%r9
 1816 
 1817         mulxq   32(%rsi),%rbx,%r12
 1818         adcq    %rax,%r10
 1819 
 1820         mulxq   40(%rsi),%rax,%r13
 1821         adcq    %rbx,%r11
 1822 
 1823         mulxq   48(%rsi),%rbx,%r14
 1824         adcq    %rax,%r12
 1825 
 1826         mulxq   56(%rsi),%rax,%r15
 1827         movq    8(%rbp),%rdx
 1828         adcq    %rbx,%r13
 1829         adcq    %rax,%r14
 1830         adcq    $0,%r15
 1831 
 1832         xorq    %rdi,%rdi
 1833         jmp     .Loop_mulx
 1834 
 1835 .align  32
 1836 .Loop_mulx:
 1837         movq    %r8,%rbx
 1838         mulxq   (%rsi),%rax,%r8
 1839         adcxq   %rax,%rbx
 1840         adoxq   %r9,%r8
 1841 
 1842         mulxq   8(%rsi),%rax,%r9
 1843         adcxq   %rax,%r8
 1844         adoxq   %r10,%r9
 1845 
 1846         mulxq   16(%rsi),%rax,%r10
 1847         adcxq   %rax,%r9
 1848         adoxq   %r11,%r10
 1849 
 1850         mulxq   24(%rsi),%rax,%r11
 1851         adcxq   %rax,%r10
 1852         adoxq   %r12,%r11
 1853 
 1854 .byte   0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
 1855         adcxq   %rax,%r11
 1856         adoxq   %r13,%r12
 1857 
 1858         mulxq   40(%rsi),%rax,%r13
 1859         adcxq   %rax,%r12
 1860         adoxq   %r14,%r13
 1861 
 1862         mulxq   48(%rsi),%rax,%r14
 1863         adcxq   %rax,%r13
 1864         adoxq   %r15,%r14
 1865 
 1866         mulxq   56(%rsi),%rax,%r15
 1867         movq    64(%rbp,%rcx,8),%rdx
 1868         movq    %rbx,8+64-8(%rsp,%rcx,8)
 1869         adcxq   %rax,%r14
 1870         adoxq   %rdi,%r15
 1871         adcxq   %rdi,%r15
 1872 
 1873         incq    %rcx
 1874         jnz     .Loop_mulx
 1875 
 1876         movq    %r8,%rbx
 1877         mulxq   (%rsi),%rax,%r8
 1878         adcxq   %rax,%rbx
 1879         adoxq   %r9,%r8
 1880 
 1881 .byte   0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
 1882         adcxq   %rax,%r8
 1883         adoxq   %r10,%r9
 1884 
 1885 .byte   0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
 1886         adcxq   %rax,%r9
 1887         adoxq   %r11,%r10
 1888 
 1889         mulxq   24(%rsi),%rax,%r11
 1890         adcxq   %rax,%r10
 1891         adoxq   %r12,%r11
 1892 
 1893         mulxq   32(%rsi),%rax,%r12
 1894         adcxq   %rax,%r11
 1895         adoxq   %r13,%r12
 1896 
 1897         mulxq   40(%rsi),%rax,%r13
 1898         adcxq   %rax,%r12
 1899         adoxq   %r14,%r13
 1900 
 1901 .byte   0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
 1902         adcxq   %rax,%r13
 1903         adoxq   %r15,%r14
 1904 
 1905 .byte   0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
 1906         adcxq   %rax,%r14
 1907         adoxq   %rdi,%r15
 1908         adcxq   %rdi,%r15
 1909 
 1910         movq    %rbx,8+64-8(%rsp)
 1911         movq    %r8,8+64(%rsp)
 1912         movq    %r9,8+64+8(%rsp)
 1913         movq    %r10,8+64+16(%rsp)
 1914         movq    %r11,8+64+24(%rsp)
 1915         movq    %r12,8+64+32(%rsp)
 1916         movq    %r13,8+64+40(%rsp)
 1917         movq    %r14,8+64+48(%rsp)
 1918         movq    %r15,8+64+56(%rsp)
 1919 
 1920         .byte   0xf3,0xc3
 1921 .cfi_endproc    
 1922 .size   __rsaz_512_mulx,.-__rsaz_512_mulx
 1923 .globl  rsaz_512_scatter4
 1924 .type   rsaz_512_scatter4,@function
 1925 .align  16
 1926 rsaz_512_scatter4:
 1927 .cfi_startproc  
 1928         leaq    (%rdi,%rdx,8),%rdi
 1929         movl    $8,%r9d
 1930         jmp     .Loop_scatter
 1931 .align  16
 1932 .Loop_scatter:
 1933         movq    (%rsi),%rax
 1934         leaq    8(%rsi),%rsi
 1935         movq    %rax,(%rdi)
 1936         leaq    128(%rdi),%rdi
 1937         decl    %r9d
 1938         jnz     .Loop_scatter
 1939         .byte   0xf3,0xc3
 1940 .cfi_endproc    
 1941 .size   rsaz_512_scatter4,.-rsaz_512_scatter4
 1942 
 1943 .globl  rsaz_512_gather4
 1944 .type   rsaz_512_gather4,@function
 1945 .align  16
 1946 rsaz_512_gather4:
 1947 .cfi_startproc  
 1948         movd    %edx,%xmm8
 1949         movdqa  .Linc+16(%rip),%xmm1
 1950         movdqa  .Linc(%rip),%xmm0
 1951 
 1952         pshufd  $0,%xmm8,%xmm8
 1953         movdqa  %xmm1,%xmm7
 1954         movdqa  %xmm1,%xmm2
 1955         paddd   %xmm0,%xmm1
 1956         pcmpeqd %xmm8,%xmm0
 1957         movdqa  %xmm7,%xmm3
 1958         paddd   %xmm1,%xmm2
 1959         pcmpeqd %xmm8,%xmm1
 1960         movdqa  %xmm7,%xmm4
 1961         paddd   %xmm2,%xmm3
 1962         pcmpeqd %xmm8,%xmm2
 1963         movdqa  %xmm7,%xmm5
 1964         paddd   %xmm3,%xmm4
 1965         pcmpeqd %xmm8,%xmm3
 1966         movdqa  %xmm7,%xmm6
 1967         paddd   %xmm4,%xmm5
 1968         pcmpeqd %xmm8,%xmm4
 1969         paddd   %xmm5,%xmm6
 1970         pcmpeqd %xmm8,%xmm5
 1971         paddd   %xmm6,%xmm7
 1972         pcmpeqd %xmm8,%xmm6
 1973         pcmpeqd %xmm8,%xmm7
 1974         movl    $8,%r9d
 1975         jmp     .Loop_gather
 1976 .align  16
 1977 .Loop_gather:
 1978         movdqa  0(%rsi),%xmm8
 1979         movdqa  16(%rsi),%xmm9
 1980         movdqa  32(%rsi),%xmm10
 1981         movdqa  48(%rsi),%xmm11
 1982         pand    %xmm0,%xmm8
 1983         movdqa  64(%rsi),%xmm12
 1984         pand    %xmm1,%xmm9
 1985         movdqa  80(%rsi),%xmm13
 1986         pand    %xmm2,%xmm10
 1987         movdqa  96(%rsi),%xmm14
 1988         pand    %xmm3,%xmm11
 1989         movdqa  112(%rsi),%xmm15
 1990         leaq    128(%rsi),%rsi
 1991         pand    %xmm4,%xmm12
 1992         pand    %xmm5,%xmm13
 1993         pand    %xmm6,%xmm14
 1994         pand    %xmm7,%xmm15
 1995         por     %xmm10,%xmm8
 1996         por     %xmm11,%xmm9
 1997         por     %xmm12,%xmm8
 1998         por     %xmm13,%xmm9
 1999         por     %xmm14,%xmm8
 2000         por     %xmm15,%xmm9
 2001 
 2002         por     %xmm9,%xmm8
 2003         pshufd  $0x4e,%xmm8,%xmm9
 2004         por     %xmm9,%xmm8
 2005         movq    %xmm8,(%rdi)
 2006         leaq    8(%rdi),%rdi
 2007         decl    %r9d
 2008         jnz     .Loop_gather
 2009         .byte   0xf3,0xc3
 2010 .LSEH_end_rsaz_512_gather4:
 2011 .cfi_endproc    
 2012 .size   rsaz_512_gather4,.-rsaz_512_gather4
 2013 
 2014 .align  64
 2015 .Linc:
 2016 .long   0,0, 1,1
 2017 .long   2,2, 2,2

Cache object: 1f5c89196748143f0c8781c1a8a44654


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.