The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/amd64/x86_64-mont.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /* Do not modify. This file is auto-generated from x86_64-mont.pl. */
    3 .text   
    4 
    5 
    6 
/*
 * bn_mul_mont -- Montgomery multiplication front-end (SysV AMD64, AT&T syntax).
 * Argument roles (presumably rdi=rp, rsi=ap, rdx=bp, rcx=np, r8=&n0, r9=num,
 * i.e. the OpenSSL bn_mul_mont() signature -- confirm against x86_64-mont.pl):
 * the code visibly uses %r9 as the 64-bit-limb count, dereferences %r8 once
 * for the n0 scalar, and walks %rsi/%rcx/%r12(=rdx) as limb arrays.
 * Returns 1 in %rax.
 *
 * Dispatch: num not a multiple of 4, or num < 8  -> scalar path below;
 * otherwise if ap != bp -> bn_mul4x_mont; if ap == bp and num is a multiple
 * of 8 -> bn_sqr8x_mont.  %r11d is loaded with OPENSSL_ia32cap_P+8 here and
 * consumed by the 4x path's BMI2/ADX test.
 */
    7 .globl  bn_mul_mont
    8 .type   bn_mul_mont,@function
    9 .align  16
   10 bn_mul_mont:
   11 .cfi_startproc  
   12         movl    %r9d,%r9d
   13         movq    %rsp,%rax
   14 .cfi_def_cfa_register   %rax
   15         testl   $3,%r9d
   16         jnz     .Lmul_enter
   17         cmpl    $8,%r9d
   18         jb      .Lmul_enter
   19         movl    OPENSSL_ia32cap_P+8(%rip),%r11d
   20         cmpq    %rsi,%rdx
   21         jne     .Lmul4x_enter
   22         testl   $7,%r9d
   23         jz      .Lsqr8x_enter
   24         jmp     .Lmul4x_enter
   25 
/* Scalar (1-limb-at-a-time) path: save all callee-saved GPRs. */
   26 .align  16
   27 .Lmul_enter:
   28         pushq   %rbx
   29 .cfi_offset     %rbx,-16
   30         pushq   %rbp
   31 .cfi_offset     %rbp,-24
   32         pushq   %r12
   33 .cfi_offset     %r12,-32
   34         pushq   %r13
   35 .cfi_offset     %r13,-40
   36         pushq   %r14
   37 .cfi_offset     %r14,-48
   38         pushq   %r15
   39 .cfi_offset     %r15,-56
   40 
/*
 * Carve a temporary vector tp[] of num(+2) qwords below %rsp, rounded to a
 * 1024-byte boundary (%r10 = candidate new %rsp).
 */
   41         negq    %r9
   42         movq    %rsp,%r11
   43         leaq    -16(%rsp,%r9,8),%r10
   44         negq    %r9
   45         andq    $-1024,%r10
   46 
   47 
   48 
   49 
   50 
   51 
   52 
   53 
   54 
   55         subq    %r10,%r11
   56         andq    $-4096,%r11
   57         leaq    (%r10,%r11,1),%rsp
   58         movq    (%rsp),%r11
   59         cmpq    %r10,%rsp
   60         ja      .Lmul_page_walk
   61         jmp     .Lmul_page_walk_done
   62 
/* Probe the newly claimed stack one 4096-byte page at a time (guard-page walk). */
   63 .align  16
   64 .Lmul_page_walk:
   65         leaq    -4096(%rsp),%rsp
   66         movq    (%rsp),%r11
   67         cmpq    %r10,%rsp
   68         ja      .Lmul_page_walk
   69 .Lmul_page_walk_done:
   70 
/* Stash the caller's %rsp (saved in %rax at entry) just above tp[num]. */
   71         movq    %rax,8(%rsp,%r9,8)
/* DWARF expression describing the CFA via that saved-%rsp slot (generator-emitted). */
   72 .cfi_escape     0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
/*
 * Outer iteration 0: tp[] = ap[] * bp[0] plus the Montgomery step with
 * m (%rbp) = lo(ap[0]*bp[0]) * n0.  %r12 = bp, %rbx = bp[j], %r14 = outer
 * index i, %r15 = inner index j.
 */
   73 .Lmul_body:
   74         movq    %rdx,%r12
   75         movq    (%r8),%r8
   76         movq    (%r12),%rbx
   77         movq    (%rsi),%rax
   78 
   79         xorq    %r14,%r14
   80         xorq    %r15,%r15
   81 
   82         movq    %r8,%rbp
   83         mulq    %rbx
   84         movq    %rax,%r10
   85         movq    (%rcx),%rax
   86 
   87         imulq   %r10,%rbp
   88         movq    %rdx,%r11
   89 
   90         mulq    %rbp
   91         addq    %rax,%r10
   92         movq    8(%rsi),%rax
   93         adcq    $0,%rdx
   94         movq    %rdx,%r13
   95 
   96         leaq    1(%r15),%r15
   97         jmp     .L1st_enter
   98 
/* Inner loop of iteration 0: accumulate ap[j]*bp[0] + m*np[j] into tp[]. */
   99 .align  16
  100 .L1st:
  101         addq    %rax,%r13
  102         movq    (%rsi,%r15,8),%rax
  103         adcq    $0,%rdx
  104         addq    %r11,%r13
  105         movq    %r10,%r11
  106         adcq    $0,%rdx
  107         movq    %r13,-16(%rsp,%r15,8)
  108         movq    %rdx,%r13
  109 
  110 .L1st_enter:
  111         mulq    %rbx
  112         addq    %rax,%r11
  113         movq    (%rcx,%r15,8),%rax
  114         adcq    $0,%rdx
  115         leaq    1(%r15),%r15
  116         movq    %rdx,%r10
  117 
  118         mulq    %rbp
  119         cmpq    %r9,%r15
  120         jne     .L1st
  121 
  122         addq    %rax,%r13
  123         movq    (%rsi),%rax
  124         adcq    $0,%rdx
  125         addq    %r11,%r13
  126         adcq    $0,%rdx
  127         movq    %r13,-16(%rsp,%r15,8)
  128         movq    %rdx,%r13
  129         movq    %r10,%r11
  130 
  131         xorq    %rdx,%rdx
  132         addq    %r11,%r13
  133         adcq    $0,%rdx
  134         movq    %r13,-8(%rsp,%r9,8)
  135         movq    %rdx,(%rsp,%r9,8)
  136 
  137         leaq    1(%r14),%r14
  138         jmp     .Louter
/*
 * Outer iterations i = 1 .. num-1: tp[] = (tp[] + ap[]*bp[i] + m*np[]) / 2^64
 * with m = lo(tp[0] + ap[0]*bp[i]) * n0.  Loop trip count is data-independent.
 */
  139 .align  16
  140 .Louter:
  141         movq    (%r12,%r14,8),%rbx
  142         xorq    %r15,%r15
  143         movq    %r8,%rbp
  144         movq    (%rsp),%r10
  145         mulq    %rbx
  146         addq    %rax,%r10
  147         movq    (%rcx),%rax
  148         adcq    $0,%rdx
  149 
  150         imulq   %r10,%rbp
  151         movq    %rdx,%r11
  152 
  153         mulq    %rbp
  154         addq    %rax,%r10
  155         movq    8(%rsi),%rax
  156         adcq    $0,%rdx
  157         movq    8(%rsp),%r10
  158         movq    %rdx,%r13
  159 
  160         leaq    1(%r15),%r15
  161         jmp     .Linner_enter
  162 
  163 .align  16
  164 .Linner:
  165         addq    %rax,%r13
  166         movq    (%rsi,%r15,8),%rax
  167         adcq    $0,%rdx
  168         addq    %r10,%r13
  169         movq    (%rsp,%r15,8),%r10
  170         adcq    $0,%rdx
  171         movq    %r13,-16(%rsp,%r15,8)
  172         movq    %rdx,%r13
  173 
  174 .Linner_enter:
  175         mulq    %rbx
  176         addq    %rax,%r11
  177         movq    (%rcx,%r15,8),%rax
  178         adcq    $0,%rdx
  179         addq    %r11,%r10
  180         movq    %rdx,%r11
  181         adcq    $0,%r11
  182         leaq    1(%r15),%r15
  183 
  184         mulq    %rbp
  185         cmpq    %r9,%r15
  186         jne     .Linner
  187 
  188         addq    %rax,%r13
  189         movq    (%rsi),%rax
  190         adcq    $0,%rdx
  191         addq    %r10,%r13
  192         movq    (%rsp,%r15,8),%r10
  193         adcq    $0,%rdx
  194         movq    %r13,-16(%rsp,%r15,8)
  195         movq    %rdx,%r13
  196 
  197         xorq    %rdx,%rdx
  198         addq    %r11,%r13
  199         adcq    $0,%rdx
  200         addq    %r10,%r13
  201         adcq    $0,%rdx
  202         movq    %r13,-8(%rsp,%r9,8)
  203         movq    %rdx,(%rsp,%r9,8)
  204 
  205         leaq    1(%r14),%r14
  206         cmpq    %r9,%r14
  207         jb      .Louter
  208 
/*
 * Final reduction, step 1: rp[] = tp[] - np[] with borrow propagation.
 * The xorq clears CF, so the first sbbq is a plain subtract.
 */
  209         xorq    %r14,%r14
  210         movq    (%rsp),%rax
  211         movq    %r9,%r15
  212 
  213 .align  16
  214 .Lsub:  sbbq    (%rcx,%r14,8),%rax
  215         movq    %rax,(%rdi,%r14,8)
  216         movq    8(%rsp,%r14,8),%rax
  217         leaq    1(%r14),%r14
  218         decq    %r15
  219         jnz     .Lsub
  220 
/*
 * Step 2: fold the top word into the final borrow; %rax becomes an
 * all-zeros/all-ones selection mask and %rbx its complement (~%rax).
 */
  221         sbbq    $0,%rax
  222         movq    $-1,%rbx
  223         xorq    %rax,%rbx
  224         xorq    %r14,%r14
  225         movq    %r9,%r15
  226 
/*
 * Step 3: constant-time select, per limb, between the subtracted value
 * already stored at rp[] and the unreduced tp[] on the stack
 * (rp[i] = (rp[i] & %rbx) | (tp[i] & %rax)), overwriting tp[] with %r9
 * as it goes so no secret residue is left on the stack.
 */
  227 .Lcopy:
  228         movq    (%rdi,%r14,8),%rcx
  229         movq    (%rsp,%r14,8),%rdx
  230         andq    %rbx,%rcx
  231         andq    %rax,%rdx
  232         movq    %r9,(%rsp,%r14,8)
  233         orq     %rcx,%rdx
  234         movq    %rdx,(%rdi,%r14,8)
  235         leaq    1(%r14),%r14
  236         subq    $1,%r15
  237         jnz     .Lcopy
  238 
/* Epilogue: recover the caller's %rsp from its stash, restore regs, return 1. */
  239         movq    8(%rsp,%r9,8),%rsi
  240 .cfi_def_cfa    %rsi,8
  241         movq    $1,%rax
  242         movq    -48(%rsi),%r15
  243 .cfi_restore    %r15
  244         movq    -40(%rsi),%r14
  245 .cfi_restore    %r14
  246         movq    -32(%rsi),%r13
  247 .cfi_restore    %r13
  248         movq    -24(%rsi),%r12
  249 .cfi_restore    %r12
  250         movq    -16(%rsi),%rbp
  251 .cfi_restore    %rbp
  252         movq    -8(%rsi),%rbx
  253 .cfi_restore    %rbx
  254         leaq    (%rsi),%rsp
  255 .cfi_def_cfa_register   %rsp
  256 .Lmul_epilogue:
/* 0xf3,0xc3 = rep ret (presumably the generator's AMD branch-predictor idiom). */
  257         .byte   0xf3,0xc3
  258 .cfi_endproc    
  259 .size   bn_mul_mont,.-bn_mul_mont
/*
 * bn_mul4x_mont -- 4x-unrolled Montgomery multiplication (same presumed
 * argument roles as bn_mul_mont: rdi=rp, rsi=ap, rdx=bp, rcx=np, r8=&n0,
 * r9=num -- confirm against x86_64-mont.pl).  Returns 1 in %rax.
 *
 * NOTE(review): .Lmul4x_enter tests %r11d, which is only initialized by the
 * bn_mul_mont dispatcher (OPENSSL_ia32cap_P+8); when bn_mul4x_mont is entered
 * at its own global label, %r11 content is whatever the caller left there --
 * presumably this entry is never used directly; verify in the perlasm source.
 * If both capability bits in mask 0x80100 are set, control transfers to the
 * BMI2/ADX (mulx) implementation instead.
 */
  260 .type   bn_mul4x_mont,@function
  261 .align  16
  262 bn_mul4x_mont:
  263 .cfi_startproc  
  264         movl    %r9d,%r9d
  265         movq    %rsp,%rax
  266 .cfi_def_cfa_register   %rax
  267 .Lmul4x_enter:
  268         andl    $0x80100,%r11d
  269         cmpl    $0x80100,%r11d
  270         je      .Lmulx4x_enter
  271         pushq   %rbx
  272 .cfi_offset     %rbx,-16
  273         pushq   %rbp
  274 .cfi_offset     %rbp,-24
  275         pushq   %r12
  276 .cfi_offset     %r12,-32
  277         pushq   %r13
  278 .cfi_offset     %r13,-40
  279         pushq   %r14
  280 .cfi_offset     %r14,-48
  281         pushq   %r15
  282 .cfi_offset     %r15,-56
  283 
/* Allocate num+4 qwords of tp[] below %rsp (1KB-aligned), then page-walk it. */
  284         negq    %r9
  285         movq    %rsp,%r11
  286         leaq    -32(%rsp,%r9,8),%r10
  287         negq    %r9
  288         andq    $-1024,%r10
  289 
  290         subq    %r10,%r11
  291         andq    $-4096,%r11
  292         leaq    (%r10,%r11,1),%rsp
  293         movq    (%rsp),%r11
  294         cmpq    %r10,%rsp
  295         ja      .Lmul4x_page_walk
  296         jmp     .Lmul4x_page_walk_done
  297 
  298 .Lmul4x_page_walk:
  299         leaq    -4096(%rsp),%rsp
  300         movq    (%rsp),%r11
  301         cmpq    %r10,%rsp
  302         ja      .Lmul4x_page_walk
  303 .Lmul4x_page_walk_done:
  304 
/* Stash caller's %rsp (from %rax) and, below, rp (%rdi) above tp[num]. */
  305         movq    %rax,8(%rsp,%r9,8)
  306 .cfi_escape     0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
/*
 * Outer iteration 0, unrolled by four: tp[] = ap[]*bp[0] + m*np[] with
 * m (%rbp) = lo(ap[0]*bp[0]) * n0.  %rdi is repurposed as a carry/limb
 * temporary here (rp was saved at 16(%rsp,%r9,8)); %r15 = inner index j.
 */
  307 .Lmul4x_body:
  308         movq    %rdi,16(%rsp,%r9,8)
  309         movq    %rdx,%r12
  310         movq    (%r8),%r8
  311         movq    (%r12),%rbx
  312         movq    (%rsi),%rax
  313 
  314         xorq    %r14,%r14
  315         xorq    %r15,%r15
  316 
  317         movq    %r8,%rbp
  318         mulq    %rbx
  319         movq    %rax,%r10
  320         movq    (%rcx),%rax
  321 
  322         imulq   %r10,%rbp
  323         movq    %rdx,%r11
  324 
  325         mulq    %rbp
  326         addq    %rax,%r10
  327         movq    8(%rsi),%rax
  328         adcq    $0,%rdx
  329         movq    %rdx,%rdi
  330 
  331         mulq    %rbx
  332         addq    %rax,%r11
  333         movq    8(%rcx),%rax
  334         adcq    $0,%rdx
  335         movq    %rdx,%r10
  336 
  337         mulq    %rbp
  338         addq    %rax,%rdi
  339         movq    16(%rsi),%rax
  340         adcq    $0,%rdx
  341         addq    %r11,%rdi
  342         leaq    4(%r15),%r15
  343         adcq    $0,%rdx
  344         movq    %rdi,(%rsp)
  345         movq    %rdx,%r13
  346         jmp     .L1st4x
/* Inner loop of iteration 0, four limbs per trip (j advances by 4). */
  347 .align  16
  348 .L1st4x:
  349         mulq    %rbx
  350         addq    %rax,%r10
  351         movq    -16(%rcx,%r15,8),%rax
  352         adcq    $0,%rdx
  353         movq    %rdx,%r11
  354 
  355         mulq    %rbp
  356         addq    %rax,%r13
  357         movq    -8(%rsi,%r15,8),%rax
  358         adcq    $0,%rdx
  359         addq    %r10,%r13
  360         adcq    $0,%rdx
  361         movq    %r13,-24(%rsp,%r15,8)
  362         movq    %rdx,%rdi
  363 
  364         mulq    %rbx
  365         addq    %rax,%r11
  366         movq    -8(%rcx,%r15,8),%rax
  367         adcq    $0,%rdx
  368         movq    %rdx,%r10
  369 
  370         mulq    %rbp
  371         addq    %rax,%rdi
  372         movq    (%rsi,%r15,8),%rax
  373         adcq    $0,%rdx
  374         addq    %r11,%rdi
  375         adcq    $0,%rdx
  376         movq    %rdi,-16(%rsp,%r15,8)
  377         movq    %rdx,%r13
  378 
  379         mulq    %rbx
  380         addq    %rax,%r10
  381         movq    (%rcx,%r15,8),%rax
  382         adcq    $0,%rdx
  383         movq    %rdx,%r11
  384 
  385         mulq    %rbp
  386         addq    %rax,%r13
  387         movq    8(%rsi,%r15,8),%rax
  388         adcq    $0,%rdx
  389         addq    %r10,%r13
  390         adcq    $0,%rdx
  391         movq    %r13,-8(%rsp,%r15,8)
  392         movq    %rdx,%rdi
  393 
  394         mulq    %rbx
  395         addq    %rax,%r11
  396         movq    8(%rcx,%r15,8),%rax
  397         adcq    $0,%rdx
  398         leaq    4(%r15),%r15
  399         movq    %rdx,%r10
  400 
  401         mulq    %rbp
  402         addq    %rax,%rdi
  403         movq    -16(%rsi,%r15,8),%rax
  404         adcq    $0,%rdx
  405         addq    %r11,%rdi
  406         adcq    $0,%rdx
  407         movq    %rdi,-32(%rsp,%r15,8)
  408         movq    %rdx,%r13
  409         cmpq    %r9,%r15
  410         jb      .L1st4x
  411 
/* Tail of iteration 0: last two limb pairs plus top-word write-out. */
  412         mulq    %rbx
  413         addq    %rax,%r10
  414         movq    -16(%rcx,%r15,8),%rax
  415         adcq    $0,%rdx
  416         movq    %rdx,%r11
  417 
  418         mulq    %rbp
  419         addq    %rax,%r13
  420         movq    -8(%rsi,%r15,8),%rax
  421         adcq    $0,%rdx
  422         addq    %r10,%r13
  423         adcq    $0,%rdx
  424         movq    %r13,-24(%rsp,%r15,8)
  425         movq    %rdx,%rdi
  426 
  427         mulq    %rbx
  428         addq    %rax,%r11
  429         movq    -8(%rcx,%r15,8),%rax
  430         adcq    $0,%rdx
  431         movq    %rdx,%r10
  432 
  433         mulq    %rbp
  434         addq    %rax,%rdi
  435         movq    (%rsi),%rax
  436         adcq    $0,%rdx
  437         addq    %r11,%rdi
  438         adcq    $0,%rdx
  439         movq    %rdi,-16(%rsp,%r15,8)
  440         movq    %rdx,%r13
  441 
  442         xorq    %rdi,%rdi
  443         addq    %r10,%r13
  444         adcq    $0,%rdi
  445         movq    %r13,-8(%rsp,%r15,8)
  446         movq    %rdi,(%rsp,%r15,8)
  447 
  448         leaq    1(%r14),%r14
/* Outer iterations i = 1 .. num-1, 4x-unrolled, accumulating into tp[]. */
  449 .align  4
  450 .Louter4x:
  451         movq    (%r12,%r14,8),%rbx
  452         xorq    %r15,%r15
  453         movq    (%rsp),%r10
  454         movq    %r8,%rbp
  455         mulq    %rbx
  456         addq    %rax,%r10
  457         movq    (%rcx),%rax
  458         adcq    $0,%rdx
  459 
  460         imulq   %r10,%rbp
  461         movq    %rdx,%r11
  462 
  463         mulq    %rbp
  464         addq    %rax,%r10
  465         movq    8(%rsi),%rax
  466         adcq    $0,%rdx
  467         movq    %rdx,%rdi
  468 
  469         mulq    %rbx
  470         addq    %rax,%r11
  471         movq    8(%rcx),%rax
  472         adcq    $0,%rdx
  473         addq    8(%rsp),%r11
  474         adcq    $0,%rdx
  475         movq    %rdx,%r10
  476 
  477         mulq    %rbp
  478         addq    %rax,%rdi
  479         movq    16(%rsi),%rax
  480         adcq    $0,%rdx
  481         addq    %r11,%rdi
  482         leaq    4(%r15),%r15
  483         adcq    $0,%rdx
  484         movq    %rdi,(%rsp)
  485         movq    %rdx,%r13
  486         jmp     .Linner4x
  487 .align  16
  488 .Linner4x:
  489         mulq    %rbx
  490         addq    %rax,%r10
  491         movq    -16(%rcx,%r15,8),%rax
  492         adcq    $0,%rdx
  493         addq    -16(%rsp,%r15,8),%r10
  494         adcq    $0,%rdx
  495         movq    %rdx,%r11
  496 
  497         mulq    %rbp
  498         addq    %rax,%r13
  499         movq    -8(%rsi,%r15,8),%rax
  500         adcq    $0,%rdx
  501         addq    %r10,%r13
  502         adcq    $0,%rdx
  503         movq    %r13,-24(%rsp,%r15,8)
  504         movq    %rdx,%rdi
  505 
  506         mulq    %rbx
  507         addq    %rax,%r11
  508         movq    -8(%rcx,%r15,8),%rax
  509         adcq    $0,%rdx
  510         addq    -8(%rsp,%r15,8),%r11
  511         adcq    $0,%rdx
  512         movq    %rdx,%r10
  513 
  514         mulq    %rbp
  515         addq    %rax,%rdi
  516         movq    (%rsi,%r15,8),%rax
  517         adcq    $0,%rdx
  518         addq    %r11,%rdi
  519         adcq    $0,%rdx
  520         movq    %rdi,-16(%rsp,%r15,8)
  521         movq    %rdx,%r13
  522 
  523         mulq    %rbx
  524         addq    %rax,%r10
  525         movq    (%rcx,%r15,8),%rax
  526         adcq    $0,%rdx
  527         addq    (%rsp,%r15,8),%r10
  528         adcq    $0,%rdx
  529         movq    %rdx,%r11
  530 
  531         mulq    %rbp
  532         addq    %rax,%r13
  533         movq    8(%rsi,%r15,8),%rax
  534         adcq    $0,%rdx
  535         addq    %r10,%r13
  536         adcq    $0,%rdx
  537         movq    %r13,-8(%rsp,%r15,8)
  538         movq    %rdx,%rdi
  539 
  540         mulq    %rbx
  541         addq    %rax,%r11
  542         movq    8(%rcx,%r15,8),%rax
  543         adcq    $0,%rdx
  544         addq    8(%rsp,%r15,8),%r11
  545         adcq    $0,%rdx
  546         leaq    4(%r15),%r15
  547         movq    %rdx,%r10
  548 
  549         mulq    %rbp
  550         addq    %rax,%rdi
  551         movq    -16(%rsi,%r15,8),%rax
  552         adcq    $0,%rdx
  553         addq    %r11,%rdi
  554         adcq    $0,%rdx
  555         movq    %rdi,-32(%rsp,%r15,8)
  556         movq    %rdx,%r13
  557         cmpq    %r9,%r15
  558         jb      .Linner4x
  559 
/* Tail of an outer iteration: final limbs plus carry word at tp[num]. */
  560         mulq    %rbx
  561         addq    %rax,%r10
  562         movq    -16(%rcx,%r15,8),%rax
  563         adcq    $0,%rdx
  564         addq    -16(%rsp,%r15,8),%r10
  565         adcq    $0,%rdx
  566         movq    %rdx,%r11
  567 
  568         mulq    %rbp
  569         addq    %rax,%r13
  570         movq    -8(%rsi,%r15,8),%rax
  571         adcq    $0,%rdx
  572         addq    %r10,%r13
  573         adcq    $0,%rdx
  574         movq    %r13,-24(%rsp,%r15,8)
  575         movq    %rdx,%rdi
  576 
  577         mulq    %rbx
  578         addq    %rax,%r11
  579         movq    -8(%rcx,%r15,8),%rax
  580         adcq    $0,%rdx
  581         addq    -8(%rsp,%r15,8),%r11
  582         adcq    $0,%rdx
  583         leaq    1(%r14),%r14
  584         movq    %rdx,%r10
  585 
  586         mulq    %rbp
  587         addq    %rax,%rdi
  588         movq    (%rsi),%rax
  589         adcq    $0,%rdx
  590         addq    %r11,%rdi
  591         adcq    $0,%rdx
  592         movq    %rdi,-16(%rsp,%r15,8)
  593         movq    %rdx,%r13
  594 
  595         xorq    %rdi,%rdi
  596         addq    %r10,%r13
  597         adcq    $0,%rdi
  598         addq    (%rsp,%r9,8),%r13
  599         adcq    $0,%rdi
  600         movq    %r13,-8(%rsp,%r15,8)
  601         movq    %rdi,(%rsp,%r15,8)
  602 
  603         cmpq    %r9,%r14
  604         jb      .Louter4x
/*
 * Final reduction: recover rp into %rdi, then rp[] = tp[] - np[] four limbs
 * per trip (%r15 = num/4 trip count, %rsi aliased to tp on the stack).
 */
  605         movq    16(%rsp,%r9,8),%rdi
  606         leaq    -4(%r9),%r15
  607         movq    0(%rsp),%rax
  608         movq    8(%rsp),%rdx
  609         shrq    $2,%r15
  610         leaq    (%rsp),%rsi
  611         xorq    %r14,%r14
  612 
  613         subq    0(%rcx),%rax
  614         movq    16(%rsi),%rbx
  615         movq    24(%rsi),%rbp
  616         sbbq    8(%rcx),%rdx
  617 
  618 .Lsub4x:
  619         movq    %rax,0(%rdi,%r14,8)
  620         movq    %rdx,8(%rdi,%r14,8)
  621         sbbq    16(%rcx,%r14,8),%rbx
  622         movq    32(%rsi,%r14,8),%rax
  623         movq    40(%rsi,%r14,8),%rdx
  624         sbbq    24(%rcx,%r14,8),%rbp
  625         movq    %rbx,16(%rdi,%r14,8)
  626         movq    %rbp,24(%rdi,%r14,8)
  627         sbbq    32(%rcx,%r14,8),%rax
  628         movq    48(%rsi,%r14,8),%rbx
  629         movq    56(%rsi,%r14,8),%rbp
  630         sbbq    40(%rcx,%r14,8),%rdx
  631         leaq    4(%r14),%r14
  632         decq    %r15
  633         jnz     .Lsub4x
  634 
  635         movq    %rax,0(%rdi,%r14,8)
  636         movq    32(%rsi,%r14,8),%rax
  637         sbbq    16(%rcx,%r14,8),%rbx
  638         movq    %rdx,8(%rdi,%r14,8)
  639         sbbq    24(%rcx,%r14,8),%rbp
  640         movq    %rbx,16(%rdi,%r14,8)
  641 
/*
 * Build SSE2 selection masks from the final borrow: %xmm4 = broadcast mask,
 * %xmm5 = ~%xmm4.  The .byte sequence is a generator-emitted raw encoding
 * (presumably movq %rax,%xmm4; 66 REX.W 0F 6E -- do not alter).
 */
  642         sbbq    $0,%rax
  643         movq    %rbp,24(%rdi,%r14,8)
  644         pxor    %xmm0,%xmm0
  645 .byte   102,72,15,110,224
  646         pcmpeqd %xmm5,%xmm5
  647         pshufd  $0,%xmm4,%xmm4
  648         movq    %r9,%r15
  649         pxor    %xmm4,%xmm5
  650         shrq    $2,%r15
  651         xorl    %eax,%eax
  652 
  653         jmp     .Lcopy4x
/*
 * Constant-time select, 32 bytes per trip: rp[] = (tp[] & xmm4) | (rp[] & xmm5),
 * wiping tp[] with zeros (%xmm0) as it goes.
 */
  654 .align  16
  655 .Lcopy4x:
  656         movdqa  (%rsp,%rax,1),%xmm1
  657         movdqu  (%rdi,%rax,1),%xmm2
  658         pand    %xmm4,%xmm1
  659         pand    %xmm5,%xmm2
  660         movdqa  16(%rsp,%rax,1),%xmm3
  661         movdqa  %xmm0,(%rsp,%rax,1)
  662         por     %xmm2,%xmm1
  663         movdqu  16(%rdi,%rax,1),%xmm2
  664         movdqu  %xmm1,(%rdi,%rax,1)
  665         pand    %xmm4,%xmm3
  666         pand    %xmm5,%xmm2
  667         movdqa  %xmm0,16(%rsp,%rax,1)
  668         por     %xmm2,%xmm3
  669         movdqu  %xmm3,16(%rdi,%rax,1)
  670         leaq    32(%rax),%rax
  671         decq    %r15
  672         jnz     .Lcopy4x
/* Epilogue: recover caller's %rsp, restore callee-saved regs, return 1. */
  673         movq    8(%rsp,%r9,8),%rsi
  674 .cfi_def_cfa    %rsi, 8
  675         movq    $1,%rax
  676         movq    -48(%rsi),%r15
  677 .cfi_restore    %r15
  678         movq    -40(%rsi),%r14
  679 .cfi_restore    %r14
  680         movq    -32(%rsi),%r13
  681 .cfi_restore    %r13
  682         movq    -24(%rsi),%r12
  683 .cfi_restore    %r12
  684         movq    -16(%rsi),%rbp
  685 .cfi_restore    %rbp
  686         movq    -8(%rsi),%rbx
  687 .cfi_restore    %rbx
  688         leaq    (%rsi),%rsp
  689 .cfi_def_cfa_register   %rsp
  690 .Lmul4x_epilogue:
  691         .byte   0xf3,0xc3
  692 .cfi_endproc    
  693 .size   bn_mul4x_mont,.-bn_mul4x_mont
  694 
  695 
  696 
/*
 * bn_sqr8x_mont -- Montgomery squaring front-end (reached from bn_mul_mont's
 * dispatcher when ap == bp and num is a multiple of 8; same presumed argument
 * roles -- confirm against x86_64-mont.pl).  Returns 1 in %rax.
 * Sets up a 2*num-qword frame, calls an external squaring kernel
 * (bn_sqr8x_internal, or bn_sqrx8x_internal when the BMI2/ADX capability
 * bits 0x80100 are present), then performs the final conditional
 * subtraction/copy here.  The kernels' register contract is defined in the
 * perlasm source, not visible in this file.
 */
  697 .type   bn_sqr8x_mont,@function
  698 .align  32
  699 bn_sqr8x_mont:
  700 .cfi_startproc  
  701         movq    %rsp,%rax
  702 .cfi_def_cfa_register   %rax
  703 .Lsqr8x_enter:
  704         pushq   %rbx
  705 .cfi_offset     %rbx,-16
  706         pushq   %rbp
  707 .cfi_offset     %rbp,-24
  708         pushq   %r12
  709 .cfi_offset     %r12,-32
  710         pushq   %r13
  711 .cfi_offset     %r13,-40
  712         pushq   %r14
  713 .cfi_offset     %r14,-48
  714         pushq   %r15
  715 .cfi_offset     %r15,-56
  716 .Lsqr8x_prologue:
  717 
/* %r9 = num*8 bytes (negated below); %r10 = num*32, used for frame sizing. */
  718         movl    %r9d,%r10d
  719         shll    $3,%r9d
  720         shlq    $3+2,%r10
  721         negq    %r9
  722 
  723 
  724 
  725 
  726 
  727 
/*
 * Choose the new stack base %rbp: 2*num qwords plus 64 bytes of scratch,
 * biased by (ap - frame) mod 4096 -- presumably to avoid cache-bank/page
 * aliasing between ap[] and the temporary; .Lsqr8x_sp_alt handles the case
 * where the bias would not fit.
 */
  728         leaq    -64(%rsp,%r9,2),%r11
  729         movq    %rsp,%rbp
  730         movq    (%r8),%r8
  731         subq    %rsi,%r11
  732         andq    $4095,%r11
  733         cmpq    %r11,%r10
  734         jb      .Lsqr8x_sp_alt
  735         subq    %r11,%rbp
  736         leaq    -64(%rbp,%r9,2),%rbp
  737         jmp     .Lsqr8x_sp_done
  738 
  739 .align  32
  740 .Lsqr8x_sp_alt:
  741         leaq    4096-64(,%r9,2),%r10
  742         leaq    -64(%rbp,%r9,2),%rbp
  743         subq    %r10,%r11
  744         movq    $0,%r10
  745         cmovcq  %r10,%r11
  746         subq    %r11,%rbp
  747 .Lsqr8x_sp_done:
  748         andq    $-64,%rbp
  749         movq    %rsp,%r11
  750         subq    %rbp,%r11
  751         andq    $-4096,%r11
  752         leaq    (%r11,%rbp,1),%rsp
  753         movq    (%rsp),%r10
  754         cmpq    %rbp,%rsp
  755         ja      .Lsqr8x_page_walk
  756         jmp     .Lsqr8x_page_walk_done
  757 
/* Probe the new stack one page at a time (guard-page walk). */
  758 .align  16
  759 .Lsqr8x_page_walk:
  760         leaq    -4096(%rsp),%rsp
  761         movq    (%rsp),%r10
  762         cmpq    %rbp,%rsp
  763         ja      .Lsqr8x_page_walk
  764 .Lsqr8x_page_walk_done:
  765 
/* Stash n0 at 32(%rsp) and the caller's %rsp (%rax) at 40(%rsp). */
  766         movq    %r9,%r10
  767         negq    %r9
  768 
  769         movq    %r8,32(%rsp)
  770         movq    %rax,40(%rsp)
  771 .cfi_escape     0x0f,0x05,0x77,0x28,0x06,0x23,0x08
  772 .Lsqr8x_body:
  773 
/*
 * The .byte runs are generator-emitted raw encodings of 64-bit GPR<->XMM
 * moves (presumably movq %rcx,%xmm2 / %rdi,%xmm1 / %r10,%xmm3, parking
 * np, rp and the size across the internal call -- do not alter).
 */
  774 .byte   102,72,15,110,209
  775         pxor    %xmm0,%xmm0
  776 .byte   102,72,15,110,207
  777 .byte   102,73,15,110,218
/* Pick the BMI2/ADX kernel iff both bits of mask 0x80100 are set. */
  778         movl    OPENSSL_ia32cap_P+8(%rip),%eax
  779         andl    $0x80100,%eax
  780         cmpl    $0x80100,%eax
  781         jne     .Lsqr8x_nox
  782 
  783         call    bn_sqrx8x_internal
  784 
  785 
  786 
/* Post-kernel register shuffle (kernel's output contract; %rdi restored
 * from %xmm1 by the raw-encoded movq below). */
  787 
  788         leaq    (%r8,%rcx,1),%rbx
  789         movq    %rcx,%r9
  790         movq    %rcx,%rdx
  791 .byte   102,72,15,126,207
  792         sarq    $3+2,%rcx
  793         jmp     .Lsqr8x_sub
  794 
  795 .align  32
  796 .Lsqr8x_nox:
  797         call    bn_sqr8x_internal
  798 
  799 
  800 
  801 
  802         leaq    (%rdi,%r9,1),%rbx
  803         movq    %r9,%rcx
  804         movq    %r9,%rdx
  805 .byte   102,72,15,126,207
  806         sarq    $3+2,%rcx
  807         jmp     .Lsqr8x_sub
  808 
/*
 * rp[] = result - np[] (np presumably in %rbp per the kernel contract),
 * four limbs per trip, %rcx counting up to zero; borrow chains through sbbq.
 */
  809 .align  32
  810 .Lsqr8x_sub:
  811         movq    0(%rbx),%r12
  812         movq    8(%rbx),%r13
  813         movq    16(%rbx),%r14
  814         movq    24(%rbx),%r15
  815         leaq    32(%rbx),%rbx
  816         sbbq    0(%rbp),%r12
  817         sbbq    8(%rbp),%r13
  818         sbbq    16(%rbp),%r14
  819         sbbq    24(%rbp),%r15
  820         leaq    32(%rbp),%rbp
  821         movq    %r12,0(%rdi)
  822         movq    %r13,8(%rdi)
  823         movq    %r14,16(%rdi)
  824         movq    %r15,24(%rdi)
  825         leaq    32(%rdi),%rdi
  826         incq    %rcx
  827         jnz     .Lsqr8x_sub
  828 
/*
 * Fold the final borrow into %rax, rewind %rbx/%rdi to the start, and build
 * the broadcast selection mask in %xmm1 (raw bytes: movq %rax,%xmm1).
 */
  829         sbbq    $0,%rax
  830         leaq    (%rbx,%r9,1),%rbx
  831         leaq    (%rdi,%r9,1),%rdi
  832 
  833 .byte   102,72,15,110,200
  834         pxor    %xmm0,%xmm0
  835         pshufd  $0,%xmm1,%xmm1
  836         movq    40(%rsp),%rsi
  837 .cfi_def_cfa    %rsi,8
  838         jmp     .Lsqr8x_cond_copy
  839 
/*
 * Constant-time select between the unreduced value at %rbx and the
 * subtracted value at %rdi, 32 bytes per trip, zeroing the temporary
 * (both halves of the 2*num frame) behind itself; %r9 counts up to 0.
 */
  840 .align  32
  841 .Lsqr8x_cond_copy:
  842         movdqa  0(%rbx),%xmm2
  843         movdqa  16(%rbx),%xmm3
  844         leaq    32(%rbx),%rbx
  845         movdqu  0(%rdi),%xmm4
  846         movdqu  16(%rdi),%xmm5
  847         leaq    32(%rdi),%rdi
  848         movdqa  %xmm0,-32(%rbx)
  849         movdqa  %xmm0,-16(%rbx)
  850         movdqa  %xmm0,-32(%rbx,%rdx,1)
  851         movdqa  %xmm0,-16(%rbx,%rdx,1)
  852         pcmpeqd %xmm1,%xmm0
  853         pand    %xmm1,%xmm2
  854         pand    %xmm1,%xmm3
  855         pand    %xmm0,%xmm4
  856         pand    %xmm0,%xmm5
  857         pxor    %xmm0,%xmm0
  858         por     %xmm2,%xmm4
  859         por     %xmm3,%xmm5
  860         movdqu  %xmm4,-32(%rdi)
  861         movdqu  %xmm5,-16(%rdi)
  862         addq    $32,%r9
  863         jnz     .Lsqr8x_cond_copy
  864 
/* Epilogue: restore callee-saved registers from the saved %rsp, return 1. */
  865         movq    $1,%rax
  866         movq    -48(%rsi),%r15
  867 .cfi_restore    %r15
  868         movq    -40(%rsi),%r14
  869 .cfi_restore    %r14
  870         movq    -32(%rsi),%r13
  871 .cfi_restore    %r13
  872         movq    -24(%rsi),%r12
  873 .cfi_restore    %r12
  874         movq    -16(%rsi),%rbp
  875 .cfi_restore    %rbp
  876         movq    -8(%rsi),%rbx
  877 .cfi_restore    %rbx
  878         leaq    (%rsi),%rsp
  879 .cfi_def_cfa_register   %rsp
  880 .Lsqr8x_epilogue:
  881         .byte   0xf3,0xc3
  882 .cfi_endproc    
  883 .size   bn_sqr8x_mont,.-bn_sqr8x_mont
  884 .type   bn_mulx4x_mont,@function
  885 .align  32
  886 bn_mulx4x_mont:
  887 .cfi_startproc  
  888         movq    %rsp,%rax
  889 .cfi_def_cfa_register   %rax
  890 .Lmulx4x_enter:
  891         pushq   %rbx
  892 .cfi_offset     %rbx,-16
  893         pushq   %rbp
  894 .cfi_offset     %rbp,-24
  895         pushq   %r12
  896 .cfi_offset     %r12,-32
  897         pushq   %r13
  898 .cfi_offset     %r13,-40
  899         pushq   %r14
  900 .cfi_offset     %r14,-48
  901         pushq   %r15
  902 .cfi_offset     %r15,-56
  903 .Lmulx4x_prologue:
  904 
  905         shll    $3,%r9d
  906         xorq    %r10,%r10
  907         subq    %r9,%r10
  908         movq    (%r8),%r8
  909         leaq    -72(%rsp,%r10,1),%rbp
  910         andq    $-128,%rbp
  911         movq    %rsp,%r11
  912         subq    %rbp,%r11
  913         andq    $-4096,%r11
  914         leaq    (%r11,%rbp,1),%rsp
  915         movq    (%rsp),%r10
  916         cmpq    %rbp,%rsp
  917         ja      .Lmulx4x_page_walk
  918         jmp     .Lmulx4x_page_walk_done
  919 
  920 .align  16
  921 .Lmulx4x_page_walk:
  922         leaq    -4096(%rsp),%rsp
  923         movq    (%rsp),%r10
  924         cmpq    %rbp,%rsp
  925         ja      .Lmulx4x_page_walk
  926 .Lmulx4x_page_walk_done:
  927 
  928         leaq    (%rdx,%r9,1),%r10
  929 
  930 
  931 
  932 
  933 
  934 
  935 
  936 
  937 
  938 
  939 
  940 
  941         movq    %r9,0(%rsp)
  942         shrq    $5,%r9
  943         movq    %r10,16(%rsp)
  944         subq    $1,%r9
  945         movq    %r8,24(%rsp)
  946         movq    %rdi,32(%rsp)
  947         movq    %rax,40(%rsp)
  948 .cfi_escape     0x0f,0x05,0x77,0x28,0x06,0x23,0x08
  949         movq    %r9,48(%rsp)
  950         jmp     .Lmulx4x_body
  951 
  952 .align  32
  953 .Lmulx4x_body:
  954         leaq    8(%rdx),%rdi
  955         movq    (%rdx),%rdx
  956         leaq    64+32(%rsp),%rbx
  957         movq    %rdx,%r9
  958 
  959         mulxq   0(%rsi),%r8,%rax
  960         mulxq   8(%rsi),%r11,%r14
  961         addq    %rax,%r11
  962         movq    %rdi,8(%rsp)
  963         mulxq   16(%rsi),%r12,%r13
  964         adcq    %r14,%r12
  965         adcq    $0,%r13
  966 
  967         movq    %r8,%rdi
  968         imulq   24(%rsp),%r8
  969         xorq    %rbp,%rbp
  970 
  971         mulxq   24(%rsi),%rax,%r14
  972         movq    %r8,%rdx
  973         leaq    32(%rsi),%rsi
  974         adcxq   %rax,%r13
  975         adcxq   %rbp,%r14
  976 
  977         mulxq   0(%rcx),%rax,%r10
  978         adcxq   %rax,%rdi
  979         adoxq   %r11,%r10
  980         mulxq   8(%rcx),%rax,%r11
  981         adcxq   %rax,%r10
  982         adoxq   %r12,%r11
  983 .byte   0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
  984         movq    48(%rsp),%rdi
  985         movq    %r10,-32(%rbx)
  986         adcxq   %rax,%r11
  987         adoxq   %r13,%r12
  988         mulxq   24(%rcx),%rax,%r15
  989         movq    %r9,%rdx
  990         movq    %r11,-24(%rbx)
  991         adcxq   %rax,%r12
  992         adoxq   %rbp,%r15
  993         leaq    32(%rcx),%rcx
  994         movq    %r12,-16(%rbx)
  995 
  996         jmp     .Lmulx4x_1st
  997 
  998 .align  32
  999 .Lmulx4x_1st:
 1000         adcxq   %rbp,%r15
 1001         mulxq   0(%rsi),%r10,%rax
 1002         adcxq   %r14,%r10
 1003         mulxq   8(%rsi),%r11,%r14
 1004         adcxq   %rax,%r11
 1005         mulxq   16(%rsi),%r12,%rax
 1006         adcxq   %r14,%r12
 1007         mulxq   24(%rsi),%r13,%r14
 1008 .byte   0x67,0x67
 1009         movq    %r8,%rdx
 1010         adcxq   %rax,%r13
 1011         adcxq   %rbp,%r14
 1012         leaq    32(%rsi),%rsi
 1013         leaq    32(%rbx),%rbx
 1014 
 1015         adoxq   %r15,%r10
 1016         mulxq   0(%rcx),%rax,%r15
 1017         adcxq   %rax,%r10
 1018         adoxq   %r15,%r11
 1019         mulxq   8(%rcx),%rax,%r15
 1020         adcxq   %rax,%r11
 1021         adoxq   %r15,%r12
 1022         mulxq   16(%rcx),%rax,%r15
 1023         movq    %r10,-40(%rbx)
 1024         adcxq   %rax,%r12
 1025         movq    %r11,-32(%rbx)
 1026         adoxq   %r15,%r13
 1027         mulxq   24(%rcx),%rax,%r15
 1028         movq    %r9,%rdx
 1029         movq    %r12,-24(%rbx)
 1030         adcxq   %rax,%r13
 1031         adoxq   %rbp,%r15
 1032         leaq    32(%rcx),%rcx
 1033         movq    %r13,-16(%rbx)
 1034 
 1035         decq    %rdi
 1036         jnz     .Lmulx4x_1st
 1037 
 1038         movq    0(%rsp),%rax
 1039         movq    8(%rsp),%rdi
 1040         adcq    %rbp,%r15
 1041         addq    %r15,%r14
 1042         sbbq    %r15,%r15
 1043         movq    %r14,-8(%rbx)
 1044         jmp     .Lmulx4x_outer
 1045 
 1046 .align  32
 1047 .Lmulx4x_outer:
 1048         movq    (%rdi),%rdx
 1049         leaq    8(%rdi),%rdi
 1050         subq    %rax,%rsi
 1051         movq    %r15,(%rbx)
 1052         leaq    64+32(%rsp),%rbx
 1053         subq    %rax,%rcx
 1054 
 1055         mulxq   0(%rsi),%r8,%r11
 1056         xorl    %ebp,%ebp
 1057         movq    %rdx,%r9
 1058         mulxq   8(%rsi),%r14,%r12
 1059         adoxq   -32(%rbx),%r8
 1060         adcxq   %r14,%r11
 1061         mulxq   16(%rsi),%r15,%r13
 1062         adoxq   -24(%rbx),%r11
 1063         adcxq   %r15,%r12
 1064         adoxq   -16(%rbx),%r12
 1065         adcxq   %rbp,%r13
 1066         adoxq   %rbp,%r13
 1067 
 1068         movq    %rdi,8(%rsp)
 1069         movq    %r8,%r15
 1070         imulq   24(%rsp),%r8
 1071         xorl    %ebp,%ebp
 1072 
 1073         mulxq   24(%rsi),%rax,%r14
 1074         movq    %r8,%rdx
 1075         adcxq   %rax,%r13
 1076         adoxq   -8(%rbx),%r13
 1077         adcxq   %rbp,%r14
 1078         leaq    32(%rsi),%rsi
 1079         adoxq   %rbp,%r14
 1080 
 1081         mulxq   0(%rcx),%rax,%r10
 1082         adcxq   %rax,%r15
 1083         adoxq   %r11,%r10
 1084         mulxq   8(%rcx),%rax,%r11
 1085         adcxq   %rax,%r10
 1086         adoxq   %r12,%r11
 1087         mulxq   16(%rcx),%rax,%r12
 1088         movq    %r10,-32(%rbx)
 1089         adcxq   %rax,%r11
 1090         adoxq   %r13,%r12
 1091         mulxq   24(%rcx),%rax,%r15
 1092         movq    %r9,%rdx
 1093         movq    %r11,-24(%rbx)
 1094         leaq    32(%rcx),%rcx
 1095         adcxq   %rax,%r12
 1096         adoxq   %rbp,%r15
 1097         movq    48(%rsp),%rdi
 1098         movq    %r12,-16(%rbx)
 1099 
 1100         jmp     .Lmulx4x_inner
 1101 
 1102 .align  32
 1103 .Lmulx4x_inner:
 1104         mulxq   0(%rsi),%r10,%rax
 1105         adcxq   %rbp,%r15
 1106         adoxq   %r14,%r10
 1107         mulxq   8(%rsi),%r11,%r14
 1108         adcxq   0(%rbx),%r10
 1109         adoxq   %rax,%r11
 1110         mulxq   16(%rsi),%r12,%rax
 1111         adcxq   8(%rbx),%r11
 1112         adoxq   %r14,%r12
 1113         mulxq   24(%rsi),%r13,%r14
 1114         movq    %r8,%rdx
 1115         adcxq   16(%rbx),%r12
 1116         adoxq   %rax,%r13
 1117         adcxq   24(%rbx),%r13
 1118         adoxq   %rbp,%r14
 1119         leaq    32(%rsi),%rsi
 1120         leaq    32(%rbx),%rbx
 1121         adcxq   %rbp,%r14
 1122 
 1123         adoxq   %r15,%r10
 1124         mulxq   0(%rcx),%rax,%r15
 1125         adcxq   %rax,%r10
 1126         adoxq   %r15,%r11
 1127         mulxq   8(%rcx),%rax,%r15
 1128         adcxq   %rax,%r11
 1129         adoxq   %r15,%r12
 1130         mulxq   16(%rcx),%rax,%r15
 1131         movq    %r10,-40(%rbx)
 1132         adcxq   %rax,%r12
 1133         adoxq   %r15,%r13
 1134         mulxq   24(%rcx),%rax,%r15
 1135         movq    %r9,%rdx
 1136         movq    %r11,-32(%rbx)
 1137         movq    %r12,-24(%rbx)
 1138         adcxq   %rax,%r13
 1139         adoxq   %rbp,%r15
 1140         leaq    32(%rcx),%rcx
 1141         movq    %r13,-16(%rbx)
 1142 
 1143         decq    %rdi
 1144         jnz     .Lmulx4x_inner
 1145 
 1146         movq    0(%rsp),%rax
 1147         movq    8(%rsp),%rdi
 1148         adcq    %rbp,%r15
 1149         subq    0(%rbx),%rbp
 1150         adcq    %r15,%r14
 1151         sbbq    %r15,%r15
 1152         movq    %r14,-8(%rbx)
 1153 
 1154         cmpq    16(%rsp),%rdi
 1155         jne     .Lmulx4x_outer
 1156 
 1157         leaq    64(%rsp),%rbx
 1158         subq    %rax,%rcx
 1159         negq    %r15
 1160         movq    %rax,%rdx
 1161         shrq    $3+2,%rax
 1162         movq    32(%rsp),%rdi
 1163         jmp     .Lmulx4x_sub
 1164 
 1165 .align  32
 1166 .Lmulx4x_sub:
 1167         movq    0(%rbx),%r11
 1168         movq    8(%rbx),%r12
 1169         movq    16(%rbx),%r13
 1170         movq    24(%rbx),%r14
 1171         leaq    32(%rbx),%rbx
 1172         sbbq    0(%rcx),%r11
 1173         sbbq    8(%rcx),%r12
 1174         sbbq    16(%rcx),%r13
 1175         sbbq    24(%rcx),%r14
 1176         leaq    32(%rcx),%rcx
 1177         movq    %r11,0(%rdi)
 1178         movq    %r12,8(%rdi)
 1179         movq    %r13,16(%rdi)
 1180         movq    %r14,24(%rdi)
 1181         leaq    32(%rdi),%rdi
 1182         decq    %rax
 1183         jnz     .Lmulx4x_sub
 1184 
 1185         sbbq    $0,%r15
 1186         leaq    64(%rsp),%rbx
 1187         subq    %rdx,%rdi
 1188 
 1189 .byte   102,73,15,110,207
 1190         pxor    %xmm0,%xmm0
 1191         pshufd  $0,%xmm1,%xmm1
 1192         movq    40(%rsp),%rsi
 1193 .cfi_def_cfa    %rsi,8
 1194         jmp     .Lmulx4x_cond_copy
 1195 
 1196 .align  32
 1197 .Lmulx4x_cond_copy:
 1198         movdqa  0(%rbx),%xmm2
 1199         movdqa  16(%rbx),%xmm3
 1200         leaq    32(%rbx),%rbx
 1201         movdqu  0(%rdi),%xmm4
 1202         movdqu  16(%rdi),%xmm5
 1203         leaq    32(%rdi),%rdi
 1204         movdqa  %xmm0,-32(%rbx)
 1205         movdqa  %xmm0,-16(%rbx)
 1206         pcmpeqd %xmm1,%xmm0
 1207         pand    %xmm1,%xmm2
 1208         pand    %xmm1,%xmm3
 1209         pand    %xmm0,%xmm4
 1210         pand    %xmm0,%xmm5
 1211         pxor    %xmm0,%xmm0
 1212         por     %xmm2,%xmm4
 1213         por     %xmm3,%xmm5
 1214         movdqu  %xmm4,-32(%rdi)
 1215         movdqu  %xmm5,-16(%rdi)
 1216         subq    $32,%rdx
 1217         jnz     .Lmulx4x_cond_copy
 1218 
 1219         movq    %rdx,(%rbx)
 1220 
 1221         movq    $1,%rax
 1222         movq    -48(%rsi),%r15
 1223 .cfi_restore    %r15
 1224         movq    -40(%rsi),%r14
 1225 .cfi_restore    %r14
 1226         movq    -32(%rsi),%r13
 1227 .cfi_restore    %r13
 1228         movq    -24(%rsi),%r12
 1229 .cfi_restore    %r12
 1230         movq    -16(%rsi),%rbp
 1231 .cfi_restore    %rbp
 1232         movq    -8(%rsi),%rbx
 1233 .cfi_restore    %rbx
 1234         leaq    (%rsi),%rsp
 1235 .cfi_def_cfa_register   %rsp
 1236 .Lmulx4x_epilogue:
 1237         .byte   0xf3,0xc3
 1238 .cfi_endproc    
 1239 .size   bn_mulx4x_mont,.-bn_mulx4x_mont
/* ASCII banner embedded in .text, NUL-terminated:
   "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
   Emitted by the x86_64-mont.pl generator after the last function;
   the .align 16 pads out to the next 16-byte boundary. */
  1240 .byte   77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  1241 .align  16

Cache object: 2e8e0958f257f5d9d5c3a4d54b4d7567


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.