FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/amd64/aesni-mb-x86_64.S


/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
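/*
 * Multi-buffer AES-CBC: the routines below interleave the AES rounds of
 * up to four (SSE path) or eight (AVX path) independent CBC streams so
 * that the latency of one AESENC/AESDEC is hidden behind work on the
 * other streams.  Each global entry point dispatches at run time on the
 * AVX bit (1<<28) of OPENSSL_ia32cap_P.
 *
 * Per-stream descriptor layout, reconstructed from the offsets used
 * below (40-byte stride; the AESNI_MB_DESC name is ours, not OpenSSL's):
 *
 *     typedef struct {
 *         const unsigned char *inp;      // input buffer
 *         unsigned char       *out;      // output buffer
 *         int                  blocks;   // 16-byte blocks (+4 bytes pad)
 *         unsigned long long   iv[2];    // per-stream IV
 *     } AESNI_MB_DESC;
 */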
.text



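/*
 * void aesni_multi_cbc_encrypt(AESNI_MB_DESC *desc, const AES_KEY *key,
 *                              int num);
 *
 * System V AMD64 registers as consumed below: %rdi = descriptor array,
 * %rsi = expanded key schedule, %edx = number of four-stream groups
 * (OpenSSL's callers appear to pass 1 or 2; with AVX available and
 * %edx >= 2 the eight-wide variant is taken via _avx_cbc_enc_shortcut).
 */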
.globl  aesni_multi_cbc_encrypt
.type   aesni_multi_cbc_encrypt,@function
.align  32
aesni_multi_cbc_encrypt:
.cfi_startproc
        cmpl    $2,%edx
        jb      .Lenc_non_avx
        movl    OPENSSL_ia32cap_P+4(%rip),%ecx
        testl   $268435456,%ecx
        jnz     _avx_cbc_enc_shortcut
        jmp     .Lenc_non_avx
.align  16
.Lenc_non_avx:
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56






        subq    $48,%rsp
        andq    $-64,%rsp
        movq    %rax,16(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
        movdqu  (%rsi),%xmm12
        leaq    120(%rsi),%rsi
        leaq    80(%rdi),%rdi

.Lenc4x_loop_grande:
        movl    %edx,24(%rsp)
        xorl    %edx,%edx
        movl    -64(%rdi),%ecx
        movq    -80(%rdi),%r8
        cmpl    %edx,%ecx
        movq    -72(%rdi),%r12
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  -56(%rdi),%xmm2
        movl    %ecx,32(%rsp)
        cmovleq %rsp,%r8
        movl    -24(%rdi),%ecx
        movq    -40(%rdi),%r9
        cmpl    %edx,%ecx
        movq    -32(%rdi),%r13
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  -16(%rdi),%xmm3
        movl    %ecx,36(%rsp)
        cmovleq %rsp,%r9
        movl    16(%rdi),%ecx
        movq    0(%rdi),%r10
        cmpl    %edx,%ecx
        movq    8(%rdi),%r14
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  24(%rdi),%xmm4
        movl    %ecx,40(%rsp)
        cmovleq %rsp,%r10
        movl    56(%rdi),%ecx
        movq    40(%rdi),%r11
        cmpl    %edx,%ecx
        movq    48(%rdi),%r15
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  64(%rdi),%xmm5
        movl    %ecx,44(%rsp)
        cmovleq %rsp,%r11
        testl   %edx,%edx
        jz      .Lenc4x_done

        movups  16-120(%rsi),%xmm1
        pxor    %xmm12,%xmm2
        movups  32-120(%rsi),%xmm0
        pxor    %xmm12,%xmm3
        movl    240-120(%rsi),%eax
        pxor    %xmm12,%xmm4
        movdqu  (%r8),%xmm6
        pxor    %xmm12,%xmm5
        movdqu  (%r9),%xmm7
        pxor    %xmm6,%xmm2
        movdqu  (%r10),%xmm8
        pxor    %xmm7,%xmm3
        movdqu  (%r11),%xmm9
        pxor    %xmm8,%xmm4
        pxor    %xmm9,%xmm5
        movdqa  32(%rsp),%xmm10
        xorq    %rbx,%rbx
        jmp     .Loop_enc4x

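/*
 * Main 4x-interleaved encrypt loop: one AES round is issued for each
 * stream before advancing to the next round key, so the four streams
 * hide each other's instruction latency.  A stream whose block count
 * (dwords at 32..44(%rsp)) is exhausted gets its input/output pointers
 * redirected to stack scratch by the cmovge/cmovg pairs, which keeps
 * the loop body branch-free.
 */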
.align  32
.Loop_enc4x:
        addq    $16,%rbx
        leaq    16(%rsp),%rbp
        movl    $1,%ecx
        subq    %rbx,%rbp

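/*
 * The .byte runs below are hand-encoded AES-NI instructions, emitted as
 * raw opcodes so the file assembles even with assemblers that lack the
 * mnemonics: 102,15,56,220,209 is aesenc %xmm1,%xmm2;
 * 102,15,56,221,208 is aesenclast %xmm0,%xmm2; the 222/223 forms used
 * in the decrypt path are aesdec/aesdeclast.
 */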
.byte   102,15,56,220,209
        prefetcht0      31(%r8,%rbx,1)
        prefetcht0      31(%r9,%rbx,1)
.byte   102,15,56,220,217
        prefetcht0      31(%r10,%rbx,1)
        prefetcht0      31(%r11,%rbx,1)
.byte   102,15,56,220,225
.byte   102,15,56,220,233
        movups  48-120(%rsi),%xmm1
        cmpl    32(%rsp),%ecx
.byte   102,15,56,220,208
.byte   102,15,56,220,216
.byte   102,15,56,220,224
        cmovgeq %rbp,%r8
        cmovgq  %rbp,%r12
.byte   102,15,56,220,232
        movups  -56(%rsi),%xmm0
        cmpl    36(%rsp),%ecx
.byte   102,15,56,220,209
.byte   102,15,56,220,217
.byte   102,15,56,220,225
        cmovgeq %rbp,%r9
        cmovgq  %rbp,%r13
.byte   102,15,56,220,233
        movups  -40(%rsi),%xmm1
        cmpl    40(%rsp),%ecx
.byte   102,15,56,220,208
.byte   102,15,56,220,216
.byte   102,15,56,220,224
        cmovgeq %rbp,%r10
        cmovgq  %rbp,%r14
.byte   102,15,56,220,232
        movups  -24(%rsi),%xmm0
        cmpl    44(%rsp),%ecx
.byte   102,15,56,220,209
.byte   102,15,56,220,217
.byte   102,15,56,220,225
        cmovgeq %rbp,%r11
        cmovgq  %rbp,%r15
.byte   102,15,56,220,233
        movups  -8(%rsi),%xmm1
        movdqa  %xmm10,%xmm11
.byte   102,15,56,220,208
        prefetcht0      15(%r12,%rbx,1)
        prefetcht0      15(%r13,%rbx,1)
.byte   102,15,56,220,216
        prefetcht0      15(%r14,%rbx,1)
        prefetcht0      15(%r15,%rbx,1)
.byte   102,15,56,220,224
.byte   102,15,56,220,232
        movups  128-120(%rsi),%xmm0
        pxor    %xmm12,%xmm12

.byte   102,15,56,220,209
        pcmpgtd %xmm12,%xmm11
        movdqu  -120(%rsi),%xmm12
.byte   102,15,56,220,217
        paddd   %xmm11,%xmm10
        movdqa  %xmm10,32(%rsp)
.byte   102,15,56,220,225
.byte   102,15,56,220,233
        movups  144-120(%rsi),%xmm1

        cmpl    $11,%eax

.byte   102,15,56,220,208
.byte   102,15,56,220,216
.byte   102,15,56,220,224
.byte   102,15,56,220,232
        movups  160-120(%rsi),%xmm0

        jb      .Lenc4x_tail

.byte   102,15,56,220,209
.byte   102,15,56,220,217
.byte   102,15,56,220,225
.byte   102,15,56,220,233
        movups  176-120(%rsi),%xmm1

.byte   102,15,56,220,208
.byte   102,15,56,220,216
.byte   102,15,56,220,224
.byte   102,15,56,220,232
        movups  192-120(%rsi),%xmm0

        je      .Lenc4x_tail

.byte   102,15,56,220,209
.byte   102,15,56,220,217
.byte   102,15,56,220,225
.byte   102,15,56,220,233
        movups  208-120(%rsi),%xmm1

.byte   102,15,56,220,208
.byte   102,15,56,220,216
.byte   102,15,56,220,224
.byte   102,15,56,220,232
        movups  224-120(%rsi),%xmm0
        jmp     .Lenc4x_tail

.align  32
.Lenc4x_tail:
.byte   102,15,56,220,209
.byte   102,15,56,220,217
.byte   102,15,56,220,225
.byte   102,15,56,220,233
        movdqu  (%r8,%rbx,1),%xmm6
        movdqu  16-120(%rsi),%xmm1

.byte   102,15,56,221,208
        movdqu  (%r9,%rbx,1),%xmm7
        pxor    %xmm12,%xmm6
.byte   102,15,56,221,216
        movdqu  (%r10,%rbx,1),%xmm8
        pxor    %xmm12,%xmm7
.byte   102,15,56,221,224
        movdqu  (%r11,%rbx,1),%xmm9
        pxor    %xmm12,%xmm8
.byte   102,15,56,221,232
        movdqu  32-120(%rsi),%xmm0
        pxor    %xmm12,%xmm9

        movups  %xmm2,-16(%r12,%rbx,1)
        pxor    %xmm6,%xmm2
        movups  %xmm3,-16(%r13,%rbx,1)
        pxor    %xmm7,%xmm3
        movups  %xmm4,-16(%r14,%rbx,1)
        pxor    %xmm8,%xmm4
        movups  %xmm5,-16(%r15,%rbx,1)
        pxor    %xmm9,%xmm5

        decl    %edx
        jnz     .Loop_enc4x

        movq    16(%rsp),%rax
.cfi_def_cfa    %rax,8
        movl    24(%rsp),%edx










        leaq    160(%rdi),%rdi
        decl    %edx
        jnz     .Lenc4x_loop_grande

.Lenc4x_done:
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lenc4x_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
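/*
 * Hypothetical caller-side sketch (C, not part of this file) showing
 * how the entry point above might be driven; the descriptor layout and
 * the AESNI_MB_DESC name are our reconstruction from the offsets used
 * above, and the real callers are OpenSSL's EVP multi-block ciphers:
 *
 *     extern void aesni_multi_cbc_encrypt(AESNI_MB_DESC *d,
 *                                         const void *key, int grps);
 *
 *     AESNI_MB_DESC d[4];
 *     for (int i = 0; i < 4; i++) {
 *         d[i].inp    = in[i];           // plaintext, stream i
 *         d[i].out    = out[i];          // ciphertext, stream i
 *         d[i].blocks = len[i] / 16;     // whole 16-byte blocks only
 *         memcpy(d[i].iv, iv[i], 16);    // per-stream IV
 *     }
 *     aesni_multi_cbc_encrypt(d, aes_key_schedule, 1); // one 4-group
 */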

.globl  aesni_multi_cbc_decrypt
.type   aesni_multi_cbc_decrypt,@function
.align  32
aesni_multi_cbc_decrypt:
.cfi_startproc
        cmpl    $2,%edx
        jb      .Ldec_non_avx
        movl    OPENSSL_ia32cap_P+4(%rip),%ecx
        testl   $268435456,%ecx
        jnz     _avx_cbc_dec_shortcut
        jmp     .Ldec_non_avx
.align  16
.Ldec_non_avx:
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56






        subq    $48,%rsp
        andq    $-64,%rsp
        movq    %rax,16(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
        movdqu  (%rsi),%xmm12
        leaq    120(%rsi),%rsi
        leaq    80(%rdi),%rdi

.Ldec4x_loop_grande:
        movl    %edx,24(%rsp)
        xorl    %edx,%edx
        movl    -64(%rdi),%ecx
        movq    -80(%rdi),%r8
        cmpl    %edx,%ecx
        movq    -72(%rdi),%r12
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  -56(%rdi),%xmm6
        movl    %ecx,32(%rsp)
        cmovleq %rsp,%r8
        movl    -24(%rdi),%ecx
        movq    -40(%rdi),%r9
        cmpl    %edx,%ecx
        movq    -32(%rdi),%r13
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  -16(%rdi),%xmm7
        movl    %ecx,36(%rsp)
        cmovleq %rsp,%r9
        movl    16(%rdi),%ecx
        movq    0(%rdi),%r10
        cmpl    %edx,%ecx
        movq    8(%rdi),%r14
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  24(%rdi),%xmm8
        movl    %ecx,40(%rsp)
        cmovleq %rsp,%r10
        movl    56(%rdi),%ecx
        movq    40(%rdi),%r11
        cmpl    %edx,%ecx
        movq    48(%rdi),%r15
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        movdqu  64(%rdi),%xmm9
        movl    %ecx,44(%rsp)
        cmovleq %rsp,%r11
        testl   %edx,%edx
        jz      .Ldec4x_done

        movups  16-120(%rsi),%xmm1
        movups  32-120(%rsi),%xmm0
        movl    240-120(%rsi),%eax
        movdqu  (%r8),%xmm2
        movdqu  (%r9),%xmm3
        pxor    %xmm12,%xmm2
        movdqu  (%r10),%xmm4
        pxor    %xmm12,%xmm3
        movdqu  (%r11),%xmm5
        pxor    %xmm12,%xmm4
        pxor    %xmm12,%xmm5
        movdqa  32(%rsp),%xmm10
        xorq    %rbx,%rbx
        jmp     .Loop_dec4x

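/*
 * 4x-interleaved CBC decrypt loop.  Unlike encryption, the chaining
 * values (previous ciphertext blocks) are carried in %xmm6-%xmm9; in
 * the tail they are XORed with the last round key so that a single
 * aesdeclast both finishes the block and undoes the CBC chaining, and
 * the freshly loaded ciphertext becomes the next chaining value.
 */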
.align  32
.Loop_dec4x:
        addq    $16,%rbx
        leaq    16(%rsp),%rbp
        movl    $1,%ecx
        subq    %rbx,%rbp

.byte   102,15,56,222,209
        prefetcht0      31(%r8,%rbx,1)
        prefetcht0      31(%r9,%rbx,1)
.byte   102,15,56,222,217
        prefetcht0      31(%r10,%rbx,1)
        prefetcht0      31(%r11,%rbx,1)
.byte   102,15,56,222,225
.byte   102,15,56,222,233
        movups  48-120(%rsi),%xmm1
        cmpl    32(%rsp),%ecx
.byte   102,15,56,222,208
.byte   102,15,56,222,216
.byte   102,15,56,222,224
        cmovgeq %rbp,%r8
        cmovgq  %rbp,%r12
.byte   102,15,56,222,232
        movups  -56(%rsi),%xmm0
        cmpl    36(%rsp),%ecx
.byte   102,15,56,222,209
.byte   102,15,56,222,217
.byte   102,15,56,222,225
        cmovgeq %rbp,%r9
        cmovgq  %rbp,%r13
.byte   102,15,56,222,233
        movups  -40(%rsi),%xmm1
        cmpl    40(%rsp),%ecx
.byte   102,15,56,222,208
.byte   102,15,56,222,216
.byte   102,15,56,222,224
        cmovgeq %rbp,%r10
        cmovgq  %rbp,%r14
.byte   102,15,56,222,232
        movups  -24(%rsi),%xmm0
        cmpl    44(%rsp),%ecx
.byte   102,15,56,222,209
.byte   102,15,56,222,217
.byte   102,15,56,222,225
        cmovgeq %rbp,%r11
        cmovgq  %rbp,%r15
.byte   102,15,56,222,233
        movups  -8(%rsi),%xmm1
        movdqa  %xmm10,%xmm11
.byte   102,15,56,222,208
        prefetcht0      15(%r12,%rbx,1)
        prefetcht0      15(%r13,%rbx,1)
.byte   102,15,56,222,216
        prefetcht0      15(%r14,%rbx,1)
        prefetcht0      15(%r15,%rbx,1)
.byte   102,15,56,222,224
.byte   102,15,56,222,232
        movups  128-120(%rsi),%xmm0
        pxor    %xmm12,%xmm12

.byte   102,15,56,222,209
        pcmpgtd %xmm12,%xmm11
        movdqu  -120(%rsi),%xmm12
.byte   102,15,56,222,217
        paddd   %xmm11,%xmm10
        movdqa  %xmm10,32(%rsp)
.byte   102,15,56,222,225
.byte   102,15,56,222,233
        movups  144-120(%rsi),%xmm1

        cmpl    $11,%eax

.byte   102,15,56,222,208
.byte   102,15,56,222,216
.byte   102,15,56,222,224
.byte   102,15,56,222,232
        movups  160-120(%rsi),%xmm0

        jb      .Ldec4x_tail

.byte   102,15,56,222,209
.byte   102,15,56,222,217
.byte   102,15,56,222,225
.byte   102,15,56,222,233
        movups  176-120(%rsi),%xmm1

.byte   102,15,56,222,208
.byte   102,15,56,222,216
.byte   102,15,56,222,224
.byte   102,15,56,222,232
        movups  192-120(%rsi),%xmm0

        je      .Ldec4x_tail

.byte   102,15,56,222,209
.byte   102,15,56,222,217
.byte   102,15,56,222,225
.byte   102,15,56,222,233
        movups  208-120(%rsi),%xmm1

.byte   102,15,56,222,208
.byte   102,15,56,222,216
.byte   102,15,56,222,224
.byte   102,15,56,222,232
        movups  224-120(%rsi),%xmm0
        jmp     .Ldec4x_tail

.align  32
.Ldec4x_tail:
.byte   102,15,56,222,209
.byte   102,15,56,222,217
.byte   102,15,56,222,225
        pxor    %xmm0,%xmm6
        pxor    %xmm0,%xmm7
.byte   102,15,56,222,233
        movdqu  16-120(%rsi),%xmm1
        pxor    %xmm0,%xmm8
        pxor    %xmm0,%xmm9
        movdqu  32-120(%rsi),%xmm0

.byte   102,15,56,223,214
.byte   102,15,56,223,223
        movdqu  -16(%r8,%rbx,1),%xmm6
        movdqu  -16(%r9,%rbx,1),%xmm7
.byte   102,65,15,56,223,224
.byte   102,65,15,56,223,233
        movdqu  -16(%r10,%rbx,1),%xmm8
        movdqu  -16(%r11,%rbx,1),%xmm9

        movups  %xmm2,-16(%r12,%rbx,1)
        movdqu  (%r8,%rbx,1),%xmm2
        movups  %xmm3,-16(%r13,%rbx,1)
        movdqu  (%r9,%rbx,1),%xmm3
        pxor    %xmm12,%xmm2
        movups  %xmm4,-16(%r14,%rbx,1)
        movdqu  (%r10,%rbx,1),%xmm4
        pxor    %xmm12,%xmm3
        movups  %xmm5,-16(%r15,%rbx,1)
        movdqu  (%r11,%rbx,1),%xmm5
        pxor    %xmm12,%xmm4
        pxor    %xmm12,%xmm5

        decl    %edx
        jnz     .Loop_dec4x

        movq    16(%rsp),%rax
.cfi_def_cfa    %rax,8
        movl    24(%rsp),%edx

        leaq    160(%rdi),%rdi
        decl    %edx
        jnz     .Ldec4x_loop_grande

.Ldec4x_done:
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Ldec4x_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
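/*
 * AVX (8-wide) variants, reachable only through the
 * _avx_cbc_{enc,dec}_shortcut labels above.  %r8-%r15 walk the eight
 * input streams; each stream's output-minus-input delta is parked at
 * 64..120(%rsp) so the output address can be recomputed from the input
 * pointer inside the loop.
 */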
.type   aesni_multi_cbc_encrypt_avx,@function
.align  32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56








        subq    $192,%rsp
        andq    $-128,%rsp
        movq    %rax,16(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc8x_body:
        vzeroupper
        vmovdqu (%rsi),%xmm15
        leaq    120(%rsi),%rsi
        leaq    160(%rdi),%rdi
        shrl    $1,%edx

.Lenc8x_loop_grande:

        xorl    %edx,%edx
        movl    -144(%rdi),%ecx
        movq    -160(%rdi),%r8
        cmpl    %edx,%ecx
        movq    -152(%rdi),%rbx
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -136(%rdi),%xmm2
        movl    %ecx,32(%rsp)
        cmovleq %rsp,%r8
        subq    %r8,%rbx
        movq    %rbx,64(%rsp)
        movl    -104(%rdi),%ecx
        movq    -120(%rdi),%r9
        cmpl    %edx,%ecx
        movq    -112(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -96(%rdi),%xmm3
        movl    %ecx,36(%rsp)
        cmovleq %rsp,%r9
        subq    %r9,%rbp
        movq    %rbp,72(%rsp)
        movl    -64(%rdi),%ecx
        movq    -80(%rdi),%r10
        cmpl    %edx,%ecx
        movq    -72(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -56(%rdi),%xmm4
        movl    %ecx,40(%rsp)
        cmovleq %rsp,%r10
        subq    %r10,%rbp
        movq    %rbp,80(%rsp)
        movl    -24(%rdi),%ecx
        movq    -40(%rdi),%r11
        cmpl    %edx,%ecx
        movq    -32(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -16(%rdi),%xmm5
        movl    %ecx,44(%rsp)
        cmovleq %rsp,%r11
        subq    %r11,%rbp
        movq    %rbp,88(%rsp)
        movl    16(%rdi),%ecx
        movq    0(%rdi),%r12
        cmpl    %edx,%ecx
        movq    8(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 24(%rdi),%xmm6
        movl    %ecx,48(%rsp)
        cmovleq %rsp,%r12
        subq    %r12,%rbp
        movq    %rbp,96(%rsp)
        movl    56(%rdi),%ecx
        movq    40(%rdi),%r13
        cmpl    %edx,%ecx
        movq    48(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 64(%rdi),%xmm7
        movl    %ecx,52(%rsp)
        cmovleq %rsp,%r13
        subq    %r13,%rbp
        movq    %rbp,104(%rsp)
        movl    96(%rdi),%ecx
        movq    80(%rdi),%r14
        cmpl    %edx,%ecx
        movq    88(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 104(%rdi),%xmm8
        movl    %ecx,56(%rsp)
        cmovleq %rsp,%r14
        subq    %r14,%rbp
        movq    %rbp,112(%rsp)
        movl    136(%rdi),%ecx
        movq    120(%rdi),%r15
        cmpl    %edx,%ecx
        movq    128(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 144(%rdi),%xmm9
        movl    %ecx,60(%rsp)
        cmovleq %rsp,%r15
        subq    %r15,%rbp
        movq    %rbp,120(%rsp)
        testl   %edx,%edx
        jz      .Lenc8x_done

        vmovups 16-120(%rsi),%xmm1
        vmovups 32-120(%rsi),%xmm0
        movl    240-120(%rsi),%eax

        vpxor   (%r8),%xmm15,%xmm10
        leaq    128(%rsp),%rbp
        vpxor   (%r9),%xmm15,%xmm11
        vpxor   (%r10),%xmm15,%xmm12
        vpxor   (%r11),%xmm15,%xmm13
        vpxor   %xmm10,%xmm2,%xmm2
        vpxor   (%r12),%xmm15,%xmm10
        vpxor   %xmm11,%xmm3,%xmm3
        vpxor   (%r13),%xmm15,%xmm11
        vpxor   %xmm12,%xmm4,%xmm4
        vpxor   (%r14),%xmm15,%xmm12
        vpxor   %xmm13,%xmm5,%xmm5
        vpxor   (%r15),%xmm15,%xmm13
        vpxor   %xmm10,%xmm6,%xmm6
        movl    $1,%ecx
        vpxor   %xmm11,%xmm7,%xmm7
        vpxor   %xmm12,%xmm8,%xmm8
        vpxor   %xmm13,%xmm9,%xmm9
        jmp     .Loop_enc8x

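/*
 * 8-wide AVX encrypt loop.  Round keys alternate between %xmm1 and
 * %xmm0 so the next key streams in while the previous one is still in
 * use; each stream's next input block is XORed with round key 0
 * (%xmm15) on the fly and staged in the 128(%rsp) window via %rbp.
 */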
.align  32
.Loop_enc8x:
        vaesenc %xmm1,%xmm2,%xmm2
        cmpl    32+0(%rsp),%ecx
        vaesenc %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r8)
        vaesenc %xmm1,%xmm4,%xmm4
        vaesenc %xmm1,%xmm5,%xmm5
        leaq    (%r8,%rbx,1),%rbx
        cmovgeq %rsp,%r8
        vaesenc %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm1,%xmm7,%xmm7
        subq    %r8,%rbx
        vaesenc %xmm1,%xmm8,%xmm8
        vpxor   16(%r8),%xmm15,%xmm10
        movq    %rbx,64+0(%rsp)
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups -72(%rsi),%xmm1
        leaq    16(%r8,%rbx,1),%r8
        vmovdqu %xmm10,0(%rbp)
        vaesenc %xmm0,%xmm2,%xmm2
        cmpl    32+4(%rsp),%ecx
        movq    64+8(%rsp),%rbx
        vaesenc %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r9)
        vaesenc %xmm0,%xmm4,%xmm4
        vaesenc %xmm0,%xmm5,%xmm5
        leaq    (%r9,%rbx,1),%rbx
        cmovgeq %rsp,%r9
        vaesenc %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm0,%xmm7,%xmm7
        subq    %r9,%rbx
        vaesenc %xmm0,%xmm8,%xmm8
        vpxor   16(%r9),%xmm15,%xmm11
        movq    %rbx,64+8(%rsp)
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups -56(%rsi),%xmm0
        leaq    16(%r9,%rbx,1),%r9
        vmovdqu %xmm11,16(%rbp)
        vaesenc %xmm1,%xmm2,%xmm2
        cmpl    32+8(%rsp),%ecx
        movq    64+16(%rsp),%rbx
        vaesenc %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r10)
        vaesenc %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r8)
        vaesenc %xmm1,%xmm5,%xmm5
        leaq    (%r10,%rbx,1),%rbx
        cmovgeq %rsp,%r10
        vaesenc %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm1,%xmm7,%xmm7
        subq    %r10,%rbx
        vaesenc %xmm1,%xmm8,%xmm8
        vpxor   16(%r10),%xmm15,%xmm12
        movq    %rbx,64+16(%rsp)
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups -40(%rsi),%xmm1
        leaq    16(%r10,%rbx,1),%r10
        vmovdqu %xmm12,32(%rbp)
        vaesenc %xmm0,%xmm2,%xmm2
        cmpl    32+12(%rsp),%ecx
        movq    64+24(%rsp),%rbx
        vaesenc %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r11)
        vaesenc %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r9)
        vaesenc %xmm0,%xmm5,%xmm5
        leaq    (%r11,%rbx,1),%rbx
        cmovgeq %rsp,%r11
        vaesenc %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm0,%xmm7,%xmm7
        subq    %r11,%rbx
        vaesenc %xmm0,%xmm8,%xmm8
        vpxor   16(%r11),%xmm15,%xmm13
        movq    %rbx,64+24(%rsp)
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups -24(%rsi),%xmm0
        leaq    16(%r11,%rbx,1),%r11
        vmovdqu %xmm13,48(%rbp)
        vaesenc %xmm1,%xmm2,%xmm2
        cmpl    32+16(%rsp),%ecx
        movq    64+32(%rsp),%rbx
        vaesenc %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r12)
        vaesenc %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r10)
        vaesenc %xmm1,%xmm5,%xmm5
        leaq    (%r12,%rbx,1),%rbx
        cmovgeq %rsp,%r12
        vaesenc %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm1,%xmm7,%xmm7
        subq    %r12,%rbx
        vaesenc %xmm1,%xmm8,%xmm8
        vpxor   16(%r12),%xmm15,%xmm10
        movq    %rbx,64+32(%rsp)
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups -8(%rsi),%xmm1
        leaq    16(%r12,%rbx,1),%r12
        vaesenc %xmm0,%xmm2,%xmm2
        cmpl    32+20(%rsp),%ecx
        movq    64+40(%rsp),%rbx
        vaesenc %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r13)
        vaesenc %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r11)
        vaesenc %xmm0,%xmm5,%xmm5
        leaq    (%rbx,%r13,1),%rbx
        cmovgeq %rsp,%r13
        vaesenc %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm0,%xmm7,%xmm7
        subq    %r13,%rbx
        vaesenc %xmm0,%xmm8,%xmm8
        vpxor   16(%r13),%xmm15,%xmm11
        movq    %rbx,64+40(%rsp)
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups 8(%rsi),%xmm0
        leaq    16(%r13,%rbx,1),%r13
        vaesenc %xmm1,%xmm2,%xmm2
        cmpl    32+24(%rsp),%ecx
        movq    64+48(%rsp),%rbx
        vaesenc %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r14)
        vaesenc %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r12)
        vaesenc %xmm1,%xmm5,%xmm5
        leaq    (%r14,%rbx,1),%rbx
        cmovgeq %rsp,%r14
        vaesenc %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm1,%xmm7,%xmm7
        subq    %r14,%rbx
        vaesenc %xmm1,%xmm8,%xmm8
        vpxor   16(%r14),%xmm15,%xmm12
        movq    %rbx,64+48(%rsp)
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 24(%rsi),%xmm1
        leaq    16(%r14,%rbx,1),%r14
        vaesenc %xmm0,%xmm2,%xmm2
        cmpl    32+28(%rsp),%ecx
        movq    64+56(%rsp),%rbx
        vaesenc %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r15)
        vaesenc %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r13)
        vaesenc %xmm0,%xmm5,%xmm5
        leaq    (%r15,%rbx,1),%rbx
        cmovgeq %rsp,%r15
        vaesenc %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesenc %xmm0,%xmm7,%xmm7
        subq    %r15,%rbx
        vaesenc %xmm0,%xmm8,%xmm8
        vpxor   16(%r15),%xmm15,%xmm13
        movq    %rbx,64+56(%rsp)
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups 40(%rsi),%xmm0
        leaq    16(%r15,%rbx,1),%r15
        vmovdqu 32(%rsp),%xmm14
        prefetcht0      15(%r14)
        prefetcht0      15(%r15)
        cmpl    $11,%eax
        jb      .Lenc8x_tail

        vaesenc %xmm1,%xmm2,%xmm2
        vaesenc %xmm1,%xmm3,%xmm3
        vaesenc %xmm1,%xmm4,%xmm4
        vaesenc %xmm1,%xmm5,%xmm5
        vaesenc %xmm1,%xmm6,%xmm6
        vaesenc %xmm1,%xmm7,%xmm7
        vaesenc %xmm1,%xmm8,%xmm8
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 176-120(%rsi),%xmm1

        vaesenc %xmm0,%xmm2,%xmm2
        vaesenc %xmm0,%xmm3,%xmm3
        vaesenc %xmm0,%xmm4,%xmm4
        vaesenc %xmm0,%xmm5,%xmm5
        vaesenc %xmm0,%xmm6,%xmm6
        vaesenc %xmm0,%xmm7,%xmm7
        vaesenc %xmm0,%xmm8,%xmm8
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups 192-120(%rsi),%xmm0
        je      .Lenc8x_tail

        vaesenc %xmm1,%xmm2,%xmm2
        vaesenc %xmm1,%xmm3,%xmm3
        vaesenc %xmm1,%xmm4,%xmm4
        vaesenc %xmm1,%xmm5,%xmm5
        vaesenc %xmm1,%xmm6,%xmm6
        vaesenc %xmm1,%xmm7,%xmm7
        vaesenc %xmm1,%xmm8,%xmm8
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 208-120(%rsi),%xmm1

        vaesenc %xmm0,%xmm2,%xmm2
        vaesenc %xmm0,%xmm3,%xmm3
        vaesenc %xmm0,%xmm4,%xmm4
        vaesenc %xmm0,%xmm5,%xmm5
        vaesenc %xmm0,%xmm6,%xmm6
        vaesenc %xmm0,%xmm7,%xmm7
        vaesenc %xmm0,%xmm8,%xmm8
        vaesenc %xmm0,%xmm9,%xmm9
        vmovups 224-120(%rsi),%xmm0

.Lenc8x_tail:
        vaesenc %xmm1,%xmm2,%xmm2
        vpxor   %xmm15,%xmm15,%xmm15
        vaesenc %xmm1,%xmm3,%xmm3
        vaesenc %xmm1,%xmm4,%xmm4
        vpcmpgtd        %xmm15,%xmm14,%xmm15
        vaesenc %xmm1,%xmm5,%xmm5
        vaesenc %xmm1,%xmm6,%xmm6
        vpaddd  %xmm14,%xmm15,%xmm15
        vmovdqu 48(%rsp),%xmm14
        vaesenc %xmm1,%xmm7,%xmm7
        movq    64(%rsp),%rbx
        vaesenc %xmm1,%xmm8,%xmm8
        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 16-120(%rsi),%xmm1

        vaesenclast     %xmm0,%xmm2,%xmm2
        vmovdqa %xmm15,32(%rsp)
        vpxor   %xmm15,%xmm15,%xmm15
        vaesenclast     %xmm0,%xmm3,%xmm3
        vaesenclast     %xmm0,%xmm4,%xmm4
        vpcmpgtd        %xmm15,%xmm14,%xmm15
        vaesenclast     %xmm0,%xmm5,%xmm5
        vaesenclast     %xmm0,%xmm6,%xmm6
        vpaddd  %xmm15,%xmm14,%xmm14
        vmovdqu -120(%rsi),%xmm15
        vaesenclast     %xmm0,%xmm7,%xmm7
        vaesenclast     %xmm0,%xmm8,%xmm8
        vmovdqa %xmm14,48(%rsp)
        vaesenclast     %xmm0,%xmm9,%xmm9
        vmovups 32-120(%rsi),%xmm0

        vmovups %xmm2,-16(%r8)
        subq    %rbx,%r8
        vpxor   0(%rbp),%xmm2,%xmm2
        vmovups %xmm3,-16(%r9)
        subq    72(%rsp),%r9
        vpxor   16(%rbp),%xmm3,%xmm3
        vmovups %xmm4,-16(%r10)
        subq    80(%rsp),%r10
        vpxor   32(%rbp),%xmm4,%xmm4
        vmovups %xmm5,-16(%r11)
        subq    88(%rsp),%r11
        vpxor   48(%rbp),%xmm5,%xmm5
        vmovups %xmm6,-16(%r12)
        subq    96(%rsp),%r12
        vpxor   %xmm10,%xmm6,%xmm6
        vmovups %xmm7,-16(%r13)
        subq    104(%rsp),%r13
        vpxor   %xmm11,%xmm7,%xmm7
        vmovups %xmm8,-16(%r14)
        subq    112(%rsp),%r14
        vpxor   %xmm12,%xmm8,%xmm8
        vmovups %xmm9,-16(%r15)
        subq    120(%rsp),%r15
        vpxor   %xmm13,%xmm9,%xmm9

        decl    %edx
        jnz     .Loop_enc8x

        movq    16(%rsp),%rax
.cfi_def_cfa    %rax,8





.Lenc8x_done:
        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lenc8x_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

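/*
 * The AVX decrypt variant mirrors the encrypt path but needs extra
 * scratch: incoming ciphertext blocks are saved in two 128-byte windows
 * at 192(%rsp) and 320(%rsp), and %rbp flips between them (xorq $0x80 /
 * $128) so a block decrypted in place can still supply the CBC chaining
 * value for the following block.
 */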
.type   aesni_multi_cbc_decrypt_avx,@function
.align  32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56









        subq    $256,%rsp
        andq    $-256,%rsp
        subq    $192,%rsp
        movq    %rax,16(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec8x_body:
        vzeroupper
        vmovdqu (%rsi),%xmm15
        leaq    120(%rsi),%rsi
        leaq    160(%rdi),%rdi
        shrl    $1,%edx

.Ldec8x_loop_grande:

        xorl    %edx,%edx
        movl    -144(%rdi),%ecx
        movq    -160(%rdi),%r8
        cmpl    %edx,%ecx
        movq    -152(%rdi),%rbx
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -136(%rdi),%xmm2
        movl    %ecx,32(%rsp)
        cmovleq %rsp,%r8
        subq    %r8,%rbx
        movq    %rbx,64(%rsp)
        vmovdqu %xmm2,192(%rsp)
        movl    -104(%rdi),%ecx
        movq    -120(%rdi),%r9
        cmpl    %edx,%ecx
        movq    -112(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -96(%rdi),%xmm3
        movl    %ecx,36(%rsp)
        cmovleq %rsp,%r9
        subq    %r9,%rbp
        movq    %rbp,72(%rsp)
        vmovdqu %xmm3,208(%rsp)
        movl    -64(%rdi),%ecx
        movq    -80(%rdi),%r10
        cmpl    %edx,%ecx
        movq    -72(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -56(%rdi),%xmm4
        movl    %ecx,40(%rsp)
        cmovleq %rsp,%r10
        subq    %r10,%rbp
        movq    %rbp,80(%rsp)
        vmovdqu %xmm4,224(%rsp)
        movl    -24(%rdi),%ecx
        movq    -40(%rdi),%r11
        cmpl    %edx,%ecx
        movq    -32(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu -16(%rdi),%xmm5
        movl    %ecx,44(%rsp)
        cmovleq %rsp,%r11
        subq    %r11,%rbp
        movq    %rbp,88(%rsp)
        vmovdqu %xmm5,240(%rsp)
        movl    16(%rdi),%ecx
        movq    0(%rdi),%r12
        cmpl    %edx,%ecx
        movq    8(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 24(%rdi),%xmm6
        movl    %ecx,48(%rsp)
        cmovleq %rsp,%r12
        subq    %r12,%rbp
        movq    %rbp,96(%rsp)
        vmovdqu %xmm6,256(%rsp)
        movl    56(%rdi),%ecx
        movq    40(%rdi),%r13
        cmpl    %edx,%ecx
        movq    48(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 64(%rdi),%xmm7
        movl    %ecx,52(%rsp)
        cmovleq %rsp,%r13
        subq    %r13,%rbp
        movq    %rbp,104(%rsp)
        vmovdqu %xmm7,272(%rsp)
        movl    96(%rdi),%ecx
        movq    80(%rdi),%r14
        cmpl    %edx,%ecx
        movq    88(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 104(%rdi),%xmm8
        movl    %ecx,56(%rsp)
        cmovleq %rsp,%r14
        subq    %r14,%rbp
        movq    %rbp,112(%rsp)
        vmovdqu %xmm8,288(%rsp)
        movl    136(%rdi),%ecx
        movq    120(%rdi),%r15
        cmpl    %edx,%ecx
        movq    128(%rdi),%rbp
        cmovgl  %ecx,%edx
        testl   %ecx,%ecx
        vmovdqu 144(%rdi),%xmm9
        movl    %ecx,60(%rsp)
        cmovleq %rsp,%r15
        subq    %r15,%rbp
        movq    %rbp,120(%rsp)
        vmovdqu %xmm9,304(%rsp)
        testl   %edx,%edx
        jz      .Ldec8x_done

        vmovups 16-120(%rsi),%xmm1
        vmovups 32-120(%rsi),%xmm0
        movl    240-120(%rsi),%eax
        leaq    192+128(%rsp),%rbp

        vmovdqu (%r8),%xmm2
        vmovdqu (%r9),%xmm3
        vmovdqu (%r10),%xmm4
        vmovdqu (%r11),%xmm5
        vmovdqu (%r12),%xmm6
        vmovdqu (%r13),%xmm7
        vmovdqu (%r14),%xmm8
        vmovdqu (%r15),%xmm9
        vmovdqu %xmm2,0(%rbp)
        vpxor   %xmm15,%xmm2,%xmm2
        vmovdqu %xmm3,16(%rbp)
        vpxor   %xmm15,%xmm3,%xmm3
        vmovdqu %xmm4,32(%rbp)
        vpxor   %xmm15,%xmm4,%xmm4
        vmovdqu %xmm5,48(%rbp)
        vpxor   %xmm15,%xmm5,%xmm5
        vmovdqu %xmm6,64(%rbp)
        vpxor   %xmm15,%xmm6,%xmm6
        vmovdqu %xmm7,80(%rbp)
        vpxor   %xmm15,%xmm7,%xmm7
        vmovdqu %xmm8,96(%rbp)
        vpxor   %xmm15,%xmm8,%xmm8
        vmovdqu %xmm9,112(%rbp)
        vpxor   %xmm15,%xmm9,%xmm9
        xorq    $0x80,%rbp
        movl    $1,%ecx
        jmp     .Loop_dec8x

.align  32
.Loop_dec8x:
        vaesdec %xmm1,%xmm2,%xmm2
        cmpl    32+0(%rsp),%ecx
        vaesdec %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r8)
        vaesdec %xmm1,%xmm4,%xmm4
        vaesdec %xmm1,%xmm5,%xmm5
        leaq    (%r8,%rbx,1),%rbx
        cmovgeq %rsp,%r8
        vaesdec %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm1,%xmm7,%xmm7
        subq    %r8,%rbx
        vaesdec %xmm1,%xmm8,%xmm8
        vmovdqu 16(%r8),%xmm10
        movq    %rbx,64+0(%rsp)
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups -72(%rsi),%xmm1
        leaq    16(%r8,%rbx,1),%r8
        vmovdqu %xmm10,128(%rsp)
        vaesdec %xmm0,%xmm2,%xmm2
        cmpl    32+4(%rsp),%ecx
        movq    64+8(%rsp),%rbx
        vaesdec %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r9)
        vaesdec %xmm0,%xmm4,%xmm4
        vaesdec %xmm0,%xmm5,%xmm5
        leaq    (%r9,%rbx,1),%rbx
        cmovgeq %rsp,%r9
        vaesdec %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm0,%xmm7,%xmm7
        subq    %r9,%rbx
        vaesdec %xmm0,%xmm8,%xmm8
        vmovdqu 16(%r9),%xmm11
        movq    %rbx,64+8(%rsp)
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups -56(%rsi),%xmm0
        leaq    16(%r9,%rbx,1),%r9
        vmovdqu %xmm11,144(%rsp)
        vaesdec %xmm1,%xmm2,%xmm2
        cmpl    32+8(%rsp),%ecx
        movq    64+16(%rsp),%rbx
        vaesdec %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r10)
        vaesdec %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r8)
        vaesdec %xmm1,%xmm5,%xmm5
        leaq    (%r10,%rbx,1),%rbx
        cmovgeq %rsp,%r10
        vaesdec %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm1,%xmm7,%xmm7
        subq    %r10,%rbx
        vaesdec %xmm1,%xmm8,%xmm8
        vmovdqu 16(%r10),%xmm12
        movq    %rbx,64+16(%rsp)
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups -40(%rsi),%xmm1
        leaq    16(%r10,%rbx,1),%r10
        vmovdqu %xmm12,160(%rsp)
        vaesdec %xmm0,%xmm2,%xmm2
        cmpl    32+12(%rsp),%ecx
        movq    64+24(%rsp),%rbx
        vaesdec %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r11)
        vaesdec %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r9)
        vaesdec %xmm0,%xmm5,%xmm5
        leaq    (%r11,%rbx,1),%rbx
        cmovgeq %rsp,%r11
        vaesdec %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm0,%xmm7,%xmm7
        subq    %r11,%rbx
        vaesdec %xmm0,%xmm8,%xmm8
        vmovdqu 16(%r11),%xmm13
        movq    %rbx,64+24(%rsp)
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups -24(%rsi),%xmm0
        leaq    16(%r11,%rbx,1),%r11
        vmovdqu %xmm13,176(%rsp)
        vaesdec %xmm1,%xmm2,%xmm2
        cmpl    32+16(%rsp),%ecx
        movq    64+32(%rsp),%rbx
        vaesdec %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r12)
        vaesdec %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r10)
        vaesdec %xmm1,%xmm5,%xmm5
        leaq    (%r12,%rbx,1),%rbx
        cmovgeq %rsp,%r12
        vaesdec %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm1,%xmm7,%xmm7
        subq    %r12,%rbx
        vaesdec %xmm1,%xmm8,%xmm8
        vmovdqu 16(%r12),%xmm10
        movq    %rbx,64+32(%rsp)
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups -8(%rsi),%xmm1
        leaq    16(%r12,%rbx,1),%r12
        vaesdec %xmm0,%xmm2,%xmm2
        cmpl    32+20(%rsp),%ecx
        movq    64+40(%rsp),%rbx
        vaesdec %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r13)
        vaesdec %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r11)
        vaesdec %xmm0,%xmm5,%xmm5
        leaq    (%rbx,%r13,1),%rbx
        cmovgeq %rsp,%r13
        vaesdec %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm0,%xmm7,%xmm7
        subq    %r13,%rbx
        vaesdec %xmm0,%xmm8,%xmm8
        vmovdqu 16(%r13),%xmm11
        movq    %rbx,64+40(%rsp)
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups 8(%rsi),%xmm0
        leaq    16(%r13,%rbx,1),%r13
        vaesdec %xmm1,%xmm2,%xmm2
        cmpl    32+24(%rsp),%ecx
        movq    64+48(%rsp),%rbx
        vaesdec %xmm1,%xmm3,%xmm3
        prefetcht0      31(%r14)
        vaesdec %xmm1,%xmm4,%xmm4
        prefetcht0      15(%r12)
        vaesdec %xmm1,%xmm5,%xmm5
        leaq    (%r14,%rbx,1),%rbx
        cmovgeq %rsp,%r14
        vaesdec %xmm1,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm1,%xmm7,%xmm7
        subq    %r14,%rbx
        vaesdec %xmm1,%xmm8,%xmm8
        vmovdqu 16(%r14),%xmm12
        movq    %rbx,64+48(%rsp)
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups 24(%rsi),%xmm1
        leaq    16(%r14,%rbx,1),%r14
        vaesdec %xmm0,%xmm2,%xmm2
        cmpl    32+28(%rsp),%ecx
        movq    64+56(%rsp),%rbx
        vaesdec %xmm0,%xmm3,%xmm3
        prefetcht0      31(%r15)
        vaesdec %xmm0,%xmm4,%xmm4
        prefetcht0      15(%r13)
        vaesdec %xmm0,%xmm5,%xmm5
        leaq    (%r15,%rbx,1),%rbx
        cmovgeq %rsp,%r15
        vaesdec %xmm0,%xmm6,%xmm6
        cmovgq  %rsp,%rbx
        vaesdec %xmm0,%xmm7,%xmm7
        subq    %r15,%rbx
        vaesdec %xmm0,%xmm8,%xmm8
        vmovdqu 16(%r15),%xmm13
        movq    %rbx,64+56(%rsp)
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups 40(%rsi),%xmm0
        leaq    16(%r15,%rbx,1),%r15
        vmovdqu 32(%rsp),%xmm14
        prefetcht0      15(%r14)
        prefetcht0      15(%r15)
        cmpl    $11,%eax
        jb      .Ldec8x_tail

        vaesdec %xmm1,%xmm2,%xmm2
        vaesdec %xmm1,%xmm3,%xmm3
        vaesdec %xmm1,%xmm4,%xmm4
        vaesdec %xmm1,%xmm5,%xmm5
        vaesdec %xmm1,%xmm6,%xmm6
        vaesdec %xmm1,%xmm7,%xmm7
        vaesdec %xmm1,%xmm8,%xmm8
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups 176-120(%rsi),%xmm1

        vaesdec %xmm0,%xmm2,%xmm2
        vaesdec %xmm0,%xmm3,%xmm3
        vaesdec %xmm0,%xmm4,%xmm4
        vaesdec %xmm0,%xmm5,%xmm5
        vaesdec %xmm0,%xmm6,%xmm6
        vaesdec %xmm0,%xmm7,%xmm7
        vaesdec %xmm0,%xmm8,%xmm8
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups 192-120(%rsi),%xmm0
        je      .Ldec8x_tail

        vaesdec %xmm1,%xmm2,%xmm2
        vaesdec %xmm1,%xmm3,%xmm3
        vaesdec %xmm1,%xmm4,%xmm4
        vaesdec %xmm1,%xmm5,%xmm5
        vaesdec %xmm1,%xmm6,%xmm6
        vaesdec %xmm1,%xmm7,%xmm7
        vaesdec %xmm1,%xmm8,%xmm8
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups 208-120(%rsi),%xmm1

        vaesdec %xmm0,%xmm2,%xmm2
        vaesdec %xmm0,%xmm3,%xmm3
        vaesdec %xmm0,%xmm4,%xmm4
        vaesdec %xmm0,%xmm5,%xmm5
        vaesdec %xmm0,%xmm6,%xmm6
        vaesdec %xmm0,%xmm7,%xmm7
        vaesdec %xmm0,%xmm8,%xmm8
        vaesdec %xmm0,%xmm9,%xmm9
        vmovups 224-120(%rsi),%xmm0

.Ldec8x_tail:
        vaesdec %xmm1,%xmm2,%xmm2
        vpxor   %xmm15,%xmm15,%xmm15
        vaesdec %xmm1,%xmm3,%xmm3
        vaesdec %xmm1,%xmm4,%xmm4
        vpcmpgtd        %xmm15,%xmm14,%xmm15
        vaesdec %xmm1,%xmm5,%xmm5
        vaesdec %xmm1,%xmm6,%xmm6
        vpaddd  %xmm14,%xmm15,%xmm15
        vmovdqu 48(%rsp),%xmm14
        vaesdec %xmm1,%xmm7,%xmm7
        movq    64(%rsp),%rbx
        vaesdec %xmm1,%xmm8,%xmm8
        vaesdec %xmm1,%xmm9,%xmm9
        vmovups 16-120(%rsi),%xmm1

        vaesdeclast     %xmm0,%xmm2,%xmm2
        vmovdqa %xmm15,32(%rsp)
        vpxor   %xmm15,%xmm15,%xmm15
        vaesdeclast     %xmm0,%xmm3,%xmm3
        vpxor   0(%rbp),%xmm2,%xmm2
        vaesdeclast     %xmm0,%xmm4,%xmm4
        vpxor   16(%rbp),%xmm3,%xmm3
        vpcmpgtd        %xmm15,%xmm14,%xmm15
        vaesdeclast     %xmm0,%xmm5,%xmm5
        vpxor   32(%rbp),%xmm4,%xmm4
        vaesdeclast     %xmm0,%xmm6,%xmm6
        vpxor   48(%rbp),%xmm5,%xmm5
        vpaddd  %xmm15,%xmm14,%xmm14
        vmovdqu -120(%rsi),%xmm15
        vaesdeclast     %xmm0,%xmm7,%xmm7
        vpxor   64(%rbp),%xmm6,%xmm6
        vaesdeclast     %xmm0,%xmm8,%xmm8
        vpxor   80(%rbp),%xmm7,%xmm7
        vmovdqa %xmm14,48(%rsp)
        vaesdeclast     %xmm0,%xmm9,%xmm9
        vpxor   96(%rbp),%xmm8,%xmm8
        vmovups 32-120(%rsi),%xmm0

        vmovups %xmm2,-16(%r8)
        subq    %rbx,%r8
        vmovdqu 128+0(%rsp),%xmm2
        vpxor   112(%rbp),%xmm9,%xmm9
        vmovups %xmm3,-16(%r9)
        subq    72(%rsp),%r9
        vmovdqu %xmm2,0(%rbp)
        vpxor   %xmm15,%xmm2,%xmm2
        vmovdqu 128+16(%rsp),%xmm3
        vmovups %xmm4,-16(%r10)
        subq    80(%rsp),%r10
        vmovdqu %xmm3,16(%rbp)
        vpxor   %xmm15,%xmm3,%xmm3
        vmovdqu 128+32(%rsp),%xmm4
        vmovups %xmm5,-16(%r11)
        subq    88(%rsp),%r11
        vmovdqu %xmm4,32(%rbp)
        vpxor   %xmm15,%xmm4,%xmm4
        vmovdqu 128+48(%rsp),%xmm5
        vmovups %xmm6,-16(%r12)
        subq    96(%rsp),%r12
        vmovdqu %xmm5,48(%rbp)
        vpxor   %xmm15,%xmm5,%xmm5
        vmovdqu %xmm10,64(%rbp)
        vpxor   %xmm10,%xmm15,%xmm6
        vmovups %xmm7,-16(%r13)
        subq    104(%rsp),%r13
        vmovdqu %xmm11,80(%rbp)
        vpxor   %xmm11,%xmm15,%xmm7
        vmovups %xmm8,-16(%r14)
        subq    112(%rsp),%r14
        vmovdqu %xmm12,96(%rbp)
        vpxor   %xmm12,%xmm15,%xmm8
        vmovups %xmm9,-16(%r15)
        subq    120(%rsp),%r15
        vmovdqu %xmm13,112(%rbp)
        vpxor   %xmm13,%xmm15,%xmm9

        xorq    $128,%rbp
        decl    %edx
        jnz     .Loop_dec8x

        movq    16(%rsp),%rax
.cfi_def_cfa    %rax,8





.Ldec8x_done:
        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Ldec8x_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
