The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/amd64/aesni-x86_64.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
    3 .text   
    4 
    5 .globl  aesni_encrypt
      /*
       * aesni_encrypt: encrypt a single 16-byte block.
       *   %rdi = address of the 16-byte input block
       *   %rsi = address where the 16-byte result is stored
       *   %rdx = address of the round-key array; the 32-bit value at
       *          offset 240 is used as the loop count (presumably the
       *          round count written by the key-setup routine, which is
       *          not visible here -- confirm)
       * Modifies %eax, %rdx and flags; %xmm0-%xmm2 are zeroed on exit.
       * The .byte rows are hand-encoded instructions:
       *   102,15,56,220,209 = aesenc     %xmm1,%xmm2
       *   102,15,56,221,209 = aesenclast %xmm1,%xmm2
       *   0xf3,0xc3         = rep ret
       */
    6 .type   aesni_encrypt,@function
    7 .align  16
    8 aesni_encrypt:
    9 .cfi_startproc  
   10         movups  (%rdi),%xmm2
   11         movl    240(%rdx),%eax
   12         movups  (%rdx),%xmm0
   13         movups  16(%rdx),%xmm1
   14         leaq    32(%rdx),%rdx
              /* XOR the block with the first 16 bytes of the key array */
   15         xorps   %xmm0,%xmm2
   16 .Loop_enc1_1:
   17 .byte   102,15,56,220,209
   18         decl    %eax
              /* fetch the next round key; movups/leaq leave flags alone,
                 so the jnz below still tests the result of decl */
   19         movups  (%rdx),%xmm1
   20         leaq    16(%rdx),%rdx
   21         jnz     .Loop_enc1_1
   22 .byte   102,15,56,221,209
              /* clear the key registers, store the result, then clear it too */
   23         pxor    %xmm0,%xmm0
   24         pxor    %xmm1,%xmm1
   25         movups  %xmm2,(%rsi)
   26         pxor    %xmm2,%xmm2
   27         .byte   0xf3,0xc3
   28 .cfi_endproc    
   29 .size   aesni_encrypt,.-aesni_encrypt
   30 
   31 .globl  aesni_decrypt
      /*
       * aesni_decrypt: decrypt a single 16-byte block.
       *   %rdi = address of the 16-byte input block
       *   %rsi = address where the 16-byte result is stored
       *   %rdx = address of the round-key array; the 32-bit value at
       *          offset 240 is used as the loop count (presumably the
       *          round count written by the key-setup routine, which is
       *          not visible here -- confirm)
       * Modifies %eax, %rdx and flags; %xmm0-%xmm2 are zeroed on exit.
       * The .byte rows are hand-encoded instructions:
       *   102,15,56,222,209 = aesdec     %xmm1,%xmm2
       *   102,15,56,223,209 = aesdeclast %xmm1,%xmm2
       *   0xf3,0xc3         = rep ret
       */
   32 .type   aesni_decrypt,@function
   33 .align  16
   34 aesni_decrypt:
   35 .cfi_startproc  
   36         movups  (%rdi),%xmm2
   37         movl    240(%rdx),%eax
   38         movups  (%rdx),%xmm0
   39         movups  16(%rdx),%xmm1
   40         leaq    32(%rdx),%rdx
              /* XOR the block with the first 16 bytes of the key array */
   41         xorps   %xmm0,%xmm2
   42 .Loop_dec1_2:
   43 .byte   102,15,56,222,209
   44         decl    %eax
              /* fetch the next round key; movups/leaq leave flags alone,
                 so the jnz below still tests the result of decl */
   45         movups  (%rdx),%xmm1
   46         leaq    16(%rdx),%rdx
   47         jnz     .Loop_dec1_2
   48 .byte   102,15,56,223,209
              /* clear the key registers, store the result, then clear it too */
   49         pxor    %xmm0,%xmm0
   50         pxor    %xmm1,%xmm1
   51         movups  %xmm2,(%rsi)
   52         pxor    %xmm2,%xmm2
   53         .byte   0xf3,0xc3
   54 .cfi_endproc    
   55 .size   aesni_decrypt, .-aesni_decrypt
   56 .type   _aesni_encrypt2,@function
      /*
       * _aesni_encrypt2: file-local helper (no .globl) that encrypts the two
       * blocks held in %xmm2 and %xmm3, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 220 = aesenc, OP 221 = aesenclast
       *   MODRM 209 / 217 = key in %xmm1 applied to %xmm2 / %xmm3
       *   MODRM 208 / 216 = key in %xmm0 applied to %xmm2 / %xmm3
       * .byte 0xf3,0xc3 = rep ret
       */
   57 .align  16
   58 _aesni_encrypt2:
   59 .cfi_startproc  
   60         movups  (%rcx),%xmm0
   61         shll    $4,%eax
   62         movups  16(%rcx),%xmm1
   63         xorps   %xmm0,%xmm2
   64         xorps   %xmm0,%xmm3
   65         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
   66         leaq    32(%rcx,%rax,1),%rcx
   67         negq    %rax
   68         addq    $16,%rax
   69 
   70 .Lenc_loop2:
   71 .byte   102,15,56,220,209
   72 .byte   102,15,56,220,217
   73         movups  (%rcx,%rax,1),%xmm1
   74         addq    $32,%rax
   75 .byte   102,15,56,220,208
   76 .byte   102,15,56,220,216
              /* movups does not touch flags: jnz tests the addq above */
   77         movups  -16(%rcx,%rax,1),%xmm0
   78         jnz     .Lenc_loop2
   79 
              /* one more aesenc with %xmm1, then aesenclast with %xmm0 */
   80 .byte   102,15,56,220,209
   81 .byte   102,15,56,220,217
   82 .byte   102,15,56,221,208
   83 .byte   102,15,56,221,216
   84         .byte   0xf3,0xc3
   85 .cfi_endproc    
   86 .size   _aesni_encrypt2,.-_aesni_encrypt2
   87 .type   _aesni_decrypt2,@function
      /*
       * _aesni_decrypt2: file-local helper (no .globl) that decrypts the two
       * blocks held in %xmm2 and %xmm3, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 222 = aesdec, OP 223 = aesdeclast
       *   MODRM 209 / 217 = key in %xmm1 applied to %xmm2 / %xmm3
       *   MODRM 208 / 216 = key in %xmm0 applied to %xmm2 / %xmm3
       * .byte 0xf3,0xc3 = rep ret
       */
   88 .align  16
   89 _aesni_decrypt2:
   90 .cfi_startproc  
   91         movups  (%rcx),%xmm0
   92         shll    $4,%eax
   93         movups  16(%rcx),%xmm1
   94         xorps   %xmm0,%xmm2
   95         xorps   %xmm0,%xmm3
   96         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
   97         leaq    32(%rcx,%rax,1),%rcx
   98         negq    %rax
   99         addq    $16,%rax
  100 
  101 .Ldec_loop2:
  102 .byte   102,15,56,222,209
  103 .byte   102,15,56,222,217
  104         movups  (%rcx,%rax,1),%xmm1
  105         addq    $32,%rax
  106 .byte   102,15,56,222,208
  107 .byte   102,15,56,222,216
              /* movups does not touch flags: jnz tests the addq above */
  108         movups  -16(%rcx,%rax,1),%xmm0
  109         jnz     .Ldec_loop2
  110 
              /* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
  111 .byte   102,15,56,222,209
  112 .byte   102,15,56,222,217
  113 .byte   102,15,56,223,208
  114 .byte   102,15,56,223,216
  115         .byte   0xf3,0xc3
  116 .cfi_endproc    
  117 .size   _aesni_decrypt2,.-_aesni_decrypt2
  118 .type   _aesni_encrypt3,@function
      /*
       * _aesni_encrypt3: file-local helper (no .globl) that encrypts the
       * three blocks held in %xmm2-%xmm4, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 220 = aesenc, OP 221 = aesenclast
       *   MODRM 209 / 217 / 225 = key in %xmm1 applied to %xmm2 / 3 / 4
       *   MODRM 208 / 216 / 224 = key in %xmm0 applied to %xmm2 / 3 / 4
       * .byte 0xf3,0xc3 = rep ret
       */
  119 .align  16
  120 _aesni_encrypt3:
  121 .cfi_startproc  
  122         movups  (%rcx),%xmm0
  123         shll    $4,%eax
  124         movups  16(%rcx),%xmm1
  125         xorps   %xmm0,%xmm2
  126         xorps   %xmm0,%xmm3
  127         xorps   %xmm0,%xmm4
  128         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
  129         leaq    32(%rcx,%rax,1),%rcx
  130         negq    %rax
  131         addq    $16,%rax
  132 
  133 .Lenc_loop3:
  134 .byte   102,15,56,220,209
  135 .byte   102,15,56,220,217
  136 .byte   102,15,56,220,225
  137         movups  (%rcx,%rax,1),%xmm1
  138         addq    $32,%rax
  139 .byte   102,15,56,220,208
  140 .byte   102,15,56,220,216
  141 .byte   102,15,56,220,224
              /* movups does not touch flags: jnz tests the addq above */
  142         movups  -16(%rcx,%rax,1),%xmm0
  143         jnz     .Lenc_loop3
  144 
              /* one more aesenc with %xmm1, then aesenclast with %xmm0 */
  145 .byte   102,15,56,220,209
  146 .byte   102,15,56,220,217
  147 .byte   102,15,56,220,225
  148 .byte   102,15,56,221,208
  149 .byte   102,15,56,221,216
  150 .byte   102,15,56,221,224
  151         .byte   0xf3,0xc3
  152 .cfi_endproc    
  153 .size   _aesni_encrypt3,.-_aesni_encrypt3
  154 .type   _aesni_decrypt3,@function
      /*
       * _aesni_decrypt3: file-local helper (no .globl) that decrypts the
       * three blocks held in %xmm2-%xmm4, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 222 = aesdec, OP 223 = aesdeclast
       *   MODRM 209 / 217 / 225 = key in %xmm1 applied to %xmm2 / 3 / 4
       *   MODRM 208 / 216 / 224 = key in %xmm0 applied to %xmm2 / 3 / 4
       * .byte 0xf3,0xc3 = rep ret
       */
  155 .align  16
  156 _aesni_decrypt3:
  157 .cfi_startproc  
  158         movups  (%rcx),%xmm0
  159         shll    $4,%eax
  160         movups  16(%rcx),%xmm1
  161         xorps   %xmm0,%xmm2
  162         xorps   %xmm0,%xmm3
  163         xorps   %xmm0,%xmm4
  164         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
  165         leaq    32(%rcx,%rax,1),%rcx
  166         negq    %rax
  167         addq    $16,%rax
  168 
  169 .Ldec_loop3:
  170 .byte   102,15,56,222,209
  171 .byte   102,15,56,222,217
  172 .byte   102,15,56,222,225
  173         movups  (%rcx,%rax,1),%xmm1
  174         addq    $32,%rax
  175 .byte   102,15,56,222,208
  176 .byte   102,15,56,222,216
  177 .byte   102,15,56,222,224
              /* movups does not touch flags: jnz tests the addq above */
  178         movups  -16(%rcx,%rax,1),%xmm0
  179         jnz     .Ldec_loop3
  180 
              /* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
  181 .byte   102,15,56,222,209
  182 .byte   102,15,56,222,217
  183 .byte   102,15,56,222,225
  184 .byte   102,15,56,223,208
  185 .byte   102,15,56,223,216
  186 .byte   102,15,56,223,224
  187         .byte   0xf3,0xc3
  188 .cfi_endproc    
  189 .size   _aesni_decrypt3,.-_aesni_decrypt3
  190 .type   _aesni_encrypt4,@function
      /*
       * _aesni_encrypt4: file-local helper (no .globl) that encrypts the
       * four blocks held in %xmm2-%xmm5, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 220 = aesenc, OP 221 = aesenclast
       *   MODRM 209 / 217 / 225 / 233 = key in %xmm1 applied to %xmm2..%xmm5
       *   MODRM 208 / 216 / 224 / 232 = key in %xmm0 applied to %xmm2..%xmm5
       * .byte 0x0f,0x1f,0x00 = nopl (%rax), a 3-byte no-op
       * .byte 0xf3,0xc3      = rep ret
       */
  191 .align  16
  192 _aesni_encrypt4:
  193 .cfi_startproc  
  194         movups  (%rcx),%xmm0
  195         shll    $4,%eax
  196         movups  16(%rcx),%xmm1
  197         xorps   %xmm0,%xmm2
  198         xorps   %xmm0,%xmm3
  199         xorps   %xmm0,%xmm4
  200         xorps   %xmm0,%xmm5
  201         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
  202         leaq    32(%rcx,%rax,1),%rcx
  203         negq    %rax
  204 .byte   0x0f,0x1f,0x00
  205         addq    $16,%rax
  206 
  207 .Lenc_loop4:
  208 .byte   102,15,56,220,209
  209 .byte   102,15,56,220,217
  210 .byte   102,15,56,220,225
  211 .byte   102,15,56,220,233
  212         movups  (%rcx,%rax,1),%xmm1
  213         addq    $32,%rax
  214 .byte   102,15,56,220,208
  215 .byte   102,15,56,220,216
  216 .byte   102,15,56,220,224
  217 .byte   102,15,56,220,232
              /* movups does not touch flags: jnz tests the addq above */
  218         movups  -16(%rcx,%rax,1),%xmm0
  219         jnz     .Lenc_loop4
  220 
              /* one more aesenc with %xmm1, then aesenclast with %xmm0 */
  221 .byte   102,15,56,220,209
  222 .byte   102,15,56,220,217
  223 .byte   102,15,56,220,225
  224 .byte   102,15,56,220,233
  225 .byte   102,15,56,221,208
  226 .byte   102,15,56,221,216
  227 .byte   102,15,56,221,224
  228 .byte   102,15,56,221,232
  229         .byte   0xf3,0xc3
  230 .cfi_endproc    
  231 .size   _aesni_encrypt4,.-_aesni_encrypt4
  232 .type   _aesni_decrypt4,@function
      /*
       * _aesni_decrypt4: file-local helper (no .globl) that decrypts the
       * four blocks held in %xmm2-%xmm5, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 222 = aesdec, OP 223 = aesdeclast
       *   MODRM 209 / 217 / 225 / 233 = key in %xmm1 applied to %xmm2..%xmm5
       *   MODRM 208 / 216 / 224 / 232 = key in %xmm0 applied to %xmm2..%xmm5
       * .byte 0x0f,0x1f,0x00 = nopl (%rax), a 3-byte no-op
       * .byte 0xf3,0xc3      = rep ret
       */
  233 .align  16
  234 _aesni_decrypt4:
  235 .cfi_startproc  
  236         movups  (%rcx),%xmm0
  237         shll    $4,%eax
  238         movups  16(%rcx),%xmm1
  239         xorps   %xmm0,%xmm2
  240         xorps   %xmm0,%xmm3
  241         xorps   %xmm0,%xmm4
  242         xorps   %xmm0,%xmm5
  243         movups  32(%rcx),%xmm0
              /* %rcx = key + 32 + 16*count; %rax = 16 - 16*count, a negative
                 byte offset that is stepped by 32 until it reaches zero */
  244         leaq    32(%rcx,%rax,1),%rcx
  245         negq    %rax
  246 .byte   0x0f,0x1f,0x00
  247         addq    $16,%rax
  248 
  249 .Ldec_loop4:
  250 .byte   102,15,56,222,209
  251 .byte   102,15,56,222,217
  252 .byte   102,15,56,222,225
  253 .byte   102,15,56,222,233
  254         movups  (%rcx,%rax,1),%xmm1
  255         addq    $32,%rax
  256 .byte   102,15,56,222,208
  257 .byte   102,15,56,222,216
  258 .byte   102,15,56,222,224
  259 .byte   102,15,56,222,232
              /* movups does not touch flags: jnz tests the addq above */
  260         movups  -16(%rcx,%rax,1),%xmm0
  261         jnz     .Ldec_loop4
  262 
              /* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
  263 .byte   102,15,56,222,209
  264 .byte   102,15,56,222,217
  265 .byte   102,15,56,222,225
  266 .byte   102,15,56,222,233
  267 .byte   102,15,56,223,208
  268 .byte   102,15,56,223,216
  269 .byte   102,15,56,223,224
  270 .byte   102,15,56,223,232
  271         .byte   0xf3,0xc3
  272 .cfi_endproc    
  273 .size   _aesni_decrypt4,.-_aesni_decrypt4
  274 .type   _aesni_encrypt6,@function
      /*
       * _aesni_encrypt6: file-local helper (no .globl) that encrypts the six
       * blocks held in %xmm2-%xmm7, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 220 = aesenc, OP 221 = aesenclast
       *   MODRM 209 / 217 / 225 / 233 / 241 / 249
       *         = key in %xmm1 applied to %xmm2 .. %xmm7
       *   MODRM 208 / 216 / 224 / 232 / 240 / 248
       *         = key in %xmm0 applied to %xmm2 .. %xmm7
       * .byte 0xf3,0xc3 = rep ret
       *
       * The aesenc with %xmm1 is issued for %xmm2-%xmm4 while %xmm5-%xmm7 are
       * still being XORed with the first key; the jmp then enters the loop at
       * .Lenc_loop6_enter so that %xmm5-%xmm7 get that same aesenc.
       */
  275 .align  16
  276 _aesni_encrypt6:
  277 .cfi_startproc  
  278         movups  (%rcx),%xmm0
  279         shll    $4,%eax
  280         movups  16(%rcx),%xmm1
  281         xorps   %xmm0,%xmm2
  282         pxor    %xmm0,%xmm3
  283         pxor    %xmm0,%xmm4
  284 .byte   102,15,56,220,209
              /* %rcx = key + 32 + 16*count; %rax = -16*count for now */
  285         leaq    32(%rcx,%rax,1),%rcx
  286         negq    %rax
  287 .byte   102,15,56,220,217
  288         pxor    %xmm0,%xmm5
  289         pxor    %xmm0,%xmm6
  290 .byte   102,15,56,220,225
  291         pxor    %xmm0,%xmm7
              /* with %rax = -16*count this reads the 16 bytes at key + 32 */
  292         movups  (%rcx,%rax,1),%xmm0
  293         addq    $16,%rax
  294         jmp     .Lenc_loop6_enter
  295 .align  16
  296 .Lenc_loop6:
  297 .byte   102,15,56,220,209
  298 .byte   102,15,56,220,217
  299 .byte   102,15,56,220,225
  300 .Lenc_loop6_enter:
  301 .byte   102,15,56,220,233
  302 .byte   102,15,56,220,241
  303 .byte   102,15,56,220,249
  304         movups  (%rcx,%rax,1),%xmm1
  305         addq    $32,%rax
  306 .byte   102,15,56,220,208
  307 .byte   102,15,56,220,216
  308 .byte   102,15,56,220,224
  309 .byte   102,15,56,220,232
  310 .byte   102,15,56,220,240
  311 .byte   102,15,56,220,248
              /* movups does not touch flags: jnz tests the addq above */
  312         movups  -16(%rcx,%rax,1),%xmm0
  313         jnz     .Lenc_loop6
  314 
              /* one more aesenc with %xmm1, then aesenclast with %xmm0 */
  315 .byte   102,15,56,220,209
  316 .byte   102,15,56,220,217
  317 .byte   102,15,56,220,225
  318 .byte   102,15,56,220,233
  319 .byte   102,15,56,220,241
  320 .byte   102,15,56,220,249
  321 .byte   102,15,56,221,208
  322 .byte   102,15,56,221,216
  323 .byte   102,15,56,221,224
  324 .byte   102,15,56,221,232
  325 .byte   102,15,56,221,240
  326 .byte   102,15,56,221,248
  327         .byte   0xf3,0xc3
  328 .cfi_endproc    
  329 .size   _aesni_encrypt6,.-_aesni_encrypt6
  330 .type   _aesni_decrypt6,@function
      /*
       * _aesni_decrypt6: file-local helper (no .globl) that decrypts the six
       * blocks held in %xmm2-%xmm7, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions, .byte 102,15,56,OP,MODRM:
       *   OP 222 = aesdec, OP 223 = aesdeclast
       *   MODRM 209 / 217 / 225 / 233 / 241 / 249
       *         = key in %xmm1 applied to %xmm2 .. %xmm7
       *   MODRM 208 / 216 / 224 / 232 / 240 / 248
       *         = key in %xmm0 applied to %xmm2 .. %xmm7
       * .byte 0xf3,0xc3 = rep ret
       *
       * The aesdec with %xmm1 is issued for %xmm2-%xmm4 while %xmm5-%xmm7 are
       * still being XORed with the first key; the jmp then enters the loop at
       * .Ldec_loop6_enter so that %xmm5-%xmm7 get that same aesdec.
       */
  331 .align  16
  332 _aesni_decrypt6:
  333 .cfi_startproc  
  334         movups  (%rcx),%xmm0
  335         shll    $4,%eax
  336         movups  16(%rcx),%xmm1
  337         xorps   %xmm0,%xmm2
  338         pxor    %xmm0,%xmm3
  339         pxor    %xmm0,%xmm4
  340 .byte   102,15,56,222,209
              /* %rcx = key + 32 + 16*count; %rax = -16*count for now */
  341         leaq    32(%rcx,%rax,1),%rcx
  342         negq    %rax
  343 .byte   102,15,56,222,217
  344         pxor    %xmm0,%xmm5
  345         pxor    %xmm0,%xmm6
  346 .byte   102,15,56,222,225
  347         pxor    %xmm0,%xmm7
              /* with %rax = -16*count this reads the 16 bytes at key + 32 */
  348         movups  (%rcx,%rax,1),%xmm0
  349         addq    $16,%rax
  350         jmp     .Ldec_loop6_enter
  351 .align  16
  352 .Ldec_loop6:
  353 .byte   102,15,56,222,209
  354 .byte   102,15,56,222,217
  355 .byte   102,15,56,222,225
  356 .Ldec_loop6_enter:
  357 .byte   102,15,56,222,233
  358 .byte   102,15,56,222,241
  359 .byte   102,15,56,222,249
  360         movups  (%rcx,%rax,1),%xmm1
  361         addq    $32,%rax
  362 .byte   102,15,56,222,208
  363 .byte   102,15,56,222,216
  364 .byte   102,15,56,222,224
  365 .byte   102,15,56,222,232
  366 .byte   102,15,56,222,240
  367 .byte   102,15,56,222,248
              /* movups does not touch flags: jnz tests the addq above */
  368         movups  -16(%rcx,%rax,1),%xmm0
  369         jnz     .Ldec_loop6
  370 
              /* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
  371 .byte   102,15,56,222,209
  372 .byte   102,15,56,222,217
  373 .byte   102,15,56,222,225
  374 .byte   102,15,56,222,233
  375 .byte   102,15,56,222,241
  376 .byte   102,15,56,222,249
  377 .byte   102,15,56,223,208
  378 .byte   102,15,56,223,216
  379 .byte   102,15,56,223,224
  380 .byte   102,15,56,223,232
  381 .byte   102,15,56,223,240
  382 .byte   102,15,56,223,248
  383         .byte   0xf3,0xc3
  384 .cfi_endproc    
  385 .size   _aesni_decrypt6,.-_aesni_decrypt6
  386 .type   _aesni_encrypt8,@function
      /*
       * _aesni_encrypt8: file-local helper (no .globl) that encrypts the
       * eight blocks held in %xmm2-%xmm9, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions:
       *   .byte 102,15,56,OP,MODRM     for %xmm2-%xmm7
       *   .byte 102,68,15,56,OP,MODRM  for %xmm8/%xmm9 (68 = 0x44, REX.R)
       *   OP 220 = aesenc, OP 221 = aesenclast
       *   MODRM 209 / 217 / 225 / 233 / 241 / 249
       *         = key in %xmm1 applied to %xmm2 .. %xmm7
       *   MODRM 208 / 216 / 224 / 232 / 240 / 248
       *         = key in %xmm0 applied to %xmm2 .. %xmm7
       *   with the REX prefix, MODRM 193 / 201 = %xmm1 applied to %xmm8 / 9
       *                        MODRM 192 / 200 = %xmm0 applied to %xmm8 / 9
       *   .byte 0xf3,0xc3 = rep ret
       *
       * The aesenc with %xmm1 is issued for %xmm2/%xmm3 during setup; the jmp
       * enters the loop at .Lenc_loop8_inner so the other six blocks get it.
       * .Lenc_loop8_enter is not referenced inside this function.
       */
  387 .align  16
  388 _aesni_encrypt8:
  389 .cfi_startproc  
  390         movups  (%rcx),%xmm0
  391         shll    $4,%eax
  392         movups  16(%rcx),%xmm1
  393         xorps   %xmm0,%xmm2
  394         xorps   %xmm0,%xmm3
  395         pxor    %xmm0,%xmm4
  396         pxor    %xmm0,%xmm5
  397         pxor    %xmm0,%xmm6
              /* %rcx = key + 32 + 16*count; %rax = -16*count for now */
  398         leaq    32(%rcx,%rax,1),%rcx
  399         negq    %rax
  400 .byte   102,15,56,220,209
  401         pxor    %xmm0,%xmm7
  402         pxor    %xmm0,%xmm8
  403 .byte   102,15,56,220,217
  404         pxor    %xmm0,%xmm9
              /* with %rax = -16*count this reads the 16 bytes at key + 32 */
  405         movups  (%rcx,%rax,1),%xmm0
  406         addq    $16,%rax
  407         jmp     .Lenc_loop8_inner
  408 .align  16
  409 .Lenc_loop8:
  410 .byte   102,15,56,220,209
  411 .byte   102,15,56,220,217
  412 .Lenc_loop8_inner:
  413 .byte   102,15,56,220,225
  414 .byte   102,15,56,220,233
  415 .byte   102,15,56,220,241
  416 .byte   102,15,56,220,249
  417 .byte   102,68,15,56,220,193
  418 .byte   102,68,15,56,220,201
  419 .Lenc_loop8_enter:
  420         movups  (%rcx,%rax,1),%xmm1
  421         addq    $32,%rax
  422 .byte   102,15,56,220,208
  423 .byte   102,15,56,220,216
  424 .byte   102,15,56,220,224
  425 .byte   102,15,56,220,232
  426 .byte   102,15,56,220,240
  427 .byte   102,15,56,220,248
  428 .byte   102,68,15,56,220,192
  429 .byte   102,68,15,56,220,200
              /* movups does not touch flags: jnz tests the addq above */
  430         movups  -16(%rcx,%rax,1),%xmm0
  431         jnz     .Lenc_loop8
  432 
              /* one more aesenc with %xmm1, then aesenclast with %xmm0 */
  433 .byte   102,15,56,220,209
  434 .byte   102,15,56,220,217
  435 .byte   102,15,56,220,225
  436 .byte   102,15,56,220,233
  437 .byte   102,15,56,220,241
  438 .byte   102,15,56,220,249
  439 .byte   102,68,15,56,220,193
  440 .byte   102,68,15,56,220,201
  441 .byte   102,15,56,221,208
  442 .byte   102,15,56,221,216
  443 .byte   102,15,56,221,224
  444 .byte   102,15,56,221,232
  445 .byte   102,15,56,221,240
  446 .byte   102,15,56,221,248
  447 .byte   102,68,15,56,221,192
  448 .byte   102,68,15,56,221,200
  449         .byte   0xf3,0xc3
  450 .cfi_endproc    
  451 .size   _aesni_encrypt8,.-_aesni_encrypt8
  452 .type   _aesni_decrypt8,@function
      /*
       * _aesni_decrypt8: file-local helper (no .globl) that decrypts the
       * eight blocks held in %xmm2-%xmm9, leaving the results in place.
       *   %rcx = address of the round-key array
       *   %eax = loop count (the callers in this file pass the 32-bit value
       *          found at offset 240 of the key array)
       * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
       * Hand-encoded instructions:
       *   .byte 102,15,56,OP,MODRM     for %xmm2-%xmm7
       *   .byte 102,68,15,56,OP,MODRM  for %xmm8/%xmm9 (68 = 0x44, REX.R)
       *   OP 222 = aesdec, OP 223 = aesdeclast
       *   MODRM 209 / 217 / 225 / 233 / 241 / 249
       *         = key in %xmm1 applied to %xmm2 .. %xmm7
       *   MODRM 208 / 216 / 224 / 232 / 240 / 248
       *         = key in %xmm0 applied to %xmm2 .. %xmm7
       *   with the REX prefix, MODRM 193 / 201 = %xmm1 applied to %xmm8 / 9
       *                        MODRM 192 / 200 = %xmm0 applied to %xmm8 / 9
       *   .byte 0xf3,0xc3 = rep ret
       *
       * The aesdec with %xmm1 is issued for %xmm2/%xmm3 during setup; the jmp
       * enters the loop at .Ldec_loop8_inner so the other six blocks get it.
       * .Ldec_loop8_enter is not referenced inside this function.
       */
  453 .align  16
  454 _aesni_decrypt8:
  455 .cfi_startproc  
  456         movups  (%rcx),%xmm0
  457         shll    $4,%eax
  458         movups  16(%rcx),%xmm1
  459         xorps   %xmm0,%xmm2
  460         xorps   %xmm0,%xmm3
  461         pxor    %xmm0,%xmm4
  462         pxor    %xmm0,%xmm5
  463         pxor    %xmm0,%xmm6
              /* %rcx = key + 32 + 16*count; %rax = -16*count for now */
  464         leaq    32(%rcx,%rax,1),%rcx
  465         negq    %rax
  466 .byte   102,15,56,222,209
  467         pxor    %xmm0,%xmm7
  468         pxor    %xmm0,%xmm8
  469 .byte   102,15,56,222,217
  470         pxor    %xmm0,%xmm9
              /* with %rax = -16*count this reads the 16 bytes at key + 32 */
  471         movups  (%rcx,%rax,1),%xmm0
  472         addq    $16,%rax
  473         jmp     .Ldec_loop8_inner
  474 .align  16
  475 .Ldec_loop8:
  476 .byte   102,15,56,222,209
  477 .byte   102,15,56,222,217
  478 .Ldec_loop8_inner:
  479 .byte   102,15,56,222,225
  480 .byte   102,15,56,222,233
  481 .byte   102,15,56,222,241
  482 .byte   102,15,56,222,249
  483 .byte   102,68,15,56,222,193
  484 .byte   102,68,15,56,222,201
  485 .Ldec_loop8_enter:
  486         movups  (%rcx,%rax,1),%xmm1
  487         addq    $32,%rax
  488 .byte   102,15,56,222,208
  489 .byte   102,15,56,222,216
  490 .byte   102,15,56,222,224
  491 .byte   102,15,56,222,232
  492 .byte   102,15,56,222,240
  493 .byte   102,15,56,222,248
  494 .byte   102,68,15,56,222,192
  495 .byte   102,68,15,56,222,200
              /* movups does not touch flags: jnz tests the addq above */
  496         movups  -16(%rcx,%rax,1),%xmm0
  497         jnz     .Ldec_loop8
  498 
              /* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
  499 .byte   102,15,56,222,209
  500 .byte   102,15,56,222,217
  501 .byte   102,15,56,222,225
  502 .byte   102,15,56,222,233
  503 .byte   102,15,56,222,241
  504 .byte   102,15,56,222,249
  505 .byte   102,68,15,56,222,193
  506 .byte   102,68,15,56,222,201
  507 .byte   102,15,56,223,208
  508 .byte   102,15,56,223,216
  509 .byte   102,15,56,223,224
  510 .byte   102,15,56,223,232
  511 .byte   102,15,56,223,240
  512 .byte   102,15,56,223,248
  513 .byte   102,68,15,56,223,192
  514 .byte   102,68,15,56,223,200
  515         .byte   0xf3,0xc3
  516 .cfi_endproc    
  517 .size   _aesni_decrypt8,.-_aesni_decrypt8
  518 .globl  aesni_ecb_encrypt
      /*
       * aesni_ecb_encrypt: encrypt or decrypt a run of independent 16-byte
       * blocks.
       *   %rdi = input address, %rsi = output address
       *   %rdx = length in bytes; rounded down to a multiple of 16 by the
       *          andq below, and nothing is processed if that leaves zero
       *   %rcx = address of the round-key array (loop count at offset 240)
       *   %r8d = non-zero selects the encrypt path, zero the decrypt path
       * %r11 and %r10d hold copies of the key address and loop count so that
       * %rcx/%eax can be reloaded after each helper call (the _aesni_*N
       * helpers in this file modify both).
       * Data is handled 0x80 bytes (eight blocks) at a time while at least
       * that much remains; the 1..7 leftover blocks are dispatched to the
       * 1/2/3/4/6/8-block code by the compare ladder in the *_tail sections.
       * Hand-encoded instructions in the one-block paths:
       *   102,15,56,220,209 / 221,209 = aesenc / aesenclast %xmm1,%xmm2
       *   102,15,56,222,209 / 223,209 = aesdec / aesdeclast %xmm1,%xmm2
       *   0xf3,0xc3 = rep ret
       */
  519 .type   aesni_ecb_encrypt,@function
  520 .align  16
  521 aesni_ecb_encrypt:
  522 .cfi_startproc  
  523         andq    $-16,%rdx
  524         jz      .Lecb_ret
  525 
  526         movl    240(%rcx),%eax
  527         movups  (%rcx),%xmm0
  528         movq    %rcx,%r11
  529         movl    %eax,%r10d
  530         testl   %r8d,%r8d
  531         jz      .Lecb_decrypt
  532 
  533         cmpq    $0x80,%rdx
  534         jb      .Lecb_enc_tail
  535 
              /* preload the first eight input blocks */
  536         movdqu  (%rdi),%xmm2
  537         movdqu  16(%rdi),%xmm3
  538         movdqu  32(%rdi),%xmm4
  539         movdqu  48(%rdi),%xmm5
  540         movdqu  64(%rdi),%xmm6
  541         movdqu  80(%rdi),%xmm7
  542         movdqu  96(%rdi),%xmm8
  543         movdqu  112(%rdi),%xmm9
  544         leaq    128(%rdi),%rdi
  545         subq    $0x80,%rdx
  546         jmp     .Lecb_enc_loop8_enter
  547 .align  16
  548 .Lecb_enc_loop8:
              /* store the previous eight results interleaved with loading the
                 next eight inputs; restore %rcx/%eax for the next call */
  549         movups  %xmm2,(%rsi)
  550         movq    %r11,%rcx
  551         movdqu  (%rdi),%xmm2
  552         movl    %r10d,%eax
  553         movups  %xmm3,16(%rsi)
  554         movdqu  16(%rdi),%xmm3
  555         movups  %xmm4,32(%rsi)
  556         movdqu  32(%rdi),%xmm4
  557         movups  %xmm5,48(%rsi)
  558         movdqu  48(%rdi),%xmm5
  559         movups  %xmm6,64(%rsi)
  560         movdqu  64(%rdi),%xmm6
  561         movups  %xmm7,80(%rsi)
  562         movdqu  80(%rdi),%xmm7
  563         movups  %xmm8,96(%rsi)
  564         movdqu  96(%rdi),%xmm8
  565         movups  %xmm9,112(%rsi)
  566         leaq    128(%rsi),%rsi
  567         movdqu  112(%rdi),%xmm9
  568         leaq    128(%rdi),%rdi
  569 .Lecb_enc_loop8_enter:
  570 
  571         call    _aesni_encrypt8
  572 
              /* no borrow means another full 0x80 bytes are available */
  573         subq    $0x80,%rdx
  574         jnc     .Lecb_enc_loop8
  575 
  576         movups  %xmm2,(%rsi)
  577         movq    %r11,%rcx
  578         movups  %xmm3,16(%rsi)
  579         movl    %r10d,%eax
  580         movups  %xmm4,32(%rsi)
  581         movups  %xmm5,48(%rsi)
  582         movups  %xmm6,64(%rsi)
  583         movups  %xmm7,80(%rsi)
  584         movups  %xmm8,96(%rsi)
  585         movups  %xmm9,112(%rsi)
  586         leaq    128(%rsi),%rsi
              /* undo the last subtraction: %rdx = leftover bytes (0..0x70) */
  587         addq    $0x80,%rdx
  588         jz      .Lecb_ret
  589 
  590 .Lecb_enc_tail:
              /* Each cmpq serves two branches: jb right after it and je after
                 the following movups (which leaves flags untouched).
                 0x10 -> one, 0x20 -> two, 0x30 -> three, 0x40 -> four,
                 0x50 -> five, 0x60 -> six, otherwise seven blocks. */
  591         movups  (%rdi),%xmm2
  592         cmpq    $0x20,%rdx
  593         jb      .Lecb_enc_one
  594         movups  16(%rdi),%xmm3
  595         je      .Lecb_enc_two
  596         movups  32(%rdi),%xmm4
  597         cmpq    $0x40,%rdx
  598         jb      .Lecb_enc_three
  599         movups  48(%rdi),%xmm5
  600         je      .Lecb_enc_four
  601         movups  64(%rdi),%xmm6
  602         cmpq    $0x60,%rdx
  603         jb      .Lecb_enc_five
  604         movups  80(%rdi),%xmm7
  605         je      .Lecb_enc_six
  606         movdqu  96(%rdi),%xmm8
              /* seven blocks: the eighth lane is zeroed and its result is
                 not stored */
  607         xorps   %xmm9,%xmm9
  608         call    _aesni_encrypt8
  609         movups  %xmm2,(%rsi)
  610         movups  %xmm3,16(%rsi)
  611         movups  %xmm4,32(%rsi)
  612         movups  %xmm5,48(%rsi)
  613         movups  %xmm6,64(%rsi)
  614         movups  %xmm7,80(%rsi)
  615         movups  %xmm8,96(%rsi)
  616         jmp     .Lecb_ret
  617 .align  16
  618 .Lecb_enc_one:
              /* single block handled inline: %eax is the loop count and
                 %rcx walks the round keys */
  619         movups  (%rcx),%xmm0
  620         movups  16(%rcx),%xmm1
  621         leaq    32(%rcx),%rcx
  622         xorps   %xmm0,%xmm2
  623 .Loop_enc1_3:
  624 .byte   102,15,56,220,209
  625         decl    %eax
  626         movups  (%rcx),%xmm1
  627         leaq    16(%rcx),%rcx
  628         jnz     .Loop_enc1_3
  629 .byte   102,15,56,221,209
  630         movups  %xmm2,(%rsi)
  631         jmp     .Lecb_ret
  632 .align  16
  633 .Lecb_enc_two:
  634         call    _aesni_encrypt2
  635         movups  %xmm2,(%rsi)
  636         movups  %xmm3,16(%rsi)
  637         jmp     .Lecb_ret
  638 .align  16
  639 .Lecb_enc_three:
  640         call    _aesni_encrypt3
  641         movups  %xmm2,(%rsi)
  642         movups  %xmm3,16(%rsi)
  643         movups  %xmm4,32(%rsi)
  644         jmp     .Lecb_ret
  645 .align  16
  646 .Lecb_enc_four:
  647         call    _aesni_encrypt4
  648         movups  %xmm2,(%rsi)
  649         movups  %xmm3,16(%rsi)
  650         movups  %xmm4,32(%rsi)
  651         movups  %xmm5,48(%rsi)
  652         jmp     .Lecb_ret
  653 .align  16
  654 .Lecb_enc_five:
              /* five blocks: the sixth lane is zeroed and its result is
                 not stored */
  655         xorps   %xmm7,%xmm7
  656         call    _aesni_encrypt6
  657         movups  %xmm2,(%rsi)
  658         movups  %xmm3,16(%rsi)
  659         movups  %xmm4,32(%rsi)
  660         movups  %xmm5,48(%rsi)
  661         movups  %xmm6,64(%rsi)
  662         jmp     .Lecb_ret
  663 .align  16
  664 .Lecb_enc_six:
  665         call    _aesni_encrypt6
  666         movups  %xmm2,(%rsi)
  667         movups  %xmm3,16(%rsi)
  668         movups  %xmm4,32(%rsi)
  669         movups  %xmm5,48(%rsi)
  670         movups  %xmm6,64(%rsi)
  671         movups  %xmm7,80(%rsi)
  672         jmp     .Lecb_ret
  673 
  674 .align  16
  675 .Lecb_decrypt:
              /* Same structure as the encrypt path, using the _aesni_decryptN
                 helpers.  Unlike the encrypt path, the final-store sequences
                 pxor each data register to zero after writing it out. */
  676         cmpq    $0x80,%rdx
  677         jb      .Lecb_dec_tail
  678 
  679         movdqu  (%rdi),%xmm2
  680         movdqu  16(%rdi),%xmm3
  681         movdqu  32(%rdi),%xmm4
  682         movdqu  48(%rdi),%xmm5
  683         movdqu  64(%rdi),%xmm6
  684         movdqu  80(%rdi),%xmm7
  685         movdqu  96(%rdi),%xmm8
  686         movdqu  112(%rdi),%xmm9
  687         leaq    128(%rdi),%rdi
  688         subq    $0x80,%rdx
  689         jmp     .Lecb_dec_loop8_enter
  690 .align  16
  691 .Lecb_dec_loop8:
  692         movups  %xmm2,(%rsi)
  693         movq    %r11,%rcx
  694         movdqu  (%rdi),%xmm2
  695         movl    %r10d,%eax
  696         movups  %xmm3,16(%rsi)
  697         movdqu  16(%rdi),%xmm3
  698         movups  %xmm4,32(%rsi)
  699         movdqu  32(%rdi),%xmm4
  700         movups  %xmm5,48(%rsi)
  701         movdqu  48(%rdi),%xmm5
  702         movups  %xmm6,64(%rsi)
  703         movdqu  64(%rdi),%xmm6
  704         movups  %xmm7,80(%rsi)
  705         movdqu  80(%rdi),%xmm7
  706         movups  %xmm8,96(%rsi)
  707         movdqu  96(%rdi),%xmm8
  708         movups  %xmm9,112(%rsi)
  709         leaq    128(%rsi),%rsi
  710         movdqu  112(%rdi),%xmm9
  711         leaq    128(%rdi),%rdi
  712 .Lecb_dec_loop8_enter:
  713 
  714         call    _aesni_decrypt8
  715 
              /* reload the first 16 key bytes into %xmm0; movups leaves the
                 flags alone, and jnc tests the subq that follows it */
  716         movups  (%r11),%xmm0
  717         subq    $0x80,%rdx
  718         jnc     .Lecb_dec_loop8
  719 
  720         movups  %xmm2,(%rsi)
  721         pxor    %xmm2,%xmm2
  722         movq    %r11,%rcx
  723         movups  %xmm3,16(%rsi)
  724         pxor    %xmm3,%xmm3
  725         movl    %r10d,%eax
  726         movups  %xmm4,32(%rsi)
  727         pxor    %xmm4,%xmm4
  728         movups  %xmm5,48(%rsi)
  729         pxor    %xmm5,%xmm5
  730         movups  %xmm6,64(%rsi)
  731         pxor    %xmm6,%xmm6
  732         movups  %xmm7,80(%rsi)
  733         pxor    %xmm7,%xmm7
  734         movups  %xmm8,96(%rsi)
  735         pxor    %xmm8,%xmm8
  736         movups  %xmm9,112(%rsi)
  737         pxor    %xmm9,%xmm9
  738         leaq    128(%rsi),%rsi
              /* undo the last subtraction: %rdx = leftover bytes (0..0x70) */
  739         addq    $0x80,%rdx
  740         jz      .Lecb_ret
  741 
  742 .Lecb_dec_tail:
              /* same compare ladder as the encrypt tail: each cmpq feeds both
                 the jb and the je that follows the next movups */
  743         movups  (%rdi),%xmm2
  744         cmpq    $0x20,%rdx
  745         jb      .Lecb_dec_one
  746         movups  16(%rdi),%xmm3
  747         je      .Lecb_dec_two
  748         movups  32(%rdi),%xmm4
  749         cmpq    $0x40,%rdx
  750         jb      .Lecb_dec_three
  751         movups  48(%rdi),%xmm5
  752         je      .Lecb_dec_four
  753         movups  64(%rdi),%xmm6
  754         cmpq    $0x60,%rdx
  755         jb      .Lecb_dec_five
  756         movups  80(%rdi),%xmm7
  757         je      .Lecb_dec_six
  758         movups  96(%rdi),%xmm8
  759         movups  (%rcx),%xmm0
              /* seven blocks: the eighth lane is zeroed and its result is
                 not stored */
  760         xorps   %xmm9,%xmm9
  761         call    _aesni_decrypt8
  762         movups  %xmm2,(%rsi)
  763         pxor    %xmm2,%xmm2
  764         movups  %xmm3,16(%rsi)
  765         pxor    %xmm3,%xmm3
  766         movups  %xmm4,32(%rsi)
  767         pxor    %xmm4,%xmm4
  768         movups  %xmm5,48(%rsi)
  769         pxor    %xmm5,%xmm5
  770         movups  %xmm6,64(%rsi)
  771         pxor    %xmm6,%xmm6
  772         movups  %xmm7,80(%rsi)
  773         pxor    %xmm7,%xmm7
  774         movups  %xmm8,96(%rsi)
  775         pxor    %xmm8,%xmm8
  776         pxor    %xmm9,%xmm9
  777         jmp     .Lecb_ret
  778 .align  16
  779 .Lecb_dec_one:
              /* single block handled inline: %eax is the loop count and
                 %rcx walks the round keys */
  780         movups  (%rcx),%xmm0
  781         movups  16(%rcx),%xmm1
  782         leaq    32(%rcx),%rcx
  783         xorps   %xmm0,%xmm2
  784 .Loop_dec1_4:
  785 .byte   102,15,56,222,209
  786         decl    %eax
  787         movups  (%rcx),%xmm1
  788         leaq    16(%rcx),%rcx
  789         jnz     .Loop_dec1_4
  790 .byte   102,15,56,223,209
  791         movups  %xmm2,(%rsi)
  792         pxor    %xmm2,%xmm2
  793         jmp     .Lecb_ret
  794 .align  16
  795 .Lecb_dec_two:
  796         call    _aesni_decrypt2
  797         movups  %xmm2,(%rsi)
  798         pxor    %xmm2,%xmm2
  799         movups  %xmm3,16(%rsi)
  800         pxor    %xmm3,%xmm3
  801         jmp     .Lecb_ret
  802 .align  16
  803 .Lecb_dec_three:
  804         call    _aesni_decrypt3
  805         movups  %xmm2,(%rsi)
  806         pxor    %xmm2,%xmm2
  807         movups  %xmm3,16(%rsi)
  808         pxor    %xmm3,%xmm3
  809         movups  %xmm4,32(%rsi)
  810         pxor    %xmm4,%xmm4
  811         jmp     .Lecb_ret
  812 .align  16
  813 .Lecb_dec_four:
  814         call    _aesni_decrypt4
  815         movups  %xmm2,(%rsi)
  816         pxor    %xmm2,%xmm2
  817         movups  %xmm3,16(%rsi)
  818         pxor    %xmm3,%xmm3
  819         movups  %xmm4,32(%rsi)
  820         pxor    %xmm4,%xmm4
  821         movups  %xmm5,48(%rsi)
  822         pxor    %xmm5,%xmm5
  823         jmp     .Lecb_ret
  824 .align  16
  825 .Lecb_dec_five:
              /* five blocks: the sixth lane is zeroed and its result is
                 not stored */
  826         xorps   %xmm7,%xmm7
  827         call    _aesni_decrypt6
  828         movups  %xmm2,(%rsi)
  829         pxor    %xmm2,%xmm2
  830         movups  %xmm3,16(%rsi)
  831         pxor    %xmm3,%xmm3
  832         movups  %xmm4,32(%rsi)
  833         pxor    %xmm4,%xmm4
  834         movups  %xmm5,48(%rsi)
  835         pxor    %xmm5,%xmm5
  836         movups  %xmm6,64(%rsi)
  837         pxor    %xmm6,%xmm6
  838         pxor    %xmm7,%xmm7
  839         jmp     .Lecb_ret
  840 .align  16
  841 .Lecb_dec_six:
  842         call    _aesni_decrypt6
  843         movups  %xmm2,(%rsi)
  844         pxor    %xmm2,%xmm2
  845         movups  %xmm3,16(%rsi)
  846         pxor    %xmm3,%xmm3
  847         movups  %xmm4,32(%rsi)
  848         pxor    %xmm4,%xmm4
  849         movups  %xmm5,48(%rsi)
  850         pxor    %xmm5,%xmm5
  851         movups  %xmm6,64(%rsi)
  852         pxor    %xmm6,%xmm6
  853         movups  %xmm7,80(%rsi)
  854         pxor    %xmm7,%xmm7
              /* falls through into the common exit */
  855 
  856 .Lecb_ret:
              /* clear the two registers that held round keys, then return */
  857         xorps   %xmm0,%xmm0
  858         pxor    %xmm1,%xmm1
  859         .byte   0xf3,0xc3
  860 .cfi_endproc    
  861 .size   aesni_ecb_encrypt,.-aesni_ecb_encrypt
  862 .globl  aesni_ccm64_encrypt_blocks
      /*
       * aesni_ccm64_encrypt_blocks: for each 16-byte input block, run a
       * counter block and a running MAC block through the cipher together,
       * XOR the encrypted counter into the input to form the output, and
       * finally write the MAC block back.
       *   %rdi = input address, %rsi = output address
       *   %rdx = number of 16-byte blocks; decremented once per block with
       *          no check for zero on entry
       *   %rcx = address of the round-key array (loop count at offset 240)
       *   %r8  = address of the 16-byte counter block (only read here)
       *   %r9  = address of the 16-byte MAC block (read, then overwritten)
       * .Lincrement64 and .Lbswap_mask are 16-byte constants defined outside
       * the lines visible here; they are loaded into %xmm9 and %xmm7.
       * Register roles: %xmm6 = byte-shuffled counter that paddq advances,
       * %xmm2 = counter block being encrypted, %xmm3 = MAC, %xmm8 = input.
       * Hand-encoded instructions:
       *   102,15,56,0,247       = pshufb %xmm7,%xmm6
       *   102,15,56,0,215       = pshufb %xmm7,%xmm2
       *   102,15,56,220,209/217 = aesenc %xmm1 -> %xmm2 / %xmm3
       *   102,15,56,220,208/216 = aesenc %xmm0 -> %xmm2 / %xmm3
       *   102,15,56,221,208/216 = aesenclast %xmm0 -> %xmm2 / %xmm3
       *   0xf3,0xc3             = rep ret
       */
  863 .type   aesni_ccm64_encrypt_blocks,@function
  864 .align  16
  865 aesni_ccm64_encrypt_blocks:
  866 .cfi_startproc  
  867         movl    240(%rcx),%eax
  868         movdqu  (%r8),%xmm6
  869         movdqa  .Lincrement64(%rip),%xmm9
  870         movdqa  .Lbswap_mask(%rip),%xmm7
  871 
              /* %r11 = key start, %rcx = key + 32 + 16*count,
                 %r10 = 16 - 16*count (initial loop offset, reused per block) */
  872         shll    $4,%eax
  873         movl    $16,%r10d
  874         leaq    0(%rcx),%r11
  875         movdqu  (%r9),%xmm3
  876         movdqa  %xmm6,%xmm2
  877         leaq    32(%rcx,%rax,1),%rcx
  878 .byte   102,15,56,0,247
  879         subq    %rax,%r10
  880         jmp     .Lccm64_enc_outer
  881 .align  16
  882 .Lccm64_enc_outer:
  883         movups  (%r11),%xmm0
  884         movq    %r10,%rax
  885         movups  (%rdi),%xmm8
  886 
              /* %xmm2 = counter ^ key0; %xmm3 = MAC ^ input ^ key0 */
  887         xorps   %xmm0,%xmm2
  888         movups  16(%r11),%xmm1
  889         xorps   %xmm8,%xmm0
  890         xorps   %xmm0,%xmm3
  891         movups  32(%r11),%xmm0
  892 
  893 .Lccm64_enc2_loop:
  894 .byte   102,15,56,220,209
  895 .byte   102,15,56,220,217
  896         movups  (%rcx,%rax,1),%xmm1
  897         addq    $32,%rax
  898 .byte   102,15,56,220,208
  899 .byte   102,15,56,220,216
              /* movups does not touch flags: jnz tests the addq above */
  900         movups  -16(%rcx,%rax,1),%xmm0
  901         jnz     .Lccm64_enc2_loop
  902 .byte   102,15,56,220,209
  903 .byte   102,15,56,220,217
  904         paddq   %xmm9,%xmm6
              /* the flags from this decq are consumed by the jnz at the end
                 of the outer loop; nothing in between modifies them */
  905         decq    %rdx
  906 .byte   102,15,56,221,208
  907 .byte   102,15,56,221,216
  908 
  909         leaq    16(%rdi),%rdi
              /* output = input ^ encrypted counter; then build the next
                 counter block from the advanced %xmm6 */
  910         xorps   %xmm2,%xmm8
  911         movdqa  %xmm6,%xmm2
  912         movups  %xmm8,(%rsi)
  913 .byte   102,15,56,0,215
  914         leaq    16(%rsi),%rsi
  915         jnz     .Lccm64_enc_outer
  916 
              /* write the MAC back and clear the working registers */
  917         pxor    %xmm0,%xmm0
  918         pxor    %xmm1,%xmm1
  919         pxor    %xmm2,%xmm2
  920         movups  %xmm3,(%r9)
  921         pxor    %xmm3,%xmm3
  922         pxor    %xmm8,%xmm8
  923         pxor    %xmm6,%xmm6
  924         .byte   0xf3,0xc3
  925 .cfi_endproc    
  926 .size   aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
  927 .globl  aesni_ccm64_decrypt_blocks
      /*
       * aesni_ccm64_decrypt_blocks: counterpart of the CCM encrypt routine.
       * Each input block is XORed with an encrypted counter block to form
       * the output, and that output is then folded into the running MAC.
       * Only aesenc/aesenclast are used; no aesdec appears in this routine.
       *   %rdi = input address, %rsi = output address
       *   %rdx = number of 16-byte blocks; decremented once per block with
       *          no check for zero on entry
       *   %rcx = address of the round-key array (loop count at offset 240)
       *   %r8  = address of the 16-byte counter block (only read here)
       *   %r9  = address of the 16-byte MAC block (read, then overwritten)
       * .Lincrement64 and .Lbswap_mask are 16-byte constants defined outside
       * the lines visible here; they are loaded into %xmm9 and %xmm7.
       * Register roles: %xmm6 = byte-shuffled counter that paddq advances,
       * %xmm2 = counter block / key stream, %xmm3 = MAC, %xmm8 = data block.
       * Hand-encoded instructions:
       *   102,15,56,0,247       = pshufb %xmm7,%xmm6
       *   102,15,56,0,215       = pshufb %xmm7,%xmm2
       *   102,15,56,220,209/217 = aesenc %xmm1 -> %xmm2 / %xmm3
       *   102,15,56,220,208/216 = aesenc %xmm0 -> %xmm2 / %xmm3
       *   102,15,56,221,209/217 = aesenclast %xmm1 -> %xmm2 / %xmm3
       *   102,15,56,221,208/216 = aesenclast %xmm0 -> %xmm2 / %xmm3
       *   0xf3,0xc3             = rep ret
       */
  928 .type   aesni_ccm64_decrypt_blocks,@function
  929 .align  16
  930 aesni_ccm64_decrypt_blocks:
  931 .cfi_startproc  
  932         movl    240(%rcx),%eax
  933         movups  (%r8),%xmm6
  934         movdqu  (%r9),%xmm3
  935         movdqa  .Lincrement64(%rip),%xmm9
  936         movdqa  .Lbswap_mask(%rip),%xmm7
  937 
  938         movaps  %xmm6,%xmm2
  939         movl    %eax,%r10d
  940         movq    %rcx,%r11
  941 .byte   102,15,56,0,247
              /* encrypt the first counter block on its own (single-block loop) */
  942         movups  (%rcx),%xmm0
  943         movups  16(%rcx),%xmm1
  944         leaq    32(%rcx),%rcx
  945         xorps   %xmm0,%xmm2
  946 .Loop_enc1_5:
  947 .byte   102,15,56,220,209
  948         decl    %eax
  949         movups  (%rcx),%xmm1
  950         leaq    16(%rcx),%rcx
  951         jnz     .Loop_enc1_5
  952 .byte   102,15,56,221,209
              /* %rcx = key + 32 + 16*count; %r10 = 16 - 16*count, the initial
                 offset for the two-block loop below */
  953         shll    $4,%r10d
  954         movl    $16,%eax
  955         movups  (%rdi),%xmm8
  956         paddq   %xmm9,%xmm6
  957         leaq    16(%rdi),%rdi
  958         subq    %r10,%rax
  959         leaq    32(%r11,%r10,1),%rcx
  960         movq    %rax,%r10
  961         jmp     .Lccm64_dec_outer
  962 .align  16
  963 .Lccm64_dec_outer:
              /* output = input ^ key stream; then build the next counter
                 block from the advanced %xmm6 */
  964         xorps   %xmm2,%xmm8
  965         movdqa  %xmm6,%xmm2
  966         movups  %xmm8,(%rsi)
  967         leaq    16(%rsi),%rsi
  968 .byte   102,15,56,0,215
  969 
  970         subq    $1,%rdx
  971         jz      .Lccm64_dec_break
  972 
              /* more blocks follow: %xmm2 = counter ^ key0 and
                 %xmm3 = MAC ^ output ^ key0, encrypted side by side */
  973         movups  (%r11),%xmm0
  974         movq    %r10,%rax
  975         movups  16(%r11),%xmm1
  976         xorps   %xmm0,%xmm8
  977         xorps   %xmm0,%xmm2
  978         xorps   %xmm8,%xmm3
  979         movups  32(%r11),%xmm0
  980         jmp     .Lccm64_dec2_loop
  981 .align  16
  982 .Lccm64_dec2_loop:
  983 .byte   102,15,56,220,209
  984 .byte   102,15,56,220,217
  985         movups  (%rcx,%rax,1),%xmm1
  986         addq    $32,%rax
  987 .byte   102,15,56,220,208
  988 .byte   102,15,56,220,216
              /* movups does not touch flags: jnz tests the addq above */
  989         movups  -16(%rcx,%rax,1),%xmm0
  990         jnz     .Lccm64_dec2_loop
              /* fetch the next input block and advance the counter while the
                 last two rounds complete */
  991         movups  (%rdi),%xmm8
  992         paddq   %xmm9,%xmm6
  993 .byte   102,15,56,220,209
  994 .byte   102,15,56,220,217
  995 .byte   102,15,56,221,208
  996 .byte   102,15,56,221,216
  997         leaq    16(%rdi),%rdi
  998         jmp     .Lccm64_dec_outer
  999 
 1000 .align  16
 1001 .Lccm64_dec_break:
 1002 
              /* last block: fold the final output into the MAC and encrypt
                 the MAC alone, re-reading the loop count from the key */
 1003         movl    240(%r11),%eax
 1004         movups  (%r11),%xmm0
 1005         movups  16(%r11),%xmm1
 1006         xorps   %xmm0,%xmm8
 1007         leaq    32(%r11),%r11
 1008         xorps   %xmm8,%xmm3
 1009 .Loop_enc1_6:
 1010 .byte   102,15,56,220,217
 1011         decl    %eax
 1012         movups  (%r11),%xmm1
 1013         leaq    16(%r11),%r11
 1014         jnz     .Loop_enc1_6
 1015 .byte   102,15,56,221,217
              /* write the MAC back and clear the working registers */
 1016         pxor    %xmm0,%xmm0
 1017         pxor    %xmm1,%xmm1
 1018         pxor    %xmm2,%xmm2
 1019         movups  %xmm3,(%r9)
 1020         pxor    %xmm3,%xmm3
 1021         pxor    %xmm8,%xmm8
 1022         pxor    %xmm6,%xmm6
 1023         .byte   0xf3,0xc3
 1024 .cfi_endproc    
 1025 .size   aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
 1026 .globl  aesni_ctr32_encrypt_blocks
 1027 .type   aesni_ctr32_encrypt_blocks,@function
 1028 .align  16
      /*
       * aesni_ctr32_encrypt_blocks
       *
       * XORs %rdx 16-byte input blocks with an AES keystream generated
       * from a counter block.  The dword at offset 12 of the counter block
       * is byte-swapped on load, stepped with 32-bit arithmetic and
       * swapped back, so it acts as a big-endian 32-bit counter that wraps
       * without carrying into the other 12 bytes.
       *
       * Register inputs, as used by the code below:
       *   %rdi  source pointer
       *   %rsi  destination pointer
       *   %rdx  number of 16-byte blocks.  NOTE(review): a count of 0 is
       *         not special-cased; it reaches .Lctr32_loop3, which always
       *         reads and writes one block.  Callers presumably guarantee
       *         a count >= 1 - confirm.
       *   %rcx  key schedule: 16-byte round keys from offset 0; the dword
       *         at offset 240 is the number of aesenc iterations
       *   %r8   16-byte counter block (only read, never written back)
       *
       * Paths:
       *   exactly 1 block   single-block path, no stack frame
       *   >= 8 blocks       .Lctr32_loop8 (8 lanes per iteration), or
       *                     .Lctr32_loop6 (6 lanes, uses movbe) when the
       *                     OPENSSL_ia32cap_P test below selects it
       *   leftovers, < 8    .Lctr32_tail
       *
       * Bulk-path stack frame: 128 bytes at a 16-byte aligned %rsp that
       * hold eight counter blocks already XORed with round key 0.  %r11
       * keeps the entry %rsp, the caller's %rbp is saved at -8(%r11), and
       * %ebp holds the last dword of round key 0 so that a fresh counter
       * word can be stored as bswap(counter) ^ %ebp.  The frame and
       * %xmm0-%xmm15 are cleared at .Lctr32_done.
       *
       * AES-NI, pinsrd and movbe instructions are emitted as .byte
       * sequences and decoded in the comment beside each one.
       */
 1029 aesni_ctr32_encrypt_blocks:
 1030 .cfi_startproc  
 1031         cmpq    $1,%rdx
 1032         jne     .Lctr32_bulk
 1033 
 1034 
 1035 
      /* Single block: out = in ^ AES(counter block). */
 1036         movups  (%r8),%xmm2             /* counter block */
 1037         movups  (%rdi),%xmm3            /* input block */
 1038         movl    240(%rcx),%edx          /* edx = aesenc iteration count */
 1039         movups  (%rcx),%xmm0            /* round key 0 */
 1040         movups  16(%rcx),%xmm1          /* round key 1 */
 1041         leaq    32(%rcx),%rcx
 1042         xorps   %xmm0,%xmm2
 1043 .Loop_enc1_7:
 1044 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1045         decl    %edx                    /* sets ZF for the jnz below */
 1046         movups  (%rcx),%xmm1
 1047         leaq    16(%rcx),%rcx
 1048         jnz     .Loop_enc1_7
 1049 .byte   102,15,56,221,209               /* aesenclast %xmm1,%xmm2 */
 1050         pxor    %xmm0,%xmm0
 1051         pxor    %xmm1,%xmm1
 1052         xorps   %xmm3,%xmm2             /* keystream ^ input */
 1053         pxor    %xmm3,%xmm3
 1054         movups  %xmm2,(%rsi)
 1055         xorps   %xmm2,%xmm2
 1056         jmp     .Lctr32_epilogue
 1057 
 1058 .align  16
 1059 .Lctr32_bulk:
 1060         leaq    (%rsp),%r11             /* r11 = entry rsp, used to unwind */
 1061 .cfi_def_cfa_register   %r11
 1062         pushq   %rbp                    /* saved at -8(%r11) */
 1063 .cfi_offset     %rbp,-16
 1064         subq    $128,%rsp               /* room for 8 counter blocks */
 1065         andq    $-16,%rsp               /* align for movdqa/movaps */
 1066 
 1067 
 1068 
 1069 
      /*
       * Build counter blocks 0..7, each XORed with round key 0, in
       * xmm2..xmm5 and in the stack slots 0..112(%rsp).
       */
 1070         movdqu  (%r8),%xmm2             /* counter block */
 1071         movdqu  (%rcx),%xmm0            /* round key 0 */
 1072         movl    12(%r8),%r8d            /* counter dword */
 1073         pxor    %xmm0,%xmm2             /* xmm2 = block 0 ^ key0 */
 1074         movl    12(%rcx),%ebp           /* ebp = last dword of key0 */
 1075         movdqa  %xmm2,0(%rsp)
 1076         bswapl  %r8d                    /* r8d = counter in host order */
 1077         movdqa  %xmm2,%xmm3
 1078         movdqa  %xmm2,%xmm4
 1079         movdqa  %xmm2,%xmm5
 1080         movdqa  %xmm2,64(%rsp)
 1081         movdqa  %xmm2,80(%rsp)
 1082         movdqa  %xmm2,96(%rsp)
 1083         movq    %rdx,%r10               /* park the block count; rdx is scratch */
 1084         movdqa  %xmm2,112(%rsp)
 1085 
 1086         leaq    1(%r8),%rax
 1087         leaq    2(%r8),%rdx
 1088         bswapl  %eax
 1089         bswapl  %edx
 1090         xorl    %ebp,%eax
 1091         xorl    %ebp,%edx
 1092 .byte   102,15,58,34,216,3              /* pinsrd $3,%eax,%xmm3 (counter+1) */
 1093         leaq    3(%r8),%rax
 1094         movdqa  %xmm3,16(%rsp)
 1095 .byte   102,15,58,34,226,3              /* pinsrd $3,%edx,%xmm4 (counter+2) */
 1096         bswapl  %eax
 1097         movq    %r10,%rdx               /* rdx = block count again */
 1098         leaq    4(%r8),%r10
 1099         movdqa  %xmm4,32(%rsp)
 1100         xorl    %ebp,%eax
 1101         bswapl  %r10d
 1102 .byte   102,15,58,34,232,3              /* pinsrd $3,%eax,%xmm5 (counter+3) */
 1103         xorl    %ebp,%r10d
 1104         movdqa  %xmm5,48(%rsp)
 1105         leaq    5(%r8),%r9
 1106         movl    %r10d,64+12(%rsp)       /* counter+4 */
 1107         bswapl  %r9d
 1108         leaq    6(%r8),%r10
 1109         movl    240(%rcx),%eax          /* eax = aesenc iteration count */
 1110         xorl    %ebp,%r9d
 1111         bswapl  %r10d
 1112         movl    %r9d,80+12(%rsp)        /* counter+5 */
 1113         xorl    %ebp,%r10d
 1114         leaq    7(%r8),%r9
 1115         movl    %r10d,96+12(%rsp)       /* counter+6 */
 1116         bswapl  %r9d
 1117         movl    OPENSSL_ia32cap_P+4(%rip),%r10d
 1118         xorl    %ebp,%r9d
 1119         andl    $71303168,%r10d         /* 0x4400000: keep bits 22 and 26 (presumably MOVBE and XSAVE - confirm) */
 1120         movl    %r9d,112+12(%rsp)       /* counter+7 */
 1121 
 1122         movups  16(%rcx),%xmm1          /* round key 1 */
 1123 
 1124         movdqa  64(%rsp),%xmm6
 1125         movdqa  80(%rsp),%xmm7
 1126 
 1127         cmpq    $8,%rdx
 1128         jb      .Lctr32_tail            /* fewer than 8 blocks */
 1129 
 1130         subq    $6,%rdx
 1131         cmpl    $4194304,%r10d          /* 0x400000: bit 22 set and bit 26 clear */
 1132         je      .Lctr32_6x
 1133 
 1134         leaq    128(%rcx),%rcx          /* bias the key pointer; loop8 uses N-128(%rcx) */
 1135         subq    $2,%rdx                 /* rdx = count - 8 */
 1136         jmp     .Lctr32_loop8
 1137 
 1138 .align  16
 1139 .Lctr32_6x:
 1140         shll    $4,%eax                 /* eax = 16*rounds */
 1141         movl    $48,%r10d
 1142         bswapl  %ebp                    /* movbe swaps on store, so pre-swap the key word */
 1143         leaq    32(%rcx,%rax,1),%rcx    /* rcx = key + 32 + 16*rounds */
 1144         subq    %rax,%r10               /* r10 = 48 - 16*rounds */
 1145         jmp     .Lctr32_loop6
 1146 
 1147 .align  16
      /*
       * Six lanes (xmm2..xmm7) per iteration.  The first two rounds are
       * interleaved with writing the next six counter words to the stack;
       * the remaining rounds run in .Lenc_loop6 (defined elsewhere in
       * this file).
       */
 1148 .Lctr32_loop6:
 1149         addl    $6,%r8d
 1150         movups  -48(%rcx,%r10,1),%xmm0  /* round key 2 */
 1151 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1152         movl    %r8d,%eax
 1153         xorl    %ebp,%eax
 1154 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1155 .byte   0x0f,0x38,0xf1,0x44,0x24,12     /* movbe %eax,12(%rsp) */
 1156         leal    1(%r8),%eax
 1157 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1158         xorl    %ebp,%eax
 1159 .byte   0x0f,0x38,0xf1,0x44,0x24,28     /* movbe %eax,28(%rsp) */
 1160 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1161         leal    2(%r8),%eax
 1162         xorl    %ebp,%eax
 1163 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1164 .byte   0x0f,0x38,0xf1,0x44,0x24,44     /* movbe %eax,44(%rsp) */
 1165         leal    3(%r8),%eax
 1166 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1167         movups  -32(%rcx,%r10,1),%xmm1  /* round key 3 */
 1168         xorl    %ebp,%eax
 1169 
 1170 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1171 .byte   0x0f,0x38,0xf1,0x44,0x24,60     /* movbe %eax,60(%rsp) */
 1172         leal    4(%r8),%eax
 1173 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1174         xorl    %ebp,%eax
 1175 .byte   0x0f,0x38,0xf1,0x44,0x24,76     /* movbe %eax,76(%rsp) */
 1176 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1177         leal    5(%r8),%eax
 1178         xorl    %ebp,%eax
 1179 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1180 .byte   0x0f,0x38,0xf1,0x44,0x24,92     /* movbe %eax,92(%rsp) */
 1181         movq    %r10,%rax               /* rax = round-key offset for .Lenc_loop6 */
 1182 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1183 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1184         movups  -16(%rcx,%r10,1),%xmm0  /* round key 4 */
 1185 
 1186         call    .Lenc_loop6
 1187 
      /* XOR the six keystream blocks with the input and reload the lanes. */
 1188         movdqu  (%rdi),%xmm8
 1189         movdqu  16(%rdi),%xmm9
 1190         movdqu  32(%rdi),%xmm10
 1191         movdqu  48(%rdi),%xmm11
 1192         movdqu  64(%rdi),%xmm12
 1193         movdqu  80(%rdi),%xmm13
 1194         leaq    96(%rdi),%rdi
 1195         movups  -64(%rcx,%r10,1),%xmm1  /* round key 1 for the next pass */
 1196         pxor    %xmm2,%xmm8
 1197         movaps  0(%rsp),%xmm2
 1198         pxor    %xmm3,%xmm9
 1199         movaps  16(%rsp),%xmm3
 1200         pxor    %xmm4,%xmm10
 1201         movaps  32(%rsp),%xmm4
 1202         pxor    %xmm5,%xmm11
 1203         movaps  48(%rsp),%xmm5
 1204         pxor    %xmm6,%xmm12
 1205         movaps  64(%rsp),%xmm6
 1206         pxor    %xmm7,%xmm13
 1207         movaps  80(%rsp),%xmm7
 1208         movdqu  %xmm8,(%rsi)
 1209         movdqu  %xmm9,16(%rsi)
 1210         movdqu  %xmm10,32(%rsi)
 1211         movdqu  %xmm11,48(%rsi)
 1212         movdqu  %xmm12,64(%rsi)
 1213         movdqu  %xmm13,80(%rsi)
 1214         leaq    96(%rsi),%rsi
 1215 
 1216         subq    $6,%rdx
 1217         jnc     .Lctr32_loop6           /* no borrow: at least 6 more blocks */
 1218 
 1219         addq    $6,%rdx                 /* rdx = blocks still to do */
 1220         jz      .Lctr32_done
 1221 
      /* Undo the 6x register setup: eax = rounds, rcx = key schedule base. */
 1222         leal    -48(%r10),%eax
 1223         leaq    -80(%rcx,%r10,1),%rcx
 1224         negl    %eax
 1225         shrl    $4,%eax
 1226         jmp     .Lctr32_tail
 1227 
 1228 .align  32
      /*
       * Eight lanes (xmm2..xmm9) per iteration.  rcx = key schedule + 128.
       * AES rounds are interleaved with computing the next eight counter
       * words (r9d = bswap(counter + i) ^ ebp) into the stack slots.
       */
 1229 .Lctr32_loop8:
 1230         addl    $8,%r8d
 1231         movdqa  96(%rsp),%xmm8
 1232 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1233         movl    %r8d,%r9d
 1234         movdqa  112(%rsp),%xmm9
 1235 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1236         bswapl  %r9d
 1237         movups  32-128(%rcx),%xmm0      /* round key 2 */
 1238 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1239         xorl    %ebp,%r9d
 1240         nop
 1241 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1242         movl    %r9d,0+12(%rsp)         /* slot 0 counter word */
 1243         leaq    1(%r8),%r9
 1244 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1245 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1246 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1247 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1248         movups  48-128(%rcx),%xmm1      /* round key 3 */
 1249         bswapl  %r9d
 1250 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1251 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1252         xorl    %ebp,%r9d
 1253 .byte   0x66,0x90                       /* 2-byte nop */
 1254 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1255 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1256         movl    %r9d,16+12(%rsp)        /* slot 1 counter word */
 1257         leaq    2(%r8),%r9
 1258 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1259 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1260 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1261 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1262         movups  64-128(%rcx),%xmm0      /* round key 4 */
 1263         bswapl  %r9d
 1264 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1265 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1266         xorl    %ebp,%r9d
 1267 .byte   0x66,0x90                       /* 2-byte nop */
 1268 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1269 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1270         movl    %r9d,32+12(%rsp)        /* slot 2 counter word */
 1271         leaq    3(%r8),%r9
 1272 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1273 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1274 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1275 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1276         movups  80-128(%rcx),%xmm1      /* round key 5 */
 1277         bswapl  %r9d
 1278 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1279 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1280         xorl    %ebp,%r9d
 1281 .byte   0x66,0x90                       /* 2-byte nop */
 1282 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1283 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1284         movl    %r9d,48+12(%rsp)        /* slot 3 counter word */
 1285         leaq    4(%r8),%r9
 1286 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1287 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1288 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1289 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1290         movups  96-128(%rcx),%xmm0      /* round key 6 */
 1291         bswapl  %r9d
 1292 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1293 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1294         xorl    %ebp,%r9d
 1295 .byte   0x66,0x90                       /* 2-byte nop */
 1296 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1297 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1298         movl    %r9d,64+12(%rsp)        /* slot 4 counter word */
 1299         leaq    5(%r8),%r9
 1300 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1301 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1302 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1303 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1304         movups  112-128(%rcx),%xmm1     /* round key 7 */
 1305         bswapl  %r9d
 1306 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1307 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1308         xorl    %ebp,%r9d
 1309 .byte   0x66,0x90                       /* 2-byte nop */
 1310 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1311 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1312         movl    %r9d,80+12(%rsp)        /* slot 5 counter word */
 1313         leaq    6(%r8),%r9
 1314 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1315 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1316 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1317 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1318         movups  128-128(%rcx),%xmm0     /* round key 8 */
 1319         bswapl  %r9d
 1320 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1321 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1322         xorl    %ebp,%r9d
 1323 .byte   0x66,0x90                       /* 2-byte nop */
 1324 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1325 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1326         movl    %r9d,96+12(%rsp)        /* slot 6 counter word */
 1327         leaq    7(%r8),%r9
 1328 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1329 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1330 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1331 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1332         movups  144-128(%rcx),%xmm1     /* round key 9 */
 1333         bswapl  %r9d
 1334 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1335 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1336 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1337         xorl    %ebp,%r9d
 1338         movdqu  0(%rdi),%xmm10          /* start loading input block 0 */
 1339 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1340         movl    %r9d,112+12(%rsp)       /* slot 7 counter word */
 1341         cmpl    $11,%eax                /* flags feed both the jb and the je below */
 1342 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1343 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1344 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1345 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1346         movups  160-128(%rcx),%xmm0     /* round key 10 */
 1347 
 1348         jb      .Lctr32_enc_done        /* count < 11: key 10 is the last round key */
 1349 
 1350 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1351 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1352 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1353 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1354 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1355 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1356 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1357 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1358         movups  176-128(%rcx),%xmm1     /* round key 11 */
 1359 
 1360 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1361 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1362 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1363 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1364 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1365 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1366 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1367 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1368         movups  192-128(%rcx),%xmm0     /* round key 12 */
 1369         je      .Lctr32_enc_done        /* count == 11: key 12 is the last round key */
 1370 
 1371 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1372 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1373 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1374 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1375 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1376 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1377 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1378 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1379         movups  208-128(%rcx),%xmm1     /* round key 13 */
 1380 
 1381 .byte   102,15,56,220,208               /* aesenc %xmm0,%xmm2 */
 1382 .byte   102,15,56,220,216               /* aesenc %xmm0,%xmm3 */
 1383 .byte   102,15,56,220,224               /* aesenc %xmm0,%xmm4 */
 1384 .byte   102,15,56,220,232               /* aesenc %xmm0,%xmm5 */
 1385 .byte   102,15,56,220,240               /* aesenc %xmm0,%xmm6 */
 1386 .byte   102,15,56,220,248               /* aesenc %xmm0,%xmm7 */
 1387 .byte   102,68,15,56,220,192            /* aesenc %xmm0,%xmm8 */
 1388 .byte   102,68,15,56,220,200            /* aesenc %xmm0,%xmm9 */
 1389         movups  224-128(%rcx),%xmm0     /* round key 14 */
 1390         jmp     .Lctr32_enc_done
 1391 
 1392 .align  16
      /*
       * Here xmm1 is the second-to-last round key and xmm0 the last one.
       * Each input block is XORed with xmm0 first and then used as the
       * aesenclast operand, so the final round and the XOR with the input
       * happen in a single instruction.
       */
 1393 .Lctr32_enc_done:
 1394         movdqu  16(%rdi),%xmm11
 1395         pxor    %xmm0,%xmm10
 1396         movdqu  32(%rdi),%xmm12
 1397         pxor    %xmm0,%xmm11
 1398         movdqu  48(%rdi),%xmm13
 1399         pxor    %xmm0,%xmm12
 1400         movdqu  64(%rdi),%xmm14
 1401         pxor    %xmm0,%xmm13
 1402         movdqu  80(%rdi),%xmm15
 1403         pxor    %xmm0,%xmm14
 1404         pxor    %xmm0,%xmm15
 1405 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1406 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1407 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1408 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1409 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1410 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1411 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1412 .byte   102,68,15,56,220,201            /* aesenc %xmm1,%xmm9 */
 1413         movdqu  96(%rdi),%xmm1          /* input block 6 (xmm1 is free now) */
 1414         leaq    128(%rdi),%rdi
 1415 
 1416 .byte   102,65,15,56,221,210            /* aesenclast %xmm10,%xmm2 */
 1417         pxor    %xmm0,%xmm1
 1418         movdqu  112-128(%rdi),%xmm10    /* input block 7 */
 1419 .byte   102,65,15,56,221,219            /* aesenclast %xmm11,%xmm3 */
 1420         pxor    %xmm0,%xmm10
 1421         movdqa  0(%rsp),%xmm11          /* next counter blocks 0..5 into xmm11..15, xmm0 */
 1422 .byte   102,65,15,56,221,228            /* aesenclast %xmm12,%xmm4 */
 1423 .byte   102,65,15,56,221,237            /* aesenclast %xmm13,%xmm5 */
 1424         movdqa  16(%rsp),%xmm12
 1425         movdqa  32(%rsp),%xmm13
 1426 .byte   102,65,15,56,221,246            /* aesenclast %xmm14,%xmm6 */
 1427 .byte   102,65,15,56,221,255            /* aesenclast %xmm15,%xmm7 */
 1428         movdqa  48(%rsp),%xmm14
 1429         movdqa  64(%rsp),%xmm15
 1430 .byte   102,68,15,56,221,193            /* aesenclast %xmm1,%xmm8 */
 1431         movdqa  80(%rsp),%xmm0
 1432         movups  16-128(%rcx),%xmm1      /* round key 1 for the next pass */
 1433 .byte   102,69,15,56,221,202            /* aesenclast %xmm10,%xmm9 */
 1434 
      /* Store eight output blocks and move the next counters into the lanes. */
 1435         movups  %xmm2,(%rsi)
 1436         movdqa  %xmm11,%xmm2
 1437         movups  %xmm3,16(%rsi)
 1438         movdqa  %xmm12,%xmm3
 1439         movups  %xmm4,32(%rsi)
 1440         movdqa  %xmm13,%xmm4
 1441         movups  %xmm5,48(%rsi)
 1442         movdqa  %xmm14,%xmm5
 1443         movups  %xmm6,64(%rsi)
 1444         movdqa  %xmm15,%xmm6
 1445         movups  %xmm7,80(%rsi)
 1446         movdqa  %xmm0,%xmm7
 1447         movups  %xmm8,96(%rsi)
 1448         movups  %xmm9,112(%rsi)
 1449         leaq    128(%rsi),%rsi
 1450 
 1451         subq    $8,%rdx
 1452         jnc     .Lctr32_loop8           /* no borrow: at least 8 more blocks */
 1453 
 1454         addq    $8,%rdx                 /* rdx = blocks still to do */
 1455         jz      .Lctr32_done
 1456         leaq    -128(%rcx),%rcx         /* undo the +128 bias: rcx = key schedule base */
 1457 
      /*
       * Tail: fewer than 8 blocks remain.  On entry rcx = key schedule
       * base, eax = iteration count, xmm1 = round key 1, and xmm2..xmm7
       * hold counter blocks already XORed with round key 0.
       */
 1458 .Lctr32_tail:
 1459 
 1460 
 1461         leaq    16(%rcx),%rcx
 1462         cmpq    $4,%rdx
 1463         jb      .Lctr32_loop3           /* fewer than 4 blocks */
 1464         je      .Lctr32_loop4           /* exactly 4 blocks */
 1465 
 1466 
      /* 5..7 blocks: run the 8-lane code with xmm9 as an unused zero lane. */
 1467         shll    $4,%eax                 /* eax = 16*rounds */
 1468         movdqa  96(%rsp),%xmm8          /* seventh counter block */
 1469         pxor    %xmm9,%xmm9
 1470 
 1471         movups  16(%rcx),%xmm0          /* round key 2 */
 1472 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1473 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1474         leaq    32-16(%rcx,%rax,1),%rcx /* rcx = key + 32 + 16*rounds */
 1475         negq    %rax
 1476 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1477         addq    $16,%rax                /* rax = 16 - 16*rounds */
 1478         movups  (%rdi),%xmm10
 1479 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1480 .byte   102,15,56,220,241               /* aesenc %xmm1,%xmm6 */
 1481         movups  16(%rdi),%xmm11
 1482         movups  32(%rdi),%xmm12
 1483 .byte   102,15,56,220,249               /* aesenc %xmm1,%xmm7 */
 1484 .byte   102,68,15,56,220,193            /* aesenc %xmm1,%xmm8 */
 1485 
 1486         call    .Lenc_loop8_enter       /* remaining rounds; defined elsewhere in this file */
 1487 
 1488         movdqu  48(%rdi),%xmm13
 1489         pxor    %xmm10,%xmm2
 1490         movdqu  64(%rdi),%xmm10
 1491         pxor    %xmm11,%xmm3
 1492         movdqu  %xmm2,(%rsi)
 1493         pxor    %xmm12,%xmm4
 1494         movdqu  %xmm3,16(%rsi)
 1495         pxor    %xmm13,%xmm5
 1496         movdqu  %xmm4,32(%rsi)
 1497         pxor    %xmm10,%xmm6
 1498         movdqu  %xmm5,48(%rsi)
 1499         movdqu  %xmm6,64(%rsi)
 1500         cmpq    $6,%rdx                 /* flags feed both the jb and the je below */
 1501         jb      .Lctr32_done            /* exactly 5 blocks */
 1502 
 1503         movups  80(%rdi),%xmm11
 1504         xorps   %xmm11,%xmm7
 1505         movups  %xmm7,80(%rsi)
 1506         je      .Lctr32_done            /* exactly 6 blocks */
 1507 
 1508         movups  96(%rdi),%xmm12
 1509         xorps   %xmm12,%xmm8
 1510         movups  %xmm8,96(%rsi)
 1511         jmp     .Lctr32_done
 1512 
 1513 .align  32
      /* Exactly 4 blocks: four lanes, one round per iteration. */
 1514 .Lctr32_loop4:
 1515 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1516         leaq    16(%rcx),%rcx
 1517         decl    %eax                    /* sets ZF for the jnz below */
 1518 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1519 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1520 .byte   102,15,56,220,233               /* aesenc %xmm1,%xmm5 */
 1521         movups  (%rcx),%xmm1
 1522         jnz     .Lctr32_loop4
 1523 .byte   102,15,56,221,209               /* aesenclast %xmm1,%xmm2 */
 1524 .byte   102,15,56,221,217               /* aesenclast %xmm1,%xmm3 */
 1525         movups  (%rdi),%xmm10
 1526         movups  16(%rdi),%xmm11
 1527 .byte   102,15,56,221,225               /* aesenclast %xmm1,%xmm4 */
 1528 .byte   102,15,56,221,233               /* aesenclast %xmm1,%xmm5 */
 1529         movups  32(%rdi),%xmm12
 1530         movups  48(%rdi),%xmm13
 1531 
 1532         xorps   %xmm10,%xmm2
 1533         movups  %xmm2,(%rsi)
 1534         xorps   %xmm11,%xmm3
 1535         movups  %xmm3,16(%rsi)
 1536         pxor    %xmm12,%xmm4
 1537         movdqu  %xmm4,32(%rsi)
 1538         pxor    %xmm13,%xmm5
 1539         movdqu  %xmm5,48(%rsi)
 1540         jmp     .Lctr32_done
 1541 
 1542 .align  32
      /*
       * Fewer than 4 blocks: always encrypts three lanes, then stores
       * the first block unconditionally and the second and third
       * depending on rdx.
       */
 1543 .Lctr32_loop3:
 1544 .byte   102,15,56,220,209               /* aesenc %xmm1,%xmm2 */
 1545         leaq    16(%rcx),%rcx
 1546         decl    %eax                    /* sets ZF for the jnz below */
 1547 .byte   102,15,56,220,217               /* aesenc %xmm1,%xmm3 */
 1548 .byte   102,15,56,220,225               /* aesenc %xmm1,%xmm4 */
 1549         movups  (%rcx),%xmm1
 1550         jnz     .Lctr32_loop3
 1551 .byte   102,15,56,221,209               /* aesenclast %xmm1,%xmm2 */
 1552 .byte   102,15,56,221,217               /* aesenclast %xmm1,%xmm3 */
 1553 .byte   102,15,56,221,225               /* aesenclast %xmm1,%xmm4 */
 1554 
 1555         movups  (%rdi),%xmm10
 1556         xorps   %xmm10,%xmm2
 1557         movups  %xmm2,(%rsi)
 1558         cmpq    $2,%rdx                 /* flags feed both the jb and the je below */
 1559         jb      .Lctr32_done            /* fewer than 2 blocks */
 1560 
 1561         movups  16(%rdi),%xmm11
 1562         xorps   %xmm11,%xmm3
 1563         movups  %xmm3,16(%rsi)
 1564         je      .Lctr32_done            /* exactly 2 blocks */
 1565 
 1566         movups  32(%rdi),%xmm12
 1567         xorps   %xmm12,%xmm4
 1568         movups  %xmm4,32(%rsi)
 1569 
      /* Wipe all xmm registers and the 128-byte frame, then unwind. */
 1570 .Lctr32_done:
 1571         xorps   %xmm0,%xmm0
 1572         xorl    %ebp,%ebp
 1573         pxor    %xmm1,%xmm1
 1574         pxor    %xmm2,%xmm2
 1575         pxor    %xmm3,%xmm3
 1576         pxor    %xmm4,%xmm4
 1577         pxor    %xmm5,%xmm5
 1578         pxor    %xmm6,%xmm6
 1579         pxor    %xmm7,%xmm7
 1580         movaps  %xmm0,0(%rsp)
 1581         pxor    %xmm8,%xmm8
 1582         movaps  %xmm0,16(%rsp)
 1583         pxor    %xmm9,%xmm9
 1584         movaps  %xmm0,32(%rsp)
 1585         pxor    %xmm10,%xmm10
 1586         movaps  %xmm0,48(%rsp)
 1587         pxor    %xmm11,%xmm11
 1588         movaps  %xmm0,64(%rsp)
 1589         pxor    %xmm12,%xmm12
 1590         movaps  %xmm0,80(%rsp)
 1591         pxor    %xmm13,%xmm13
 1592         movaps  %xmm0,96(%rsp)
 1593         pxor    %xmm14,%xmm14
 1594         movaps  %xmm0,112(%rsp)
 1595         pxor    %xmm15,%xmm15
 1596         movq    -8(%r11),%rbp           /* restore the caller's rbp */
 1597 .cfi_restore    %rbp
 1598         leaq    (%r11),%rsp             /* restore the entry rsp */
 1599 .cfi_def_cfa_register   %rsp
 1600 .Lctr32_epilogue:
 1601         .byte   0xf3,0xc3               /* rep ret */
 1602 .cfi_endproc    
 1603 .size   aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
 1604 .globl  aesni_xts_encrypt
 1605 .type   aesni_xts_encrypt,@function
 1606 .align  16
 1607 aesni_xts_encrypt:
 1608 .cfi_startproc  
 1609         leaq    (%rsp),%r11
 1610 .cfi_def_cfa_register   %r11
 1611         pushq   %rbp
 1612 .cfi_offset     %rbp,-16
 1613         subq    $112,%rsp
 1614         andq    $-16,%rsp
 1615         movups  (%r9),%xmm2
 1616         movl    240(%r8),%eax
 1617         movl    240(%rcx),%r10d
 1618         movups  (%r8),%xmm0
 1619         movups  16(%r8),%xmm1
 1620         leaq    32(%r8),%r8
 1621         xorps   %xmm0,%xmm2
 1622 .Loop_enc1_8:
 1623 .byte   102,15,56,220,209
 1624         decl    %eax
 1625         movups  (%r8),%xmm1
 1626         leaq    16(%r8),%r8
 1627         jnz     .Loop_enc1_8
 1628 .byte   102,15,56,221,209
 1629         movups  (%rcx),%xmm0
 1630         movq    %rcx,%rbp
 1631         movl    %r10d,%eax
 1632         shll    $4,%r10d
 1633         movq    %rdx,%r9
 1634         andq    $-16,%rdx
 1635 
 1636         movups  16(%rcx,%r10,1),%xmm1
 1637 
 1638         movdqa  .Lxts_magic(%rip),%xmm8
 1639         movdqa  %xmm2,%xmm15
 1640         pshufd  $0x5f,%xmm2,%xmm9
 1641         pxor    %xmm0,%xmm1
 1642         movdqa  %xmm9,%xmm14
 1643         paddd   %xmm9,%xmm9
 1644         movdqa  %xmm15,%xmm10
 1645         psrad   $31,%xmm14
 1646         paddq   %xmm15,%xmm15
 1647         pand    %xmm8,%xmm14
 1648         pxor    %xmm0,%xmm10
 1649         pxor    %xmm14,%xmm15
 1650         movdqa  %xmm9,%xmm14
 1651         paddd   %xmm9,%xmm9
 1652         movdqa  %xmm15,%xmm11
 1653         psrad   $31,%xmm14
 1654         paddq   %xmm15,%xmm15
 1655         pand    %xmm8,%xmm14
 1656         pxor    %xmm0,%xmm11
 1657         pxor    %xmm14,%xmm15
 1658         movdqa  %xmm9,%xmm14
 1659         paddd   %xmm9,%xmm9
 1660         movdqa  %xmm15,%xmm12
 1661         psrad   $31,%xmm14
 1662         paddq   %xmm15,%xmm15
 1663         pand    %xmm8,%xmm14
 1664         pxor    %xmm0,%xmm12
 1665         pxor    %xmm14,%xmm15
 1666         movdqa  %xmm9,%xmm14
 1667         paddd   %xmm9,%xmm9
 1668         movdqa  %xmm15,%xmm13
 1669         psrad   $31,%xmm14
 1670         paddq   %xmm15,%xmm15
 1671         pand    %xmm8,%xmm14
 1672         pxor    %xmm0,%xmm13
 1673         pxor    %xmm14,%xmm15
 1674         movdqa  %xmm15,%xmm14
 1675         psrad   $31,%xmm9
 1676         paddq   %xmm15,%xmm15
 1677         pand    %xmm8,%xmm9
 1678         pxor    %xmm0,%xmm14
 1679         pxor    %xmm9,%xmm15
 1680         movaps  %xmm1,96(%rsp)
 1681 
 1682         subq    $96,%rdx
 1683         jc      .Lxts_enc_short
 1684 
 1685         movl    $16+96,%eax
 1686         leaq    32(%rbp,%r10,1),%rcx
 1687         subq    %r10,%rax
 1688         movups  16(%rbp),%xmm1
 1689         movq    %rax,%r10
 1690         leaq    .Lxts_magic(%rip),%r8
 1691         jmp     .Lxts_enc_grandloop
 1692 
 1693 .align  32
 1694 .Lxts_enc_grandloop:
 1695         movdqu  0(%rdi),%xmm2
 1696         movdqa  %xmm0,%xmm8
 1697         movdqu  16(%rdi),%xmm3
 1698         pxor    %xmm10,%xmm2
 1699         movdqu  32(%rdi),%xmm4
 1700         pxor    %xmm11,%xmm3
 1701 .byte   102,15,56,220,209
 1702         movdqu  48(%rdi),%xmm5
 1703         pxor    %xmm12,%xmm4
 1704 .byte   102,15,56,220,217
 1705         movdqu  64(%rdi),%xmm6
 1706         pxor    %xmm13,%xmm5
 1707 .byte   102,15,56,220,225
 1708         movdqu  80(%rdi),%xmm7
 1709         pxor    %xmm15,%xmm8
 1710         movdqa  96(%rsp),%xmm9
 1711         pxor    %xmm14,%xmm6
 1712 .byte   102,15,56,220,233
 1713         movups  32(%rbp),%xmm0
 1714         leaq    96(%rdi),%rdi
 1715         pxor    %xmm8,%xmm7
 1716 
 1717         pxor    %xmm9,%xmm10
 1718 .byte   102,15,56,220,241
 1719         pxor    %xmm9,%xmm11
 1720         movdqa  %xmm10,0(%rsp)
 1721 .byte   102,15,56,220,249
 1722         movups  48(%rbp),%xmm1
 1723         pxor    %xmm9,%xmm12
 1724 
 1725 .byte   102,15,56,220,208
 1726         pxor    %xmm9,%xmm13
 1727         movdqa  %xmm11,16(%rsp)
 1728 .byte   102,15,56,220,216
 1729         pxor    %xmm9,%xmm14
 1730         movdqa  %xmm12,32(%rsp)
 1731 .byte   102,15,56,220,224
 1732 .byte   102,15,56,220,232
 1733         pxor    %xmm9,%xmm8
 1734         movdqa  %xmm14,64(%rsp)
 1735 .byte   102,15,56,220,240
 1736 .byte   102,15,56,220,248
 1737         movups  64(%rbp),%xmm0
 1738         movdqa  %xmm8,80(%rsp)
 1739         pshufd  $0x5f,%xmm15,%xmm9
 1740         jmp     .Lxts_enc_loop6
 1741 .align  32
 1742 .Lxts_enc_loop6:
 1743 .byte   102,15,56,220,209
 1744 .byte   102,15,56,220,217
 1745 .byte   102,15,56,220,225
 1746 .byte   102,15,56,220,233
 1747 .byte   102,15,56,220,241
 1748 .byte   102,15,56,220,249
 1749         movups  -64(%rcx,%rax,1),%xmm1
 1750         addq    $32,%rax
 1751 
 1752 .byte   102,15,56,220,208
 1753 .byte   102,15,56,220,216
 1754 .byte   102,15,56,220,224
 1755 .byte   102,15,56,220,232
 1756 .byte   102,15,56,220,240
 1757 .byte   102,15,56,220,248
 1758         movups  -80(%rcx,%rax,1),%xmm0
 1759         jnz     .Lxts_enc_loop6
 1760 
 1761         movdqa  (%r8),%xmm8
 1762         movdqa  %xmm9,%xmm14
 1763         paddd   %xmm9,%xmm9
 1764 .byte   102,15,56,220,209
 1765         paddq   %xmm15,%xmm15
 1766         psrad   $31,%xmm14
 1767 .byte   102,15,56,220,217
 1768         pand    %xmm8,%xmm14
 1769         movups  (%rbp),%xmm10
 1770 .byte   102,15,56,220,225
 1771 .byte   102,15,56,220,233
 1772 .byte   102,15,56,220,241
 1773         pxor    %xmm14,%xmm15
 1774         movaps  %xmm10,%xmm11
 1775 .byte   102,15,56,220,249
 1776         movups  -64(%rcx),%xmm1
 1777 
 1778         movdqa  %xmm9,%xmm14
 1779 .byte   102,15,56,220,208
 1780         paddd   %xmm9,%xmm9
 1781         pxor    %xmm15,%xmm10
 1782 .byte   102,15,56,220,216
 1783         psrad   $31,%xmm14
 1784         paddq   %xmm15,%xmm15
 1785 .byte   102,15,56,220,224
 1786 .byte   102,15,56,220,232
 1787         pand    %xmm8,%xmm14
 1788         movaps  %xmm11,%xmm12
 1789 .byte   102,15,56,220,240
 1790         pxor    %xmm14,%xmm15
 1791         movdqa  %xmm9,%xmm14
 1792 .byte   102,15,56,220,248
 1793         movups  -48(%rcx),%xmm0
 1794 
 1795         paddd   %xmm9,%xmm9
 1796 .byte   102,15,56,220,209
 1797         pxor    %xmm15,%xmm11
 1798         psrad   $31,%xmm14
 1799 .byte   102,15,56,220,217
 1800         paddq   %xmm15,%xmm15
 1801         pand    %xmm8,%xmm14
 1802 .byte   102,15,56,220,225
 1803 .byte   102,15,56,220,233
 1804         movdqa  %xmm13,48(%rsp)
 1805         pxor    %xmm14,%xmm15
 1806 .byte   102,15,56,220,241
 1807         movaps  %xmm12,%xmm13
 1808         movdqa  %xmm9,%xmm14
 1809 .byte   102,15,56,220,249
 1810         movups  -32(%rcx),%xmm1
 1811 
 1812         paddd   %xmm9,%xmm9
 1813 .byte   102,15,56,220,208
 1814         pxor    %xmm15,%xmm12
 1815         psrad   $31,%xmm14
 1816 .byte   102,15,56,220,216
 1817         paddq   %xmm15,%xmm15
 1818         pand    %xmm8,%xmm14
 1819 .byte   102,15,56,220,224
 1820 .byte   102,15,56,220,232
 1821 .byte   102,15,56,220,240
 1822         pxor    %xmm14,%xmm15
 1823         movaps  %xmm13,%xmm14
 1824 .byte   102,15,56,220,248
 1825 
 1826         movdqa  %xmm9,%xmm0
 1827         paddd   %xmm9,%xmm9
 1828 .byte   102,15,56,220,209
 1829         pxor    %xmm15,%xmm13
 1830         psrad   $31,%xmm0
 1831 .byte   102,15,56,220,217
 1832         paddq   %xmm15,%xmm15
 1833         pand    %xmm8,%xmm0
 1834 .byte   102,15,56,220,225
 1835 .byte   102,15,56,220,233
 1836         pxor    %xmm0,%xmm15
 1837         movups  (%rbp),%xmm0
 1838 .byte   102,15,56,220,241
 1839 .byte   102,15,56,220,249
 1840         movups  16(%rbp),%xmm1
 1841 
 1842         pxor    %xmm15,%xmm14
 1843 .byte   102,15,56,221,84,36,0
 1844         psrad   $31,%xmm9
 1845         paddq   %xmm15,%xmm15
 1846 .byte   102,15,56,221,92,36,16
 1847 .byte   102,15,56,221,100,36,32
 1848         pand    %xmm8,%xmm9
 1849         movq    %r10,%rax
 1850 .byte   102,15,56,221,108,36,48
 1851 .byte   102,15,56,221,116,36,64
 1852 .byte   102,15,56,221,124,36,80
 1853         pxor    %xmm9,%xmm15
 1854 
 1855         leaq    96(%rsi),%rsi
 1856         movups  %xmm2,-96(%rsi)
 1857         movups  %xmm3,-80(%rsi)
 1858         movups  %xmm4,-64(%rsi)
 1859         movups  %xmm5,-48(%rsi)
 1860         movups  %xmm6,-32(%rsi)
 1861         movups  %xmm7,-16(%rsi)
 1862         subq    $96,%rdx
 1863         jnc     .Lxts_enc_grandloop
 1864 
 1865         movl    $16+96,%eax
 1866         subl    %r10d,%eax
 1867         movq    %rbp,%rcx
 1868         shrl    $4,%eax
 1869 
 1870 .Lxts_enc_short:
 1871 
 1872         movl    %eax,%r10d
 1873         pxor    %xmm0,%xmm10
 1874         addq    $96,%rdx
 1875         jz      .Lxts_enc_done
 1876 
 1877         pxor    %xmm0,%xmm11
 1878         cmpq    $0x20,%rdx
 1879         jb      .Lxts_enc_one
 1880         pxor    %xmm0,%xmm12
 1881         je      .Lxts_enc_two
 1882 
 1883         pxor    %xmm0,%xmm13
 1884         cmpq    $0x40,%rdx
 1885         jb      .Lxts_enc_three
 1886         pxor    %xmm0,%xmm14
 1887         je      .Lxts_enc_four
 1888 
 1889         movdqu  (%rdi),%xmm2
 1890         movdqu  16(%rdi),%xmm3
 1891         movdqu  32(%rdi),%xmm4
 1892         pxor    %xmm10,%xmm2
 1893         movdqu  48(%rdi),%xmm5
 1894         pxor    %xmm11,%xmm3
 1895         movdqu  64(%rdi),%xmm6
 1896         leaq    80(%rdi),%rdi
 1897         pxor    %xmm12,%xmm4
 1898         pxor    %xmm13,%xmm5
 1899         pxor    %xmm14,%xmm6
 1900         pxor    %xmm7,%xmm7
 1901 
 1902         call    _aesni_encrypt6
 1903 
 1904         xorps   %xmm10,%xmm2
 1905         movdqa  %xmm15,%xmm10
 1906         xorps   %xmm11,%xmm3
 1907         xorps   %xmm12,%xmm4
 1908         movdqu  %xmm2,(%rsi)
 1909         xorps   %xmm13,%xmm5
 1910         movdqu  %xmm3,16(%rsi)
 1911         xorps   %xmm14,%xmm6
 1912         movdqu  %xmm4,32(%rsi)
 1913         movdqu  %xmm5,48(%rsi)
 1914         movdqu  %xmm6,64(%rsi)
 1915         leaq    80(%rsi),%rsi
 1916         jmp     .Lxts_enc_done
 1917 
 1918 .align  16
 1919 .Lxts_enc_one:
 1920         movups  (%rdi),%xmm2
 1921         leaq    16(%rdi),%rdi
 1922         xorps   %xmm10,%xmm2
 1923         movups  (%rcx),%xmm0
 1924         movups  16(%rcx),%xmm1
 1925         leaq    32(%rcx),%rcx
 1926         xorps   %xmm0,%xmm2
 1927 .Loop_enc1_9:
 1928 .byte   102,15,56,220,209
 1929         decl    %eax
 1930         movups  (%rcx),%xmm1
 1931         leaq    16(%rcx),%rcx
 1932         jnz     .Loop_enc1_9
 1933 .byte   102,15,56,221,209
 1934         xorps   %xmm10,%xmm2
 1935         movdqa  %xmm11,%xmm10
 1936         movups  %xmm2,(%rsi)
 1937         leaq    16(%rsi),%rsi
 1938         jmp     .Lxts_enc_done
 1939 
 1940 .align  16
 1941 .Lxts_enc_two:
 1942         movups  (%rdi),%xmm2
 1943         movups  16(%rdi),%xmm3
 1944         leaq    32(%rdi),%rdi
 1945         xorps   %xmm10,%xmm2
 1946         xorps   %xmm11,%xmm3
 1947 
 1948         call    _aesni_encrypt2
 1949 
 1950         xorps   %xmm10,%xmm2
 1951         movdqa  %xmm12,%xmm10
 1952         xorps   %xmm11,%xmm3
 1953         movups  %xmm2,(%rsi)
 1954         movups  %xmm3,16(%rsi)
 1955         leaq    32(%rsi),%rsi
 1956         jmp     .Lxts_enc_done
 1957 
 1958 .align  16
 1959 .Lxts_enc_three:
 1960         movups  (%rdi),%xmm2
 1961         movups  16(%rdi),%xmm3
 1962         movups  32(%rdi),%xmm4
 1963         leaq    48(%rdi),%rdi
 1964         xorps   %xmm10,%xmm2
 1965         xorps   %xmm11,%xmm3
 1966         xorps   %xmm12,%xmm4
 1967 
 1968         call    _aesni_encrypt3
 1969 
 1970         xorps   %xmm10,%xmm2
 1971         movdqa  %xmm13,%xmm10
 1972         xorps   %xmm11,%xmm3
 1973         xorps   %xmm12,%xmm4
 1974         movups  %xmm2,(%rsi)
 1975         movups  %xmm3,16(%rsi)
 1976         movups  %xmm4,32(%rsi)
 1977         leaq    48(%rsi),%rsi
 1978         jmp     .Lxts_enc_done
 1979 
 1980 .align  16
 1981 .Lxts_enc_four:
 1982         movups  (%rdi),%xmm2
 1983         movups  16(%rdi),%xmm3
 1984         movups  32(%rdi),%xmm4
 1985         xorps   %xmm10,%xmm2
 1986         movups  48(%rdi),%xmm5
 1987         leaq    64(%rdi),%rdi
 1988         xorps   %xmm11,%xmm3
 1989         xorps   %xmm12,%xmm4
 1990         xorps   %xmm13,%xmm5
 1991 
 1992         call    _aesni_encrypt4
 1993 
 1994         pxor    %xmm10,%xmm2
 1995         movdqa  %xmm14,%xmm10
 1996         pxor    %xmm11,%xmm3
 1997         pxor    %xmm12,%xmm4
 1998         movdqu  %xmm2,(%rsi)
 1999         pxor    %xmm13,%xmm5
 2000         movdqu  %xmm3,16(%rsi)
 2001         movdqu  %xmm4,32(%rsi)
 2002         movdqu  %xmm5,48(%rsi)
 2003         leaq    64(%rsi),%rsi
 2004         jmp     .Lxts_enc_done
 2005 
 2006 .align  16
 2007 .Lxts_enc_done:
 2008         andq    $15,%r9
 2009         jz      .Lxts_enc_ret
 2010         movq    %r9,%rdx
 2011 
 2012 .Lxts_enc_steal:
 2013         movzbl  (%rdi),%eax
 2014         movzbl  -16(%rsi),%ecx
 2015         leaq    1(%rdi),%rdi
 2016         movb    %al,-16(%rsi)
 2017         movb    %cl,0(%rsi)
 2018         leaq    1(%rsi),%rsi
 2019         subq    $1,%rdx
 2020         jnz     .Lxts_enc_steal
 2021 
 2022         subq    %r9,%rsi
 2023         movq    %rbp,%rcx
 2024         movl    %r10d,%eax
 2025 
 2026         movups  -16(%rsi),%xmm2
 2027         xorps   %xmm10,%xmm2
 2028         movups  (%rcx),%xmm0
 2029         movups  16(%rcx),%xmm1
 2030         leaq    32(%rcx),%rcx
 2031         xorps   %xmm0,%xmm2
 2032 .Loop_enc1_10:
 2033 .byte   102,15,56,220,209
 2034         decl    %eax
 2035         movups  (%rcx),%xmm1
 2036         leaq    16(%rcx),%rcx
 2037         jnz     .Loop_enc1_10
 2038 .byte   102,15,56,221,209
 2039         xorps   %xmm10,%xmm2
 2040         movups  %xmm2,-16(%rsi)
 2041 
 2042 .Lxts_enc_ret:
 2043         xorps   %xmm0,%xmm0
 2044         pxor    %xmm1,%xmm1
 2045         pxor    %xmm2,%xmm2
 2046         pxor    %xmm3,%xmm3
 2047         pxor    %xmm4,%xmm4
 2048         pxor    %xmm5,%xmm5
 2049         pxor    %xmm6,%xmm6
 2050         pxor    %xmm7,%xmm7
 2051         movaps  %xmm0,0(%rsp)
 2052         pxor    %xmm8,%xmm8
 2053         movaps  %xmm0,16(%rsp)
 2054         pxor    %xmm9,%xmm9
 2055         movaps  %xmm0,32(%rsp)
 2056         pxor    %xmm10,%xmm10
 2057         movaps  %xmm0,48(%rsp)
 2058         pxor    %xmm11,%xmm11
 2059         movaps  %xmm0,64(%rsp)
 2060         pxor    %xmm12,%xmm12
 2061         movaps  %xmm0,80(%rsp)
 2062         pxor    %xmm13,%xmm13
 2063         movaps  %xmm0,96(%rsp)
 2064         pxor    %xmm14,%xmm14
 2065         pxor    %xmm15,%xmm15
 2066         movq    -8(%r11),%rbp
 2067 .cfi_restore    %rbp
 2068         leaq    (%r11),%rsp
 2069 .cfi_def_cfa_register   %rsp
 2070 .Lxts_enc_epilogue:
 2071         .byte   0xf3,0xc3
 2072 .cfi_endproc    
 2073 .size   aesni_xts_encrypt,.-aesni_xts_encrypt
 2074 .globl  aesni_xts_decrypt
 2075 .type   aesni_xts_decrypt,@function
 2076 .align  16
      /*
       * aesni_xts_decrypt: AES-NI bulk decryption with a per-block tweak
       * (the symbol name indicates XTS mode).
       *
       * Register use on entry, as read by the code below:
       *   %rdi  source pointer
       *   %rsi  destination pointer
       *   %rdx  length in bytes
       *   %rcx  key schedule used to decrypt the data blocks
       *   %r8   key schedule used once, to encrypt the 16 bytes at (%r9)
       *   %r9   pointer to the 16-byte value that seeds the tweak
       * Both key schedules hold a 32-bit round count at byte offset 240.
       *
       * Flow: encrypt the seed, derive the first tweaks, process six blocks
       * per iteration in .Lxts_dec_grandloop, handle one to five leftover
       * blocks in .Lxts_dec_short, and, when the length is not a multiple
       * of 16, exchange bytes with the trailing partial block in
       * .Lxts_dec_steal.  Before returning, all xmm registers and the
       * 112-byte stack scratch area are zeroed.
       *
       * The .byte sequences are hand-encoded AES-NI instructions:
       *   102,15,56,220,modrm  aesenc        102,15,56,221,modrm  aesenclast
       *   102,15,56,222,modrm  aesdec        102,15,56,223,modrm  aesdeclast
       *
       * NOTE(review): .Lxts_magic and the _aesni_decrypt2/3/4/6 helpers are
       * defined outside this block; confirm their contracts there.
       */
 2077 aesni_xts_decrypt:
 2078 .cfi_startproc  
 2079         leaq    (%rsp),%r11 /* r11 = entry %rsp, used to unwind at exit */
 2080 .cfi_def_cfa_register   %r11
 2081         pushq   %rbp
 2082 .cfi_offset     %rbp,-16
 2083         subq    $112,%rsp /* scratch: 0..95 six per-block masks, 96..111 a key constant */
 2084         andq    $-16,%rsp /* scratch is accessed with movaps/movdqa, so align to 16 */
 2085         movups  (%r9),%xmm2 /* xmm2 = 16-byte seed */
 2086         movl    240(%r8),%eax /* eax = round count of the %r8 key */
 2087         movl    240(%rcx),%r10d /* r10d = round count of the %rcx key */
 2088         movups  (%r8),%xmm0
 2089         movups  16(%r8),%xmm1
 2090         leaq    32(%r8),%r8
 2091         xorps   %xmm0,%xmm2
 2092 .Loop_enc1_11: /* encrypt the seed with the %r8 key schedule */
 2093 .byte   102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
 2094         decl    %eax
 2095         movups  (%r8),%xmm1
 2096         leaq    16(%r8),%r8
 2097         jnz     .Loop_enc1_11
 2098 .byte   102,15,56,221,209 /* aesenclast %xmm1,%xmm2: xmm2 = encrypted seed */
 2099         xorl    %eax,%eax
 2100         testq   $15,%rdx
 2101         setnz   %al
 2102         shlq    $4,%rax /* rax = 16 if the length has a partial tail, else 0 */
 2103         subq    %rax,%rdx /* hold one full block back for the stealing path */
 2104 
 2105         movups  (%rcx),%xmm0 /* xmm0 = round key 0 */
 2106         movq    %rcx,%rbp /* rbp = base of the data key schedule */
 2107         movl    %r10d,%eax /* eax = round count */
 2108         shll    $4,%r10d /* r10 = 16 * rounds */
 2109         movq    %rdx,%r9 /* r9 keeps the length; its low 4 bits are the tail size */
 2110         andq    $-16,%rdx /* rdx = bytes in whole blocks */
 2111 
 2112         movups  16(%rcx,%r10,1),%xmm1 /* round key at offset 16+16*rounds (the aesdeclast key) */
 2113 
 2114         movdqa  .Lxts_magic(%rip),%xmm8 /* constant defined outside this view */
 2115         movdqa  %xmm2,%xmm15 /* xmm15 = running tweak */
 2116         pshufd  $0x5f,%xmm2,%xmm9 /* xmm9 = dwords {3,3,1,1} of the tweak: source of the carry masks */
 2117         pxor    %xmm0,%xmm1 /* xmm1 = key[0] ^ last round key */
 2118         movdqa  %xmm9,%xmm14 /* tweak step: mask = sign(xmm9 dwords) & xmm8 ... */
 2119         paddd   %xmm9,%xmm9
 2120         movdqa  %xmm15,%xmm10
 2121         psrad   $31,%xmm14
 2122         paddq   %xmm15,%xmm15 /* ... tweak doubled per 64-bit lane ... */
 2123         pand    %xmm8,%xmm14
 2124         pxor    %xmm0,%xmm10 /* xmm10 = tweak 1 ^ key[0] */
 2125         pxor    %xmm14,%xmm15 /* ... then xored with the mask */
 2126         movdqa  %xmm9,%xmm14
 2127         paddd   %xmm9,%xmm9
 2128         movdqa  %xmm15,%xmm11
 2129         psrad   $31,%xmm14
 2130         paddq   %xmm15,%xmm15
 2131         pand    %xmm8,%xmm14
 2132         pxor    %xmm0,%xmm11 /* xmm11 = tweak 2 ^ key[0] */
 2133         pxor    %xmm14,%xmm15
 2134         movdqa  %xmm9,%xmm14
 2135         paddd   %xmm9,%xmm9
 2136         movdqa  %xmm15,%xmm12
 2137         psrad   $31,%xmm14
 2138         paddq   %xmm15,%xmm15
 2139         pand    %xmm8,%xmm14
 2140         pxor    %xmm0,%xmm12 /* xmm12 = tweak 3 ^ key[0] */
 2141         pxor    %xmm14,%xmm15
 2142         movdqa  %xmm9,%xmm14
 2143         paddd   %xmm9,%xmm9
 2144         movdqa  %xmm15,%xmm13
 2145         psrad   $31,%xmm14
 2146         paddq   %xmm15,%xmm15
 2147         pand    %xmm8,%xmm14
 2148         pxor    %xmm0,%xmm13 /* xmm13 = tweak 4 ^ key[0] */
 2149         pxor    %xmm14,%xmm15
 2150         movdqa  %xmm15,%xmm14
 2151         psrad   $31,%xmm9
 2152         paddq   %xmm15,%xmm15
 2153         pand    %xmm8,%xmm9
 2154         pxor    %xmm0,%xmm14 /* xmm14 = tweak 5 ^ key[0] */
 2155         pxor    %xmm9,%xmm15 /* xmm15 = tweak 6 (not xored with key[0]) */
 2156         movaps  %xmm1,96(%rsp) /* stash key[0] ^ last round key for the grand loop */
 2157 
 2158         subq    $96,%rdx
 2159         jc      .Lxts_dec_short /* fewer than six whole blocks: skip the grand loop */
 2160 
 2161         movl    $16+96,%eax
 2162         leaq    32(%rbp,%r10,1),%rcx /* rcx = key base + 32 + 16*rounds */
 2163         subq    %r10,%rax /* rax = 112 - 16*rounds: index stepped by 32 up to zero in .Lxts_dec_loop6 */
 2164         movups  16(%rbp),%xmm1 /* xmm1 = round key 1 */
 2165         movq    %rax,%r10 /* r10 = saved start index, reloaded every iteration */
 2166         leaq    .Lxts_magic(%rip),%r8
 2167         jmp     .Lxts_dec_grandloop
 2168 
 2169 .align  32
 2170 .Lxts_dec_grandloop: /* six blocks per iteration */
 2171         movdqu  0(%rdi),%xmm2
 2172         movdqa  %xmm0,%xmm8
 2173         movdqu  16(%rdi),%xmm3
 2174         pxor    %xmm10,%xmm2 /* block ^ tweak ^ key[0]: tweak and initial key xor in one step */
 2175         movdqu  32(%rdi),%xmm4
 2176         pxor    %xmm11,%xmm3
 2177 .byte   102,15,56,222,209 /* aesdec %xmm1,%xmm2 (round 1 starts as soon as a block is ready) */
 2178         movdqu  48(%rdi),%xmm5
 2179         pxor    %xmm12,%xmm4
 2180 .byte   102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
 2181         movdqu  64(%rdi),%xmm6
 2182         pxor    %xmm13,%xmm5
 2183 .byte   102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
 2184         movdqu  80(%rdi),%xmm7
 2185         pxor    %xmm15,%xmm8 /* xmm8 = tweak 6 ^ key[0] */
 2186         movdqa  96(%rsp),%xmm9 /* xmm9 = key[0] ^ last round key */
 2187         pxor    %xmm14,%xmm6
 2188 .byte   102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
 2189         movups  32(%rbp),%xmm0 /* xmm0 = round key 2 */
 2190         leaq    96(%rdi),%rdi
 2191         pxor    %xmm8,%xmm7
 2192 
 2193         pxor    %xmm9,%xmm10 /* tweak^key[0] becomes tweak^last key ... */
 2194 .byte   102,15,56,222,241 /* aesdec %xmm1,%xmm6 */
 2195         pxor    %xmm9,%xmm11
 2196         movdqa  %xmm10,0(%rsp) /* ... parked as the memory operand of aesdeclast below */
 2197 .byte   102,15,56,222,249 /* aesdec %xmm1,%xmm7 */
 2198         movups  48(%rbp),%xmm1 /* xmm1 = round key 3 */
 2199         pxor    %xmm9,%xmm12
 2200 
 2201 .byte   102,15,56,222,208 /* aesdec %xmm0,%xmm2 (round 2; lines below cover xmm3..xmm7) */
 2202         pxor    %xmm9,%xmm13
 2203         movdqa  %xmm11,16(%rsp)
 2204 .byte   102,15,56,222,216
 2205         pxor    %xmm9,%xmm14
 2206         movdqa  %xmm12,32(%rsp)
 2207 .byte   102,15,56,222,224
 2208 .byte   102,15,56,222,232
 2209         pxor    %xmm9,%xmm8
 2210         movdqa  %xmm14,64(%rsp)
 2211 .byte   102,15,56,222,240
 2212 .byte   102,15,56,222,248
 2213         movups  64(%rbp),%xmm0 /* xmm0 = round key 4 */
 2214         movdqa  %xmm8,80(%rsp)
 2215         pshufd  $0x5f,%xmm15,%xmm9 /* restart the carry-mask source from the current tweak */
 2216         jmp     .Lxts_dec_loop6
 2217 .align  32
 2218 .Lxts_dec_loop6: /* middle rounds: two round keys per pass, all six blocks */
 2219 .byte   102,15,56,222,209 /* aesdec %xmm1 into %xmm2..%xmm7 (six lines) */
 2220 .byte   102,15,56,222,217
 2221 .byte   102,15,56,222,225
 2222 .byte   102,15,56,222,233
 2223 .byte   102,15,56,222,241
 2224 .byte   102,15,56,222,249
 2225         movups  -64(%rcx,%rax,1),%xmm1
 2226         addq    $32,%rax /* sets ZF for the jnz below; the movups in between leave flags alone */
 2227 
 2228 .byte   102,15,56,222,208 /* aesdec %xmm0 into %xmm2..%xmm7 (six lines) */
 2229 .byte   102,15,56,222,216
 2230 .byte   102,15,56,222,224
 2231 .byte   102,15,56,222,232
 2232 .byte   102,15,56,222,240
 2233 .byte   102,15,56,222,248
 2234         movups  -80(%rcx,%rax,1),%xmm0
 2235         jnz     .Lxts_dec_loop6
 2236 
      /* Remaining rounds, interleaved with computing the next six tweaks. */
 2237         movdqa  (%r8),%xmm8 /* xmm8 = .Lxts_magic again (it was used as a temporary above) */
 2238         movdqa  %xmm9,%xmm14
 2239         paddd   %xmm9,%xmm9
 2240 .byte   102,15,56,222,209
 2241         paddq   %xmm15,%xmm15
 2242         psrad   $31,%xmm14
 2243 .byte   102,15,56,222,217
 2244         pand    %xmm8,%xmm14
 2245         movups  (%rbp),%xmm10 /* xmm10 = key[0] */
 2246 .byte   102,15,56,222,225
 2247 .byte   102,15,56,222,233
 2248 .byte   102,15,56,222,241
 2249         pxor    %xmm14,%xmm15
 2250         movaps  %xmm10,%xmm11
 2251 .byte   102,15,56,222,249
 2252         movups  -64(%rcx),%xmm1
 2253 
 2254         movdqa  %xmm9,%xmm14
 2255 .byte   102,15,56,222,208
 2256         paddd   %xmm9,%xmm9
 2257         pxor    %xmm15,%xmm10 /* xmm10 = next tweak 1 ^ key[0] */
 2258 .byte   102,15,56,222,216
 2259         psrad   $31,%xmm14
 2260         paddq   %xmm15,%xmm15
 2261 .byte   102,15,56,222,224
 2262 .byte   102,15,56,222,232
 2263         pand    %xmm8,%xmm14
 2264         movaps  %xmm11,%xmm12
 2265 .byte   102,15,56,222,240
 2266         pxor    %xmm14,%xmm15
 2267         movdqa  %xmm9,%xmm14
 2268 .byte   102,15,56,222,248
 2269         movups  -48(%rcx),%xmm0
 2270 
 2271         paddd   %xmm9,%xmm9
 2272 .byte   102,15,56,222,209
 2273         pxor    %xmm15,%xmm11 /* xmm11 = next tweak 2 ^ key[0] */
 2274         psrad   $31,%xmm14
 2275 .byte   102,15,56,222,217
 2276         paddq   %xmm15,%xmm15
 2277         pand    %xmm8,%xmm14
 2278 .byte   102,15,56,222,225
 2279 .byte   102,15,56,222,233
 2280         movdqa  %xmm13,48(%rsp) /* fourth mask is parked here, just before xmm13 is reused */
 2281         pxor    %xmm14,%xmm15
 2282 .byte   102,15,56,222,241
 2283         movaps  %xmm12,%xmm13
 2284         movdqa  %xmm9,%xmm14
 2285 .byte   102,15,56,222,249
 2286         movups  -32(%rcx),%xmm1
 2287 
 2288         paddd   %xmm9,%xmm9
 2289 .byte   102,15,56,222,208
 2290         pxor    %xmm15,%xmm12 /* xmm12 = next tweak 3 ^ key[0] */
 2291         psrad   $31,%xmm14
 2292 .byte   102,15,56,222,216
 2293         paddq   %xmm15,%xmm15
 2294         pand    %xmm8,%xmm14
 2295 .byte   102,15,56,222,224
 2296 .byte   102,15,56,222,232
 2297 .byte   102,15,56,222,240
 2298         pxor    %xmm14,%xmm15
 2299         movaps  %xmm13,%xmm14
 2300 .byte   102,15,56,222,248
 2301 
 2302         movdqa  %xmm9,%xmm0
 2303         paddd   %xmm9,%xmm9
 2304 .byte   102,15,56,222,209
 2305         pxor    %xmm15,%xmm13 /* xmm13 = next tweak 4 ^ key[0] */
 2306         psrad   $31,%xmm0
 2307 .byte   102,15,56,222,217
 2308         paddq   %xmm15,%xmm15
 2309         pand    %xmm8,%xmm0
 2310 .byte   102,15,56,222,225
 2311 .byte   102,15,56,222,233
 2312         pxor    %xmm0,%xmm15
 2313         movups  (%rbp),%xmm0 /* xmm0 = key[0] for the next iteration */
 2314 .byte   102,15,56,222,241
 2315 .byte   102,15,56,222,249
 2316         movups  16(%rbp),%xmm1 /* xmm1 = key[1] for the next iteration */
 2317 
 2318         pxor    %xmm15,%xmm14 /* xmm14 = next tweak 5 ^ key[0] */
 2319 .byte   102,15,56,223,84,36,0 /* aesdeclast 0(%rsp),%xmm2: last round key and tweak in one xor */
 2320         psrad   $31,%xmm9
 2321         paddq   %xmm15,%xmm15
 2322 .byte   102,15,56,223,92,36,16 /* aesdeclast 16(%rsp),%xmm3 */
 2323 .byte   102,15,56,223,100,36,32 /* aesdeclast 32(%rsp),%xmm4 */
 2324         pand    %xmm8,%xmm9
 2325         movq    %r10,%rax /* reset the round-key index */
 2326 .byte   102,15,56,223,108,36,48 /* aesdeclast 48(%rsp),%xmm5 */
 2327 .byte   102,15,56,223,116,36,64 /* aesdeclast 64(%rsp),%xmm6 */
 2328 .byte   102,15,56,223,124,36,80 /* aesdeclast 80(%rsp),%xmm7 */
 2329         pxor    %xmm9,%xmm15 /* xmm15 = next tweak 6 */
 2330 
 2331         leaq    96(%rsi),%rsi
 2332         movups  %xmm2,-96(%rsi)
 2333         movups  %xmm3,-80(%rsi)
 2334         movups  %xmm4,-64(%rsi)
 2335         movups  %xmm5,-48(%rsi)
 2336         movups  %xmm6,-32(%rsi)
 2337         movups  %xmm7,-16(%rsi)
 2338         subq    $96,%rdx
 2339         jnc     .Lxts_dec_grandloop
 2340 
 2341         movl    $16+96,%eax
 2342         subl    %r10d,%eax /* 112 - (112 - 16*rounds) = 16*rounds */
 2343         movq    %rbp,%rcx /* rcx = key schedule base again */
 2344         shrl    $4,%eax /* eax = round count again */
 2345 
 2346 .Lxts_dec_short: /* zero to five whole blocks remain; eax = round count */
 2347 
 2348         movl    %eax,%r10d /* r10d = round count, reloaded by the tail paths */
 2349         pxor    %xmm0,%xmm10 /* strip key[0] from the tweaks: the paths below apply them on their own */
 2350         pxor    %xmm0,%xmm11
 2351         addq    $96,%rdx /* rdx = remaining whole-block bytes */
 2352         jz      .Lxts_dec_done
 2353 
 2354         pxor    %xmm0,%xmm12
 2355         cmpq    $0x20,%rdx
 2356         jb      .Lxts_dec_one /* 16 bytes */
 2357         pxor    %xmm0,%xmm13 /* pxor leaves the flags of the cmpq intact */
 2358         je      .Lxts_dec_two /* 32 bytes */
 2359 
 2360         pxor    %xmm0,%xmm14
 2361         cmpq    $0x40,%rdx
 2362         jb      .Lxts_dec_three /* 48 bytes */
 2363         je      .Lxts_dec_four /* 64 bytes */
 2364 
      /* Fall through: five blocks (80 bytes). */
 2365         movdqu  (%rdi),%xmm2
 2366         movdqu  16(%rdi),%xmm3
 2367         movdqu  32(%rdi),%xmm4
 2368         pxor    %xmm10,%xmm2
 2369         movdqu  48(%rdi),%xmm5
 2370         pxor    %xmm11,%xmm3
 2371         movdqu  64(%rdi),%xmm6
 2372         leaq    80(%rdi),%rdi
 2373         pxor    %xmm12,%xmm4
 2374         pxor    %xmm13,%xmm5
 2375         pxor    %xmm14,%xmm6
 2376 
 2377         call    _aesni_decrypt6 /* helper defined outside this view */
 2378 
 2379         xorps   %xmm10,%xmm2
 2380         xorps   %xmm11,%xmm3
 2381         xorps   %xmm12,%xmm4
 2382         movdqu  %xmm2,(%rsi)
 2383         xorps   %xmm13,%xmm5
 2384         movdqu  %xmm3,16(%rsi)
 2385         xorps   %xmm14,%xmm6
 2386         movdqu  %xmm4,32(%rsi)
 2387         pxor    %xmm14,%xmm14
 2388         movdqu  %xmm5,48(%rsi)
 2389         pcmpgtd %xmm15,%xmm14 /* xmm14 = per-dword sign mask of the tweak in xmm15 */
 2390         movdqu  %xmm6,64(%rsi)
 2391         leaq    80(%rsi),%rsi
 2392         pshufd  $0x13,%xmm14,%xmm11
 2393         andq    $15,%r9 /* r9 = size of the partial tail */
 2394         jz      .Lxts_dec_ret
 2395 
      /* NOTE(review): the pand below relies on xmm8 still holding .Lxts_magic
         after _aesni_decrypt6 - confirm against the helper. */
 2396         movdqa  %xmm15,%xmm10 /* xmm10 = current tweak */
 2397         paddq   %xmm15,%xmm15
 2398         pand    %xmm8,%xmm11
 2399         pxor    %xmm15,%xmm11 /* xmm11 = the tweak after it */
 2400         jmp     .Lxts_dec_done2
 2401 
 2402 .align  16
 2403 .Lxts_dec_one: /* one block, inline round loop */
 2404         movups  (%rdi),%xmm2
 2405         leaq    16(%rdi),%rdi
 2406         xorps   %xmm10,%xmm2
 2407         movups  (%rcx),%xmm0
 2408         movups  16(%rcx),%xmm1
 2409         leaq    32(%rcx),%rcx
 2410         xorps   %xmm0,%xmm2
 2411 .Loop_dec1_12:
 2412 .byte   102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
 2413         decl    %eax
 2414         movups  (%rcx),%xmm1
 2415         leaq    16(%rcx),%rcx
 2416         jnz     .Loop_dec1_12
 2417 .byte   102,15,56,223,209 /* aesdeclast %xmm1,%xmm2 */
 2418         xorps   %xmm10,%xmm2
 2419         movdqa  %xmm11,%xmm10 /* xmm10/xmm11 = the next two unused tweaks, for the stealing path */
 2420         movups  %xmm2,(%rsi)
 2421         movdqa  %xmm12,%xmm11
 2422         leaq    16(%rsi),%rsi
 2423         jmp     .Lxts_dec_done
 2424 
 2425 .align  16
 2426 .Lxts_dec_two: /* two blocks */
 2427         movups  (%rdi),%xmm2
 2428         movups  16(%rdi),%xmm3
 2429         leaq    32(%rdi),%rdi
 2430         xorps   %xmm10,%xmm2
 2431         xorps   %xmm11,%xmm3
 2432 
 2433         call    _aesni_decrypt2 /* helper defined outside this view */
 2434 
 2435         xorps   %xmm10,%xmm2
 2436         movdqa  %xmm12,%xmm10 /* next two unused tweaks into xmm10/xmm11 */
 2437         xorps   %xmm11,%xmm3
 2438         movdqa  %xmm13,%xmm11
 2439         movups  %xmm2,(%rsi)
 2440         movups  %xmm3,16(%rsi)
 2441         leaq    32(%rsi),%rsi
 2442         jmp     .Lxts_dec_done
 2443 
 2444 .align  16
 2445 .Lxts_dec_three: /* three blocks */
 2446         movups  (%rdi),%xmm2
 2447         movups  16(%rdi),%xmm3
 2448         movups  32(%rdi),%xmm4
 2449         leaq    48(%rdi),%rdi
 2450         xorps   %xmm10,%xmm2
 2451         xorps   %xmm11,%xmm3
 2452         xorps   %xmm12,%xmm4
 2453 
 2454         call    _aesni_decrypt3 /* helper defined outside this view */
 2455 
 2456         xorps   %xmm10,%xmm2
 2457         movdqa  %xmm13,%xmm10 /* next two unused tweaks into xmm10/xmm11 */
 2458         xorps   %xmm11,%xmm3
 2459         movdqa  %xmm14,%xmm11
 2460         xorps   %xmm12,%xmm4
 2461         movups  %xmm2,(%rsi)
 2462         movups  %xmm3,16(%rsi)
 2463         movups  %xmm4,32(%rsi)
 2464         leaq    48(%rsi),%rsi
 2465         jmp     .Lxts_dec_done
 2466 
 2467 .align  16
 2468 .Lxts_dec_four: /* four blocks */
 2469         movups  (%rdi),%xmm2
 2470         movups  16(%rdi),%xmm3
 2471         movups  32(%rdi),%xmm4
 2472         xorps   %xmm10,%xmm2
 2473         movups  48(%rdi),%xmm5
 2474         leaq    64(%rdi),%rdi
 2475         xorps   %xmm11,%xmm3
 2476         xorps   %xmm12,%xmm4
 2477         xorps   %xmm13,%xmm5
 2478 
 2479         call    _aesni_decrypt4 /* helper defined outside this view */
 2480 
 2481         pxor    %xmm10,%xmm2
 2482         movdqa  %xmm14,%xmm10 /* next two unused tweaks into xmm10/xmm11 */
 2483         pxor    %xmm11,%xmm3
 2484         movdqa  %xmm15,%xmm11
 2485         pxor    %xmm12,%xmm4
 2486         movdqu  %xmm2,(%rsi)
 2487         pxor    %xmm13,%xmm5
 2488         movdqu  %xmm3,16(%rsi)
 2489         movdqu  %xmm4,32(%rsi)
 2490         movdqu  %xmm5,48(%rsi)
 2491         leaq    64(%rsi),%rsi
 2492         jmp     .Lxts_dec_done
 2493 
 2494 .align  16
 2495 .Lxts_dec_done:
 2496         andq    $15,%r9 /* r9 = size of the partial tail */
 2497         jz      .Lxts_dec_ret /* length was a multiple of 16: nothing to steal */
 2498 .Lxts_dec_done2: /* partial tail: xmm10 = earlier tweak, xmm11 = later tweak */
 2499         movq    %r9,%rdx /* rdx = tail byte counter for the steal loop */
 2500         movq    %rbp,%rcx
 2501         movl    %r10d,%eax
 2502 
 2503         movups  (%rdi),%xmm2 /* the held-back full block is decrypted with the later tweak first */
 2504         xorps   %xmm11,%xmm2
 2505         movups  (%rcx),%xmm0
 2506         movups  16(%rcx),%xmm1
 2507         leaq    32(%rcx),%rcx
 2508         xorps   %xmm0,%xmm2
 2509 .Loop_dec1_13:
 2510 .byte   102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
 2511         decl    %eax
 2512         movups  (%rcx),%xmm1
 2513         leaq    16(%rcx),%rcx
 2514         jnz     .Loop_dec1_13
 2515 .byte   102,15,56,223,209 /* aesdeclast %xmm1,%xmm2 */
 2516         xorps   %xmm11,%xmm2
 2517         movups  %xmm2,(%rsi)
 2518 
 2519 .Lxts_dec_steal: /* per tail byte i: out[16+i] = out[i]; out[i] = in[16+i] */
 2520         movzbl  16(%rdi),%eax
 2521         movzbl  (%rsi),%ecx
 2522         leaq    1(%rdi),%rdi
 2523         movb    %al,(%rsi)
 2524         movb    %cl,16(%rsi)
 2525         leaq    1(%rsi),%rsi
 2526         subq    $1,%rdx
 2527         jnz     .Lxts_dec_steal
 2528 
 2529         subq    %r9,%rsi /* rewind rsi to the start of the recombined block */
 2530         movq    %rbp,%rcx
 2531         movl    %r10d,%eax
 2532 
 2533         movups  (%rsi),%xmm2 /* recombined block is decrypted in place with the earlier tweak */
 2534         xorps   %xmm10,%xmm2
 2535         movups  (%rcx),%xmm0
 2536         movups  16(%rcx),%xmm1
 2537         leaq    32(%rcx),%rcx
 2538         xorps   %xmm0,%xmm2
 2539 .Loop_dec1_14:
 2540 .byte   102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
 2541         decl    %eax
 2542         movups  (%rcx),%xmm1
 2543         leaq    16(%rcx),%rcx
 2544         jnz     .Loop_dec1_14
 2545 .byte   102,15,56,223,209 /* aesdeclast %xmm1,%xmm2 */
 2546         xorps   %xmm10,%xmm2
 2547         movups  %xmm2,(%rsi)
 2548 
 2549 .Lxts_dec_ret: /* zero every xmm register and the 112-byte stack scratch before returning */
 2550         xorps   %xmm0,%xmm0
 2551         pxor    %xmm1,%xmm1
 2552         pxor    %xmm2,%xmm2
 2553         pxor    %xmm3,%xmm3
 2554         pxor    %xmm4,%xmm4
 2555         pxor    %xmm5,%xmm5
 2556         pxor    %xmm6,%xmm6
 2557         pxor    %xmm7,%xmm7
 2558         movaps  %xmm0,0(%rsp)
 2559         pxor    %xmm8,%xmm8
 2560         movaps  %xmm0,16(%rsp)
 2561         pxor    %xmm9,%xmm9
 2562         movaps  %xmm0,32(%rsp)
 2563         pxor    %xmm10,%xmm10
 2564         movaps  %xmm0,48(%rsp)
 2565         pxor    %xmm11,%xmm11
 2566         movaps  %xmm0,64(%rsp)
 2567         pxor    %xmm12,%xmm12
 2568         movaps  %xmm0,80(%rsp)
 2569         pxor    %xmm13,%xmm13
 2570         movaps  %xmm0,96(%rsp)
 2571         pxor    %xmm14,%xmm14
 2572         pxor    %xmm15,%xmm15
 2573         movq    -8(%r11),%rbp /* reload the %rbp pushed at entry */
 2574 .cfi_restore    %rbp
 2575         leaq    (%r11),%rsp /* rsp = entry %rsp */
 2576 .cfi_def_cfa_register   %rsp
 2577 .Lxts_dec_epilogue:
 2578         .byte   0xf3,0xc3 /* rep ret */
 2579 .cfi_endproc    
 2580 .size   aesni_xts_decrypt,.-aesni_xts_decrypt
 2581 .globl  aesni_ocb_encrypt
 2582 .type   aesni_ocb_encrypt,@function
 2583 .align  32
      /*
       * aesni_ocb_encrypt: AES-NI encryption of whole 16-byte blocks with a
       * per-block offset mask (the symbol name indicates OCB mode).
       *
       * Inputs, as read by the code below:
       *   %rdi  source pointer
       *   %rsi  destination pointer
       *   %rdx  number of 16-byte blocks
       *   %rcx  key schedule (32-bit round count at byte offset 240)
       *   %r8   block counter of the first block
       *   %r9   pointer to the 16-byte running offset (read at entry,
       *         written back at exit)
       *   first stack argument  -> %rbx: table of 16-byte entries, indexed
       *         by the lowest set bit of the block counter
       *   second stack argument -> %rbp: pointer to a 16-byte accumulator
       *         that receives the XOR of every input block
       *
       * Register state shared with the __ocb_encrypt1/4/6 helpers:
       *   xmm9  = key[0] ^ last round key     xmm15 = running offset ^ last round key
       *   xmm10 = table entry 0               xmm8  = accumulator
       *   xmm1  = key[1]                      %r11  = key schedule base
       *   %rcx/%rax = round-key cursor        %r10  = saved start value of %rax
       */
 2584 aesni_ocb_encrypt:
 2585 .cfi_startproc  
 2586         leaq    (%rsp),%rax /* rax = entry %rsp, to reach the stack arguments */
 2587         pushq   %rbx
 2588 .cfi_adjust_cfa_offset  8
 2589 .cfi_offset     %rbx,-16
 2590         pushq   %rbp
 2591 .cfi_adjust_cfa_offset  8
 2592 .cfi_offset     %rbp,-24
 2593         pushq   %r12
 2594 .cfi_adjust_cfa_offset  8
 2595 .cfi_offset     %r12,-32
 2596         pushq   %r13
 2597 .cfi_adjust_cfa_offset  8
 2598 .cfi_offset     %r13,-40
 2599         pushq   %r14
 2600 .cfi_adjust_cfa_offset  8
 2601 .cfi_offset     %r14,-48
 2602         movq    8(%rax),%rbx /* first stack argument: offset table */
 2603         movq    8+8(%rax),%rbp /* second stack argument: accumulator pointer */
 2604 
 2605         movl    240(%rcx),%r10d /* r10d = round count */
 2606         movq    %rcx,%r11 /* r11 = key schedule base */
 2607         shll    $4,%r10d /* r10 = 16 * rounds */
 2608         movups  (%rcx),%xmm9 /* key[0] */
 2609         movups  16(%rcx,%r10,1),%xmm1 /* round key at offset 16+16*rounds (fed to aesenclast) */
 2610 
 2611         movdqu  (%r9),%xmm15 /* running offset */
 2612         pxor    %xmm1,%xmm9 /* xmm9 = key[0] ^ last round key */
 2613         pxor    %xmm1,%xmm15 /* offset is carried xored with the last round key; undone at .Locb_enc_done */
 2614 
 2615         movl    $16+32,%eax
 2616         leaq    32(%r11,%r10,1),%rcx /* rcx = key base + 32 + 16*rounds */
 2617         movups  16(%r11),%xmm1 /* xmm1 = key[1] */
 2618         subq    %r10,%rax /* rax = 48 - 16*rounds: index the helper loops step by 32 up to zero */
 2619         movq    %rax,%r10 /* r10 = saved start index; helpers restore rax from it */
 2620 
 2621         movdqu  (%rbx),%xmm10 /* table entry 0 */
 2622         movdqu  (%rbp),%xmm8 /* accumulator */
 2623 
 2624         testq   $1,%r8
 2625         jnz     .Locb_enc_odd /* odd counter: go straight to the multi-block path */
 2626 
      /* Even counter: process one block on its own first. */
 2627         bsfq    %r8,%r12 /* index of the lowest set bit of the counter */
 2628         addq    $1,%r8
 2629         shlq    $4,%r12 /* byte offset of the 16-byte table entry */
 2630         movdqu  (%rbx,%r12,1),%xmm7
 2631         movdqu  (%rdi),%xmm2
 2632         leaq    16(%rdi),%rdi
 2633 
 2634         call    __ocb_encrypt1
 2635 
 2636         movdqa  %xmm7,%xmm15 /* xmm7 comes back as the updated masked offset */
 2637         movups  %xmm2,(%rsi)
 2638         leaq    16(%rsi),%rsi
 2639         subq    $1,%rdx
 2640         jz      .Locb_enc_done
 2641 
 2642 .Locb_enc_odd:
      /* Table offsets for counters r8+1, r8+3 and r8+5; the helpers use
         table entry 0 (xmm10) for the other positions. */
 2643         leaq    1(%r8),%r12
 2644         leaq    3(%r8),%r13
 2645         leaq    5(%r8),%r14
 2646         leaq    6(%r8),%r8
 2647         bsfq    %r12,%r12
 2648         bsfq    %r13,%r13
 2649         bsfq    %r14,%r14
 2650         shlq    $4,%r12
 2651         shlq    $4,%r13
 2652         shlq    $4,%r14
 2653 
 2654         subq    $6,%rdx
 2655         jc      .Locb_enc_short /* fewer than six blocks left */
 2656         jmp     .Locb_enc_grandloop
 2657 
 2658 .align  32
 2659 .Locb_enc_grandloop: /* six blocks per iteration */
 2660         movdqu  0(%rdi),%xmm2
 2661         movdqu  16(%rdi),%xmm3
 2662         movdqu  32(%rdi),%xmm4
 2663         movdqu  48(%rdi),%xmm5
 2664         movdqu  64(%rdi),%xmm6
 2665         movdqu  80(%rdi),%xmm7
 2666         leaq    96(%rdi),%rdi
 2667 
 2668         call    __ocb_encrypt6 /* also advances %r8 and recomputes %r12..%r14 */
 2669 
 2670         movups  %xmm2,0(%rsi)
 2671         movups  %xmm3,16(%rsi)
 2672         movups  %xmm4,32(%rsi)
 2673         movups  %xmm5,48(%rsi)
 2674         movups  %xmm6,64(%rsi)
 2675         movups  %xmm7,80(%rsi)
 2676         leaq    96(%rsi),%rsi
 2677         subq    $6,%rdx
 2678         jnc     .Locb_enc_grandloop
 2679 
 2680 .Locb_enc_short: /* zero to five blocks remain */
 2681         addq    $6,%rdx /* rdx = remaining block count */
 2682         jz      .Locb_enc_done
 2683 
 2684         movdqu  0(%rdi),%xmm2
 2685         cmpq    $2,%rdx
 2686         jb      .Locb_enc_one
 2687         movdqu  16(%rdi),%xmm3 /* movdqu leaves the flags of the cmpq intact */
 2688         je      .Locb_enc_two
 2689 
 2690         movdqu  32(%rdi),%xmm4
 2691         cmpq    $4,%rdx
 2692         jb      .Locb_enc_three
 2693         movdqu  48(%rdi),%xmm5
 2694         je      .Locb_enc_four
 2695 
      /* Fall through: five blocks, run through the six-wide helper with a zero sixth input. */
 2696         movdqu  64(%rdi),%xmm6
 2697         pxor    %xmm7,%xmm7 /* zero block: leaves the accumulator unchanged */
 2698 
 2699         call    __ocb_encrypt6
 2700 
 2701         movdqa  %xmm14,%xmm15 /* running offset = the fifth block's masked offset */
 2702         movups  %xmm2,0(%rsi)
 2703         movups  %xmm3,16(%rsi)
 2704         movups  %xmm4,32(%rsi)
 2705         movups  %xmm5,48(%rsi)
 2706         movups  %xmm6,64(%rsi)
 2707 
 2708         jmp     .Locb_enc_done
 2709 
 2710 .align  16
 2711 .Locb_enc_one:
 2712         movdqa  %xmm10,%xmm7 /* single block uses table entry 0 */
 2713 
 2714         call    __ocb_encrypt1
 2715 
 2716         movdqa  %xmm7,%xmm15 /* updated masked offset */
 2717         movups  %xmm2,0(%rsi)
 2718         jmp     .Locb_enc_done
 2719 
 2720 .align  16
 2721 .Locb_enc_two: /* two blocks through the four-wide helper, padded with zero inputs */
 2722         pxor    %xmm4,%xmm4
 2723         pxor    %xmm5,%xmm5
 2724 
 2725         call    __ocb_encrypt4
 2726 
 2727         movdqa  %xmm11,%xmm15 /* running offset = the second block's masked offset */
 2728         movups  %xmm2,0(%rsi)
 2729         movups  %xmm3,16(%rsi)
 2730 
 2731         jmp     .Locb_enc_done
 2732 
 2733 .align  16
 2734 .Locb_enc_three: /* three blocks through the four-wide helper, padded with one zero input */
 2735         pxor    %xmm5,%xmm5
 2736 
 2737         call    __ocb_encrypt4
 2738 
 2739         movdqa  %xmm12,%xmm15 /* running offset = the third block's masked offset */
 2740         movups  %xmm2,0(%rsi)
 2741         movups  %xmm3,16(%rsi)
 2742         movups  %xmm4,32(%rsi)
 2743 
 2744         jmp     .Locb_enc_done
 2745 
 2746 .align  16
 2747 .Locb_enc_four:
 2748         call    __ocb_encrypt4
 2749 
 2750         movdqa  %xmm13,%xmm15 /* running offset = the fourth block's masked offset */
 2751         movups  %xmm2,0(%rsi)
 2752         movups  %xmm3,16(%rsi)
 2753         movups  %xmm4,32(%rsi)
 2754         movups  %xmm5,48(%rsi)
 2755 
 2756 .Locb_enc_done:
      /* xmm0 is expected to hold the last round key here: it is the final
         load each helper performs (-16(%rcx,%rax) with %rax == 0).
         NOTE(review): if entered with %rdx == 0 and an odd %r8, no helper has
         run and xmm0 was never set by this function - confirm that callers
         never pass a zero block count. */
 2757         pxor    %xmm0,%xmm15 /* remove the last-round-key mask from the offset */
 2758         movdqu  %xmm8,(%rbp) /* write back the accumulator */
 2759         movdqu  %xmm15,(%r9) /* write back the running offset */
 2760 
      /* Zero every xmm register before returning. */
 2761         xorps   %xmm0,%xmm0
 2762         pxor    %xmm1,%xmm1
 2763         pxor    %xmm2,%xmm2
 2764         pxor    %xmm3,%xmm3
 2765         pxor    %xmm4,%xmm4
 2766         pxor    %xmm5,%xmm5
 2767         pxor    %xmm6,%xmm6
 2768         pxor    %xmm7,%xmm7
 2769         pxor    %xmm8,%xmm8
 2770         pxor    %xmm9,%xmm9
 2771         pxor    %xmm10,%xmm10
 2772         pxor    %xmm11,%xmm11
 2773         pxor    %xmm12,%xmm12
 2774         pxor    %xmm13,%xmm13
 2775         pxor    %xmm14,%xmm14
 2776         pxor    %xmm15,%xmm15
 2777         leaq    40(%rsp),%rax /* rax = entry %rsp (five 8-byte pushes above) */
 2778 .cfi_def_cfa    %rax,8
 2779         movq    -40(%rax),%r14
 2780 .cfi_restore    %r14
 2781         movq    -32(%rax),%r13
 2782 .cfi_restore    %r13
 2783         movq    -24(%rax),%r12
 2784 .cfi_restore    %r12
 2785         movq    -16(%rax),%rbp
 2786 .cfi_restore    %rbp
 2787         movq    -8(%rax),%rbx
 2788 .cfi_restore    %rbx
 2789         leaq    (%rax),%rsp
 2790 .cfi_def_cfa_register   %rsp
 2791 .Locb_enc_epilogue:
 2792         .byte   0xf3,0xc3 /* rep ret */
 2793 .cfi_endproc    
 2794 .size   aesni_ocb_encrypt,.-aesni_ocb_encrypt
 2795 
 2796 .type   __ocb_encrypt6,@function
 2797 .align  32
      /*
       * __ocb_encrypt6: file-local helper (no .globl) that encrypts the six
       * blocks in xmm2..xmm7 in parallel.  Its contract is register-only.
       *
       * In:  xmm2..xmm7  input blocks
       *      xmm15       running offset ^ last round key
       *      xmm9        key[0] ^ last round key
       *      xmm10       table entry 0
       *      xmm8        accumulator (each input block is xored into it)
       *      xmm1        key[1]
       *      %rbx        table base; %r12/%r13/%r14 byte offsets of the
       *                  entries for the 2nd, 4th and 6th block
       *      %r11        key schedule base; %rcx/%rax round-key cursor
       *      %r10        value to restore into %rax on exit
       *      %r8         block counter
       * Out: xmm2..xmm7  encrypted blocks
       *      xmm11..xmm15 masked offsets of blocks 2..6 (offset ^ last round key)
       *      xmm10       table entry 0 (reloaded), xmm1 = key[1], %rax = %r10
       *      %r8 += 6; %r12/%r13/%r14 = table offsets for the next group
       *      xmm0        last round key (final load of the round loop)
       *
       * .byte 102,15,56,220,modrm is aesenc; 102,65,15,56,221,modrm is
       * aesenclast with an xmm8..xmm15 source operand.
       */
 2798 __ocb_encrypt6:
 2799 .cfi_startproc  
 2800         pxor    %xmm9,%xmm15 /* offset ^ last key becomes offset ^ key[0] */
 2801         movdqu  (%rbx,%r12,1),%xmm11
 2802         movdqa  %xmm10,%xmm12
 2803         movdqu  (%rbx,%r13,1),%xmm13
 2804         movdqa  %xmm10,%xmm14
 2805         pxor    %xmm15,%xmm10 /* xmm10 = offset of block 1 (each offset = previous ^ table entry) */
 2806         movdqu  (%rbx,%r14,1),%xmm15
 2807         pxor    %xmm10,%xmm11 /* offset of block 2 */
 2808         pxor    %xmm2,%xmm8 /* accumulate the unmasked input block */
 2809         pxor    %xmm10,%xmm2 /* block ^ offset ^ key[0]: masking and initial key xor in one step */
 2810         pxor    %xmm11,%xmm12 /* offset of block 3 */
 2811         pxor    %xmm3,%xmm8
 2812         pxor    %xmm11,%xmm3
 2813         pxor    %xmm12,%xmm13 /* offset of block 4 */
 2814         pxor    %xmm4,%xmm8
 2815         pxor    %xmm12,%xmm4
 2816         pxor    %xmm13,%xmm14 /* offset of block 5 */
 2817         pxor    %xmm5,%xmm8
 2818         pxor    %xmm13,%xmm5
 2819         pxor    %xmm14,%xmm15 /* offset of block 6 */
 2820         pxor    %xmm6,%xmm8
 2821         pxor    %xmm14,%xmm6
 2822         pxor    %xmm7,%xmm8
 2823         pxor    %xmm15,%xmm7
 2824         movups  32(%r11),%xmm0 /* xmm0 = round key 2 */
 2825 
      /* Precompute the table indices for the next group of six. */
 2826         leaq    1(%r8),%r12
 2827         leaq    3(%r8),%r13
 2828         leaq    5(%r8),%r14
 2829         addq    $6,%r8
 2830         pxor    %xmm9,%xmm10 /* offset ^ key[0] becomes offset ^ last key, the aesenclast operand */
 2831         bsfq    %r12,%r12
 2832         bsfq    %r13,%r13
 2833         bsfq    %r14,%r14
 2834 
 2835 .byte   102,15,56,220,209 /* aesenc %xmm1,%xmm2 (round 1; .byte lines below cover xmm3..xmm7) */
 2836 .byte   102,15,56,220,217
 2837 .byte   102,15,56,220,225
 2838 .byte   102,15,56,220,233
 2839         pxor    %xmm9,%xmm11
 2840         pxor    %xmm9,%xmm12
 2841 .byte   102,15,56,220,241
 2842         pxor    %xmm9,%xmm13
 2843         pxor    %xmm9,%xmm14
 2844 .byte   102,15,56,220,249
 2845         movups  48(%r11),%xmm1 /* xmm1 = round key 3 */
 2846         pxor    %xmm9,%xmm15
 2847 
 2848 .byte   102,15,56,220,208 /* aesenc %xmm0 into %xmm2..%xmm7 (six lines, round 2) */
 2849 .byte   102,15,56,220,216
 2850 .byte   102,15,56,220,224
 2851 .byte   102,15,56,220,232
 2852 .byte   102,15,56,220,240
 2853 .byte   102,15,56,220,248
 2854         movups  64(%r11),%xmm0 /* xmm0 = round key 4 */
 2855         shlq    $4,%r12 /* table index -> byte offset */
 2856         shlq    $4,%r13
 2857         jmp     .Locb_enc_loop6
 2858 
 2859 .align  32
 2860 .Locb_enc_loop6: /* two round keys per pass, all six blocks */
 2861 .byte   102,15,56,220,209 /* aesenc %xmm1 into %xmm2..%xmm7 (six lines) */
 2862 .byte   102,15,56,220,217
 2863 .byte   102,15,56,220,225
 2864 .byte   102,15,56,220,233
 2865 .byte   102,15,56,220,241
 2866 .byte   102,15,56,220,249
 2867         movups  (%rcx,%rax,1),%xmm1
 2868         addq    $32,%rax /* sets ZF for the jnz below; the movups in between leave flags alone */
 2869 
 2870 .byte   102,15,56,220,208 /* aesenc %xmm0 into %xmm2..%xmm7 (six lines) */
 2871 .byte   102,15,56,220,216
 2872 .byte   102,15,56,220,224
 2873 .byte   102,15,56,220,232
 2874 .byte   102,15,56,220,240
 2875 .byte   102,15,56,220,248
 2876         movups  -16(%rcx,%rax,1),%xmm0
 2877         jnz     .Locb_enc_loop6
 2878 
 2879 .byte   102,15,56,220,209 /* aesenc %xmm1 into %xmm2..%xmm7 (six lines, last full round) */
 2880 .byte   102,15,56,220,217
 2881 .byte   102,15,56,220,225
 2882 .byte   102,15,56,220,233
 2883 .byte   102,15,56,220,241
 2884 .byte   102,15,56,220,249
 2885         movups  16(%r11),%xmm1 /* restore xmm1 = key[1] for the next call */
 2886         shlq    $4,%r14
 2887 
 2888 .byte   102,65,15,56,221,210 /* aesenclast %xmm10,%xmm2: last round key and offset in one xor */
 2889         movdqu  (%rbx),%xmm10 /* restore xmm10 = table entry 0 */
 2890         movq    %r10,%rax /* restore the round-key index */
 2891 .byte   102,65,15,56,221,219 /* aesenclast %xmm11,%xmm3 */
 2892 .byte   102,65,15,56,221,228 /* aesenclast %xmm12,%xmm4 */
 2893 .byte   102,65,15,56,221,237 /* aesenclast %xmm13,%xmm5 */
 2894 .byte   102,65,15,56,221,246 /* aesenclast %xmm14,%xmm6 */
 2895 .byte   102,65,15,56,221,255 /* aesenclast %xmm15,%xmm7 */
 2896         .byte   0xf3,0xc3 /* rep ret */
 2897 .cfi_endproc    
 2898 .size   __ocb_encrypt6,.-__ocb_encrypt6
 2899 
 2900 .type   __ocb_encrypt4,@function
 2901 .align  32
      /*
       * __ocb_encrypt4: file-local helper (no .globl) that encrypts the four
       * blocks in xmm2..xmm5 in parallel.  Its contract is register-only.
       *
       * In:  xmm2..xmm5  input blocks
       *      xmm15       running offset ^ last round key
       *      xmm9        key[0] ^ last round key
       *      xmm10       table entry 0
       *      xmm8        accumulator (each input block is xored into it)
       *      xmm1        key[1]
       *      %rbx        table base; %r12/%r13 byte offsets of the entries
       *                  for the 2nd and 4th block
       *      %r11        key schedule base; %rcx/%rax round-key cursor
       *      %r10        value to restore into %rax on exit
       * Out: xmm2..xmm5  encrypted blocks
       *      xmm10..xmm13 masked offsets of blocks 1..4 (offset ^ last round key)
       *      xmm1 = key[1], %rax = %r10, xmm0 = last round key
       * Unlike __ocb_encrypt6 it does not touch %r8, %r12..%r14 or reload xmm10.
       */
 2902 __ocb_encrypt4:
 2903 .cfi_startproc  
 2904         pxor    %xmm9,%xmm15 /* offset ^ last key becomes offset ^ key[0] */
 2905         movdqu  (%rbx,%r12,1),%xmm11
 2906         movdqa  %xmm10,%xmm12
 2907         movdqu  (%rbx,%r13,1),%xmm13
 2908         pxor    %xmm15,%xmm10 /* offset of block 1 */
 2909         pxor    %xmm10,%xmm11 /* offset of block 2 */
 2910         pxor    %xmm2,%xmm8 /* accumulate the unmasked input block */
 2911         pxor    %xmm10,%xmm2 /* block ^ offset ^ key[0] */
 2912         pxor    %xmm11,%xmm12 /* offset of block 3 */
 2913         pxor    %xmm3,%xmm8
 2914         pxor    %xmm11,%xmm3
 2915         pxor    %xmm12,%xmm13 /* offset of block 4 */
 2916         pxor    %xmm4,%xmm8
 2917         pxor    %xmm12,%xmm4
 2918         pxor    %xmm5,%xmm8
 2919         pxor    %xmm13,%xmm5
 2920         movups  32(%r11),%xmm0 /* xmm0 = round key 2 */
 2921 
      /* Switch the offsets from key[0]-masked to last-key-masked for aesenclast. */
 2922         pxor    %xmm9,%xmm10
 2923         pxor    %xmm9,%xmm11
 2924         pxor    %xmm9,%xmm12
 2925         pxor    %xmm9,%xmm13
 2926 
 2927 .byte   102,15,56,220,209 /* aesenc %xmm1 into %xmm2..%xmm5 (four lines, round 1) */
 2928 .byte   102,15,56,220,217
 2929 .byte   102,15,56,220,225
 2930 .byte   102,15,56,220,233
 2931         movups  48(%r11),%xmm1 /* xmm1 = round key 3 */
 2932 
 2933 .byte   102,15,56,220,208 /* aesenc %xmm0 into %xmm2..%xmm5 (four lines, round 2) */
 2934 .byte   102,15,56,220,216
 2935 .byte   102,15,56,220,224
 2936 .byte   102,15,56,220,232
 2937         movups  64(%r11),%xmm0 /* xmm0 = round key 4 */
 2938         jmp     .Locb_enc_loop4
 2939 
 2940 .align  32
 2941 .Locb_enc_loop4: /* two round keys per pass, all four blocks */
 2942 .byte   102,15,56,220,209 /* aesenc %xmm1 into %xmm2..%xmm5 (four lines) */
 2943 .byte   102,15,56,220,217
 2944 .byte   102,15,56,220,225
 2945 .byte   102,15,56,220,233
 2946         movups  (%rcx,%rax,1),%xmm1
 2947         addq    $32,%rax /* sets ZF for the jnz below; the movups in between leave flags alone */
 2948 
 2949 .byte   102,15,56,220,208 /* aesenc %xmm0 into %xmm2..%xmm5 (four lines) */
 2950 .byte   102,15,56,220,216
 2951 .byte   102,15,56,220,224
 2952 .byte   102,15,56,220,232
 2953         movups  -16(%rcx,%rax,1),%xmm0
 2954         jnz     .Locb_enc_loop4
 2955 
 2956 .byte   102,15,56,220,209 /* aesenc %xmm1 into %xmm2..%xmm5 (four lines, last full round) */
 2957 .byte   102,15,56,220,217
 2958 .byte   102,15,56,220,225
 2959 .byte   102,15,56,220,233
 2960         movups  16(%r11),%xmm1 /* restore xmm1 = key[1] for the next call */
 2961         movq    %r10,%rax /* restore the round-key index */
 2962 
 2963 .byte   102,65,15,56,221,210 /* aesenclast %xmm10,%xmm2 */
 2964 .byte   102,65,15,56,221,219 /* aesenclast %xmm11,%xmm3 */
 2965 .byte   102,65,15,56,221,228 /* aesenclast %xmm12,%xmm4 */
 2966 .byte   102,65,15,56,221,237 /* aesenclast %xmm13,%xmm5 */
 2967         .byte   0xf3,0xc3 /* rep ret */
 2968 .cfi_endproc    
 2969 .size   __ocb_encrypt4,.-__ocb_encrypt4
 2970 
 2971 .type   __ocb_encrypt1,@function
 2972 .align  32
      /*
       * __ocb_encrypt1: file-local helper (no .globl) that encrypts the
       * single block in xmm2.  Its contract is register-only.
       *
       * In:  xmm2   input block
       *      xmm7   table entry for this block
       *      xmm15  running offset ^ last round key
       *      xmm9   key[0] ^ last round key
       *      xmm8   accumulator (the input block is xored into it)
       *      xmm1   key[1]
       *      %r11   key schedule base; %rcx/%rax round-key cursor
       *      %r10   value to restore into %rax on exit
       * Out: xmm2   encrypted block
       *      xmm7   new offset ^ last round key
       *      xmm1 = key[1], %rax = %r10, xmm0 = last round key
       */
 2973 __ocb_encrypt1:
 2974 .cfi_startproc  
 2975         pxor    %xmm15,%xmm7 /* new offset = table entry ^ running offset (still masked with the last key) */
 2976         pxor    %xmm9,%xmm7 /* swap the mask: now offset ^ key[0] */
 2977         pxor    %xmm2,%xmm8 /* accumulate the unmasked input block */
 2978         pxor    %xmm7,%xmm2 /* block ^ offset ^ key[0] */
 2979         movups  32(%r11),%xmm0 /* xmm0 = round key 2 */
 2980 
 2981 .byte   102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
 2982         movups  48(%r11),%xmm1 /* xmm1 = round key 3 */
 2983         pxor    %xmm9,%xmm7 /* back to offset ^ last key, the aesenclast operand */
 2984 
 2985 .byte   102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
 2986         movups  64(%r11),%xmm0 /* xmm0 = round key 4 */
 2987         jmp     .Locb_enc_loop1
 2988 
 2989 .align  32
 2990 .Locb_enc_loop1: /* two round keys per pass */
 2991 .byte   102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
 2992         movups  (%rcx,%rax,1),%xmm1
 2993         addq    $32,%rax /* sets ZF for the jnz below; the movups in between leave flags alone */
 2994 
 2995 .byte   102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
 2996         movups  -16(%rcx,%rax,1),%xmm0
 2997         jnz     .Locb_enc_loop1
 2998 
 2999 .byte   102,15,56,220,209 /* aesenc %xmm1,%xmm2 (last full round) */
 3000         movups  16(%r11),%xmm1 /* restore xmm1 = key[1] for the next call */
 3001         movq    %r10,%rax /* restore the round-key index */
 3002 
 3003 .byte   102,15,56,221,215 /* aesenclast %xmm7,%xmm2: last round key and offset in one xor */
 3004         .byte   0xf3,0xc3 /* rep ret */
 3005 .cfi_endproc    
 3006 .size   __ocb_encrypt1,.-__ocb_encrypt1
 3007 
 3008 .globl  aesni_ocb_decrypt
 3009 .type   aesni_ocb_decrypt,@function
 3010 .align  32
      /*
       * aesni_ocb_decrypt: decrypts a run of whole 16-byte blocks using the
       * __ocb_decrypt1/4/6 helpers, maintaining a running offset and an xor
       * accumulator over the produced output blocks.
       *
       * Register use as visible in this routine:
       *   %rdi      input pointer, advanced 16 bytes per block read
       *   %rsi      output pointer, advanced 16 bytes per block written
       *   %rdx      number of 16-byte blocks still to process
       *   %rcx      key structure: round keys from offset 0, a 32-bit count at
       *             offset 240 that is scaled by 16 to index the round keys
       *   %r8       running block counter; bsf of it (times 16) selects an
       *             entry of the table in %rbx
       *   %r9       16-byte running offset, loaded at entry, stored at exit
       *   8(%rsp)   at entry -> %rbx: table of 16-byte entries
       *   16(%rsp)  at entry -> %rbp: 16-byte accumulator, loaded at entry,
       *             xored with every output block, stored at exit
       *
       * NOTE(review): presumably %rbx/%rbp are the OCB L_ table and checksum
       * of the C prototype -- confirm against the declaration.
       * NOTE(review): a block count of zero is not handled.  With an even %r8
       * one block is processed unconditionally, and with an odd %r8 control
       * reaches .Locb_dec_done without any helper having loaded %xmm0.
       * Callers presumably guarantee %rdx != 0 -- confirm.
       */
 3011 aesni_ocb_decrypt:
 3012 .cfi_startproc  
              /* remember the entry %rsp so the stack arguments can be reached after the pushes */
 3013         leaq    (%rsp),%rax
 3014         pushq   %rbx
 3015 .cfi_adjust_cfa_offset  8
 3016 .cfi_offset     %rbx,-16
 3017         pushq   %rbp
 3018 .cfi_adjust_cfa_offset  8
 3019 .cfi_offset     %rbp,-24
 3020         pushq   %r12
 3021 .cfi_adjust_cfa_offset  8
 3022 .cfi_offset     %r12,-32
 3023         pushq   %r13
 3024 .cfi_adjust_cfa_offset  8
 3025 .cfi_offset     %r13,-40
 3026         pushq   %r14
 3027 .cfi_adjust_cfa_offset  8
 3028 .cfi_offset     %r14,-48
              /* the two stack-passed arguments, just above the return address */
 3029         movq    8(%rax),%rbx
 3030         movq    8+8(%rax),%rbp
 3031 
              /*
               * %r10 = 16 * count, %r11 = key base, %xmm9 = round key 0,
               * %xmm1 = the round key at 16(%rcx,%r10,1).
               */
 3032         movl    240(%rcx),%r10d
 3033         movq    %rcx,%r11
 3034         shll    $4,%r10d
 3035         movups  (%rcx),%xmm9
 3036         movups  16(%rcx,%r10,1),%xmm1
 3037 
              /* fold that round key into round key 0 and into the running offset */
 3038         movdqu  (%r9),%xmm15
 3039         pxor    %xmm1,%xmm9
 3040         pxor    %xmm1,%xmm15
 3041 
              /*
               * %rax = 48 - %r10 (negative index, saved in %r10 for the helpers)
               * and %rcx = key + 32 + old %r10, so (%rcx,%rax,1) starts at
               * 80(key) and -16(%rcx) is the round key loaded above.
               * %xmm1 = 16(key), the key the helpers use for their first round.
               */
 3042         movl    $16+32,%eax
 3043         leaq    32(%r11,%r10,1),%rcx
 3044         movups  16(%r11),%xmm1
 3045         subq    %r10,%rax
 3046         movq    %rax,%r10
 3047 
              /* %xmm10 = first table entry, %xmm8 = accumulator */
 3048         movdqu  (%rbx),%xmm10
 3049         movdqu  (%rbp),%xmm8
 3050 
              /* if the block counter is even, handle one block alone first */
 3051         testq   $1,%r8
 3052         jnz     .Locb_dec_odd
 3053 
 3054         bsfq    %r8,%r12
 3055         addq    $1,%r8
 3056         shlq    $4,%r12
 3057         movdqu  (%rbx,%r12,1),%xmm7
 3058         movdqu  (%rdi),%xmm2
 3059         leaq    16(%rdi),%rdi
 3060 
 3061         call    __ocb_decrypt1
 3062 
              /* the helper leaves the updated offset in %xmm7 */
 3063         movdqa  %xmm7,%xmm15
 3064         movups  %xmm2,(%rsi)
 3065         xorps   %xmm2,%xmm8
 3066         leaq    16(%rsi),%rsi
 3067         subq    $1,%rdx
 3068         jz      .Locb_dec_done
 3069 
      /*
       * Precompute the table byte offsets (16 * bsf) for counters %r8+1,
       * %r8+3 and %r8+5, then advance %r8 by 6.
       */
 3070 .Locb_dec_odd:
 3071         leaq    1(%r8),%r12
 3072         leaq    3(%r8),%r13
 3073         leaq    5(%r8),%r14
 3074         leaq    6(%r8),%r8
 3075         bsfq    %r12,%r12
 3076         bsfq    %r13,%r13
 3077         bsfq    %r14,%r14
 3078         shlq    $4,%r12
 3079         shlq    $4,%r13
 3080         shlq    $4,%r14
 3081 
              /* fewer than six blocks left: borrow sets CF */
 3082         subq    $6,%rdx
 3083         jc      .Locb_dec_short
 3084         jmp     .Locb_dec_grandloop
 3085 
 3086 .align  32
      /* main loop: six blocks per iteration */
 3087 .Locb_dec_grandloop:
 3088         movdqu  0(%rdi),%xmm2
 3089         movdqu  16(%rdi),%xmm3
 3090         movdqu  32(%rdi),%xmm4
 3091         movdqu  48(%rdi),%xmm5
 3092         movdqu  64(%rdi),%xmm6
 3093         movdqu  80(%rdi),%xmm7
 3094         leaq    96(%rdi),%rdi
 3095 
 3096         call    __ocb_decrypt6
 3097 
              /* store each output block and xor it into the accumulator */
 3098         movups  %xmm2,0(%rsi)
 3099         pxor    %xmm2,%xmm8
 3100         movups  %xmm3,16(%rsi)
 3101         pxor    %xmm3,%xmm8
 3102         movups  %xmm4,32(%rsi)
 3103         pxor    %xmm4,%xmm8
 3104         movups  %xmm5,48(%rsi)
 3105         pxor    %xmm5,%xmm8
 3106         movups  %xmm6,64(%rsi)
 3107         pxor    %xmm6,%xmm8
 3108         movups  %xmm7,80(%rsi)
 3109         pxor    %xmm7,%xmm8
 3110         leaq    96(%rsi),%rsi
 3111         subq    $6,%rdx
 3112         jnc     .Locb_dec_grandloop
 3113 
      /* undo the last subtraction; %rdx is now the 0..5 blocks that remain */
 3114 .Locb_dec_short:
 3115         addq    $6,%rdx
 3116         jz      .Locb_dec_done
 3117 
              /*
               * Dispatch on the remaining count.  The movdqu loads between each
               * cmpq and its je do not modify flags, so one compare serves both
               * the jb and the je.
               */
 3118         movdqu  0(%rdi),%xmm2
 3119         cmpq    $2,%rdx
 3120         jb      .Locb_dec_one
 3121         movdqu  16(%rdi),%xmm3
 3122         je      .Locb_dec_two
 3123 
 3124         movdqu  32(%rdi),%xmm4
 3125         cmpq    $4,%rdx
 3126         jb      .Locb_dec_three
 3127         movdqu  48(%rdi),%xmm5
 3128         je      .Locb_dec_four
 3129 
              /* five blocks: run the six-block helper with a zero sixth lane */
 3130         movdqu  64(%rdi),%xmm6
 3131         pxor    %xmm7,%xmm7
 3132 
 3133         call    __ocb_decrypt6
 3134 
              /* %xmm14 is the offset the helper applied to the fifth block */
 3135         movdqa  %xmm14,%xmm15
 3136         movups  %xmm2,0(%rsi)
 3137         pxor    %xmm2,%xmm8
 3138         movups  %xmm3,16(%rsi)
 3139         pxor    %xmm3,%xmm8
 3140         movups  %xmm4,32(%rsi)
 3141         pxor    %xmm4,%xmm8
 3142         movups  %xmm5,48(%rsi)
 3143         pxor    %xmm5,%xmm8
 3144         movups  %xmm6,64(%rsi)
 3145         pxor    %xmm6,%xmm8
 3146 
 3147         jmp     .Locb_dec_done
 3148 
 3149 .align  16
      /* one block left: use the first table entry (%xmm10) as the offset delta */
 3150 .Locb_dec_one:
 3151         movdqa  %xmm10,%xmm7
 3152 
 3153         call    __ocb_decrypt1
 3154 
 3155         movdqa  %xmm7,%xmm15
 3156         movups  %xmm2,0(%rsi)
 3157         xorps   %xmm2,%xmm8
 3158         jmp     .Locb_dec_done
 3159 
 3160 .align  16
      /* two blocks left: four-block helper with zero third and fourth lanes */
 3161 .Locb_dec_two:
 3162         pxor    %xmm4,%xmm4
 3163         pxor    %xmm5,%xmm5
 3164 
 3165         call    __ocb_decrypt4
 3166 
              /* %xmm11 is the offset the helper applied to the second block */
 3167         movdqa  %xmm11,%xmm15
 3168         movups  %xmm2,0(%rsi)
 3169         xorps   %xmm2,%xmm8
 3170         movups  %xmm3,16(%rsi)
 3171         xorps   %xmm3,%xmm8
 3172 
 3173         jmp     .Locb_dec_done
 3174 
 3175 .align  16
      /* three blocks left: four-block helper with a zero fourth lane */
 3176 .Locb_dec_three:
 3177         pxor    %xmm5,%xmm5
 3178 
 3179         call    __ocb_decrypt4
 3180 
              /* %xmm12 is the offset the helper applied to the third block */
 3181         movdqa  %xmm12,%xmm15
 3182         movups  %xmm2,0(%rsi)
 3183         xorps   %xmm2,%xmm8
 3184         movups  %xmm3,16(%rsi)
 3185         xorps   %xmm3,%xmm8
 3186         movups  %xmm4,32(%rsi)
 3187         xorps   %xmm4,%xmm8
 3188 
 3189         jmp     .Locb_dec_done
 3190 
 3191 .align  16
      /* four blocks left */
 3192 .Locb_dec_four:
 3193         call    __ocb_decrypt4
 3194 
              /* %xmm13 is the offset the helper applied to the fourth block */
 3195         movdqa  %xmm13,%xmm15
 3196         movups  %xmm2,0(%rsi)
 3197         pxor    %xmm2,%xmm8
 3198         movups  %xmm3,16(%rsi)
 3199         pxor    %xmm3,%xmm8
 3200         movups  %xmm4,32(%rsi)
 3201         pxor    %xmm4,%xmm8
 3202         movups  %xmm5,48(%rsi)
 3203         pxor    %xmm5,%xmm8
 3204 
      /*
       * After a helper call %xmm0 holds the round key at -16(%rcx), the same
       * key that was xored into %xmm15 at entry; xoring it again strips it
       * before the offset is stored back.  The accumulator goes to (%rbp).
       */
 3205 .Locb_dec_done:
 3206         pxor    %xmm0,%xmm15
 3207         movdqu  %xmm8,(%rbp)
 3208         movdqu  %xmm15,(%r9)
 3209 
              /* wipe every xmm register that held key, offset or data material */
 3210         xorps   %xmm0,%xmm0
 3211         pxor    %xmm1,%xmm1
 3212         pxor    %xmm2,%xmm2
 3213         pxor    %xmm3,%xmm3
 3214         pxor    %xmm4,%xmm4
 3215         pxor    %xmm5,%xmm5
 3216         pxor    %xmm6,%xmm6
 3217         pxor    %xmm7,%xmm7
 3218         pxor    %xmm8,%xmm8
 3219         pxor    %xmm9,%xmm9
 3220         pxor    %xmm10,%xmm10
 3221         pxor    %xmm11,%xmm11
 3222         pxor    %xmm12,%xmm12
 3223         pxor    %xmm13,%xmm13
 3224         pxor    %xmm14,%xmm14
 3225         pxor    %xmm15,%xmm15
              /* %rax = %rsp as it was at entry (five 8-byte pushes); reload the saved registers */
 3226         leaq    40(%rsp),%rax
 3227 .cfi_def_cfa    %rax,8
 3228         movq    -40(%rax),%r14
 3229 .cfi_restore    %r14
 3230         movq    -32(%rax),%r13
 3231 .cfi_restore    %r13
 3232         movq    -24(%rax),%r12
 3233 .cfi_restore    %r12
 3234         movq    -16(%rax),%rbp
 3235 .cfi_restore    %rbp
 3236         movq    -8(%rax),%rbx
 3237 .cfi_restore    %rbx
 3238         leaq    (%rax),%rsp
 3239 .cfi_def_cfa_register   %rsp
 3240 .Locb_dec_epilogue:
              /* rep ret */
 3241         .byte   0xf3,0xc3
 3242 .cfi_endproc    
 3243 .size   aesni_ocb_decrypt,.-aesni_ocb_decrypt
 3244 
 3245 .type   __ocb_decrypt6,@function
 3246 .align  32
      /*
       * __ocb_decrypt6: internal helper that runs the six blocks in
       * %xmm2..%xmm7 through the AES decryption rounds in parallel.
       *
       * Hand-encoded instructions used below:
       *   .byte 102,15,56,222,modrm      aesdec     (%xmm0 or %xmm1 as key)
       *   .byte 102,65,15,56,223,modrm   aesdeclast (%xmm10..%xmm15 as key)
       *   .byte 0xf3,0xc3                rep ret
       *
       * Read on entry:
       *   %xmm2..%xmm7     the six blocks
       *   %xmm15           running offset
       *   %xmm10           first table entry, (%rbx)
       *   %xmm9            xored into the offsets before and after masking
       *   %xmm1            round key for the first aesdec (not loaded here)
       *   %rbx,%r12-%r14   table base and byte offsets of three table entries
       *   %r8              block counter, used to prepare the next %r12-%r14
       *   %r11,%rcx,%rax,%r10  round-key addressing, as in the other helpers
       * On return:
       *   %xmm2..%xmm7 = results; %xmm10 = (%rbx) reloaded; %xmm11..%xmm15 =
       *   the per-block offsets of blocks 2..6 (%xmm15 being the last one);
       *   %r12-%r14 = 16 * bsf of %r8+1, %r8+3, %r8+5 (entry %r8);
       *   %r8 advanced by 6; %xmm1 = 16(%r11); %rax = %r10.
       */
 3247 __ocb_decrypt6:
 3248 .cfi_startproc  
              /*
               * Build six chained offsets in %xmm10..%xmm15: each is the previous
               * offset xored with a table entry ((%rbx) for lanes 1, 3 and 5,
               * (%rbx,%r12/%r13/%r14) for lanes 2, 4 and 6).  Each block is
               * xored with its own offset as soon as that offset is ready.
               */
 3249         pxor    %xmm9,%xmm15
 3250         movdqu  (%rbx,%r12,1),%xmm11
 3251         movdqa  %xmm10,%xmm12
 3252         movdqu  (%rbx,%r13,1),%xmm13
 3253         movdqa  %xmm10,%xmm14
 3254         pxor    %xmm15,%xmm10
 3255         movdqu  (%rbx,%r14,1),%xmm15
 3256         pxor    %xmm10,%xmm11
 3257         pxor    %xmm10,%xmm2
 3258         pxor    %xmm11,%xmm12
 3259         pxor    %xmm11,%xmm3
 3260         pxor    %xmm12,%xmm13
 3261         pxor    %xmm12,%xmm4
 3262         pxor    %xmm13,%xmm14
 3263         pxor    %xmm13,%xmm5
 3264         pxor    %xmm14,%xmm15
 3265         pxor    %xmm14,%xmm6
 3266         pxor    %xmm15,%xmm7
 3267         movups  32(%r11),%xmm0
 3268 
              /*
               * Interleaved with the first rounds: compute bsf for the next
               * group of counters and xor %xmm9 back out of each offset.
               */
 3269         leaq    1(%r8),%r12
 3270         leaq    3(%r8),%r13
 3271         leaq    5(%r8),%r14
 3272         addq    $6,%r8
 3273         pxor    %xmm9,%xmm10
 3274         bsfq    %r12,%r12
 3275         bsfq    %r13,%r13
 3276         bsfq    %r14,%r14
 3277 
              /* aesdec %xmm1 into %xmm2..%xmm7 */
 3278 .byte   102,15,56,222,209
 3279 .byte   102,15,56,222,217
 3280 .byte   102,15,56,222,225
 3281 .byte   102,15,56,222,233
 3282         pxor    %xmm9,%xmm11
 3283         pxor    %xmm9,%xmm12
 3284 .byte   102,15,56,222,241
 3285         pxor    %xmm9,%xmm13
 3286         pxor    %xmm9,%xmm14
 3287 .byte   102,15,56,222,249
 3288         movups  48(%r11),%xmm1
 3289         pxor    %xmm9,%xmm15
 3290 
              /* aesdec %xmm0 into %xmm2..%xmm7 */
 3291 .byte   102,15,56,222,208
 3292 .byte   102,15,56,222,216
 3293 .byte   102,15,56,222,224
 3294 .byte   102,15,56,222,232
 3295 .byte   102,15,56,222,240
 3296 .byte   102,15,56,222,248
 3297         movups  64(%r11),%xmm0
              /* scale two of the three bsf results to byte offsets; %r14 is scaled after the loop */
 3298         shlq    $4,%r12
 3299         shlq    $4,%r13
 3300         jmp     .Locb_dec_loop6
 3301 
 3302 .align  32
      /* two rounds per iteration, alternating %xmm1 and %xmm0 as the key */
 3303 .Locb_dec_loop6:
 3304 .byte   102,15,56,222,209
 3305 .byte   102,15,56,222,217
 3306 .byte   102,15,56,222,225
 3307 .byte   102,15,56,222,233
 3308 .byte   102,15,56,222,241
 3309 .byte   102,15,56,222,249
 3310         movups  (%rcx,%rax,1),%xmm1
              /* addq sets ZF; the aesdec and movups that follow leave it intact for jnz */
 3311         addq    $32,%rax
 3312 
 3313 .byte   102,15,56,222,208
 3314 .byte   102,15,56,222,216
 3315 .byte   102,15,56,222,224
 3316 .byte   102,15,56,222,232
 3317 .byte   102,15,56,222,240
 3318 .byte   102,15,56,222,248
 3319         movups  -16(%rcx,%rax,1),%xmm0
 3320         jnz     .Locb_dec_loop6
 3321 
              /* one more aesdec with %xmm1, then restore %xmm1 for the next call */
 3322 .byte   102,15,56,222,209
 3323 .byte   102,15,56,222,217
 3324 .byte   102,15,56,222,225
 3325 .byte   102,15,56,222,233
 3326 .byte   102,15,56,222,241
 3327 .byte   102,15,56,222,249
 3328         movups  16(%r11),%xmm1
 3329         shlq    $4,%r14
 3330 
              /*
               * aesdeclast with the per-block offsets as keys:
               * %xmm10->%xmm2, %xmm11->%xmm3, ... %xmm15->%xmm7.
               * %xmm10 is reloaded from (%rbx) once its aesdeclast has been issued.
               */
 3331 .byte   102,65,15,56,223,210
 3332         movdqu  (%rbx),%xmm10
 3333         movq    %r10,%rax
 3334 .byte   102,65,15,56,223,219
 3335 .byte   102,65,15,56,223,228
 3336 .byte   102,65,15,56,223,237
 3337 .byte   102,65,15,56,223,246
 3338 .byte   102,65,15,56,223,255
              /* rep ret */
 3339         .byte   0xf3,0xc3
 3340 .cfi_endproc    
 3341 .size   __ocb_decrypt6,.-__ocb_decrypt6
 3342 
 3343 .type   __ocb_decrypt4,@function
 3344 .align  32
      /*
       * __ocb_decrypt4: internal helper that runs the four blocks in
       * %xmm2..%xmm5 through the AES decryption rounds in parallel.
       *
       * Hand-encoded instructions used below:
       *   .byte 102,15,56,222,modrm      aesdec     (%xmm0 or %xmm1 as key)
       *   .byte 102,65,15,56,223,modrm   aesdeclast (%xmm10..%xmm13 as key)
       *   .byte 0xf3,0xc3                rep ret
       *
       * Read on entry: %xmm2..%xmm5 (blocks), %xmm15 (running offset),
       * %xmm10 (first table entry), %xmm9, %xmm1 (first round key, not
       * loaded here), %rbx with %r12/%r13 (table entries), and the
       * round-key addressing registers %r11, %rcx, %rax, %r10.
       * On return: %xmm2..%xmm5 = results, %xmm10..%xmm13 = the offsets
       * applied to blocks 1..4, %xmm1 = 16(%r11), %rax = %r10.
       * Unlike the six-block helper this one does not touch %r8, %r12-%r14
       * and does not reload %xmm10 from (%rbx).
       */
 3345 __ocb_decrypt4:
 3346 .cfi_startproc  
              /* chained offsets in %xmm10..%xmm13, each block xored with its own offset */
 3347         pxor    %xmm9,%xmm15
 3348         movdqu  (%rbx,%r12,1),%xmm11
 3349         movdqa  %xmm10,%xmm12
 3350         movdqu  (%rbx,%r13,1),%xmm13
 3351         pxor    %xmm15,%xmm10
 3352         pxor    %xmm10,%xmm11
 3353         pxor    %xmm10,%xmm2
 3354         pxor    %xmm11,%xmm12
 3355         pxor    %xmm11,%xmm3
 3356         pxor    %xmm12,%xmm13
 3357         pxor    %xmm12,%xmm4
 3358         pxor    %xmm13,%xmm5
 3359         movups  32(%r11),%xmm0
 3360 
              /* xor %xmm9 back out of the offsets; they serve as aesdeclast keys below */
 3361         pxor    %xmm9,%xmm10
 3362         pxor    %xmm9,%xmm11
 3363         pxor    %xmm9,%xmm12
 3364         pxor    %xmm9,%xmm13
 3365 
              /* aesdec %xmm1 into %xmm2..%xmm5 */
 3366 .byte   102,15,56,222,209
 3367 .byte   102,15,56,222,217
 3368 .byte   102,15,56,222,225
 3369 .byte   102,15,56,222,233
 3370         movups  48(%r11),%xmm1
 3371 
              /* aesdec %xmm0 into %xmm2..%xmm5 */
 3372 .byte   102,15,56,222,208
 3373 .byte   102,15,56,222,216
 3374 .byte   102,15,56,222,224
 3375 .byte   102,15,56,222,232
 3376         movups  64(%r11),%xmm0
 3377         jmp     .Locb_dec_loop4
 3378 
 3379 .align  32
      /* two rounds per iteration, alternating %xmm1 and %xmm0 as the key */
 3380 .Locb_dec_loop4:
 3381 .byte   102,15,56,222,209
 3382 .byte   102,15,56,222,217
 3383 .byte   102,15,56,222,225
 3384 .byte   102,15,56,222,233
 3385         movups  (%rcx,%rax,1),%xmm1
              /* addq sets ZF; the aesdec and movups that follow leave it intact for jnz */
 3386         addq    $32,%rax
 3387 
 3388 .byte   102,15,56,222,208
 3389 .byte   102,15,56,222,216
 3390 .byte   102,15,56,222,224
 3391 .byte   102,15,56,222,232
 3392         movups  -16(%rcx,%rax,1),%xmm0
 3393         jnz     .Locb_dec_loop4
 3394 
              /* one more aesdec with %xmm1, then restore %xmm1 and %rax for the next call */
 3395 .byte   102,15,56,222,209
 3396 .byte   102,15,56,222,217
 3397 .byte   102,15,56,222,225
 3398 .byte   102,15,56,222,233
 3399         movups  16(%r11),%xmm1
 3400         movq    %r10,%rax
 3401 
              /* aesdeclast %xmm10->%xmm2, %xmm11->%xmm3, %xmm12->%xmm4, %xmm13->%xmm5 */
 3402 .byte   102,65,15,56,223,210
 3403 .byte   102,65,15,56,223,219
 3404 .byte   102,65,15,56,223,228
 3405 .byte   102,65,15,56,223,237
              /* rep ret */
 3406         .byte   0xf3,0xc3
 3407 .cfi_endproc    
 3408 .size   __ocb_decrypt4,.-__ocb_decrypt4
 3409 
 3410 .type   __ocb_decrypt1,@function
 3411 .align  32
      /*
       * __ocb_decrypt1: internal helper that runs the single block in %xmm2
       * through the AES decryption rounds.
       *
       * Hand-encoded instructions used below:
       *   .byte 102,15,56,222,modrm   aesdec
       *   .byte 102,15,56,223,modrm   aesdeclast
       *   .byte 0xf3,0xc3             rep ret
       *
       * Read on entry: %xmm2 (block), %xmm7 (table entry to fold into the
       * offset), %xmm15 (running offset), %xmm9, %xmm1 (first round key, not
       * loaded here) and the round-key addressing registers %r11, %rcx,
       * %rax, %r10.
       * On return: %xmm2 = result, %xmm7 = entry %xmm7 ^ %xmm15 (the %xmm9
       * term is xored in twice and cancels), %xmm1 = 16(%r11), %rax = %r10.
       * Unlike __ocb_encrypt1 this helper does not touch %xmm8; the caller
       * accumulates the output block itself.
       */
 3412 __ocb_decrypt1:
 3413 .cfi_startproc  
 3414         pxor    %xmm15,%xmm7
 3415         pxor    %xmm9,%xmm7
 3416         pxor    %xmm7,%xmm2
 3417         movups  32(%r11),%xmm0
 3418 
              /* aesdec %xmm1,%xmm2 */
 3419 .byte   102,15,56,222,209
 3420         movups  48(%r11),%xmm1
              /* remove %xmm9 again; %xmm7 is used as the aesdeclast key below */
 3421         pxor    %xmm9,%xmm7
 3422 
              /* aesdec %xmm0,%xmm2 */
 3423 .byte   102,15,56,222,208
 3424         movups  64(%r11),%xmm0
 3425         jmp     .Locb_dec_loop1
 3426 
 3427 .align  32
      /* two rounds per iteration, alternating %xmm1 and %xmm0 as the key */
 3428 .Locb_dec_loop1:
 3429 .byte   102,15,56,222,209
 3430         movups  (%rcx,%rax,1),%xmm1
              /* addq sets ZF; the aesdec and movups that follow leave it intact for jnz */
 3431         addq    $32,%rax
 3432 
 3433 .byte   102,15,56,222,208
 3434         movups  -16(%rcx,%rax,1),%xmm0
 3435         jnz     .Locb_dec_loop1
 3436 
              /* aesdec %xmm1,%xmm2, then restore %xmm1 and %rax for the next call */
 3437 .byte   102,15,56,222,209
 3438         movups  16(%r11),%xmm1
 3439         movq    %r10,%rax
 3440 
              /* aesdeclast %xmm7,%xmm2 */
 3441 .byte   102,15,56,223,215
 3442         .byte   0xf3,0xc3
 3443 .cfi_endproc    
 3444 .size   __ocb_decrypt1,.-__ocb_decrypt1
 3445 .globl  aesni_cbc_encrypt
 3446 .type   aesni_cbc_encrypt,@function
 3447 .align  16
      /*
       * aesni_cbc_encrypt: CBC-mode encryption or decryption with AES-NI.
       *
       * Register use as visible in this routine:
       *   %rdi   input pointer
       *   %rsi   output pointer
       *   %rdx   length in bytes; the routine returns at once when it is zero
       *   %rcx   key structure: round keys from offset 0, 32-bit round count
       *          at offset 240
       *   %r8    16-byte chaining value, read at the start and written back
       *          before returning
       *   %r9d   zero selects decryption, non-zero selects encryption
       *
       * Hand-encoded instructions used below:
       *   .byte 102,15,56,220,modrm        aesenc
       *   .byte 102,15,56,221,modrm        aesenclast
       *   .byte 102,[65|68|69,]15,56,222,modrm   aesdec
       *   .byte 102,[65|68|69,]15,56,223,modrm   aesdeclast
       *   .long 0x9066A4F3                 rep movsb followed by a 2-byte nop
       *   .long 0x9066AAF3                 rep stosb followed by a 2-byte nop
       *   .byte 0xf3,0xc3                  rep ret
       *
       * _aesni_decrypt2/3/4/6/8 are defined outside this excerpt; the code
       * here presumably relies on them decrypting %xmm2 upward in place and
       * leaving %xmm10..%xmm15 untouched -- confirm against their bodies.
       */
 3448 aesni_cbc_encrypt:
 3449 .cfi_startproc  
 3450         testq   %rdx,%rdx
 3451         jz      .Lcbc_ret
 3452 
              /* %r10d = round count, %r11 = key base (both kept for per-block reloads) */
 3453         movl    240(%rcx),%r10d
 3454         movq    %rcx,%r11
 3455         testl   %r9d,%r9d
 3456         jz      .Lcbc_decrypt
 3457 
              /* ---- encryption: %xmm2 carries the chaining value ---- */
 3458         movups  (%r8),%xmm2
 3459         movl    %r10d,%eax
 3460         cmpq    $16,%rdx
 3461         jb      .Lcbc_enc_tail
 3462         subq    $16,%rdx
 3463         jmp     .Lcbc_enc_loop
 3464 .align  16
      /* one block per iteration: %xmm2 = E(key, input ^ %xmm2) */
 3465 .Lcbc_enc_loop:
 3466         movups  (%rdi),%xmm3
 3467         leaq    16(%rdi),%rdi
 3468 
 3469         movups  (%rcx),%xmm0
 3470         movups  16(%rcx),%xmm1
 3471         xorps   %xmm0,%xmm3
 3472         leaq    32(%rcx),%rcx
 3473         xorps   %xmm3,%xmm2
      /* aesenc %xmm1,%xmm2 once per count in %eax, then aesenclast %xmm1,%xmm2 */
 3474 .Loop_enc1_15:
 3475 .byte   102,15,56,220,209
 3476         decl    %eax
 3477         movups  (%rcx),%xmm1
 3478         leaq    16(%rcx),%rcx
 3479         jnz     .Loop_enc1_15
 3480 .byte   102,15,56,221,209
              /* rewind the round counter and key pointer for the next block */
 3481         movl    %r10d,%eax
 3482         movq    %r11,%rcx
 3483         movups  %xmm2,0(%rsi)
 3484         leaq    16(%rsi),%rsi
 3485         subq    $16,%rdx
 3486         jnc     .Lcbc_enc_loop
              /* undo the last subtraction; non-zero means a partial block remains */
 3487         addq    $16,%rdx
 3488         jnz     .Lcbc_enc_tail
              /* done: wipe key/data registers and write the chaining value back */
 3489         pxor    %xmm0,%xmm0
 3490         pxor    %xmm1,%xmm1
 3491         movups  %xmm2,(%r8)
 3492         pxor    %xmm2,%xmm2
 3493         pxor    %xmm3,%xmm3
 3494         jmp     .Lcbc_ret
 3495 
      /*
       * Partial final block: copy the %rdx remaining input bytes into the
       * output buffer (rep movsb after swapping %rdi/%rsi), zero-fill the
       * rest of the 16 bytes there (rep stosb with %al = 0), then re-enter
       * the loop with input and output both pointing at that padded block
       * and %rdx = 0 so the loop runs exactly once more.
       */
 3496 .Lcbc_enc_tail:
 3497         movq    %rdx,%rcx
 3498         xchgq   %rdi,%rsi
 3499 .long   0x9066A4F3
 3500         movl    $16,%ecx
 3501         subq    %rdx,%rcx
 3502         xorl    %eax,%eax
 3503 .long   0x9066AAF3
 3504         leaq    -16(%rdi),%rdi
 3505         movl    %r10d,%eax
 3506         movq    %rdi,%rsi
 3507         movq    %r11,%rcx
 3508         xorq    %rdx,%rdx
 3509         jmp     .Lcbc_enc_loop
 3510 
 3511 .align  16
      /* ---- decryption: exactly 16 bytes is handled inline, anything else in bulk ---- */
 3512 .Lcbc_decrypt:
 3513         cmpq    $16,%rdx
 3514         jne     .Lcbc_decrypt_bulk
 3515 
 3516 
 3517 
              /* %xmm4 keeps the ciphertext block, which becomes the new chaining value */
 3518         movdqu  (%rdi),%xmm2
 3519         movdqu  (%r8),%xmm3
 3520         movdqa  %xmm2,%xmm4
 3521         movups  (%rcx),%xmm0
 3522         movups  16(%rcx),%xmm1
 3523         leaq    32(%rcx),%rcx
 3524         xorps   %xmm0,%xmm2
      /* aesdec %xmm1,%xmm2 once per count in %r10d, then aesdeclast %xmm1,%xmm2 */
 3525 .Loop_dec1_16:
 3526 .byte   102,15,56,222,209
 3527         decl    %r10d
 3528         movups  (%rcx),%xmm1
 3529         leaq    16(%rcx),%rcx
 3530         jnz     .Loop_dec1_16
 3531 .byte   102,15,56,223,209
 3532         pxor    %xmm0,%xmm0
 3533         pxor    %xmm1,%xmm1
 3534         movdqu  %xmm4,(%r8)
 3535         xorps   %xmm3,%xmm2
 3536         pxor    %xmm3,%xmm3
 3537         movups  %xmm2,(%rsi)
 3538         pxor    %xmm2,%xmm2
 3539         jmp     .Lcbc_ret
 3540 .align  16
      /*
       * Bulk decryption.  %r11 keeps the entry %rsp; %rbp is saved and a
       * 16-byte aligned scratch slot is carved out below it (used only by
       * .Lcbc_dec_tail_partial).  %rbp = key base, %xmm10 = chaining value.
       */
 3541 .Lcbc_decrypt_bulk:
 3542         leaq    (%rsp),%r11
 3543 .cfi_def_cfa_register   %r11
 3544         pushq   %rbp
 3545 .cfi_offset     %rbp,-16
 3546         subq    $16,%rsp
 3547         andq    $-16,%rsp
 3548         movq    %rcx,%rbp
 3549         movups  (%r8),%xmm10
 3550         movl    %r10d,%eax
 3551         cmpq    $0x50,%rdx
 3552         jbe     .Lcbc_dec_tail
 3553 
              /*
               * More than 0x50 bytes: preload six ciphertext blocks into
               * %xmm2..%xmm7 and keep copies of the first five in
               * %xmm11..%xmm15 as chaining values.
               */
 3554         movups  (%rcx),%xmm0
 3555         movdqu  0(%rdi),%xmm2
 3556         movdqu  16(%rdi),%xmm3
 3557         movdqa  %xmm2,%xmm11
 3558         movdqu  32(%rdi),%xmm4
 3559         movdqa  %xmm3,%xmm12
 3560         movdqu  48(%rdi),%xmm5
 3561         movdqa  %xmm4,%xmm13
 3562         movdqu  64(%rdi),%xmm6
 3563         movdqa  %xmm5,%xmm14
 3564         movdqu  80(%rdi),%xmm7
 3565         movdqa  %xmm6,%xmm15
 3566         movl    OPENSSL_ia32cap_P+4(%rip),%r9d
 3567         cmpq    $0x70,%rdx
 3568         jbe     .Lcbc_dec_six_or_seven
 3569 
              /*
               * Mask 71303168 = 0x04400000 keeps bits 22 and 26 of the
               * capability word; the 6-block loop is chosen when only bit 22
               * (4194304 = 0x00400000) is set, otherwise the 8-block loop.
               * NOTE(review): presumably a CPU-model heuristic on the CPUID
               * feature image -- confirm against the OPENSSL_ia32cap_P layout.
               */
 3570         andl    $71303168,%r9d
 3571         subq    $0x50,%rdx
 3572         cmpl    $4194304,%r9d
 3573         je      .Lcbc_dec_loop6_enter
 3574         subq    $0x20,%rdx
              /* bias %rcx by 112 so the round keys are addressed as N-112(%rcx) */
 3575         leaq    112(%rcx),%rcx
 3576         jmp     .Lcbc_dec_loop8_enter
 3577 .align  16
      /*
       * Eight-block loop.  On re-entry %xmm9 still holds the eighth output
       * block of the previous iteration, which is stored here.
       */
 3578 .Lcbc_dec_loop8:
 3579         movups  %xmm9,(%rsi)
 3580         leaq    16(%rsi),%rsi
 3581 .Lcbc_dec_loop8_enter:
              /* load blocks seven and eight and xor round key 0 (%xmm0) into all eight */
 3582         movdqu  96(%rdi),%xmm8
 3583         pxor    %xmm0,%xmm2
 3584         movdqu  112(%rdi),%xmm9
 3585         pxor    %xmm0,%xmm3
 3586         movups  16-112(%rcx),%xmm1
 3587         pxor    %xmm0,%xmm4
              /*
               * %rbp becomes the address the next iteration's first six blocks
               * are prefetched from: cmpq sets CF when %rdx is below 0x70, and
               * the adcq/andq/addq further down turn that into
               * %rbp = %rdi + (CF ? 0 : 128).  The pxor, movups and aesdec
               * in between do not modify flags.
               */
 3588         movq    $-1,%rbp
 3589         cmpq    $0x70,%rdx
 3590         pxor    %xmm0,%xmm5
 3591         pxor    %xmm0,%xmm6
 3592         pxor    %xmm0,%xmm7
 3593         pxor    %xmm0,%xmm8
 3594 
              /*
               * From here on each group of eight .byte lines is one aesdec
               * round applied to %xmm2..%xmm9, alternating %xmm1 and %xmm0 as
               * the round key, with the next key loaded in between.
               */
 3595 .byte   102,15,56,222,209
 3596         pxor    %xmm0,%xmm9
 3597         movups  32-112(%rcx),%xmm0
 3598 .byte   102,15,56,222,217
 3599 .byte   102,15,56,222,225
 3600 .byte   102,15,56,222,233
 3601 .byte   102,15,56,222,241
 3602 .byte   102,15,56,222,249
 3603 .byte   102,68,15,56,222,193
 3604         adcq    $0,%rbp
 3605         andq    $128,%rbp
 3606 .byte   102,68,15,56,222,201
 3607         addq    %rdi,%rbp
 3608         movups  48-112(%rcx),%xmm1
 3609 .byte   102,15,56,222,208
 3610 .byte   102,15,56,222,216
 3611 .byte   102,15,56,222,224
 3612 .byte   102,15,56,222,232
 3613 .byte   102,15,56,222,240
 3614 .byte   102,15,56,222,248
 3615 .byte   102,68,15,56,222,192
 3616 .byte   102,68,15,56,222,200
 3617         movups  64-112(%rcx),%xmm0
 3618         nop
 3619 .byte   102,15,56,222,209
 3620 .byte   102,15,56,222,217
 3621 .byte   102,15,56,222,225
 3622 .byte   102,15,56,222,233
 3623 .byte   102,15,56,222,241
 3624 .byte   102,15,56,222,249
 3625 .byte   102,68,15,56,222,193
 3626 .byte   102,68,15,56,222,201
 3627         movups  80-112(%rcx),%xmm1
 3628         nop
 3629 .byte   102,15,56,222,208
 3630 .byte   102,15,56,222,216
 3631 .byte   102,15,56,222,224
 3632 .byte   102,15,56,222,232
 3633 .byte   102,15,56,222,240
 3634 .byte   102,15,56,222,248
 3635 .byte   102,68,15,56,222,192
 3636 .byte   102,68,15,56,222,200
 3637         movups  96-112(%rcx),%xmm0
 3638         nop
 3639 .byte   102,15,56,222,209
 3640 .byte   102,15,56,222,217
 3641 .byte   102,15,56,222,225
 3642 .byte   102,15,56,222,233
 3643 .byte   102,15,56,222,241
 3644 .byte   102,15,56,222,249
 3645 .byte   102,68,15,56,222,193
 3646 .byte   102,68,15,56,222,201
 3647         movups  112-112(%rcx),%xmm1
 3648         nop
 3649 .byte   102,15,56,222,208
 3650 .byte   102,15,56,222,216
 3651 .byte   102,15,56,222,224
 3652 .byte   102,15,56,222,232
 3653 .byte   102,15,56,222,240
 3654 .byte   102,15,56,222,248
 3655 .byte   102,68,15,56,222,192
 3656 .byte   102,68,15,56,222,200
 3657         movups  128-112(%rcx),%xmm0
 3658         nop
 3659 .byte   102,15,56,222,209
 3660 .byte   102,15,56,222,217
 3661 .byte   102,15,56,222,225
 3662 .byte   102,15,56,222,233
 3663 .byte   102,15,56,222,241
 3664 .byte   102,15,56,222,249
 3665 .byte   102,68,15,56,222,193
 3666 .byte   102,68,15,56,222,201
 3667         movups  144-112(%rcx),%xmm1
              /*
               * Compare the round count with 11 once; the flags survive the
               * aesdec/movups that follow and drive both the jb and the later
               * je, selecting how many extra rounds are run before
               * .Lcbc_dec_done (none, two, or four).
               */
 3668         cmpl    $11,%eax
 3669 .byte   102,15,56,222,208
 3670 .byte   102,15,56,222,216
 3671 .byte   102,15,56,222,224
 3672 .byte   102,15,56,222,232
 3673 .byte   102,15,56,222,240
 3674 .byte   102,15,56,222,248
 3675 .byte   102,68,15,56,222,192
 3676 .byte   102,68,15,56,222,200
 3677         movups  160-112(%rcx),%xmm0
 3678         jb      .Lcbc_dec_done
 3679 .byte   102,15,56,222,209
 3680 .byte   102,15,56,222,217
 3681 .byte   102,15,56,222,225
 3682 .byte   102,15,56,222,233
 3683 .byte   102,15,56,222,241
 3684 .byte   102,15,56,222,249
 3685 .byte   102,68,15,56,222,193
 3686 .byte   102,68,15,56,222,201
 3687         movups  176-112(%rcx),%xmm1
 3688         nop
 3689 .byte   102,15,56,222,208
 3690 .byte   102,15,56,222,216
 3691 .byte   102,15,56,222,224
 3692 .byte   102,15,56,222,232
 3693 .byte   102,15,56,222,240
 3694 .byte   102,15,56,222,248
 3695 .byte   102,68,15,56,222,192
 3696 .byte   102,68,15,56,222,200
 3697         movups  192-112(%rcx),%xmm0
 3698         je      .Lcbc_dec_done
 3699 .byte   102,15,56,222,209
 3700 .byte   102,15,56,222,217
 3701 .byte   102,15,56,222,225
 3702 .byte   102,15,56,222,233
 3703 .byte   102,15,56,222,241
 3704 .byte   102,15,56,222,249
 3705 .byte   102,68,15,56,222,193
 3706 .byte   102,68,15,56,222,201
 3707         movups  208-112(%rcx),%xmm1
 3708         nop
 3709 .byte   102,15,56,222,208
 3710 .byte   102,15,56,222,216
 3711 .byte   102,15,56,222,224
 3712 .byte   102,15,56,222,232
 3713 .byte   102,15,56,222,240
 3714 .byte   102,15,56,222,248
 3715 .byte   102,68,15,56,222,192
 3716 .byte   102,68,15,56,222,200
 3717         movups  224-112(%rcx),%xmm0
 3718         jmp     .Lcbc_dec_done
 3719 .align  16
      /*
       * Last two rounds of the eight-block path.  %xmm0 is the final round
       * key; it is xored into the chaining values %xmm10..%xmm15 (and into
       * the copies of ciphertext blocks six and seven loaded into %xmm1 and
       * %xmm10) so that each aesdeclast applies the final round key and the
       * CBC xor in one instruction.
       */
 3720 .Lcbc_dec_done:
 3721 .byte   102,15,56,222,209
 3722 .byte   102,15,56,222,217
 3723         pxor    %xmm0,%xmm10
 3724         pxor    %xmm0,%xmm11
 3725 .byte   102,15,56,222,225
 3726 .byte   102,15,56,222,233
 3727         pxor    %xmm0,%xmm12
 3728         pxor    %xmm0,%xmm13
 3729 .byte   102,15,56,222,241
 3730 .byte   102,15,56,222,249
 3731         pxor    %xmm0,%xmm14
 3732         pxor    %xmm0,%xmm15
 3733 .byte   102,68,15,56,222,193
 3734 .byte   102,68,15,56,222,201
 3735         movdqu  80(%rdi),%xmm1
 3736 
              /*
               * aesdeclast %xmm10->%xmm2, %xmm11->%xmm3, %xmm12->%xmm4,
               * %xmm13->%xmm5, %xmm14->%xmm6, %xmm15->%xmm7, %xmm1->%xmm8,
               * %xmm10->%xmm9 (with %xmm10 reloaded in between).  Interleaved
               * with them, %xmm11..%xmm15 and %xmm1 are refilled from %rbp
               * with the next iteration's first six ciphertext blocks and
               * %rdi is advanced by 128.
               */
 3737 .byte   102,65,15,56,223,210
 3738         movdqu  96(%rdi),%xmm10
 3739         pxor    %xmm0,%xmm1
 3740 .byte   102,65,15,56,223,219
 3741         pxor    %xmm0,%xmm10
 3742         movdqu  112(%rdi),%xmm0
 3743 .byte   102,65,15,56,223,228
 3744         leaq    128(%rdi),%rdi
 3745         movdqu  0(%rbp),%xmm11
 3746 .byte   102,65,15,56,223,237
 3747 .byte   102,65,15,56,223,246
 3748         movdqu  16(%rbp),%xmm12
 3749         movdqu  32(%rbp),%xmm13
 3750 .byte   102,65,15,56,223,255
 3751 .byte   102,68,15,56,223,193
 3752         movdqu  48(%rbp),%xmm14
 3753         movdqu  64(%rbp),%xmm15
 3754 .byte   102,69,15,56,223,202
              /* the eighth ciphertext block becomes the new chaining value; %xmm0 = round key 0 again */
 3755         movdqa  %xmm0,%xmm10
 3756         movdqu  80(%rbp),%xmm1
 3757         movups  -112(%rcx),%xmm0
 3758 
              /* store seven output blocks and move the prefetched blocks into %xmm2..%xmm7 */
 3759         movups  %xmm2,(%rsi)
 3760         movdqa  %xmm11,%xmm2
 3761         movups  %xmm3,16(%rsi)
 3762         movdqa  %xmm12,%xmm3
 3763         movups  %xmm4,32(%rsi)
 3764         movdqa  %xmm13,%xmm4
 3765         movups  %xmm5,48(%rsi)
 3766         movdqa  %xmm14,%xmm5
 3767         movups  %xmm6,64(%rsi)
 3768         movdqa  %xmm15,%xmm6
 3769         movups  %xmm7,80(%rsi)
 3770         movdqa  %xmm1,%xmm7
 3771         movups  %xmm8,96(%rsi)
 3772         leaq    112(%rsi),%rsi
 3773 
 3774         subq    $0x80,%rdx
 3775         ja      .Lcbc_dec_loop8
 3776 
              /*
               * Leaving the eight-block loop: %xmm2 = pending eighth output
               * block, %rcx un-biased, %rdx re-based by 0x70.  If nothing is
               * left, finish through the register-clearing exit; otherwise
               * store the pending block and continue with the tail code.
               */
 3777         movaps  %xmm9,%xmm2
 3778         leaq    -112(%rcx),%rcx
 3779         addq    $0x70,%rdx
 3780         jle     .Lcbc_dec_clear_tail_collected
 3781         movups  %xmm9,(%rsi)
 3782         leaq    16(%rsi),%rsi
 3783         cmpq    $0x50,%rdx
 3784         jbe     .Lcbc_dec_tail
 3785 
 3786         movaps  %xmm11,%xmm2
      /* six or seven blocks, with the first six already in %xmm2..%xmm7 */
 3787 .Lcbc_dec_six_or_seven:
 3788         cmpq    $0x60,%rdx
 3789         ja      .Lcbc_dec_seven
 3790 
              /* six blocks: keep ciphertext block six in %xmm8 as the next chaining value */
 3791         movaps  %xmm7,%xmm8
 3792         call    _aesni_decrypt6
 3793         pxor    %xmm10,%xmm2
 3794         movaps  %xmm8,%xmm10
 3795         pxor    %xmm11,%xmm3
 3796         movdqu  %xmm2,(%rsi)
 3797         pxor    %xmm12,%xmm4
 3798         movdqu  %xmm3,16(%rsi)
 3799         pxor    %xmm3,%xmm3
 3800         pxor    %xmm13,%xmm5
 3801         movdqu  %xmm4,32(%rsi)
 3802         pxor    %xmm4,%xmm4
 3803         pxor    %xmm14,%xmm6
 3804         movdqu  %xmm5,48(%rsi)
 3805         pxor    %xmm5,%xmm5
 3806         pxor    %xmm15,%xmm7
 3807         movdqu  %xmm6,64(%rsi)
 3808         pxor    %xmm6,%xmm6
 3809         leaq    80(%rsi),%rsi
              /* the last output block travels in %xmm2 to .Lcbc_dec_tail_collected */
 3810         movdqa  %xmm7,%xmm2
 3811         pxor    %xmm7,%xmm7
 3812         jmp     .Lcbc_dec_tail_collected
 3813 
 3814 .align  16
      /* seven blocks: run the eight-block routine with a zero eighth lane */
 3815 .Lcbc_dec_seven:
 3816         movups  96(%rdi),%xmm8
 3817         xorps   %xmm9,%xmm9
 3818         call    _aesni_decrypt8
              /* reload ciphertext blocks six and seven as chaining values */
 3819         movups  80(%rdi),%xmm9
 3820         pxor    %xmm10,%xmm2
 3821         movups  96(%rdi),%xmm10
 3822         pxor    %xmm11,%xmm3
 3823         movdqu  %xmm2,(%rsi)
 3824         pxor    %xmm12,%xmm4
 3825         movdqu  %xmm3,16(%rsi)
 3826         pxor    %xmm3,%xmm3
 3827         pxor    %xmm13,%xmm5
 3828         movdqu  %xmm4,32(%rsi)
 3829         pxor    %xmm4,%xmm4
 3830         pxor    %xmm14,%xmm6
 3831         movdqu  %xmm5,48(%rsi)
 3832         pxor    %xmm5,%xmm5
 3833         pxor    %xmm15,%xmm7
 3834         movdqu  %xmm6,64(%rsi)
 3835         pxor    %xmm6,%xmm6
 3836         pxor    %xmm9,%xmm8
 3837         movdqu  %xmm7,80(%rsi)
 3838         pxor    %xmm7,%xmm7
 3839         leaq    96(%rsi),%rsi
 3840         movdqa  %xmm8,%xmm2
 3841         pxor    %xmm8,%xmm8
 3842         pxor    %xmm9,%xmm9
 3843         jmp     .Lcbc_dec_tail_collected
 3844 
 3845 .align  16
      /*
       * Six-block loop.  On re-entry %xmm7 holds the sixth output block of
       * the previous iteration, which is stored here before the next six
       * ciphertext blocks are loaded.
       */
 3846 .Lcbc_dec_loop6:
 3847         movups  %xmm7,(%rsi)
 3848         leaq    16(%rsi),%rsi
 3849         movdqu  0(%rdi),%xmm2
 3850         movdqu  16(%rdi),%xmm3
 3851         movdqa  %xmm2,%xmm11
 3852         movdqu  32(%rdi),%xmm4
 3853         movdqa  %xmm3,%xmm12
 3854         movdqu  48(%rdi),%xmm5
 3855         movdqa  %xmm4,%xmm13
 3856         movdqu  64(%rdi),%xmm6
 3857         movdqa  %xmm5,%xmm14
 3858         movdqu  80(%rdi),%xmm7
 3859         movdqa  %xmm6,%xmm15
 3860 .Lcbc_dec_loop6_enter:
 3861         leaq    96(%rdi),%rdi
 3862         movdqa  %xmm7,%xmm8
 3863 
 3864         call    _aesni_decrypt6
 3865 
              /* CBC xor, store five blocks, and rewind %rcx/%eax from %rbp/%r10d */
 3866         pxor    %xmm10,%xmm2
 3867         movdqa  %xmm8,%xmm10
 3868         pxor    %xmm11,%xmm3
 3869         movdqu  %xmm2,(%rsi)
 3870         pxor    %xmm12,%xmm4
 3871         movdqu  %xmm3,16(%rsi)
 3872         pxor    %xmm13,%xmm5
 3873         movdqu  %xmm4,32(%rsi)
 3874         pxor    %xmm14,%xmm6
 3875         movq    %rbp,%rcx
 3876         movdqu  %xmm5,48(%rsi)
 3877         pxor    %xmm15,%xmm7
 3878         movl    %r10d,%eax
 3879         movdqu  %xmm6,64(%rsi)
 3880         leaq    80(%rsi),%rsi
 3881         subq    $0x60,%rdx
 3882         ja      .Lcbc_dec_loop6
 3883 
              /* leaving the six-block loop: same hand-over as after the eight-block loop */
 3884         movdqa  %xmm7,%xmm2
 3885         addq    $0x50,%rdx
 3886         jle     .Lcbc_dec_clear_tail_collected
 3887         movups  %xmm7,(%rsi)
 3888         leaq    16(%rsi),%rsi
 3889 
      /*
       * Tail: at most 0x50 bytes.  %rdx is reduced by 16 per block loaded;
       * the first subtraction that reaches zero or borrows (jbe) selects the
       * handler for that many blocks.  A copy of each ciphertext block is
       * kept in %xmm11.. for chaining.
       */
 3890 .Lcbc_dec_tail:
 3891         movups  (%rdi),%xmm2
 3892         subq    $0x10,%rdx
 3893         jbe     .Lcbc_dec_one
 3894 
 3895         movups  16(%rdi),%xmm3
 3896         movaps  %xmm2,%xmm11
 3897         subq    $0x10,%rdx
 3898         jbe     .Lcbc_dec_two
 3899 
 3900         movups  32(%rdi),%xmm4
 3901         movaps  %xmm3,%xmm12
 3902         subq    $0x10,%rdx
 3903         jbe     .Lcbc_dec_three
 3904 
 3905         movups  48(%rdi),%xmm5
 3906         movaps  %xmm4,%xmm13
 3907         subq    $0x10,%rdx
 3908         jbe     .Lcbc_dec_four
 3909 
              /* five blocks: six-block routine with a zero sixth lane */
 3910         movups  64(%rdi),%xmm6
 3911         movaps  %xmm5,%xmm14
 3912         movaps  %xmm6,%xmm15
 3913         xorps   %xmm7,%xmm7
 3914         call    _aesni_decrypt6
 3915         pxor    %xmm10,%xmm2
 3916         movaps  %xmm15,%xmm10
 3917         pxor    %xmm11,%xmm3
 3918         movdqu  %xmm2,(%rsi)
 3919         pxor    %xmm12,%xmm4
 3920         movdqu  %xmm3,16(%rsi)
 3921         pxor    %xmm3,%xmm3
 3922         pxor    %xmm13,%xmm5
 3923         movdqu  %xmm4,32(%rsi)
 3924         pxor    %xmm4,%xmm4
 3925         pxor    %xmm14,%xmm6
 3926         movdqu  %xmm5,48(%rsi)
 3927         pxor    %xmm5,%xmm5
 3928         leaq    64(%rsi),%rsi
 3929         movdqa  %xmm6,%xmm2
 3930         pxor    %xmm6,%xmm6
 3931         pxor    %xmm7,%xmm7
 3932         subq    $0x10,%rdx
 3933         jmp     .Lcbc_dec_tail_collected
 3934 
 3935 .align  16
      /* one block: inline single-block decryption, round count in %eax */
 3936 .Lcbc_dec_one:
 3937         movaps  %xmm2,%xmm11
 3938         movups  (%rcx),%xmm0
 3939         movups  16(%rcx),%xmm1
 3940         leaq    32(%rcx),%rcx
 3941         xorps   %xmm0,%xmm2
 3942 .Loop_dec1_17:
 3943 .byte   102,15,56,222,209
 3944         decl    %eax
 3945         movups  (%rcx),%xmm1
 3946         leaq    16(%rcx),%rcx
 3947         jnz     .Loop_dec1_17
 3948 .byte   102,15,56,223,209
 3949         xorps   %xmm10,%xmm2
 3950         movaps  %xmm11,%xmm10
 3951         jmp     .Lcbc_dec_tail_collected
 3952 .align  16
      /* two blocks */
 3953 .Lcbc_dec_two:
 3954         movaps  %xmm3,%xmm12
 3955         call    _aesni_decrypt2
 3956         pxor    %xmm10,%xmm2
 3957         movaps  %xmm12,%xmm10
 3958         pxor    %xmm11,%xmm3
 3959         movdqu  %xmm2,(%rsi)
 3960         movdqa  %xmm3,%xmm2
 3961         pxor    %xmm3,%xmm3
 3962         leaq    16(%rsi),%rsi
 3963         jmp     .Lcbc_dec_tail_collected
 3964 .align  16
      /* three blocks */
 3965 .Lcbc_dec_three:
 3966         movaps  %xmm4,%xmm13
 3967         call    _aesni_decrypt3
 3968         pxor    %xmm10,%xmm2
 3969         movaps  %xmm13,%xmm10
 3970         pxor    %xmm11,%xmm3
 3971         movdqu  %xmm2,(%rsi)
 3972         pxor    %xmm12,%xmm4
 3973         movdqu  %xmm3,16(%rsi)
 3974         pxor    %xmm3,%xmm3
 3975         movdqa  %xmm4,%xmm2
 3976         pxor    %xmm4,%xmm4
 3977         leaq    32(%rsi),%rsi
 3978         jmp     .Lcbc_dec_tail_collected
 3979 .align  16
      /* four blocks */
 3980 .Lcbc_dec_four:
 3981         movaps  %xmm5,%xmm14
 3982         call    _aesni_decrypt4
 3983         pxor    %xmm10,%xmm2
 3984         movaps  %xmm14,%xmm10
 3985         pxor    %xmm11,%xmm3
 3986         movdqu  %xmm2,(%rsi)
 3987         pxor    %xmm12,%xmm4
 3988         movdqu  %xmm3,16(%rsi)
 3989         pxor    %xmm3,%xmm3
 3990         pxor    %xmm13,%xmm5
 3991         movdqu  %xmm4,32(%rsi)
 3992         pxor    %xmm4,%xmm4
 3993         movdqa  %xmm5,%xmm2
 3994         pxor    %xmm5,%xmm5
 3995         leaq    48(%rsi),%rsi
 3996         jmp     .Lcbc_dec_tail_collected
 3997 
 3998 .align  16
      /* exit used by the bulk loops: wipe the data registers they left populated */
 3999 .Lcbc_dec_clear_tail_collected:
 4000         pxor    %xmm3,%xmm3
 4001         pxor    %xmm4,%xmm4
 4002         pxor    %xmm5,%xmm5
 4003         pxor    %xmm6,%xmm6
 4004         pxor    %xmm7,%xmm7
 4005         pxor    %xmm8,%xmm8
 4006         pxor    %xmm9,%xmm9
      /*
       * Common exit: %xmm10 = new chaining value, %xmm2 = last output block
       * not yet stored.  If the low four bits of %rdx are zero the block is
       * stored whole, otherwise only part of it is copied out.
       */
 4007 .Lcbc_dec_tail_collected:
 4008         movups  %xmm10,(%r8)
 4009         andq    $15,%rdx
 4010         jnz     .Lcbc_dec_tail_partial
 4011         movups  %xmm2,(%rsi)
 4012         pxor    %xmm2,%xmm2
 4013         jmp     .Lcbc_dec_ret
 4014 .align  16
      /*
       * Partial store: spill the block to the aligned stack slot, copy
       * 16 - (%rdx & 15) bytes of it to the output with rep movsb, then
       * overwrite the stack copy with the already-zeroed %xmm2.
       */
 4015 .Lcbc_dec_tail_partial:
 4016         movaps  %xmm2,(%rsp)
 4017         pxor    %xmm2,%xmm2
 4018         movq    $16,%rcx
 4019         movq    %rsi,%rdi
 4020         subq    %rdx,%rcx
 4021         leaq    (%rsp),%rsi
 4022 .long   0x9066A4F3
 4023         movdqa  %xmm2,(%rsp)
 4024 
      /* wipe the key registers, restore %rbp and the entry %rsp saved in %r11 */
 4025 .Lcbc_dec_ret:
 4026         xorps   %xmm0,%xmm0
 4027         pxor    %xmm1,%xmm1
 4028         movq    -8(%r11),%rbp
 4029 .cfi_restore    %rbp
 4030         leaq    (%r11),%rsp
 4031 .cfi_def_cfa_register   %rsp
 4032 .Lcbc_ret:
              /* rep ret */
 4033         .byte   0xf3,0xc3
 4034 .cfi_endproc    
 4035 .size   aesni_cbc_encrypt,.-aesni_cbc_encrypt
 4036 .globl  aesni_set_decrypt_key
 4037 .type   aesni_set_decrypt_key,@function
 4038 .align  16
      /*
       * aesni_set_decrypt_key: builds a decryption key schedule by first
       * calling __aesni_set_encrypt_key with the caller's arguments untouched
       * and then, if that returned zero in %eax, reversing the order of the
       * round keys in place and applying aesimc to every key except the two
       * outermost ones.
       *
       * Hand-encoded instructions used below:
       *   .byte 0x48,0x83,0xEC,0x08     subq $8,%rsp
       *   .byte 102,15,56,219,192       aesimc %xmm0,%xmm0
       *   .byte 102,15,56,219,201       aesimc %xmm1,%xmm1
       *   .byte 0xf3,0xc3               rep ret
       *
       * Returns whatever __aesni_set_encrypt_key left in %eax.
       *
       * NOTE(review): the code after the call assumes the callee leaves
       * %rdx pointing at the first round key and %esi holding a count whose
       * 16-fold plus 16 is the byte offset of the last round key.  The callee
       * body is not fully visible here -- confirm.
       */
 4039 aesni_set_decrypt_key:
 4040 .cfi_startproc  
              /* subq $8,%rsp: keeps the stack 16-byte aligned at the call below */
 4041 .byte   0x48,0x83,0xEC,0x08
 4042 .cfi_adjust_cfa_offset  8
 4043         call    __aesni_set_encrypt_key
 4044         shll    $4,%esi
 4045         testl   %eax,%eax
 4046         jnz     .Ldec_key_ret
              /* %rdi = address of the last round key */
 4047         leaq    16(%rdx,%rsi,1),%rdi
 4048 
              /* swap the first and last round keys unchanged, then step both pointers inward */
 4049         movups  (%rdx),%xmm0
 4050         movups  (%rdi),%xmm1
 4051         movups  %xmm0,(%rdi)
 4052         movups  %xmm1,(%rdx)
 4053         leaq    16(%rdx),%rdx
 4054         leaq    -16(%rdi),%rdi
 4055 
      /* swap the pair at (%rdx) and (%rdi), applying aesimc to both, until the pointers meet */
 4056 .Ldec_key_inverse:
 4057         movups  (%rdx),%xmm0
 4058         movups  (%rdi),%xmm1
 4059 .byte   102,15,56,219,192
 4060 .byte   102,15,56,219,201
 4061         leaq    16(%rdx),%rdx
 4062         leaq    -16(%rdi),%rdi
 4063         movups  %xmm0,16(%rdi)
 4064         movups  %xmm1,-16(%rdx)
 4065         cmpq    %rdx,%rdi
 4066         ja      .Ldec_key_inverse
 4067 
              /* the middle key is transformed in place; then wipe the two scratch registers */
 4068         movups  (%rdx),%xmm0
 4069 .byte   102,15,56,219,192
 4070         pxor    %xmm1,%xmm1
 4071         movups  %xmm0,(%rdi)
 4072         pxor    %xmm0,%xmm0
 4073 .Ldec_key_ret:
 4074         addq    $8,%rsp
 4075 .cfi_adjust_cfa_offset  -8
              /* rep ret */
 4076         .byte   0xf3,0xc3
 4077 .cfi_endproc    
 4078 .LSEH_end_set_decrypt_key:
 4079 .size   aesni_set_decrypt_key,.-aesni_set_decrypt_key
 /*
  * aesni_set_encrypt_key / __aesni_set_encrypt_key
  *
  * Expands an AES user key into an encryption key schedule.
  *
  * In:   %rdi = user key
  *       %esi = key length in bits (128, 192 or 256)
  *       %rdx = key schedule buffer
  * Out:  %rax = 0 on success,
  *             -1 if %rdi or %rdx is NULL,
  *             -2 if %esi is not 128, 192 or 256.
  *       On success the round keys are written starting at (%rdx) and the
  *       value 9, 11 or 13 (for 128/192/256 bits) is stored at 240(%rdx).
  *
  * %rdx itself is never modified; %rax is used as the write cursor into the
  * schedule.  %xmm0-%xmm5 are zeroed on every return path.
  *
  * Each key size has two implementations: one built on AESKEYGENASSIST with
  * the .Lkey_expansion_* helpers below, and an "_alt" one built on
  * PSHUFB + AESENCLAST.  The "_alt" path is taken when, of bits 28 and 11 in
  * the second 32-bit word of OPENSSL_ia32cap_P, only bit 28 is set.
  * NOTE(review): per OpenSSL's OPENSSL_ia32cap documentation these are
  * presumably the AVX and AMD XOP bits -- confirm.
  */
 4080 .globl  aesni_set_encrypt_key
 4081 .type   aesni_set_encrypt_key,@function
 4082 .align  16
 4083 aesni_set_encrypt_key:
 4084 __aesni_set_encrypt_key:
 4085 .cfi_startproc  
 4086 .byte   0x48,0x83,0xEC,0x08     /* hand-encoded: sub $8,%rsp */
 4087 .cfi_adjust_cfa_offset  8
 4088         movq    $-1,%rax        /* default return value: NULL-argument error */
 4089         testq   %rdi,%rdi
 4090         jz      .Lenc_key_ret
 4091         testq   %rdx,%rdx
 4092         jz      .Lenc_key_ret
 4093 
 4094         movl    $268437504,%r10d        /* 0x10000800 = bit 28 | bit 11 */
 4095         movups  (%rdi),%xmm0    /* first 16 bytes of the user key */
 4096         xorps   %xmm4,%xmm4     /* the .Lkey_expansion_* helpers need dword 0 of %xmm4 to be zero */
 4097         andl    OPENSSL_ia32cap_P+4(%rip),%r10d
 4098         leaq    16(%rdx),%rax   /* write cursor: slot of the second round key */
 4099         cmpl    $256,%esi
 4100         je      .L14rounds
 4101         cmpl    $192,%esi
 4102         je      .L12rounds
 4103         cmpl    $128,%esi
 4104         jne     .Lbad_keybits
 4105 
 /* ---- 128-bit key ---- */
 4106 .L10rounds:
 4107         movl    $9,%esi         /* value stored at 240(%rdx) when done */
 4108         cmpl    $268435456,%r10d        /* 0x10000000: bit 28 set, bit 11 clear? */
 4109         je      .L10rounds_alt
 4110 
 /*
  * Each .byte line below is a hand-encoded
  *     aeskeygenassist $rcon,%xmm0,%xmm1
  * whose immediate is the last byte (1,2,4,...,128,27,54).  The helper then
  * folds %xmm1 into %xmm0 to form the next round key.  Every call except the
  * "_cold" one first stores the previous round key at (%rax) and advances %rax.
  */
 4111         movups  %xmm0,(%rdx)    /* round key 0 is the user key itself */
 4112 .byte   102,15,58,223,200,1
 4113         call    .Lkey_expansion_128_cold
 4114 .byte   102,15,58,223,200,2
 4115         call    .Lkey_expansion_128
 4116 .byte   102,15,58,223,200,4
 4117         call    .Lkey_expansion_128
 4118 .byte   102,15,58,223,200,8
 4119         call    .Lkey_expansion_128
 4120 .byte   102,15,58,223,200,16
 4121         call    .Lkey_expansion_128
 4122 .byte   102,15,58,223,200,32
 4123         call    .Lkey_expansion_128
 4124 .byte   102,15,58,223,200,64
 4125         call    .Lkey_expansion_128
 4126 .byte   102,15,58,223,200,128
 4127         call    .Lkey_expansion_128
 4128 .byte   102,15,58,223,200,27
 4129         call    .Lkey_expansion_128
 4130 .byte   102,15,58,223,200,54
 4131         call    .Lkey_expansion_128
 4132         movups  %xmm0,(%rax)    /* last round key; %rax is %rdx+160 here */
 4133         movl    %esi,80(%rax)   /* 160+80 = offset 240 of the schedule */
 4134         xorl    %eax,%eax       /* return 0 */
 4135         jmp     .Lenc_key_ret
 4136 
 4137 .align  16
 4138 .L10rounds_alt:
 4139         movdqa  .Lkey_rotate(%rip),%xmm5        /* pshufb mask: top dword, byte-rotated, in all lanes */
 4140         movl    $8,%r10d        /* eight loop iterations */
 4141         movdqa  .Lkey_rcon1(%rip),%xmm4 /* round constant, starts at 1 in every dword */
 4142         movdqa  %xmm0,%xmm2     /* %xmm2 = copy of the previous round key */
 4143         movdqu  %xmm0,(%rdx)    /* round key 0 is the user key itself */
 4144         jmp     .Loop_key128
 4145 
 4146 .align  16
 4147 .Loop_key128:
 4148 .byte   102,15,56,0,197         /* hand-encoded: pshufb %xmm5,%xmm0 */
 4149 .byte   102,15,56,221,196       /* hand-encoded: aesenclast %xmm4,%xmm0 (its final XOR folds in the round constant) */
 4150         pslld   $1,%xmm4        /* double the round constant for the next iteration */
 4151         leaq    16(%rax),%rax
 4152 
 /* %xmm2 = running XOR of the previous key's dwords (k0, k0^k1, k0^k1^k2, ...) */
 4153         movdqa  %xmm2,%xmm3
 4154         pslldq  $4,%xmm2
 4155         pxor    %xmm2,%xmm3
 4156         pslldq  $4,%xmm2
 4157         pxor    %xmm2,%xmm3
 4158         pslldq  $4,%xmm2
 4159         pxor    %xmm3,%xmm2
 4160 
 4161         pxor    %xmm2,%xmm0     /* %xmm0 = new round key */
 4162         movdqu  %xmm0,-16(%rax) /* store at the slot %rax pointed to before the leaq */
 4163         movdqa  %xmm0,%xmm2
 4164 
 4165         decl    %r10d
 4166         jnz     .Loop_key128
 4167 
 /* The last two round keys are unrolled because the round constant is reloaded (0x1b). */
 4168         movdqa  .Lkey_rcon1b(%rip),%xmm4
 4169 
 4170 .byte   102,15,56,0,197         /* hand-encoded: pshufb %xmm5,%xmm0 */
 4171 .byte   102,15,56,221,196       /* hand-encoded: aesenclast %xmm4,%xmm0 */
 4172         pslld   $1,%xmm4        /* 0x1b -> 0x36 for the final round key */
 4173 
 4174         movdqa  %xmm2,%xmm3
 4175         pslldq  $4,%xmm2
 4176         pxor    %xmm2,%xmm3
 4177         pslldq  $4,%xmm2
 4178         pxor    %xmm2,%xmm3
 4179         pslldq  $4,%xmm2
 4180         pxor    %xmm3,%xmm2
 4181 
 4182         pxor    %xmm2,%xmm0
 4183         movdqu  %xmm0,(%rax)
 4184 
 4185         movdqa  %xmm0,%xmm2
 4186 .byte   102,15,56,0,197         /* hand-encoded: pshufb %xmm5,%xmm0 */
 4187 .byte   102,15,56,221,196       /* hand-encoded: aesenclast %xmm4,%xmm0 */
 4188 
 4189         movdqa  %xmm2,%xmm3
 4190         pslldq  $4,%xmm2
 4191         pxor    %xmm2,%xmm3
 4192         pslldq  $4,%xmm2
 4193         pxor    %xmm2,%xmm3
 4194         pslldq  $4,%xmm2
 4195         pxor    %xmm3,%xmm2
 4196 
 4197         pxor    %xmm2,%xmm0
 4198         movdqu  %xmm0,16(%rax)
 4199 
 4200         movl    %esi,96(%rax)   /* %rax is %rdx+144 here, so this is offset 240 */
 4201         xorl    %eax,%eax       /* return 0 */
 4202         jmp     .Lenc_key_ret
 4203 
 /* ---- 192-bit key ---- */
 4204 .align  16
 4205 .L12rounds:
 4206         movq    16(%rdi),%xmm2  /* remaining 8 bytes of the user key */
 4207         movl    $11,%esi        /* value stored at 240(%rdx) when done */
 4208         cmpl    $268435456,%r10d        /* 0x10000000: bit 28 set, bit 11 clear? */
 4209         je      .L12rounds_alt
 4210 
 /*
  * Each .byte line below is a hand-encoded
  *     aeskeygenassist $rcon,%xmm2,%xmm1
  * The 192a/192b helpers alternate because 24 bytes of new key material are
  * produced per step while the schedule is written in 16-byte round keys.
  */
 4211         movups  %xmm0,(%rdx)
 4212 .byte   102,15,58,223,202,1
 4213         call    .Lkey_expansion_192a_cold
 4214 .byte   102,15,58,223,202,2
 4215         call    .Lkey_expansion_192b
 4216 .byte   102,15,58,223,202,4
 4217         call    .Lkey_expansion_192a
 4218 .byte   102,15,58,223,202,8
 4219         call    .Lkey_expansion_192b
 4220 .byte   102,15,58,223,202,16
 4221         call    .Lkey_expansion_192a
 4222 .byte   102,15,58,223,202,32
 4223         call    .Lkey_expansion_192b
 4224 .byte   102,15,58,223,202,64
 4225         call    .Lkey_expansion_192a
 4226 .byte   102,15,58,223,202,128
 4227         call    .Lkey_expansion_192b
 4228         movups  %xmm0,(%rax)    /* last round key; %rax is %rdx+192 here */
 4229         movl    %esi,48(%rax)   /* 192+48 = offset 240 of the schedule */
 4230         xorq    %rax,%rax       /* return 0 */
 4231         jmp     .Lenc_key_ret
 4232 
 4233 .align  16
 4234 .L12rounds_alt:
 4235         movdqa  .Lkey_rotate192(%rip),%xmm5     /* pshufb mask: dword 1, byte-rotated, in all lanes */
 4236         movdqa  .Lkey_rcon1(%rip),%xmm4 /* round constant, starts at 1 in every dword */
 4237         movl    $8,%r10d        /* eight iterations, 24 bytes of schedule each */
 4238         movdqu  %xmm0,(%rdx)
 4239         jmp     .Loop_key192
 4240 
 4241 .align  16
 4242 .Loop_key192:
 4243         movq    %xmm2,0(%rax)   /* store the low 8 bytes of %xmm2 */
 4244         movdqa  %xmm2,%xmm1
 4245 .byte   102,15,56,0,213         /* hand-encoded: pshufb %xmm5,%xmm2 */
 4246 .byte   102,15,56,221,212       /* hand-encoded: aesenclast %xmm4,%xmm2 */
 4247         pslld   $1,%xmm4        /* double the round constant for the next iteration */
 4248         leaq    24(%rax),%rax
 4249 
 /* %xmm0 = running XOR of its own dwords */
 4250         movdqa  %xmm0,%xmm3
 4251         pslldq  $4,%xmm0
 4252         pxor    %xmm0,%xmm3
 4253         pslldq  $4,%xmm0
 4254         pxor    %xmm0,%xmm3
 4255         pslldq  $4,%xmm0
 4256         pxor    %xmm3,%xmm0
 4257 
 4258         pshufd  $0xff,%xmm0,%xmm3       /* broadcast dword 3 of %xmm0 */
 4259         pxor    %xmm1,%xmm3
 4260         pslldq  $4,%xmm1
 4261         pxor    %xmm1,%xmm3
 4262 
 4263         pxor    %xmm2,%xmm0     /* next 16 bytes of key material */
 4264         pxor    %xmm3,%xmm2     /* following 8 bytes, stored at the top of the next iteration */
 4265         movdqu  %xmm0,-16(%rax) /* i.e. 8 bytes past the movq store above */
 4266 
 4267         decl    %r10d
 4268         jnz     .Loop_key192
 4269 
 4270         movl    %esi,32(%rax)   /* %rax is %rdx+208 here, so this is offset 240 */
 4271         xorl    %eax,%eax       /* return 0 */
 4272         jmp     .Lenc_key_ret
 4273 
 /* ---- 256-bit key ---- */
 4274 .align  16
 4275 .L14rounds:
 4276         movups  16(%rdi),%xmm2  /* second 16 bytes of the user key */
 4277         movl    $13,%esi        /* value stored at 240(%rdx) when done */
 4278         leaq    16(%rax),%rax   /* cursor now at %rdx+32: first two round keys are the user key */
 4279         cmpl    $268435456,%r10d        /* 0x10000000: bit 28 set, bit 11 clear? */
 4280         je      .L14rounds_alt
 4281 
 /*
  * The .byte lines below alternate between the hand-encoded forms
  *     aeskeygenassist $rcon,%xmm2,%xmm1   (...,202,rcon) followed by 256a
  *     aeskeygenassist $rcon,%xmm0,%xmm1   (...,200,rcon) followed by 256b
  * 256a updates %xmm0 and 256b updates %xmm2.
  */
 4282         movups  %xmm0,(%rdx)
 4283         movups  %xmm2,16(%rdx)
 4284 .byte   102,15,58,223,202,1
 4285         call    .Lkey_expansion_256a_cold
 4286 .byte   102,15,58,223,200,1
 4287         call    .Lkey_expansion_256b
 4288 .byte   102,15,58,223,202,2
 4289         call    .Lkey_expansion_256a
 4290 .byte   102,15,58,223,200,2
 4291         call    .Lkey_expansion_256b
 4292 .byte   102,15,58,223,202,4
 4293         call    .Lkey_expansion_256a
 4294 .byte   102,15,58,223,200,4
 4295         call    .Lkey_expansion_256b
 4296 .byte   102,15,58,223,202,8
 4297         call    .Lkey_expansion_256a
 4298 .byte   102,15,58,223,200,8
 4299         call    .Lkey_expansion_256b
 4300 .byte   102,15,58,223,202,16
 4301         call    .Lkey_expansion_256a
 4302 .byte   102,15,58,223,200,16
 4303         call    .Lkey_expansion_256b
 4304 .byte   102,15,58,223,202,32
 4305         call    .Lkey_expansion_256a
 4306 .byte   102,15,58,223,200,32
 4307         call    .Lkey_expansion_256b
 4308 .byte   102,15,58,223,202,64
 4309         call    .Lkey_expansion_256a
 4310         movups  %xmm0,(%rax)    /* last round key; %rax is %rdx+224 here */
 4311         movl    %esi,16(%rax)   /* 224+16 = offset 240 of the schedule */
 4312         xorq    %rax,%rax       /* return 0 */
 4313         jmp     .Lenc_key_ret
 4314 
 4315 .align  16
 4316 .L14rounds_alt:
 4317         movdqa  .Lkey_rotate(%rip),%xmm5        /* pshufb mask: top dword, byte-rotated, in all lanes */
 4318         movdqa  .Lkey_rcon1(%rip),%xmm4 /* round constant, starts at 1 in every dword */
 4319         movl    $7,%r10d        /* seven passes; the last one exits via .Ldone_key256 */
 4320         movdqu  %xmm0,0(%rdx)
 4321         movdqa  %xmm2,%xmm1     /* %xmm1 = working copy of the second key half */
 4322         movdqu  %xmm2,16(%rdx)
 4323         jmp     .Loop_key256
 4324 
 4325 .align  16
 4326 .Loop_key256:
 4327 .byte   102,15,56,0,213         /* hand-encoded: pshufb %xmm5,%xmm2 */
 4328 .byte   102,15,56,221,212       /* hand-encoded: aesenclast %xmm4,%xmm2 */
 4329 
 /* %xmm0 = running XOR of its own dwords */
 4330         movdqa  %xmm0,%xmm3
 4331         pslldq  $4,%xmm0
 4332         pxor    %xmm0,%xmm3
 4333         pslldq  $4,%xmm0
 4334         pxor    %xmm0,%xmm3
 4335         pslldq  $4,%xmm0
 4336         pxor    %xmm3,%xmm0
 4337         pslld   $1,%xmm4        /* double the round constant for the next pass */
 4338 
 4339         pxor    %xmm2,%xmm0     /* even-numbered round key */
 4340         movdqu  %xmm0,(%rax)
 4341 
 4342         decl    %r10d
 4343         jz      .Ldone_key256   /* the final pass produces only this one key */
 4344 
 4345         pshufd  $0xff,%xmm0,%xmm2       /* broadcast dword 3 of the key just produced */
 4346         pxor    %xmm3,%xmm3     /* zero operand, so aesenclast's final XOR adds nothing */
 4347 .byte   102,15,56,221,211       /* hand-encoded: aesenclast %xmm3,%xmm2 */
 4348 
 /* %xmm1 = running XOR of its own dwords */
 4349         movdqa  %xmm1,%xmm3
 4350         pslldq  $4,%xmm1
 4351         pxor    %xmm1,%xmm3
 4352         pslldq  $4,%xmm1
 4353         pxor    %xmm1,%xmm3
 4354         pslldq  $4,%xmm1
 4355         pxor    %xmm3,%xmm1
 4356 
 4357         pxor    %xmm1,%xmm2     /* odd-numbered round key */
 4358         movdqu  %xmm2,16(%rax)
 4359         leaq    32(%rax),%rax
 4360         movdqa  %xmm2,%xmm1
 4361 
 4362         jmp     .Loop_key256
 4363 
 4364 .Ldone_key256:
 4365         movl    %esi,16(%rax)   /* %rax is %rdx+224 here, so this is offset 240 */
 4366         xorl    %eax,%eax       /* return 0 */
 4367         jmp     .Lenc_key_ret
 4368 
 4369 .align  16
 4370 .Lbad_keybits:
 4371         movq    $-2,%rax        /* unsupported key length */
 4372 .Lenc_key_ret:
 /* Common exit: scrub key material from the registers, then return %rax. */
 4373         pxor    %xmm0,%xmm0
 4374         pxor    %xmm1,%xmm1
 4375         pxor    %xmm2,%xmm2
 4376         pxor    %xmm3,%xmm3
 4377         pxor    %xmm4,%xmm4
 4378         pxor    %xmm5,%xmm5
 4379         addq    $8,%rsp         /* undo the sub $8,%rsp from the prologue */
 4380 .cfi_adjust_cfa_offset  -8
 4381         .byte   0xf3,0xc3       /* hand-encoded: rep ret */
 4382 .LSEH_end_set_encrypt_key:
 4383 
 /*
  * Local helpers, reached only by `call` from the AESKEYGENASSIST paths above.
  * On entry %xmm1 holds the aeskeygenassist result and %rax the write cursor.
  * The entry points without "_cold" first store the previously computed round
  * key and advance %rax.
  *
  * The two shufps/xorps pairs turn the key register into the running XOR of
  * its own dwords.  That only works while dword 0 of %xmm4 is zero, which the
  * xorps at function entry sets up and each shufps preserves.
  */
 4384 .align  16
 4385 .Lkey_expansion_128:
 4386         movups  %xmm0,(%rax)
 4387         leaq    16(%rax),%rax
 4388 .Lkey_expansion_128_cold:
 4389         shufps  $16,%xmm0,%xmm4
 4390         xorps   %xmm4,%xmm0
 4391         shufps  $140,%xmm0,%xmm4
 4392         xorps   %xmm4,%xmm0
 4393         shufps  $255,%xmm1,%xmm1        /* broadcast dword 3 of the keygenassist result */
 4394         xorps   %xmm1,%xmm0     /* %xmm0 = next round key */
 4395         .byte   0xf3,0xc3       /* hand-encoded: rep ret */
 4396 
 4397 .align  16
 4398 .Lkey_expansion_192a:
 4399         movups  %xmm0,(%rax)
 4400         leaq    16(%rax),%rax
 4401 .Lkey_expansion_192a_cold:
 4402         movaps  %xmm2,%xmm5     /* keep %xmm2; the following 192b call stores its low half */
 4403 .Lkey_expansion_192b_warm:
 4404         shufps  $16,%xmm0,%xmm4
 4405         movdqa  %xmm2,%xmm3
 4406         xorps   %xmm4,%xmm0
 4407         shufps  $140,%xmm0,%xmm4
 4408         pslldq  $4,%xmm3
 4409         xorps   %xmm4,%xmm0
 4410         pshufd  $85,%xmm1,%xmm1 /* broadcast dword 1 of the keygenassist result */
 4411         pxor    %xmm3,%xmm2
 4412         pxor    %xmm1,%xmm0     /* %xmm0 = next 16 bytes of key material */
 4413         pshufd  $255,%xmm0,%xmm3        /* broadcast dword 3 of the new %xmm0 */
 4414         pxor    %xmm3,%xmm2     /* %xmm2 = following 8 bytes (low half) */
 4415         .byte   0xf3,0xc3       /* hand-encoded: rep ret */
 4416 
 4417 .align  16
 4418 .Lkey_expansion_192b:
 4419         movaps  %xmm0,%xmm3
 4420         shufps  $68,%xmm0,%xmm5 /* %xmm5 = low half of saved %xmm5 : low half of %xmm0 */
 4421         movups  %xmm5,(%rax)
 4422         shufps  $78,%xmm2,%xmm3 /* %xmm3 = high half of %xmm0 : low half of %xmm2 */
 4423         movups  %xmm3,16(%rax)
 4424         leaq    32(%rax),%rax   /* two round keys written */
 4425         jmp     .Lkey_expansion_192b_warm
 4426 
 4427 .align  16
 4428 .Lkey_expansion_256a:
 4429         movups  %xmm2,(%rax)
 4430         leaq    16(%rax),%rax
 4431 .Lkey_expansion_256a_cold:
 4432         shufps  $16,%xmm0,%xmm4
 4433         xorps   %xmm4,%xmm0
 4434         shufps  $140,%xmm0,%xmm4
 4435         xorps   %xmm4,%xmm0
 4436         shufps  $255,%xmm1,%xmm1        /* broadcast dword 3 of the keygenassist result */
 4437         xorps   %xmm1,%xmm0     /* %xmm0 = next even-numbered round key */
 4438         .byte   0xf3,0xc3       /* hand-encoded: rep ret */
 4439 
 4440 .align  16
 4441 .Lkey_expansion_256b:
 4442         movups  %xmm0,(%rax)
 4443         leaq    16(%rax),%rax
 4444 
 4445         shufps  $16,%xmm2,%xmm4
 4446         xorps   %xmm4,%xmm2
 4447         shufps  $140,%xmm2,%xmm4
 4448         xorps   %xmm4,%xmm2
 4449         shufps  $170,%xmm1,%xmm1        /* broadcast dword 2 of the keygenassist result */
 4450         xorps   %xmm1,%xmm2     /* %xmm2 = next odd-numbered round key */
 4451         .byte   0xf3,0xc3       /* hand-encoded: rep ret */
 4452 .cfi_endproc    
 4453 .size   aesni_set_encrypt_key,.-aesni_set_encrypt_key
 4454 .size   __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
 /*
  * Constant tables, 64-byte aligned.  Only .Lkey_rotate, .Lkey_rotate192,
  * .Lkey_rcon1 and .Lkey_rcon1b are referenced by the key-setup code visible
  * here; the others are presumably used by routines earlier in the file.
  */
 4455 .align  64
 4456 .Lbswap_mask:
 4457 .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
 4458 .Lincrement32:
 4459 .long   6,6,6,0
 4460 .Lincrement64:
 4461 .long   1,0,0,0
 4462 .Lxts_magic:
 4463 .long   0x87,0,1,0
 4464 .Lincrement1:
 4465 .byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 /* pshufb mask: selects source bytes 13,14,15,12 into every dword (loaded into %xmm5 for the 128/256-bit "_alt" paths) */
 4466 .Lkey_rotate:
 4467 .long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
 /* pshufb mask: selects source bytes 5,6,7,4 into every dword (loaded into %xmm5 for the 192-bit "_alt" path) */
 4468 .Lkey_rotate192:
 4469 .long   0x04070605,0x04070605,0x04070605,0x04070605
 /* initial round constant 1 in every dword; the "_alt" loops double it with pslld */
 4470 .Lkey_rcon1:
 4471 .long   1,1,1,1
 /* round constant 0x1b, reloaded by the 128-bit "_alt" path after its eight loop iterations */
 4472 .Lkey_rcon1b:
 4473 .long   0x1b,0x1b,0x1b,0x1b
 4474 
 /* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
 4475 .byte   65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 4476 .align  64

Cache object: b1cba92bbfdfe16bba0dca779e48990c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.