FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/amd64/ghash-x86_64.S


/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
.text


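/*
 * Editorial annotation (not part of the generated output): per the SysV
 * AMD64 ABI this is gcm_gmult_4bit(Xi, Htable), with the 128-bit hash
 * value Xi in %rdi and the precomputed table of multiples of the hash key
 * H in %rsi.  It multiplies Xi by H in GF(2^128) using the 4-bit ("Shoup")
 * table method: each nibble of Xi selects an Htable entry, and the
 * .Lrem_4bit table folds the bits shifted out on the right back in modulo
 * the GHASH reduction polynomial.
 */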
.globl  gcm_gmult_4bit
.type   gcm_gmult_4bit,@function
.align  16
gcm_gmult_4bit:
.cfi_startproc
        pushq   %rbx
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_adjust_cfa_offset  8
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_adjust_cfa_offset  8
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_adjust_cfa_offset  8
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_adjust_cfa_offset  8
.cfi_offset     %r15,-56
        subq    $280,%rsp
.cfi_adjust_cfa_offset  280
.Lgmult_prologue:

        movzbq  15(%rdi),%r8
        leaq    .Lrem_4bit(%rip),%r11
        xorq    %rax,%rax
        xorq    %rbx,%rbx
        movb    %r8b,%al
        movb    %r8b,%bl
        shlb    $4,%al
        movq    $14,%rcx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        andb    $0xf0,%bl
        movq    %r8,%rdx
        jmp     .Loop1

.align  16
.Loop1:
        shrq    $4,%r8
        andq    $0xf,%rdx
        movq    %r9,%r10
        movb    (%rdi,%rcx,1),%al
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        movb    %al,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        shlb    $4,%al
        xorq    %r10,%r8
        decq    %rcx
        js      .Lbreak1

        shrq    $4,%r8
        andq    $0xf,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $0xf0,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8
        jmp     .Loop1

.align  16
.Lbreak1:
        shrq    $4,%r8
        andq    $0xf,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $0xf0,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8

        shrq    $4,%r8
        andq    $0xf,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        xorq    %r10,%r8
        xorq    (%r11,%rdx,8),%r9

        bswapq  %r8
        bswapq  %r9
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

        leaq    280+48(%rsp),%rsi
.cfi_def_cfa    %rsi,8
        movq    -8(%rsi),%rbx
.cfi_restore    %rbx
        leaq    (%rsi),%rsp
.cfi_def_cfa_register   %rsp
.Lgmult_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_gmult_4bit,.-gcm_gmult_4bit
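/*
 * Editorial annotation: gcm_ghash_4bit(Xi, Htable, inp, len) folds len
 * bytes of input (a multiple of 16) into the running hash Xi.  The long
 * prologue unpacks Htable into a flatter layout on the 280-byte stack
 * frame; the unrolled .Louter_loop then consumes one 16-byte block per
 * iteration, a byte (two nibbles) at a time, reducing through the
 * .Lrem_8bit table.  The ".byte 0xf3,0xc3" epilogues throughout this file
 * encode "rep ret".
 */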
.globl  gcm_ghash_4bit
.type   gcm_ghash_4bit,@function
.align  16
gcm_ghash_4bit:
.cfi_startproc
        pushq   %rbx
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_adjust_cfa_offset  8
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_adjust_cfa_offset  8
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_adjust_cfa_offset  8
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_adjust_cfa_offset  8
.cfi_offset     %r15,-56
        subq    $280,%rsp
.cfi_adjust_cfa_offset  280
.Lghash_prologue:
        movq    %rdx,%r14
        movq    %rcx,%r15
        subq    $-128,%rsi
        leaq    16+128(%rsp),%rbp
        xorl    %edx,%edx
        movq    0+0-128(%rsi),%r8
        movq    0+8-128(%rsi),%rax
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    16+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    16+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,0(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,0(%rbp)
        movq    32+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,0-128(%rbp)
        movq    32+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,1(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,8(%rbp)
        movq    48+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,8-128(%rbp)
        movq    48+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,2(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,16(%rbp)
        movq    64+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,16-128(%rbp)
        movq    64+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,3(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,24(%rbp)
        movq    80+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,24-128(%rbp)
        movq    80+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,4(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,32(%rbp)
        movq    96+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,32-128(%rbp)
        movq    96+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,5(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,40(%rbp)
        movq    112+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,40-128(%rbp)
        movq    112+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,6(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,48(%rbp)
        movq    128+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,48-128(%rbp)
        movq    128+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,7(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,56(%rbp)
        movq    144+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,56-128(%rbp)
        movq    144+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,8(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,64(%rbp)
        movq    160+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,64-128(%rbp)
        movq    160+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,9(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,72(%rbp)
        movq    176+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,72-128(%rbp)
        movq    176+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,10(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,80(%rbp)
        movq    192+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,80-128(%rbp)
        movq    192+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,11(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,88(%rbp)
        movq    208+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,88-128(%rbp)
        movq    208+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,12(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,96(%rbp)
        movq    224+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,96-128(%rbp)
        movq    224+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,13(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,104(%rbp)
        movq    240+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,104-128(%rbp)
        movq    240+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,14(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,112(%rbp)
        shlb    $4,%dl
        movq    %rax,112-128(%rbp)
        shlq    $60,%r10
        movb    %dl,15(%rsp)
        orq     %r10,%rbx
        movq    %r9,120(%rbp)
        movq    %rbx,120-128(%rbp)
        addq    $-128,%rsi
        movq    8(%rdi),%r8
        movq    0(%rdi),%r9
        addq    %r14,%r15
        leaq    .Lrem_8bit(%rip),%r11
        jmp     .Louter_loop
.align  16
.Louter_loop:
        xorq    (%r14),%r9
        movq    8(%r14),%rdx
        leaq    16(%r14),%r14
        xorq    %r8,%rdx
        movq    %r9,(%rdi)
        movq    %rdx,8(%rdi)
        shrq    $32,%rdx
        xorq    %rax,%rax
        roll    $8,%edx
        movb    %dl,%al
        movzbl  %dl,%ebx
        shlb    $4,%al
        shrl    $4,%ebx
        roll    $8,%edx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        xorq    %r8,%r12
        movq    %r9,%r10
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    8(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    0(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        andl    $240,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    -4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        movzwq  (%r11,%r12,2),%r12
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        shlq    $48,%r12
        xorq    %r10,%r8
        xorq    %r12,%r9
        movzbq  %r8b,%r13
        shrq    $4,%r8
        movq    %r9,%r10
        shlb    $4,%r13b
        shrq    $4,%r9
        xorq    8(%rsi,%rcx,1),%r8
        movzwq  (%r11,%r13,2),%r13
        shlq    $60,%r10
        xorq    (%rsi,%rcx,1),%r9
        xorq    %r10,%r8
        shlq    $48,%r13
        bswapq  %r8
        xorq    %r13,%r9
        bswapq  %r9
        cmpq    %r15,%r14
        jb      .Louter_loop
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

        leaq    280+48(%rsp),%rsi
.cfi_def_cfa    %rsi,8
        movq    -48(%rsi),%r15
.cfi_restore    %r15
        movq    -40(%rsi),%r14
.cfi_restore    %r14
        movq    -32(%rsi),%r13
.cfi_restore    %r13
        movq    -24(%rsi),%r12
.cfi_restore    %r12
        movq    -16(%rsi),%rbp
.cfi_restore    %rbp
        movq    -8(%rsi),%rbx
.cfi_restore    %rbx
        leaq    0(%rsi),%rsp
.cfi_def_cfa_register   %rsp
.Lghash_epilogue:
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_ghash_4bit,.-gcm_ghash_4bit
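/*
 * Editorial annotation: gcm_init_clmul(Htable, H) prepares the key
 * schedule for the PCLMULQDQ paths.  It converts the hash key into the
 * bit-reflected representation (the pcmpgtd/psllq/pand sequence against
 * .L0x1c2_polynomial), then squares and multiplies to store H, H^2, H^3
 * and H^4 along with pre-XORed Karatsuba halves.  The ".byte
 * 102,15,58,68,..." groups are hand-encoded pclmulqdq instructions,
 * emitted as raw bytes for assemblers that predate the opcode.
 */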
.globl  gcm_init_clmul
.type   gcm_init_clmul,@function
.align  16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
        movdqu  (%rsi),%xmm2
        pshufd  $78,%xmm2,%xmm2


        pshufd  $255,%xmm2,%xmm4
        movdqa  %xmm2,%xmm3
        psllq   $1,%xmm2
        pxor    %xmm5,%xmm5
        psrlq   $63,%xmm3
        pcmpgtd %xmm4,%xmm5
        pslldq  $8,%xmm3
        por     %xmm3,%xmm2


        pand    .L0x1c2_polynomial(%rip),%xmm5
        pxor    %xmm5,%xmm2


        pshufd  $78,%xmm2,%xmm6
        movdqa  %xmm2,%xmm0
        pxor    %xmm2,%xmm6
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        pshufd  $78,%xmm2,%xmm3
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm2,%xmm3
        movdqu  %xmm2,0(%rdi)
        pxor    %xmm0,%xmm4
        movdqu  %xmm0,16(%rdi)
.byte   102,15,58,15,227,8
        movdqu  %xmm4,32(%rdi)
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        movdqa  %xmm0,%xmm5
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        pshufd  $78,%xmm5,%xmm3
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm5,%xmm3
        movdqu  %xmm5,48(%rdi)
        pxor    %xmm0,%xmm4
        movdqu  %xmm0,64(%rdi)
.byte   102,15,58,15,227,8
        movdqu  %xmm4,80(%rdi)
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_init_clmul,.-gcm_init_clmul
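/*
 * Editorial annotation: gcm_gmult_clmul(Xi, Htable) performs a single
 * GF(2^128) multiply of Xi by H: a byte swap via pshufb (".byte
 * 102,15,56,0,197"), a Karatsuba carry-less multiplication, then the
 * shift-and-XOR reduction by the 0x1c2 polynomial.
 */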
.globl  gcm_gmult_clmul
.type   gcm_gmult_clmul,@function
.align  16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
        movdqu  (%rdi),%xmm0
        movdqa  .Lbswap_mask(%rip),%xmm5
        movdqu  (%rsi),%xmm2
        movdqu  32(%rsi),%xmm4
.byte   102,15,56,0,197
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
.byte   102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_gmult_clmul,.-gcm_gmult_clmul
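/*
 * Editorial annotation: gcm_ghash_clmul(Xi, Htable, inp, len) is the bulk
 * PCLMULQDQ hash.  Given at least four blocks (and subject to the
 * OPENSSL_ia32cap_P capability test below, which steers some CPUs past
 * the aggregated path), .Lmod4_loop processes four blocks per iteration
 * against H^1..H^4 with a single deferred reduction; shorter inputs fall
 * through to the two-block .Lmod_loop and the one-block .Lodd_tail.
 */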
.globl  gcm_ghash_clmul
.type   gcm_ghash_clmul,@function
.align  32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
        movdqa  .Lbswap_mask(%rip),%xmm10

        movdqu  (%rdi),%xmm0
        movdqu  (%rsi),%xmm2
        movdqu  32(%rsi),%xmm7
.byte   102,65,15,56,0,194

        subq    $0x10,%rcx
        jz      .Lodd_tail

        movdqu  16(%rsi),%xmm6
        movl    OPENSSL_ia32cap_P+4(%rip),%eax
        cmpq    $0x30,%rcx
        jb      .Lskip4x

        andl    $71303168,%eax
        cmpl    $4194304,%eax
        je      .Lskip4x

        subq    $0x30,%rcx
        movq    $0xA040608020C0E000,%rax
        movdqu  48(%rsi),%xmm14
        movdqu  64(%rsi),%xmm15




        movdqu  48(%rdx),%xmm3
        movdqu  32(%rdx),%xmm11
.byte   102,65,15,56,0,218
.byte   102,69,15,56,0,218
        movdqa  %xmm3,%xmm5
        pshufd  $78,%xmm3,%xmm4
        pxor    %xmm3,%xmm4
.byte   102,15,58,68,218,0
.byte   102,15,58,68,234,17
.byte   102,15,58,68,231,0

        movdqa  %xmm11,%xmm13
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm11,%xmm12
.byte   102,68,15,58,68,222,0
.byte   102,68,15,58,68,238,17
.byte   102,68,15,58,68,231,16
        xorps   %xmm11,%xmm3
        xorps   %xmm13,%xmm5
        movups  80(%rsi),%xmm7
        xorps   %xmm12,%xmm4

        movdqu  16(%rdx),%xmm11
        movdqu  0(%rdx),%xmm8
.byte   102,69,15,56,0,218
.byte   102,69,15,56,0,194
        movdqa  %xmm11,%xmm13
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm8,%xmm0
        pxor    %xmm11,%xmm12
.byte   102,69,15,58,68,222,0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm8
        pxor    %xmm0,%xmm8
.byte   102,69,15,58,68,238,17
.byte   102,68,15,58,68,231,0
        xorps   %xmm11,%xmm3
        xorps   %xmm13,%xmm5

        leaq    64(%rdx),%rdx
        subq    $0x40,%rcx
        jc      .Ltail4x

        jmp     .Lmod4_loop
.align  32
.Lmod4_loop:
.byte   102,65,15,58,68,199,0
        xorps   %xmm12,%xmm4
        movdqu  48(%rdx),%xmm11
.byte   102,69,15,56,0,218
.byte   102,65,15,58,68,207,17
        xorps   %xmm3,%xmm0
        movdqu  32(%rdx),%xmm3
        movdqa  %xmm11,%xmm13
.byte   102,68,15,58,68,199,16
        pshufd  $78,%xmm11,%xmm12
        xorps   %xmm5,%xmm1
        pxor    %xmm11,%xmm12
.byte   102,65,15,56,0,218
        movups  32(%rsi),%xmm7
        xorps   %xmm4,%xmm8
.byte   102,68,15,58,68,218,0
        pshufd  $78,%xmm3,%xmm4

        pxor    %xmm0,%xmm8
        movdqa  %xmm3,%xmm5
        pxor    %xmm1,%xmm8
        pxor    %xmm3,%xmm4
        movdqa  %xmm8,%xmm9
.byte   102,68,15,58,68,234,17
        pslldq  $8,%xmm8
        psrldq  $8,%xmm9
        pxor    %xmm8,%xmm0
        movdqa  .L7_mask(%rip),%xmm8
        pxor    %xmm9,%xmm1
.byte   102,76,15,110,200

        pand    %xmm0,%xmm8
.byte   102,69,15,56,0,200
        pxor    %xmm0,%xmm9
.byte   102,68,15,58,68,231,0
        psllq   $57,%xmm9
        movdqa  %xmm9,%xmm8
        pslldq  $8,%xmm9
.byte   102,15,58,68,222,0
        psrldq  $8,%xmm8
        pxor    %xmm9,%xmm0
        pxor    %xmm8,%xmm1
        movdqu  0(%rdx),%xmm8

        movdqa  %xmm0,%xmm9
        psrlq   $1,%xmm0
.byte   102,15,58,68,238,17
        xorps   %xmm11,%xmm3
        movdqu  16(%rdx),%xmm11
.byte   102,69,15,56,0,218
.byte   102,15,58,68,231,16
        xorps   %xmm13,%xmm5
        movups  80(%rsi),%xmm7
.byte   102,69,15,56,0,194
        pxor    %xmm9,%xmm1
        pxor    %xmm0,%xmm9
        psrlq   $5,%xmm0

        movdqa  %xmm11,%xmm13
        pxor    %xmm12,%xmm4
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm9,%xmm0
        pxor    %xmm8,%xmm1
        pxor    %xmm11,%xmm12
.byte   102,69,15,58,68,222,0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        movdqa  %xmm0,%xmm1
.byte   102,69,15,58,68,238,17
        xorps   %xmm11,%xmm3
        pshufd  $78,%xmm0,%xmm8
        pxor    %xmm0,%xmm8

.byte   102,68,15,58,68,231,0
        xorps   %xmm13,%xmm5

        leaq    64(%rdx),%rdx
        subq    $0x40,%rcx
        jnc     .Lmod4_loop

.Ltail4x:
.byte   102,65,15,58,68,199,0
.byte   102,65,15,58,68,207,17
.byte   102,68,15,58,68,199,16
        xorps   %xmm12,%xmm4
        xorps   %xmm3,%xmm0
        xorps   %xmm5,%xmm1
        pxor    %xmm0,%xmm1
        pxor    %xmm4,%xmm8

        pxor    %xmm1,%xmm8
        pxor    %xmm0,%xmm1

        movdqa  %xmm8,%xmm9
        psrldq  $8,%xmm8
        pslldq  $8,%xmm9
        pxor    %xmm8,%xmm1
        pxor    %xmm9,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        addq    $0x40,%rcx
        jz      .Ldone
        movdqu  32(%rsi),%xmm7
        subq    $0x10,%rcx
        jz      .Lodd_tail
.Lskip4x:





        movdqu  (%rdx),%xmm8
        movdqu  16(%rdx),%xmm3
.byte   102,69,15,56,0,194
.byte   102,65,15,56,0,218
        pxor    %xmm8,%xmm0

        movdqa  %xmm3,%xmm5
        pshufd  $78,%xmm3,%xmm4
        pxor    %xmm3,%xmm4
.byte   102,15,58,68,218,0
.byte   102,15,58,68,234,17
.byte   102,15,58,68,231,0

        leaq    32(%rdx),%rdx
        nop
        subq    $0x20,%rcx
        jbe     .Leven_tail
        nop
        jmp     .Lmod_loop

.align  32
.Lmod_loop:
        movdqa  %xmm0,%xmm1
        movdqa  %xmm4,%xmm8
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm0,%xmm4

.byte   102,15,58,68,198,0
.byte   102,15,58,68,206,17
.byte   102,15,58,68,231,16

        pxor    %xmm3,%xmm0
        pxor    %xmm5,%xmm1
        movdqu  (%rdx),%xmm9
        pxor    %xmm0,%xmm8
.byte   102,69,15,56,0,202
        movdqu  16(%rdx),%xmm3

        pxor    %xmm1,%xmm8
        pxor    %xmm9,%xmm1
        pxor    %xmm8,%xmm4
.byte   102,65,15,56,0,218
        movdqa  %xmm4,%xmm8
        psrldq  $8,%xmm8
        pslldq  $8,%xmm4
        pxor    %xmm8,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm3,%xmm5

        movdqa  %xmm0,%xmm9
        movdqa  %xmm0,%xmm8
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm8
.byte   102,15,58,68,218,0
        psllq   $1,%xmm0
        pxor    %xmm8,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm8
        pslldq  $8,%xmm0
        psrldq  $8,%xmm8
        pxor    %xmm9,%xmm0
        pshufd  $78,%xmm5,%xmm4
        pxor    %xmm8,%xmm1
        pxor    %xmm5,%xmm4

        movdqa  %xmm0,%xmm9
        psrlq   $1,%xmm0
.byte   102,15,58,68,234,17
        pxor    %xmm9,%xmm1
        pxor    %xmm0,%xmm9
        psrlq   $5,%xmm0
        pxor    %xmm9,%xmm0
        leaq    32(%rdx),%rdx
        psrlq   $1,%xmm0
.byte   102,15,58,68,231,0
        pxor    %xmm1,%xmm0

        subq    $0x20,%rcx
        ja      .Lmod_loop

.Leven_tail:
        movdqa  %xmm0,%xmm1
        movdqa  %xmm4,%xmm8
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm0,%xmm4

.byte   102,15,58,68,198,0
.byte   102,15,58,68,206,17
.byte   102,15,58,68,231,16

        pxor    %xmm3,%xmm0
        pxor    %xmm5,%xmm1
        pxor    %xmm0,%xmm8
        pxor    %xmm1,%xmm8
        pxor    %xmm8,%xmm4
        movdqa  %xmm4,%xmm8
        psrldq  $8,%xmm8
        pslldq  $8,%xmm4
        pxor    %xmm8,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        testq   %rcx,%rcx
        jnz     .Ldone

.Lodd_tail:
        movdqu  (%rdx),%xmm8
.byte   102,69,15,56,0,194
        pxor    %xmm8,%xmm0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,223,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
.Ldone:
.byte   102,65,15,56,0,194
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_ghash_clmul,.-gcm_ghash_clmul
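/*
 * Editorial annotation: gcm_init_avx extends the clmul key schedule; its
 * four-trip loop appears to emit the powers H^1..H^8, each pair followed
 * by pre-XORed Karatsuba halves (48 bytes per pair), sized for the
 * eight-block AVX loop further below.
 */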
.globl  gcm_init_avx
.type   gcm_init_avx,@function
.align  32
gcm_init_avx:
.cfi_startproc
        vzeroupper

        vmovdqu (%rsi),%xmm2
        vpshufd $78,%xmm2,%xmm2


        vpshufd $255,%xmm2,%xmm4
        vpsrlq  $63,%xmm2,%xmm3
        vpsllq  $1,%xmm2,%xmm2
        vpxor   %xmm5,%xmm5,%xmm5
        vpcmpgtd        %xmm4,%xmm5,%xmm5
        vpslldq $8,%xmm3,%xmm3
        vpor    %xmm3,%xmm2,%xmm2


        vpand   .L0x1c2_polynomial(%rip),%xmm5,%xmm5
        vpxor   %xmm5,%xmm2,%xmm2

        vpunpckhqdq     %xmm2,%xmm2,%xmm6
        vmovdqa %xmm2,%xmm0
        vpxor   %xmm2,%xmm6,%xmm6
        movq    $4,%r10
        jmp     .Linit_start_avx
.align  32
.Linit_loop_avx:
        vpalignr        $8,%xmm3,%xmm4,%xmm5
        vmovdqu %xmm5,-16(%rdi)
        vpunpckhqdq     %xmm0,%xmm0,%xmm3
        vpxor   %xmm0,%xmm3,%xmm3
        vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
        vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
        vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
        vpxor   %xmm0,%xmm1,%xmm4
        vpxor   %xmm4,%xmm3,%xmm3

        vpslldq $8,%xmm3,%xmm4
        vpsrldq $8,%xmm3,%xmm3
        vpxor   %xmm4,%xmm0,%xmm0
        vpxor   %xmm3,%xmm1,%xmm1
        vpsllq  $57,%xmm0,%xmm3
        vpsllq  $62,%xmm0,%xmm4
        vpxor   %xmm3,%xmm4,%xmm4
        vpsllq  $63,%xmm0,%xmm3
        vpxor   %xmm3,%xmm4,%xmm4
        vpslldq $8,%xmm4,%xmm3
        vpsrldq $8,%xmm4,%xmm4
        vpxor   %xmm3,%xmm0,%xmm0
        vpxor   %xmm4,%xmm1,%xmm1

        vpsrlq  $1,%xmm0,%xmm4
        vpxor   %xmm0,%xmm1,%xmm1
        vpxor   %xmm4,%xmm0,%xmm0
        vpsrlq  $5,%xmm4,%xmm4
        vpxor   %xmm4,%xmm0,%xmm0
        vpsrlq  $1,%xmm0,%xmm0
        vpxor   %xmm1,%xmm0,%xmm0
.Linit_start_avx:
        vmovdqa %xmm0,%xmm5
        vpunpckhqdq     %xmm0,%xmm0,%xmm3
        vpxor   %xmm0,%xmm3,%xmm3
        vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
        vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
        vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
        vpxor   %xmm0,%xmm1,%xmm4
        vpxor   %xmm4,%xmm3,%xmm3

        vpslldq $8,%xmm3,%xmm4
        vpsrldq $8,%xmm3,%xmm3
        vpxor   %xmm4,%xmm0,%xmm0
        vpxor   %xmm3,%xmm1,%xmm1
        vpsllq  $57,%xmm0,%xmm3
        vpsllq  $62,%xmm0,%xmm4
        vpxor   %xmm3,%xmm4,%xmm4
        vpsllq  $63,%xmm0,%xmm3
        vpxor   %xmm3,%xmm4,%xmm4
        vpslldq $8,%xmm4,%xmm3
        vpsrldq $8,%xmm4,%xmm4
        vpxor   %xmm3,%xmm0,%xmm0
        vpxor   %xmm4,%xmm1,%xmm1

        vpsrlq  $1,%xmm0,%xmm4
        vpxor   %xmm0,%xmm1,%xmm1
        vpxor   %xmm4,%xmm0,%xmm0
        vpsrlq  $5,%xmm4,%xmm4
        vpxor   %xmm4,%xmm0,%xmm0
        vpsrlq  $1,%xmm0,%xmm0
        vpxor   %xmm1,%xmm0,%xmm0
        vpshufd $78,%xmm5,%xmm3
        vpshufd $78,%xmm0,%xmm4
        vpxor   %xmm5,%xmm3,%xmm3
        vmovdqu %xmm5,0(%rdi)
        vpxor   %xmm0,%xmm4,%xmm4
        vmovdqu %xmm0,16(%rdi)
        leaq    48(%rdi),%rdi
        subq    $1,%r10
        jnz     .Linit_loop_avx

        vpalignr        $8,%xmm4,%xmm3,%xmm5
        vmovdqu %xmm5,-16(%rdi)

        vzeroupper
        .byte   0xf3,0xc3
.cfi_endproc
.size   gcm_init_avx,.-gcm_init_avx
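/*
 * Editorial annotation: gcm_gmult_avx is an alias; a single multiply
 * gains nothing from AVX, so it tail-jumps into gcm_gmult_clmul above.
 */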
.globl  gcm_gmult_avx
.type   gcm_gmult_avx,@function
.align  32
gcm_gmult_avx:
.cfi_startproc
        jmp     .L_gmult_clmul
.cfi_endproc
.size   gcm_gmult_avx,.-gcm_gmult_avx
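/*
 * Editorial annotation: gcm_ghash_avx hashes eight blocks per
 * .Loop8x_avx iteration against H^1..H^8 with one deferred reduction;
 * .Lshort_avx handles inputs under 128 bytes a block at a time, and
 * .Ltail_avx performs the final Karatsuba recombination and reduction.
 */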
 1419 .globl  gcm_ghash_avx
 1420 .type   gcm_ghash_avx,@function
 1421 .align  32
 1422 gcm_ghash_avx:
 1423 .cfi_startproc  
 1424         vzeroupper
 1425 
 1426         vmovdqu (%rdi),%xmm10
 1427         leaq    .L0x1c2_polynomial(%rip),%r10
 1428         leaq    64(%rsi),%rsi
 1429         vmovdqu .Lbswap_mask(%rip),%xmm13
 1430         vpshufb %xmm13,%xmm10,%xmm10
 1431         cmpq    $0x80,%rcx
 1432         jb      .Lshort_avx
 1433         subq    $0x80,%rcx
 1434 
 1435         vmovdqu 112(%rdx),%xmm14
 1436         vmovdqu 0-64(%rsi),%xmm6
 1437         vpshufb %xmm13,%xmm14,%xmm14
 1438         vmovdqu 32-64(%rsi),%xmm7
 1439 
 1440         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1441         vmovdqu 96(%rdx),%xmm15
 1442         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1443         vpxor   %xmm14,%xmm9,%xmm9
 1444         vpshufb %xmm13,%xmm15,%xmm15
 1445         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1446         vmovdqu 16-64(%rsi),%xmm6
 1447         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1448         vmovdqu 80(%rdx),%xmm14
 1449         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1450         vpxor   %xmm15,%xmm8,%xmm8
 1451 
 1452         vpshufb %xmm13,%xmm14,%xmm14
 1453         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1454         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1455         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1456         vmovdqu 48-64(%rsi),%xmm6
 1457         vpxor   %xmm14,%xmm9,%xmm9
 1458         vmovdqu 64(%rdx),%xmm15
 1459         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1460         vmovdqu 80-64(%rsi),%xmm7
 1461 
 1462         vpshufb %xmm13,%xmm15,%xmm15
 1463         vpxor   %xmm0,%xmm3,%xmm3
 1464         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1465         vpxor   %xmm1,%xmm4,%xmm4
 1466         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1467         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1468         vmovdqu 64-64(%rsi),%xmm6
 1469         vpxor   %xmm2,%xmm5,%xmm5
 1470         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1471         vpxor   %xmm15,%xmm8,%xmm8
 1472 
 1473         vmovdqu 48(%rdx),%xmm14
 1474         vpxor   %xmm3,%xmm0,%xmm0
 1475         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1476         vpxor   %xmm4,%xmm1,%xmm1
 1477         vpshufb %xmm13,%xmm14,%xmm14
 1478         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1479         vmovdqu 96-64(%rsi),%xmm6
 1480         vpxor   %xmm5,%xmm2,%xmm2
 1481         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1482         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1483         vmovdqu 128-64(%rsi),%xmm7
 1484         vpxor   %xmm14,%xmm9,%xmm9
 1485 
 1486         vmovdqu 32(%rdx),%xmm15
 1487         vpxor   %xmm0,%xmm3,%xmm3
 1488         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1489         vpxor   %xmm1,%xmm4,%xmm4
 1490         vpshufb %xmm13,%xmm15,%xmm15
 1491         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1492         vmovdqu 112-64(%rsi),%xmm6
 1493         vpxor   %xmm2,%xmm5,%xmm5
 1494         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1495         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1496         vpxor   %xmm15,%xmm8,%xmm8
 1497 
 1498         vmovdqu 16(%rdx),%xmm14
 1499         vpxor   %xmm3,%xmm0,%xmm0
 1500         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1501         vpxor   %xmm4,%xmm1,%xmm1
 1502         vpshufb %xmm13,%xmm14,%xmm14
 1503         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1504         vmovdqu 144-64(%rsi),%xmm6
 1505         vpxor   %xmm5,%xmm2,%xmm2
 1506         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1507         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1508         vmovdqu 176-64(%rsi),%xmm7
 1509         vpxor   %xmm14,%xmm9,%xmm9
 1510 
 1511         vmovdqu (%rdx),%xmm15
 1512         vpxor   %xmm0,%xmm3,%xmm3
 1513         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1514         vpxor   %xmm1,%xmm4,%xmm4
 1515         vpshufb %xmm13,%xmm15,%xmm15
 1516         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1517         vmovdqu 160-64(%rsi),%xmm6
 1518         vpxor   %xmm2,%xmm5,%xmm5
 1519         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
 1520 
 1521         leaq    128(%rdx),%rdx
 1522         cmpq    $0x80,%rcx
 1523         jb      .Ltail_avx
 1524 
 1525         vpxor   %xmm10,%xmm15,%xmm15
 1526         subq    $0x80,%rcx
 1527         jmp     .Loop8x_avx
 1528 
 1529 .align  32
 1530 .Loop8x_avx:
 1531         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1532         vmovdqu 112(%rdx),%xmm14
 1533         vpxor   %xmm0,%xmm3,%xmm3
 1534         vpxor   %xmm15,%xmm8,%xmm8
 1535         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm10
 1536         vpshufb %xmm13,%xmm14,%xmm14
 1537         vpxor   %xmm1,%xmm4,%xmm4
 1538         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm11
 1539         vmovdqu 0-64(%rsi),%xmm6
 1540         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1541         vpxor   %xmm2,%xmm5,%xmm5
 1542         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm12
 1543         vmovdqu 32-64(%rsi),%xmm7
 1544         vpxor   %xmm14,%xmm9,%xmm9
 1545 
 1546         vmovdqu 96(%rdx),%xmm15
 1547         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1548         vpxor   %xmm3,%xmm10,%xmm10
 1549         vpshufb %xmm13,%xmm15,%xmm15
 1550         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1551         vxorps  %xmm4,%xmm11,%xmm11
 1552         vmovdqu 16-64(%rsi),%xmm6
 1553         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1554         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1555         vpxor   %xmm5,%xmm12,%xmm12
 1556         vxorps  %xmm15,%xmm8,%xmm8
 1557 
 1558         vmovdqu 80(%rdx),%xmm14
 1559         vpxor   %xmm10,%xmm12,%xmm12
 1560         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1561         vpxor   %xmm11,%xmm12,%xmm12
 1562         vpslldq $8,%xmm12,%xmm9
 1563         vpxor   %xmm0,%xmm3,%xmm3
 1564         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1565         vpsrldq $8,%xmm12,%xmm12
 1566         vpxor   %xmm9,%xmm10,%xmm10
 1567         vmovdqu 48-64(%rsi),%xmm6
 1568         vpshufb %xmm13,%xmm14,%xmm14
 1569         vxorps  %xmm12,%xmm11,%xmm11
 1570         vpxor   %xmm1,%xmm4,%xmm4
 1571         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1572         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1573         vmovdqu 80-64(%rsi),%xmm7
 1574         vpxor   %xmm14,%xmm9,%xmm9
 1575         vpxor   %xmm2,%xmm5,%xmm5
 1576 
 1577         vmovdqu 64(%rdx),%xmm15
 1578         vpalignr        $8,%xmm10,%xmm10,%xmm12
 1579         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1580         vpshufb %xmm13,%xmm15,%xmm15
 1581         vpxor   %xmm3,%xmm0,%xmm0
 1582         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1583         vmovdqu 64-64(%rsi),%xmm6
 1584         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1585         vpxor   %xmm4,%xmm1,%xmm1
 1586         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1587         vxorps  %xmm15,%xmm8,%xmm8
 1588         vpxor   %xmm5,%xmm2,%xmm2
 1589 
 1590         vmovdqu 48(%rdx),%xmm14
 1591         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
 1592         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1593         vpshufb %xmm13,%xmm14,%xmm14
 1594         vpxor   %xmm0,%xmm3,%xmm3
 1595         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1596         vmovdqu 96-64(%rsi),%xmm6
 1597         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1598         vpxor   %xmm1,%xmm4,%xmm4
 1599         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1600         vmovdqu 128-64(%rsi),%xmm7
 1601         vpxor   %xmm14,%xmm9,%xmm9
 1602         vpxor   %xmm2,%xmm5,%xmm5
 1603 
 1604         vmovdqu 32(%rdx),%xmm15
 1605         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1606         vpshufb %xmm13,%xmm15,%xmm15
 1607         vpxor   %xmm3,%xmm0,%xmm0
 1608         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1609         vmovdqu 112-64(%rsi),%xmm6
 1610         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1611         vpxor   %xmm4,%xmm1,%xmm1
 1612         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
 1613         vpxor   %xmm15,%xmm8,%xmm8
 1614         vpxor   %xmm5,%xmm2,%xmm2
 1615         vxorps  %xmm12,%xmm10,%xmm10
 1616 
 1617         vmovdqu 16(%rdx),%xmm14
 1618         vpalignr        $8,%xmm10,%xmm10,%xmm12
 1619         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
 1620         vpshufb %xmm13,%xmm14,%xmm14
 1621         vpxor   %xmm0,%xmm3,%xmm3
 1622         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
 1623         vmovdqu 144-64(%rsi),%xmm6
 1624         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
 1625         vxorps  %xmm11,%xmm12,%xmm12
 1626         vpunpckhqdq     %xmm14,%xmm14,%xmm9
 1627         vpxor   %xmm1,%xmm4,%xmm4
 1628         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
 1629         vmovdqu 176-64(%rsi),%xmm7
 1630         vpxor   %xmm14,%xmm9,%xmm9
 1631         vpxor   %xmm2,%xmm5,%xmm5
 1632 
 1633         vmovdqu (%rdx),%xmm15
 1634         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
 1635         vpshufb %xmm13,%xmm15,%xmm15
 1636         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
 1637         vmovdqu 160-64(%rsi),%xmm6
 1638         vpxor   %xmm12,%xmm15,%xmm15
 1639         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
 1640         vpxor   %xmm10,%xmm15,%xmm15
 1641 
 1642         leaq    128(%rdx),%rdx
 1643         subq    $0x80,%rcx
 1644         jnc     .Loop8x_avx
 1645 
 1646         addq    $0x80,%rcx
 1647         jmp     .Ltail_no_xor_avx
 1648 
 1649 .align  32
 1650 .Lshort_avx:
 1651         vmovdqu -16(%rdx,%rcx,1),%xmm14
 1652         leaq    (%rdx,%rcx,1),%rdx
 1653         vmovdqu 0-64(%rsi),%xmm6
 1654         vmovdqu 32-64(%rsi),%xmm7
 1655         vpshufb %xmm13,%xmm14,%xmm15
 1656 
 1657         vmovdqa %xmm0,%xmm3
 1658         vmovdqa %xmm1,%xmm4
 1659         vmovdqa %xmm2,%xmm5
 1660         subq    $0x10,%rcx
 1661         jz      .Ltail_avx
 1662 
 1663         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1664         vpxor   %xmm0,%xmm3,%xmm3
 1665         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1666         vpxor   %xmm15,%xmm8,%xmm8
 1667         vmovdqu -32(%rdx),%xmm14
 1668         vpxor   %xmm1,%xmm4,%xmm4
 1669         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1670         vmovdqu 16-64(%rsi),%xmm6
 1671         vpshufb %xmm13,%xmm14,%xmm15
 1672         vpxor   %xmm2,%xmm5,%xmm5
 1673         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1674         vpsrldq $8,%xmm7,%xmm7
 1675         subq    $0x10,%rcx
 1676         jz      .Ltail_avx
 1677 
 1678         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1679         vpxor   %xmm0,%xmm3,%xmm3
 1680         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1681         vpxor   %xmm15,%xmm8,%xmm8
 1682         vmovdqu -48(%rdx),%xmm14
 1683         vpxor   %xmm1,%xmm4,%xmm4
 1684         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1685         vmovdqu 48-64(%rsi),%xmm6
 1686         vpshufb %xmm13,%xmm14,%xmm15
 1687         vpxor   %xmm2,%xmm5,%xmm5
 1688         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1689         vmovdqu 80-64(%rsi),%xmm7
 1690         subq    $0x10,%rcx
 1691         jz      .Ltail_avx
 1692 
 1693         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1694         vpxor   %xmm0,%xmm3,%xmm3
 1695         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1696         vpxor   %xmm15,%xmm8,%xmm8
 1697         vmovdqu -64(%rdx),%xmm14
 1698         vpxor   %xmm1,%xmm4,%xmm4
 1699         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1700         vmovdqu 64-64(%rsi),%xmm6
 1701         vpshufb %xmm13,%xmm14,%xmm15
 1702         vpxor   %xmm2,%xmm5,%xmm5
 1703         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1704         vpsrldq $8,%xmm7,%xmm7
 1705         subq    $0x10,%rcx
 1706         jz      .Ltail_avx
 1707 
 1708         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1709         vpxor   %xmm0,%xmm3,%xmm3
 1710         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1711         vpxor   %xmm15,%xmm8,%xmm8
 1712         vmovdqu -80(%rdx),%xmm14
 1713         vpxor   %xmm1,%xmm4,%xmm4
 1714         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1715         vmovdqu 96-64(%rsi),%xmm6
 1716         vpshufb %xmm13,%xmm14,%xmm15
 1717         vpxor   %xmm2,%xmm5,%xmm5
 1718         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1719         vmovdqu 128-64(%rsi),%xmm7
 1720         subq    $0x10,%rcx
 1721         jz      .Ltail_avx
 1722 
 1723         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1724         vpxor   %xmm0,%xmm3,%xmm3
 1725         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1726         vpxor   %xmm15,%xmm8,%xmm8
 1727         vmovdqu -96(%rdx),%xmm14
 1728         vpxor   %xmm1,%xmm4,%xmm4
 1729         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1730         vmovdqu 112-64(%rsi),%xmm6
 1731         vpshufb %xmm13,%xmm14,%xmm15
 1732         vpxor   %xmm2,%xmm5,%xmm5
 1733         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1734         vpsrldq $8,%xmm7,%xmm7
 1735         subq    $0x10,%rcx
 1736         jz      .Ltail_avx
 1737 
 1738         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1739         vpxor   %xmm0,%xmm3,%xmm3
 1740         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1741         vpxor   %xmm15,%xmm8,%xmm8
 1742         vmovdqu -112(%rdx),%xmm14
 1743         vpxor   %xmm1,%xmm4,%xmm4
 1744         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1745         vmovdqu 144-64(%rsi),%xmm6
 1746         vpshufb %xmm13,%xmm14,%xmm15
 1747         vpxor   %xmm2,%xmm5,%xmm5
 1748         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1749         vmovq   184-64(%rsi),%xmm7
 1750         subq    $0x10,%rcx
 1751         jmp     .Ltail_avx
 1752 
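# Tail: fold the outstanding Karatsuba halves together and reduce.
# .Ltail_avx first XORs the running hash (%xmm10) into the final
# block; .Ltail_no_xor_avx is entered when that fold has already
# happened, e.g. on exit from the 8x loop above.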
 1753 .align  32
 1754 .Ltail_avx:
 1755         vpxor   %xmm10,%xmm15,%xmm15
 1756 .Ltail_no_xor_avx:
 1757         vpunpckhqdq     %xmm15,%xmm15,%xmm8
 1758         vpxor   %xmm0,%xmm3,%xmm3
 1759         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
 1760         vpxor   %xmm15,%xmm8,%xmm8
 1761         vpxor   %xmm1,%xmm4,%xmm4
 1762         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
 1763         vpxor   %xmm2,%xmm5,%xmm5
 1764         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
 1765 
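# Cache the reduction constant in %xmm12; (%r10) is the same pointer
# the in-loop vpclmulqdq above reduces against, presumably
# .L0x1c2_polynomial loaded into %r10 earlier in the function.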
 1766         vmovdqu (%r10),%xmm12
 1767 
 1768         vpxor   %xmm0,%xmm3,%xmm10
 1769         vpxor   %xmm1,%xmm4,%xmm11
 1770         vpxor   %xmm2,%xmm5,%xmm5
 1771 
 1772         vpxor   %xmm10,%xmm5,%xmm5
 1773         vpxor   %xmm11,%xmm5,%xmm5
 1774         vpslldq $8,%xmm5,%xmm9
 1775         vpsrldq $8,%xmm5,%xmm5
 1776         vpxor   %xmm9,%xmm10,%xmm10
 1777         vpxor   %xmm5,%xmm11,%xmm11
 1778 
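# Two-step reduction modulo the GHASH polynomial: each step
# carry-less-multiplies %xmm10 against the 0x1c2 constant in %xmm12
# and rotates the result by 64 bits (vpalignr $8), folding the
# 256-bit product held in %xmm10:%xmm11 back into 128 bits.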
 1779         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
 1780         vpalignr        $8,%xmm10,%xmm10,%xmm10
 1781         vpxor   %xmm9,%xmm10,%xmm10
 1782 
 1783         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
 1784         vpalignr        $8,%xmm10,%xmm10,%xmm10
 1785         vpxor   %xmm11,%xmm10,%xmm10
 1786         vpxor   %xmm9,%xmm10,%xmm10
 1787 
 1788         cmpq    $0,%rcx
 1789         jne     .Lshort_avx
 1790 
 1791         vpshufb %xmm13,%xmm10,%xmm10
 1792         vmovdqu %xmm10,(%rdi)
 1793         vzeroupper
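# 0xf3,0xc3 encodes "rep ret", the two-byte return OpenSSL emits to
# sidestep a branch-prediction penalty on some older AMD CPUs.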
 1794         .byte   0xf3,0xc3
 1795 .cfi_endproc    
 1796 .size   gcm_ghash_avx,.-gcm_ghash_avx
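# Constant pool shared by the routines in this file: the vpshufb
# byte-reversal mask (GHASH is defined on a reflected byte order),
# the 0x1c2 reduction polynomial used by the CLMUL/AVX paths, and
# 7-bit masks for table indexing (note 450 = 0x1c2 in .L7_mask_poly).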
 1797 .align  64
 1798 .Lbswap_mask:
 1799 .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
 1800 .L0x1c2_polynomial:
 1801 .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
 1802 .L7_mask:
 1803 .long   7,0,7,0
 1804 .L7_mask_poly:
 1805 .long   7,0,450,0
 1806 .align  64
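# .Lrem_4bit: 16 pre-reduced remainders for the 4-bit table path.
# Entry i is the carry-less product i*0x1c2, shifted left 4 bits and
# placed in the top 16 bits of a 64-bit word, stored little-endian
# as .long pairs (e.g. 471859200 = 0x1c200000, giving 0x1c20 << 48).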
 1807 .type   .Lrem_4bit,@object
 1808 .Lrem_4bit:
 1809 .long   0,0,0,471859200,0,943718400,0,610271232
 1810 .long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
 1811 .long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
 1812 .long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
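# .Lrem_8bit: 256 16-bit remainders for the 8-bit-at-a-time table
# path; entry i is the carry-less (GF(2)) product i*0x01c2.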
 1813 .type   .Lrem_8bit,@object
 1814 .Lrem_8bit:
 1815 .value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
 1816 .value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
 1817 .value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
 1818 .value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
 1819 .value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
 1820 .value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
 1821 .value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
 1822 .value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
 1823 .value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
 1824 .value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
 1825 .value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
 1826 .value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
 1827 .value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
 1828 .value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
 1829 .value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
 1830 .value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
 1831 .value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
 1832 .value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
 1833 .value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
 1834 .value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
 1835 .value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
 1836 .value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
 1837 .value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
 1838 .value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
 1839 .value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
 1840 .value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
 1841 .value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
 1842 .value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
 1843 .value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
 1844 .value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
 1845 .value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
 1846 .value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
 1847 
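# ASCII identification string:
# "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"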
 1848 .byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 1849 .align  64