The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/openssl/aarch64/ghashv8-armx.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /* Do not modify. This file is auto-generated from ghashv8-armx.pl. */
    3 #include "arm_arch.h"
    4 
    5 #if __ARM_MAX_ARCH__>=7
    6 .text
    7 .globl  gcm_init_v8
    8 .type   gcm_init_v8,%function
    9 .align  4
      //-----------------------------------------------------------------
      // gcm_init_v8: precompute the GHASH key table from the hash key H.
      //   In:    x0 = destination table; 96 bytes are written as six
      //               16-byte entries (Htable[0..5] in the comments below)
      //          x1 = address of the 16-byte input H
      //   Out:   Htable[0] = twisted H   Htable[2] = H^2
      //          Htable[3] = H^3         Htable[5] = H^4
      //          Htable[1], Htable[4] = packed Karatsuba pre-processed
      //          values (lo^hi halves) for H/H^2 and H^3/H^4 respectively
      //   Clobb: x0 (advanced by 48), v0-v7, v16-v22
      //   Leaf routine: no stack access, no calls, flags are not written.
      //   NOTE(review): presumably called with the AAPCS64 convention from
      //   C (x0/x1 = first two arguments) - confirm against the prototype.
      //-----------------------------------------------------------------
   10 gcm_init_v8:
   11         ld1     {v17.2d},[x1]           //load input H
              //movi+shl leave 0xc200000000000000 in each 64-bit lane of v19;
              //v19 is the multiplier used by both reduction phases below
   12         movi    v19.16b,#0xe1
   13         shl     v19.2d,v19.2d,#57               //0xc2.0
   14         ext     v3.16b,v17.16b,v17.16b,#8
   15         ushr    v18.2d,v19.2d,#63
   16         dup     v17.4s,v17.s[1]
   17         ext     v16.16b,v18.16b,v19.16b,#8              //t0=0xc2....01
   18         ushr    v18.2d,v3.2d,#63
   19         sshr    v17.4s,v17.4s,#31               //broadcast carry bit
   20         and     v18.16b,v18.16b,v16.16b
   21         shl     v3.2d,v3.2d,#1
   22         ext     v18.16b,v18.16b,v18.16b,#8
   23         and     v16.16b,v16.16b,v17.16b
   24         orr     v3.16b,v3.16b,v18.16b           //H<<<=1
   25         eor     v20.16b,v3.16b,v16.16b          //twisted H
   26         st1     {v20.2d},[x0],#16               //store Htable[0]
   27 
   28         //calculate H^2
              //v0 = lo*lo, v2 = hi*hi, v1 = (lo^hi)*(lo^hi) of twisted H
   29         ext     v16.16b,v20.16b,v20.16b,#8              //Karatsuba pre-processing
   30         pmull   v0.1q,v20.1d,v20.1d
   31         eor     v16.16b,v16.16b,v20.16b
   32         pmull2  v2.1q,v20.2d,v20.2d
   33         pmull   v1.1q,v16.1d,v16.1d
   34 
   35         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
   36         eor     v18.16b,v0.16b,v2.16b
   37         eor     v1.16b,v1.16b,v17.16b
   38         eor     v1.16b,v1.16b,v18.16b
   39         pmull   v18.1q,v0.1d,v19.1d             //1st phase
   40 
   41         ins     v2.d[0],v1.d[1]
   42         ins     v1.d[1],v0.d[0]
   43         eor     v0.16b,v1.16b,v18.16b
   44 
   45         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase
   46         pmull   v0.1q,v0.1d,v19.1d
   47         eor     v18.16b,v18.16b,v2.16b
   48         eor     v22.16b,v0.16b,v18.16b
   49 
              //v21 = {low 64 bits: lo^hi of H, high 64 bits: lo^hi of H^2}
   50         ext     v17.16b,v22.16b,v22.16b,#8              //Karatsuba pre-processing
   51         eor     v17.16b,v17.16b,v22.16b
   52         ext     v21.16b,v16.16b,v17.16b,#8              //pack Karatsuba pre-processed
   53         st1     {v21.2d,v22.2d},[x0],#32        //store Htable[1..2]
   54         //calculate H^3 and H^4
              //two multiplications are interleaved here:
              //  v0/v1/v2 accumulate H*H^2, v5/v6/v7 accumulate H^2*H^2
   55         pmull   v0.1q,v20.1d, v22.1d
   56         pmull   v5.1q,v22.1d,v22.1d
   57         pmull2  v2.1q,v20.2d, v22.2d
   58         pmull2  v7.1q,v22.2d,v22.2d
   59         pmull   v1.1q,v16.1d,v17.1d
   60         pmull   v6.1q,v17.1d,v17.1d
   61 
   62         ext     v16.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
   63         ext     v17.16b,v5.16b,v7.16b,#8
   64         eor     v18.16b,v0.16b,v2.16b
   65         eor     v1.16b,v1.16b,v16.16b
   66         eor     v4.16b,v5.16b,v7.16b
   67         eor     v6.16b,v6.16b,v17.16b
   68         eor     v1.16b,v1.16b,v18.16b
   69         pmull   v18.1q,v0.1d,v19.1d             //1st phase
   70         eor     v6.16b,v6.16b,v4.16b
   71         pmull   v4.1q,v5.1d,v19.1d
   72 
   73         ins     v2.d[0],v1.d[1]
   74         ins     v7.d[0],v6.d[1]
   75         ins     v1.d[1],v0.d[0]
   76         ins     v6.d[1],v5.d[0]
   77         eor     v0.16b,v1.16b,v18.16b
   78         eor     v5.16b,v6.16b,v4.16b
   79 
   80         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase
   81         ext     v4.16b,v5.16b,v5.16b,#8
   82         pmull   v0.1q,v0.1d,v19.1d
   83         pmull   v5.1q,v5.1d,v19.1d
   84         eor     v18.16b,v18.16b,v2.16b
   85         eor     v4.16b,v4.16b,v7.16b
   86         eor     v20.16b, v0.16b,v18.16b         //H^3
   87         eor     v22.16b,v5.16b,v4.16b           //H^4
   88 
   89         ext     v16.16b,v20.16b, v20.16b,#8             //Karatsuba pre-processing
   90         ext     v17.16b,v22.16b,v22.16b,#8
   91         eor     v16.16b,v16.16b,v20.16b
   92         eor     v17.16b,v17.16b,v22.16b
   93         ext     v21.16b,v16.16b,v17.16b,#8              //pack Karatsuba pre-processed
              //final store has no write-back, so x0 ends up at table+48
   94         st1     {v20.2d,v21.2d,v22.2d},[x0]             //store Htable[3..5]
   95         ret
   96 .size   gcm_init_v8,.-gcm_init_v8
   97 .globl  gcm_gmult_v8
   98 .type   gcm_gmult_v8,%function
   99 .align  4
      //-----------------------------------------------------------------
      // gcm_gmult_v8: multiply the 16-byte value Xi by H and reduce,
      // writing the result back over Xi.
      //   In:    x0 = address of Xi (16 bytes, read then overwritten)
      //          x1 = address of the key table; the first 32 bytes are
      //               read (twisted H and the entry that follows it)
      //   Out:   [x0] = updated Xi
      //   Clobb: v0-v3, v17-v21
      //   Leaf routine: no stack access, no calls, flags are not written,
      //   x0 and x1 are left unchanged.
      //   NOTE(review): the table layout is assumed to be the one written
      //   by gcm_init_v8 - confirm with callers.
      //-----------------------------------------------------------------
  100 gcm_gmult_v8:
  101         ld1     {v17.2d},[x0]           //load Xi
  102         movi    v19.16b,#0xe1
  103         ld1     {v20.2d,v21.2d},[x1]    //load twisted H, ...
              //v19 = 0xc200000000000000 in each 64-bit lane (reduction constant)
  104         shl     v19.2d,v19.2d,#57
              //bytes within each 64-bit half are reversed unless __ARMEB__ is defined
  105 #ifndef __ARMEB__
  106         rev64   v17.16b,v17.16b
  107 #endif
  108         ext     v3.16b,v17.16b,v17.16b,#8
  109 
  110         pmull   v0.1q,v20.1d,v3.1d              //H.lo·Xi.lo
  111         eor     v17.16b,v17.16b,v3.16b          //Karatsuba pre-processing
  112         pmull2  v2.1q,v20.2d,v3.2d              //H.hi·Xi.hi
  113         pmull   v1.1q,v21.1d,v17.1d             //(H.lo+H.hi)·(Xi.lo+Xi.hi)
  114 
  115         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  116         eor     v18.16b,v0.16b,v2.16b
  117         eor     v1.16b,v1.16b,v17.16b
  118         eor     v1.16b,v1.16b,v18.16b
  119         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  120 
  121         ins     v2.d[0],v1.d[1]
  122         ins     v1.d[1],v0.d[0]
  123         eor     v0.16b,v1.16b,v18.16b
  124 
  125         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  126         pmull   v0.1q,v0.1d,v19.1d
  127         eor     v18.16b,v18.16b,v2.16b
  128         eor     v0.16b,v0.16b,v18.16b
  129 
              //apply the same byte reversal and half swap as on load before storing
  130 #ifndef __ARMEB__
  131         rev64   v0.16b,v0.16b
  132 #endif
  133         ext     v0.16b,v0.16b,v0.16b,#8
  134         st1     {v0.2d},[x0]            //write out Xi
  135 
  136         ret
  137 .size   gcm_gmult_v8,.-gcm_gmult_v8
  138 .globl  gcm_ghash_v8
  139 .type   gcm_ghash_v8,%function
  140 .align  4
      //-----------------------------------------------------------------
      // gcm_ghash_v8: fold a buffer of 16-byte blocks into Xi.
      //   In:    x0 = address of Xi (16 bytes, read then overwritten)
      //          x1 = address of the key table (48 bytes read on this
      //               path: twisted H, packed Karatsuba entry, H^2)
      //          x2 = address of the input data
      //          x3 = input length in bytes
      //   Out:   [x0] = updated Xi
      //   Clobb: x1, x2, x3, x12, flags, v0-v7, v16-v22
      //   When x3 >= 64 (unsigned) control is handed to
      //   .Lgcm_ghash_v8_4x, which has its own register usage.
      //   Otherwise two blocks are consumed per loop iteration and a
      //   single left-over block is handled at .Lodd_tail_v8.
      //   The first block I[0] is loaded before the length is tested,
      //   so at least 16 bytes are always read from x2.
      //   NOTE(review): x3 is presumably a non-zero multiple of 16 -
      //   confirm with callers.
      //-----------------------------------------------------------------
  141 gcm_ghash_v8:
              //64 bytes or more: use the four-blocks-per-iteration code
  142         cmp     x3,#64
  143         b.hs    .Lgcm_ghash_v8_4x
  144         ld1     {v0.2d},[x0]            //load [rotated] Xi
  145                                                 //"[rotated]" means that
  146                                                 //loaded value would have
  147                                                 //to be rotated in order to
  148                                                 //make it appear as in
  149                                                 //algorithm specification
  150         subs    x3,x3,#32               //see if x3 is 32 or larger
  151         mov     x12,#16         //x12 is used as post-
  152                                                 //increment for input pointer;
  153                                                 //as loop is modulo-scheduled
  154                                                 //x12 is zeroed just in time
  155                                                 //to preclude overstepping
  156                                                 //inp[len], which means that
  157                                                 //last block[s] are actually
  158                                                 //loaded twice, but last
  159                                                 //copy is not processed
  160         ld1     {v20.2d,v21.2d},[x1],#32        //load twisted H, ..., H^2
  161         movi    v19.16b,#0xe1
  162         ld1     {v22.2d},[x1]
              //flags still come from the subs above: eq means exactly 32 bytes
  163         csel    x12,xzr,x12,eq                  //is it time to zero x12?
  164         ext     v0.16b,v0.16b,v0.16b,#8         //rotate Xi
  165         ld1     {v16.2d},[x2],#16       //load [rotated] I[0]
  166         shl     v19.2d,v19.2d,#57               //compose 0xc2.0 constant
  167 #ifndef __ARMEB__
  168         rev64   v16.16b,v16.16b
  169         rev64   v0.16b,v0.16b
  170 #endif
  171         ext     v3.16b,v16.16b,v16.16b,#8               //rotate I[0]
              //the loads, csel and vector ops above do not write flags,
              //so this branch still tests the result of the subs
  172         b.lo    .Lodd_tail_v8           //x3 was less than 32
  173         ld1     {v17.2d},[x2],x12       //load [rotated] I[1]
  174 #ifndef __ARMEB__
  175         rev64   v17.16b,v17.16b
  176 #endif
  177         ext     v7.16b,v17.16b,v17.16b,#8
  178         eor     v3.16b,v3.16b,v0.16b            //I[i]^=Xi
  179         pmull   v4.1q,v20.1d,v7.1d              //H·Ii+1
  180         eor     v17.16b,v17.16b,v7.16b          //Karatsuba pre-processing
  181         pmull2  v6.1q,v20.2d,v7.2d
  182         b       .Loop_mod2x_v8
  183 
              //loop invariant on entry: v3 = Xi^I[i] (multiplied by H^2 below),
              //v4/v6 = lo/hi products of H with I[i+1], v17 = Karatsuba
              //pre-processed I[i+1]
  184 .align  4
  185 .Loop_mod2x_v8:
  186         ext     v18.16b,v3.16b,v3.16b,#8
  187         subs    x3,x3,#32               //is there more data?
  188         pmull   v0.1q,v22.1d,v3.1d              //H^2.lo·Xi.lo
  189         csel    x12,xzr,x12,lo                  //is it time to zero x12?
  190 
  191         pmull   v5.1q,v21.1d,v17.1d
  192         eor     v18.16b,v18.16b,v3.16b          //Karatsuba pre-processing
  193         pmull2  v2.1q,v22.2d,v3.2d              //H^2.hi·Xi.hi
  194         eor     v0.16b,v0.16b,v4.16b            //accumulate
  195         pmull2  v1.1q,v21.2d,v18.2d             //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
  196         ld1     {v16.2d},[x2],x12       //load [rotated] I[i+2]
  197 
  198         eor     v2.16b,v2.16b,v6.16b
  199         csel    x12,xzr,x12,eq                  //is it time to zero x12?
  200         eor     v1.16b,v1.16b,v5.16b
  201 
  202         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  203         eor     v18.16b,v0.16b,v2.16b
  204         eor     v1.16b,v1.16b,v17.16b
  205         ld1     {v17.2d},[x2],x12       //load [rotated] I[i+3]
  206 #ifndef __ARMEB__
  207         rev64   v16.16b,v16.16b
  208 #endif
  209         eor     v1.16b,v1.16b,v18.16b
  210         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  211 
  212 #ifndef __ARMEB__
  213         rev64   v17.16b,v17.16b
  214 #endif
  215         ins     v2.d[0],v1.d[1]
  216         ins     v1.d[1],v0.d[0]
  217         ext     v7.16b,v17.16b,v17.16b,#8
  218         ext     v3.16b,v16.16b,v16.16b,#8
  219         eor     v0.16b,v1.16b,v18.16b
  220         pmull   v4.1q,v20.1d,v7.1d              //H·Ii+1
  221         eor     v3.16b,v3.16b,v2.16b            //accumulate v3.16b early
  222 
  223         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  224         pmull   v0.1q,v0.1d,v19.1d
  225         eor     v3.16b,v3.16b,v18.16b
  226         eor     v17.16b,v17.16b,v7.16b          //Karatsuba pre-processing
  227         eor     v3.16b,v3.16b,v0.16b
  228         pmull2  v6.1q,v20.2d,v7.2d
              //flags are still those of the subs at the top of the loop
  229         b.hs    .Loop_mod2x_v8          //there was at least 32 more bytes
  230 
              //loop exit: undo the early accumulation so that v0 holds the
              //reduced Xi and v3 the rotated last-loaded block again
  231         eor     v2.16b,v2.16b,v18.16b
  232         ext     v3.16b,v16.16b,v16.16b,#8               //re-construct v3.16b
  233         adds    x3,x3,#32               //re-construct x3
  234         eor     v0.16b,v0.16b,v2.16b            //re-construct v0.16b
  235         b.eq    .Ldone_v8               //is x3 zero?
              //one block left (v16/v3): Xi = (Xi ^ inp) multiplied by H
  236 .Lodd_tail_v8:
  237         ext     v18.16b,v0.16b,v0.16b,#8
  238         eor     v3.16b,v3.16b,v0.16b            //inp^=Xi
  239         eor     v17.16b,v16.16b,v18.16b         //v17.16b is rotated inp^Xi
  240 
  241         pmull   v0.1q,v20.1d,v3.1d              //H.lo·Xi.lo
  242         eor     v17.16b,v17.16b,v3.16b          //Karatsuba pre-processing
  243         pmull2  v2.1q,v20.2d,v3.2d              //H.hi·Xi.hi
  244         pmull   v1.1q,v21.1d,v17.1d             //(H.lo+H.hi)·(Xi.lo+Xi.hi)
  245 
  246         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  247         eor     v18.16b,v0.16b,v2.16b
  248         eor     v1.16b,v1.16b,v17.16b
  249         eor     v1.16b,v1.16b,v18.16b
  250         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  251 
  252         ins     v2.d[0],v1.d[1]
  253         ins     v1.d[1],v0.d[0]
  254         eor     v0.16b,v1.16b,v18.16b
  255 
  256         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  257         pmull   v0.1q,v0.1d,v19.1d
  258         eor     v18.16b,v18.16b,v2.16b
  259         eor     v0.16b,v0.16b,v18.16b
  260 
              //apply the byte reversal and half swap used on load, then store Xi
  261 .Ldone_v8:
  262 #ifndef __ARMEB__
  263         rev64   v0.16b,v0.16b
  264 #endif
  265         ext     v0.16b,v0.16b,v0.16b,#8
  266         st1     {v0.2d},[x0]            //write out Xi
  267 
  268         ret
  269 .size   gcm_ghash_v8,.-gcm_ghash_v8
  270 .type   gcm_ghash_v8_4x,%function
  271 .align  4
      //-----------------------------------------------------------------
      // gcm_ghash_v8_4x: four-blocks-per-iteration variant of the hash
      // update. No .globl directive for this symbol appears here; it is
      // reached through the .Lgcm_ghash_v8_4x label by the branch at the
      // top of gcm_ghash_v8, which is taken when x3 >= 64.
      //   In:    x0 = address of Xi (16 bytes, read then overwritten)
      //          x1 = address of the key table (96 bytes read: twisted H,
      //               packed entry, H^2, H^3, packed entry, H^4)
      //          x2 = address of the input data
      //          x3 = input length in bytes
      //   Out:   [x0] = updated Xi
      //   Clobb: x1, x2, x3, flags, v0-v7, v16-v31
      //   v8-v15 are not touched; no stack access, no calls.
      //   Running sums kept across the pipeline:
      //     v29 = sum of low-half products, v31 = sum of high-half
      //     products, v30 = sum of Karatsuba middle products
      //   NOTE(review): x3 is presumably a multiple of 16 - confirm with
      //   callers.
      //-----------------------------------------------------------------
  272 gcm_ghash_v8_4x:
  273 .Lgcm_ghash_v8_4x:
  274         ld1     {v0.2d},[x0]            //load [rotated] Xi
  275         ld1     {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
  276         movi    v19.16b,#0xe1
  277         ld1     {v26.2d,v27.2d,v28.2d},[x1]     //load twisted H^3, ..., H^4
  278         shl     v19.2d,v19.2d,#57               //compose 0xc2.0 constant
  279 
              //first four input blocks: v4 = I[0] ... v7 = I[3]
  280         ld1     {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
  281 #ifndef __ARMEB__
  282         rev64   v0.16b,v0.16b
  283         rev64   v5.16b,v5.16b
  284         rev64   v6.16b,v6.16b
  285         rev64   v7.16b,v7.16b
  286         rev64   v4.16b,v4.16b
  287 #endif
  288         ext     v25.16b,v7.16b,v7.16b,#8
  289         ext     v24.16b,v6.16b,v6.16b,#8
  290         ext     v23.16b,v5.16b,v5.16b,#8
  291 
              //the products for I[1..3] do not depend on Xi, so they are
              //computed ahead and summed into v29/v30/v31
  292         pmull   v29.1q,v20.1d,v25.1d            //H·Ii+3
  293         eor     v7.16b,v7.16b,v25.16b
  294         pmull2  v31.1q,v20.2d,v25.2d
  295         pmull   v30.1q,v21.1d,v7.1d
  296 
  297         pmull   v16.1q,v22.1d,v24.1d            //H^2·Ii+2
  298         eor     v6.16b,v6.16b,v24.16b
  299         pmull2  v24.1q,v22.2d,v24.2d
  300         pmull2  v6.1q,v21.2d,v6.2d
  301 
  302         eor     v29.16b,v29.16b,v16.16b
  303         eor     v31.16b,v31.16b,v24.16b
  304         eor     v30.16b,v30.16b,v6.16b
  305 
  306         pmull   v7.1q,v26.1d,v23.1d             //H^3·Ii+1
  307         eor     v5.16b,v5.16b,v23.16b
  308         pmull2  v23.1q,v26.2d,v23.2d
  309         pmull   v5.1q,v27.1d,v5.1d
  310 
  311         eor     v29.16b,v29.16b,v7.16b
  312         eor     v31.16b,v31.16b,v23.16b
  313         eor     v30.16b,v30.16b,v5.16b
  314 
              //x3 = len-128; a borrow means fewer than 64 bytes follow the
              //four blocks already loaded, so the main loop is skipped
  315         subs    x3,x3,#128
  316         b.lo    .Ltail4x
  317 
  318         b       .Loop4x
  319 
              //each iteration finishes the pending four blocks (multiplying
              //Xi^I[0] by H^4 and reducing) while loading and pre-multiplying
              //the next 64 bytes
  320 .align  4
  321 .Loop4x:
  322         eor     v16.16b,v4.16b,v0.16b
  323         ld1     {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
  324         ext     v3.16b,v16.16b,v16.16b,#8
  325 #ifndef __ARMEB__
  326         rev64   v5.16b,v5.16b
  327         rev64   v6.16b,v6.16b
  328         rev64   v7.16b,v7.16b
  329         rev64   v4.16b,v4.16b
  330 #endif
  331 
  332         pmull   v0.1q,v28.1d,v3.1d              //H^4·(Xi+Ii)
  333         eor     v16.16b,v16.16b,v3.16b
  334         pmull2  v2.1q,v28.2d,v3.2d
  335         ext     v25.16b,v7.16b,v7.16b,#8
  336         pmull2  v1.1q,v27.2d,v16.2d
  337 
  338         eor     v0.16b,v0.16b,v29.16b
  339         eor     v2.16b,v2.16b,v31.16b
  340         ext     v24.16b,v6.16b,v6.16b,#8
  341         eor     v1.16b,v1.16b,v30.16b
  342         ext     v23.16b,v5.16b,v5.16b,#8
  343 
  344         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  345         eor     v18.16b,v0.16b,v2.16b
  346         pmull   v29.1q,v20.1d,v25.1d            //H·Ii+3
  347         eor     v7.16b,v7.16b,v25.16b
  348         eor     v1.16b,v1.16b,v17.16b
  349         pmull2  v31.1q,v20.2d,v25.2d
  350         eor     v1.16b,v1.16b,v18.16b
  351         pmull   v30.1q,v21.1d,v7.1d
  352 
  353         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  354         ins     v2.d[0],v1.d[1]
  355         ins     v1.d[1],v0.d[0]
  356         pmull   v16.1q,v22.1d,v24.1d            //H^2·Ii+2
  357         eor     v6.16b,v6.16b,v24.16b
  358         pmull2  v24.1q,v22.2d,v24.2d
  359         eor     v0.16b,v1.16b,v18.16b
  360         pmull2  v6.1q,v21.2d,v6.2d
  361 
  362         eor     v29.16b,v29.16b,v16.16b
  363         eor     v31.16b,v31.16b,v24.16b
  364         eor     v30.16b,v30.16b,v6.16b
  365 
  366         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  367         pmull   v0.1q,v0.1d,v19.1d
  368         pmull   v7.1q,v26.1d,v23.1d             //H^3·Ii+1
  369         eor     v5.16b,v5.16b,v23.16b
  370         eor     v18.16b,v18.16b,v2.16b
  371         pmull2  v23.1q,v26.2d,v23.2d
  372         pmull   v5.1q,v27.1d,v5.1d
  373 
  374         eor     v0.16b,v0.16b,v18.16b
  375         eor     v29.16b,v29.16b,v7.16b
  376         eor     v31.16b,v31.16b,v23.16b
  377         ext     v0.16b,v0.16b,v0.16b,#8
  378         eor     v30.16b,v30.16b,v5.16b
  379 
              //keep looping while at least 64 more bytes follow the blocks
              //that are now pending
  380         subs    x3,x3,#64
  381         b.hs    .Loop4x
  382 
              //finish the four pending blocks without loading further input;
              //the result is left unreduced in v0 (lo), v1 (mid), v2 (hi)
  383 .Ltail4x:
  384         eor     v16.16b,v4.16b,v0.16b
  385         ext     v3.16b,v16.16b,v16.16b,#8
  386 
  387         pmull   v0.1q,v28.1d,v3.1d              //H^4·(Xi+Ii)
  388         eor     v16.16b,v16.16b,v3.16b
  389         pmull2  v2.1q,v28.2d,v3.2d
  390         pmull2  v1.1q,v27.2d,v16.2d
  391 
  392         eor     v0.16b,v0.16b,v29.16b
  393         eor     v2.16b,v2.16b,v31.16b
  394         eor     v1.16b,v1.16b,v30.16b
  395 
              //x3 becomes the number of input bytes not yet loaded;
              //zero -> go straight to the final reduction
  396         adds    x3,x3,#64
  397         b.eq    .Ldone4x
  398 
              //dispatch on the remainder: below 32 -> .Lone, exactly 32 ->
              //.Ltwo, anything larger falls through to .Lthree
  399         cmp     x3,#32
  400         b.lo    .Lone
  401         b.eq    .Ltwo
              //three blocks are loaded: reduce the pending product into Xi,
              //then form H^3*(Xi+I[0]) + H^2*I[1] + H*I[2]
  402 .Lthree:
  403         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  404         eor     v18.16b,v0.16b,v2.16b
  405         eor     v1.16b,v1.16b,v17.16b
  406         ld1     {v4.2d,v5.2d,v6.2d},[x2]
  407         eor     v1.16b,v1.16b,v18.16b
  408 #ifndef __ARMEB__
  409         rev64   v5.16b,v5.16b
  410         rev64   v6.16b,v6.16b
  411         rev64   v4.16b,v4.16b
  412 #endif
  413 
  414         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  415         ins     v2.d[0],v1.d[1]
  416         ins     v1.d[1],v0.d[0]
  417         ext     v24.16b,v6.16b,v6.16b,#8
  418         ext     v23.16b,v5.16b,v5.16b,#8
  419         eor     v0.16b,v1.16b,v18.16b
  420 
  421         pmull   v29.1q,v20.1d,v24.1d            //H·Ii+2
  422         eor     v6.16b,v6.16b,v24.16b
  423 
  424         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  425         pmull   v0.1q,v0.1d,v19.1d
  426         eor     v18.16b,v18.16b,v2.16b
  427         pmull2  v31.1q,v20.2d,v24.2d
  428         pmull   v30.1q,v21.1d,v6.1d
  429         eor     v0.16b,v0.16b,v18.16b
  430         pmull   v7.1q,v22.1d,v23.1d             //H^2·Ii+1
  431         eor     v5.16b,v5.16b,v23.16b
  432         ext     v0.16b,v0.16b,v0.16b,#8
  433 
  434         pmull2  v23.1q,v22.2d,v23.2d
  435         eor     v16.16b,v4.16b,v0.16b
  436         pmull2  v5.1q,v21.2d,v5.2d
  437         ext     v3.16b,v16.16b,v16.16b,#8
  438 
  439         eor     v29.16b,v29.16b,v7.16b
  440         eor     v31.16b,v31.16b,v23.16b
  441         eor     v30.16b,v30.16b,v5.16b
  442 
  443         pmull   v0.1q,v26.1d,v3.1d              //H^3·(Xi+Ii)
  444         eor     v16.16b,v16.16b,v3.16b
  445         pmull2  v2.1q,v26.2d,v3.2d
  446         pmull   v1.1q,v27.1d,v16.1d
  447 
  448         eor     v0.16b,v0.16b,v29.16b
  449         eor     v2.16b,v2.16b,v31.16b
  450         eor     v1.16b,v1.16b,v30.16b
  451         b       .Ldone4x
  452 
              //two blocks are loaded: reduce the pending product into Xi,
              //then form H^2*(Xi+I[0]) + H*I[1]
  453 .align  4
  454 .Ltwo:
  455         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  456         eor     v18.16b,v0.16b,v2.16b
  457         eor     v1.16b,v1.16b,v17.16b
  458         ld1     {v4.2d,v5.2d},[x2]
  459         eor     v1.16b,v1.16b,v18.16b
  460 #ifndef __ARMEB__
  461         rev64   v5.16b,v5.16b
  462         rev64   v4.16b,v4.16b
  463 #endif
  464 
  465         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  466         ins     v2.d[0],v1.d[1]
  467         ins     v1.d[1],v0.d[0]
  468         ext     v23.16b,v5.16b,v5.16b,#8
  469         eor     v0.16b,v1.16b,v18.16b
  470 
  471         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  472         pmull   v0.1q,v0.1d,v19.1d
  473         eor     v18.16b,v18.16b,v2.16b
  474         eor     v0.16b,v0.16b,v18.16b
  475         ext     v0.16b,v0.16b,v0.16b,#8
  476 
  477         pmull   v29.1q,v20.1d,v23.1d            //H·Ii+1
  478         eor     v5.16b,v5.16b,v23.16b
  479 
  480         eor     v16.16b,v4.16b,v0.16b
  481         ext     v3.16b,v16.16b,v16.16b,#8
  482 
  483         pmull2  v31.1q,v20.2d,v23.2d
  484         pmull   v30.1q,v21.1d,v5.1d
  485 
  486         pmull   v0.1q,v22.1d,v3.1d              //H^2·(Xi+Ii)
  487         eor     v16.16b,v16.16b,v3.16b
  488         pmull2  v2.1q,v22.2d,v3.2d
  489         pmull2  v1.1q,v21.2d,v16.2d
  490 
  491         eor     v0.16b,v0.16b,v29.16b
  492         eor     v2.16b,v2.16b,v31.16b
  493         eor     v1.16b,v1.16b,v30.16b
  494         b       .Ldone4x
  495 
              //one block is loaded: reduce the pending product into Xi,
              //then multiply (Xi+I[0]) by the first table entry (twisted H)
  496 .align  4
  497 .Lone:
  498         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  499         eor     v18.16b,v0.16b,v2.16b
  500         eor     v1.16b,v1.16b,v17.16b
  501         ld1     {v4.2d},[x2]
  502         eor     v1.16b,v1.16b,v18.16b
  503 #ifndef __ARMEB__
  504         rev64   v4.16b,v4.16b
  505 #endif
  506 
  507         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  508         ins     v2.d[0],v1.d[1]
  509         ins     v1.d[1],v0.d[0]
  510         eor     v0.16b,v1.16b,v18.16b
  511 
  512         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  513         pmull   v0.1q,v0.1d,v19.1d
  514         eor     v18.16b,v18.16b,v2.16b
  515         eor     v0.16b,v0.16b,v18.16b
  516         ext     v0.16b,v0.16b,v0.16b,#8
  517 
  518         eor     v16.16b,v4.16b,v0.16b
  519         ext     v3.16b,v16.16b,v16.16b,#8
  520 
  521         pmull   v0.1q,v20.1d,v3.1d
  522         eor     v16.16b,v16.16b,v3.16b
  523         pmull2  v2.1q,v20.2d,v3.2d
  524         pmull   v1.1q,v21.1d,v16.1d
  525 
              //common exit: v0/v1/v2 hold an unreduced product; reduce it,
              //restore the stored byte order and write Xi back
  526 .Ldone4x:
  527         ext     v17.16b,v0.16b,v2.16b,#8                //Karatsuba post-processing
  528         eor     v18.16b,v0.16b,v2.16b
  529         eor     v1.16b,v1.16b,v17.16b
  530         eor     v1.16b,v1.16b,v18.16b
  531 
  532         pmull   v18.1q,v0.1d,v19.1d             //1st phase of reduction
  533         ins     v2.d[0],v1.d[1]
  534         ins     v1.d[1],v0.d[0]
  535         eor     v0.16b,v1.16b,v18.16b
  536 
  537         ext     v18.16b,v0.16b,v0.16b,#8                //2nd phase of reduction
  538         pmull   v0.1q,v0.1d,v19.1d
  539         eor     v18.16b,v18.16b,v2.16b
  540         eor     v0.16b,v0.16b,v18.16b
  541         ext     v0.16b,v0.16b,v0.16b,#8
  542 
  543 #ifndef __ARMEB__
  544         rev64   v0.16b,v0.16b
  545 #endif
  546         st1     {v0.2d},[x0]            //write out Xi
  547 
  548         ret
  549 .size   gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
  550 .byte   71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
      //the bytes above are the NUL-terminated ASCII identification string
      //"GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
  551 .align  2
      //the repeated .align 2 below adds no padding: the location counter
      //is already aligned by the directive above
  552 .align  2
  553 #endif

Cache object: d2788a6c8bd3658bd98a0d8967ef226f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.