The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
   24  * Copyright (c) 2019-2022 Samuel Neves
   25  * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
   26  *
   27  * This is converted assembly: SSE4.1 -> ARMv8-A
   28  * Used tools: SIMDe https://github.com/simd-everywhere/simde
   29  */
   30 
   31 #if defined(__aarch64__)
   32         .text
   33         .section        .rodata.cst16,"aM",@progbits,16
   34         .p2align        4
      /*
       * 16-byte constants loaded by zfs_blake3_compress_in_place_sse41.
       * The byte tables are tbl index vectors and are written one 32-bit
       * lane per row; an index of 16..31 selects from the second register
       * of a two-register tbl.
       */

      /* tbl indices: swap the 16-bit halves of each lane (rotate by 16). */
   35 .LCPI0_0:
              .byte    2,  3,  0,  1
              .byte    6,  7,  4,  5
              .byte   10, 11,  8,  9
              .byte   14, 15, 12, 13

      /*
       * Four words added to the rotated fourth row to form the third state
       * row (same values as decimal 1779033703, 3144134277, 1013904242,
       * 2773480762; these match BLAKE3 IV words 0-3).
       */
   52 .LCPI0_1:
              .word   0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a

      /* tbl indices: rotate each 32-bit lane right by 8 bits. */
   57 .LCPI0_2:
              .byte    1,  2,  3,  0
              .byte    5,  6,  7,  4
              .byte    9, 10, 11,  8
              .byte   13, 14, 15, 12

      /* Two-register tbl: lanes 0 and 2 from the first, 1 and 3 from the second. */
   74 .LCPI0_3:
              .byte    0,  1,  2,  3
              .byte   20, 21, 22, 23
              .byte    8,  9, 10, 11
              .byte   28, 29, 30, 31

      /* Two-register tbl: lanes 0-2 from the first register, lane 3 from the second. */
   91 .LCPI0_4:
              .byte    0,  1,  2,  3
              .byte    4,  5,  6,  7
              .byte    8,  9, 10, 11
              .byte   28, 29, 30, 31
  108         .text
      /*
       * zfs_blake3_compress_in_place_sse41
       *
       * AArch64 NEON translation of the BLAKE3 SSE4.1 single-block
       * compression routine (see the file header).  Leaf function: it makes
       * no calls and does not use the stack.
       *
       * In (as consumed by the code below):
       *   x0  pointer to eight 32-bit state words; 32 bytes are read on
       *       entry and overwritten with the result before returning
       *   x1  pointer to a 64-byte input block (sixteen 32-bit words),
       *       read only
       *   w2  only bits 7:0 are used; becomes lane 2 of the fourth row
       *   x3  64-bit value; its low half becomes lane 0 and its high half
       *       lane 1 of the fourth row
       *   w4  only bits 7:0 are used; becomes lane 3 of the fourth row
       *   NOTE(review): these presumably map to the cv, block, block_len,
       *   counter and flags arguments of the C prototype -- confirm against
       *   the declaring header.
       *
       * Out:
       *   [x0 + 0 .. 15]  = row0 ^ row2
       *   [x0 + 16 .. 31] = row1 ^ row3
       *
       * Scratch: x8-x13, v0-v7, v16-v29.  v8-v15 are never touched.
       *
       * The 4x4 word state is held as four row vectors.  Rotations right by
       * 16 and by 8 are byte shuffles (tbl with v0 / v1); rotations right by
       * 12 and by 7 are ushr + shl + orr pairs.  "ext vN, vN, vN, #4/#8/#12"
       * rotates the lanes of a row.  The uzp/zip/ext/two-register-tbl
       * traffic on the remaining registers assembles the message-word
       * vectors for later mixing steps; it is interleaved with the mixing
       * arithmetic, so the individual rounds are not contiguous in the
       * listing.  The rotate-by-16 shuffle appears 14 times, i.e. two mixing
       * half-steps for each of seven rounds.
       */
  109         .globl  zfs_blake3_compress_in_place_sse41
  110         .p2align        2
  111         .type   zfs_blake3_compress_in_place_sse41,@function
  112 zfs_blake3_compress_in_place_sse41:
  113         .cfi_startproc
  114         ldp     q7, q6, [x0]    /* v7 = state words 0-3 (row0), v6 = words 4-7 (row1) */
  115         ldp     q17, q18, [x1]    /* v17 = block words 0-3, v18 = block words 4-7 */
  116         add     x12, x1, #32    /* x12 = second half of the block */
  117         ld2     { v4.4s, v5.4s }, [x12]    /* words 8-15 de-interleaved: v4 = even, v5 = odd */
  118         lsr     x10, x3, #32    /* x10 = high 32 bits of x3 */
  119         fmov    s16, w3    /* v16 = { low 32 bits of x3, 0, 0, 0 } */
  120         adrp    x13, .LCPI0_0
  121         adrp    x11, .LCPI0_1
  122         and     w8, w2, #0xff    /* keep only the low byte of w2 */
  123         mov     v16.s[1], w10
  124         ldr     q0, [x13, :lo12:.LCPI0_0]    /* v0 = shuffle mask: rotate lanes by 16 */
  125         ldr     q20, [x11, :lo12:.LCPI0_1]    /* v20 = the four constant words */
  126         adrp    x11, .LCPI0_4
  127         and     w9, w4, #0xff    /* keep only the low byte of w4 */
  128         ldr     q2, [x11, :lo12:.LCPI0_4]    /* v2 = blend mask: lanes 0-2 first reg, lane 3 second */
  129         mov     v16.s[2], w8
  130         uzp1    v21.4s, v17.4s, v18.4s    /* v21 = block words 0,2,4,6 */
  131         add     v7.4s, v6.4s, v7.4s    /* row0 += row1 */
  132         adrp    x12, .LCPI0_3
  133         mov     v16.s[3], w9    /* v16 = { x3 lo, x3 hi, w2 & 0xff, w4 & 0xff } */
  134         uzp2    v18.4s, v17.4s, v18.4s    /* v18 = block words 1,3,5,7 */
  135         add     v7.4s, v7.4s, v21.4s    /* row0 += even block words */
  136         ext     v17.16b, v5.16b, v5.16b, #12
  137         ldr     q3, [x12, :lo12:.LCPI0_3]    /* v3 = blend mask: lanes 0,2 first reg, lanes 1,3 second */
  138         ext     v24.16b, v4.16b, v4.16b, #12
  139         eor     v16.16b, v7.16b, v16.16b    /* row3 = row0 ^ v16 */
  140         mov     v27.16b, v17.16b
  141         uzp1    v19.4s, v21.4s, v21.4s
  142         ext     v25.16b, v21.16b, v21.16b, #12
  143         zip2    v28.4s, v18.4s, v17.4s
  144         tbl     v29.16b, { v16.16b }, v0.16b    /* row3 = row3 rotated right by 16 */
  145         mov     v27.s[1], v24.s[2]
  146         zip1    v23.2d, v17.2d, v18.2d
  147         ext     v19.16b, v19.16b, v21.16b, #8
  148         add     v22.4s, v29.4s, v20.4s    /* row2 = constants + row3 */
  149         ext     v26.16b, v21.16b, v25.16b, #12
  150         tbl     v20.16b, { v23.16b, v24.16b }, v2.16b
  151         zip1    v21.4s, v28.4s, v24.4s
  152         zip1    v23.4s, v24.4s, v28.4s
  153         uzp2    v19.4s, v19.4s, v18.4s
  154         eor     v24.16b, v22.16b, v6.16b    /* row1 ^= row2 */
  155         ext     v25.16b, v20.16b, v20.16b, #12
  156         ext     v6.16b, v23.16b, v21.16b, #8
  157         add     v7.4s, v7.4s, v18.4s    /* row0 += odd block words */
  158         ext     v18.16b, v19.16b, v19.16b, #4
  159         tbl     v16.16b, { v26.16b, v27.16b }, v3.16b
  160         uzp1    v21.4s, v20.4s, v25.4s
  161         mov     v26.16b, v6.16b
  162         ext     v23.16b, v18.16b, v18.16b, #12
  163         mov     v26.s[1], v21.s[2]
  164         adrp    x10, .LCPI0_2
  165         ext     v25.16b, v18.16b, v23.16b, #12
  166         uzp1    v23.4s, v18.4s, v18.4s
  167         ldr     q1, [x10, :lo12:.LCPI0_2]    /* v1 = shuffle mask: rotate lanes right by 8 */
  168         ext     v18.16b, v23.16b, v18.16b, #8
  169         ushr    v23.4s, v24.4s, #12    /* row1 = row1 rotated right by 12 ... */
  170         shl     v24.4s, v24.4s, #20
  171         orr     v23.16b, v24.16b, v23.16b    /* ... (ushr | shl) */
  172         add     v7.4s, v7.4s, v23.4s    /* row0 += row1 */
  173         eor     v27.16b, v29.16b, v7.16b    /* row3 ^= row0 */
  174         add     v4.4s, v7.4s, v4.4s
  175         tbl     v7.16b, { v25.16b, v26.16b }, v3.16b
  176         tbl     v26.16b, { v27.16b }, v1.16b    /* row3 = row3 rotated right by 8 */
  177         add     v22.4s, v22.4s, v26.4s    /* row2 += row3 */
  178         uzp2    v18.4s, v18.4s, v16.4s
  179         eor     v23.16b, v23.16b, v22.16b    /* row1 ^= row2 */
  180         ext     v5.16b, v18.16b, v18.16b, #4
  181         ushr    v27.4s, v23.4s, #7    /* row1 = row1 rotated right by 7 ... */
  182         shl     v23.4s, v23.4s, #25
  183         uzp1    v25.4s, v5.4s, v5.4s
  184         orr     v23.16b, v23.16b, v27.16b    /* ... (shl | ushr) */
      /*
       * From here on the same add / eor / rotate pattern repeats, with
       * ext #4 / #8 / #12 rotating the row lanes between half-steps.
       */
  185         ext     v28.16b, v4.16b, v4.16b, #12
  186         ext     v4.16b, v25.16b, v5.16b, #8
  187         ext     v25.16b, v26.16b, v26.16b, #8
  188         add     v26.4s, v28.4s, v23.4s
  189         eor     v25.16b, v26.16b, v25.16b
  190         ext     v22.16b, v22.16b, v22.16b, #4
  191         tbl     v25.16b, { v25.16b }, v0.16b
  192         add     v22.4s, v22.4s, v25.4s
  193         eor     v23.16b, v23.16b, v22.16b
  194         add     v17.4s, v26.4s, v17.4s
  195         ushr    v26.4s, v23.4s, #12
  196         shl     v23.4s, v23.4s, #20
  197         orr     v23.16b, v23.16b, v26.16b
  198         add     v17.4s, v17.4s, v23.4s
  199         eor     v25.16b, v25.16b, v17.16b
  200         add     v17.4s, v17.4s, v19.4s
  201         tbl     v19.16b, { v25.16b }, v1.16b
  202         add     v22.4s, v22.4s, v19.4s
  203         eor     v23.16b, v23.16b, v22.16b
  204         ushr    v25.4s, v23.4s, #7
  205         shl     v23.4s, v23.4s, #25
  206         ext     v17.16b, v17.16b, v17.16b, #4
  207         orr     v23.16b, v23.16b, v25.16b
  208         ext     v19.16b, v19.16b, v19.16b, #8
  209         add     v17.4s, v17.4s, v23.4s
  210         eor     v19.16b, v17.16b, v19.16b
  211         ext     v22.16b, v22.16b, v22.16b, #12
  212         tbl     v19.16b, { v19.16b }, v0.16b
  213         add     v22.4s, v22.4s, v19.4s
  214         eor     v23.16b, v23.16b, v22.16b
  215         ushr    v25.4s, v23.4s, #12
  216         shl     v23.4s, v23.4s, #20
  217         add     v17.4s, v17.4s, v16.4s
  218         orr     v23.16b, v23.16b, v25.16b
  219         add     v17.4s, v17.4s, v23.4s
  220         ext     v25.16b, v17.16b, v17.16b, #12
  221         eor     v17.16b, v19.16b, v17.16b
  222         tbl     v17.16b, { v17.16b }, v1.16b
  223         add     v19.4s, v22.4s, v17.4s
  224         eor     v22.16b, v23.16b, v19.16b
  225         add     v25.4s, v25.4s, v21.4s
  226         zip1    v20.2d, v6.2d, v16.2d
  227         ushr    v23.4s, v22.4s, #7
  228         shl     v22.4s, v22.4s, #25
  229         zip2    v24.4s, v16.4s, v6.4s
  230         tbl     v26.16b, { v20.16b, v21.16b }, v2.16b
  231         orr     v22.16b, v22.16b, v23.16b
  232         zip1    v16.4s, v24.4s, v21.4s
  233         zip1    v20.4s, v21.4s, v24.4s
  234         ext     v21.16b, v26.16b, v26.16b, #12
  235         ext     v17.16b, v17.16b, v17.16b, #8
  236         add     v25.4s, v25.4s, v22.4s
  237         ext     v16.16b, v20.16b, v16.16b, #8
  238         uzp1    v21.4s, v26.4s, v21.4s
  239         eor     v26.16b, v25.16b, v17.16b
  240         ext     v19.16b, v19.16b, v19.16b, #4
  241         tbl     v26.16b, { v26.16b }, v0.16b
  242         mov     v29.16b, v16.16b
  243         add     v19.4s, v19.4s, v26.4s
  244         ext     v27.16b, v5.16b, v5.16b, #12
  245         mov     v29.s[1], v21.s[2]
  246         eor     v22.16b, v22.16b, v19.16b
  247         ext     v28.16b, v5.16b, v27.16b, #12
  248         ushr    v27.4s, v22.4s, #12
  249         shl     v22.4s, v22.4s, #20
  250         add     v6.4s, v25.4s, v6.4s
  251         orr     v22.16b, v22.16b, v27.16b
  252         add     v6.4s, v6.4s, v22.4s
  253         eor     v26.16b, v26.16b, v6.16b
  254         add     v6.4s, v6.4s, v18.4s
  255         tbl     v18.16b, { v26.16b }, v1.16b
  256         add     v19.4s, v19.4s, v18.4s
  257         eor     v22.16b, v22.16b, v19.16b
  258         ushr    v26.4s, v22.4s, #7
  259         shl     v22.4s, v22.4s, #25
  260         ext     v6.16b, v6.16b, v6.16b, #4
  261         orr     v22.16b, v22.16b, v26.16b
  262         ext     v18.16b, v18.16b, v18.16b, #8
  263         add     v6.4s, v6.4s, v22.4s
  264         eor     v18.16b, v6.16b, v18.16b
  265         ext     v19.16b, v19.16b, v19.16b, #12
  266         tbl     v18.16b, { v18.16b }, v0.16b
  267         add     v19.4s, v19.4s, v18.4s
  268         eor     v22.16b, v22.16b, v19.16b
  269         ushr    v26.4s, v22.4s, #12
  270         shl     v22.4s, v22.4s, #20
  271         add     v6.4s, v6.4s, v7.4s
  272         orr     v22.16b, v22.16b, v26.16b
  273         add     v6.4s, v6.4s, v22.4s
  274         ext     v26.16b, v6.16b, v6.16b, #12
  275         eor     v6.16b, v18.16b, v6.16b
  276         uzp2    v4.4s, v4.4s, v7.4s
  277         zip2    v25.4s, v7.4s, v16.4s
  278         add     v26.4s, v26.4s, v21.4s
  279         zip1    v20.2d, v16.2d, v7.2d
  280         tbl     v6.16b, { v6.16b }, v1.16b
  281         ext     v24.16b, v4.16b, v4.16b, #4
  282         tbl     v27.16b, { v20.16b, v21.16b }, v2.16b
  283         zip1    v7.4s, v25.4s, v21.4s
  284         zip1    v20.4s, v21.4s, v25.4s
  285         add     v18.4s, v19.4s, v6.4s
  286         uzp1    v5.4s, v24.4s, v24.4s
  287         ext     v21.16b, v27.16b, v27.16b, #12
  288         ext     v7.16b, v20.16b, v7.16b, #8
  289         eor     v19.16b, v22.16b, v18.16b
  290         ext     v5.16b, v5.16b, v24.16b, #8
  291         tbl     v17.16b, { v28.16b, v29.16b }, v3.16b
  292         uzp1    v21.4s, v27.4s, v21.4s
  293         mov     v28.16b, v7.16b
  294         ushr    v22.4s, v19.4s, #7
  295         shl     v19.4s, v19.4s, #25
  296         ext     v23.16b, v24.16b, v24.16b, #12
  297         uzp2    v5.4s, v5.4s, v17.4s
  298         mov     v28.s[1], v21.s[2]
  299         orr     v19.16b, v19.16b, v22.16b
  300         ext     v27.16b, v24.16b, v23.16b, #12
  301         ext     v23.16b, v5.16b, v5.16b, #4
  302         ext     v6.16b, v6.16b, v6.16b, #8
  303         ext     v25.16b, v18.16b, v18.16b, #4
  304         add     v18.4s, v26.4s, v19.4s
  305         uzp1    v24.4s, v23.4s, v23.4s
  306         eor     v6.16b, v18.16b, v6.16b
  307         ext     v24.16b, v24.16b, v23.16b, #8
  308         add     v16.4s, v18.4s, v16.4s
  309         tbl     v18.16b, { v27.16b, v28.16b }, v3.16b
  310         tbl     v27.16b, { v6.16b }, v0.16b
  311         uzp2    v6.4s, v24.4s, v18.4s
  312         add     v24.4s, v25.4s, v27.4s
  313         eor     v19.16b, v19.16b, v24.16b
  314         ushr    v25.4s, v19.4s, #12
  315         shl     v19.4s, v19.4s, #20
  316         orr     v19.16b, v19.16b, v25.16b
  317         add     v16.4s, v16.4s, v19.4s
  318         eor     v25.16b, v27.16b, v16.16b
  319         add     v4.4s, v16.4s, v4.4s
  320         tbl     v16.16b, { v25.16b }, v1.16b
  321         add     v24.4s, v24.4s, v16.4s
  322         eor     v19.16b, v19.16b, v24.16b
  323         ushr    v25.4s, v19.4s, #7
  324         shl     v19.4s, v19.4s, #25
  325         ext     v4.16b, v4.16b, v4.16b, #4
  326         orr     v19.16b, v19.16b, v25.16b
  327         ext     v16.16b, v16.16b, v16.16b, #8
  328         add     v4.4s, v4.4s, v19.4s
  329         eor     v16.16b, v4.16b, v16.16b
  330         ext     v24.16b, v24.16b, v24.16b, #12
  331         tbl     v25.16b, { v16.16b }, v0.16b
  332         add     v24.4s, v24.4s, v25.4s
  333         eor     v16.16b, v19.16b, v24.16b
  334         ushr    v19.4s, v16.4s, #12
  335         shl     v16.4s, v16.4s, #20
  336         add     v4.4s, v4.4s, v17.4s
  337         orr     v19.16b, v16.16b, v19.16b
  338         add     v27.4s, v4.4s, v19.4s
  339         eor     v25.16b, v25.16b, v27.16b
  340         tbl     v25.16b, { v25.16b }, v1.16b
  341         add     v24.4s, v24.4s, v25.4s
  342         zip2    v26.4s, v17.4s, v7.4s
  343         ext     v4.16b, v27.16b, v27.16b, #12
  344         eor     v19.16b, v19.16b, v24.16b
  345         add     v28.4s, v4.4s, v21.4s
  346         zip1    v20.2d, v7.2d, v17.2d
  347         zip1    v4.4s, v26.4s, v21.4s
  348         zip1    v17.4s, v21.4s, v26.4s
  349         ushr    v26.4s, v19.4s, #7
  350         shl     v19.4s, v19.4s, #25
  351         orr     v19.16b, v19.16b, v26.16b
  352         ext     v25.16b, v25.16b, v25.16b, #8
  353         add     v27.4s, v28.4s, v19.4s
  354         eor     v25.16b, v27.16b, v25.16b
  355         ext     v24.16b, v24.16b, v24.16b, #4
  356         tbl     v25.16b, { v25.16b }, v0.16b
  357         add     v24.4s, v24.4s, v25.4s
  358         eor     v19.16b, v19.16b, v24.16b
  359         add     v7.4s, v27.4s, v7.4s
  360         ushr    v27.4s, v19.4s, #12
  361         shl     v19.4s, v19.4s, #20
  362         orr     v19.16b, v19.16b, v27.16b
  363         add     v7.4s, v7.4s, v19.4s
  364         eor     v25.16b, v25.16b, v7.16b
  365         add     v5.4s, v7.4s, v5.4s
  366         tbl     v7.16b, { v25.16b }, v1.16b
  367         add     v24.4s, v24.4s, v7.4s
  368         eor     v19.16b, v19.16b, v24.16b
  369         ushr    v25.4s, v19.4s, #7
  370         shl     v19.4s, v19.4s, #25
  371         ext     v5.16b, v5.16b, v5.16b, #4
  372         orr     v19.16b, v19.16b, v25.16b
  373         ext     v7.16b, v7.16b, v7.16b, #8
  374         add     v5.4s, v5.4s, v19.4s
  375         eor     v7.16b, v5.16b, v7.16b
  376         ext     v24.16b, v24.16b, v24.16b, #12
  377         tbl     v7.16b, { v7.16b }, v0.16b
  378         add     v24.4s, v24.4s, v7.4s
  379         eor     v19.16b, v19.16b, v24.16b
  380         ushr    v25.4s, v19.4s, #12
  381         shl     v19.4s, v19.4s, #20
  382         tbl     v16.16b, { v20.16b, v21.16b }, v2.16b
  383         add     v5.4s, v5.4s, v18.4s
  384         orr     v19.16b, v19.16b, v25.16b
  385         ext     v20.16b, v16.16b, v16.16b, #12
  386         ext     v4.16b, v17.16b, v4.16b, #8
  387         add     v5.4s, v5.4s, v19.4s
  388         uzp1    v21.4s, v16.4s, v20.4s
  389         mov     v17.16b, v4.16b
  390         ext     v25.16b, v5.16b, v5.16b, #12
  391         mov     v17.s[1], v21.s[2]
  392         add     v25.4s, v25.4s, v21.4s
  393         zip1    v20.2d, v4.2d, v18.2d
  394         ext     v22.16b, v23.16b, v23.16b, #12
  395         zip2    v26.4s, v18.4s, v4.4s
  396         tbl     v18.16b, { v20.16b, v21.16b }, v2.16b
  397         eor     v5.16b, v7.16b, v5.16b
  398         ext     v16.16b, v23.16b, v22.16b, #12
  399         ext     v22.16b, v6.16b, v6.16b, #4
  400         zip1    v27.4s, v26.4s, v21.4s
  401         zip1    v20.4s, v21.4s, v26.4s
  402         ext     v21.16b, v18.16b, v18.16b, #12
  403         tbl     v5.16b, { v5.16b }, v1.16b
  404         ext     v20.16b, v20.16b, v27.16b, #8
  405         uzp1    v27.4s, v18.4s, v21.4s
  406         uzp1    v18.4s, v22.4s, v22.4s
  407         add     v21.4s, v24.4s, v5.4s
  408         ext     v18.16b, v18.16b, v22.16b, #8
  409         eor     v19.16b, v19.16b, v21.16b
  410         tbl     v7.16b, { v16.16b, v17.16b }, v3.16b
  411         uzp2    v18.4s, v18.4s, v17.4s
  412         zip2    v16.4s, v16.4s, v20.4s
  413         ushr    v17.4s, v19.4s, #7
  414         shl     v19.4s, v19.4s, #25
  415         orr     v17.16b, v19.16b, v17.16b
  416         ext     v5.16b, v5.16b, v5.16b, #8
  417         add     v19.4s, v25.4s, v17.4s
  418         eor     v5.16b, v19.16b, v5.16b
  419         ext     v21.16b, v21.16b, v21.16b, #4
  420         tbl     v5.16b, { v5.16b }, v0.16b
  421         add     v4.4s, v19.4s, v4.4s
  422         add     v19.4s, v21.4s, v5.4s
  423         eor     v17.16b, v17.16b, v19.16b
  424         ushr    v21.4s, v17.4s, #12
  425         shl     v17.4s, v17.4s, #20
  426         orr     v17.16b, v17.16b, v21.16b
  427         add     v4.4s, v4.4s, v17.4s
  428         eor     v5.16b, v5.16b, v4.16b
  429         tbl     v5.16b, { v5.16b }, v1.16b
  430         add     v4.4s, v4.4s, v6.4s
  431         add     v6.4s, v19.4s, v5.4s
  432         eor     v17.16b, v17.16b, v6.16b
  433         ushr    v19.4s, v17.4s, #7
  434         shl     v17.4s, v17.4s, #25
  435         ext     v4.16b, v4.16b, v4.16b, #4
  436         orr     v17.16b, v17.16b, v19.16b
  437         ext     v5.16b, v5.16b, v5.16b, #8
  438         add     v4.4s, v4.4s, v17.4s
  439         eor     v5.16b, v4.16b, v5.16b
  440         ext     v6.16b, v6.16b, v6.16b, #12
  441         tbl     v5.16b, { v5.16b }, v0.16b
  442         add     v6.4s, v6.4s, v5.4s
  443         eor     v17.16b, v17.16b, v6.16b
  444         ushr    v19.4s, v17.4s, #12
  445         shl     v17.4s, v17.4s, #20
  446         add     v4.4s, v4.4s, v7.4s
  447         orr     v17.16b, v17.16b, v19.16b
  448         add     v4.4s, v4.4s, v17.4s
  449         eor     v5.16b, v5.16b, v4.16b
  450         tbl     v5.16b, { v5.16b }, v1.16b
  451         mov     v29.16b, v20.16b
  452         ext     v4.16b, v4.16b, v4.16b, #12
  453         add     v6.4s, v6.4s, v5.4s
  454         mov     v29.s[1], v27.s[2]
  455         add     v4.4s, v4.4s, v27.4s
  456         zip1    v26.2d, v20.2d, v7.2d
  457         zip1    v7.4s, v16.4s, v27.4s
  458         zip1    v16.4s, v27.4s, v16.4s
  459         eor     v17.16b, v17.16b, v6.16b
  460         ext     v7.16b, v16.16b, v7.16b, #8
  461         ushr    v16.4s, v17.4s, #7
  462         shl     v17.4s, v17.4s, #25
  463         orr     v16.16b, v17.16b, v16.16b
  464         ext     v5.16b, v5.16b, v5.16b, #8
  465         add     v4.4s, v4.4s, v16.4s
  466         eor     v5.16b, v4.16b, v5.16b
  467         ext     v6.16b, v6.16b, v6.16b, #4
  468         tbl     v5.16b, { v5.16b }, v0.16b
  469         add     v6.4s, v6.4s, v5.4s
  470         eor     v16.16b, v16.16b, v6.16b
  471         ushr    v17.4s, v16.4s, #12
  472         shl     v16.4s, v16.4s, #20
  473         add     v4.4s, v4.4s, v20.4s
  474         orr     v16.16b, v16.16b, v17.16b
  475         add     v4.4s, v4.4s, v16.4s
  476         eor     v5.16b, v5.16b, v4.16b
  477         tbl     v5.16b, { v5.16b }, v1.16b
  478         add     v6.4s, v6.4s, v5.4s
  479         eor     v16.16b, v16.16b, v6.16b
  480         add     v4.4s, v4.4s, v18.4s
  481         ushr    v17.4s, v16.4s, #7
  482         shl     v16.4s, v16.4s, #25
  483         ext     v23.16b, v22.16b, v22.16b, #12
  484         ext     v4.16b, v4.16b, v4.16b, #4
  485         orr     v16.16b, v16.16b, v17.16b
  486         ext     v28.16b, v22.16b, v23.16b, #12
  487         ext     v5.16b, v5.16b, v5.16b, #8
  488         add     v4.4s, v16.4s, v4.4s
  489         tbl     v3.16b, { v28.16b, v29.16b }, v3.16b    /* last use of the v3 mask; v3 is reused as data */
  490         eor     v5.16b, v4.16b, v5.16b
  491         ext     v6.16b, v6.16b, v6.16b, #12
  492         add     v3.4s, v4.4s, v3.4s
  493         tbl     v4.16b, { v5.16b }, v0.16b
  494         add     v5.4s, v6.4s, v4.4s
  495         eor     v6.16b, v16.16b, v5.16b
  496         ushr    v16.4s, v6.4s, #12
  497         shl     v6.4s, v6.4s, #20
  498         orr     v6.16b, v6.16b, v16.16b
  499         tbl     v2.16b, { v26.16b, v27.16b }, v2.16b    /* last use of the v2 mask; v2 is reused as data */
  500         add     v3.4s, v3.4s, v6.4s
  501         ext     v19.16b, v2.16b, v2.16b, #12
  502         eor     v4.16b, v4.16b, v3.16b
  503         uzp1    v2.4s, v2.4s, v19.4s
  504         ext     v3.16b, v3.16b, v3.16b, #12
  505         tbl     v4.16b, { v4.16b }, v1.16b
  506         add     v2.4s, v3.4s, v2.4s
  507         add     v3.4s, v5.4s, v4.4s
  508         eor     v5.16b, v6.16b, v3.16b
  509         ushr    v6.4s, v5.4s, #7
  510         shl     v5.4s, v5.4s, #25
  511         orr     v5.16b, v5.16b, v6.16b
  512         ext     v4.16b, v4.16b, v4.16b, #8
  513         add     v2.4s, v2.4s, v5.4s
  514         eor     v4.16b, v2.16b, v4.16b
  515         ext     v3.16b, v3.16b, v3.16b, #4
  516         tbl     v0.16b, { v4.16b }, v0.16b    /* last rotate-by-16; v0 now holds row3 */
  517         add     v3.4s, v3.4s, v0.4s
  518         eor     v4.16b, v5.16b, v3.16b
  519         ushr    v5.4s, v4.4s, #12
  520         shl     v4.4s, v4.4s, #20
  521         add     v2.4s, v2.4s, v7.4s
  522         orr     v4.16b, v4.16b, v5.16b
  523         add     v2.4s, v2.4s, v4.4s
  524         eor     v0.16b, v0.16b, v2.16b
  525         tbl     v0.16b, { v0.16b }, v1.16b    /* last rotate-by-8 of row3 */
  526         add     v1.4s, v3.4s, v0.4s    /* v1 = row2 + row3 (mask in v1 no longer needed) */
  527         eor     v3.16b, v4.16b, v1.16b    /* row1 ^= row2 */
  528         ext     v2.16b, v2.16b, v2.16b, #4    /* rotate the lanes of row0, */
  529         ext     v1.16b, v1.16b, v1.16b, #12    /* row2 */
  530         ushr    v4.4s, v3.4s, #7
  531         shl     v3.4s, v3.4s, #25
  532         ext     v0.16b, v0.16b, v0.16b, #8    /* and row3 */
  533         eor     v1.16b, v2.16b, v1.16b    /* v1 = row0 ^ row2 */
  534         orr     v2.16b, v3.16b, v4.16b    /* v2 = row1 rotated right by 7 */
  535         eor     v0.16b, v2.16b, v0.16b    /* v0 = row1 ^ row3 */
  536         stp     q1, q0, [x0]    /* write the eight result words back over the input state */
  537         ret
  538 .Lfunc_end0:
  539         .size   zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
  540         .cfi_endproc
  541 
  542         .section        .rodata.cst16,"aM",@progbits,16
  543         .p2align        4
      /*
       * 16-byte constants loaded by zfs_blake3_compress_xof_sse41.
       * The byte tables are tbl index vectors and are written one 32-bit
       * lane per row; an index of 16..31 selects from the second register
       * of a two-register tbl.
       */

      /* tbl indices: swap the 16-bit halves of each lane (rotate by 16). */
  544 .LCPI1_0:
              .byte    2,  3,  0,  1
              .byte    6,  7,  4,  5
              .byte   10, 11,  8,  9
              .byte   14, 15, 12, 13

      /*
       * Four words added to the rotated fourth row to form the third state
       * row (same values as decimal 1779033703, 3144134277, 1013904242,
       * 2773480762; these match BLAKE3 IV words 0-3).
       */
  561 .LCPI1_1:
              .word   0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a

      /* tbl indices: rotate each 32-bit lane right by 8 bits. */
  566 .LCPI1_2:
              .byte    1,  2,  3,  0
              .byte    5,  6,  7,  4
              .byte    9, 10, 11,  8
              .byte   13, 14, 15, 12

      /* Two-register tbl: lanes 0 and 2 from the first, 1 and 3 from the second. */
  583 .LCPI1_3:
              .byte    0,  1,  2,  3
              .byte   20, 21, 22, 23
              .byte    8,  9, 10, 11
              .byte   28, 29, 30, 31

      /* Two-register tbl: lanes 0-2 from the first register, lane 3 from the second. */
  600 .LCPI1_4:
              .byte    0,  1,  2,  3
              .byte    4,  5,  6,  7
              .byte    8,  9, 10, 11
              .byte   28, 29, 30, 31
  617         .text
  618         .globl  zfs_blake3_compress_xof_sse41
  619         .p2align        2
  620         .type   zfs_blake3_compress_xof_sse41,@function
  621 zfs_blake3_compress_xof_sse41:
  622         .cfi_startproc
  623         ldp     q7, q6, [x0]
  624         ldp     q17, q18, [x1]
  625         add     x12, x1, #32
  626         ld2     { v4.4s, v5.4s }, [x12]
  627         lsr     x10, x3, #32
  628         fmov    s16, w3
  629         adrp    x13, .LCPI1_0
  630         adrp    x11, .LCPI1_1
  631         and     w8, w2, #0xff
  632         mov     v16.s[1], w10
  633         ldr     q0, [x13, :lo12:.LCPI1_0]
  634         ldr     q20, [x11, :lo12:.LCPI1_1]
  635         adrp    x11, .LCPI1_4
  636         and     w9, w4, #0xff
  637         ldr     q2, [x11, :lo12:.LCPI1_4]
  638         mov     v16.s[2], w8
  639         uzp1    v21.4s, v17.4s, v18.4s
  640         add     v7.4s, v6.4s, v7.4s
  641         adrp    x12, .LCPI1_3
  642         mov     v16.s[3], w9
  643         uzp2    v18.4s, v17.4s, v18.4s
  644         add     v7.4s, v7.4s, v21.4s
  645         ext     v17.16b, v5.16b, v5.16b, #12
  646         ldr     q3, [x12, :lo12:.LCPI1_3]
  647         ext     v24.16b, v4.16b, v4.16b, #12
  648         eor     v16.16b, v7.16b, v16.16b
  649         mov     v27.16b, v17.16b
  650         uzp1    v19.4s, v21.4s, v21.4s
  651         ext     v25.16b, v21.16b, v21.16b, #12
  652         zip2    v28.4s, v18.4s, v17.4s
  653         tbl     v29.16b, { v16.16b }, v0.16b
  654         mov     v27.s[1], v24.s[2]
  655         zip1    v23.2d, v17.2d, v18.2d
  656         ext     v19.16b, v19.16b, v21.16b, #8
  657         add     v22.4s, v29.4s, v20.4s
  658         ext     v26.16b, v21.16b, v25.16b, #12
  659         tbl     v20.16b, { v23.16b, v24.16b }, v2.16b
  660         zip1    v21.4s, v28.4s, v24.4s
  661         zip1    v23.4s, v24.4s, v28.4s
  662         uzp2    v19.4s, v19.4s, v18.4s
  663         eor     v24.16b, v22.16b, v6.16b
  664         ext     v25.16b, v20.16b, v20.16b, #12
  665         ext     v6.16b, v23.16b, v21.16b, #8
  666         add     v7.4s, v7.4s, v18.4s
  667         ext     v18.16b, v19.16b, v19.16b, #4
  668         tbl     v16.16b, { v26.16b, v27.16b }, v3.16b
  669         uzp1    v21.4s, v20.4s, v25.4s
  670         mov     v26.16b, v6.16b
  671         ext     v23.16b, v18.16b, v18.16b, #12
  672         mov     v26.s[1], v21.s[2]
  673         adrp    x10, .LCPI1_2
  674         ext     v25.16b, v18.16b, v23.16b, #12
  675         uzp1    v23.4s, v18.4s, v18.4s
  676         ldr     q1, [x10, :lo12:.LCPI1_2]
  677         ext     v18.16b, v23.16b, v18.16b, #8
  678         ushr    v23.4s, v24.4s, #12
  679         shl     v24.4s, v24.4s, #20
  680         orr     v23.16b, v24.16b, v23.16b
  681         add     v7.4s, v7.4s, v23.4s
  682         eor     v27.16b, v29.16b, v7.16b
  683         add     v4.4s, v7.4s, v4.4s
  684         tbl     v7.16b, { v25.16b, v26.16b }, v3.16b
  685         tbl     v26.16b, { v27.16b }, v1.16b
  686         add     v22.4s, v22.4s, v26.4s
  687         uzp2    v18.4s, v18.4s, v16.4s
  688         eor     v23.16b, v23.16b, v22.16b
  689         ext     v5.16b, v18.16b, v18.16b, #4
  690         ushr    v27.4s, v23.4s, #7
  691         shl     v23.4s, v23.4s, #25
  692         uzp1    v25.4s, v5.4s, v5.4s
  693         orr     v23.16b, v23.16b, v27.16b
  694         ext     v28.16b, v4.16b, v4.16b, #12
  695         ext     v4.16b, v25.16b, v5.16b, #8
  696         ext     v25.16b, v26.16b, v26.16b, #8
  697         add     v26.4s, v28.4s, v23.4s
  698         eor     v25.16b, v26.16b, v25.16b
  699         ext     v22.16b, v22.16b, v22.16b, #4
  700         tbl     v25.16b, { v25.16b }, v0.16b
  701         add     v22.4s, v22.4s, v25.4s
  702         eor     v23.16b, v23.16b, v22.16b
  703         add     v17.4s, v26.4s, v17.4s
  704         ushr    v26.4s, v23.4s, #12
  705         shl     v23.4s, v23.4s, #20
  706         orr     v23.16b, v23.16b, v26.16b
  707         add     v17.4s, v17.4s, v23.4s
  708         eor     v25.16b, v25.16b, v17.16b
  709         add     v17.4s, v17.4s, v19.4s
  710         tbl     v19.16b, { v25.16b }, v1.16b
  711         add     v22.4s, v22.4s, v19.4s
  712         eor     v23.16b, v23.16b, v22.16b
  713         ushr    v25.4s, v23.4s, #7
  714         shl     v23.4s, v23.4s, #25
  715         ext     v17.16b, v17.16b, v17.16b, #4
  716         orr     v23.16b, v23.16b, v25.16b
  717         ext     v19.16b, v19.16b, v19.16b, #8
  718         add     v17.4s, v17.4s, v23.4s
  719         eor     v19.16b, v17.16b, v19.16b
  720         ext     v22.16b, v22.16b, v22.16b, #12
  721         tbl     v19.16b, { v19.16b }, v0.16b
  722         add     v22.4s, v22.4s, v19.4s
  723         eor     v23.16b, v23.16b, v22.16b
  724         ushr    v25.4s, v23.4s, #12
  725         shl     v23.4s, v23.4s, #20
  726         add     v17.4s, v17.4s, v16.4s
  727         orr     v23.16b, v23.16b, v25.16b
  728         add     v17.4s, v17.4s, v23.4s
  729         ext     v25.16b, v17.16b, v17.16b, #12
  730         eor     v17.16b, v19.16b, v17.16b
  731         tbl     v17.16b, { v17.16b }, v1.16b
  732         add     v19.4s, v22.4s, v17.4s
  733         eor     v22.16b, v23.16b, v19.16b
  734         add     v25.4s, v25.4s, v21.4s
  735         zip1    v20.2d, v6.2d, v16.2d
  736         ushr    v23.4s, v22.4s, #7
  737         shl     v22.4s, v22.4s, #25
  738         zip2    v24.4s, v16.4s, v6.4s
  739         tbl     v26.16b, { v20.16b, v21.16b }, v2.16b
  740         orr     v22.16b, v22.16b, v23.16b
  741         zip1    v16.4s, v24.4s, v21.4s
  742         zip1    v20.4s, v21.4s, v24.4s
  743         ext     v21.16b, v26.16b, v26.16b, #12
  744         ext     v17.16b, v17.16b, v17.16b, #8
  745         add     v25.4s, v25.4s, v22.4s
  746         ext     v16.16b, v20.16b, v16.16b, #8
  747         uzp1    v21.4s, v26.4s, v21.4s
  748         eor     v26.16b, v25.16b, v17.16b
  749         ext     v19.16b, v19.16b, v19.16b, #4
  750         tbl     v26.16b, { v26.16b }, v0.16b
  751         mov     v29.16b, v16.16b
  752         add     v19.4s, v19.4s, v26.4s
  753         ext     v27.16b, v5.16b, v5.16b, #12
  754         mov     v29.s[1], v21.s[2]
  755         eor     v22.16b, v22.16b, v19.16b
  756         ext     v28.16b, v5.16b, v27.16b, #12
  757         ushr    v27.4s, v22.4s, #12
  758         shl     v22.4s, v22.4s, #20
  759         add     v6.4s, v25.4s, v6.4s
  760         orr     v22.16b, v22.16b, v27.16b
  761         add     v6.4s, v6.4s, v22.4s
  762         eor     v26.16b, v26.16b, v6.16b
  763         add     v6.4s, v6.4s, v18.4s
  764         tbl     v18.16b, { v26.16b }, v1.16b
  765         add     v19.4s, v19.4s, v18.4s
  766         eor     v22.16b, v22.16b, v19.16b
  767         ushr    v26.4s, v22.4s, #7
  768         shl     v22.4s, v22.4s, #25
  769         ext     v6.16b, v6.16b, v6.16b, #4
  770         orr     v22.16b, v22.16b, v26.16b
  771         ext     v18.16b, v18.16b, v18.16b, #8
  772         add     v6.4s, v6.4s, v22.4s
  773         eor     v18.16b, v6.16b, v18.16b
  774         ext     v19.16b, v19.16b, v19.16b, #12
  775         tbl     v18.16b, { v18.16b }, v0.16b
  776         add     v19.4s, v19.4s, v18.4s
  777         eor     v22.16b, v22.16b, v19.16b
  778         ushr    v26.4s, v22.4s, #12
  779         shl     v22.4s, v22.4s, #20
  780         add     v6.4s, v6.4s, v7.4s
  781         orr     v22.16b, v22.16b, v26.16b
  782         add     v6.4s, v6.4s, v22.4s
  783         ext     v26.16b, v6.16b, v6.16b, #12
  784         eor     v6.16b, v18.16b, v6.16b
  785         uzp2    v4.4s, v4.4s, v7.4s
  786         zip2    v25.4s, v7.4s, v16.4s
  787         add     v26.4s, v26.4s, v21.4s
  788         zip1    v20.2d, v16.2d, v7.2d
  789         tbl     v6.16b, { v6.16b }, v1.16b
  790         ext     v24.16b, v4.16b, v4.16b, #4
  791         tbl     v27.16b, { v20.16b, v21.16b }, v2.16b
  792         zip1    v7.4s, v25.4s, v21.4s
  793         zip1    v20.4s, v21.4s, v25.4s
  794         add     v18.4s, v19.4s, v6.4s
  795         uzp1    v5.4s, v24.4s, v24.4s
  796         ext     v21.16b, v27.16b, v27.16b, #12
  797         ext     v7.16b, v20.16b, v7.16b, #8
  798         eor     v19.16b, v22.16b, v18.16b
  799         ext     v5.16b, v5.16b, v24.16b, #8
  800         tbl     v17.16b, { v28.16b, v29.16b }, v3.16b
  801         uzp1    v21.4s, v27.4s, v21.4s
  802         mov     v28.16b, v7.16b
  803         ushr    v22.4s, v19.4s, #7
  804         shl     v19.4s, v19.4s, #25
  805         ext     v23.16b, v24.16b, v24.16b, #12
  806         uzp2    v5.4s, v5.4s, v17.4s
  807         mov     v28.s[1], v21.s[2]
  808         orr     v19.16b, v19.16b, v22.16b
  809         ext     v27.16b, v24.16b, v23.16b, #12
  810         ext     v23.16b, v5.16b, v5.16b, #4
  811         ext     v6.16b, v6.16b, v6.16b, #8
  812         ext     v25.16b, v18.16b, v18.16b, #4
  813         add     v18.4s, v26.4s, v19.4s
  814         uzp1    v24.4s, v23.4s, v23.4s
  815         eor     v6.16b, v18.16b, v6.16b
  816         ext     v24.16b, v24.16b, v23.16b, #8
  817         add     v16.4s, v18.4s, v16.4s
  818         tbl     v18.16b, { v27.16b, v28.16b }, v3.16b
  819         tbl     v27.16b, { v6.16b }, v0.16b
  820         uzp2    v6.4s, v24.4s, v18.4s
  821         add     v24.4s, v25.4s, v27.4s
  822         eor     v19.16b, v19.16b, v24.16b
  823         ushr    v25.4s, v19.4s, #12
  824         shl     v19.4s, v19.4s, #20
  825         orr     v19.16b, v19.16b, v25.16b
  826         add     v16.4s, v16.4s, v19.4s
  827         eor     v25.16b, v27.16b, v16.16b
  828         add     v4.4s, v16.4s, v4.4s
  829         tbl     v16.16b, { v25.16b }, v1.16b
  830         add     v24.4s, v24.4s, v16.4s
  831         eor     v19.16b, v19.16b, v24.16b
  832         ushr    v25.4s, v19.4s, #7
  833         shl     v19.4s, v19.4s, #25
  834         ext     v4.16b, v4.16b, v4.16b, #4
  835         orr     v19.16b, v19.16b, v25.16b
  836         ext     v16.16b, v16.16b, v16.16b, #8
  837         add     v4.4s, v4.4s, v19.4s
  838         eor     v16.16b, v4.16b, v16.16b
  839         ext     v24.16b, v24.16b, v24.16b, #12
  840         tbl     v25.16b, { v16.16b }, v0.16b
  841         add     v24.4s, v24.4s, v25.4s
  842         eor     v16.16b, v19.16b, v24.16b
  843         ushr    v19.4s, v16.4s, #12
  844         shl     v16.4s, v16.4s, #20
  845         add     v4.4s, v4.4s, v17.4s
  846         orr     v19.16b, v16.16b, v19.16b
  847         add     v27.4s, v4.4s, v19.4s
  848         eor     v25.16b, v25.16b, v27.16b
  849         tbl     v25.16b, { v25.16b }, v1.16b
  850         add     v24.4s, v24.4s, v25.4s
  851         zip2    v26.4s, v17.4s, v7.4s
  852         ext     v4.16b, v27.16b, v27.16b, #12
  853         eor     v19.16b, v19.16b, v24.16b
  854         add     v28.4s, v4.4s, v21.4s
  855         zip1    v20.2d, v7.2d, v17.2d
  856         zip1    v4.4s, v26.4s, v21.4s
  857         zip1    v17.4s, v21.4s, v26.4s
  858         ushr    v26.4s, v19.4s, #7
  859         shl     v19.4s, v19.4s, #25
  860         orr     v19.16b, v19.16b, v26.16b
  861         ext     v25.16b, v25.16b, v25.16b, #8
  862         add     v27.4s, v28.4s, v19.4s
  863         eor     v25.16b, v27.16b, v25.16b
  864         ext     v24.16b, v24.16b, v24.16b, #4
  865         tbl     v25.16b, { v25.16b }, v0.16b
  866         add     v24.4s, v24.4s, v25.4s
  867         eor     v19.16b, v19.16b, v24.16b
  868         add     v7.4s, v27.4s, v7.4s
  869         ushr    v27.4s, v19.4s, #12
  870         shl     v19.4s, v19.4s, #20
  871         orr     v19.16b, v19.16b, v27.16b
  872         add     v7.4s, v7.4s, v19.4s
  873         eor     v25.16b, v25.16b, v7.16b
  874         add     v5.4s, v7.4s, v5.4s
  875         tbl     v7.16b, { v25.16b }, v1.16b
  876         add     v24.4s, v24.4s, v7.4s
  877         eor     v19.16b, v19.16b, v24.16b
  878         ushr    v25.4s, v19.4s, #7
  879         shl     v19.4s, v19.4s, #25
  880         ext     v5.16b, v5.16b, v5.16b, #4
  881         orr     v19.16b, v19.16b, v25.16b
  882         ext     v7.16b, v7.16b, v7.16b, #8
  883         add     v5.4s, v5.4s, v19.4s
  884         eor     v7.16b, v5.16b, v7.16b
  885         ext     v24.16b, v24.16b, v24.16b, #12
  886         tbl     v7.16b, { v7.16b }, v0.16b
  887         add     v24.4s, v24.4s, v7.4s
  888         eor     v19.16b, v19.16b, v24.16b
  889         ushr    v25.4s, v19.4s, #12
  890         shl     v19.4s, v19.4s, #20
  891         tbl     v16.16b, { v20.16b, v21.16b }, v2.16b
  892         add     v5.4s, v5.4s, v18.4s
  893         orr     v19.16b, v19.16b, v25.16b
  894         ext     v20.16b, v16.16b, v16.16b, #12
  895         ext     v4.16b, v17.16b, v4.16b, #8
  896         add     v5.4s, v5.4s, v19.4s
  897         uzp1    v21.4s, v16.4s, v20.4s
  898         mov     v17.16b, v4.16b
  899         ext     v25.16b, v5.16b, v5.16b, #12
  900         mov     v17.s[1], v21.s[2]
  901         add     v25.4s, v25.4s, v21.4s
  902         zip1    v20.2d, v4.2d, v18.2d
  903         ext     v22.16b, v23.16b, v23.16b, #12
  904         zip2    v26.4s, v18.4s, v4.4s
  905         tbl     v18.16b, { v20.16b, v21.16b }, v2.16b
  906         eor     v5.16b, v7.16b, v5.16b
  907         ext     v16.16b, v23.16b, v22.16b, #12
  908         ext     v22.16b, v6.16b, v6.16b, #4
  909         zip1    v27.4s, v26.4s, v21.4s
  910         zip1    v20.4s, v21.4s, v26.4s
  911         ext     v21.16b, v18.16b, v18.16b, #12
  912         tbl     v5.16b, { v5.16b }, v1.16b
  913         ext     v20.16b, v20.16b, v27.16b, #8
  914         uzp1    v27.4s, v18.4s, v21.4s
  915         uzp1    v18.4s, v22.4s, v22.4s
  916         add     v21.4s, v24.4s, v5.4s
  917         ext     v18.16b, v18.16b, v22.16b, #8
  918         eor     v19.16b, v19.16b, v21.16b
  919         tbl     v7.16b, { v16.16b, v17.16b }, v3.16b
  920         uzp2    v18.4s, v18.4s, v17.4s
  921         zip2    v16.4s, v16.4s, v20.4s
  922         ushr    v17.4s, v19.4s, #7
  923         shl     v19.4s, v19.4s, #25
  924         orr     v17.16b, v19.16b, v17.16b
  925         ext     v5.16b, v5.16b, v5.16b, #8
  926         add     v19.4s, v25.4s, v17.4s
  927         eor     v5.16b, v19.16b, v5.16b
  928         ext     v21.16b, v21.16b, v21.16b, #4
  929         tbl     v5.16b, { v5.16b }, v0.16b
  930         add     v4.4s, v19.4s, v4.4s
  931         add     v19.4s, v21.4s, v5.4s
  932         eor     v17.16b, v17.16b, v19.16b
  933         ushr    v21.4s, v17.4s, #12
  934         shl     v17.4s, v17.4s, #20
  935         orr     v17.16b, v17.16b, v21.16b
  936         add     v4.4s, v4.4s, v17.4s
  937         eor     v5.16b, v5.16b, v4.16b
  938         tbl     v5.16b, { v5.16b }, v1.16b
  939         add     v4.4s, v4.4s, v6.4s
  940         add     v6.4s, v19.4s, v5.4s
  941         eor     v17.16b, v17.16b, v6.16b
  942         ushr    v19.4s, v17.4s, #7
  943         shl     v17.4s, v17.4s, #25
  944         ext     v4.16b, v4.16b, v4.16b, #4
  945         orr     v17.16b, v17.16b, v19.16b
  946         ext     v5.16b, v5.16b, v5.16b, #8
  947         add     v4.4s, v4.4s, v17.4s
  948         eor     v5.16b, v4.16b, v5.16b
  949         ext     v6.16b, v6.16b, v6.16b, #12
  950         tbl     v5.16b, { v5.16b }, v0.16b
  951         add     v6.4s, v6.4s, v5.4s
  952         eor     v17.16b, v17.16b, v6.16b
  953         ushr    v19.4s, v17.4s, #12
  954         shl     v17.4s, v17.4s, #20
  955         add     v4.4s, v4.4s, v7.4s
  956         orr     v17.16b, v17.16b, v19.16b
  957         add     v4.4s, v4.4s, v17.4s
  958         eor     v5.16b, v5.16b, v4.16b
  959         tbl     v5.16b, { v5.16b }, v1.16b
  960         mov     v29.16b, v20.16b
  961         ext     v4.16b, v4.16b, v4.16b, #12
  962         add     v6.4s, v6.4s, v5.4s
  963         mov     v29.s[1], v27.s[2]
  964         add     v4.4s, v4.4s, v27.4s
  965         zip1    v26.2d, v20.2d, v7.2d
  966         zip1    v7.4s, v16.4s, v27.4s
  967         zip1    v16.4s, v27.4s, v16.4s
  968         eor     v17.16b, v17.16b, v6.16b
  969         ext     v7.16b, v16.16b, v7.16b, #8
  970         ushr    v16.4s, v17.4s, #7
  971         shl     v17.4s, v17.4s, #25
  972         orr     v16.16b, v17.16b, v16.16b
  973         ext     v5.16b, v5.16b, v5.16b, #8
  974         add     v4.4s, v4.4s, v16.4s
  975         eor     v5.16b, v4.16b, v5.16b
  976         ext     v6.16b, v6.16b, v6.16b, #4
  977         tbl     v5.16b, { v5.16b }, v0.16b
  978         add     v6.4s, v6.4s, v5.4s
  979         eor     v16.16b, v16.16b, v6.16b
  980         ushr    v17.4s, v16.4s, #12
  981         shl     v16.4s, v16.4s, #20
  982         add     v4.4s, v4.4s, v20.4s
  983         orr     v16.16b, v16.16b, v17.16b
  984         add     v4.4s, v4.4s, v16.4s
  985         eor     v5.16b, v5.16b, v4.16b
  986         tbl     v5.16b, { v5.16b }, v1.16b
  987         add     v6.4s, v6.4s, v5.4s
  988         eor     v16.16b, v16.16b, v6.16b
  989         add     v4.4s, v4.4s, v18.4s
  990         ushr    v17.4s, v16.4s, #7
  991         shl     v16.4s, v16.4s, #25
  992         ext     v23.16b, v22.16b, v22.16b, #12
  993         ext     v4.16b, v4.16b, v4.16b, #4
  994         orr     v16.16b, v16.16b, v17.16b
  995         ext     v28.16b, v22.16b, v23.16b, #12
  996         ext     v5.16b, v5.16b, v5.16b, #8
  997         add     v4.4s, v16.4s, v4.4s
  998         tbl     v3.16b, { v28.16b, v29.16b }, v3.16b
  999         eor     v5.16b, v4.16b, v5.16b
 1000         ext     v6.16b, v6.16b, v6.16b, #12
 1001         add     v3.4s, v4.4s, v3.4s
 1002         tbl     v4.16b, { v5.16b }, v0.16b
 1003         add     v5.4s, v6.4s, v4.4s
 1004         eor     v6.16b, v16.16b, v5.16b
 1005         ushr    v16.4s, v6.4s, #12
 1006         shl     v6.4s, v6.4s, #20
 1007         orr     v6.16b, v6.16b, v16.16b
 1008         tbl     v2.16b, { v26.16b, v27.16b }, v2.16b
 1009         add     v3.4s, v3.4s, v6.4s
 1010         ext     v19.16b, v2.16b, v2.16b, #12
 1011         eor     v4.16b, v4.16b, v3.16b
 1012         uzp1    v2.4s, v2.4s, v19.4s
 1013         ext     v3.16b, v3.16b, v3.16b, #12
 1014         tbl     v4.16b, { v4.16b }, v1.16b
 1015         add     v2.4s, v3.4s, v2.4s
 1016         add     v3.4s, v5.4s, v4.4s
 1017         eor     v5.16b, v6.16b, v3.16b
 1018         ushr    v6.4s, v5.4s, #7
 1019         shl     v5.4s, v5.4s, #25
 1020         orr     v5.16b, v5.16b, v6.16b
 1021         ext     v4.16b, v4.16b, v4.16b, #8
 1022         add     v2.4s, v2.4s, v5.4s
 1023         eor     v4.16b, v2.16b, v4.16b
 1024         ext     v3.16b, v3.16b, v3.16b, #4
 1025         tbl     v0.16b, { v4.16b }, v0.16b
 1026         add     v3.4s, v3.4s, v0.4s
 1027         eor     v4.16b, v5.16b, v3.16b
 1028         ushr    v5.4s, v4.4s, #12
 1029         shl     v4.4s, v4.4s, #20
 1030         add     v2.4s, v2.4s, v7.4s
 1031         orr     v4.16b, v4.16b, v5.16b
 1032         add     v2.4s, v2.4s, v4.4s
 1033         eor     v0.16b, v0.16b, v2.16b
 1034         tbl     v0.16b, { v0.16b }, v1.16b
 1035         add     v1.4s, v3.4s, v0.4s
 1036         eor     v3.16b, v4.16b, v1.16b
 1037         ushr    v4.4s, v3.4s, #7
 1038         shl     v3.4s, v3.4s, #25
 1039         ext     v2.16b, v2.16b, v2.16b, #4
 1040         ext     v0.16b, v0.16b, v0.16b, #8
 1041         ext     v1.16b, v1.16b, v1.16b, #12
 1042         orr     v3.16b, v3.16b, v4.16b
 1043         eor     v2.16b, v2.16b, v1.16b
 1044         eor     v3.16b, v3.16b, v0.16b
 1045         stp     q2, q3, [x5]
 1046         ldr     q2, [x0]
 1047         eor     v1.16b, v2.16b, v1.16b
 1048         str     q1, [x5, #32]
 1049         ldr     q1, [x0, #16]
 1050         eor     v0.16b, v1.16b, v0.16b
 1051         str     q0, [x5, #48]
 1052         ret
 1053 .Lfunc_end1:
 1054         .size   zfs_blake3_compress_xof_sse41, .Lfunc_end1-zfs_blake3_compress_xof_sse41
 1055         .cfi_endproc
 1056 
 1057         .section        .rodata.cst16,"aM",@progbits,16 /* read-only, mergeable 16-byte constants referenced by zfs_blake3_hash_many_sse41 */
 1058         .p2align        4 /* align the pool to 16 bytes so each constant can be loaded as one q register */
 1059 .LCPI2_0: /* four 32-bit lanes {0, 1, 2, 3}; hash_many masks them with bit 0 of w5 and adds them to the broadcast low word of x4 (presumably per-lane counter offsets - confirm against the C caller) */
 1060         .word   0
 1061         .word   1
 1062         .word   2
 1063         .word   3
 1064 .LCPI2_1: /* tbl byte-index table: within every 32-bit lane bytes {0,1,2,3} become {2,3,0,1}, i.e. a rotate by 16 bits */
 1065         .byte   2
 1066         .byte   3
 1067         .byte   0
 1068         .byte   1
 1069         .byte   6
 1070         .byte   7
 1071         .byte   4
 1072         .byte   5
 1073         .byte   10
 1074         .byte   11
 1075         .byte   8
 1076         .byte   9
 1077         .byte   14
 1078         .byte   15
 1079         .byte   12
 1080         .byte   13
 1081 .LCPI2_2: /* tbl byte-index table: within every 32-bit lane bytes {0,1,2,3} become {1,2,3,0}, i.e. a rotate right by 8 bits */
 1082         .byte   1
 1083         .byte   2
 1084         .byte   3
 1085         .byte   0
 1086         .byte   5
 1087         .byte   6
 1088         .byte   7
 1089         .byte   4
 1090         .byte   9
 1091         .byte   10
 1092         .byte   11
 1093         .byte   8
 1094         .byte   13
 1095         .byte   14
 1096         .byte   15
 1097         .byte   12
 1098         .text
 1099         .globl  zfs_blake3_hash_many_sse41
 1100         .p2align        2
 1101         .type   zfs_blake3_hash_many_sse41,@function
 1102 zfs_blake3_hash_many_sse41:
 1103         .cfi_startproc
 1104         stp     d15, d14, [sp, #-160]!
 1105         stp     d13, d12, [sp, #16]
 1106         stp     d11, d10, [sp, #32]
 1107         stp     d9, d8, [sp, #48]
 1108         stp     x29, x30, [sp, #64]
 1109         stp     x28, x27, [sp, #80]
 1110         stp     x26, x25, [sp, #96]
 1111         stp     x24, x23, [sp, #112]
 1112         stp     x22, x21, [sp, #128]
 1113         stp     x20, x19, [sp, #144]
 1114         mov     x29, sp
 1115         sub     sp, sp, #448
 1116         .cfi_def_cfa w29, 160
 1117         .cfi_offset w19, -8
 1118         .cfi_offset w20, -16
 1119         .cfi_offset w21, -24
 1120         .cfi_offset w22, -32
 1121         .cfi_offset w23, -40
 1122         .cfi_offset w24, -48
 1123         .cfi_offset w25, -56
 1124         .cfi_offset w26, -64
 1125         .cfi_offset w27, -72
 1126         .cfi_offset w28, -80
 1127         .cfi_offset w30, -88
 1128         .cfi_offset w29, -96
 1129         .cfi_offset b8, -104
 1130         .cfi_offset b9, -112
 1131         .cfi_offset b10, -120
 1132         .cfi_offset b11, -128
 1133         .cfi_offset b12, -136
 1134         .cfi_offset b13, -144
 1135         .cfi_offset b14, -152
 1136         .cfi_offset b15, -160
 1137         ldr     x26, [x29, #168]
 1138         ldrb    w27, [x29, #160]
 1139         mov     w19, w6
 1140         mov     x20, x4
 1141         mov     x22, x2
 1142         mov     x28, x1
 1143         cmp     x1, #4
 1144         mov     x24, x0
 1145         str     x3, [sp, #40]
 1146         b.lo    .LBB2_8
 1147         adrp    x11, .LCPI2_0
 1148         ldr     q0, [x11, :lo12:.LCPI2_0]
 1149         sbfx    w13, w5, #0, #1
 1150         dup     v1.4s, w13
 1151         mov     w10, #58983
 1152         mov     w11, #44677
 1153         mov     w12, #62322
 1154         and     v0.16b, v1.16b, v0.16b
 1155         mov     w13, #62778
 1156         orr     w8, w7, w19
 1157         adrp    x9, .LCPI2_1
 1158         movk    w10, #27145, lsl #16
 1159         movk    w11, #47975, lsl #16
 1160         movk    w12, #15470, lsl #16
 1161         movk    w13, #42319, lsl #16
 1162         str     q0, [sp, #16]
 1163         orr     v0.4s, #128, lsl #24
 1164         adrp    x14, .LCPI2_2
 1165         str     q0, [sp]
 1166 .LBB2_2:
 1167         ldr     x2, [sp, #40]
 1168         mov     x15, x2
 1169         ld1r    { v7.4s }, [x15], #4
 1170         add     x16, x2, #8
 1171         add     x17, x2, #12
 1172         add     x18, x2, #16
 1173         add     x0, x2, #20
 1174         add     x3, x2, #24
 1175         add     x2, x2, #28
 1176         ld1r    { v6.4s }, [x16]
 1177         ld1r    { v17.4s }, [x17]
 1178         ld1r    { v10.4s }, [x18]
 1179         ld1r    { v11.4s }, [x0]
 1180         ld1r    { v19.4s }, [x3]
 1181         ld1r    { v18.4s }, [x15]
 1182         ld1r    { v16.4s }, [x2]
 1183         cbz     x22, .LBB2_7
 1184         ldr     q1, [sp, #16]
 1185         dup     v0.4s, w20
 1186         ldp     x15, x16, [x24]
 1187         ldp     x17, x18, [x24, #16]
 1188         add     v1.4s, v0.4s, v1.4s
 1189         movi    v0.4s, #128, lsl #24
 1190         str     q1, [sp, #64]
 1191         eor     v0.16b, v1.16b, v0.16b
 1192         ldr     q1, [sp]
 1193         lsr     x2, x20, #32
 1194         mov     x0, xzr
 1195         mov     w6, w8
 1196         cmgt    v0.4s, v1.4s, v0.4s
 1197         dup     v1.4s, w2
 1198         sub     v0.4s, v1.4s, v0.4s
 1199         str     q0, [sp, #48]
 1200 .LBB2_4:
 1201         mov     w4, #16
 1202         stp     q16, q17, [sp, #192]
 1203         bfi     x4, x0, #6, #58
 1204         ldr     q1, [x15, x4]
 1205         ldr     q3, [x16, x4]
 1206         ldr     q2, [x17, x4]
 1207         ldr     q4, [x18, x4]
 1208         mov     w4, #32
 1209         bfi     x4, x0, #6, #58
 1210         ldr     q5, [x15, x4]
 1211         ldr     q20, [x16, x4]
 1212         ldr     q21, [x17, x4]
 1213         ldr     q22, [x18, x4]
 1214         mov     w4, #48
 1215         lsl     x3, x0, #6
 1216         bfi     x4, x0, #6, #58
 1217         add     x0, x0, #1
 1218         ldr     q0, [x15, x3]
 1219         ldr     q23, [x16, x3]
 1220         ldr     q16, [x17, x3]
 1221         ldr     q17, [x18, x3]
 1222         cmp     x0, x22
 1223         ldr     q25, [x15, x4]
 1224         ldr     q14, [x16, x4]
 1225         ldr     q28, [x17, x4]
 1226         ldr     q31, [x18, x4]
 1227         csel    w4, w27, wzr, eq
 1228         orr     w4, w4, w6
 1229         mov     x2, xzr
 1230         and     w6, w4, #0xff
 1231         add     x3, x3, #256
 1232 .LBB2_5:
 1233         ldr     x4, [x24, x2]
 1234         add     x2, x2, #8
 1235         cmp     x2, #32
 1236         add     x4, x4, x3
 1237         prfm    pldl1keep, [x4]
 1238         b.ne    .LBB2_5
 1239         zip1    v29.4s, v0.4s, v23.4s
 1240         zip2    v23.4s, v0.4s, v23.4s
 1241         zip1    v0.4s, v16.4s, v17.4s
 1242         zip2    v24.4s, v16.4s, v17.4s
 1243         zip1    v9.4s, v1.4s, v3.4s
 1244         zip2    v26.4s, v1.4s, v3.4s
 1245         zip1    v27.4s, v2.4s, v4.4s
 1246         zip2    v17.4s, v2.4s, v4.4s
 1247         zip1    v12.4s, v21.4s, v22.4s
 1248         zip2    v13.4s, v21.4s, v22.4s
 1249         add     v2.4s, v7.4s, v10.4s
 1250         add     v1.4s, v18.4s, v11.4s
 1251         ext     v7.16b, v0.16b, v29.16b, #8
 1252         ext     v22.16b, v24.16b, v23.16b, #8
 1253         zip1    v30.4s, v5.4s, v20.4s
 1254         zip2    v20.4s, v5.4s, v20.4s
 1255         stp     q1, q2, [sp, #112]
 1256         ext     v2.16b, v29.16b, v7.16b, #8
 1257         mov     v29.d[1], v0.d[0]
 1258         ext     v18.16b, v23.16b, v22.16b, #8
 1259         mov     v23.d[1], v24.d[0]
 1260         zip1    v21.4s, v25.4s, v14.4s
 1261         zip2    v4.4s, v25.4s, v14.4s
 1262         zip1    v14.4s, v28.4s, v31.4s
 1263         zip2    v15.4s, v28.4s, v31.4s
 1264         add     v8.4s, v6.4s, v19.4s
 1265         ext     v28.16b, v27.16b, v9.16b, #8
 1266         ext     v31.16b, v17.16b, v26.16b, #8
 1267         stur    q2, [x29, #-208]
 1268         mov     v7.16b, v29.16b
 1269         ext     v0.16b, v12.16b, v30.16b, #8
 1270         stp     q23, q29, [x29, #-80]
 1271         mov     v2.16b, v19.16b
 1272         ext     v19.16b, v13.16b, v20.16b, #8
 1273         mov     v29.16b, v9.16b
 1274         ext     v25.16b, v9.16b, v28.16b, #8
 1275         mov     v29.d[1], v27.d[0]
 1276         ext     v24.16b, v26.16b, v31.16b, #8
 1277         mov     v26.d[1], v17.d[0]
 1278         ext     v17.16b, v15.16b, v4.16b, #8
 1279         ext     v27.16b, v30.16b, v0.16b, #8
 1280         ext     v0.16b, v20.16b, v19.16b, #8
 1281         stp     q0, q25, [sp, #80]
 1282         ext     v0.16b, v4.16b, v17.16b, #8
 1283         str     q0, [sp, #224]
 1284         ldr     q0, [sp, #128]
 1285         mov     v6.16b, v23.16b
 1286         mov     v22.16b, v4.16b
 1287         ldr     q16, [x9, :lo12:.LCPI2_1]
 1288         add     v17.4s, v0.4s, v7.4s
 1289         ldr     q0, [sp, #112]
 1290         mov     v30.d[1], v12.d[0]
 1291         add     v7.4s, v8.4s, v29.4s
 1292         mov     v20.d[1], v13.d[0]
 1293         add     v4.4s, v0.4s, v6.4s
 1294         ldr     q0, [sp, #64]
 1295         dup     v3.4s, w12
 1296         ext     v28.16b, v14.16b, v21.16b, #8
 1297         dup     v1.4s, w10
 1298         eor     v19.16b, v17.16b, v0.16b
 1299         ldr     q0, [sp, #48]
 1300         ext     v23.16b, v21.16b, v28.16b, #8
 1301         mov     v21.d[1], v14.d[0]
 1302         tbl     v14.16b, { v19.16b }, v16.16b
 1303         eor     v12.16b, v4.16b, v0.16b
 1304         movi    v0.4s, #64
 1305         eor     v13.16b, v7.16b, v0.16b
 1306         tbl     v13.16b, { v13.16b }, v16.16b
 1307         add     v6.4s, v13.4s, v3.4s
 1308         dup     v5.4s, w11
 1309         tbl     v12.16b, { v12.16b }, v16.16b
 1310         add     v1.4s, v14.4s, v1.4s
 1311         eor     v9.16b, v6.16b, v2.16b
 1312         ldp     q2, q0, [sp, #192]
 1313         add     v5.4s, v12.4s, v5.4s
 1314         eor     v19.16b, v1.16b, v10.16b
 1315         eor     v10.16b, v5.16b, v11.16b
 1316         ushr    v11.4s, v19.4s, #12
 1317         shl     v19.4s, v19.4s, #20
 1318         orr     v11.16b, v19.16b, v11.16b
 1319         ushr    v19.4s, v10.4s, #12
 1320         shl     v10.4s, v10.4s, #20
 1321         mov     v22.d[1], v15.d[0]
 1322         orr     v10.16b, v10.16b, v19.16b
 1323         ushr    v19.4s, v9.4s, #12
 1324         shl     v9.4s, v9.4s, #20
 1325         add     v15.4s, v0.4s, v2.4s
 1326         orr     v9.16b, v9.16b, v19.16b
 1327         dup     v19.4s, w6
 1328         add     v15.4s, v15.4s, v26.4s
 1329         eor     v19.16b, v15.16b, v19.16b
 1330         tbl     v3.16b, { v19.16b }, v16.16b
 1331         dup     v19.4s, w13
 1332         add     v8.4s, v3.4s, v19.4s
 1333         ldur    q31, [x29, #-208]
 1334         eor     v19.16b, v8.16b, v2.16b
 1335         ushr    v0.4s, v19.4s, #12
 1336         shl     v19.4s, v19.4s, #20
 1337         orr     v2.16b, v19.16b, v0.16b
 1338         ldr     q19, [x14, :lo12:.LCPI2_2]
 1339         add     v17.4s, v17.4s, v31.4s
 1340         add     v17.4s, v17.4s, v11.4s
 1341         eor     v14.16b, v14.16b, v17.16b
 1342         tbl     v14.16b, { v14.16b }, v19.16b
 1343         add     v1.4s, v1.4s, v14.4s
 1344         eor     v11.16b, v1.16b, v11.16b
 1345         add     v4.4s, v4.4s, v18.4s
 1346         ushr    v0.4s, v11.4s, #7
 1347         shl     v11.4s, v11.4s, #25
 1348         add     v4.4s, v4.4s, v10.4s
 1349         orr     v0.16b, v11.16b, v0.16b
 1350         eor     v11.16b, v12.16b, v4.16b
 1351         tbl     v11.16b, { v11.16b }, v19.16b
 1352         add     v5.4s, v5.4s, v11.4s
 1353         eor     v10.16b, v5.16b, v10.16b
 1354         add     v7.4s, v7.4s, v25.4s
 1355         ushr    v12.4s, v10.4s, #7
 1356         shl     v10.4s, v10.4s, #25
 1357         add     v7.4s, v7.4s, v9.4s
 1358         orr     v10.16b, v10.16b, v12.16b
 1359         eor     v12.16b, v13.16b, v7.16b
 1360         tbl     v12.16b, { v12.16b }, v19.16b
 1361         add     v6.4s, v6.4s, v12.4s
 1362         eor     v9.16b, v6.16b, v9.16b
 1363         ushr    v13.4s, v9.4s, #7
 1364         shl     v9.4s, v9.4s, #25
 1365         orr     v9.16b, v9.16b, v13.16b
 1366         add     v13.4s, v15.4s, v24.4s
 1367         add     v13.4s, v13.4s, v2.4s
 1368         eor     v3.16b, v3.16b, v13.16b
 1369         tbl     v3.16b, { v3.16b }, v19.16b
 1370         add     v8.4s, v8.4s, v3.4s
 1371         eor     v2.16b, v8.16b, v2.16b
 1372         add     v17.4s, v17.4s, v30.4s
 1373         ushr    v15.4s, v2.4s, #7
 1374         shl     v2.4s, v2.4s, #25
 1375         add     v17.4s, v17.4s, v10.4s
 1376         add     v4.4s, v4.4s, v20.4s
 1377         orr     v2.16b, v2.16b, v15.16b
 1378         eor     v3.16b, v3.16b, v17.16b
 1379         add     v4.4s, v4.4s, v9.4s
 1380         add     v7.4s, v7.4s, v21.4s
 1381         tbl     v3.16b, { v3.16b }, v16.16b
 1382         eor     v14.16b, v14.16b, v4.16b
 1383         add     v7.4s, v7.4s, v2.4s
 1384         add     v13.4s, v13.4s, v22.4s
 1385         mov     v28.16b, v26.16b
 1386         stur    q26, [x29, #-112]
 1387         mov     v26.16b, v18.16b
 1388         mov     v18.16b, v24.16b
 1389         stur    q24, [x29, #-160]
 1390         add     v6.4s, v6.4s, v3.4s
 1391         mov     v24.16b, v20.16b
 1392         tbl     v14.16b, { v14.16b }, v16.16b
 1393         eor     v11.16b, v11.16b, v7.16b
 1394         add     v13.4s, v13.4s, v0.4s
 1395         ldr     q20, [sp, #80]
 1396         eor     v10.16b, v6.16b, v10.16b
 1397         add     v8.4s, v8.4s, v14.4s
 1398         tbl     v11.16b, { v11.16b }, v16.16b
 1399         eor     v12.16b, v12.16b, v13.16b
 1400         stp     q30, q22, [x29, #-192]
 1401         ushr    v15.4s, v10.4s, #12
 1402         shl     v10.4s, v10.4s, #20
 1403         eor     v9.16b, v8.16b, v9.16b
 1404         add     v1.4s, v1.4s, v11.4s
 1405         tbl     v12.16b, { v12.16b }, v16.16b
 1406         mov     v30.16b, v27.16b
 1407         add     v17.4s, v17.4s, v27.4s
 1408         ldr     q27, [sp, #224]
 1409         orr     v10.16b, v10.16b, v15.16b
 1410         ushr    v15.4s, v9.4s, #12
 1411         shl     v9.4s, v9.4s, #20
 1412         eor     v2.16b, v1.16b, v2.16b
 1413         add     v5.4s, v5.4s, v12.4s
 1414         orr     v9.16b, v9.16b, v15.16b
 1415         ushr    v15.4s, v2.4s, #12
 1416         shl     v2.4s, v2.4s, #20
 1417         eor     v0.16b, v5.16b, v0.16b
 1418         add     v17.4s, v17.4s, v10.4s
 1419         add     v4.4s, v4.4s, v20.4s
 1420         orr     v2.16b, v2.16b, v15.16b
 1421         ushr    v15.4s, v0.4s, #12
 1422         shl     v0.4s, v0.4s, #20
 1423         eor     v3.16b, v3.16b, v17.16b
 1424         add     v4.4s, v4.4s, v9.4s
 1425         add     v7.4s, v7.4s, v23.4s
 1426         orr     v0.16b, v0.16b, v15.16b
 1427         tbl     v3.16b, { v3.16b }, v19.16b
 1428         eor     v14.16b, v14.16b, v4.16b
 1429         add     v7.4s, v7.4s, v2.4s
 1430         add     v13.4s, v13.4s, v27.4s
 1431         add     v6.4s, v6.4s, v3.4s
 1432         tbl     v14.16b, { v14.16b }, v19.16b
 1433         eor     v11.16b, v11.16b, v7.16b
 1434         add     v13.4s, v13.4s, v0.4s
 1435         eor     v10.16b, v6.16b, v10.16b
 1436         add     v8.4s, v8.4s, v14.4s
 1437         tbl     v11.16b, { v11.16b }, v19.16b
 1438         eor     v12.16b, v12.16b, v13.16b
 1439         stur    q21, [x29, #-144]
 1440         ushr    v15.4s, v10.4s, #7
 1441         shl     v10.4s, v10.4s, #25
 1442         eor     v9.16b, v8.16b, v9.16b
 1443         add     v1.4s, v1.4s, v11.4s
 1444         tbl     v12.16b, { v12.16b }, v19.16b
 1445         ldur    q21, [x29, #-80]
 1446         orr     v10.16b, v10.16b, v15.16b
 1447         ushr    v15.4s, v9.4s, #7
 1448         shl     v9.4s, v9.4s, #25
 1449         eor     v2.16b, v1.16b, v2.16b
 1450         add     v5.4s, v5.4s, v12.4s
 1451         orr     v9.16b, v9.16b, v15.16b
 1452         ushr    v15.4s, v2.4s, #7
 1453         shl     v2.4s, v2.4s, #25
 1454         eor     v0.16b, v5.16b, v0.16b
 1455         orr     v2.16b, v2.16b, v15.16b
 1456         ushr    v15.4s, v0.4s, #7
 1457         shl     v0.4s, v0.4s, #25
 1458         orr     v0.16b, v0.16b, v15.16b
 1459         add     v17.4s, v17.4s, v21.4s
 1460         add     v17.4s, v17.4s, v0.4s
 1461         add     v4.4s, v4.4s, v26.4s
 1462         eor     v14.16b, v14.16b, v17.16b
 1463         add     v4.4s, v4.4s, v10.4s
 1464         add     v7.4s, v7.4s, v18.4s
 1465         tbl     v14.16b, { v14.16b }, v16.16b
 1466         eor     v11.16b, v11.16b, v4.16b
 1467         add     v7.4s, v7.4s, v9.4s
 1468         add     v13.4s, v13.4s, v29.4s
 1469         add     v1.4s, v1.4s, v14.4s
 1470         tbl     v11.16b, { v11.16b }, v16.16b
 1471         eor     v12.16b, v12.16b, v7.16b
 1472         add     v13.4s, v13.4s, v2.4s
 1473         eor     v0.16b, v0.16b, v1.16b
 1474         add     v5.4s, v5.4s, v11.4s
 1475         tbl     v12.16b, { v12.16b }, v16.16b
 1476         eor     v3.16b, v3.16b, v13.16b
 1477         ldur    q22, [x29, #-64]
 1478         ushr    v15.4s, v0.4s, #12
 1479         shl     v0.4s, v0.4s, #20
 1480         eor     v10.16b, v5.16b, v10.16b
 1481         add     v6.4s, v6.4s, v12.4s
 1482         tbl     v3.16b, { v3.16b }, v16.16b
 1483         orr     v0.16b, v0.16b, v15.16b
 1484         ushr    v15.4s, v10.4s, #12
 1485         shl     v10.4s, v10.4s, #20
 1486         eor     v9.16b, v6.16b, v9.16b
 1487         add     v8.4s, v8.4s, v3.4s
 1488         add     v17.4s, v17.4s, v28.4s
 1489         orr     v10.16b, v10.16b, v15.16b
 1490         ushr    v15.4s, v9.4s, #12
 1491         shl     v9.4s, v9.4s, #20
 1492         eor     v2.16b, v8.16b, v2.16b
 1493         add     v17.4s, v17.4s, v0.4s
 1494         add     v4.4s, v4.4s, v24.4s
 1495         orr     v9.16b, v9.16b, v15.16b
 1496         ushr    v15.4s, v2.4s, #12
 1497         shl     v2.4s, v2.4s, #20
 1498         eor     v14.16b, v14.16b, v17.16b
 1499         add     v4.4s, v4.4s, v10.4s
 1500         add     v7.4s, v7.4s, v22.4s
 1501         orr     v2.16b, v2.16b, v15.16b
 1502         tbl     v14.16b, { v14.16b }, v19.16b
 1503         eor     v11.16b, v11.16b, v4.16b
 1504         add     v7.4s, v7.4s, v9.4s
 1505         add     v13.4s, v13.4s, v23.4s
 1506         add     v1.4s, v1.4s, v14.4s
 1507         tbl     v11.16b, { v11.16b }, v19.16b
 1508         eor     v12.16b, v12.16b, v7.16b
 1509         add     v13.4s, v13.4s, v2.4s
 1510         eor     v0.16b, v0.16b, v1.16b
 1511         add     v5.4s, v5.4s, v11.4s
 1512         tbl     v12.16b, { v12.16b }, v19.16b
 1513         eor     v3.16b, v3.16b, v13.16b
 1514         ldur    q22, [x29, #-144]
 1515         ushr    v15.4s, v0.4s, #7
 1516         shl     v0.4s, v0.4s, #25
 1517         eor     v10.16b, v5.16b, v10.16b
 1518         add     v6.4s, v6.4s, v12.4s
 1519         tbl     v3.16b, { v3.16b }, v19.16b
 1520         orr     v0.16b, v0.16b, v15.16b
 1521         ushr    v15.4s, v10.4s, #7
 1522         shl     v10.4s, v10.4s, #25
 1523         eor     v9.16b, v6.16b, v9.16b
 1524         add     v8.4s, v8.4s, v3.4s
 1525         orr     v10.16b, v10.16b, v15.16b
 1526         ushr    v15.4s, v9.4s, #7
 1527         shl     v9.4s, v9.4s, #25
 1528         eor     v2.16b, v8.16b, v2.16b
 1529         add     v17.4s, v17.4s, v31.4s
 1530         orr     v9.16b, v9.16b, v15.16b
 1531         ushr    v15.4s, v2.4s, #7
 1532         shl     v2.4s, v2.4s, #25
 1533         add     v17.4s, v17.4s, v10.4s
 1534         add     v4.4s, v4.4s, v22.4s
 1535         orr     v2.16b, v2.16b, v15.16b
 1536         eor     v3.16b, v3.16b, v17.16b
 1537         add     v4.4s, v4.4s, v9.4s
 1538         add     v7.4s, v7.4s, v30.4s
 1539         tbl     v3.16b, { v3.16b }, v16.16b
 1540         eor     v14.16b, v14.16b, v4.16b
 1541         add     v7.4s, v7.4s, v2.4s
 1542         add     v13.4s, v13.4s, v27.4s
 1543         add     v6.4s, v6.4s, v3.4s
 1544         tbl     v14.16b, { v14.16b }, v16.16b
 1545         eor     v11.16b, v11.16b, v7.16b
 1546         add     v13.4s, v13.4s, v0.4s
 1547         ldr     q27, [sp, #96]
 1548         mov     v21.16b, v26.16b
 1549         stur    q26, [x29, #-96]
 1550         mov     v28.16b, v31.16b
 1551         eor     v10.16b, v6.16b, v10.16b
 1552         add     v8.4s, v8.4s, v14.4s
 1553         tbl     v11.16b, { v11.16b }, v16.16b
 1554         eor     v12.16b, v12.16b, v13.16b
 1555         ldp     q31, q26, [x29, #-192]
 1556         ushr    v15.4s, v10.4s, #12
 1557         shl     v10.4s, v10.4s, #20
 1558         eor     v9.16b, v8.16b, v9.16b
 1559         add     v1.4s, v1.4s, v11.4s
 1560         tbl     v12.16b, { v12.16b }, v16.16b
 1561         orr     v10.16b, v10.16b, v15.16b
 1562         ushr    v15.4s, v9.4s, #12
 1563         shl     v9.4s, v9.4s, #20
 1564         eor     v2.16b, v1.16b, v2.16b
 1565         add     v5.4s, v5.4s, v12.4s
 1566         add     v17.4s, v17.4s, v20.4s
 1567         orr     v9.16b, v9.16b, v15.16b
 1568         ushr    v15.4s, v2.4s, #12
 1569         shl     v2.4s, v2.4s, #20
 1570         eor     v0.16b, v5.16b, v0.16b
 1571         add     v17.4s, v17.4s, v10.4s
 1572         add     v4.4s, v4.4s, v27.4s
 1573         orr     v2.16b, v2.16b, v15.16b
 1574         ushr    v15.4s, v0.4s, #12
 1575         shl     v0.4s, v0.4s, #20
 1576         eor     v3.16b, v3.16b, v17.16b
 1577         add     v4.4s, v4.4s, v9.4s
 1578         add     v7.4s, v7.4s, v26.4s
 1579         orr     v0.16b, v0.16b, v15.16b
 1580         tbl     v3.16b, { v3.16b }, v19.16b
 1581         eor     v14.16b, v14.16b, v4.16b
 1582         add     v7.4s, v7.4s, v2.4s
 1583         add     v13.4s, v13.4s, v31.4s
 1584         add     v6.4s, v6.4s, v3.4s
 1585         tbl     v14.16b, { v14.16b }, v19.16b
 1586         eor     v11.16b, v11.16b, v7.16b
 1587         add     v13.4s, v13.4s, v0.4s
 1588         eor     v10.16b, v6.16b, v10.16b
 1589         add     v8.4s, v8.4s, v14.4s
 1590         tbl     v11.16b, { v11.16b }, v19.16b
 1591         eor     v12.16b, v12.16b, v13.16b
 1592         ushr    v15.4s, v10.4s, #7
 1593         shl     v10.4s, v10.4s, #25
 1594         eor     v9.16b, v8.16b, v9.16b
 1595         add     v1.4s, v1.4s, v11.4s
 1596         tbl     v12.16b, { v12.16b }, v19.16b
 1597         orr     v10.16b, v10.16b, v15.16b
 1598         ushr    v15.4s, v9.4s, #7
 1599         shl     v9.4s, v9.4s, #25
 1600         eor     v2.16b, v1.16b, v2.16b
 1601         add     v5.4s, v5.4s, v12.4s
 1602         orr     v9.16b, v9.16b, v15.16b
 1603         ushr    v15.4s, v2.4s, #7
 1604         shl     v2.4s, v2.4s, #25
 1605         eor     v0.16b, v5.16b, v0.16b
 1606         mov     v18.16b, v24.16b
 1607         mov     v24.16b, v20.16b
 1608         orr     v2.16b, v2.16b, v15.16b
 1609         ushr    v15.4s, v0.4s, #7
 1610         shl     v0.4s, v0.4s, #25
 1611         ldur    q20, [x29, #-160]
 1612         orr     v0.16b, v0.16b, v15.16b
 1613         add     v17.4s, v17.4s, v21.4s
 1614         add     v17.4s, v17.4s, v0.4s
 1615         add     v4.4s, v4.4s, v18.4s
 1616         eor     v14.16b, v14.16b, v17.16b
 1617         add     v4.4s, v4.4s, v10.4s
 1618         add     v7.4s, v7.4s, v23.4s
 1619         tbl     v14.16b, { v14.16b }, v16.16b
 1620         eor     v11.16b, v11.16b, v4.16b
 1621         add     v7.4s, v7.4s, v9.4s
 1622         add     v13.4s, v13.4s, v20.4s
 1623         add     v1.4s, v1.4s, v14.4s
 1624         tbl     v11.16b, { v11.16b }, v16.16b
 1625         eor     v12.16b, v12.16b, v7.16b
 1626         add     v13.4s, v13.4s, v2.4s
 1627         eor     v0.16b, v0.16b, v1.16b
 1628         add     v5.4s, v5.4s, v11.4s
 1629         tbl     v12.16b, { v12.16b }, v16.16b
 1630         eor     v3.16b, v3.16b, v13.16b
 1631         ldur    q25, [x29, #-80]
 1632         ushr    v15.4s, v0.4s, #12
 1633         shl     v0.4s, v0.4s, #20
 1634         eor     v10.16b, v5.16b, v10.16b
 1635         add     v6.4s, v6.4s, v12.4s
 1636         tbl     v3.16b, { v3.16b }, v16.16b
 1637         orr     v0.16b, v0.16b, v15.16b
 1638         ushr    v15.4s, v10.4s, #12
 1639         shl     v10.4s, v10.4s, #20
 1640         eor     v9.16b, v6.16b, v9.16b
 1641         add     v8.4s, v8.4s, v3.4s
 1642         add     v17.4s, v17.4s, v29.4s
 1643         orr     v10.16b, v10.16b, v15.16b
 1644         ushr    v15.4s, v9.4s, #12
 1645         shl     v9.4s, v9.4s, #20
 1646         eor     v2.16b, v8.16b, v2.16b
 1647         add     v17.4s, v17.4s, v0.4s
 1648         add     v4.4s, v4.4s, v22.4s
 1649         orr     v9.16b, v9.16b, v15.16b
 1650         ushr    v15.4s, v2.4s, #12
 1651         shl     v2.4s, v2.4s, #20
 1652         eor     v14.16b, v14.16b, v17.16b
 1653         add     v4.4s, v4.4s, v10.4s
 1654         add     v7.4s, v7.4s, v25.4s
 1655         orr     v2.16b, v2.16b, v15.16b
 1656         tbl     v14.16b, { v14.16b }, v19.16b
 1657         eor     v11.16b, v11.16b, v4.16b
 1658         add     v7.4s, v7.4s, v9.4s
 1659         add     v13.4s, v13.4s, v26.4s
 1660         add     v1.4s, v1.4s, v14.4s
 1661         tbl     v11.16b, { v11.16b }, v19.16b
 1662         eor     v12.16b, v12.16b, v7.16b
 1663         add     v13.4s, v13.4s, v2.4s
 1664         ldur    q25, [x29, #-112]
 1665         eor     v0.16b, v0.16b, v1.16b
 1666         add     v5.4s, v5.4s, v11.4s
 1667         tbl     v12.16b, { v12.16b }, v19.16b
 1668         eor     v3.16b, v3.16b, v13.16b
 1669         ushr    v15.4s, v0.4s, #7
 1670         shl     v0.4s, v0.4s, #25
 1671         eor     v10.16b, v5.16b, v10.16b
 1672         add     v6.4s, v6.4s, v12.4s
 1673         tbl     v3.16b, { v3.16b }, v19.16b
 1674         orr     v0.16b, v0.16b, v15.16b
 1675         ushr    v15.4s, v10.4s, #7
 1676         shl     v10.4s, v10.4s, #25
 1677         eor     v9.16b, v6.16b, v9.16b
 1678         add     v8.4s, v8.4s, v3.4s
 1679         orr     v10.16b, v10.16b, v15.16b
 1680         ushr    v15.4s, v9.4s, #7
 1681         shl     v9.4s, v9.4s, #25
 1682         eor     v2.16b, v8.16b, v2.16b
 1683         add     v17.4s, v17.4s, v25.4s
 1684         orr     v9.16b, v9.16b, v15.16b
 1685         ushr    v15.4s, v2.4s, #7
 1686         shl     v2.4s, v2.4s, #25
 1687         add     v17.4s, v17.4s, v10.4s
 1688         add     v4.4s, v4.4s, v30.4s
 1689         orr     v2.16b, v2.16b, v15.16b
 1690         eor     v3.16b, v3.16b, v17.16b
 1691         add     v4.4s, v4.4s, v9.4s
 1692         add     v7.4s, v7.4s, v24.4s
 1693         tbl     v3.16b, { v3.16b }, v16.16b
 1694         eor     v14.16b, v14.16b, v4.16b
 1695         add     v7.4s, v7.4s, v2.4s
 1696         add     v13.4s, v13.4s, v31.4s
 1697         add     v6.4s, v6.4s, v3.4s
 1698         tbl     v14.16b, { v14.16b }, v16.16b
 1699         eor     v11.16b, v11.16b, v7.16b
 1700         add     v13.4s, v13.4s, v0.4s
 1701         ldur    q25, [x29, #-64]
 1702         eor     v10.16b, v6.16b, v10.16b
 1703         add     v8.4s, v8.4s, v14.4s
 1704         tbl     v11.16b, { v11.16b }, v16.16b
 1705         eor     v12.16b, v12.16b, v13.16b
 1706         ldr     q31, [sp, #224]
 1707         ushr    v15.4s, v10.4s, #12
 1708         shl     v10.4s, v10.4s, #20
 1709         eor     v9.16b, v8.16b, v9.16b
 1710         add     v1.4s, v1.4s, v11.4s
 1711         tbl     v12.16b, { v12.16b }, v16.16b
 1712         orr     v10.16b, v10.16b, v15.16b
 1713         ushr    v15.4s, v9.4s, #12
 1714         shl     v9.4s, v9.4s, #20
 1715         eor     v2.16b, v1.16b, v2.16b
 1716         add     v5.4s, v5.4s, v12.4s
 1717         add     v17.4s, v17.4s, v27.4s
 1718         orr     v9.16b, v9.16b, v15.16b
 1719         ushr    v15.4s, v2.4s, #12
 1720         shl     v2.4s, v2.4s, #20
 1721         eor     v0.16b, v5.16b, v0.16b
 1722         add     v17.4s, v17.4s, v10.4s
 1723         add     v4.4s, v4.4s, v25.4s
 1724         orr     v2.16b, v2.16b, v15.16b
 1725         ushr    v15.4s, v0.4s, #12
 1726         shl     v0.4s, v0.4s, #20
 1727         eor     v3.16b, v3.16b, v17.16b
 1728         add     v4.4s, v4.4s, v9.4s
 1729         add     v7.4s, v7.4s, v31.4s
 1730         orr     v0.16b, v0.16b, v15.16b
 1731         tbl     v3.16b, { v3.16b }, v19.16b
 1732         eor     v14.16b, v14.16b, v4.16b
 1733         add     v7.4s, v7.4s, v2.4s
 1734         add     v13.4s, v13.4s, v28.4s
 1735         add     v6.4s, v6.4s, v3.4s
 1736         tbl     v14.16b, { v14.16b }, v19.16b
 1737         eor     v11.16b, v11.16b, v7.16b
 1738         add     v13.4s, v13.4s, v0.4s
 1739         eor     v10.16b, v6.16b, v10.16b
 1740         add     v8.4s, v8.4s, v14.4s
 1741         tbl     v11.16b, { v11.16b }, v19.16b
 1742         eor     v12.16b, v12.16b, v13.16b
 1743         ushr    v15.4s, v10.4s, #7
 1744         shl     v10.4s, v10.4s, #25
 1745         eor     v9.16b, v8.16b, v9.16b
 1746         add     v1.4s, v1.4s, v11.4s
 1747         tbl     v12.16b, { v12.16b }, v19.16b
 1748         orr     v10.16b, v10.16b, v15.16b
 1749         ushr    v15.4s, v9.4s, #7
 1750         shl     v9.4s, v9.4s, #25
 1751         eor     v2.16b, v1.16b, v2.16b
 1752         add     v5.4s, v5.4s, v12.4s
 1753         orr     v9.16b, v9.16b, v15.16b
 1754         ushr    v15.4s, v2.4s, #7
 1755         shl     v2.4s, v2.4s, #25
 1756         eor     v0.16b, v5.16b, v0.16b
 1757         orr     v2.16b, v2.16b, v15.16b
 1758         ushr    v15.4s, v0.4s, #7
 1759         shl     v0.4s, v0.4s, #25
 1760         orr     v0.16b, v0.16b, v15.16b
 1761         add     v17.4s, v17.4s, v18.4s
 1762         add     v17.4s, v17.4s, v0.4s
 1763         add     v4.4s, v4.4s, v22.4s
 1764         eor     v14.16b, v14.16b, v17.16b
 1765         add     v4.4s, v4.4s, v10.4s
 1766         add     v7.4s, v7.4s, v26.4s
 1767         tbl     v14.16b, { v14.16b }, v16.16b
 1768         eor     v11.16b, v11.16b, v4.16b
 1769         add     v7.4s, v7.4s, v9.4s
 1770         add     v13.4s, v13.4s, v23.4s
 1771         add     v1.4s, v1.4s, v14.4s
 1772         tbl     v11.16b, { v11.16b }, v16.16b
 1773         eor     v12.16b, v12.16b, v7.16b
 1774         add     v13.4s, v13.4s, v2.4s
 1775         mov     v21.16b, v29.16b
 1776         stur    q29, [x29, #-128]
 1777         mov     v29.16b, v30.16b
 1778         mov     v30.16b, v27.16b
 1779         mov     v27.16b, v18.16b
 1780         str     q18, [sp, #176]
 1781         eor     v0.16b, v0.16b, v1.16b
 1782         mov     v18.16b, v22.16b
 1783         add     v5.4s, v5.4s, v11.4s
 1784         tbl     v12.16b, { v12.16b }, v16.16b
 1785         eor     v3.16b, v3.16b, v13.16b
 1786         ldur    q22, [x29, #-96]
 1787         ushr    v15.4s, v0.4s, #12
 1788         shl     v0.4s, v0.4s, #20
 1789         eor     v10.16b, v5.16b, v10.16b
 1790         add     v6.4s, v6.4s, v12.4s
 1791         tbl     v3.16b, { v3.16b }, v16.16b
 1792         orr     v0.16b, v0.16b, v15.16b
 1793         ushr    v15.4s, v10.4s, #12
 1794         shl     v10.4s, v10.4s, #20
 1795         eor     v9.16b, v6.16b, v9.16b
 1796         add     v8.4s, v8.4s, v3.4s
 1797         add     v17.4s, v17.4s, v20.4s
 1798         orr     v10.16b, v10.16b, v15.16b
 1799         ushr    v15.4s, v9.4s, #12
 1800         shl     v9.4s, v9.4s, #20
 1801         eor     v2.16b, v8.16b, v2.16b
 1802         add     v17.4s, v17.4s, v0.4s
 1803         add     v4.4s, v4.4s, v29.4s
 1804         orr     v9.16b, v9.16b, v15.16b
 1805         ushr    v15.4s, v2.4s, #12
 1806         shl     v2.4s, v2.4s, #20
 1807         eor     v14.16b, v14.16b, v17.16b
 1808         add     v4.4s, v4.4s, v10.4s
 1809         add     v7.4s, v7.4s, v22.4s
 1810         orr     v2.16b, v2.16b, v15.16b
 1811         tbl     v14.16b, { v14.16b }, v19.16b
 1812         eor     v11.16b, v11.16b, v4.16b
 1813         add     v7.4s, v7.4s, v9.4s
 1814         add     v13.4s, v13.4s, v31.4s
 1815         add     v1.4s, v1.4s, v14.4s
 1816         tbl     v11.16b, { v11.16b }, v19.16b
 1817         eor     v12.16b, v12.16b, v7.16b
 1818         add     v13.4s, v13.4s, v2.4s
 1819         eor     v0.16b, v0.16b, v1.16b
 1820         add     v5.4s, v5.4s, v11.4s
 1821         tbl     v12.16b, { v12.16b }, v19.16b
 1822         eor     v3.16b, v3.16b, v13.16b
 1823         ushr    v15.4s, v0.4s, #7
 1824         shl     v0.4s, v0.4s, #25
 1825         eor     v10.16b, v5.16b, v10.16b
 1826         add     v6.4s, v6.4s, v12.4s
 1827         tbl     v3.16b, { v3.16b }, v19.16b
 1828         orr     v0.16b, v0.16b, v15.16b
 1829         ushr    v15.4s, v10.4s, #7
 1830         shl     v10.4s, v10.4s, #25
 1831         eor     v9.16b, v6.16b, v9.16b
 1832         add     v8.4s, v8.4s, v3.4s
 1833         orr     v10.16b, v10.16b, v15.16b
 1834         ushr    v15.4s, v9.4s, #7
 1835         shl     v9.4s, v9.4s, #25
 1836         eor     v2.16b, v8.16b, v2.16b
 1837         add     v17.4s, v17.4s, v21.4s
 1838         orr     v9.16b, v9.16b, v15.16b
 1839         ushr    v15.4s, v2.4s, #7
 1840         shl     v2.4s, v2.4s, #25
 1841         add     v17.4s, v17.4s, v10.4s
 1842         add     v4.4s, v4.4s, v24.4s
 1843         orr     v2.16b, v2.16b, v15.16b
 1844         eor     v3.16b, v3.16b, v17.16b
 1845         add     v4.4s, v4.4s, v9.4s
 1846         add     v7.4s, v7.4s, v30.4s
 1847         tbl     v3.16b, { v3.16b }, v16.16b
 1848         eor     v14.16b, v14.16b, v4.16b
 1849         add     v7.4s, v7.4s, v2.4s
 1850         add     v13.4s, v13.4s, v28.4s
 1851         add     v6.4s, v6.4s, v3.4s
 1852         mov     v22.16b, v24.16b
 1853         tbl     v14.16b, { v14.16b }, v16.16b
 1854         eor     v11.16b, v11.16b, v7.16b
 1855         add     v13.4s, v13.4s, v0.4s
 1856         ldur    q24, [x29, #-80]
 1857         eor     v10.16b, v6.16b, v10.16b
 1858         add     v8.4s, v8.4s, v14.4s
 1859         mov     v21.16b, v30.16b
 1860         tbl     v11.16b, { v11.16b }, v16.16b
 1861         eor     v12.16b, v12.16b, v13.16b
 1862         ldur    q30, [x29, #-192]
 1863         mov     v20.16b, v29.16b
 1864         ushr    v15.4s, v10.4s, #12
 1865         shl     v10.4s, v10.4s, #20
 1866         eor     v9.16b, v8.16b, v9.16b
 1867         add     v1.4s, v1.4s, v11.4s
 1868         tbl     v12.16b, { v12.16b }, v16.16b
 1869         ldur    q29, [x29, #-112]
 1870         orr     v10.16b, v10.16b, v15.16b
 1871         ushr    v15.4s, v9.4s, #12
 1872         shl     v9.4s, v9.4s, #20
 1873         eor     v2.16b, v1.16b, v2.16b
 1874         add     v5.4s, v5.4s, v12.4s
 1875         add     v17.4s, v17.4s, v25.4s
 1876         orr     v9.16b, v9.16b, v15.16b
 1877         ushr    v15.4s, v2.4s, #12
 1878         shl     v2.4s, v2.4s, #20
 1879         eor     v0.16b, v5.16b, v0.16b
 1880         add     v17.4s, v17.4s, v10.4s
 1881         add     v4.4s, v4.4s, v24.4s
 1882         orr     v2.16b, v2.16b, v15.16b
 1883         ushr    v15.4s, v0.4s, #12
 1884         shl     v0.4s, v0.4s, #20
 1885         eor     v3.16b, v3.16b, v17.16b
 1886         add     v4.4s, v4.4s, v9.4s
 1887         add     v7.4s, v7.4s, v30.4s
 1888         orr     v0.16b, v0.16b, v15.16b
 1889         tbl     v3.16b, { v3.16b }, v19.16b
 1890         eor     v14.16b, v14.16b, v4.16b
 1891         add     v7.4s, v7.4s, v2.4s
 1892         add     v13.4s, v13.4s, v29.4s
 1893         add     v6.4s, v6.4s, v3.4s
 1894         tbl     v14.16b, { v14.16b }, v19.16b
 1895         eor     v11.16b, v11.16b, v7.16b
 1896         add     v13.4s, v13.4s, v0.4s
 1897         eor     v10.16b, v6.16b, v10.16b
 1898         add     v8.4s, v8.4s, v14.4s
 1899         tbl     v11.16b, { v11.16b }, v19.16b
 1900         eor     v12.16b, v12.16b, v13.16b
 1901         ushr    v15.4s, v10.4s, #7
 1902         shl     v10.4s, v10.4s, #25
 1903         eor     v9.16b, v8.16b, v9.16b
 1904         add     v1.4s, v1.4s, v11.4s
 1905         tbl     v12.16b, { v12.16b }, v19.16b
 1906         orr     v10.16b, v10.16b, v15.16b
 1907         ushr    v15.4s, v9.4s, #7
 1908         shl     v9.4s, v9.4s, #25
 1909         eor     v2.16b, v1.16b, v2.16b
 1910         add     v5.4s, v5.4s, v12.4s
 1911         orr     v9.16b, v9.16b, v15.16b
 1912         ushr    v15.4s, v2.4s, #7
 1913         shl     v2.4s, v2.4s, #25
 1914         eor     v0.16b, v5.16b, v0.16b
 1915         orr     v2.16b, v2.16b, v15.16b
 1916         ushr    v15.4s, v0.4s, #7
 1917         shl     v0.4s, v0.4s, #25
 1918         orr     v0.16b, v0.16b, v15.16b
 1919         add     v17.4s, v17.4s, v18.4s
 1920         add     v17.4s, v17.4s, v0.4s
 1921         add     v4.4s, v4.4s, v20.4s
 1922         eor     v14.16b, v14.16b, v17.16b
 1923         add     v4.4s, v4.4s, v10.4s
 1924         add     v7.4s, v7.4s, v31.4s
 1925         tbl     v14.16b, { v14.16b }, v16.16b
 1926         eor     v11.16b, v11.16b, v4.16b
 1927         add     v7.4s, v7.4s, v9.4s
 1928         add     v13.4s, v13.4s, v26.4s
 1929         add     v1.4s, v1.4s, v14.4s
 1930         tbl     v11.16b, { v11.16b }, v16.16b
 1931         eor     v12.16b, v12.16b, v7.16b
 1932         add     v13.4s, v13.4s, v2.4s
 1933         eor     v0.16b, v0.16b, v1.16b
 1934         add     v5.4s, v5.4s, v11.4s
 1935         tbl     v12.16b, { v12.16b }, v16.16b
 1936         eor     v3.16b, v3.16b, v13.16b
 1937         ushr    v15.4s, v0.4s, #12
 1938         shl     v0.4s, v0.4s, #20
 1939         eor     v10.16b, v5.16b, v10.16b
 1940         add     v6.4s, v6.4s, v12.4s
 1941         tbl     v3.16b, { v3.16b }, v16.16b
 1942         orr     v0.16b, v0.16b, v15.16b
 1943         ushr    v15.4s, v10.4s, #12
 1944         shl     v10.4s, v10.4s, #20
 1945         eor     v9.16b, v6.16b, v9.16b
 1946         add     v8.4s, v8.4s, v3.4s
 1947         add     v17.4s, v17.4s, v23.4s
 1948         orr     v10.16b, v10.16b, v15.16b
 1949         ushr    v15.4s, v9.4s, #12
 1950         shl     v9.4s, v9.4s, #20
 1951         eor     v2.16b, v8.16b, v2.16b
 1952         add     v17.4s, v17.4s, v0.4s
 1953         add     v4.4s, v4.4s, v22.4s
 1954         orr     v9.16b, v9.16b, v15.16b
 1955         ushr    v15.4s, v2.4s, #12
 1956         shl     v2.4s, v2.4s, #20
 1957         eor     v14.16b, v14.16b, v17.16b
 1958         add     v4.4s, v4.4s, v10.4s
 1959         add     v7.4s, v7.4s, v27.4s
 1960         orr     v2.16b, v2.16b, v15.16b
 1961         tbl     v14.16b, { v14.16b }, v19.16b
 1962         eor     v11.16b, v11.16b, v4.16b
 1963         add     v7.4s, v7.4s, v9.4s
 1964         add     v13.4s, v13.4s, v30.4s
 1965         add     v1.4s, v1.4s, v14.4s
 1966         tbl     v11.16b, { v11.16b }, v19.16b
 1967         eor     v12.16b, v12.16b, v7.16b
 1968         add     v13.4s, v13.4s, v2.4s
 1969         ldur    q27, [x29, #-160]
 1970         eor     v0.16b, v0.16b, v1.16b
 1971         add     v5.4s, v5.4s, v11.4s
 1972         tbl     v12.16b, { v12.16b }, v19.16b
 1973         eor     v3.16b, v3.16b, v13.16b
 1974         ushr    v15.4s, v0.4s, #7
 1975         shl     v0.4s, v0.4s, #25
 1976         eor     v10.16b, v5.16b, v10.16b
 1977         add     v6.4s, v6.4s, v12.4s
 1978         tbl     v3.16b, { v3.16b }, v19.16b
 1979         orr     v0.16b, v0.16b, v15.16b
 1980         ushr    v15.4s, v10.4s, #7
 1981         shl     v10.4s, v10.4s, #25
 1982         eor     v9.16b, v6.16b, v9.16b
 1983         add     v8.4s, v8.4s, v3.4s
 1984         orr     v10.16b, v10.16b, v15.16b
 1985         ushr    v15.4s, v9.4s, #7
 1986         shl     v9.4s, v9.4s, #25
 1987         eor     v2.16b, v8.16b, v2.16b
 1988         add     v17.4s, v17.4s, v27.4s
 1989         mov     v28.16b, v25.16b
 1990         orr     v9.16b, v9.16b, v15.16b
 1991         ushr    v15.4s, v2.4s, #7
 1992         shl     v2.4s, v2.4s, #25
 1993         add     v17.4s, v17.4s, v10.4s
 1994         add     v4.4s, v4.4s, v21.4s
 1995         orr     v2.16b, v2.16b, v15.16b
 1996         eor     v3.16b, v3.16b, v17.16b
 1997         add     v4.4s, v4.4s, v9.4s
 1998         add     v7.4s, v7.4s, v28.4s
 1999         tbl     v3.16b, { v3.16b }, v16.16b
 2000         eor     v14.16b, v14.16b, v4.16b
 2001         add     v7.4s, v7.4s, v2.4s
 2002         add     v13.4s, v13.4s, v29.4s
 2003         mov     v25.16b, v31.16b
 2004         add     v6.4s, v6.4s, v3.4s
 2005         tbl     v14.16b, { v14.16b }, v16.16b
 2006         eor     v11.16b, v11.16b, v7.16b
 2007         add     v13.4s, v13.4s, v0.4s
 2008         ldur    q31, [x29, #-96]
 2009         eor     v10.16b, v6.16b, v10.16b
 2010         add     v8.4s, v8.4s, v14.4s
 2011         tbl     v11.16b, { v11.16b }, v16.16b
 2012         eor     v12.16b, v12.16b, v13.16b
 2013         ldur    q28, [x29, #-208]
 2014         mov     v18.16b, v20.16b
 2015         str     q20, [sp, #144]
 2016         ushr    v15.4s, v10.4s, #12
 2017         shl     v10.4s, v10.4s, #20
 2018         eor     v9.16b, v8.16b, v9.16b
 2019         add     v1.4s, v1.4s, v11.4s
 2020         tbl     v12.16b, { v12.16b }, v16.16b
 2021         ldur    q20, [x29, #-128]
 2022         orr     v10.16b, v10.16b, v15.16b
 2023         ushr    v15.4s, v9.4s, #12
 2024         shl     v9.4s, v9.4s, #20
 2025         eor     v2.16b, v1.16b, v2.16b
 2026         add     v5.4s, v5.4s, v12.4s
 2027         add     v17.4s, v17.4s, v24.4s
 2028         orr     v9.16b, v9.16b, v15.16b
 2029         ushr    v15.4s, v2.4s, #12
 2030         shl     v2.4s, v2.4s, #20
 2031         eor     v0.16b, v5.16b, v0.16b
 2032         add     v17.4s, v17.4s, v10.4s
 2033         add     v4.4s, v4.4s, v31.4s
 2034         orr     v2.16b, v2.16b, v15.16b
 2035         ushr    v15.4s, v0.4s, #12
 2036         shl     v0.4s, v0.4s, #20
 2037         eor     v3.16b, v3.16b, v17.16b
 2038         add     v4.4s, v4.4s, v9.4s
 2039         add     v7.4s, v7.4s, v28.4s
 2040         orr     v0.16b, v0.16b, v15.16b
 2041         tbl     v3.16b, { v3.16b }, v19.16b
 2042         eor     v14.16b, v14.16b, v4.16b
 2043         add     v7.4s, v7.4s, v2.4s
 2044         add     v13.4s, v13.4s, v20.4s
 2045         add     v6.4s, v6.4s, v3.4s
 2046         tbl     v14.16b, { v14.16b }, v19.16b
 2047         eor     v11.16b, v11.16b, v7.16b
 2048         add     v13.4s, v13.4s, v0.4s
 2049         eor     v10.16b, v6.16b, v10.16b
 2050         add     v8.4s, v8.4s, v14.4s
 2051         tbl     v11.16b, { v11.16b }, v19.16b
 2052         eor     v12.16b, v12.16b, v13.16b
 2053         ushr    v15.4s, v10.4s, #7
 2054         shl     v10.4s, v10.4s, #25
 2055         eor     v9.16b, v8.16b, v9.16b
 2056         add     v1.4s, v1.4s, v11.4s
 2057         tbl     v12.16b, { v12.16b }, v19.16b
 2058         orr     v10.16b, v10.16b, v15.16b
 2059         ushr    v15.4s, v9.4s, #7
 2060         shl     v9.4s, v9.4s, #25
 2061         eor     v2.16b, v1.16b, v2.16b
 2062         add     v5.4s, v5.4s, v12.4s
 2063         orr     v9.16b, v9.16b, v15.16b
 2064         ushr    v15.4s, v2.4s, #7
 2065         shl     v2.4s, v2.4s, #25
 2066         eor     v0.16b, v5.16b, v0.16b
 2067         orr     v2.16b, v2.16b, v15.16b
 2068         ushr    v15.4s, v0.4s, #7
 2069         shl     v0.4s, v0.4s, #25
 2070         orr     v0.16b, v0.16b, v15.16b
 2071         add     v17.4s, v17.4s, v18.4s
 2072         add     v17.4s, v17.4s, v0.4s
 2073         add     v4.4s, v4.4s, v22.4s
 2074         eor     v14.16b, v14.16b, v17.16b
 2075         add     v4.4s, v4.4s, v10.4s
 2076         add     v7.4s, v7.4s, v30.4s
 2077         tbl     v14.16b, { v14.16b }, v16.16b
 2078         eor     v11.16b, v11.16b, v4.16b
 2079         add     v7.4s, v7.4s, v9.4s
 2080         add     v13.4s, v13.4s, v25.4s
 2081         add     v1.4s, v1.4s, v14.4s
 2082         tbl     v11.16b, { v11.16b }, v16.16b
 2083         eor     v12.16b, v12.16b, v7.16b
 2084         add     v13.4s, v13.4s, v2.4s
 2085         eor     v0.16b, v0.16b, v1.16b
 2086         add     v5.4s, v5.4s, v11.4s
 2087         tbl     v12.16b, { v12.16b }, v16.16b
 2088         eor     v3.16b, v3.16b, v13.16b
 2089         add     v17.4s, v17.4s, v26.4s
 2090         mov     v26.16b, v21.16b
 2091         add     v4.4s, v4.4s, v21.4s
 2092         ldur    q21, [x29, #-144]
 2093         ushr    v15.4s, v0.4s, #12
 2094         shl     v0.4s, v0.4s, #20
 2095         eor     v10.16b, v5.16b, v10.16b
 2096         add     v6.4s, v6.4s, v12.4s
 2097         tbl     v3.16b, { v3.16b }, v16.16b
 2098         orr     v0.16b, v0.16b, v15.16b
 2099         ushr    v15.4s, v10.4s, #12
 2100         shl     v10.4s, v10.4s, #20
 2101         eor     v9.16b, v6.16b, v9.16b
 2102         add     v8.4s, v8.4s, v3.4s
 2103         orr     v10.16b, v10.16b, v15.16b
 2104         ushr    v15.4s, v9.4s, #12
 2105         shl     v9.4s, v9.4s, #20
 2106         eor     v2.16b, v8.16b, v2.16b
 2107         add     v17.4s, v17.4s, v0.4s
 2108         orr     v9.16b, v9.16b, v15.16b
 2109         ushr    v15.4s, v2.4s, #12
 2110         shl     v2.4s, v2.4s, #20
 2111         eor     v14.16b, v14.16b, v17.16b
 2112         add     v4.4s, v4.4s, v10.4s
 2113         add     v7.4s, v7.4s, v21.4s
 2114         orr     v2.16b, v2.16b, v15.16b
 2115         tbl     v14.16b, { v14.16b }, v19.16b
 2116         eor     v11.16b, v11.16b, v4.16b
 2117         add     v7.4s, v7.4s, v9.4s
 2118         add     v13.4s, v13.4s, v28.4s
 2119         add     v1.4s, v1.4s, v14.4s
 2120         tbl     v11.16b, { v11.16b }, v19.16b
 2121         eor     v12.16b, v12.16b, v7.16b
 2122         add     v13.4s, v13.4s, v2.4s
 2123         str     q23, [sp, #160]
 2124         eor     v0.16b, v0.16b, v1.16b
 2125         add     v5.4s, v5.4s, v11.4s
 2126         tbl     v12.16b, { v12.16b }, v19.16b
 2127         eor     v3.16b, v3.16b, v13.16b
 2128         add     v17.4s, v17.4s, v23.4s
 2129         ldur    q23, [x29, #-64]
 2130         ushr    v15.4s, v0.4s, #7
 2131         shl     v0.4s, v0.4s, #25
 2132         eor     v10.16b, v5.16b, v10.16b
 2133         add     v6.4s, v6.4s, v12.4s
 2134         tbl     v3.16b, { v3.16b }, v19.16b
 2135         orr     v0.16b, v0.16b, v15.16b
 2136         ushr    v15.4s, v10.4s, #7
 2137         shl     v10.4s, v10.4s, #25
 2138         eor     v9.16b, v6.16b, v9.16b
 2139         add     v8.4s, v8.4s, v3.4s
 2140         orr     v10.16b, v10.16b, v15.16b
 2141         ushr    v15.4s, v9.4s, #7
 2142         shl     v9.4s, v9.4s, #25
 2143         eor     v2.16b, v8.16b, v2.16b
 2144         orr     v9.16b, v9.16b, v15.16b
 2145         ushr    v15.4s, v2.4s, #7
 2146         shl     v2.4s, v2.4s, #25
 2147         add     v17.4s, v17.4s, v10.4s
 2148         add     v4.4s, v4.4s, v23.4s
 2149         orr     v2.16b, v2.16b, v15.16b
 2150         eor     v3.16b, v3.16b, v17.16b
 2151         add     v4.4s, v4.4s, v9.4s
 2152         add     v7.4s, v7.4s, v24.4s
 2153         tbl     v3.16b, { v3.16b }, v16.16b
 2154         eor     v14.16b, v14.16b, v4.16b
 2155         add     v7.4s, v7.4s, v2.4s
 2156         add     v6.4s, v6.4s, v3.4s
 2157         tbl     v14.16b, { v14.16b }, v16.16b
 2158         eor     v11.16b, v11.16b, v7.16b
 2159         add     v13.4s, v13.4s, v20.4s
 2160         eor     v10.16b, v6.16b, v10.16b
 2161         add     v8.4s, v8.4s, v14.4s
 2162         tbl     v11.16b, { v11.16b }, v16.16b
 2163         add     v13.4s, v13.4s, v0.4s
 2164         ldr     q20, [sp, #176]
 2165         ushr    v15.4s, v10.4s, #12
 2166         shl     v10.4s, v10.4s, #20
 2167         eor     v9.16b, v8.16b, v9.16b
 2168         add     v1.4s, v1.4s, v11.4s
 2169         eor     v12.16b, v12.16b, v13.16b
 2170         orr     v10.16b, v10.16b, v15.16b
 2171         ushr    v15.4s, v9.4s, #12
 2172         shl     v9.4s, v9.4s, #20
 2173         eor     v2.16b, v1.16b, v2.16b
 2174         tbl     v12.16b, { v12.16b }, v16.16b
 2175         orr     v9.16b, v9.16b, v15.16b
 2176         ushr    v15.4s, v2.4s, #12
 2177         shl     v2.4s, v2.4s, #20
 2178         add     v5.4s, v5.4s, v12.4s
 2179         add     v17.4s, v17.4s, v31.4s
 2180         orr     v2.16b, v2.16b, v15.16b
 2181         eor     v0.16b, v5.16b, v0.16b
 2182         add     v17.4s, v17.4s, v10.4s
 2183         add     v4.4s, v4.4s, v20.4s
 2184         add     v7.4s, v7.4s, v29.4s
 2185         ushr    v15.4s, v0.4s, #12
 2186         shl     v0.4s, v0.4s, #20
 2187         eor     v3.16b, v3.16b, v17.16b
 2188         add     v4.4s, v4.4s, v9.4s
 2189         add     v7.4s, v7.4s, v2.4s
 2190         orr     v0.16b, v0.16b, v15.16b
 2191         mov     v15.16b, v31.16b
 2192         add     v17.4s, v17.4s, v22.4s
 2193         eor     v31.16b, v14.16b, v4.16b
 2194         eor     v22.16b, v11.16b, v7.16b
 2195         add     v11.4s, v13.4s, v27.4s
 2196         tbl     v3.16b, { v3.16b }, v19.16b
 2197         add     v11.4s, v11.4s, v0.4s
 2198         tbl     v31.16b, { v31.16b }, v19.16b
 2199         add     v6.4s, v6.4s, v3.4s
 2200         eor     v12.16b, v12.16b, v11.16b
 2201         tbl     v22.16b, { v22.16b }, v19.16b
 2202         add     v8.4s, v8.4s, v31.4s
 2203         eor     v10.16b, v6.16b, v10.16b
 2204         add     v30.4s, v11.4s, v30.4s
 2205         tbl     v11.16b, { v12.16b }, v19.16b
 2206         add     v1.4s, v1.4s, v22.4s
 2207         eor     v9.16b, v8.16b, v9.16b
 2208         ushr    v12.4s, v10.4s, #7
 2209         shl     v10.4s, v10.4s, #25
 2210         add     v5.4s, v5.4s, v11.4s
 2211         eor     v2.16b, v1.16b, v2.16b
 2212         orr     v10.16b, v10.16b, v12.16b
 2213         ushr    v12.4s, v9.4s, #7
 2214         shl     v9.4s, v9.4s, #25
 2215         eor     v0.16b, v5.16b, v0.16b
 2216         orr     v9.16b, v9.16b, v12.16b
 2217         ushr    v12.4s, v2.4s, #7
 2218         shl     v2.4s, v2.4s, #25
 2219         orr     v2.16b, v2.16b, v12.16b
 2220         ushr    v12.4s, v0.4s, #7
 2221         shl     v0.4s, v0.4s, #25
 2222         orr     v0.16b, v0.16b, v12.16b
 2223         add     v4.4s, v4.4s, v26.4s
 2224         add     v17.4s, v17.4s, v0.4s
 2225         add     v7.4s, v7.4s, v28.4s
 2226         mov     v18.16b, v27.16b
 2227         eor     v31.16b, v31.16b, v17.16b
 2228         add     v4.4s, v4.4s, v10.4s
 2229         add     v27.4s, v30.4s, v2.4s
 2230         eor     v22.16b, v22.16b, v4.16b
 2231         add     v7.4s, v7.4s, v9.4s
 2232         eor     v3.16b, v3.16b, v27.16b
 2233         add     v26.4s, v27.4s, v29.4s
 2234         tbl     v27.16b, { v31.16b }, v16.16b
 2235         eor     v28.16b, v11.16b, v7.16b
 2236         tbl     v22.16b, { v22.16b }, v16.16b
 2237         add     v1.4s, v1.4s, v27.4s
 2238         add     v4.4s, v4.4s, v23.4s
 2239         ldr     q23, [sp, #144]
 2240         tbl     v28.16b, { v28.16b }, v16.16b
 2241         tbl     v3.16b, { v3.16b }, v16.16b
 2242         add     v5.4s, v5.4s, v22.4s
 2243         eor     v0.16b, v0.16b, v1.16b
 2244         add     v6.4s, v6.4s, v28.4s
 2245         add     v29.4s, v8.4s, v3.4s
 2246         eor     v30.16b, v5.16b, v10.16b
 2247         ushr    v8.4s, v0.4s, #12
 2248         shl     v0.4s, v0.4s, #20
 2249         eor     v31.16b, v6.16b, v9.16b
 2250         orr     v0.16b, v0.16b, v8.16b
 2251         ushr    v8.4s, v30.4s, #12
 2252         shl     v30.4s, v30.4s, #20
 2253         eor     v2.16b, v29.16b, v2.16b
 2254         orr     v30.16b, v30.16b, v8.16b
 2255         ushr    v8.4s, v31.4s, #12
 2256         shl     v31.4s, v31.4s, #20
 2257         add     v17.4s, v17.4s, v25.4s
 2258         add     v7.4s, v7.4s, v23.4s
 2259         orr     v31.16b, v31.16b, v8.16b
 2260         ushr    v8.4s, v2.4s, #12
 2261         shl     v2.4s, v2.4s, #20
 2262         ldur    q23, [x29, #-176]
 2263         orr     v2.16b, v2.16b, v8.16b
 2264         add     v17.4s, v17.4s, v0.4s
 2265         eor     v27.16b, v27.16b, v17.16b
 2266         add     v4.4s, v4.4s, v30.4s
 2267         add     v25.4s, v26.4s, v2.4s
 2268         eor     v22.16b, v22.16b, v4.16b
 2269         add     v4.4s, v4.4s, v24.4s
 2270         add     v7.4s, v7.4s, v31.4s
 2271         eor     v3.16b, v3.16b, v25.16b
 2272         add     v24.4s, v25.4s, v18.4s
 2273         tbl     v25.16b, { v27.16b }, v19.16b
 2274         add     v17.4s, v17.4s, v23.4s
 2275         eor     v23.16b, v28.16b, v7.16b
 2276         tbl     v22.16b, { v22.16b }, v19.16b
 2277         add     v1.4s, v1.4s, v25.4s
 2278         tbl     v23.16b, { v23.16b }, v19.16b
 2279         tbl     v3.16b, { v3.16b }, v19.16b
 2280         add     v5.4s, v5.4s, v22.4s
 2281         eor     v0.16b, v0.16b, v1.16b
 2282         add     v6.4s, v6.4s, v23.4s
 2283         add     v26.4s, v29.4s, v3.4s
 2284         eor     v27.16b, v5.16b, v30.16b
 2285         ushr    v29.4s, v0.4s, #7
 2286         shl     v0.4s, v0.4s, #25
 2287         eor     v28.16b, v6.16b, v31.16b
 2288         orr     v0.16b, v0.16b, v29.16b
 2289         ushr    v29.4s, v27.4s, #7
 2290         shl     v27.4s, v27.4s, #25
 2291         eor     v2.16b, v26.16b, v2.16b
 2292         orr     v27.16b, v27.16b, v29.16b
 2293         ushr    v29.4s, v28.4s, #7
 2294         shl     v28.4s, v28.4s, #25
 2295         ldur    q18, [x29, #-128]
 2296         orr     v28.16b, v28.16b, v29.16b
 2297         ushr    v29.4s, v2.4s, #7
 2298         shl     v2.4s, v2.4s, #25
 2299         add     v7.4s, v7.4s, v15.4s
 2300         orr     v2.16b, v2.16b, v29.16b
 2301         add     v17.4s, v17.4s, v27.4s
 2302         add     v4.4s, v4.4s, v28.4s
 2303         add     v7.4s, v7.4s, v2.4s
 2304         eor     v3.16b, v3.16b, v17.16b
 2305         add     v17.4s, v17.4s, v20.4s
 2306         eor     v20.16b, v25.16b, v4.16b
 2307         add     v4.4s, v4.4s, v21.4s
 2308         eor     v21.16b, v22.16b, v7.16b
 2309         add     v7.4s, v7.4s, v18.4s
 2310         add     v18.4s, v24.4s, v0.4s
 2311         eor     v22.16b, v23.16b, v18.16b
 2312         ldr     q23, [sp, #160]
 2313         tbl     v3.16b, { v3.16b }, v16.16b
 2314         tbl     v20.16b, { v20.16b }, v16.16b
 2315         add     v6.4s, v6.4s, v3.4s
 2316         add     v18.4s, v18.4s, v23.4s
 2317         tbl     v21.16b, { v21.16b }, v16.16b
 2318         tbl     v16.16b, { v22.16b }, v16.16b
 2319         add     v22.4s, v26.4s, v20.4s
 2320         eor     v23.16b, v6.16b, v27.16b
 2321         add     v1.4s, v1.4s, v21.4s
 2322         eor     v24.16b, v22.16b, v28.16b
 2323         ushr    v25.4s, v23.4s, #12
 2324         shl     v23.4s, v23.4s, #20
 2325         add     v5.4s, v5.4s, v16.4s
 2326         eor     v2.16b, v1.16b, v2.16b
 2327         orr     v23.16b, v23.16b, v25.16b
 2328         ushr    v25.4s, v24.4s, #12
 2329         shl     v24.4s, v24.4s, #20
 2330         eor     v0.16b, v5.16b, v0.16b
 2331         orr     v24.16b, v24.16b, v25.16b
 2332         ushr    v25.4s, v2.4s, #12
 2333         shl     v2.4s, v2.4s, #20
 2334         orr     v2.16b, v2.16b, v25.16b
 2335         ushr    v25.4s, v0.4s, #12
 2336         shl     v0.4s, v0.4s, #20
 2337         orr     v0.16b, v0.16b, v25.16b
 2338         add     v25.4s, v7.4s, v2.4s
 2339         add     v26.4s, v18.4s, v0.4s
 2340         eor     v18.16b, v21.16b, v25.16b
 2341         add     v17.4s, v17.4s, v23.4s
 2342         add     v4.4s, v4.4s, v24.4s
 2343         eor     v16.16b, v16.16b, v26.16b
 2344         tbl     v21.16b, { v18.16b }, v19.16b
 2345         eor     v3.16b, v3.16b, v17.16b
 2346         eor     v7.16b, v20.16b, v4.16b
 2347         tbl     v16.16b, { v16.16b }, v19.16b
 2348         add     v1.4s, v1.4s, v21.4s
 2349         tbl     v3.16b, { v3.16b }, v19.16b
 2350         tbl     v20.16b, { v7.16b }, v19.16b
 2351         eor     v2.16b, v1.16b, v2.16b
 2352         eor     v7.16b, v1.16b, v17.16b
 2353         add     v1.4s, v5.4s, v16.4s
 2354         eor     v0.16b, v1.16b, v0.16b
 2355         eor     v18.16b, v1.16b, v4.16b
 2356         add     v1.4s, v6.4s, v3.4s
 2357         eor     v4.16b, v1.16b, v23.16b
 2358         eor     v6.16b, v25.16b, v1.16b
 2359         add     v1.4s, v22.4s, v20.4s
 2360         eor     v5.16b, v1.16b, v24.16b
 2361         eor     v17.16b, v26.16b, v1.16b
 2362         ushr    v1.4s, v4.4s, #7
 2363         shl     v4.4s, v4.4s, #25
 2364         orr     v1.16b, v4.16b, v1.16b
 2365         ushr    v4.4s, v5.4s, #7
 2366         shl     v5.4s, v5.4s, #25
 2367         orr     v4.16b, v5.16b, v4.16b
 2368         ushr    v5.4s, v2.4s, #7
 2369         shl     v2.4s, v2.4s, #25
 2370         orr     v2.16b, v2.16b, v5.16b
 2371         ushr    v5.4s, v0.4s, #7
 2372         shl     v0.4s, v0.4s, #25
 2373         orr     v0.16b, v0.16b, v5.16b
 2374         eor     v10.16b, v0.16b, v20.16b
 2375         eor     v11.16b, v1.16b, v21.16b
 2376         eor     v19.16b, v4.16b, v16.16b
 2377         cmp     x0, x22
 2378         eor     v16.16b, v2.16b, v3.16b
 2379         mov     w6, w19
 2380         b.ne    .LBB2_4
 2381 .LBB2_7:
 2382         zip1    v0.4s, v7.4s, v18.4s
 2383         zip2    v1.4s, v7.4s, v18.4s
 2384         zip1    v2.4s, v6.4s, v17.4s
 2385         zip2    v3.4s, v6.4s, v17.4s
 2386         zip1    v4.4s, v10.4s, v11.4s
 2387         zip2    v5.4s, v10.4s, v11.4s
 2388         zip1    v6.4s, v19.4s, v16.4s
 2389         zip2    v7.4s, v19.4s, v16.4s
 2390         add     x15, x20, #4
 2391         tst     w5, #0x1
 2392         sub     x28, x28, #4
 2393         zip1    v16.2d, v0.2d, v2.2d
 2394         zip2    v0.2d, v0.2d, v2.2d
 2395         zip1    v2.2d, v1.2d, v3.2d
 2396         zip2    v1.2d, v1.2d, v3.2d
 2397         zip1    v3.2d, v4.2d, v6.2d
 2398         zip2    v4.2d, v4.2d, v6.2d
 2399         zip1    v6.2d, v5.2d, v7.2d
 2400         zip2    v5.2d, v5.2d, v7.2d
 2401         add     x24, x24, #32
 2402         csel    x20, x15, x20, ne
 2403         cmp     x28, #3
 2404         stp     q16, q3, [x26]
 2405         stp     q0, q4, [x26, #32]
 2406         stp     q2, q6, [x26, #64]
 2407         stp     q1, q5, [x26, #96]
 2408         add     x26, x26, #128
 2409         b.hi    .LBB2_2
 2410 .LBB2_8:
 2411         cbz     x28, .LBB2_16
 2412         orr     w8, w7, w19
 2413         and     x21, x5, #0x1
 2414         stur    w8, [x29, #-64]
 2415 .LBB2_10:
 2416         ldr     x8, [sp, #40]
 2417         ldr     x25, [x24]
 2418         ldur    w4, [x29, #-64]
 2419         ldp     q1, q0, [x8]
 2420         mov     x8, x22
 2421         stp     q1, q0, [x29, #-48]
 2422 .LBB2_11:
 2423         subs    x23, x8, #1
 2424         b.eq    .LBB2_13
 2425         cbnz    x8, .LBB2_14
 2426         b       .LBB2_15
 2427 .LBB2_13:
 2428         orr     w4, w4, w27
 2429 .LBB2_14:
 2430         sub     x0, x29, #48
 2431         mov     w2, #64
 2432         mov     x1, x25
 2433         mov     x3, x20
 2434         bl      zfs_blake3_compress_in_place_sse41
 2435         add     x25, x25, #64
 2436         mov     x8, x23
 2437         mov     w4, w19
 2438         b       .LBB2_11
 2439 .LBB2_15:
 2440         ldp     q0, q1, [x29, #-48]
 2441         add     x20, x20, x21
 2442         add     x24, x24, #8
 2443         subs    x28, x28, #1
 2444         stp     q0, q1, [x26], #32
 2445         b.ne    .LBB2_10
 2446 .LBB2_16:
 2447         add     sp, sp, #448
 2448         ldp     x20, x19, [sp, #144]
 2449         ldp     x22, x21, [sp, #128]
 2450         ldp     x24, x23, [sp, #112]
 2451         ldp     x26, x25, [sp, #96]
 2452         ldp     x28, x27, [sp, #80]
 2453         ldp     x29, x30, [sp, #64]
 2454         ldp     d9, d8, [sp, #48]
 2455         ldp     d11, d10, [sp, #32]
 2456         ldp     d13, d12, [sp, #16]
 2457         ldp     d15, d14, [sp], #160
 2458         ret
 2459 .Lfunc_end2:
 2460         .size   zfs_blake3_hash_many_sse41, .Lfunc_end2-zfs_blake3_hash_many_sse41
 2461         .cfi_endproc
 2462         .section        ".note.GNU-stack","",@progbits
 2463 #endif

Cache object: df617b3ba1e112de58d22b1af1ea672d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.